Binary files /tmp/tmpbxizuc2k/IdJh2OBgKu/mgcv-1.8-40/build/partial.rdb and /tmp/tmpbxizuc2k/8NfxokhoM9/mgcv-1.8-41/build/partial.rdb differ diff -Nru mgcv-1.8-40/ChangeLog mgcv-1.8-41/ChangeLog --- mgcv-1.8-40/ChangeLog 2022-03-22 09:38:40.000000000 +0000 +++ mgcv-1.8-41/ChangeLog 2022-10-20 20:31:59.000000000 +0000 @@ -27,7 +27,56 @@ which no smoother is penalized. Also there is no option to use the identical discretization to that used in fitting when predicting. -* bam(..., discrete=TRUE) te fixed sp ordering wrong. + +1.8-41 + +** 'cnorm' family added for left, right, interval or uncensored Gaussian + data. Useful for log normal Accelerated Failure Time models, Tobit + regression, rounded data etc. + +** 'sz' factor smooth interaction class added for implementing models with + main effect smooths and difference smooths for levels of a factor. + See ?factor.smooth. + +** 'NCV' smoothing parameter method added, but still experimental. + +* replacement of some old (K&R) style C function declarations. + +* mono.con corrected for cases with upper bounds. Thanks to Sean Wu. + +* plot.gam(...,seWithMean=TRUE) modified to only include mean uncertainty + for the linear predictor of which the smooth is a part, when there are + multiple linear predictors. Thanks to Gavin Simpson. + +* modifications of sparse matrix coercions, to avoid deprecated direct + coercions. as(as(as(a, "dMatrix"), "generalMatrix"), "XsparseMatrix") in + place of as(a,"dgXMatrix") where 'X' is 'C' or 'T'. Actually this requires + Matrix 1.4-2 to work (will be added to dependencies in future). + +* vis.gam now deals properly with models with more than one linear predictor. + +* slight change in bgam.fitd to check scale parameter estimate converged when + using bam(...,discrete=TRUE), otherwise scale could be wrong for all fixed + smoothing parameters. + +* predict.gam modified so that 'terms' and 'exclude' control all terms, smooth + or parametric, in the same way, including the "(Intercept)" term. + +* Warnings from 'model.matrix' suppressed in 'terms2tensor' called by e.g. + predict.bamd. There is a warning if any extra contrasts are supplied to + model matrix that do not relate to a term in the model (which contradicts + the documentation). Doc vs code bug report also filed. + +* Fix of broken rank deficiency handling in gam.fit5. Thanks to Cesko Voeten. + +* trind.generator modified to allow return of index functions in place + of index arrays. + +* summary.gam (recov/reTest) modified to deal with 'fs' smooths fitted using + 'gamm'. + +* gam.fit4 convergence testing improved and bug fix in computation of dVkk + matrix used to check for converged 'infinite' smoothing parameters in bfgs.
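The two headline 1.8-41 additions above can be exercised together. The following minimal R sketch is illustrative only and not part of the patch: it assumes mgcv >= 1.8-41 is installed and simply reuses the simulated data returned by gamSim(4) (columns y, x0, x2 and the factor fac), following the censoring conventions documented in ?cnorm.

library(mgcv)
set.seed(1)
dat <- gamSim(4,n=400)              ## simulated data with factor 'fac' and covariates x0, x2

## 'cnorm' family: left censor a shifted copy of the response at zero (Tobit-like setup)
ys <- dat$y - 5
yc <- cbind(pmax(ys,0),pmax(ys,0))  ## two column response matrix
yc[ys<0,2] <- -Inf                  ## -Inf in column 2 marks left censoring at the column 1 value
dat$yc <- yc
b1 <- gam(yc ~ s(x2) + s(x0),family=cnorm(),data=dat)

## 'sz' class: main effect smooth of x2 plus per-level difference smooths
b2 <- gam(y ~ s(x2) + s(fac,x2,bs="sz") + s(x0),data=dat,method="REML")
plot(b2,pages=1)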
1.8-40 diff -Nru mgcv-1.8-40/debian/changelog mgcv-1.8-41/debian/changelog --- mgcv-1.8-40/debian/changelog 2022-04-30 21:44:41.000000000 +0000 +++ mgcv-1.8-41/debian/changelog 2022-10-31 01:08:44.000000000 +0000 @@ -1,8 +1,17 @@ -mgcv (1.8-40-1.2204.0) jammy; urgency=medium +mgcv (1.8-41-1.2204.0) jammy; urgency=medium - * Compilation for Ubuntu 22.04 LTS + * Compilation for Ubuntu 22.04.1 LTS - -- Michael Rutter Sat, 30 Apr 2022 21:44:41 +0000 + -- Michael Rutter Mon, 31 Oct 2022 01:08:44 +0000 + +mgcv (1.8-41-1) unstable; urgency=medium + + * New upstream release + + * debian/control: Set Build-Depends: to current R version + * debian/control: Set Standards-Version: to current version + + -- Dirk Eddelbuettel Fri, 21 Oct 2022 09:15:51 -0500 mgcv (1.8-40-1) unstable; urgency=medium diff -Nru mgcv-1.8-40/debian/compat mgcv-1.8-41/debian/compat --- mgcv-1.8-40/debian/compat 1970-01-01 00:00:00.000000000 +0000 +++ mgcv-1.8-41/debian/compat 2022-10-31 01:08:44.000000000 +0000 @@ -0,0 +1 @@ +10 diff -Nru mgcv-1.8-40/debian/control mgcv-1.8-41/debian/control --- mgcv-1.8-40/debian/control 2022-03-29 12:36:08.000000000 +0000 +++ mgcv-1.8-41/debian/control 2022-10-31 01:08:44.000000000 +0000 @@ -2,8 +2,8 @@ Section: gnu-r Priority: optional Maintainer: Dirk Eddelbuettel -Build-Depends: debhelper-compat (= 11), r-base-dev (>= 4.1.3), r-cran-nlme, r-cran-matrix, dh-r -Standards-Version: 4.6.0 +Build-Depends: debhelper, r-base-dev (>= 4.2.1), r-cran-nlme, r-cran-matrix, dh-r +Standards-Version: 4.6.1 Vcs-Browser: https://salsa.debian.org/edd/r-cran-mgcv Vcs-Git: https://salsa.debian.org/edd/r-cran-mgcv.git Homepage: https://cran.r-project.org/package=mgcv diff -Nru mgcv-1.8-40/DESCRIPTION mgcv-1.8-41/DESCRIPTION --- mgcv-1.8-40/DESCRIPTION 2022-03-29 09:50:02.000000000 +0000 +++ mgcv-1.8-41/DESCRIPTION 2022-10-21 13:52:37.000000000 +0000 @@ -1,5 +1,5 @@ Package: mgcv -Version: 1.8-40 +Version: 1.8-41 Author: Simon Wood Maintainer: Simon Wood Title: Mixed GAM Computation Vehicle with Automatic Smoothness @@ -20,6 +20,6 @@ ByteCompile: yes License: GPL (>= 2) NeedsCompilation: yes -Packaged: 2022-03-22 09:39:41 UTC; sw283 +Packaged: 2022-10-21 11:50:05 UTC; sw283 Repository: CRAN -Date/Publication: 2022-03-29 09:50:02 UTC +Date/Publication: 2022-10-21 13:52:37 UTC diff -Nru mgcv-1.8-40/man/bam.Rd mgcv-1.8-41/man/bam.Rd --- mgcv-1.8-40/man/bam.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/bam.Rd 2022-07-06 11:02:55.000000000 +0000 @@ -96,7 +96,8 @@ Smoothing parameters must be supplied in the order that the smooth terms appear in the model formula. Negative elements indicate that the parameter should be estimated, and hence a mixture of fixed and estimated parameters is possible. If smooths share smoothing parameters then \code{length(sp)} -must correspond to the number of underlying smoothing parameters.} +must correspond to the number of underlying smoothing parameters. Note that \code{discrete=TRUE}may result in +re-ordering of variables in tensor product smooths for improved efficiency, and \code{sp} must be supplied in re-ordered order.} \item{min.sp}{Lower bounds can be supplied for the smoothing parameters. 
Note that if this option is used then the smoothing parameters \code{full.sp}, in the diff -Nru mgcv-1.8-40/man/chol.down.Rd mgcv-1.8-41/man/chol.down.Rd --- mgcv-1.8-40/man/chol.down.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/chol.down.Rd 2022-07-06 11:02:55.000000000 +0000 @@ -58,7 +58,7 @@ ## Downdate - just going back from R to R0 Rd <- cholup(R,u,FALSE) R0;Rd - range(R-Ru) + range(R0-Rd) } diff -Nru mgcv-1.8-40/man/cnorm.Rd mgcv-1.8-41/man/cnorm.Rd --- mgcv-1.8-40/man/cnorm.Rd 1970-01-01 00:00:00.000000000 +0000 +++ mgcv-1.8-41/man/cnorm.Rd 2022-08-22 06:55:50.000000000 +0000 @@ -0,0 +1,103 @@ +\name{cnorm} +\alias{cnorm} +\alias{Tobit} +%- Also NEED an `\alias' for EACH other topic documented here. +\title{GAM censored normal family for log-normal AFT and Tobit models} +\description{Family for use with \code{\link{gam}} or \code{\link{bam}}, implementing regression for censored +normal data. If \eqn{y}{y} is the response with mean \eqn{\mu}{m} and standard deviation \eqn{w^{-1/2}\exp(\theta)}{w^{-1/2}exp(theta)}, +then \eqn{w^{1/2}(y-\mu)\exp(-\theta)}{w^{1/2}(y-m)exp(-theta)} follows an \eqn{N(0,1)}{N(0,1)} distribution. That is +\deqn{y \sim N(\mu,e^{2\theta}w^{-1}).}{y ~ N(m,exp(2 theta)/w).} \eqn{\theta}{theta} is a single scalar for all observations. Observations may be left, interval or right censored or uncensored. + +Useful for log-normal accelerated failure time (AFT) models, Tobit regression, and crudely rounded data, for example. +} + +\usage{ +cnorm(theta=NULL,link="identity") +} +\arguments{ +\item{theta}{ log standard deviation parameter. If supplied and positive then taken as a fixed value of standard deviation (not its log). If supplied and negative taken as negative of initial value for standard deviation (not its log).} + +\item{link}{The link function: \code{"identity"}, \code{"log"} or \code{"sqrt"}.} + +} +\value{ +An object of class \code{extended.family}. +} + +\details{If the family is used with a vector response, then it assumed that there is no censoring, and a regular Gaussian regression results. If there is censoring then the response should be supplied as a two column matrix. The first column is always numeric. Entries in the second column are as follows. +\itemize{ +\item If an entry is identical to the corresponding first column entry, then it is an uncensored observation. +\item If an entry is numeric and different to the first column entry then there is interval censoring. The first column entry is the lower interval limit and the second column entry is the upper interval limit. \eqn{y}{y} is only known to be between these limits. +\item If the second column entry is \code{-Inf} then the observation is left censored at the value of the entry in the first column. It is only known that \eqn{y}{y} is less than or equal to the first column value. +\item If the second column entry is \code{Inf} then the observation is right censored at the value of the entry in the first column. It is only known that \eqn{y}{y} is greater than or equal to the first column value. +} +Any mixture of censored and uncensored data is allowed, but be aware that data consisting only of right and/or left censored data contain very little information. +} + +%- maybe also `usage' for other objects documented here. + +\author{ Simon N. Wood \email{simon.wood@r-project.org} +} + +\references{ +Wood, S.N., N. Pya and B. Saefken (2016), Smoothing parameter and +model selection for general smooth models. 
+Journal of the American Statistical Association 111, 1548-1575 +\doi{10.1080/01621459.2016.1180986} +} + + +\examples{ +library(mgcv) + +####################################################### +## AFT model example for colon cancer survivial data... +####################################################### + +library(survival) ## for data +col1 <- colon[colon$etype==1,] ## concentrate on single event +col1$differ <- as.factor(col1$differ) +col1$sex <- as.factor(col1$sex) + +## set up the AFT response... +logt <- cbind(log(col1$time),log(col1$time)) +logt[col1$status==0,2] <- Inf ## right censoring +col1$logt <- -logt ## -ve conventional for AFT versus Cox PH comparison + +## fit the model... +b <- gam(logt~s(age,by=sex)+sex+s(nodes)+perfor+rx+obstruct+adhere, + family=cnorm(),data=col1) +plot(b,pages=1) +## ... compare this to ?cox.ph + +################################ +## A Tobit regression example... +################################ + +set.seed(3);n<-400 +dat <- gamSim(1,n=n) +ys <- dat$y - 5 ## shift data down + +## truncate at zero, and set up response indicating this has happened... +y <- cbind(ys,ys) +y[ys<0,2] <- -Inf +y[ys<0,1] <- 0 +dat$yt <- y +b <- gam(yt~s(x0)+s(x1)+s(x2)+s(x3),family=cnorm,data=dat) +plot(b,pages=1) + +############################## +## A model for rounded data... +############################## + +dat <- gamSim(1,n=n) +y <- round(dat$y) +y <- cbind(y-.5,y+.5) ## set up to indicate interval censoring +dat$yi <- y +b <- gam(yi~s(x0)+s(x1)+s(x2)+s(x3),family=cnorm,data=dat) +plot(b,pages=1) + +} +\keyword{models} \keyword{regression}%-- one or more .. + + diff -Nru mgcv-1.8-40/man/coxph.Rd mgcv-1.8-41/man/coxph.Rd --- mgcv-1.8-40/man/coxph.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/coxph.Rd 2022-07-20 14:34:00.000000000 +0000 @@ -60,7 +60,7 @@ \doi{10.1080/01621459.2016.1180986} } -\seealso{\code{\link{cox.pht}}} +\seealso{\code{\link{cox.pht}}, \code{\link{cnorm}}} \examples{ library(mgcv) diff -Nru mgcv-1.8-40/man/coxpht.Rd mgcv-1.8-41/man/coxpht.Rd --- mgcv-1.8-40/man/coxpht.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/coxpht.Rd 2022-08-22 06:55:50.000000000 +0000 @@ -6,7 +6,7 @@ The trick is to generate an artificial Poisson observation for each subject in the risk set at each non-censored event time. The corresponding covariate values for each subject are whatever they are at the event time, while the Poisson response is zero for all subjects except those experiencing the event at that time (this corresponds to Peto's correction for ties). The linear predictor for the model must include an intercept for each event time (the cumulative sum of the exponential of these is the Breslow estimate of the baseline hazard). -Below is some example code employing this trick for the \code{\link[survival]{pbcseq}} data from the \code{survival} package. It uses \code{\link{bam}} for fitting with the \code{discrete=TRUE} option for efficiency: there is some approximation involved in doing this, and the exact equivalent to what is done in \code{\link{cox.ph}} is rather obtained by using \code{\link{gam}} with \code{method="REML"} (taking some 14 times the computational time for the example below). +Below is some example code employing this trick for the \code{\link[survival]{pbcseq}} data from the \code{survival} package. 
It uses \code{\link{bam}} for fitting with the \code{discrete=TRUE} option for efficiency: there is some approximation involved in doing this, and the exact equivalent to what is done in \code{\link{cox.ph}} is rather obtained by using \code{\link{gam}} with \code{method="REML"} (taking many times the computational time for the example below). An alternative fits the model as a conditional logistic model using stratified Cox PH with event times as strata (see example). This would be identical in the unpenalized case, but smoothing parameter estimates can differ. The function \code{tdpois} in the example code uses crude piecewise constant interpolation for the covariates, in which the covariate value at an event time is taken to be whatever it was the previous time that it was measured. Obviously more sophisticated interpolation schemes might be preferable. @@ -73,6 +73,11 @@ b <- bam(z ~ tf - 1 + sex + trt + s(sqrt(protime)) + s(platelet)+ s(age)+ s(bili)+s(albumin), family=poisson,data=pb,discrete=TRUE,nthreads=2) +pb$dumt <- rep(1,nrow(pb)) ## dummy time +## Fit as conditional logistic... +b1 <- gam(cbind(dumt,tf) ~ sex + trt + s(sqrt(protime)) + s(platelet) ++ s(age) + s(bili) + s(albumin),family=cox.ph,data=pb,weights=z) + par(mfrow=c(2,3)) plot(b,scale=0) diff -Nru mgcv-1.8-40/man/dpnorm.Rd mgcv-1.8-41/man/dpnorm.Rd --- mgcv-1.8-40/man/dpnorm.Rd 1970-01-01 00:00:00.000000000 +0000 +++ mgcv-1.8-41/man/dpnorm.Rd 2022-07-20 15:17:58.000000000 +0000 @@ -0,0 +1,46 @@ +\name{dpnorm} +\alias{dpnorm} +%- Also NEED an `\alias' for EACH other topic documented here. +\title{Stable evaluation of difference between normal c.d.f.s} +\description{Evaluates the difference between two \eqn{N(0,1)}{N(0,1)} cumulative distribution functions avoiding cancellation error. +} +\usage{ +dpnorm(x0,x1) +} +%- maybe also `usage' for other objects documented here. +\arguments{ + \item{x0}{vector of lower values at which to evaluate standard normal distribution function.} + \item{x1}{vector of upper values at which to evaluate standard normal distribution function.} +} + +\details{ Equivalent to \code{pnorm(x1)-pnorm(x0)}, but stable when \code{x0} and \code{x1} values are very close, or in the upper tail of the standard normal.} + + +\author{ Simon N. Wood \email{simon.wood@r-project.org}} + +\examples{ +require(mgcv) +x <- seq(-10,10,length=10000) +eps <- 1e-10 +y0 <- pnorm(x+eps)-pnorm(x) ## cancellation prone +y1 <- dpnorm(x,x+eps) ## stable +## illustrate stable computation in black, and +## cancellation prone in red... +par(mfrow=c(1,2),mar=c(4,4,1,1)) +plot(log(y1),log(y0),type="l") +lines(log(y1[x>0]),log(y0[x>0]),col=2) +plot(x,log(y1),type="l") +lines(x,log(y0),col=2) + +} + +\keyword{models} \keyword{smooth} \keyword{regression}%-- one or more .. + + + + + + + + + diff -Nru mgcv-1.8-40/man/factor.smooth.Rd mgcv-1.8-41/man/factor.smooth.Rd --- mgcv-1.8-40/man/factor.smooth.Rd 1970-01-01 00:00:00.000000000 +0000 +++ mgcv-1.8-41/man/factor.smooth.Rd 2022-09-27 06:59:38.000000000 +0000 @@ -0,0 +1,59 @@ +\name{factor.smooth} +\alias{factor.smooth.interaction} +\alias{factor.smooth} +%- Also NEED an `\alias' for EACH other topic documented here. +\title{Factor smooth interactions in GAMs} + +\description{The interaction of one or more factors with a smooth effect produces a separate smooth for each factor level. These smooths can have different smoothing parameters, or all have the same smoothing parameter. There are several ways to set them up.
+\describe{ +\item{Factor \code{by} variables.}{If the \code{by} variables for a smooth (specified using \code{\link{s}}, \code{\link{te}}, \code{\link{ti}} or \code{\link{t2}}) is a factor, then a separate smooth is produced for each factor level. If the factor is ordered, then no smooth is produced for its first level: this is useful for setting up models which have a reference level smooth and then difference to reference smooths for each factor level except the first (which is the reference). Giving the smooth an \code{id} forces the same smoothing parameter to be used for all levels of the factor. For example \code{s(x,by=fac,id=1)} would produce a separate smooth of \code{x} for each level of \code{fac}, with each smooth having the same smoothing parameter. See \link{gam.models} for more.} + +\item{Sum to zero smooth interactions}{\code{bs="sz"} These factor smooth interactions are specified using \code{s(...,bs="sz")}. There may be several factors supplied, and a smooth is produced for each combination of factor levels. The smooths are constructed to exclude the `main effect' smooth, or the effects of individual smooths produced for lower order combinations of factor levels. For example, with a single factor, the smooths for the different factor levels are so constrained that the sum over all factor levels of equivalent spline coefficients are all zero. This allows the meaningful and identifiable construction of models with a main effect smooth plus smooths for the difference between each factor level and the main effect. Such a construction is often more natural than the \code{by} variable with ordered factors construction. See \code{\link{smooth.construct.sz.smooth.spec}}.} + +\item{Random wiggly curves}{\code{bs="fs"} This approach produces a smooth curve for each level of a single factor, treating the curves as entirely random. This means that in principle a model can be constructed with a main effect plus factor level smooth deviations from that effect. However the model is not forced to make the main effect do as much of the work as possible, in the way that the \code{"sz"} approach does. This approach can be very efficient with \code{\link{gamm}} as it exploits the nested estimation available in \code{lme}. See \code{\link{smooth.construct.fs.smooth.spec}}. +} +} +} +\author{ Simon N. Wood \email{simon.wood@r-project.org} with input from Matteo Fasiolo.} + +\seealso{\code{\link{smooth.construct.fs.smooth.spec}}, \code{\link{smooth.construct.sz.smooth.spec}}} + +\examples{ +library(mgcv) +set.seed(0) +## simulate data... +f0 <- function(x) 2 * sin(pi * x) +f1 <- function(x,a=2,b=-1) exp(a * x)+b +f2 <- function(x) 0.2 * x^11 * (10 * (1 - x))^6 + 10 * + (10 * x)^3 * (1 - x)^10 +n <- 500;nf <- 25 +fac <- sample(1:nf,n,replace=TRUE) +x0 <- runif(n);x1 <- runif(n);x2 <- runif(n) +a <- rnorm(nf)*.2 + 2;b <- rnorm(nf)*.5 +f <- f0(x0) + f1(x1,a[fac],b[fac]) + f2(x2) +fac <- factor(fac) +y <- f + rnorm(n)*2 +## so response depends on global smooths of x0 and +## x2, and a smooth of x1 for each level of fac. + +## fit model... +bm <- gamm(y~s(x0)+ s(x1,fac,bs="fs",k=5)+s(x2,k=20)) +plot(bm$gam,pages=1) +summary(bm$gam) + +bd <- bam(y~s(x0)+ s(x1) + s(x1,fac,bs="sz",k=5)+s(x2,k=20),discrete=TRUE) +plot(bd,pages=1) +summary(bd) + + + +## Could also use... +## b <- gam(y~s(x0)+ s(x1,fac,bs="fs",k=5)+s(x2,k=20),method="ML") +## ... but its slower (increasingly so with increasing nf) +## b <- gam(y~s(x0)+ t2(x1,fac,bs=c("tp","re"),k=5,full=TRUE)+ +## s(x2,k=20),method="ML")) +## ... 
is exactly equivalent. +} +\keyword{models} \keyword{regression}%-- one or more .. + + diff -Nru mgcv-1.8-40/man/family.mgcv.Rd mgcv-1.8-41/man/family.mgcv.Rd --- mgcv-1.8-40/man/family.mgcv.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/family.mgcv.Rd 2022-08-22 06:55:50.000000000 +0000 @@ -10,26 +10,27 @@ The following families are in the exponential family given the value of a single parameter. They are usable with all modelling functions. \itemize{ \item \code{\link{Tweedie}} An exponential family distribution for which the variance of the response is given by the mean response to the power \code{p}. -\code{p} is in (1,2) and must be supplied. Alternatively, see \code{\link{tw}} to estimate \code{p} (\code{gam} only). -\item \code{\link{negbin}} The negative binomial. Alternatively see \code{\link{nb}} to estimate the \code{theta} parameter of the negative binomial (\code{gam} only). +\code{p} is in (1,2) and must be supplied. Alternatively, see \code{\link{tw}} to estimate \code{p} (\code{gam/bam} only). +\item \code{\link{negbin}} The negative binomial. Alternatively see \code{\link{nb}} to estimate the \code{theta} parameter of the negative binomial (\code{gam/bam} only). } The following families are for regression type models dependent on a single linear predictor, and with a log likelihood -which is a sum of independent terms, each coprresponding to a single response observation. Usable with \code{\link{gam}}, with smoothing parameter estimation by \code{"REML"} or \code{"ML"} (the latter does not integrate the unpenalized and parameteric effects out of the marginal likelihood optimized for the smoothing parameters). Also usable with \code{\link{bam}}. +which is a sum of independent terms, each corresponding to a single response observation. Usable with \code{\link{gam}}, with smoothing parameter estimation by \code{"REML"} or \code{"ML"} (the latter does not integrate the unpenalized and parameteric effects out of the marginal likelihood optimized for the smoothing parameters). Also usable with \code{\link{bam}}. \itemize{ -\item \code{\link{ocat}} for ordered categorical data. -\item \code{\link{tw}} for Tweedie distributed data, when the power parameter relating the variance to the mean is to be estimated. -\item \code{\link{nb}} for negative binomial data when the \code{theta} parameter is to be estimated. \item \code{\link{betar}} for proportions data on (0,1) when the binomial is not appropriate. +\item \code{\link{cnorm}} censored normal distribution, for log normal accelerated failure time models, Tobit regression and rounded data, for example. +\item \code{\link{nb}} for negative binomial data when the \code{theta} parameter is to be estimated. +\item \code{\link{ocat}} for ordered categorical data. \item \code{\link{scat}} scaled t for heavy tailed data that would otherwise be modelled as Gaussian. +\item \code{\link{tw}} for Tweedie distributed data, when the power parameter relating the variance to the mean is to be estimated. \item \code{\link{ziP}} for zero inflated Poisson data, when the zero inflation rate depends simply on the Poisson mean. } %% end itemize -The following families implement more general model classes. Usable only with \code{\link{gam}} and only with REML smoothing parameter estimation. +The following families implement more general model classes. Usable only with \code{\link{gam}} and only with REML or NCV smoothing parameter estimation. \itemize{ -\item \code{\link{cox.ph}} the Cox Proportional Hazards model for survival data. 
+\item \code{\link{cox.ph}} the Cox Proportional Hazards model for survival data (no NCV). \item \code{\link{gammals}} a gamma location-scale model, where the mean and standared deviation are modelled with separate linear predictors. \item \code{\link{gaulss}} a Gaussian location-scale model where the mean and the standard deviation are both modelled using smooth linear predictors. \item \code{\link{gevlss}} a generalized extreme value (GEV) model where the location, scale and shape parameters are each modelled using a linear predictor. @@ -37,7 +38,7 @@ \item \code{\link{shash}} Sinh-arcsinh location scale and shape model family (4 linear predicors). \item \code{\link{ziplss}} a `two-stage' zero inflated Poisson model, in which 'potential-presence' is modelled with one linear predictor, and Poisson mean abundance given potential presence is modelled with a second linear predictor. -\item \code{\link{mvn}}: multivariate normal additive models. +\item \code{\link{mvn}}: multivariate normal additive models (no NCV). \item \code{\link{multinom}}: multinomial logistic regression, for unordered categorical responses. } %% end itemize } diff -Nru mgcv-1.8-40/man/formula.gam.Rd mgcv-1.8-41/man/formula.gam.Rd --- mgcv-1.8-40/man/formula.gam.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/formula.gam.Rd 2022-07-20 12:31:05.000000000 +0000 @@ -89,7 +89,7 @@ be indexed, 1 to d where d is the number of linear predictors, and the indexing is in the order in which the formulae appear in the list. It is possible to supply extra formulae specifying that several linear predictors should share some terms. To do this a formula is supplied in which the response is replaced by numbers specifying the -indices of the linear predictors which will shre the terms specified on the r.h.s. For example \code{1+3~s(x)+z-1} specifies that linear predictors 1 and 3 will share the terms \code{s(x)} and \code{z} (but we don't want an extra intercept, as this would usually be unidentifiable). Note that it is possible that a linear predictor only includes shared terms: it must still have its own formula, but the r.h.s. would simply be \code{-1} (e.g. \code{y ~ -1} or \code{~ -1}). +indices of the linear predictors which will shre the terms specified on the r.h.s. For example \code{1+3~s(x)+z-1} specifies that linear predictors 1 and 3 will share the terms \code{s(x)} and \code{z} (but we don't want an extra intercept, as this would usually be unidentifiable). Note that it is possible that a linear predictor only includes shared terms: it must still have its own formula, but the r.h.s. would simply be \code{-1} (e.g. \code{y ~ -1} or \code{~ -1}). See \code{\link{multinom}} for an example. } diff -Nru mgcv-1.8-40/man/gam.control.Rd mgcv-1.8-41/man/gam.control.Rd --- mgcv-1.8-40/man/gam.control.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/gam.control.Rd 2022-07-20 12:31:05.000000000 +0000 @@ -10,7 +10,7 @@ choise of fitting method, see \code{\link{gam}} arguments \code{method} and \code{optimizer}. } \usage{ -gam.control(nthreads=1,irls.reg=0.0,epsilon = 1e-07, maxit = 200, +gam.control(nthreads=1,ncv.threads=1,irls.reg=0.0,epsilon = 1e-07, maxit = 200, mgcv.tol=1e-7,mgcv.half=15, trace = FALSE, rank.tol=.Machine$double.eps^0.5,nlm=list(), optim=list(),newton=list(),outerPIsteps=0, @@ -23,6 +23,8 @@ parallelization in the C code if your R installation supports openMP, and \code{nthreads} is set to more than 1. 
Note that it is usually better to use the number of physical cores here, rather than the number of hyper-threading cores.} +\item{ncv.threads}{The computations for neighbourhood cross-validation (NCV) typically scale better than the rest of the GAM computations and are worth parallelizing. \code{ncv.threads} allows you to set the number of threads to use separately. +} \item{irls.reg}{For most models this should be 0. The iteratively re-weighted least squares method by which GAMs are fitted can fail to converge in some circumstances. For example, data with many zeroes can cause problems in a model with a log link, because a mean of zero corresponds to an infinite range of linear predictor diff -Nru mgcv-1.8-40/man/gam.fit3.Rd mgcv-1.8-41/man/gam.fit3.Rd --- mgcv-1.8-40/man/gam.fit3.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/gam.fit3.Rd 2022-09-17 17:09:21.000000000 +0000 @@ -1,7 +1,7 @@ \name{gam.fit3} \alias{gam.fit3} %- Also NEED an `\alias' for EACH other topic documented here. -\title{P-IRLS GAM estimation with GCV \& UBRE/AIC or RE/ML derivative calculation} +\title{P-IRLS GAM estimation with GCV, UBRE/AIC or RE/ML derivative calculation} \description{Estimation of GAM smoothing parameters is most stable if optimization of the UBRE/AIC, GCV, GACV, REML or ML score is outer to the penalized iteratively re-weighted least squares scheme used to estimate the model given smoothing @@ -27,7 +27,7 @@ Mp = -1, family = gaussian(), control = gam.control(), intercept = TRUE,deriv=2,gamma=1,scale=1, printWarn=TRUE,scoreType="REML",null.coef=rep(0,ncol(x)), - pearson.extra=0,dev.extra=0,n.true=-1,Sl=NULL,...) + pearson.extra=0,dev.extra=0,n.true=-1,Sl=NULL,nei=NULL,...) } %- maybe also `usage' for other objects documented here. \arguments{ @@ -92,6 +92,8 @@ \item{Sl}{A smooth list suitable for passing to gam.fit5. } +\item{nei}{List specifying neighbourhood structure if NCV used.
See \code{\link{gam}}.} + \item{...}{Other arguments: ignored.} } \details{ This routine is basically \code{\link{glm.fit}} with some diff -Nru mgcv-1.8-40/man/gam.fit5.post.proc.Rd mgcv-1.8-41/man/gam.fit5.post.proc.Rd --- mgcv-1.8-40/man/gam.fit5.post.proc.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/gam.fit5.post.proc.Rd 2022-07-20 12:31:05.000000000 +0000 @@ -1,10 +1,9 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mgcvExports.R + \name{gam.fit5.post.proc} \alias{gam.fit5.post.proc} \title{Post-processing output of gam.fit5} \usage{ -gam.fit5.post.proc(object, Sl, L, lsp0, S, off) +gam.fit5.post.proc(object, Sl, L, lsp0, S, off, gamma) } \arguments{ \item{object}{output of \code{gam.fit5}.} @@ -18,6 +17,8 @@ \item{S}{penalty matrix.} \item{off}{vector of offsets.} + +\item{gamma}{parameter for increasing model smoothness in fitting.} } \value{ A list containing: \itemize{ diff -Nru mgcv-1.8-40/man/gamlss.gH.Rd mgcv-1.8-41/man/gamlss.gH.Rd --- mgcv-1.8-40/man/gamlss.gH.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/gamlss.gH.Rd 2022-07-20 12:31:05.000000000 +0000 @@ -5,7 +5,7 @@ \title{Calculating derivatives of log-likelihood wrt regression coefficients} \usage{ gamlss.gH(X, jj, l1, l2, i2, l3 = 0, i3 = 0, l4 = 0, i4 = 0, d1b = 0, - d2b = 0, deriv = 0, fh = NULL, D = NULL) + d2b = 0, deriv = 0, fh = NULL, D = NULL,sandwich=FALSE) } \arguments{ \item{X}{matrix containing the model matrices of all the linear predictors.} @@ -42,6 +42,8 @@ \item{D}{diagonal matrix, used to provide some scaling.} +\item{sandwich}{set to \code{TRUE} to return sandwich estimator 'filling', as opposed to the Hessian, in \code{l2}.} + } \value{ A list containing \code{lb} - the grad vector w.r.t. coefs; \code{lbb} - the Hessian matrix w.r.t. coefs; diff -Nru mgcv-1.8-40/man/gam.outer.Rd mgcv-1.8-41/man/gam.outer.Rd --- mgcv-1.8-40/man/gam.outer.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/gam.outer.Rd 2022-07-20 12:31:05.000000000 +0000 @@ -21,7 +21,7 @@ } \usage{ gam.outer(lsp,fscale,family,control,method,optimizer, - criterion,scale,gamma,G,start=NULL,...) + criterion,scale,gamma,G,start=NULL,nei=NULL,...) } %- maybe also `usage' for other objects documented here. \arguments{ @@ -48,6 +48,7 @@ \item{G}{List produced by \code{mgcv:::gam.setup}, containing most of what's needed to actually fit a GAM.} \item{start}{starting parameter values.} +\item{nei}{List specifying neighbourhood structure if NCV used. See \code{\link{gam}}.} \item{...}{other arguments, typically for passing on to \code{gam.fit3} (ultimately).} } \details{ diff -Nru mgcv-1.8-40/man/gam.Rd mgcv-1.8-41/man/gam.Rd --- mgcv-1.8-40/man/gam.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/gam.Rd 2022-10-03 07:29:40.000000000 +0000 @@ -13,7 +13,7 @@ available for any quantity predicted using a fitted model. Smooth terms are represented using penalized regression splines (or similar smoothers) -with smoothing parameters selected by GCV/UBRE/AIC/REML or by regression splines with +with smoothing parameters selected by GCV/UBRE/AIC/REML/NCV or by regression splines with fixed degrees of freedom (mixtures of the two are permitted). Multi-dimensional smooths are available using penalized thin plate regression splines (isotropic) or tensor product splines (when an isotropic smooth is inappropriate), and users can add smooths. 
@@ -32,7 +32,7 @@ optimizer=c("outer","newton"),control=list(),scale=0, select=FALSE,knots=NULL,sp=NULL,min.sp=NULL,H=NULL,gamma=1, fit=TRUE,paraPen=NULL,G=NULL,in.out,drop.unused.levels=TRUE, - drop.intercept=NULL,discrete=FALSE,...) + drop.intercept=NULL,nei=NULL,discrete=FALSE,...) } %- maybe also `usage' for other objects documented here. @@ -77,10 +77,11 @@ \code{\link{gam.control}}. Values not set assume default values. } \item{method}{The smoothing parameter estimation method. \code{"GCV.Cp"} to use GCV for unknown scale parameter and -Mallows' Cp/UBRE/AIC for known scale. \code{"GACV.Cp"} is equivalent, but using GACV in place of GCV. \code{"REML"} +Mallows' Cp/UBRE/AIC for known scale. \code{"GACV.Cp"} is equivalent, but using GACV in place of GCV. \code{"NCV"} +for neighbourhood cross-validation using the neighbourhood structure specified by \code{nei}. \code{"REML"} for REML estimation, including of unknown scale, \code{"P-REML"} for REML estimation, but using a Pearson estimate of the scale. \code{"ML"} and \code{"P-ML"} are similar, but using maximum likelihood in place of REML. Beyond the -exponential family \code{"REML"} is the default, and the only other option is \code{"ML"}.} +exponential family \code{"REML"} is the default, and the only other options are \code{"ML"} or \code{"NCV"}.} \item{optimizer}{An array specifying the numerical optimization method to use to optimize the smoothing parameter estimation criterion (given by \code{method}). \code{"perf"} (deprecated) for performance iteration. \code{"outer"} @@ -153,6 +154,9 @@ \item{drop.intercept}{Set to \code{TRUE} to force the model to really not have a constant in the parametric model part, even with factor variables present. Can be vector when \code{formula} is a list.} +\item{nei}{A list specifying the neighbourhood structure for NCV. \code{k} is the vector of indices to be dropped +for each neighbourhood and \code{m} gives the end of each neighbourhood. So \code{nei$k[(nei$m[j-1]+1):nei$m[j]]} gives the points dropped for the neighbourhood \code{j}. \code{i} is the vector of indices of points to predict, with corresponding endpoints \code{mi}. So \code{nei$i[(nei$mi[j-1]+1):nei$mi[j]]} indexes the points to predict for neighbourhood \code{j}. If \code{nei==NULL} (or \code{k} or \code{m} are missing) then leave-one-out cross validation is obtained.} + \item{discrete}{experimental option for setting up models for use with discrete methods employed in \code{\link{bam}}. Do not modify.} \item{...}{further arguments for @@ -205,7 +209,7 @@ \eqn{DoF}{DoF} the effective degrees of freedom of the model. Notice that UBRE is effectively just AIC rescaled, but is only used when \eqn{s}{s} is known. -Alternatives are GACV, or a Laplace approximation to REML. There +Alternatives are GACV, NCV or a Laplace approximation to REML. There is some evidence that the latter may actually be the most effective choice. The main computational challenge solved by the \code{mgcv} package is to optimize the smoothness selection criteria efficiently and reliably. diff -Nru mgcv-1.8-40/man/multinom.Rd mgcv-1.8-41/man/multinom.Rd --- mgcv-1.8-40/man/multinom.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/multinom.Rd 2022-07-20 12:31:05.000000000 +0000 @@ -72,6 +72,11 @@ pc <- apply(pp,1,function(x) which(max(x)==x)[1])-1 plot(gr,col=pc+3,pch=19) +## example sharing a smoother between linear predictors +## ?formula.gam gives more details.
+b <- gam(list(y~s(x1),~s(x1),1+2~s(x2)-1),family=multinom(K=2)) +plot(b,pages=1) + } \keyword{models} \keyword{regression}%-- one or more .. diff -Nru mgcv-1.8-40/man/plot.gam.Rd mgcv-1.8-41/man/plot.gam.Rd --- mgcv-1.8-40/man/plot.gam.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/plot.gam.Rd 2022-10-19 10:27:22.000000000 +0000 @@ -84,7 +84,7 @@ \item{all.terms}{if set to \code{TRUE} then the partial effects of parametric model components are also plotted, via a call to \code{\link{termplot}}. Only -terms of order 1 can be plotted in this way.} +terms of order 1 can be plotted in this way. Also see warnings.} \item{shade}{Set to \code{TRUE} to produce shaded regions as confidence bands for smooths (not avaliable for parametric terms, which are plotted using \code{termplot}).} @@ -207,7 +207,7 @@ Plots of 2-D smooths with standard error contours shown can not easily be customized. -The function can not deal with smooths of more than 2 variables! +\code{all.terms} uses \code{\link{termplot}} which looks for the original data in the environment of the fitted model object formula. Since \code{gam} resets this environment to avoid large saved model objects containing data in hidden environments, this can fail. } \seealso{ \code{\link{gam}}, \code{\link{predict.gam}}, \code{\link{vis.gam}}} diff -Nru mgcv-1.8-40/man/predict.bam.Rd mgcv-1.8-41/man/predict.bam.Rd --- mgcv-1.8-40/man/predict.bam.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/predict.bam.Rd 2022-07-06 11:02:55.000000000 +0000 @@ -2,8 +2,9 @@ \alias{predict.bam} %- Also NEED an `\alias' for EACH other topic documented here. \title{Prediction from fitted Big Additive Model model} -\description{ Essentially a wrapper for \code{\link{predict.gam}} for prediction from a -model fitted by \code{\link{bam}}. Can compute on a parallel cluster. +\description{ In most cases essentially a wrapper for \code{\link{predict.gam}} for prediction from a +model fitted by \code{\link{bam}}. Can compute on a parallel cluster. For models fitted using discrete +methods with \code{discrete=TRUE} then discrete prediction methods are used instead. Takes a fitted \code{bam} object produced by \code{\link{bam}} and produces predictions given a new set of values for the model covariates @@ -50,11 +51,11 @@ \item{se.fit}{ when this is TRUE (not default) standard error estimates are returned for each prediction.} -\item{terms}{if \code{type=="terms"} or \code{type="iterms"} then only results for the terms (smooth or parametric) named in this array will be returned. Otherwise any smooth terms not named in this array will be set to zero. If \code{NULL} then all terms are included.} +\item{terms}{if \code{type=="terms"} or \code{type="iterms"} then only results for the terms (smooth or parametric) named in this array will be returned. Otherwise any terms not named in this array will be set to zero. If \code{NULL} then all terms are included. \code{"(Intercept)"} is the intercept term.} \item{exclude}{if \code{type=="terms"} or \code{type="iterms"} then terms (smooth or parametric) named in this array -will not be returned. Otherwise any smooth terms named in this array will be set to zero. -If \code{NULL} then no terms are excluded. To avoid supplying covariate values for excluded terms, set \code{newdata.guaranteed=TRUE}, +will not be returned. Otherwise any terms named in this array will be set to zero. +If \code{NULL} then no terms are excluded. 
To avoid supplying covariate values for excluded smooth terms, set \code{newdata.guaranteed=TRUE}, but note that this skips all checks of \code{newdata}. } diff -Nru mgcv-1.8-40/man/predict.gam.Rd mgcv-1.8-41/man/predict.gam.Rd --- mgcv-1.8-40/man/predict.gam.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/predict.gam.Rd 2022-08-22 06:55:50.000000000 +0000 @@ -49,12 +49,12 @@ \item{se.fit}{ when this is TRUE (not default) standard error estimates are returned for each prediction.} \item{terms}{if \code{type=="terms"} or \code{type="iterms"} then only results for the terms (smooth or parametric) named in this array -will be returned. Otherwise any smooth terms not named in this array will be set to zero. If \code{NULL} then all terms are included.} +will be returned. Otherwise any terms not named in this array will be set to zero. If \code{NULL} then all terms are included.} -\item{exclude}{if \code{type=="terms"} or \code{type="iterms"} then terms (smooth or parametric) named in this array will not be returned. Otherwise any smooth terms named in this array will be set to zero. +\item{exclude}{if \code{type=="terms"} or \code{type="iterms"} then terms (smooth or parametric) named in this array will not be returned. Otherwise any terms named in this array will be set to zero. If \code{NULL} then no terms are excluded. Note that this is the term names as it appears in the model summary, see example. -You can avoid providing the covariates for the excluded terms by setting \code{newdata.guaranteed=TRUE}, which will avoid all -checks on \code{newdata}.} +You can avoid providing the covariates for excluded smooth terms by setting \code{newdata.guaranteed=TRUE}, which will avoid all +checks on \code{newdata} (covariates for parametric terms can not be skipped).} \item{block.size}{maximum number of predictions to process per call to underlying code: larger is quicker, but more memory intensive. Set to < 1 to use total number @@ -150,11 +150,11 @@ \examples{ library(mgcv) -n<-200 +n <- 200 sig <- 2 dat <- gamSim(1,n=n,scale=sig) -b<-gam(y~s(x0)+s(I(x1^2))+s(x2)+offset(x3),data=dat) +b <- gam(y~s(x0)+s(I(x1^2))+s(x2)+offset(x3),data=dat) newd <- data.frame(x0=(0:30)/30,x1=(0:30)/30,x2=(0:30)/30,x3=(0:30)/30) pred <- predict.gam(b,newd) @@ -163,6 +163,14 @@ newd1 <- newd;newd1$x0 <- NULL ## remove x0 from `newd1' pred1 <- predict(b,newd1,exclude="s(x0)",newdata.guaranteed=TRUE) +## custom perspective plot... + +m1 <- 20;m2 <- 30; n <- m1*m2 +x1 <- seq(.2,.8,length=m1);x2 <- seq(.2,.8,length=m2) ## marginal grid points +df <- data.frame(x0=rep(.5,n),x1=rep(x1,m2),x2=rep(x2,each=m1),x3=rep(0,n)) +pf <- predict(b,newdata=df,type="terms") +persp(x1,x2,matrix(pf[,2]+pf[,3],m1,m2),theta=-130,col="blue",zlab="") + ############################################# ## difference between "terms" and "iterms" ############################################# diff -Nru mgcv-1.8-40/man/shash.Rd mgcv-1.8-41/man/shash.Rd --- mgcv-1.8-40/man/shash.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/shash.Rd 2022-07-06 11:02:55.000000000 +0000 @@ -22,16 +22,18 @@ } \details{ The density function of the shash family is - \deqn{p(y|\mu,\sigma,\epsilon,\delta)=C(z) exp{-S(z)^2/2} / \sigma{2\pi(1+z^2)}^1/2, } - where \eqn{ C(z)={1+S(z)^2}^1/2 }, \eqn{ S(z)=sinh{\delta sinh^(-1)(z)-\epsilon} } and - \eqn{z=(y-\mu)/(\sigma\delta)}. 
Here \eqn{\mu} and \eqn{\sigma > 0} control, respectively, location and + \deqn{p(y|\mu,\sigma,\epsilon,\delta)= C(z) \exp\{-S(z)^2/2\} \{2\pi(1+z^2)\}^{-1/2}/\sigma,}{p(y|\mu,\sigma,\epsilon,\delta)=C(z)exp{-S(z)^2/2}{2\pi(1+z^2)}^-0.5/\sigma} + where \eqn{ C(z)=\{1+S(z)^2\}^{1/2} }{C(z)={1+S(z)^2}^0.5}, \eqn{ S(z)=\sinh\{\delta \sinh^{-1}(z)-\epsilon\} }{ S(z)=sinh{\delta asinh(z)-\epsilon} } and + \eqn{z = (y - \mu)/(\sigma \delta)}. Here \eqn{\mu} and \eqn{\sigma > 0} control, respectively, location and scale, \eqn{\epsilon} determines skewness, while \eqn{\delta > 0} controls tailweight. \code{shash} can model skewness to either side, depending on the sign of \eqn{\epsilon}. Also, shash can have tails that are lighter (\eqn{\delta>1}) or heavier (\eqn{0<\delta<1}) that a normal. - For fitting purposes, here we are using \eqn{\tau = log(\sigma)} and \eqn{\phi = log(\delta)}. + For fitting purposes, here we are using \eqn{\tau = \log(\sigma)}{\tau = log(\sigma)} and \eqn{\phi = \log(\delta)}{\phi = log(\delta)}. + +The density is based on the expression given on the second line of section 4.1 and equation (2) of Jones and Pewsey (2009), and uses the simple reparameterization given in section 4.3. -The link function used for \eqn{\tau} is logeb with is \eqn{\eta = log{exp(\tau)-b}} so that the inverse link is -\eqn{\tau = log(\sigma) = log{exp(\eta)+b}}. The point is that we are don't allow \eqn{\sigma} to become smaller +The link function used for \eqn{\tau} is logeb with is \eqn{\eta = \log \{\exp(\tau)-b\}}{\eta = log{exp(\tau)-b}} so that the inverse link is +\eqn{\tau = \log(\sigma) = \log\{\exp(\eta)+b\}}{\tau = log(\sigma) = log{exp(\eta)+b}}. The point is that we are don't allow \eqn{\sigma} to become smaller than a small constant b. The likelihood includes a ridge penalty \eqn{- phiPen * \phi^2}, which shrinks \eqn{\phi} toward zero. When sufficient data is available the ridge penalty does not change the fit much, but it is useful to include it when fitting the model to small data sets, to avoid \eqn{\phi} diverging to +infinity (a problem already identified by Jones and Pewsey (2009)). } \examples{ @@ -125,7 +127,8 @@ } \references{ -Jones, M. and A. Pewsey (2009). Sinh-arcsinh distributions. Biometrika 96 (4), 761-780. +Jones, M. and A. Pewsey (2009). Sinh-arcsinh distributions. Biometrika 96 (4), 761-780. \doi{10.1093/biomet/asp053} + Wood, S.N., N. Pya and B. Saefken (2016), Smoothing parameter and model selection for general smooth models. Journal of the American Statistical Association 111, 1548-1575 diff -Nru mgcv-1.8-40/man/smooth.construct.fs.smooth.spec.Rd mgcv-1.8-41/man/smooth.construct.fs.smooth.spec.Rd --- mgcv-1.8-40/man/smooth.construct.fs.smooth.spec.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/smooth.construct.fs.smooth.spec.Rd 2022-09-26 09:10:45.000000000 +0000 @@ -1,7 +1,7 @@ \name{smooth.construct.fs.smooth.spec} \alias{smooth.construct.fs.smooth.spec} \alias{Predict.matrix.fs.interaction} -\alias{factor.smooth.interaction} + %- Also NEED an `\alias' for EACH other topic documented here. \title{Factor smooth interactions in GAMs} @@ -9,8 +9,7 @@ This smooth class allows a separate smooth for each level of a factor, with the same smoothing parameter for all smooths. It is an alternative to using factor \code{by} variables. -See the discussion of \code{by} variables in \code{\link{gam.models}} for more general alternatives -for factor smooth interactions (including interactions of tensor product smooths with factors). 
+See \code{\link{factor.smooth}} for more general alternatives for factor smooth interactions (including interactions of tensor product smooths with factors). } \usage{ @@ -66,7 +65,7 @@ \author{ Simon N. Wood \email{simon.wood@r-project.org} with input from Matteo Fasiolo.} -\seealso{\code{\link{gam.models}}, \code{\link{gamm}}} +\seealso{\code{\link{factor.smooth}}, \code{\link{gamm}}, \code{\link{smooth.construct.sz.smooth.spec}}} \examples{ library(mgcv) @@ -86,12 +85,16 @@ ## so response depends on global smooths of x0 and ## x2, and a smooth of x1 for each level of fac. -## fit model (note p-values not available when fit -## using gamm)... +## fit model... bm <- gamm(y~s(x0)+ s(x1,fac,bs="fs",k=5)+s(x2,k=20)) plot(bm$gam,pages=1) summary(bm$gam) +## Also efficient using bam(..., discrete=TRUE) +bd <- bam(y~s(x0)+ s(x1,fac,bs="fs",k=5)+s(x2,k=20),discrete=TRUE) +plot(bd,pages=1) +summary(bd) + ## Could also use... ## b <- gam(y~s(x0)+ s(x1,fac,bs="fs",k=5)+s(x2,k=20),method="ML") ## ... but its slower (increasingly so with increasing nf) diff -Nru mgcv-1.8-40/man/smooth.construct.sz.smooth.spec.Rd mgcv-1.8-41/man/smooth.construct.sz.smooth.spec.Rd --- mgcv-1.8-40/man/smooth.construct.sz.smooth.spec.Rd 1970-01-01 00:00:00.000000000 +0000 +++ mgcv-1.8-41/man/smooth.construct.sz.smooth.spec.Rd 2022-09-27 06:56:41.000000000 +0000 @@ -0,0 +1,88 @@ +\name{smooth.construct.sz.smooth.spec} +\alias{smooth.construct.sz.smooth.spec} +\alias{Predict.matrix.sz.interaction} + +%- Also NEED an `\alias' for EACH other topic documented here. +\title{Constrained factor smooth interactions in GAMs} + +\description{Factor smooth interactions constructed to exclude main effects (and lower order factor smooth interactions). A smooth is constructed for each combination of the supplied factor levels. By appropriate application of sum to zero contrasts to equivalent smooth coefficients across factor levels, the required exclusion of lower order effects is achieved. + + +See \link{factor.smooth} for alternative factor smooth interactions. } + +\usage{ +\method{smooth.construct}{sz.smooth.spec}(object, data, knots) +\method{Predict.matrix}{sz.interaction}(object, data) } + +\arguments{ +\item{object}{For the \code{smooth.construct} method a smooth specification object, +usually generated by a term \code{s(x,...,bs="sz",)}. For the \code{Predict.matrix} method +an object of class \code{"sz.interaction"} produced by the \code{smooth.construct} method.} + +\item{data}{a list containing just the data (including any \code{by} variable) required by this term, + with names corresponding to \code{object$term}.} + +\item{knots}{ a list containing any knots supplied for smooth basis setup.} + +} + +\value{ An object of class \code{"sz.interaction"} or a matrix mapping the coefficients of the factor smooth interaction to the smooths themselves. } + +\details{This class produces a smooth for each combination of the levels of the supplied factor variables. \code{s(fac,x,bs="sz")} produces a smooth of \code{x} for each level of \code{fac}, for example. The smooths are constrained to represent deviations from the main effect smooth, so that models such as +\deqn{g(\mu_i) = f(x_i) + f_{k(i)}(x_i)}{g(mu_i) = f(x_i) + f_k(i)(x_i)} +can be estimated in an identifiable manner, where \eqn{k(i)}{k(i)} indicates the level of some factor that applies for the ith observation. Identifiability in this case is ensured by constraining the coefficients of the splines representing the \eqn{f_{k}}{f_k}.
In particular if \eqn{\beta_{ki}}{beta_ki} is the ith coefficient of \eqn{f_k}{f_k} then the constraints are \eqn{\sum_k \beta_{ki} = 0}{\sum_k beta_ki = 0}. + +Such sum to zero constraints are implemented using sum to zero contrasts: identity matrices with an extra row of -1s appended. Consider the case of a single factor first. The model matrix corresponding to a smooth per factor level is the row tensor product (see \code{\link{tensor.prod.model.matrix}}) of the model matrix for the factor, and the model matrix for the smooth. The contrast matrix is then the Kronecker product of the sum to zero contrast for the factor, and an identity matrix of dimension determined by the number of coefficients of the smooth. + +If there are multiple factors then the overall model matrix is the row Kronecker product of all the factor model matrices and the smooth, while the contrast is the Kronecker product of all the sum-to-zero contrasts for the factors and a final identity matrix. Notice that this construction means that the main effects (and any interactions) of the factors are included in the factor level dependent smooths. In other words the individual smooths are not each centered. This means that adding main effects or interactions of the factors will lead to a rank deficient model. + +The terms can have a smoothing parameter per smooth, or a single smoothing parameter for all the smooths. The latter is specified by giving the smooth term an \code{id}. e.g. \code{s(fac,x,bs="sz",id=1)}. + +The basis for the smooths can be selected by supplying a list as the \code{xt} argument to \code{\link{s}}, with a \code{bs} item. e.g. \code{s(fac,x,xt=list(bs="cr"))} selectes the \code{"cr"} basis. The default is \code{"tp"} + +The plot method for this class has two schemes. \code{scheme==0} is in colour, while \code{scheme==1} is black and white. Currently it only works for 1D smooths. +} + + +\author{ Simon N. Wood \email{simon.wood@r-project.org} with input from Matteo Fasiolo.} + +\seealso{\code{\link{gam.models}}, \code{\link{gamm}}, \link{factor.smooth}} + +\examples{ +library(mgcv) +set.seed(0) +dat <- gamSim(4) + +b <- gam(y ~ s(x2)+s(fac,x2,bs="sz")+s(x0),data=dat,method="REML") +plot(b,pages=1) +summary(b) + +## Example involving 2 factors + +f1 <- function(x2) 2 * sin(pi * x2) +f2 <- function(x2) exp(2 * x2) - 3.75887 +f3 <- function(x2) 0.2 * x2^11 * (10 * (1 - x2))^6 + 10 * (10 * x2)^3 * + (1 - x2)^10 + +n <- 600 +x <- runif(n) +f1 <- factor(sample(c("a","b","c"),n,replace=TRUE)) +f2 <- factor(sample(c("foo","bar"),n,replace=TRUE)) + +mu <- f3(x) +for (i in 1:3) mu <- mu + exp(2*(2-i)*x)*(f1==levels(f1)[i]) +for (i in 1:2) mu <- mu + 10*i*x*(1-x)*(f2==levels(f2)[i]) +y <- mu + rnorm(n) +dat <- data.frame(y=y,x=x,f1=f1,f2=f2) +b <- gam(y ~ s(x)+s(f1,x,bs="sz")+s(f2,x,bs="sz")+s(f1,f2,x,bs="sz",id=1),data=dat,method="REML") +plot(b,pages=1,scale=0) + + + +} +\keyword{models} \keyword{regression}%-- one or more .. + + diff -Nru mgcv-1.8-40/man/smooth.terms.Rd mgcv-1.8-41/man/smooth.terms.Rd --- mgcv-1.8-40/man/smooth.terms.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/smooth.terms.Rd 2022-09-26 09:10:45.000000000 +0000 @@ -1,5 +1,6 @@ \name{smooth.terms} \alias{smooth.terms} +\alias{smooths} \title{Smooth terms in GAM} \description{ Smooth terms are specified in a \code{\link{gam}} formula using \code{\link{s}}, \code{\link{te}}, \code{\link{ti}} @@ -50,6 +51,11 @@ the analogue of thin plate splines for the sphere. 
Useful for data sampled over a large portion of the globe, when isotropy is appropriate. See \code{\link{Spherical.Spline}} for details.} +\item{B-splines}{\code{bs="bs"}}{ +B-spline basis with integrated squared derivative penalties. The order of basis and penalty can be chosen separately, and several penalties of different orders can be applied. Somewhat like a derivative penalty version of P-splines. See \link{b.spline} for details. + +} + \item{P-splines}{\code{bs="ps"}. These are P-splines as proposed by Eilers and Marx (1996). They combine a B-spline basis, with a discrete penalty on the basis coefficients, and any sane combination of penalty and basis order is allowed. Although this penalty has no exact interpretation in terms of function shape, in the way that the derivative penalties do, P-splines perform almost as well as conventional splines in many standard applications, and can perform better in particular cases where it is advantageous to mix different orders of basis and penalty. @@ -99,13 +105,14 @@ achieved by splitting the penalty into several `basis penalties' these terms are not suitable as components of tensor product smooths, and are not supported by \code{gamm}.} -\item{Factor smooth interactions}{\code{bs="fs"} -Smooth factor interactions are often produced using \code{by} variables (see \code{\link{gam.models}}), but a special smoother -class (see \code{\link{factor.smooth.interaction}}) is available for the case in which a smooth is required at each of a large number of factor levels (for example a smooth for each patient in a study), and each smooth should have the same smoothing parameter. The \code{"fs"} smoothers are set up to be efficient when used with \code{\link{gamm}}, and have penalties on each null sapce component (i.e. they are fully `random effects'). -} -} +\item{Factor smooth interactions}{\code{bs="sz"} +Smooth factor interactions (see \link{factor.smooth}) are often produced using \code{by} variables (see \code{\link{gam.models}}), but it is often desirable to include smooths which represent the deviations from some main effect smooth that apply for each level of a factor (or combination of factors). +See \code{\link{smooth.construct.sz.smooth.spec}} for details.} +\item{Random factor smooth interactions}{\code{bs="fs"} +A special smoother class (see \code{\link{smooth.construct.fs.smooth.spec}}) is available for the case in which a smooth is required at each of a large number of factor levels (for example a smooth for each patient in a study), and each smooth should have the same smoothing parameter. The \code{"fs"} smoothers are set up to be efficient when used with \code{\link{gamm}}, and have penalties on each null space component (i.e. they are fully `random effects').
} +}} \seealso{\code{\link{s}}, \code{\link{te}}, \code{\link{t2}} \code{\link{tprs}},\code{\link{Duchon.spline}}, \code{\link{cubic.regression.spline}},\code{\link{p.spline}}, \code{\link{mrf}}, \code{\link{soap}}, diff -Nru mgcv-1.8-40/man/trind.generator.Rd mgcv-1.8-41/man/trind.generator.Rd --- mgcv-1.8-40/man/trind.generator.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/trind.generator.Rd 2022-10-21 11:49:45.000000000 +0000 @@ -2,14 +2,18 @@ \alias{trind.generator} \title{Generates index arrays for upper triangular storage} \usage{ -trind.generator(K = 2) +trind.generator(K = 2, ifunc=FALSE, reverse= !ifunc) } \arguments{ \item{K}{positive integer determining the size of the array.} +\item{ifunc}{if \code{TRUE} index functions are returned in place of index arrays.} +\item{reverse}{should the reverse indices be computed? Probably not if \code{ifunc==TRUE}.} } \value{ A list where the entries \code{i1} to \code{i4} are arrays in up to four dimensions, - containing K indexes along each dimension. + containing K indexes along each dimension. If \code{ifunc==TRUE} index functions + are returned in place of index arrays. If \code{reverse==TRUE} reverse indices + \code{i1r} to \code{i4r} are returned (always as arrays). } \description{ Generates index arrays for upper triangular storage up to order four. Useful when @@ -24,11 +28,17 @@ Clearly in storage we have the restriction l>=k>=j>=i, but for access we want no restriction on the indices. \code{i4[i,j,k,l]} produces the appropriate \code{m} for unrestricted indices. \code{i3} and {i2} do the same - for 3d and 2d arrays. + for 3d and 2d arrays. If \code{ifunc==TRUE} then \code{i2}, \code{i3} and \code{i4} + are functions, so \code{i4(i,j,k,l)} returns appropriate \code{m}. For high \code{K} + the function versions save storage, but are slower. + +If computed, the reverse indices pick out the unique elements of a symmetric array stored redundantly. +The indices refer to the location of the elements when the redundant array is accessed as its underlying +vector. For example the reverse indices for a 3 by 3 symmetric matrix are 1,2,3,5,6,9. } \examples{ library(mgcv) -A <- trind.generator(3) +A <- trind.generator(3,reverse=TRUE) # All permutations of c(1, 2, 3) point to the same index (5) A$i3[1, 2, 3] @@ -36,6 +46,20 @@ A$i3[2, 3, 1] A$i3[3, 1, 2] A$i3[1, 3, 2] + +## use reverse indices to pick out unique elements +## just for illustration... +A$i2;A$i2[A$i2r] +A$i3[A$i3r] + + +## same again using function indices... +A <- trind.generator(3,ifunc=TRUE) +A$i3(1, 2, 3) +A$i3(2, 1, 3) +A$i3(2, 3, 1) +A$i3(3, 1, 2) +A$i3(1, 3, 2) } \author{ Simon N. Wood . diff -Nru mgcv-1.8-40/man/vcov.gam.Rd mgcv-1.8-41/man/vcov.gam.Rd --- mgcv-1.8-40/man/vcov.gam.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/vcov.gam.Rd 2022-07-06 11:02:55.000000000 +0000 @@ -7,21 +7,23 @@ from a fitted \code{gam} object. } \usage{ -\method{vcov}{gam}(object, freq = FALSE, dispersion = NULL,unconditional=FALSE, ...) +\method{vcov}{gam}(object, sandwich=FALSE, freq = FALSE, dispersion = NULL,unconditional=FALSE, ...) } %- maybe also `usage' for other objects documented here. \arguments{ -\item{object}{ fitted model object of class \code{gam} as produced by \code{gam()}.} +\item{object}{fitted model object of class \code{gam} as produced by \code{gam()}.} +\item{sandwich}{compute sandwich estimate of covariance matrix. 
Currently expensive for discrete bam fits.} \item{freq}{ \code{TRUE} to return the frequentist covariance matrix of the parameter estimators, \code{FALSE} to return the Bayesian posterior covariance -matrix of the parameters.} +matrix of the parameters. The latter option includes the expected squared bias +according to the Bayesian smoothing prior.} \item{dispersion}{ a value for the dispersion parameter: not normally used.} \item{unconditional}{ if \code{TRUE} (and \code{freq==FALSE}) then the Bayesian smoothing parameter uncertainty corrected covariance matrix is returned, if available. } \item{...}{ other arguments, currently ignored.} } -\details{ Basically, just extracts \code{object$Ve} or \code{object$Vp} from a \code{\link{gamObject}}. +\details{ Basically, just extracts \code{object$Ve}, \code{object$Vp} or \code{object$Vc} (if available) from a \code{\link{gamObject}}, unless \code{sandwich==TRUE} in which case the sandwich estimate is computed (with or without the squared bias component). } \value{ A matrix corresponding to the estimated frequentist covariance matrix @@ -34,7 +36,7 @@ } \references{ -Wood, S.N. (2006) On confidence intervals for generalized additive models based on penalized regression splines. Australian and New Zealand Journal of Statistics. 48(4): 445-464. +Wood, S.N. (2017) Generalized Additive Models: An Introduction with R (2nd ed) CRC Press } diff -Nru mgcv-1.8-40/man/vis.gam.Rd mgcv-1.8-41/man/vis.gam.Rd --- mgcv-1.8-40/man/vis.gam.Rd 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/man/vis.gam.Rd 2022-07-20 12:31:05.000000000 +0000 @@ -5,7 +5,7 @@ \usage{ vis.gam(x,view=NULL,cond=list(),n.grid=30,too.far=0,col=NA, color="heat",contour.col=NULL,se=-1,type="link", - plot.type="persp",zlim=NULL,nCol=50,...) + plot.type="persp",zlim=NULL,nCol=50,lp=1,...) } \arguments{ @@ -44,7 +44,7 @@ \item{zlim}{a two item array giving the lower and upper limits for the z-axis scale. \code{NULL} to choose automatically.} \item{nCol}{The number of colors to use in color schemes.} - +\item{lp}{selects the linear predictor for models with more than one.} \item{...}{other options to pass on to \code{\link{persp}}, \code{\link{image}} or \code{\link{contour}}. In particular \code{ticktype="detailed"} will add proper axes labelling to the plots.
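A minimal sketch (not part of the diff) of the two new arguments documented above, 'sandwich' for vcov.gam and 'lp' for vis.gam, assuming mgcv >= 1.8-41 and using a simulated two linear predictor Gaussian location-scale model:

library(mgcv)
set.seed(2)
dat <- gamSim(1, n = 400)
b <- gam(list(y ~ s(x0) + s(x2), ~ s(x1)), family = gaulss(), data = dat)
Vp <- vcov(b)                    ## Bayesian posterior covariance (object$Vp), the default
Vs <- vcov(b, sandwich = TRUE)   ## sandwich estimate of the covariance matrix
vis.gam(b, view = c("x0", "x2"), lp = 1)  ## perspective plot for the first linear predictor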
} diff -Nru mgcv-1.8-40/MD5 mgcv-1.8-41/MD5 --- mgcv-1.8-40/MD5 2022-03-29 09:50:02.000000000 +0000 +++ mgcv-1.8-41/MD5 2022-10-21 13:52:37.000000000 +0000 @@ -1,27 +1,27 @@ -f93ba8699745d8ca1c3851c2d6226c40 *ChangeLog -c38a7b3bd831449160a45a0b343a2e81 *DESCRIPTION +b06b583aa93b950873f05e0cea0d2981 *ChangeLog +5160f8b8ee1626c07a5872048297e52e *DESCRIPTION eb723b61539feef013de476e68b5c50a *GPL-2 -890c0621bebf0f64603c13bcf74a7c9c *NAMESPACE -1f668b0b6c0ebf965e88a44c30234730 *R/bam.r -a4a473bc1452eed0937674b91218f926 *R/coxph.r -1f80161708737292ea18e335aa48d3e1 *R/efam.r -df58393f0b781d3c9ad481cb0eee7742 *R/fast-REML.r -aa23254515b0077ca2b975e6eccbcbf6 *R/gam.fit3.r -ae51e87b202f49ad590596dfe26ef29e *R/gam.fit4.r +f5125b79558443e087aae605f83e0bda *NAMESPACE +224b8b1830cedfc2f31ddffcbf063b10 *R/bam.r +cdd64a571d69f8066863e56edc8ec095 *R/coxph.r +7a1c2a8c480f2aa77efcb2dffd9eb5b0 *R/efam.r +a35059ce646e597692e85c732df16bd4 *R/fast-REML.r +272a209eed96006e49c9398a73e58810 *R/gam.fit3.r +35379e1c5fcdfa3461152048eaf954f4 *R/gam.fit4.r 1b620b840ca67c9759639bd6e53824e3 *R/gam.sim.r -340da4258918a7f9687f2602efadd460 *R/gamlss.r -c817aa45e315fd79d907435ffee51afc *R/gamm.r -8788bef359ca254c1f60b49a2198a9c5 *R/inla.r +214ab2bd930ba173a4be8681abda4549 *R/gamlss.r +154bb10ef97dfce6508232c5d66e01fd *R/gamm.r +11444d504c6bf0fada884070235ffa8f *R/inla.r 528a5ed32821238e5c49c6111d7d1e7b *R/jagam.r 65ccc76dd95cba29fdebf94a54ec7209 *R/mcmc.r -e080a05afb5c9cc787ec898712b61309 *R/mgcv.r -c8688e5a317f38934eea689fdb51b895 *R/misc.r +627d70204184483301fb72e51461a6cb *R/mgcv.r +74d3effab2f186a06bde603328b26f52 *R/misc.r 16affc004cd2b26024513647a50e01bd *R/mvam.r -0ae2cbec76745cbefd0fa1b49d1b3a18 *R/plots.r -6d1a533a973ded05fb01db18beb971c7 *R/smooth.r +c44046507c1aa3ea0813543d30d1daa9 *R/plots.r +02f41d9172f18fb5b6629a3bf22dc76a *R/smooth.r d869c9c2658860f7a23e4590100939b2 *R/soap.r bde1774ce7903cabc57b3196f8872ea8 *R/sparse.r -f22f179833fb1b272b0feb1497c0ed09 *build/partial.rdb +439bf689fa27cf9affd0335332142165 *build/partial.rdb e468195a83fab90da8e760c2c3884bd3 *data/columb.polys.rda 40874e3ced720a596750f499ded8a60a *data/columb.rda e70536903ca327ade1496529ab817b60 *inst/CITATION @@ -47,38 +47,41 @@ 4f80b36717d5d6d5c6b8d70cfb0dbfc0 *man/Tweedie.Rd 6558519f1663e70511e4d9e417fee9b4 *man/XWXd.Rd 98577aa92d0a16473b29b7ef73f37d82 *man/anova.gam.Rd -d51fb7f82a095052e9fd85441c7e99ff *man/bam.Rd +4b88111ac235b0d4a5ab1f90938ee8ac *man/bam.Rd d88a2032b0cd56410e4b63e287b76025 *man/bam.update.Rd cf5f1ee0aab639c7c4b9b357434f15b2 *man/bandchol.Rd 8136baf62776238c206f98fc03326cfd *man/blas.thread.test.Rd 745cbf31eb14fc1c5916fc634c74d998 *man/bug.reports.mgcv.Rd 530b8b4bacffa9353561f19ecfccfe19 *man/cSplineDes.Rd -8133260bd3c26231322cd7cfbfa3b421 *man/chol.down.Rd +a8a16b9af364f6af709bf5c72b9cb97b *man/chol.down.Rd 690ac6705a903b8a579c825d126ac9d1 *man/choose.k.Rd +d3e31c60c7faa49114831624f137b370 *man/cnorm.Rd c03748964ef606621418e428ae49b103 *man/columb.Rd 9de5dd1f32797f8a5db48cbaedddaa53 *man/concurvity.Rd -6c1857246bd76e16222b69d1177dbc90 *man/coxph.Rd -239e4c9f917ff2d94d02972fa6c31e4d *man/coxpht.Rd +32804ce2a1e312d9cf9e557fe1e6bf64 *man/coxph.Rd +d21bc19e99c425bd8961dd4876d71dd3 *man/coxpht.Rd b78faf4ab9477183e7a3fbbd8801afeb *man/dDeta.Rd +12b22fa7d8bc5ed0bb7f277e90ef18ab *man/dpnorm.Rd 111891bff298325366d82f0951157bc6 *man/exclude.too.far.Rd 7add4f0fe8426045141a2a8ef675b9ef *man/extract.lme.cov.Rd -abe2c383203e7588840d4c1bd605ae50 *man/family.mgcv.Rd +f29b7d0afd8b6eb8975f41c7fbca99b3 *man/factor.smooth.Rd 
+bb75dddccd8512722c095a2edd2f7b54 *man/family.mgcv.Rd 42534ae5dffc0a7f6806270c901cbdd4 *man/fix.family.link.Rd b7830b485a29b13b520fd184e6717b0d *man/fixDependence.Rd d625c0817960f6b91ef10e1e878c3987 *man/formXtViX.Rd -64c307314aad3ac3c9392a2e6499e685 *man/formula.gam.Rd +296bfe7013831ebc0244593ed67b0ac3 *man/formula.gam.Rd 4da4d585b329769eb44f0c7a6e7dd554 *man/fs.test.Rd 6f405acde2d7b6f464cf45f5395113ba *man/full.score.Rd -385c799ef84e30c3caeae766beb88aa3 *man/gam.Rd +b64928ff49498a1124eb9ecf17a583b8 *man/gam.Rd 5b5f64941101d8a2edb858edc0b3d573 *man/gam.check.Rd -a6b27dcef5a7ad0ece9bda81c702ec51 *man/gam.control.Rd +931dbee51c6e16042b66818b1f1bab33 *man/gam.control.Rd 694ff4bed4b64be9fe6ec6da4c51637b *man/gam.convergence.Rd 1cf5145859af2263f4e3459f40e1ab23 *man/gam.fit.Rd -38d1695ad2a5e0292a080535bd2d80df *man/gam.fit3.Rd -ec46b6f8190bac4b2d9eaee991c3e0a3 *man/gam.fit5.post.proc.Rd +14071175c06a7060025c8cf118dd820d *man/gam.fit3.Rd +11e6c74217ae517a9d1d27302ddf0f4b *man/gam.fit5.post.proc.Rd 768095200bafa9220f345e655c23bf2e *man/gam.mh.Rd b4bbc35c5ab48dbc71881564fd4d0391 *man/gam.models.Rd -cd19e5a09ff100c579317e9f0b6dd794 *man/gam.outer.Rd +eaf9c8378001218cfa5ec0773d5cfc49 *man/gam.outer.Rd f50059fd42d0da09271a5768811a0bc4 *man/gam.reparam.Rd c17814cea1b11e5ca374e72d6e1cbd98 *man/gam.scale.Rd 9332eaac6caccfea289b99c4534e5f8f *man/gam.selection.Rd @@ -88,7 +91,7 @@ ed77ce6e1b941625485706d7e307b816 *man/gamObject.Rd 89148f2dc12caff5073ac70c8873b33f *man/gamSim.Rd e5d2541f32dab56972f58b0773eba50c *man/gamlss.etamu.Rd -c7f140d128d1d1d76909499900faf49e *man/gamlss.gH.Rd +a99c6a970301e1f95a45a04647595bf1 *man/gamlss.gH.Rd f90ff330a8a1e2b64f6a86ec3245d45c *man/gamm.Rd 3267d64a3f54d9f0cbdf2a90eb3595e1 *man/gammals.Rd 4d047d4e0d3b40679c35148929e2cbf5 *man/gaulss.Rd @@ -119,7 +122,7 @@ 00ccf213c31910cd14f1df65a300eb33 *man/model.matrix.gam.Rd e2d304075310caea012570e85ec315ff *man/mono.con.Rd d33914a328f645af13f5a42914ca0f35 *man/mroot.Rd -a4fdf5d610dbf728bc629e1451e01ed1 *man/multinom.Rd +106c10124f7f90b886a80da4a2864c06 *man/multinom.Rd 4cb6dc2d3a6dc963a6df0fbc754fcdd4 *man/mvn.Rd 8b7b9248abf5e31abca688d9011cebde *man/negbin.Rd 33cbfe17d58a06d167980b7d640aa16f *man/new.name.Rd @@ -133,10 +136,10 @@ c4bec31ed0d3216852f06d52fe833298 *man/pdTens.Rd 1721f1b266d9e14827e8226e2cb74a81 *man/pen.edf.Rd 6c5b7b8d4edd4bdf3cc2960dd333d773 *man/place.knots.Rd -1724c9c2f4ded94d4b855d474673e72a *man/plot.gam.Rd +08c198f9ecc7fa944baa00fc920df6eb *man/plot.gam.Rd c27a6b886929b1dc83bf4b90cae848f9 *man/polys.plot.Rd -7b59e0e0546605581176cf0d677427df *man/predict.bam.Rd -d11a89b629f88f61c368319c11c6cae9 *man/predict.gam.Rd +fbe582ee241104feea2c65327eee8f8e *man/predict.bam.Rd +b2a3167f22678df719995e0092add07e *man/predict.gam.Rd eb5a78dc0ceb7113d601042114856b2f *man/print.gam.Rd 0a5f07356cdb5d0eb567ca709c5ca411 *man/psum.chisq.Rd 515e24e5066c21a59b22c534212b1049 *man/qq.gam.Rd @@ -148,7 +151,7 @@ 9f1abf5aae6e8a0113fbb12155eb98ad *man/s.Rd 1a0ed010378243af25bf7edbaa757019 *man/scat.Rd 898e7cc2def2ee234475e68d0b904b29 *man/sdiag.Rd -01e58e2112ff136909cbd9218a9938a6 *man/shash.Rd +63e0d765dd519a445bc97b9e80799e8e *man/shash.Rd d54f4042e212fca7704cf8428bdaea38 *man/single.index.Rd 6f03e337d54221bc167d531e25af1eea *man/slanczos.Rd c9c91026ea8281bf190226bce7558f72 *man/smooth.construct.Rd @@ -156,18 +159,19 @@ 9be4f5eb8c51f42d58c794429d144d35 *man/smooth.construct.bs.smooth.spec.Rd 4742f53ee8cea8b371bc0ca9a123d5f8 *man/smooth.construct.cr.smooth.spec.Rd f5e6d0f5122f61c336827b3615482157 
*man/smooth.construct.ds.smooth.spec.Rd -c60df28c123e9fa9addc00cff2323bfa *man/smooth.construct.fs.smooth.spec.Rd +48be178894eebe0c13e0e55aecbf57cd *man/smooth.construct.fs.smooth.spec.Rd 732950d96f2fde5da2d5af869c5b54bb *man/smooth.construct.gp.smooth.spec.Rd 656f9e77ad4564ab3cd610118780b8ac *man/smooth.construct.mrf.smooth.spec.Rd 49e4a296f8243ae8749dc1f7954438ba *man/smooth.construct.ps.smooth.spec.Rd f106ea5df761acde88ed479966b00bba *man/smooth.construct.re.smooth.spec.Rd 324e810b271d53efeca8c9eadcd19b86 *man/smooth.construct.so.smooth.spec.Rd 962ba22ac245e9de2c8e2618d000d5ad *man/smooth.construct.sos.smooth.spec.Rd +1464b52008f084de390b1b6ea2eae22b *man/smooth.construct.sz.smooth.spec.Rd 3cb4e59f915c8d64b90754eaeeb5a86f *man/smooth.construct.t2.smooth.spec.Rd 8672633a1fad8df3cb1f53d7fa883620 *man/smooth.construct.tensor.smooth.spec.Rd c522c270c217e5b83cf8f3e95220a03f *man/smooth.construct.tp.smooth.spec.Rd 3e7d556e875e8803476ef79ef4f9c90f *man/smooth.info.Rd -ae5e27524e37d57505754639455f18a5 *man/smooth.terms.Rd +543451d8e40bcf1642e0b9509e7a7450 *man/smooth.terms.Rd f642b1caecf3d2bcdbbc0a884e1d3fa5 *man/smooth2random.Rd de44bcadb21fd175765a1809a6dab925 *man/smoothCon.Rd fc985fa781e4dfe2b33d014c3d1d5b73 *man/sp.vcov.Rd @@ -179,11 +183,11 @@ d9416fead22c912099612650d37679f1 *man/tensor.prod.model.matrix.Rd a6feff25ec8241bf5afb3d9fe219d26d *man/totalPenaltySpace.Rd f22f1cee0ff2b70628846d1d0f8e9a66 *man/trichol.Rd -87e6b4437d00fab4fc814f4cefa3795c *man/trind.generator.Rd +05548b47d1abfaf81eeb24e8499c890b *man/trind.generator.Rd c45d48723239b42ec499fe4f2ea2b045 *man/twlss.Rd bc350bfd3f4f8316d3b29b247292a16d *man/uniquecombs.Rd -a16b3a5a4d13c705dcab8d1cd1b3347e *man/vcov.gam.Rd -281e73658c726997196727a99a4a1f9e *man/vis.gam.Rd +5c6864793c3d03d3bf6ad51ce18e6b8e *man/vcov.gam.Rd +d8846f1a9a854e0e00908ecb2cc31764 *man/vis.gam.Rd 761c703d1d17e6c3ecd2ee8c134d11b4 *man/ziP.Rd dd807b69a490503d8ac5425150a83f48 *man/ziplss.Rd aa1bbd4ccea7effe3d28552d60badeb3 *po/R-de.po @@ -198,26 +202,27 @@ c43db22b7b523d11637edb6f007d546c *po/ko.po dfd5b0dfa421012bbc720f86be2705dc *po/mgcv.pot 5c63c56297c786bd170b8951df4ca295 *po/pl.po -03972284b3400cf82cacd5d2dc4b8cb3 *src/Makevars +51aaab043f33640ea88d38d62922e78b *src/Makevars e16c691700bbb44215597c6b0c7e6d2e *src/coxph.c a06f879ace39efc246697d6db7184d43 *src/davies.c -d54b10e85d61149a94c6e6a52f30300e *src/discrete.c -aa5b49d0137595304bb754de5192ae2a *src/gdi.c +81d1f02223e08c9d1cf55df4ab888302 *src/discrete.c +c13f343534ce070a2e8edc293ac34f8d *src/gdi.c 2436f9b328e80370ce2203dbf1dd813c *src/general.h -a958d9a8e9695186ff1976b20fc04851 *src/init.c +0bbaf9c8574f9ebaee9d1944c13a2cc6 *src/init.c 94cb9e030ded330bc3502502c5168f49 *src/magic.c -809462a9fa223ac26deb437fc676af31 *src/mat.c -bb75d10bb56dc479f207325e97f6a935 *src/matrix.c -de37b0972199b796654405efc007f25b *src/matrix.h -5dbce5b588a13a57c06fdece4c5bb76e *src/mgcv.c -fb7ee4d01a05d7df16a828ef84c97946 *src/mgcv.h -04dbc0f375e85ce7b0a7f3ece439cb15 *src/misc.c +335b9614f547c69b3384aa5c2713be82 *src/mat.c +c4b293e458d834eea4dff327434ad09d *src/matrix.c +f9ac97fd6dda099d13fa2d2baecfe2f0 *src/matrix.h +2e1956bc8a1b27681e7caa187fec3603 *src/mgcv.c +21cf36cb06870c547343f5a2b1fae8ce *src/mgcv.h +b1e5c75ee89d5e93c4f72fd99574b11b *src/misc.c 057e8332e5856342b61682c980654bbc *src/mvn.c -b1038392f9dab746cb7a87ea6a2ff9a7 *src/qp.c +9e5f797b33da52b6c16034c28004e6d3 *src/ncv.c +a5cff8b4637111a89f9384555ea3a6e4 *src/qp.c 073a4b5b0bc6e869c5b35478c47facf1 *src/qp.h d5673b88f6f3d85c62a1337f49abba24 *src/soap.c 
c4e0f0bd5a90402d3758572c8ca980d3 *src/sparse-smooth.c -ab2e8c43ffcd714a7970dc69ad2f9741 *src/sparse.c +23c39905ba8395bb9a693556954ec7b4 *src/sparse.c 915c917b2bcfe839c41cc688a5fa09cd *src/tprs.c 5bd85bf0319a7b7c755cf49c91a7cd94 *src/tprs.h 38e593a85a6fd0bb4fbed836f3361406 *tests/bam.R diff -Nru mgcv-1.8-40/NAMESPACE mgcv-1.8-41/NAMESPACE --- mgcv-1.8-40/NAMESPACE 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/NAMESPACE 2022-09-26 09:10:45.000000000 +0000 @@ -2,8 +2,8 @@ export("%.%",anova.gam, bam, bam.update,bandchol, betar, blas.thread.test, - choldrop,cholup,cox.ph,concurvity, - cSplineDes,dDeta,dmvn,d.mvt, + choldrop,cholup,cnorm,cox.ph,concurvity, + cSplineDes,dDeta,dmvn,d.mvt,dpnorm, exclude.too.far,extract.lme.cov, extract.lme.cov2,FFdes, formXtViX, full.score, formula.gam,fixDependence,fix.family.link, fix.family.var, fix.family.ls, fix.family.qf,fix.family.rd, @@ -80,7 +80,7 @@ ziP, ziplss,zipll) importFrom(grDevices,cm.colors,dev.interactive,devAskNewPage,gray,grey, - heat.colors,terrain.colors,topo.colors,axisTicks) + heat.colors,terrain.colors,topo.colors,axisTicks,hcl.colors) importFrom(graphics,abline,axis,axTicks,box,contour,hist,image,lines, mtext, par, persp,plot,points, polygon,rect,strheight,strwidth,text,title) diff -Nru mgcv-1.8-40/R/bam.r mgcv-1.8-41/R/bam.r --- mgcv-1.8-40/R/bam.r 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/R/bam.r 2022-09-20 08:51:05.000000000 +0000 @@ -866,7 +866,7 @@ if (!is.finite(dev)) stop("Non-finite deviance") ## preparation for working model fit is ready, but need to test for convergence first - if (iter>2 && abs(dev - devold)/(0.1 + abs(dev)) < control$epsilon) { + if (iter>2 && abs(dev - devold)/(0.1 + abs(dev)) < control$epsilon && (scale>0 || abs(Nstep[n.sp+1])1) { ## Kronecker product of set to zero constraints + ## on entry qrc is [unused.index, dim1, dim2,..., total number of constraints] + v[[kb]] <- c(length(qrc)-2,qrc[-1]) ## number of sum-to-zero contrasts, their dimensions, number of constraints + qc[kb] <- -1 } else { v[[kb]] <- rep(0,0) ## if (!inherits(qrc,"character")||qrc!="no constraints") warning("unknown tensor constraint type") @@ -2698,7 +2708,8 @@ } #jj <- G$smooth[[i]]$first.para:G$smooth[[i]]$last.para; if (sb==1&&qc[kb]) { - jj <- 1:(np-1) + lp0; lp0 <- lp0 + np - 1 + ncon <- if (qc[kb]<0) v[[kb]][length(v[[kb]])] else 1 + jj <- 1:(np-ncon) + lp0; lp0 <- lp0 + np - ncon ## Hard to think of an application requiring constraint when nsub>1, hence not ## worked out yet. Add warning to make sure this is flagged if attempt made ## to do this in future.... diff -Nru mgcv-1.8-40/R/coxph.r mgcv-1.8-41/R/coxph.r --- mgcv-1.8-40/R/coxph.r 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/R/coxph.r 2022-08-22 06:55:50.000000000 +0000 @@ -144,13 +144,16 @@ a[,trind] <- matrix(oo$A[1:(p*nti)],p,nti) ## compute Schoenfeld resiudals, within stratum if (p) { - Xs <- X[ind,] <- (X[ind,,drop=FALSE] - apply(gamma[ind]*X[ind,],2,cumsum)/cumsum(gamma[ind]))*wt[ind] + Xs <- X[ind,] <- (X[ind,,drop=FALSE] - apply(gamma[ind]*X[ind,,drop=FALSE],2,cumsum)/cumsum(gamma[ind]))*wt[ind] n <- nrow(Xs) Rs <- apply(Xs[n:1,,drop=FALSE],2,cumsum)[n:1,,drop=FALSE] ## score residuals ## now remove the penalization induced drift... 
- x <- (1:n)[wt[ind]!=0]; dx <- seq(1,0,length=sum(wt[ind]!=0)) - drift <- approx(x,dx,xout=1:n,method="constant",rule=2)$y - Rs <- Rs - t(t(matrix(drift,n,ncol(R)))*Rs[1,]) + x <- (1:n)[wt[ind]!=0]; + if (length(x)) { + dx <- seq(1,0,length=sum(wt[ind]!=0)) + drift <- approx(x,dx,xout=1:n,method="constant",rule=2)$y + Rs <- Rs - t(t(matrix(drift,n,ncol(R)))*Rs[1,]) + } R[ind,] <- Rs } } diff -Nru mgcv-1.8-40/R/efam.r mgcv-1.8-41/R/efam.r --- mgcv-1.8-40/R/efam.r 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/R/efam.r 2022-10-20 07:53:23.000000000 +0000 @@ -154,8 +154,481 @@ ## function. - deprecated (commented out below - appears to be used nowhere) ## scale - < 0 to estimate. ignored if NULL +####################### +## negative binomial... +####################### + +nb <- function (theta = NULL, link = "log") { +## Extended family object for negative binomial, to allow direct estimation of theta +## as part of REML optimization. Currently the template for extended family objects. + linktemp <- substitute(link) + if (!is.character(linktemp)) linktemp <- deparse(linktemp) + if (linktemp %in% c("log", "identity", "sqrt")) stats <- make.link(linktemp) + else if (is.character(link)) { + stats <- make.link(link) + linktemp <- link + } else { + if (inherits(link, "link-glm")) { + stats <- link + if (!is.null(stats$name)) + linktemp <- stats$name + } + else stop(linktemp, " link not available for negative binomial family; available links are \"identity\", \"log\" and \"sqrt\"") + } + ## Theta <- NULL; + n.theta <- 1 + if (!is.null(theta)&&theta!=0) { + if (theta>0) { + iniTheta <- log(theta) ## fixed theta supplied + n.theta <- 0 ## signal that there are no theta parameters to estimate + } else iniTheta <- log(-theta) ## initial theta supplied + } else iniTheta <- 0 ## inital log theta value + + env <- new.env(parent = .GlobalEnv) + assign(".Theta", iniTheta, envir = env) + getTheta <- function(trans=FALSE) if (trans) exp(get(".Theta")) else get(".Theta") # get(".Theta") + putTheta <- function(theta) assign(".Theta", theta,envir=environment(sys.function())) + + variance <- function(mu) mu + mu^2/exp(get(".Theta")) ## Not actually needed! + + validmu <- function(mu) all(mu > 0) + + dev.resids <- function(y, mu, wt,theta=NULL) { + if (is.null(theta)) theta <- get(".Theta") + theta <- exp(theta) ## note log theta supplied + mu[mu<=0] <- NA + 2 * wt * (y * log(pmax(1, y)/mu) - + (y + theta) * log((y + theta)/(mu + theta))) + } + + Dd <- function(y, mu, theta, wt, level=0) { + ## derivatives of the nb deviance... + ##ltheta <- theta + theta <- exp(theta) + yth <- y + theta + muth <- mu + theta + r <- list() + ## get the quantities needed for IRLS. + ## Dmu2eta2 is deriv of D w.r.t mu twice and eta twice, + ## Dmu is deriv w.r.t. mu once, etc... 
+ r$Dmu <- 2 * wt * (yth/muth - y/mu) + r$Dmu2 <- -2 * wt * (yth/muth^2 - y/mu^2) + r$EDmu2 <- 2 * wt * (1/mu - 1/muth) ## exact (or estimated) expected weight + if (level>0) { ## quantities needed for first derivatives + r$Dth <- -2 * wt * theta * (log(yth/muth) + (1 - yth/muth) ) + r$Dmuth <- 2 * wt * theta * (1 - yth/muth)/muth + r$Dmu3 <- 4 * wt * (yth/muth^3 - y/mu^3) + r$Dmu2th <- 2 * wt * theta * (2*yth/muth - 1)/muth^2 + r$EDmu2th <- 2 * wt / muth^2 + } + if (level>1) { ## whole damn lot + r$Dmu4 <- 2 * wt * (6*y/mu^4 - 6*yth/muth^4) + r$Dth2 <- -2 * wt * theta * (log(yth/muth) + + theta*yth/muth^2 - yth/muth - 2*theta/muth + 1 + + theta /yth) + r$Dmuth2 <- 2 * wt * theta * (2*theta*yth/muth^2 - yth/muth - 2*theta/muth + 1)/muth + r$Dmu2th2 <- 2 * wt * theta * (- 6*yth*theta/muth^2 + 2*yth/muth + 4*theta/muth - 1) /muth^2 + r$Dmu3th <- 4 * wt * theta * (1 - 3*yth/muth)/muth^3 + } + r + } + + aic <- function(y, mu, theta=NULL, wt, dev) { + if (is.null(theta)) theta <- get(".Theta") + Theta <- exp(theta) + term <- (y + Theta) * log(mu + Theta) - y * log(mu) + + lgamma(y + 1) - Theta * log(Theta) + lgamma(Theta) - + lgamma(Theta + y) + 2 * sum(term * wt) + } + + ls <- function(y,w,theta,scale) { + ## the log saturated likelihood function for nb + Theta <- exp(theta) + #vec <- !is.null(attr(theta,"vec.grad")) ## lsth by component? + ylogy <- y;ind <- y>0;ylogy[ind] <- y[ind]*log(y[ind]) + term <- (y + Theta) * log(y + Theta) - ylogy + + lgamma(y + 1) - Theta * log(Theta) + lgamma(Theta) - + lgamma(Theta + y) + ls <- -sum(term*w) + ## first derivative wrt theta... + yth <- y+Theta + lyth <- log(yth) + psi0.yth <- digamma(yth) + psi0.th <- digamma(Theta) + term <- Theta * (lyth - psi0.yth + psi0.th-theta) + #lsth <- if (vec) -term*w else -sum(term*w) + LSTH <- matrix(-term*w,ncol=1) + lsth <- sum(LSTH) + ## second deriv wrt theta... + psi1.yth <- trigamma(yth) + psi1.th <- trigamma(Theta) + term <- Theta * (lyth - Theta*psi1.yth - psi0.yth + Theta/yth + Theta * psi1.th + psi0.th - theta -1) + lsth2 <- -sum(term*w) + list(ls=ls, ## saturated log likelihood + lsth1=lsth, ## first deriv vector w.r.t theta - last element relates to scale, if free + LSTH1=LSTH, ## rows are above derivs by datum + lsth2=lsth2) ## Hessian w.r.t. 
theta, last row/col relates to scale, if free + } + + initialize <- expression({ + if (any(y < 0)) stop("negative values not allowed for the negative binomial family") + ##n <- rep(1, nobs) + mustart <- y + (y == 0)/6 + }) + + postproc <- function(family,y,prior.weights,fitted,linear.predictors,offset,intercept){ + posr <- list() + posr$null.deviance <- find.null.dev(family,y,eta=linear.predictors,offset,prior.weights) + posr$family <- + paste("Negative Binomial(",round(family$getTheta(TRUE),3),")",sep="") + posr + } + + rd <- function(mu,wt,scale) { + Theta <- exp(get(".Theta")) + rnbinom(n=length(mu),size=Theta,mu=mu) + } + + qf <- function(p,mu,wt,scale) { + Theta <- exp(get(".Theta")) + qnbinom(p,size=Theta,mu=mu) + } + + + environment(dev.resids) <- environment(aic) <- environment(getTheta) <- + environment(rd)<- environment(qf)<- environment(variance) <- environment(putTheta) <- env + structure(list(family = "negative binomial", link = linktemp, linkfun = stats$linkfun, + linkinv = stats$linkinv, dev.resids = dev.resids,Dd=Dd,variance=variance, + aic = aic, mu.eta = stats$mu.eta, initialize = initialize,postproc=postproc,ls=ls, + validmu = validmu, valideta = stats$valideta,n.theta=n.theta, + ini.theta = iniTheta,putTheta=putTheta,getTheta=getTheta,rd=rd,qf=qf), + class = c("extended.family","family")) +} ## nb + +######################### +## Censored normal family +######################### + +cnorm <- function (theta = NULL, link = "identity") { +## Extended family object for censored Gaussian, as required for Tobit regression or log-normal +## Accelarated Failure Time models. + linktemp <- substitute(link) + if (!is.character(linktemp)) linktemp <- deparse(linktemp) + if (linktemp %in% c("log", "identity", "sqrt")) stats <- make.link(linktemp) + else if (is.character(link)) { + stats <- make.link(link) + linktemp <- link + } else { + if (inherits(link, "link-glm")) { + stats <- link + if (!is.null(stats$name)) + linktemp <- stats$name + } + else stop(linktemp, " link not available for negative binomial family; available links are \"identity\", \"log\" and \"sqrt\"") + } + ## Theta <- NULL; + n.theta <- 1 + if (!is.null(theta)&&theta!=0) { + if (theta>0) { + iniTheta <- log(theta) ## fixed theta supplied + n.theta <- 0 ## signal that there are no theta parameters to estimate + } else iniTheta <- log(-theta) ## initial theta supplied + } else iniTheta <- 0 ## inital log theta value + + env <- new.env(parent = .GlobalEnv) + assign(".Theta", iniTheta, envir = env) + getTheta <- function(trans=FALSE) if (trans) exp(get(".Theta")) else get(".Theta") # get(".Theta") + putTheta <- function(theta) assign(".Theta", theta,envir=environment(sys.function())) + + validmu <- if (link=="identity") function(mu) all(is.finite(mu)) else function(mu) all(mu>0) + + dev.resids <- function(y, mu, wt,theta=NULL) { ## cnorm + if (is.null(theta)) theta <- get(".Theta") + th <- theta - log(wt)/2 + yat <- attr(y,"censor") + if (is.null(yat)) yat <- rep(NA,length(y)) + ii <- which(yat==y) ## uncensored observations + d <- rep(0,length(y)) + if (length(ii)) d[ii] <- (y[ii]-mu[ii])^2*exp(-2*th[ii]) + ii <- which(is.finite(yat)&yat!=y) ## interval censored + if (length(ii)) { + y1 <- pmax(yat[ii],y[ii]); y0 <- pmin(yat[ii],y[ii]) + y10 <- (y1-y0)*exp(-th[ii])/2 + d[ii] <- 2*log(dpnorm(-y10,y10)) - ## 2 * log saturated likelihood + 2*log(dpnorm((y0-mu[ii])*exp(-th[ii]),(y1-mu[ii])*exp(-th[ii]))) + } + ii <- which(yat == -Inf) ## left censored + if (length(ii)) d[ii] <- 
-2*pnorm((y[ii]-mu[ii])*exp(-th[ii]),log.p=TRUE) + ii <- which(yat == Inf) ## right censored + if (length(ii)) d[ii] <- -2*pnorm(-(y[ii]-mu[ii])*exp(-th[ii]),log.p=TRUE) + d + } ## dev.resids cnorm + + Dd <- function(y, mu, theta, wt, level=0) { + ## derivatives of the cnorm deviance... + + th <- theta - log(wt)/2 + eth <- exp(-th) + e2th <- eth*eth + e3th <- e2th*eth + yat <- attr(y,"censor") + if (is.null(yat)) yat <- y + ## get case indices... + iu <- which(yat==y) ## uncensored observations + ii <- which(is.finite(yat*y)&yat!=y) ## interval censored + il <- which(yat == -Inf) ## left censored + ir <- which(yat == Inf) ## right censored + n <- length(mu) + Dmu <- Dmu2 <- rep(0,n) + if (level>0) Dth <- Dmuth <- Dmu3 <- Dmu2th <- Dmu + if (level>1) Dmu4 <- Dth2 <- Dmuth2 <- Dmu2th2 <- Dmu3th <- Dmu + if (length(iu)) { ## uncensored + ethi <- eth[iu]; e2thi <- e2th[iu] + ymeth <- (y[iu]-mu[iu])*ethi + Dmu[iu] <- Dmui <- -2*ymeth*ethi + Dmu2[iu] <- 2*e2thi + if (level>0) { + Dth[iu] <- -2*ymeth^2 + Dmuth[iu] <- -2*Dmui + Dmu3[iu] <- 0 + Dmu2th[iu] <- -4*e2thi + } + if (level>1) { + Dmu4[iu] <- 0 + Dth2[iu] <- -2*Dth[iu] + Dmuth2[iu] <- 4*Dmui + Dmu2th2[iu] <- 8*e2thi + Dmu3th[iu] <- 0 + } + } ## uncensored done + if (length(ii)) { ## interval censored + y0 <- pmin(y[ii],yat[ii]); y1 <- pmax(y[ii],yat[ii]) + ethi <- eth[ii];e2thi <- e2th[ii];e3thi <- e3th[ii] + y10 <- (y1-y0)*ethi/2 + ymeth0 <- (y0-mu[ii])*ethi;ymeth1 <- (y1-mu[ii])*ethi + D0 <- dpnorm(ymeth0,ymeth1) + + dnorm0 <- dnorm(ymeth0);dnorm1 <- dnorm(ymeth1) + Dmui <- Dmu[ii] <- 2*ethi*(dnorm1-dnorm0)/D0 + Dmu2[ii] <- Dmui^2/2 + 2*e2thi*(dnorm1*ymeth1-dnorm0*ymeth0)/D0 + if (level>0) { + Dmu2i <- Dmu2[ii]; + ymeth12 <- ymeth1^2; ymeth13 <- ymeth12*ymeth1 + ymeth02 <- ymeth0^2; ymeth03 <- ymeth02*ymeth0 + Dls <- dpnorm(-y10,y10) + Dth[ii] <- Dthi <- 2*(dnorm1*ymeth1-dnorm0*ymeth0)/D0 + Dmuth[ii] <- Dmuthi <- Dmui*Dthi/2 + 2*ethi*(dnorm1*(ymeth12-1)-dnorm0*(ymeth02-1))/D0 + Dmu3[ii] <- Dmui*(3*Dmu2i/2 - Dmui^2/4 - e2thi) + + 2*e3thi*(dnorm1*ymeth12-dnorm0*ymeth02)/D0 + Dmu2th[ii] <- (Dmu2i*Dthi+Dmui*Dmuthi)/2 + + ethi*(dnorm1*(2*ymeth13*ethi+ Dmui*ymeth12-6*ymeth1*ethi - Dmui)- + dnorm0*(2*ymeth03*ethi+ Dmui*ymeth02-6*ymeth0*ethi - Dmui))/D0 + } + if (level>1) { + ymeth14 <- ymeth13*ymeth1; ymeth15 <- ymeth12*ymeth13 + ymeth04 <- ymeth03*ymeth0; ymeth05 <- ymeth02*ymeth03 + Dmu3i <- Dmu3[ii]; Dmu2thi <- Dmu2th[ii] + Dmu4[ii] <- Dmu2i*(3*Dmu2i/2-Dmui^2/4-e2thi) + Dmui*(3*Dmu3i/2-Dmui*Dmu2i/2) + + e3thi*(dnorm1*(2*ymeth13*ethi + Dmui*ymeth12-4*ymeth1*ethi) - + dnorm0*(2*ymeth03*ethi + Dmui*ymeth02-4*ymeth0*ethi))/D0 + Dth2[ii] <- Dth2i <- Dthi^2/2 + 2*(dnorm1*(ymeth13 - ymeth1) - dnorm0*(ymeth03 - ymeth0))/D0 + Dmuth2[ii] <- (Dmuthi*Dthi+Dmui*Dth2i)/2 + + ethi*(dnorm1*(2*ymeth14 + (Dthi-8)*ymeth12 + 2-Dthi) - + dnorm0*(2*ymeth04 + (Dthi-8)*ymeth02 + 2-Dthi))/D0 + Dmu2th2[ii] <- (Dmu2thi*Dthi+Dmuthi^2 + Dmu2i*Dth2i + Dmui*Dmuth2[ii])/2 + + 0.5*ethi*(dnorm1*(4*ymeth15*ethi+2*Dmui*ymeth14+2*(Dthi-16)*ymeth13*ethi+ + 2*(18-3*Dthi)*ymeth1*ethi+(2*Dmuthi+(Dthi-8)*Dmui)*ymeth12+(2-Dthi)*Dmui-2*Dmuthi)- + dnorm0*(4*ymeth05*ethi+2*Dmui*ymeth04+2*(Dthi-16)*ymeth03*ethi+ + 2*(18-3*Dthi)*ymeth0*ethi+(2*Dmuthi+(Dthi-8)*Dmui)*ymeth02+(2-Dthi)*Dmui-2*Dmuthi) + )/D0 + + Dmu3th[ii] <- Dmu3i*Dthi/2 + Dmu2i*Dmuthi + Dmui*Dmu2thi/2 + + 0.5*ethi*(dnorm1*(4*ymeth14*e2thi + 4*Dmui*ymeth13*ethi - 12*Dmui*ymeth1*ethi + + (Dmui^2+2*Dmu2i-24*e2thi)*ymeth12+ 12*e2thi -(Dmui^2+2*Dmu2i)) - + dnorm0*(4*ymeth04*e2thi + 4*Dmui*ymeth03*ethi - 12*Dmui*ymeth0*ethi + + 
(Dmui^2+2*Dmu2i-24*e2thi)*ymeth02+ 12*e2thi -(Dmui^2+2*Dmu2i)))/D0 + } + if (level>0) Dth[ii] <- Dthi -4*dnorm(y10)*y10/Dls + if (level>1) Dth2[ii] <- Dth2i - 8*(dnorm(y10)*y10/Dls)^2 - 4*dnorm(y10)*(y10^3 -y10)/Dls + } ## interval censored done + if (length(il)) { ## left censoring (y0 = -Inf, basically) + ethi <- eth[il];e2thi <- e2th[il];e3thi <- e3th[il] + ymeth1 <- (y[il]-mu[il])*ethi;dnorm1 <- dnorm(ymeth1) + D0 <- pnorm(ymeth1) + Dmui <- Dmu[il] <- 2*ethi*dnorm1/D0 + Dmu2[il] <- Dmui^2/2 + 2*e2thi*dnorm1*ymeth1/D0 + if (level>0) { + ymeth12 <- ymeth1^2; ymeth13 <- ymeth12*ymeth1 + Dmu2i <- Dmu2[il] + Dth[il] <- Dthi <- 2*dnorm1*ymeth1/D0 + Dmuth[il] <- Dmuthi <- Dmui*Dthi/2 + 2*ethi*dnorm1*(ymeth12-1)/D0 + Dmu3[il] <- Dmui*(3*Dmu2i/2 - Dmui^2/4 - e2thi) + 2*e3thi*dnorm1*ymeth12/D0 + Dmu2th[il] <- (Dmu2i*Dthi+Dmui*Dmuthi)/2 + + ethi*dnorm1*(2*ymeth13*ethi+ Dmui*ymeth12-6*ymeth1*ethi - Dmui)/D0 + } + if (level>1) { + ymeth14 <- ymeth13*ymeth1; ymeth15 <- ymeth12*ymeth13 + Dmu3i <- Dmu3[il]; Dmu2thi <- Dmu2th[il] + Dmu4[il] <- Dmu2i*(3*Dmu2i/2-Dmui^2/4-e2thi) + Dmui*(3*Dmu3i/2-Dmui*Dmu2i/2) + + e3thi*dnorm1*(2*ymeth13*ethi + Dmui*ymeth12-4*ymeth1*ethi)/D0 + Dth2[il] <- Dth2i <- Dthi^2/2 + 2*dnorm1*(ymeth13 - ymeth1)/D0 + Dmuth2[il] <- (Dmuthi*Dthi+Dmui*Dth2i)/2 + + ethi*dnorm1*(2*ymeth14 + (Dthi-8)*ymeth12 + 2-Dthi)/D0 + Dmu2th2[il] <- (Dmu2thi*Dthi+Dmuthi^2 + Dmu2i*Dth2i + Dmui*Dmuth2[il])/2 + + 0.5*ethi*dnorm1*(4*ymeth15*ethi+2*Dmui*ymeth14+2*(Dthi-16)*ymeth13*ethi+ + 2*(18-3*Dthi)*ymeth1*ethi+(2*Dmuthi+(Dthi-8)*Dmui)*ymeth12+(2-Dthi)*Dmui-2*Dmuthi)/D0 + + Dmu3th[il] <- Dmu3i*Dthi/2 + Dmu2i*Dmuthi + Dmui*Dmu2thi/2 + + 0.5*ethi*dnorm1*(4*ymeth14*e2thi + 4*Dmui*ymeth13*ethi - 12*Dmui*ymeth1*ethi + + (Dmui^2+2*Dmu2i-24*e2thi)*ymeth12+ 12*e2thi -(Dmui^2+2*Dmu2i))/D0 + } + } # left censoring done + if (length(ir)) { ## right censoring - basically y1 = Inf + ethi <- eth[ir];e2thi <- e2th[ir];e3thi <- e3th[ir] + ymeth0 <- (y[ir]-mu[ir])*ethi; + D0 <- pnorm(-ymeth0) + dnorm0 <- dnorm(ymeth0); + Dmu[ir] <- Dmui <- -2*ethi*dnorm0/D0 + Dmu2[ir] <- Dmui^2/2 - 2*e2thi*dnorm0*ymeth0/D0 + if (level>0) { + ymeth02 <- ymeth0^2; ymeth03 <- ymeth02*ymeth0 + Dmu2i <- Dmu2[ir] + Dth[ir] <- Dthi <- -2*dnorm0*ymeth0/D0 + Dmuth[ir] <- Dmuthi <- Dmui*Dthi/2 - 2*ethi*dnorm0*(ymeth02-1)/D0 + Dmu3[ir] <- Dmui*(3*Dmu2i/2 - Dmui^2/4 - e2thi) - 2*e3thi*dnorm0*ymeth02/D0 + Dmu2th[ir] <- (Dmu2i*Dthi+Dmui*Dmuthi)/2 - + ethi*dnorm0*(2*ymeth0^3*ethi+ Dmui*ymeth02-6*ymeth0*ethi - Dmui)/D0 + } + if (level>1) { + ymeth04 <- ymeth03*ymeth0; ymeth05 <- ymeth02*ymeth03 + Dmu3i <- Dmu3[ir]; Dmu2thi <- Dmu2th[ir] + Dmu4[ir] <- Dmu2i*(3*Dmu2i/2-Dmui^2/4-e2thi) + Dmui*(3*Dmu3i/2-Dmui*Dmu2i/2) - + e3thi*dnorm0*(2*ymeth0^3*ethi + Dmui*ymeth02-4*ymeth0*ethi)/D0 + Dth2[ir] <- Dthi^2/2 - 2*dnorm0*(ymeth0^3 - ymeth0)/D0 + Dmuth2[ir] <- (Dmuthi*Dthi+Dmui*Dth2[ir])/2 - + ethi*dnorm0*(2*ymeth04 + (Dthi-8)*ymeth02 + 2-Dthi)/D0 + Dmu2th2[ir] <- (Dmu2thi*Dthi+Dmuthi^2 + Dmu2i*Dth2[ir] + Dmui*Dmuth2[ir])/2 - + 0.5*ethi*dnorm0*(4*ymeth05*ethi+2*Dmui*ymeth04+2*(Dthi-16)*ymeth0^3*ethi+ + 2*(18-3*Dthi)*ymeth0*ethi+(2*Dmuthi+(Dthi-8)*Dmui)*ymeth02+(2-Dthi)*Dmui-2*Dmuthi)/D0 + + Dmu3th[ir] <- Dmu3i*Dthi/2 + Dmu2i*Dmuthi + Dmui*Dmu2thi/2 - + 0.5*ethi*(dnorm0*(4*ymeth04*e2thi + 4*Dmui*ymeth0^3*ethi - + 12*Dmui*ymeth0*ethi + (Dmui^2+2*Dmu2i-24*e2thi)*ymeth02+ 12*e2thi -(Dmui^2+2*Dmu2i)))/D0 + } + } # right censoring done + r <- list(Dmu=Dmu,Dmu2=Dmu2,EDmu2=Dmu2) + if (level>0) { + r$Dth <- Dth;r$Dmuth <- Dmuth;r$Dmu3 <- Dmu3 + r$EDmu2th <- r$Dmu2th 
<- Dmu2th; + } + if (level>1) { + r$Dmu4 <- Dmu4; r$Dth2 <- Dth2; r$Dmuth2 <- Dmuth2; + r$Dmu2th2 <- Dmu2th2; r$Dmu3th <- Dmu3th + } + r + } ## Dd cnorm + + aic <- function(y, mu, theta=NULL, wt, dev) { ## cnorm AIC + if (is.null(theta)) theta <- get(".Theta") + th <- theta - log(wt)/2 + yat <- attr(y,"censor") + if (is.null(yat)) yat <- y + ii <- which(is.na(yat)|yat==y) ## uncensored observations + d <- rep(0,length(y)) + if (length(ii)) d[ii] <- (y[ii]-mu[ii])^2*exp(-2*th[ii]) + log(2*pi) + 2*th[ii] + ii <- which(is.finite(yat)&yat!=y) ## interval censored + if (length(ii)) { + y1 <- pmax(yat[ii],y[ii]); y0 <- pmin(yat[ii],y[ii]) + d[ii] <- - 2*log(dpnorm((y0-mu[ii])*exp(-th[ii]),(y1-mu[ii])*exp(-th[ii]))) + } + ii <- which(yat == -Inf) ## left censored + if (length(ii)) d[ii] <- -2*pnorm((y[ii]-mu[ii])*exp(-th[ii]),log.p=TRUE) + ii <- which(yat == Inf) ## right censored + if (length(ii)) d[ii] <- -2*pnorm(-(y[ii]-mu[ii])*exp(-th[ii]),log.p=TRUE) + + sum(d) ## -2*log likelihood + } ## AIC cnorm + + ls <- function(y,w,theta,scale) { + ## the cnorm log saturated likelihood function. + th <- theta - log(w)/2 + yat <- attr(y,"censor") + if (is.null(yat)) yat <- y + ii <- which(yat==y) ## uncensored observations + d2 <- d1 <- d <- rep(0,length(y)) + if (length(ii)) { + d[ii] <- log(2*pi)/2 - th[ii] + d1[ii] <- -1 + } + ii <- which(is.finite(yat)&yat!=y) ## interval censored + if (length(ii)) { + y1 <- pmax(yat[ii],y[ii]); y0 <- pmin(yat[ii],y[ii]) + y10 <- (y1-y0)*exp(-th[ii])/2 + d0 <- dpnorm(-y10,y10) + d[ii] <- log(d0) ## log saturated likelihood + d1[ii] <- -2*dnorm(y10)*y10/d0 + d2[ii] <- -d1[ii]^2 - 2*dnorm(y10)*y10^2*(y1-y0)/2 + } + ## right or left censored saturated log likelihoods are zero. + list(ls=sum(d), ## saturated log likelihood + lsth1=sum(d1), ## first deriv vector w.r.t theta - last element relates to scale, if free + LSTH1=matrix(d1,ncol=1), + lsth2=sum(d2)) ## Hessian w.r.t. 
theta, last row/col relates to scale, if free + } ## ls cnorm + + initialize <- expression({ ## cnorm + if (is.matrix(y)) { + .yat <- y[,2] + y <- y[,1] + attr(y,"censor") <- .yat + } + mustart <- if (family$link=="identity") y else pmax(y,min(y>0)) + }) + + postproc <- function(family,y,prior.weights,fitted,linear.predictors,offset,intercept){ + posr <- list() + if (is.matrix(y)) { + .yat <- y[,2] + y <- y[,1] + attr(y,"censor") <- .yat + } + posr$null.deviance <- find.null.dev(family,y,eta=linear.predictors,offset,prior.weights) + posr$family <- + paste("cnorm(",round(family$getTheta(TRUE),3),")",sep="") + posr + } ## postproc cnorm + + rd <- function(mu,wt,scale) { ## NOTE - not done + Theta <- exp(get(".Theta")) + } + + qf <- function(p,mu,wt,scale) { ## NOTE - not done + Theta <- exp(get(".Theta")) + } + + subsety <- function(y,ind) { ## function to subset response + if (is.matrix(y)) return(y[ind,]) + yat <- attr(y,"censor") + y <- y[ind] + if (!is.null(yat)) attr(y,"censor") <- yat[ind] + y + } ## subsety + environment(dev.resids) <- environment(aic) <- environment(getTheta) <- + environment(rd)<- environment(qf)<- environment(putTheta) <- env + structure(list(family = "censored normal", link = linktemp, linkfun = stats$linkfun, + linkinv = stats$linkinv, dev.resids = dev.resids,Dd=Dd,subsety=subsety,#variance=variance, + aic = aic, mu.eta = stats$mu.eta, initialize = initialize,postproc=postproc,ls=ls, + validmu = validmu, valideta = stats$valideta,n.theta=n.theta, + ini.theta = iniTheta,putTheta=putTheta,getTheta=getTheta),#,rd=rd,qf=qf), + class = c("extended.family","family")) +} ## cnorm + + +################################################# ## extended family object for ordered categorical +################################################# ocat <- function(theta=NULL,link="identity",R=NULL) { ## extended family object for ordered categorical model. @@ -455,7 +928,7 @@ ls <- function(y,w,theta,scale) { ## the log saturated likelihood function. - return(list(ls=0,lsth1=rep(0,R-2),lsth2=matrix(0,R-2,R-2))) + return(list(ls=0,lsth1=rep(0,R-2),LSTH1=matrix(0,length(y),R-2),lsth2=matrix(0,R-2,R-2))) } ## end of ls ## initialization is interesting -- needs to be with reference to initial cut-points @@ -618,155 +1091,6 @@ getTheta=getTheta,no.r.sq=TRUE), class = c("extended.family","family")) } ## end of ocat -####################### -## negative binomial... -####################### - -nb <- function (theta = NULL, link = "log") { -## Extended family object for negative binomial, to allow direct estimation of theta -## as part of REML optimization. Currently the template for extended family objects. 
- linktemp <- substitute(link) - if (!is.character(linktemp)) linktemp <- deparse(linktemp) - if (linktemp %in% c("log", "identity", "sqrt")) stats <- make.link(linktemp) - else if (is.character(link)) { - stats <- make.link(link) - linktemp <- link - } else { - if (inherits(link, "link-glm")) { - stats <- link - if (!is.null(stats$name)) - linktemp <- stats$name - } - else stop(linktemp, " link not available for negative binomial family; available links are \"identity\", \"log\" and \"sqrt\"") - } - ## Theta <- NULL; - n.theta <- 1 - if (!is.null(theta)&&theta!=0) { - if (theta>0) { - iniTheta <- log(theta) ## fixed theta supplied - n.theta <- 0 ## signal that there are no theta parameters to estimate - } else iniTheta <- log(-theta) ## initial theta supplied - } else iniTheta <- 0 ## inital log theta value - - env <- new.env(parent = .GlobalEnv) - assign(".Theta", iniTheta, envir = env) - getTheta <- function(trans=FALSE) if (trans) exp(get(".Theta")) else get(".Theta") # get(".Theta") - putTheta <- function(theta) assign(".Theta", theta,envir=environment(sys.function())) - - variance <- function(mu) mu + mu^2/exp(get(".Theta")) - - validmu <- function(mu) all(mu > 0) - - dev.resids <- function(y, mu, wt,theta=NULL) { - if (is.null(theta)) theta <- get(".Theta") - theta <- exp(theta) ## note log theta supplied - mu[mu<=0] <- NA - 2 * wt * (y * log(pmax(1, y)/mu) - - (y + theta) * log((y + theta)/(mu + theta))) - } - - Dd <- function(y, mu, theta, wt, level=0) { - ## derivatives of the nb deviance... - ##ltheta <- theta - theta <- exp(theta) - yth <- y + theta - muth <- mu + theta - r <- list() - ## get the quantities needed for IRLS. - ## Dmu2eta2 is deriv of D w.r.t mu twice and eta twice, - ## Dmu is deriv w.r.t. mu once, etc... - r$Dmu <- 2 * wt * (yth/muth - y/mu) - r$Dmu2 <- -2 * wt * (yth/muth^2 - y/mu^2) - r$EDmu2 <- 2 * wt * (1/mu - 1/muth) ## exact (or estimated) expected weight - if (level>0) { ## quantities needed for first derivatives - r$Dth <- -2 * wt * theta * (log(yth/muth) + (1 - yth/muth) ) - r$Dmuth <- 2 * wt * theta * (1 - yth/muth)/muth - r$Dmu3 <- 4 * wt * (yth/muth^3 - y/mu^3) - r$Dmu2th <- 2 * wt * theta * (2*yth/muth - 1)/muth^2 - r$EDmu2th <- 2 * wt / muth^2 - } - if (level>1) { ## whole damn lot - r$Dmu4 <- 2 * wt * (6*y/mu^4 - 6*yth/muth^4) - r$Dth2 <- -2 * wt * theta * (log(yth/muth) + - theta*yth/muth^2 - yth/muth - 2*theta/muth + 1 + - theta /yth) - r$Dmuth2 <- 2 * wt * theta * (2*theta*yth/muth^2 - yth/muth - 2*theta/muth + 1)/muth - r$Dmu2th2 <- 2 * wt * theta * (- 6*yth*theta/muth^2 + 2*yth/muth + 4*theta/muth - 1) /muth^2 - r$Dmu3th <- 4 * wt * theta * (1 - 3*yth/muth)/muth^3 - } - r - } - - aic <- function(y, mu, theta=NULL, wt, dev) { - if (is.null(theta)) theta <- get(".Theta") - Theta <- exp(theta) - term <- (y + Theta) * log(mu + Theta) - y * log(mu) + - lgamma(y + 1) - Theta * log(Theta) + lgamma(Theta) - - lgamma(Theta + y) - 2 * sum(term * wt) - } - - ls <- function(y,w,theta,scale) { - ## the log saturated likelihood function. - Theta <- exp(theta) - #vec <- !is.null(attr(theta,"vec.grad")) ## lsth by component? - ylogy <- y;ind <- y>0;ylogy[ind] <- y[ind]*log(y[ind]) - term <- (y + Theta) * log(y + Theta) - ylogy + - lgamma(y + 1) - Theta * log(Theta) + lgamma(Theta) - - lgamma(Theta + y) - ls <- -sum(term*w) - ## first derivative wrt theta... 
- yth <- y+Theta - lyth <- log(yth) - psi0.yth <- digamma(yth) - psi0.th <- digamma(Theta) - term <- Theta * (lyth - psi0.yth + psi0.th-theta) - #lsth <- if (vec) -term*w else -sum(term*w) - lsth <- -sum(term*w) - ## second deriv wrt theta... - psi1.yth <- trigamma(yth) - psi1.th <- trigamma(Theta) - term <- Theta * (lyth - Theta*psi1.yth - psi0.yth + Theta/yth + Theta * psi1.th + psi0.th - theta -1) - lsth2 <- -sum(term*w) - list(ls=ls, ## saturated log likelihood - lsth1=lsth, ## first deriv vector w.r.t theta - last element relates to scale, if free - lsth2=lsth2) ## Hessian w.r.t. theta, last row/col relates to scale, if free - } - - initialize <- expression({ - if (any(y < 0)) stop("negative values not allowed for the negative binomial family") - ##n <- rep(1, nobs) - mustart <- y + (y == 0)/6 - }) - - postproc <- function(family,y,prior.weights,fitted,linear.predictors,offset,intercept){ - posr <- list() - posr$null.deviance <- find.null.dev(family,y,eta=linear.predictors,offset,prior.weights) - posr$family <- - paste("Negative Binomial(",round(family$getTheta(TRUE),3),")",sep="") - posr - } - - rd <- function(mu,wt,scale) { - Theta <- exp(get(".Theta")) - rnbinom(n=length(mu),size=Theta,mu=mu) - } - - qf <- function(p,mu,wt,scale) { - Theta <- exp(get(".Theta")) - qnbinom(p,size=Theta,mu=mu) - } - - - environment(dev.resids) <- environment(aic) <- environment(getTheta) <- - environment(rd)<- environment(qf)<- environment(variance) <- environment(putTheta) <- env - structure(list(family = "negative binomial", link = linktemp, linkfun = stats$linkfun, - linkinv = stats$linkinv, dev.resids = dev.resids,Dd=Dd,variance=variance, - aic = aic, mu.eta = stats$mu.eta, initialize = initialize,postproc=postproc,ls=ls, - validmu = validmu, valideta = stats$valideta,n.theta=n.theta, - ini.theta = iniTheta,putTheta=putTheta,getTheta=getTheta,rd=rd,qf=qf), - class = c("extended.family","family")) -} ## nb ## Tweedie.... @@ -903,16 +1227,13 @@ } ls <- function(y, w, theta, scale) { - ## evaluate saturated log likelihood + derivs w.r.t. working params and log(scale) + ## evaluate saturated log likelihood + derivs w.r.t. working params and log(scale) Tweedie a <- get(".a");b <- get(".b") - #vec <- !is.null(attr(theta,"vec.grad")) - LS <- w * ldTweedie(y, y, rho=log(scale), theta=theta,a=a,b=b) - #if (vec) lsth1 <- LS[,c(4,2)] - LS <- colSums(LS) - #if (!vec) lsth1 <- c(LS[4],LS[2]) + Ls <- w * ldTweedie(y, y, rho=log(scale), theta=theta,a=a,b=b) + LS <- colSums(Ls) lsth1 <- c(LS[4],LS[2]) ## deriv w.r.t. p then log scale lsth2 <- matrix(c(LS[5],LS[6],LS[6],LS[3]),2,2) - list(ls=LS[1],lsth1=lsth1,lsth2=lsth2) + list(ls=LS[1],lsth1=lsth1,LSTH1=Ls[,c(4,2)],lsth2=lsth2) } @@ -1059,10 +1380,11 @@ } ls <- function(y,w,theta,scale) { - ## the log saturated likelihood function. + ## the log saturated likelihood function for betar ## ls is defined as zero for REML/ML expression as deviance is defined as -2*log.lik list(ls=0,## saturated log likelihood lsth1=0, ## first deriv vector w.r.t theta - last element relates to scale + LSTH1 = matrix(0,length(y),1), lsth2=0) ##Hessian w.r.t. theta } @@ -1389,7 +1711,7 @@ } ls <- function(y,w,theta,scale) { - ## the log saturated likelihood function. + ## the log saturated likelihood function for scat ## (Note these are correct but do not correspond to NP notes) if (length(w)==1) w <- rep(w,length(y)) #vec <- !is.null(attr(theta,"vec.grad")) @@ -1401,8 +1723,8 @@ ## first derivative wrt theta... 
lsth2 <- matrix(0,2,2) ## rep(0, 3) term <- nu2 * digamma(nu12)/2- nu2 * digamma(nu/2)/2 - 0.5*nu2nu - #lsth <- if (vec) cbind(w*term,-1*w) else c(sum(w*term),sum(-w)) - lsth <- c(sum(w*term),sum(-w)) + LSTH <- cbind(w*term,-1*w) + lsth <- colSums(LSTH) ## second deriv... term <- nu2^2 * trigamma(nu12)/4 + nu2 * digamma(nu12)/2 - nu2^2 * trigamma(nu/2)/4 - nu2 * digamma(nu/2)/2 + 0.5*(nu2nu)^2 - 0.5*nu2nu @@ -1410,6 +1732,7 @@ lsth2[1,2] <- lsth2[2,1] <- lsth2[2,2] <- 0 list(ls=ls,## saturated log likelihood lsth1=lsth, ## first derivative vector wrt theta + LSTH1=LSTH, lsth2=lsth2) ## Hessian wrt theta } @@ -1646,12 +1969,13 @@ } ls <- function(y,w,theta,scale) { - ## the log saturated likelihood function. + ## the log saturated likelihood function for ziP ## ls is defined as zero for REML/ML expression as deviance is defined as -2*log.lik #vec <- !is.null(attr(theta,"vec.grad")) #lsth1 <- if (vec) matrix(0,length(y),2) else c(0,0) list(ls=0,## saturated log likelihood lsth1=c(0,0), ## first deriv vector w.r.t theta - last element relates to scale + LSTH1=matrix(0,length(y),2), lsth2=matrix(0,2,2)) ##Hessian w.r.t. theta } diff -Nru mgcv-1.8-40/R/fast-REML.r mgcv-1.8-41/R/fast-REML.r --- mgcv-1.8-40/R/fast-REML.r 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/R/fast-REML.r 2022-08-22 06:55:50.000000000 +0000 @@ -247,7 +247,8 @@ Sl[[b]] <- Sl[[b]]$updateS(Sl[[b]]$lambda,Sl[[b]]) lambda <- c(lambda,Sl[[b]]$lambda) if (sparse) { - E0 <- as(Sl[[b]]$St(Sl[[b]],1)$E,"dgTMatrix") + ## E0 <- as(Sl[[b]]$St(Sl[[b]],1)$E,"dgTMatrix") deprecated + E0 <- as(as(as(Sl[[b]]$St(Sl[[b]],1)$E, "dMatrix"), "generalMatrix"), "TsparseMatrix") E$i[[b]] <- E0@i + Sl[[b]]$start E$j[[b]] <- E0@j + Sl[[b]]$start E$x[[b]] <- E0@x @@ -319,7 +320,8 @@ sqrt(Sl[[b]]$ev+Sl[[b]]$nl.reg)*t(Sl[[b]]$U) D.norm <- norm(D); D <- D/D.norm if (sparse) { - D <- as(D,"dgTMatrix") + ## D <- as(D,"dgTMatrix") deprecated + D <- as(as(as(D, "dMatrix"), "generalMatrix"), "TsparseMatrix") E$i[[b]] <- D@i + Sl[[b]]$start E$j[[b]] <- D@j + Sl[[b]]$start E$x[[b]] <- D@x @@ -380,7 +382,8 @@ St <- (t(St) + St)/2 ## avoid over-zealous chol sym check St <- t(mroot(St,Sl[[b]]$rank)) if (sparse) { - St <- as(St,"dgTMatrix") + ## St <- as(St,"dgTMatrix") - deprecated + St <- as(as(as(St, "dMatrix"), "generalMatrix"), "TsparseMatrix") E$i[[b]] <- St@i + Sl[[b]]$start E$j[[b]] <- St@j + Sl[[b]]$start E$x[[b]] <- St@x @@ -765,7 +768,8 @@ k.sp <- k.sp + Sl[[b]]$n.sp Sl[[b]]$lambda <- rho[ind] ## not really used in non-linear interface if (root) if (sparse) { - E0 <- as(Sl[[b]]$St(Sl[[b]],1)$E,"dgTMatrix") + ## E0 <- as(Sl[[b]]$St(Sl[[b]],1)$E,"dgTMatrix") - deprecated + E0 <- as(as(as(Sl[[b]]$St(Sl[[b]],1)$E, "dMatrix"), "generalMatrix"), "TsparseMatrix") E$i[[b]] <- E0@i + Sl[[b]]$start E$j[[b]] <- E0@j + Sl[[b]]$start E$x[[b]] <- E0@x @@ -794,8 +798,11 @@ } else { ## root has to be in original parameterization... if (sparse) { ## dgTMatrix is triplet form, which makes combining easier... 
- D <- if (is.null(Sl[[b]]$nl.reg)) as(Sl[[b]]$Di[1:Sl[[b]]$rank,]* exp(rho[k.sp]*.5),"dgTMatrix") else - as(sqrt(Sl[[b]]$ev*exp(rho[k.sp])+Sl[[b]]$nl.reg)*t(Sl[[b]]$U),"dgTMatrix") + #D <- if (is.null(Sl[[b]]$nl.reg)) as(Sl[[b]]$Di[1:Sl[[b]]$rank,]* exp(rho[k.sp]*.5),"dgTMatrix") else + # as(sqrt(Sl[[b]]$ev*exp(rho[k.sp])+Sl[[b]]$nl.reg)*t(Sl[[b]]$U),"dgTMatrix") + D <- if (is.null(Sl[[b]]$nl.reg)) as(as(as(Sl[[b]]$Di[1:Sl[[b]]$rank,]* exp(rho[k.sp]*.5), "dMatrix"), + "generalMatrix"), "TsparseMatrix") else as(as(as(sqrt(Sl[[b]]$ev*exp(rho[k.sp])+Sl[[b]]$nl.reg)* + t(Sl[[b]]$U) , "dMatrix"), "generalMatrix"), "TsparseMatrix") E$i[[b]] <- D@i + Sl[[b]]$start E$j[[b]] <- D@j + Sl[[b]]$start E$x[[b]] <- D@x @@ -854,7 +861,8 @@ if (Sl[[b]]$repara) { if (root) { ## unpack the square root E'E if (sparse) { - E0 <- as(grp$E,"dgTMatrix") + # E0 <- as(grp$E,"dgTMatrix") deprecated + E0 <- as(as(as(grp$E, "dMatrix"), "generalMatrix"), "TsparseMatrix") E$i[[b]] <- E0@i + Sl[[b]]$start E$j[[b]] <- E0@j + Sl[[b]]$start E$x[[b]] <- E0@x @@ -878,7 +886,8 @@ if (root) { Eb <- t(mroot(Sl[[b]]$St,Sl[[b]]$rank)) if (sparse) { - Eb <- as(Eb,"dgTMatrix") + # Eb <- as(Eb,"dgTMatrix") - deprecated + Eb <- as(as(as(Eb, "dMatrix"), "generalMatrix"), "TsparseMatrix") E$i[[b]] <- Eb@i + Sl[[b]]$start E$j[[b]] <- Eb@j + Sl[[b]]$start E$x[[b]] <- Eb@x diff -Nru mgcv-1.8-40/R/gam.fit3.r mgcv-1.8-41/R/gam.fit3.r --- mgcv-1.8-40/R/gam.fit3.r 2022-02-28 10:51:29.000000000 +0000 +++ mgcv-1.8-41/R/gam.fit3.r 2022-10-19 14:31:41.000000000 +0000 @@ -1,5 +1,5 @@ ## R routines for gam fitting with calculation of derivatives w.r.t. sp.s -## (c) Simon Wood 2004-2013 +## (c) Simon Wood 2004-2022 ## These routines are for type 3 gam fitting. The basic idea is that a P-IRLS ## is run to convergence, and only then is a scheme for evaluating the @@ -69,7 +69,7 @@ mustart = NULL, offset = rep(0, nobs),U1=diag(ncol(x)), Mp=-1, family = gaussian(), control = gam.control(), intercept = TRUE,deriv=2, gamma=1,scale=1,printWarn=TRUE,scoreType="REML",null.coef=rep(0,ncol(x)), - pearson.extra=0,dev.extra=0,n.true=-1,Sl=NULL,...) { + pearson.extra=0,dev.extra=0,n.true=-1,Sl=NULL,nei=NULL,...) 
{ ## Inputs: ## * x model matrix ## * y response @@ -106,13 +106,13 @@ if (inherits(family,"extended.family")) { ## then actually gam.fit4/5 is needed if (inherits(family,"general.family")) { return(gam.fit5(x,y,sp,Sl=Sl,weights=weights,offset=offset,deriv=deriv, - family=family,control=control,Mp=Mp,start=start,gamma=gamma)) + family=family,scoreType=scoreType,control=control,Mp=Mp,start=start,gamma=gamma,nei=nei)) } else return(gam.fit4(x, y, sp, Eb,UrS=UrS, weights = weights, start = start, etastart = etastart, mustart = mustart, offset = offset,U1=U1, Mp=Mp, family = family, control = control, deriv=deriv,gamma=gamma, - scale=scale,scoreType=scoreType,null.coef=null.coef,...)) + scale=scale,scoreType=scoreType,null.coef=null.coef,nei=nei,...)) } if (family$link==family$canonical) fisher <- TRUE else fisher=FALSE @@ -150,8 +150,7 @@ # for (i in 1:length(sp)) { # spp <- sp; spp[i] <- spp[i] + eps/2 # rp1 <- gam.reparam(UrS,spp,grderiv) - # spp[i] <- spp[i] - eps - # rp0 <- gam.reparam(UrS,spp,grderiv) + # spp[i] <- spp[i] - eps # rp0 <- gam.reparam(UrS,spp,grderiv) # fd.grad[i] <- (rp1$det-rp0$det)/eps # } # print(fd.grad) @@ -232,7 +231,7 @@ D1 <- D2 <- P <- P1 <- P2 <- trA <- trA1 <- trA2 <- GCV<- GCV1<- GCV2<- GACV<- GACV1<- GACV2<- UBRE <- - UBRE1<- UBRE2<- REML<- REML1<- REML2 <-NULL + UBRE1<- UBRE2<- REML<- REML1<- REML2 <- NCV <- NCV1 <- NULL if (EMPTY) { eta <- rep.int(0, nobs) + offset @@ -257,7 +256,7 @@ scale.est <- alpha / (nobs - trA) } ### end if (EMPTY) else { - ##coefold <- NULL + eta <- if (!is.null(etastart)) etastart else if (!is.null(start)) @@ -271,12 +270,9 @@ else x %*% start) } else family$linkfun(mustart) - #etaold <- eta - ##muold <- + mu <- linkinv(eta) - #if (!(validmu(mu) && valideta(eta))) - # stop("Can't find valid starting values: please specify some") - + boundary <- conv <- FALSE rV=matrix(0,ncol(x),ncol(x)) @@ -330,8 +326,7 @@ w <- weg*alpha*mevg^2/var.mug } - ## Here a Fortran call has been replaced by pls_fit1 call - + if (sum(good)2) { ## need coef changes for each NCV drop fold. + dd <- matrix(0,ncol(x),length(nei$m)) + if (deriv>0) stop("jackknife and derivatives requested together") + deriv1 <- -1 + } else { ## dd unused + dd <- matrix(1.0,1,1); + deriv1 <- deriv + } + R <- try(chol(crossprod(x,w1*x)+St),silent=TRUE) + if (inherits(R,"try-error")) { ## use CG approach... 
+ Hi <- tcrossprod(rV) ## inverse of penalized Expected Hessian - inverse actual Hessian probably better + cg.iter <- .Call(C_ncv,x,Hi,ww,w1,db.drho,dw.drho,rS,nei$i-1,nei$mi,nei$m,nei$k-1,coef,exp(sp),eta.cv, deta.cv, dd, deriv1); + warn[[length(warn)+1]] <- "NCV positive definite update check not possible" + } else { ## use Cholesky update approach + pdef.fails <- .Call(C_Rncv,x,R,ww,w1,db.drho,dw.drho,rS,nei$i-1,nei$mi,nei$m,nei$k-1,coef,exp(sp),eta.cv, deta.cv, + dd, deriv1,.Machine$double.eps,control$ncv.threads); + if (pdef.fails) warn[[length(warn)+1]] <- "some NCV updates not positive definite" + } + + if (family$qapprox) { + NCV <- sum(wdr[nei$i]) + gamma*sum(-2*ww[nei$i]*(eta.cv-eta[nei$i]) + w1[nei$i]*(eta.cv-eta[nei$i])^2) + if (deriv) { + deta <- x%*%db.drho + alpha1 <- if (fisher) 0 else (-(V1+g2) + (y-mu)*(V2-V1^2+g3-g2^2))/alpha + w3 <- w1/g1*(alpha1 - V1 - 2 * g2) + ncv1 <- -2*ww[nei$i]*((1-gamma)*deta[nei$i,] + gamma*deta.cv) + 2*gamma*w1[nei$i]*(deta.cv*(eta.cv-eta[nei$i])) + + gamma*w3[nei$i]* deta[nei$i,]*(eta.cv-eta[nei$i])^2 + } + } else { ## exact version + if (TRUE) { + ## version that doesn't just drop neighbourhood, but tries equivalent (gamma==2) perturbation beyond dropping + eta.cv <- gamma*(eta.cv) - (gamma-1)*eta[nei$i] + if (deriv && gamma!=1) deta.cv <- gamma*(deta.cv) - (gamma-1)*(x%*%db.drho)[nei$i,,drop=FALSE] + gamma <- 1 + } + mu.cv <- linkinv(eta.cv) + NCV <- gamma*sum(dev.resids(y[nei$i],mu.cv,weights[nei$i])) - (gamma-1)*sum(wdr[nei$i]) ## the NCV score - simply LOOCV if nei(i) = i for all i + + if (deriv) { + dev1 <- if (gamma==1) 0 else -2*(ww*(x%*%db.drho))[nei$i,,drop=FALSE] + var.mug <- variance(mu.cv) + mevg <- mu.eta(eta.cv) + mug <- mu.cv + ww1 <- weights[nei$i]*(y[nei$i]-mug)*mevg/var.mug + ww1[!is.finite(ww1)] <- 0 + ncv1 <- -2*ww1*deta.cv*gamma - (gamma-1)*dev1 + #gjk <- colSums(ww1*x[nei$i,]) ## jackknife deriv of log lik estimate (multiplied by scale) + } ## if deriv + } + + if (deriv) { + NCV1 <- colSums(ncv1) ## grad + #attr(NCV1,"gjk") <- gjk ## jackknife grad estimate - incomplete - need qapprox version as well + Vg <- crossprod(ncv1) ## empirical cov matrix of grad + } + + if (nei$jackknife>2) { + nk <- c(nei$m[1],diff(nei$m)) ## dropped fold sizes + jkw <- sqrt((nobs-nk)/(nobs*nk)) ## jackknife weights + dd <-jkw*t(dd)%*%t(T) + Vj <- crossprod(dd) ## jackknife cov matrix + attr(Vj,"dd") <- dd + attr(NCV,"Vj") <- Vj + } + attr(NCV,"eta.cv") <- eta.cv + if (deriv) attr(NCV,"deta.cv") <- deta.cv + } else { ## GCV/GACV etc .... 
P <- oo$P @@ -753,10 +824,13 @@ names(wt) <- ynames names(weights) <- ynames names(y) <- ynames - if (deriv && nrow(dw.drho)!=nrow(x)) { - w1 <- dw.drho - dw.drho <- matrix(0,nrow(x),ncol(w1)) - dw.drho[good,] <- w1 + if (deriv) { + db.drho <- T%*%db.drho + if (nrow(dw.drho)!=nrow(x)) { + w1 <- dw.drho + dw.drho <- matrix(0,nrow(x),ncol(w1)) + dw.drho[good,] <- w1 + } } sumw <- sum(weights) @@ -782,9 +856,9 @@ family = family, linear.predictors = eta, deviance = dev, null.deviance = nulldev, iter = iter, weights = wt, working.weights=ww,prior.weights = weights, z=z, df.null = nulldf, y = y, converged = conv,##pearson.warning = pearson.warning, - boundary = boundary,D1=D1,D2=D2,P=P,P1=P1,P2=P2,trA=trA,trA1=trA1,trA2=trA2, + boundary = boundary,D1=D1,D2=D2,P=P,P1=P1,P2=P2,trA=trA,trA1=trA1,trA2=trA2,NCV=NCV,NCV1=NCV1, GCV=GCV,GCV1=GCV1,GCV2=GCV2,GACV=GACV,GACV1=GACV1,GACV2=GACV2,UBRE=UBRE, - UBRE1=UBRE1,UBRE2=UBRE2,REML=REML,REML1=REML1,REML2=REML2,rV=rV,db.drho=db.drho, + UBRE1=UBRE1,UBRE2=UBRE2,REML=REML,REML1=REML1,REML2=REML2,rV=rV,Vg=Vg,db.drho=db.drho, dw.drho=dw.drho,dVkk = matrix(oo$dVkk,nSp,nSp),ldetS1 = if (grderiv) rp$det1 else 0, scale.est=scale.est,reml.scale= reml.scale,aic=aic.model,rank=oo$rank.est,K=Kmat,warn=warn) } ## end gam.fit3 @@ -875,7 +949,7 @@ vcorr(dR,Vr,FALSE) ## NOTE: unscaled!! } ## Vb.corr -gam.fit3.post.proc <- function(X,L,lsp0,S,off,object) { +gam.fit3.post.proc <- function(X,L,lsp0,S,off,object,gamma) { ## get edf array and covariance matrices after a gam fit. ## X is original model matrix, L the mapping from working to full sp scale <- if (object$scale.estimated) object$scale.est else object$scale @@ -895,8 +969,19 @@ ## get QR factor R of WX - more efficient to do this ## in gdi_1 really, but that means making QR of augmented ## a two stage thing, so not clear cut... - qrx <- pqr(sqrt(object$weights)*X,object$control$nthreads) + WX <- sqrt(object$weights)*X + qrx <- pqr(WX,object$control$nthreads) R <- pqr.R(qrx);R[,qrx$pivot] <- R +# if (gamma!=1) { ## compute Vp assuming gamma is inverse learning rate - wrong parameterization s.t. Vp*gamma is it! +# H <- crossprod(WX)/gamma +# lsp <- lsp0 + if (is.null(L)) log(object$sp) else L %*% log(object$sp) +# sp <- exp(lsp) +# if (length(S)) for (i in 1:length(S)) { +# ii <- 1:nrow(S[[i]]) + off[i] - 1 +# H[ii,ii] <- H[ii,ii] + sp[i] * S[[i]] +# } +# Vl <- chol2inv(chol(H))*scale +# } else Vl <- NULL if (!is.na(object$reml.scale)&&!is.null(object$db.drho)) { ## compute sp uncertainty correction hess <- object$outer.info$hess edge.correct <- if (is.null(attr(hess,"edge.correct"))) FALSE else TRUE @@ -948,6 +1033,7 @@ } } } ## k loop + V.sp <- Vr;attr(V.sp,"L") <- L;attr(V.sp,"spind") <- spind <- (nth+1):M ## EXPERIMENTAL ## NOTE: no L handling - what about incomplete z/w?? 
#P <- Vb %*% X/scale @@ -972,7 +1058,9 @@ } ## END EXPERIMENTAL } else V.sp <- edf2 <- Vc <- NULL - list(Vc=Vc,Vp=Vb,Ve=Ve,V.sp=V.sp,edf=edf,edf1=edf1,edf2=edf2,hat=hat,F=F,R=R) + ret <- list(Vp=Vb,Ve=Ve,V.sp=V.sp,edf=edf,edf1=edf1,edf2=edf2,hat=hat,F=F,R=R) + if (is.null(object$Vc)) ret$Vc <- Vc + ret } ## gam.fit3.post.proc @@ -984,6 +1072,7 @@ ## plot a transect through the score for sp[ii] np <- 200 if (scoreType%in%c("REML","P-REML","ML","P-ML")) reml <- TRUE else reml <- FALSE + score <- spi <- seq(-30,30,length=np) for (i in 1:np) { @@ -1015,15 +1104,16 @@ mustart = NULL, offset = rep(0, length(y)),U1,Mp,family = gaussian(), control = gam.control(), intercept = TRUE,deriv=2, gamma=1,scale=1,printWarn=TRUE,scoreType="REML",eps=1e-7, - null.coef=rep(0,ncol(x)),Sl=Sl,...) + null.coef=rep(0,ncol(x)),Sl=Sl,nei=nei,...) ## FD checking of derivatives: basically a debugging routine -{ if (!deriv%in%c(1,2)) stop("deriv should be 1 or 2") +{ + if (!deriv%in%c(1,2)) stop("deriv should be 1 or 2") if (control$epsilon>1e-9) control$epsilon <- 1e-9 b<-gam.fit3(x=x, y=y, sp=sp,Eb=Eb,UrS=UrS, offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=deriv, control=control,gamma=gamma,scale=scale,printWarn=FALSE, start=start,etastart=etastart,mustart=mustart,scoreType=scoreType, - null.coef=null.coef,Sl=Sl,...) + null.coef=null.coef,Sl=Sl,nei=nei,...) P0 <- b$P;fd.P1 <- P10 <- b$P1; if (deriv==2) fd.P2 <- P2 <- b$P2 trA0 <- b$trA;fd.gtrA <- gtrA0 <- b$trA1 ; if (deriv==2) fd.htrA <- htrA <- b$trA2 @@ -1031,17 +1121,13 @@ fd.db <- b$db.drho*0 if (scoreType%in%c("REML","P-REML","ML","P-ML")) reml <- TRUE else reml <- FALSE + sname <- if (reml) "REML" else scoreType + sname1 <- paste(sname,"1",sep=""); sname2 <- paste(sname,"2",sep="") + if (scoreType=="NCV") reml <- TRUE ## to avoid un-needed stuff + + score0 <- b[[sname]];grad0 <- b[[sname1]]; if (deriv==2) hess <- b[[sname2]] + - if (reml) { - score0 <- b$REML;grad0 <- b$REML1; if (deriv==2) hess <- b$REML2 - } else if (scoreType=="GACV") { - score0 <- b$GACV;grad0 <- b$GACV1;if (deriv==2) hess <- b$GACV2 - } else if (scoreType=="UBRE"){ - score0 <- b$UBRE;grad0 <- b$UBRE1;if (deriv==2) hess <- b$UBRE2 - } else { ## default to deviance based GCV - score0 <- b$GCV;grad0 <- b$GCV1;if (deriv==2) hess <- b$GCV2 - } - fd.grad <- grad0*0 if (deriv==2) fd.hess <- hess diter <- rep(20,length(sp)) @@ -1051,14 +1137,14 @@ offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=deriv, control=control,gamma=gamma,scale=scale,printWarn=FALSE, start=start,etastart=etastart,mustart=mustart,scoreType=scoreType, - null.coef=null.coef,Sl=Sl,...) + null.coef=null.coef,Sl=Sl,nei=nei,...) sp1 <- sp;sp1[i] <- sp[i]-eps/2 bb<-gam.fit3(x=x, y=y, sp=sp1, Eb=Eb,UrS=UrS, offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=deriv, control=control,gamma=gamma,scale=scale,printWarn=FALSE, start=start,etastart=etastart,mustart=mustart,scoreType=scoreType, - null.coef=null.coef,Sl=Sl,...) + null.coef=null.coef,Sl=Sl,nei=nei,...) 
diter[i] <- bf$iter - bb$iter ## check iteration count same if (i<=ncol(fd.db)) fd.db[,i] <- (bf$coefficients - bb$coefficients)/eps @@ -1071,21 +1157,9 @@ devb <- bb$deviance;devf <- bf$deviance D1b <- bb$D1;D1f <- bf$D1 } - - if (reml) { - scoreb <- bb$REML;scoref <- bf$REML; - if (deriv==2) { gradb <- bb$REML1;gradf <- bf$REML1} - } else if (scoreType=="GACV") { - scoreb <- bb$GACV;scoref <- bf$GACV; - if (deriv==2) { gradb <- bb$GACV1;gradf <- bf$GACV1} - } else if (scoreType=="UBRE"){ - scoreb <- bb$UBRE; scoref <- bf$UBRE; - if (deriv==2) { gradb <- bb$UBRE1;gradf <- bf$UBRE1} - } else { ## default to deviance based GCV - scoreb <- bb$GCV;scoref <- bf$GCV; - if (deriv==2) { gradb <- bb$GCV1;gradf <- bf$GCV1} - } + scoreb <- bb[[sname]];scoref <- bf[[sname]]; + if (deriv==2) { gradb <- bb[[sname1]];gradf <- bf[[sname1]]} if (!reml) { fd.P1[i] <- (Pf-Pb)/eps @@ -1139,10 +1213,11 @@ plot(b$db.drho,fd.db,pch=".") for (i in 1:ncol(fd.db)) points(b$db.drho[,i],fd.db[,i],pch=19,cex=.3,col=i) + cat("\n\n The objective...\n") cat("diter ");print(diter) - cat("grad ");print(grad0) - cat("fd.grad ");print(fd.grad) + cat("grad ");print(as.numeric(grad0)) + cat("fd.grad ");print(as.numeric(fd.grad)) if (deriv==2) { fd.hess <- .5*(fd.hess + t(fd.hess)) cat("hess\n");print(hess) @@ -1191,11 +1266,12 @@ simplyFit <- function(lsp,X,y,Eb,UrS,L,lsp0,offset,U1,Mp,family,weights, control,gamma,scale,conv.tol=1e-6,maxNstep=5,maxSstep=2, maxHalf=30,printWarn=FALSE,scoreType="deviance", - mustart = NULL,null.coef=rep(0,ncol(X)),Sl=Sl,...) + mustart = NULL,null.coef=rep(0,ncol(X)),Sl=Sl,nei=NULL,...) ## function with same argument list as `newton' and `bfgs' which simply fits ## the model given the supplied smoothing parameters... { reml <- scoreType%in%c("REML","P-REML","ML","P-ML") ## REML/ML indicator - + sname <- if (reml) "REML" else scoreType + ## sanity check L if (is.null(L)) L <- diag(length(lsp)) else { if (!inherits(L,"matrix")) stop("L must be a matrix.") @@ -1208,17 +1284,10 @@ b<-gam.fit3(x=X, y=y, sp=L%*%lsp+lsp0, Eb=Eb,UrS=UrS, offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=0, control=control,gamma=gamma,scale=scale, - printWarn=FALSE,mustart=mustart,scoreType=scoreType,null.coef=null.coef,Sl=Sl,...) + printWarn=FALSE,mustart=mustart,scoreType=scoreType,null.coef=null.coef,Sl=Sl,nei=nei,...) if (!is.null(b$warn)&&length(b$warn)>0) for (i in 1:length(b$warn)) warning(b$warn[[i]]) - - if (reml) { - score <- b$REML - } else if (scoreType=="GACV") { - score <- b$GACV - } else if (scoreType=="UBRE") { - score <- b$UBRE - } else score <- b$GCV + score <- b[[sname]] list(score=score,lsp=lsp,lsp.full=L%*%lsp+lsp0,grad=NULL,hess=NULL,score.hist=NULL,iter=0,conv =NULL,object=b) @@ -1229,7 +1298,7 @@ control,gamma,scale,conv.tol=1e-6,maxNstep=5,maxSstep=2, maxHalf=30,printWarn=FALSE,scoreType="deviance",start=NULL, mustart = NULL,null.coef=rep(0,ncol(X)),pearson.extra, - dev.extra=0,n.true=-1,Sl=NULL,edge.correct=FALSE,...) + dev.extra=0,n.true=-1,Sl=NULL,edge.correct=FALSE,nei=NULL,...) ## Newton optimizer for GAM reml/gcv/aic optimization that can cope with an ## indefinite Hessian. 
Main enhancements are: ## i) always perturbs the Hessian to +ve definite if indefinite @@ -1247,6 +1316,9 @@ reml <- scoreType%in%c("REML","P-REML","ML","P-ML") ## REML/ML indicator + sname <- if (reml) "REML" else scoreType + sname1 <- paste(sname,"1",sep=""); sname2 <- paste(sname,"2",sep="") + ## sanity check L if (is.null(L)) L <- diag(length(lsp)) else { if (!inherits(L,"matrix")) stop("L must be a matrix.") @@ -1282,19 +1354,11 @@ offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=deriv, control=control,gamma=gamma,scale=scale, printWarn=FALSE,start=start,mustart=mustart, - scoreType=scoreType,eps=eps,null.coef=null.coef,Sl=Sl,...) + scoreType=scoreType,eps=eps,null.coef=null.coef,Sl=Sl,nei=nei,...) } -# ii <- 0 -# if (ii>0) { -# score.transect(ii,x=X, y=y, sp=L%*%lsp+lsp0, Eb=Eb,UrS=UrS, -# offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=deriv, -# control=control,gamma=gamma,scale=scale, -# printWarn=FALSE,mustart=mustart, -# scoreType=scoreType,eps=eps,null.coef=null.coef,...) -# } ## ... end of debugging code @@ -1304,21 +1368,12 @@ offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=2, control=control,gamma=gamma,scale=scale,printWarn=FALSE,start=start, mustart=mustart,scoreType=scoreType,null.coef=null.coef,pearson.extra=pearson.extra, - dev.extra=dev.extra,n.true=n.true,Sl=Sl,...) + dev.extra=dev.extra,n.true=n.true,Sl=Sl,nei=nei,...) mustart <- b$fitted.values etastart <- b$linear.predictors start <- b$coefficients - - if (reml) { - old.score <- score <- b$REML;grad <- b$REML1;hess <- b$REML2 - } else if (scoreType=="GACV") { - old.score <- score <- b$GACV;grad <- b$GACV1;hess <- b$GACV2 - } else if (scoreType=="UBRE"){ - old.score <- score <- b$UBRE;grad <- b$UBRE1;hess <- b$UBRE2 - } else { ## default to deviance based GCV - old.score <- score <- b$GCV;grad <- b$GCV1;hess <- b$GCV2 - } + old.score <- score <- b[[sname]];grad <- b[[sname1]];hess <- b[[sname2]] grad <- t(L)%*%grad hess <- t(L)%*%hess%*%L @@ -1356,13 +1411,13 @@ offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=trial.der, control=control,gamma=gamma,scale=scale,printWarn=FALSE,start=start, mustart=mustart,scoreType=scoreType,null.coef=null.coef, - pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,...) + pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,nei=nei,...) } deriv.check(x=X, y=y, sp=L%*%lsp+lsp0, Eb=Eb,UrS=UrS, offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=deriv, control=control,gamma=gamma,scale=scale, printWarn=FALSE,etastart=etastart,start=start, - scoreType=scoreType,eps=eps,null.coef=null.coef,Sl=Sl,...) + scoreType=scoreType,eps=eps,null.coef=null.coef,Sl=Sl,nei=nei,...) if (inherits(family,"general.family")) { ## call gam.fit5 checking eps <- 1e-6 spe <- 1e-3 @@ -1370,18 +1425,11 @@ weights = weights, start = start, offset = offset,Mp=Mp,family = family, control = control,deriv=deriv,eps=eps,spe=spe, - Sl=Sl,...) ## ignore codetools warning + Sl=Sl,nei=nei,...) ## ignore codetools warning } } ## end of derivative checking -# ii <- 0 -# if (ii>0) { -# score.transect(ii,x=X, y=y, sp=L%*%lsp+lsp0, Eb=Eb,UrS=UrS, -# offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=deriv, -# control=control,gamma=gamma,scale=scale, -# printWarn=FALSE,mustart=mustart, -# scoreType=scoreType,eps=eps,null.coef=null.coef,...) 
-# } + ## exclude dimensions from Newton step when the derviative is ## tiny relative to largest, as this space is likely to be poorly ## modelled on scale of Newton step... @@ -1445,18 +1493,12 @@ offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=as.numeric(pdef)*2, control=control,gamma=gamma,scale=scale,printWarn=FALSE,start=start, mustart=mustart,scoreType=scoreType,null.coef=null.coef, - pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,...) + pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,nei=nei,...) ## get the change predicted for this step according to the quadratic model pred.change <- sum(grad*Nstep) + 0.5*t(Nstep) %*% hess %*% Nstep - - if (reml) { - score1 <- b$REML - } else if (scoreType=="GACV") { - score1 <- b$GACV - } else if (scoreType=="UBRE") { - score1 <- b$UBRE - } else score1 <- b$GCV + score1 <- b[[sname]] + ## accept if improvement, else step halve ii <- 0 ## step halving counter score.change <- score1 - score @@ -1467,13 +1509,8 @@ etastart <- b$linear.predictors start <- b$coefficients lsp <- lsp1 - if (reml) { - score <- b$REML;grad <- b$REML1;hess <- b$REML2 - } else if (scoreType=="GACV") { - score <- b$GACV;grad <- b$GACV1;hess <- b$GACV2 - } else if (scoreType=="UBRE") { - score <- b$UBRE;grad <- b$UBRE1;hess <- b$UBRE2 - } else { score <- b$GCV;grad <- b$GCV1;hess <- b$GCV2} + score <- b[[sname]]; grad <- b[[sname1]]; hess <- b[[sname2]] + grad <- t(L)%*%grad hess <- t(L)%*%hess%*%L @@ -1501,15 +1538,10 @@ family = family,weights=weights,deriv=0,control=control,gamma=gamma, scale=scale,printWarn=FALSE,start=start,mustart=mustart,scoreType=scoreType, null.coef=null.coef,pearson.extra=pearson.extra,dev.extra=dev.extra, - n.true=n.true,Sl=Sl,...) + n.true=n.true,Sl=Sl,nei=nei,...) pred.change <- sum(grad*step) + 0.5*t(step) %*% hess %*% step ## Taylor prediction of change - if (reml) { - score1 <- b1$REML - } else if (scoreType=="GACV") { - score1 <- b1$GACV - } else if (scoreType=="UBRE") { - score1 <- b1$UBRE - } else score1 <- b1$GCV + score1 <- b1[[sname]] + score.change <- score1 - score ## don't allow step to fail altogether just because of qerror qerror <- if (ii>min(4,maxHalf/2)) qerror.thresh/2 else @@ -1520,19 +1552,13 @@ offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=2, control=control,gamma=gamma,scale=scale,printWarn=FALSE,start=start, mustart=mustart,scoreType=scoreType,null.coef=null.coef, - pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,...) + pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,nei=nei,...) mustart <- b$fitted.values etastart <- b$linear.predictors start <- b$coefficients old.score <- score;lsp <- lsp1 - - if (reml) { - score <- b$REML;grad <- b$REML1;hess <- b$REML2 - } else if (scoreType=="GACV") { - score <- b$GACV;grad <- b$GACV1;hess <- b$GACV2 - } else if (scoreType=="UBRE") { - score <- b$UBRE;grad <- b$UBRE1;hess <- b$UBRE2 - } else { score <- b$GCV;grad <- b$GCV1;hess <- b$GCV2} + score <- b[[sname]];grad <- b[[sname1]];hess <- b[[sname2]] + grad <- t(L)%*%grad hess <- t(L)%*%hess%*%L if (!is.null(lsp.max)) { ## need to transform to delta space @@ -1573,15 +1599,10 @@ control=control,gamma=gamma,scale=scale, printWarn=FALSE,start=start,mustart=mustart,scoreType=scoreType, null.coef=null.coef,pearson.extra=pearson.extra, - dev.extra=dev.extra,n.true=n.true,Sl=Sl,...) + dev.extra=dev.extra,n.true=n.true,Sl=Sl,nei=nei,...) 
pred.change <- sum(grad*step) + 0.5*t(step) %*% hess %*% step ## Taylor prediction of change - if (reml) { - score3 <- b1$REML - } else if (scoreType=="GACV") { - score3 <- b1$GACV - } else if (scoreType=="UBRE") { - score3 <- b1$UBRE - } else score3 <- b1$GCV + score3 <- b1[[sname]] + score.change <- score3 - score qerror <- abs(pred.change-score.change)/(max(abs(pred.change),abs(score.change))+score.scale*conv.tol) ## quadratic approx error if (!is.finite(score2)||(is.finite(score3)&&score3<=score2&&qerror0&&ct!="full convergence") for (i in 1:length(b$warn)) warning(b$warn[[i]]) + ## report any warnings from inner loop if outer not converged... + #if (!is.null(b$warn)&&length(b$warn)>0&&ct!="full convergence") for (i in 1:length(b$warn)) warning(b$warn[[i]]) + if (!is.null(b$warn)&&length(b$warn)>0) for (i in 1:length(b$warn)) warning(b$warn[[i]]) list(score=score,lsp=lsp,lsp.full=L%*%lsp+lsp0,grad=grad,hess=hess,iter=i, conv =ct,score.hist = score.hist[!is.na(score.hist)],object=b) } ## newton @@ -1714,7 +1730,7 @@ control,gamma,scale,conv.tol=1e-6,maxNstep=3,maxSstep=2, maxHalf=30,printWarn=FALSE,scoreType="GCV",start=NULL, mustart = NULL,null.coef=rep(0,ncol(X)),pearson.extra=0, - dev.extra=0,n.true=-1,Sl=NULL,...) + dev.extra=0,n.true=-1,Sl=NULL,nei=NULL,...) ## BFGS optimizer to estimate smoothing parameters of models fitted by ## gam.fit3.... @@ -1747,20 +1763,13 @@ offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=0, control=control,gamma=gamma,scale=scale,printWarn=FALSE,start=lo$start, mustart=lo$mustart,scoreType=scoreType,null.coef=null.coef, - pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,...) + pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,nei=nei,...) trial$mustart <- fitted(b) trial$scale.est <- b$scale.est ## previously dev, but this differs from newton trial$start <- coef(b) - if (reml) { - trial$score <- b$REML; - } else if (scoreType=="GACV") { - trial$score <- b$GACV; - } else if (scoreType=="UBRE"){ - trial$score <- b$UBRE; - } else { ## default to deviance based GCV - trial$score <- b$GCV; - } + trial$score <- b[[sname]] + rm(b) if (trial$score>initial$score+trial$alpha*c1*initial$dscore||trial$score>=lo$score) { hi <- trial ## failed Wolfe 1 - insufficient decrease - step too long @@ -1771,17 +1780,10 @@ control=control,gamma=gamma,scale=scale,printWarn=FALSE, start=trial$start,mustart=trial$mustart, scoreType=scoreType,null.coef=null.coef,pearson.extra=pearson.extra, - dev.extra=dev.extra,n.true=n.true,Sl=Sl,...) + dev.extra=dev.extra,n.true=n.true,Sl=Sl,nei=nei,...) 
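## Editor's illustration (toy numbers, not part of the patch): the acceptance
## logic around 'pred.change' and 'qerror' compares the observed change in the
## smoothness selection score to the change predicted by the local quadratic
## model, and treats a large relative discrepancy as a sign that the quadratic
## model, and hence the Newton step, is unreliable. The conv.tol guard that the
## real denominator adds is omitted here:
grad <- c(0.5, -0.2)                      ## toy gradient of the score
hess <- matrix(c(2, 0.1, 0.1, 1), 2, 2)   ## toy positive definite Hessian
step <- -solve(hess, grad)                ## full Newton step
pred.change  <- sum(grad * step) + 0.5 * drop(t(step) %*% hess %*% step)
score.change <- pred.change * 1.05        ## pretend the observed change is 5% off
qerror <- abs(pred.change - score.change) /
  max(abs(pred.change), abs(score.change))
qerror                                    ## small, so such a step would be accepted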
- if (reml) { - trial$grad <- t(L)%*%b$REML1; - } else if (scoreType=="GACV") { - trial$grad <- t(L)%*%b$GACV1; - } else if (scoreType=="UBRE"){ - trial$grad <- t(L)%*%b$UBRE1 - } else { ## default to deviance based GCV - trial$grad <- t(L)%*%b$GCV1; - } + trial$grad <- t(L)%*%b[[sname1]]; + trial$dVkk <- diag(t(L0) %*% b$dVkk %*% L0) trial$scale.est <- b$scale.est;rm(b); trial$dscore <- sum(step*trial$grad) ## directional derivative @@ -1801,6 +1803,9 @@ reml <- scoreType%in%c("REML","P-REML","ML","P-ML") ## REML/ML indicator + sname <- if (reml) "REML" else scoreType ## name of score + sname1 <- paste(sname,"1",sep="") ## names of its derivative + ## sanity check L if (is.null(L)) L <- diag(length(lsp)) else { if (!inherits(L,"matrix")) stop("L must be a matrix.") @@ -1818,18 +1823,11 @@ control=control,gamma=gamma,scale=scale,printWarn=FALSE, start=start,mustart=mustart, scoreType=scoreType,null.coef=null.coef, - pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,...) + pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,nei=nei,...) initial <- list(alpha = 0,mustart=b$fitted.values,start=coef(b)) - if (reml) { - score <- b$REML;grad <- t(L)%*%b$REML1; - } else if (scoreType=="GACV") { - score <- b$GACV;grad <- t(L)%*%b$GACV1; - } else if (scoreType=="UBRE"){ - score <- b$UBRE;grad <- t(L)%*%b$UBRE1 - } else { ## default to deviance based GCV - score <- b$GCV;grad <- t(L)%*%b$GCV1; - } + score <- b[[sname]];grad <- t(L)%*%b[[sname1]]; + ## dVkk only refers to smoothing parameters, but sp may contain ## extra parameters at start and scale parameter at end. Have ## to reduce L accordingly... @@ -1848,6 +1846,10 @@ initial$dVkk <- diag(t(L0) %*% b$dVkk %*% L0) initial$score <- score;initial$grad <- grad; initial$scale.est <- b$scale.est + + if (reml) score.scale <- 1 + abs(initial$score) + else score.scale <- abs(initial$scale.est) + abs(initial$score) + start0 <- coef(b) mustart0 <- fitted(b) rm(b) @@ -1861,16 +1863,9 @@ control=control,gamma=gamma,scale=scale,printWarn=FALSE, start=start0,mustart=mustart0, scoreType=scoreType,null.coef=null.coef, - pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,...) - if (reml) { - grad1 <- t(L)%*%b$REML1; - } else if (scoreType=="GACV") { - grad1 <- t(L)%*%b$GACV1; - } else if (scoreType=="UBRE"){ - grad1 <- t(L)%*%b$UBRE1 - } else { ## default to deviance based GCV - grad1 <- t(L)%*%b$GCV1; - } + pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,nei=nei,...) + grad1 <- t(L)%*%b[[sname1]]; + B[i,] <- (grad1-grad)/feps rm(b) } ## end of FD Hessian loop @@ -1934,7 +1929,7 @@ offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=deriv, control=control,gamma=gamma,scale=scale,printWarn=FALSE,start=prev$start, mustart=prev$mustart,scoreType=scoreType,null.coef=null.coef, - pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,...) + pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,nei=nei,...) ### Derivative testing code. Not usually called and not part of BFGS... ok <- check.derivs @@ -1945,48 +1940,38 @@ offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=1, control=control,gamma=gamma,scale=scale, printWarn=FALSE,mustart=prev$mustart,start=prev$start, - scoreType=scoreType,eps=eps,null.coef=null.coef,Sl=Sl,...) + scoreType=scoreType,eps=eps,null.coef=null.coef,Sl=Sl,nei=nei,...) ## deal with fact that deriv might be 0... 
bb <- if (deriv==1) b else gam.fit3(x=X, y=y, sp=L%*%lsp+lsp0,Eb=Eb,UrS=UrS, offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=1, control=control,gamma=gamma,scale=scale,printWarn=FALSE,start=prev$start, mustart=prev$mustart,scoreType=scoreType,null.coef=null.coef, - pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,...) - fdH <- bb$dH + pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,nei=nei,...) + #fdH <- bb$dH fdb.dr <- bb$db.drho*0 - for (j in 1:length(lsp)) { ## check dH and db.drho + if (!is.null(bb$NCV)) { + deta.cv <- attr(bb$NCV,"deta.cv") + fd.eta <- deta.cv*0 + } + for (j in 1:ncol(fdb.dr)) { ## check dH and db.drho lsp1 <- lsp;lsp1[j] <- lsp[j] + eps ba <- gam.fit3(x=X, y=y, sp=L%*%lsp1+lsp0,Eb=Eb,UrS=UrS, offset = offset,U1=U1,Mp=Mp,family = family,weights=weights,deriv=1, control=control,gamma=gamma,scale=scale,printWarn=FALSE,start=prev$start, mustart=prev$mustart,scoreType=scoreType,null.coef=null.coef, - pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,...) - fdH[[j]] <- (ba$H - bb$H)/eps + pearson.extra=pearson.extra,dev.extra=dev.extra,n.true=n.true,Sl=Sl,nei=nei,...) + # fdH[[j]] <- (ba$H - bb$H)/eps fdb.dr[,j] <- (ba$coefficients - bb$coefficients)/eps + if (!is.null(bb$NCV)) fd.eta[,j] <- as.numeric(attr(ba$NCV,"eta.cv")-attr(bb$NCV,"eta.cv"))/eps } } ### end of derivative testing. BFGS code resumes... - - if (reml) { - trial$score <- b$REML; - } else if (scoreType=="GACV") { - trial$score <- b$GACV; - } else if (scoreType=="UBRE"){ - trial$score <- b$UBRE; - } else { ## default to deviance based GCV - trial$score <- b$GCV; - } + trial$score <- b[[sname]]; + if (deriv>0) { - if (reml) { - trial$grad <- t(L)%*%b$REML1; - } else if (scoreType=="GACV") { - trial$grad <- t(L)%*%b$GACV1; - } else if (scoreType=="UBRE"){ - trial$grad <- t(L)%*%b$UBRE1 - } else { ## default to deviance based GCV - trial$grad <- t(L)%*%b$GCV1; - } + trial$grad <- t(L)%*%b[[sname1]]; + trial$dVkk <- diag(t(L0) %*% b$dVkk %*% L0) ## curvature testing matrix trial$dscore <- sum(trial$grad*step) deriv <- 0 @@ -2009,16 +1994,9 @@ control=control,gamma=gamma,scale=scale,printWarn=FALSE, start=trial$start,mustart=trial$mustart, scoreType=scoreType,null.coef=null.coef,pearson.extra=pearson.extra, - dev.extra=dev.extra,n.true=n.true,Sl=Sl,...) - if (reml) { - trial$grad <- t(L)%*%b$REML1; - } else if (scoreType=="GACV") { - trial$grad <- t(L)%*%b$GACV1; - } else if (scoreType=="UBRE"){ - trial$grad <- t(L)%*%b$UBRE1 - } else { ## default to deviance based GCV - trial$grad <- t(L)%*%b$GCV1; - } + dev.extra=dev.extra,n.true=n.true,Sl=Sl,nei=nei,...) + trial$grad <- t(L)%*%b[[sname1]]; + trial$dscore <- sum(trial$grad*step) trial$scale.est <- b$scale.est trial$dVkk <- diag(t(L0) %*% b$dVkk %*% L0) ## curvature testing matrix @@ -2105,20 +2083,10 @@ control=control,gamma=gamma,scale=scale,printWarn=FALSE, start=trial$start,mustart=trial$mustart, scoreType=scoreType,null.coef=null.coef,pearson.extra=pearson.extra, - dev.extra=dev.extra,n.true=n.true,Sl=Sl,...) - if (reml) { - trial$score <- b$REML - trial$grad <- t(L)%*%b$REML1; - } else if (scoreType=="GACV") { - trial$score <- b$GACV - trial$grad <- t(L)%*%b$GACV1; - } else if (scoreType=="UBRE"){ - trial$score <- b$UBRE - trial$grad <- t(L)%*%b$UBRE1 - } else { ## default to deviance based GCV - trial$score <- b$GCV - trial$grad <- t(L)%*%b$GCV1; - } + dev.extra=dev.extra,n.true=n.true,Sl=Sl,nei=nei,...) 
+ trial$score <- b[[sname]] + trial$grad <- t(L)%*%b[[sname1]]; + trial$dscore <- sum(trial$grad*step) trial$scale.est <- b$scale.est trial$dVkk <- diag(t(L0) %*% b$dVkk %*% L0) ## curvature testing matrix @@ -2155,16 +2123,15 @@ control=control,gamma=gamma,scale=scale,printWarn=FALSE, start=trial$start,mustart=trial$mustart, scoreType=scoreType,null.coef=null.coef,pearson.extra=pearson.extra, - dev.extra=dev.extra,n.true=n.true,Sl=Sl,...) - if (reml) { - score <- b$REML;grad <- t(L)%*%b$REML1; - } else if (scoreType=="GACV") { - score <- b$GACV;grad <- t(L)%*%b$GACV1; - } else if (scoreType=="UBRE"){ - score <- b$UBRE;grad <- t(L)%*%b$UBRE1 - } else { ## default to deviance based GCV - score <- b$GCV;grad <- t(L)%*%b$GCV1; - } + dev.extra=dev.extra,n.true=n.true,Sl=Sl,nei=nei,...) + score <- b[[sname]];grad <- t(L)%*%b[[sname1]]; + + if (!is.null(b$Vg)) { + M <- ncol(b$db.drho) + b$Vg <- (B%*%t(L)%*%b$Vg%*%L%*%B)[1:M,1:M] ## sandwich estimate of + db.drho <- b$db.drho%*%L[1:M,,drop=FALSE] + b$Vc <- db.drho %*% b$Vg %*% t(db.drho) ## correction term for cov matrices + } b$dVkk <- NULL ## get approximate Hessian... ev <- eigen(B,symmetric=TRUE) @@ -2172,7 +2139,8 @@ ev$values[ind] <- 1/ev$values[ind] ev$values[!ind] <- 0 B <- ev$vectors %*% (ev$values*t(ev$vectors)) - if (!is.null(b$warn)&&length(b$warn)>0&&ct!="full convergence") for (j in 1:length(b$warn)) warning(b$warn[[j]]) + #if (!is.null(b$warn)&&length(b$warn)>0&&ct!="full convergence") for (j in 1:length(b$warn)) warning(b$warn[[j]]) + if (!is.null(b$warn)&&length(b$warn)>0) for (j in 1:length(b$warn)) warning(b$warn[[j]]) list(score=score,lsp=lsp,lsp.full=L%*%lsp+lsp0,grad=grad,hess=B,iter=i,conv =ct, score.hist=score.hist[!is.na(score.hist)],object=b) } ## end of bfgs @@ -2186,20 +2154,16 @@ ## args is a list containing the arguments for gam.fit3 ## For use as optim() objective gradient { reml <- args$scoreType%in%c("REML","P-REML","ML","P-ML") ## REML/ML indicator + sname <- if (reml) "REML" else args$scoreType + sname1 <- paste(sname,"1",sep=""); if (!is.null(args$L)) { lsp <- args$L%*%lsp + args$lsp0 } b<-gam.fit3(x=args$X, y=args$y, sp=lsp,Eb=args$Eb,UrS=args$UrS, offset = args$offset,U1=args$U1,Mp=args$Mp,family = args$family,weights=args$w,deriv=1, control=args$control,gamma=args$gamma,scale=args$scale,scoreType=args$scoreType, - null.coef=args$null.coef,n.true=args$n.true,Sl=args$Sl,...) - if (reml) { - ret <- b$REML1 - } else if (args$scoreType=="GACV") { - ret <- b$GACV1 - } else if (args$scoreType=="UBRE") { - ret <- b$UBRE1 - } else { ret <- b$GCV1} + null.coef=args$null.coef,n.true=args$n.true,Sl=args$Sl,nei=args$nei,...) + ret <- b[[sname1]] if (!is.null(args$L)) ret <- t(args$L)%*%ret ret } ## gam2derivative @@ -2210,20 +2174,15 @@ ## args is a list containing the arguments for gam.fit3 ## For use as optim() objective { reml <- args$scoreType%in%c("REML","P-REML","ML","P-ML") ## REML/ML indicator + sname <- if (reml) "REML" else args$scoreType if (!is.null(args$L)) { lsp <- args$L%*%lsp + args$lsp0 } b<-gam.fit3(x=args$X, y=args$y, sp=lsp,Eb=args$Eb,UrS=args$UrS, offset = args$offset,U1=args$U1,Mp=args$Mp,family = args$family,weights=args$w,deriv=0, control=args$control,gamma=args$gamma,scale=args$scale,scoreType=args$scoreType, - null.coef=args$null.coef,n.true=args$n.true,Sl=args$Sl,start=args$start,...) 
- if (reml) { - ret <- b$REML - } else if (args$scoreType=="GACV") { - ret <- b$GACV - } else if (args$scoreType=="UBRE") { - ret <- b$UBRE - } else { ret <- b$GCV} + null.coef=args$null.coef,n.true=args$n.true,Sl=args$Sl,start=args$start,nei=args$nei,...) + ret <- b[[sname]] attr(ret,"full.fit") <- b ret } ## gam2objective @@ -2236,21 +2195,17 @@ ## args is a list containing the arguments for gam.fit3 ## For use as nlm() objective { reml <- args$scoreType%in%c("REML","P-REML","ML","P-ML") ## REML/ML indicator + sname <- if (reml) "REML" else args$scoreType + sname1 <- paste(sname,"1",sep=""); if (!is.null(args$L)) { lsp <- args$L%*%lsp + args$lsp0 } b<-gam.fit3(x=args$X, y=args$y, sp=lsp, Eb=args$Eb,UrS=args$UrS, offset = args$offset,U1=args$U1,Mp=args$Mp,family = args$family,weights=args$w,deriv=1, control=args$control,gamma=args$gamma,scale=args$scale,scoreType=args$scoreType, - null.coef=args$null.coef,Sl=args$Sl,start=args$start,...) - - if (reml) { - ret <- b$REML;at <- b$REML1 - } else if (args$scoreType=="GACV") { - ret <- b$GACV;at <- b$GACV1 - } else if (args$scoreType=="UBRE") { - ret <- b$UBRE;at <- b$UBRE1 - } else { ret <- b$GCV;at <- b$GCV1} + null.coef=args$null.coef,Sl=args$Sl,start=args$start,nei=args$nei,...) + ret <- b[[sname]] + at <- b[[sname1]] attr(ret,"full.fit") <- b diff -Nru mgcv-1.8-40/R/gam.fit4.r mgcv-1.8-41/R/gam.fit4.r --- mgcv-1.8-40/R/gam.fit4.r 2022-02-28 07:59:08.000000000 +0000 +++ mgcv-1.8-41/R/gam.fit4.r 2022-10-20 19:52:47.000000000 +0000 @@ -1,7 +1,6 @@ -## (c) Simon N. Wood (2013-2015). Provided under GPL 2. +## (c) Simon N. Wood (2013-2022). Provided under GPL 2. ## Routines for gam estimation beyond exponential family. - dDeta <- function(y,mu,wt,theta,fam,deriv=0) { ## What is available directly from the family are derivatives of the ## deviance and link w.r.t. mu. This routine converts these to the @@ -34,8 +33,6 @@ g2g <- fam$g2g(mu) -## ig12 <- ig1^2;ig13 <- ig12 * ig1 - d$Deta <- r$Dmu * ig1 d$Deta2 <- r$Dmu2*ig12 - r$Dmu*g2g*ig1 d$EDeta2 <- r$EDmu2*ig12 @@ -241,7 +238,7 @@ weights = rep(1, nobs), start = NULL, etastart = NULL, mustart = NULL, offset = rep(0, nobs),U1=diag(ncol(x)), Mp=-1, family = gaussian(), control = gam.control(), deriv=2,gamma=1, - scale=1,scoreType="REML",null.coef=rep(0,ncol(x)),...) { + scale=1,scoreType="REML",null.coef=rep(0,ncol(x)),nei=NULL,...) { ## Routine for fitting GAMs beyond exponential family. ## Inputs as gam.fit3 except that family is of class "extended.family", while ## sp contains the vector of extended family parameters, followed by the log smoothing parameters, @@ -525,7 +522,7 @@ ## ok. Testing coefs unchanged is problematic under rank deficiency (not guaranteed to ## drop same parameter every iteration!) grad <- 2 * t(x[good,,drop=FALSE])%*%((w[good]*(x%*%start)[good]-wz[good]))+ 2*St%*%start - if (max(abs(grad)) > control$epsilon*max(abs(start+coefold))/2) { + if (max(abs(grad)) > control$epsilon*(abs(pdev)+scale)) { old.pdev <- pdev ## not converged quite enough coef <- coefold <- start etaold <- eta @@ -626,52 +623,159 @@ rSncol=as.integer(rSncol),deriv=as.integer(deriv), fixed.penalty = as.integer(rp$fixed.penalty),nt=as.integer(control$nthreads), type=as.integer(gdi.type),dVkk=as.double(rep(0,nSp^2))) + rV <- matrix(oo$rV,ncol(x),ncol(x)) ## rV%*%t(rV)*scale gives covariance matrix - rV <- T %*% rV + #rV <- rV # transform before return ## derivatives of coefs w.r.t. sps etc... 
- db.drho <- if (deriv) T %*% matrix(oo$b1,ncol(x),ntot) else NULL + + ## note that db.drho and dw.drho start with derivs wrt theta, then wrt sp (no scale param of course) + + db.drho <- if (deriv) matrix(oo$b1,ncol(x),ntot) else NULL ## transform before return + dw.drho <- if (deriv) matrix(oo$w1,length(z),ntot) else NULL + Kmat <- matrix(0,nrow(x),ncol(x)) Kmat[good,] <- oo$X ## rV%*%t(K)%*%(sqrt(wf)*X) = F; diag(F) is edf array - D2 <- matrix(oo$D2,ntot,ntot); ldet2 <- matrix(oo$ldet2,ntot,ntot) - bSb2 <- matrix(oo$P2,ntot,ntot) - ## compute the REML score... - ls <- family$ls(y,weights,theta,scale) - nt <- length(theta) - lsth1 <- ls$lsth1[1:nt]; - lsth2 <- as.matrix(ls$lsth2)[1:nt,1:nt] ## exclude any derivs w.r.t log scale here - REML <- ((dev+oo$P)/(2*scale) - ls$ls)/gamma + (oo$ldet - rp$det)/2 - - as.numeric(scoreType=="REML") * Mp * (log(2*pi*scale)/2-log(gamma)/2) - REML1 <- REML2 <- NULL - if (deriv) { - det1 <- oo$ldet1 - if (nSp) { - ind <- 1:nSp + length(theta) - det1[ind] <- det1[ind] - rp$det1 - } - REML1 <- ((oo$D1+oo$P1)/(2*scale) - c(lsth1,rep(0,length(sp))))/gamma + (det1)/2 - if (deriv>1) { - ls2 <- D2*0;ls2[1:nt,1:nt] <- lsth2 - if (nSp) ldet2[ind,ind] <- ldet2[ind,ind] - rp$det2 - REML2 <- ((D2+bSb2)/(2*scale) - ls2)/gamma + ldet2/2 - } - } + Vg <- NCV <- NCV1 <- REML <- REML1 <- REML2 <- NULL + if (scoreType=="NCV") { + eta.cv <- rep(0.0,length(nei$i)) + deta.cv <- if (deriv) matrix(0.0,length(nei$i),ntot) else matrix(0.0,1,ntot) + w1 <- -dd$Deta/2; w2 <- dd$Deta2/2; dth <- dd$Detath/2 ## !? + R <- try(chol(crossprod(x,w*x)+St),silent=TRUE) + if (nei$jackknife > 2) { ## return NCV coef changes for each fold + if (deriv>0) stop("jackknife and derivatives requested together") + dth <- matrix(0,ncol(x),length(nei$m)) + deriv1 <- -1 + } else deriv1 <- deriv + if (inherits(R,"try-error")) { ## use CG approach... 
+ Hi <- tcrossprod(rV) ## inverse of penalized Expected Hessian - inverse actual Hessian probably better + cg.iter <- .Call(C_ncv,x,Hi,w1,w2,db.drho,dw.drho,rS,nei$i-1,nei$mi,nei$m,nei$k-1,oo$beta,exp(sp),eta.cv, deta.cv, dth, deriv1); + warn[[length(warn)+1]] <- "NCV positive definite update check not possible" + } else { ## use Cholesky update approach + pdef.fails <- .Call(C_Rncv,x,R,w1,w2,db.drho,dw.drho,rS,nei$i-1,nei$mi,nei$m,nei$k-1,oo$beta,exp(sp),eta.cv, + deta.cv, dth, deriv1,.Machine$double.eps,control$ncv.threads); + if (pdef.fails) warn[[length(warn)+1]] <- "some NCV updates not positive definite" + } + mu.cv <- linkinv(eta.cv) + nt <- family$n.theta + if (deriv) keep <- if (length(theta)>nt) (length(theta)+1):ncol(db.drho) else 1:ncol(db.drho) + #dev0 <- sum(dev.resids(y[nei$i], mu[nei$i], weights[nei$i],theta)) + dev0 <- dev.resids(y[nei$i], mu[nei$i], weights[nei$i],theta) + ls0 <- family$ls(y[nei$i],weights[nei$i],theta,scale) + if (family$qapprox) { ## quadratic approximation to NCV + #qdev <- dev0 + gamma*sum(dd$Deta[nei$i]*(eta.cv-eta[nei$i])) + 0.5*gamma*sum(dd$Deta2[nei$i]*(eta.cv-eta[nei$i])^2) + qdev <- dev0 + gamma*dd$Deta[nei$i]*(eta.cv-eta[nei$i]) + 0.5*gamma*dd$Deta2[nei$i]*(eta.cv-eta[nei$i])^2 + NCV <- sum(qdev)/(2*scale) - ls0$ls + if (deriv) { + deta <- x %*% db.drho + #NCV1 <- (colSums(dd$Deta[nei$i]*((1-gamma)*deta[nei$i,,drop=FALSE]+gamma*deta.cv)) + + #gamma*colSums(dd$Deta2[nei$i]*deta.cv*(eta.cv-eta[nei$i])) + + #0.5*gamma*colSums(as.numeric(dd$Deta3[nei$i])*deta[nei$i,,drop=FALSE]*(eta.cv-eta[nei$i])^2))/(2*scale) + ncv1 <- (dd$Deta[nei$i]*((1-gamma)*deta[nei$i,,drop=FALSE]+gamma*deta.cv) + + gamma*dd$Deta2[nei$i]*deta.cv*(eta.cv-eta[nei$i]) + + 0.5*gamma*as.numeric(dd$Deta3[nei$i])*deta[nei$i,,drop=FALSE]*(eta.cv-eta[nei$i])^2)/(2*scale) + if (nt>0) { ## deal with direct dependence on the theta parameters + #NCV1[1:nt] <- NCV1[1:nt]- ls0$lsth1[1:nt] + + # if (nt==1) (sum(dd$Dth[nei$i]) + gamma*sum(dd$Detath[nei$i]*(eta.cv-eta[nei$i])) + 0.5*gamma*sum(dd$Deta2th[nei$i]*(eta.cv-eta[nei$i])^2))/(2*scale) + # else (colSums(dd$Dth[nei$i,]) + gamma*colSums(dd$Detath[nei$i,]*(eta.cv-eta[nei$i])) + 0.5*gamma*colSums(dd$Deta2th[nei$i,]*(eta.cv-eta[nei$i])^2))/(2*scale) + ncv1[,1:nt] <- ncv1[,1:nt]- ls0$LSTH1[,1:nt] + + if (nt==1) (dd$Dth[nei$i] + gamma*dd$Detath[nei$i]*(eta.cv-eta[nei$i]) + 0.5*gamma*dd$Deta2th[nei$i]*(eta.cv-eta[nei$i])^2)/(2*scale) + else (dd$Dth[nei$i,] + gamma*dd$Detath[nei$i,]*(eta.cv-eta[nei$i]) + 0.5*gamma*dd$Deta2th[nei$i,]*(eta.cv-eta[nei$i])^2)/(2*scale) + } + if (!scale.known) { + #NCV1 <- c(NCV1,-qdev/(2*scale) - ls0$lsth1[1+nt]) + ncv1 <- cbind(ncv1,-qdev/(2*scale) - ls0$LSTH1[,1+nt]) + } + } + } else { ## exact NCV + #dev.cv <- sum(dev.resids(y, mu.cv, weights,theta)) + dev.cv <- dev.resids(y, mu.cv, weights,theta) + NCV <- sum(dev.cv)/(2*scale) - ls0$ls + DEV <- sum(dev0)/(2*scale) - ls0$ls + if (gamma!=1) NCV <- gamma*NCV - (gamma-1)*DEV + if (deriv) { + dd.cv <- dDeta(y[nei$i],mu.cv,weights[nei$i],theta,family,1) + #NCV1 <- colSums(dd.cv$Deta*deta.cv)/(2*scale) + ncv1 <- dd.cv$Deta*deta.cv/(2*scale) + #if (gamma!=1) DEV1 <- colSums((dd$Deta*(x%*%db.drho))[nei$i,,drop=FALSE])/(2*scale) + if (gamma!=1) dev1 <- (dd$Deta*(x%*%db.drho))[nei$i,,drop=FALSE]/(2*scale) + if (nt>0) { + #NCV1[1:nt] <- NCV1[1:nt] + colSums(as.matrix(dd.cv$Dth/(2*scale))) - ls0$lsth1[1:nt] + #if (gamma!=1) DEV1[1:nt] <- DEV1[1:nt] + colSums(as.matrix(dd$Dth/(2*scale))[nei$i,,drop=FALSE]) - ls0$lsth1[1:nt] + ncv1[,1:nt] <- ncv1[,1:nt] + 
as.matrix(dd.cv$Dth/(2*scale)) - ls0$LSTH1[,1:nt] + if (gamma!=1) dev1[,1:nt] <- dev1[,1:nt] + as.matrix(dd$Dth/(2*scale))[nei$i,,drop=FALSE] - ls0$LSTH1[,1:nt] + } + if (!scale.known) { ## deal with log scale parameter derivative + #NCV1 <- c(NCV1,-dev.cv/(2*scale) - ls0$lsth1[1+nt]) + #if (gamma!=1) DEV1 <- c(DEV1,-dev0/(2*scale) - ls0$lsth1[1+nt]) + ncv1 <- cbind(ncv1,-dev.cv/(2*scale) - ls0$LSTH1[,1+nt]) + if (gamma!=1) dev1 <- cbind(dev1,-dev0/(2*scale) - ls0$lSTH1[,1+nt]) + } + #if (gamma!=1) NCV1 <- gamma*NCV1 - (gamma-1)*DEV1 + if (gamma!=1) ncv1 <- gamma*ncv1 - (gamma-1)*dev1 + } + } ## exact NCV + + if (nei$jackknife>2) { + nk <- c(nei$m[1],diff(nei$m)) ## dropped fold sizes + jkw <- sqrt((nobs-nk)/(nobs*nk)) ## jackknife weights + dth <-jkw*t(dth)%*%t(T) + Vj <- crossprod(dd) ## jackknife cov matrix for coefs (beta) + attr(Vj,"dd") <- dd + attr(NCV,"Vj") <- Vj + } + + attr(NCV,"eta.cv") <- eta.cv + if (deriv) { + attr(NCV,"deta.cv") <- deta.cv; + NCV1 <- colSums(ncv1) + NCV1 <- NCV1[keep] ## drop derivatives for any fixed theta parameters + Vg <- crossprod(ncv1[,keep,drop=FALSE]) ## empirical cov matrix of grad + } + } else { ## RE/ML + + D2 <- matrix(oo$D2,ntot,ntot); ldet2 <- matrix(oo$ldet2,ntot,ntot) + bSb2 <- matrix(oo$P2,ntot,ntot) + ## compute the REML score... + ls <- family$ls(y,weights,theta,scale) + nt <- length(theta) + lsth1 <- ls$lsth1[1:nt]; + lsth2 <- as.matrix(ls$lsth2)[1:nt,1:nt] ## exclude any derivs w.r.t log scale here + REML <- ((dev+oo$P)/(2*scale) - ls$ls)/gamma + (oo$ldet - rp$det)/2 - + as.numeric(scoreType=="REML") * Mp * (log(2*pi*scale)/2-log(gamma)/2) + + if (deriv) { + det1 <- oo$ldet1 + if (nSp) { + ind <- 1:nSp + length(theta) + det1[ind] <- det1[ind] - rp$det1 + } + REML1 <- ((oo$D1+oo$P1)/(2*scale) - c(lsth1,rep(0,length(sp))))/gamma + (det1)/2 + if (deriv>1) { + ls2 <- D2*0;ls2[1:nt,1:nt] <- lsth2 + if (nSp) ldet2[ind,ind] <- ldet2[ind,ind] - rp$det2 + REML2 <- ((D2+bSb2)/(2*scale) - ls2)/gamma + ldet2/2 + } + } - if (!scale.known&&deriv) { ## need derivatives wrt log scale, too - Dp <- dev + oo$P - dlr.dlphi <- (-Dp/(2 *scale) - ls$lsth1[nt+1])/gamma - as.numeric(scoreType=="REML") * Mp/2 - d2lr.d2lphi <- (Dp/(2*scale) - ls$lsth2[nt+1,nt+1])/gamma - d2lr.dspphi <- -(oo$D1+oo$P1)/(2*scale*gamma) - d2lr.dspphi[1:nt] <- d2lr.dspphi[1:nt] - ls$lsth2[nt+1,1:nt]/gamma - REML1 <- c(REML1,dlr.dlphi) - if (deriv==2) { + if (!scale.known&&deriv) { ## need derivatives wrt log scale, too + Dp <- dev + oo$P + dlr.dlphi <- (-Dp/(2 *scale) - ls$lsth1[nt+1])/gamma - as.numeric(scoreType=="REML") * Mp/2 + d2lr.d2lphi <- (Dp/(2*scale) - ls$lsth2[nt+1,nt+1])/gamma + d2lr.dspphi <- -(oo$D1+oo$P1)/(2*scale*gamma) + d2lr.dspphi[1:nt] <- d2lr.dspphi[1:nt] - ls$lsth2[nt+1,1:nt]/gamma + REML1 <- c(REML1,dlr.dlphi) + if (deriv==2) { REML2 <- rbind(REML2,as.numeric(d2lr.dspphi)) REML2 <- cbind(REML2,c(as.numeric(d2lr.dspphi),d2lr.d2lphi)) - } + } + } } - nth <- length(theta) + + if (deriv) db.drho <- T %*% db.drho + if (deriv>0&&family$n.theta==0&&nth>0) { ## need to drop derivs for fixed theta REML1 <- REML1[-(1:nth)] if (deriv>1) REML2 <- REML2[-(1:nth),-(1:nth)] @@ -708,12 +812,12 @@ working.weights = ww, ## working weights df.null = nulldf, y = y, converged = conv,z=z, boundary = boundary, - REML=REML,REML1=REML1,REML2=REML2, - rV=rV,db.drho=db.drho,dw.drho=dw.drho, + REML=REML,REML1=REML1,REML2=REML2,NCV=NCV,NCV1=NCV1, + rV=T %*% rV,db.drho=db.drho,dw.drho=dw.drho, scale.est=scale,reml.scale=scale, aic=aic.model, rank=oo$rank.est, - K=Kmat,control=control, + 
K=Kmat,control=control,Vg=Vg, dVkk = matrix(oo$dVkk,nSp,nSp),ldetS1 = if (grderiv) rp$det1 else 0 #,D1=oo$D1,D2=D2, #ldet=oo$ldet,ldet1=oo$ldet1,ldet2=ldet2, @@ -844,8 +948,8 @@ } ## efsudr -gam.fit5 <- function(x,y,lsp,Sl,weights=NULL,offset=NULL,deriv=2,family, - control=gam.control(),Mp=-1,start=NULL,gamma=1){ +gam.fit5 <- function(x,y,lsp,Sl,weights=NULL,offset=NULL,deriv=2,family,scoreType="REML", + control=gam.control(),Mp=-1,start=NULL,gamma=1,nei=NULL){ ## NOTE: offset handling - needs to be passed to ll code ## fit models by general penalized likelihood method, ## given doubly extended family in family. lsp is log smoothing parameters @@ -875,14 +979,11 @@ ## the stability reparameterization + log|S|_+ and derivs... rp <- ldetS(Sl,rho=lsp,fixed=rep(FALSE,length(lsp)),np=q,root=TRUE) x <- Sl.repara(rp$rp,x) ## apply re-parameterization to x - #x <- Sl.repa(rp$rp,x,r=-1) Eb <- Sl.repara(rp$rp,Eb) ## root balanced penalty - #Eb <- Sl.repa(rp$rp,Eb,r=-1) St <- crossprod(rp$E) ## total penalty matrix E <- rp$E ## root total penalty attr(E,"use.unscaled") <- TRUE ## signal initialization code that E not to be further scaled if (!is.null(start)) start <- Sl.repara(rp$rp,start) ## re-para start - #if (!is.null(start)) start <- Sl.repa(rp$rp,start,l=1) ## re-para start ## NOTE: it can be that other attributes need re-parameterization here ## this should be done in 'family$initialize' - see mvn for an example. @@ -915,7 +1016,7 @@ iconv <- max(abs(grad))0) stop("non finite values in Hessian") @@ -1004,7 +1106,8 @@ khalf <- 0;fac <- 2 ## with ll1 < ll0 in place of ll1 <= ll0 in next line than we can repeatedly accept - ## miniscule steps that do not actually improve anything. + ## miniscule steps that do not actually improve anything. + llold <- ll ## avoid losing lbb slot on stp failure while ((!is.finite(ll1)||ll1 <= ll0) && khalf < 25) { ## step halve until it succeeds... step <- step/fac;coef1 <- coef + step ll <- llf(y,x,coef1,weights,family,offset=offset,deriv=0) @@ -1050,14 +1153,13 @@ Hp <- -ll$lbb+St ## convergence test... ok <- (iter==control$maxit || max(abs(grad)) < control$epsilon*abs(ll0)) - # (abs(ll1-ll0) < control$epsilon*abs(ll0) && max(abs(grad)) < .Machine$double.eps^.5*abs(ll0))) if (ok) { ## appears to have converged if (indefinite) { ## not a well defined maximum if (perturbed==5) stop("indefinite penalized likelihood in gam.fit5 ") if (iter<4||rank.checked) { perturbed <- perturbed + 1 - coef <- coef*(1+(runif(length(coef))*.02-.01)*perturbed) + - (runif(length(coef)) - 0.5 ) * mean(abs(coef))*1e-5*perturbed + coef <- coef*(1+(rep_len(c(0,1),length(coef))*.02-.01)*perturbed) + + (rep_len(c(0,1),length(coef)) - 0.5 ) * mean(abs(coef))*1e-5*perturbed ll <- llf(y,x,coef,weights,family,offset=offset,deriv=1) ll0 <- ll$l - (t(coef)%*%St%*%coef)/2 } else { @@ -1096,25 +1198,35 @@ attr(x,"drop") <- drop ## useful if family has precomputed something from x ll <- llf(y,x,coef,weights,family,offset=offset,deriv=1) ll0 <- ll$l - (t(coef)%*%St%*%coef)/2 + grad <- ll$lb - St%*%coef + Hp <- -ll$lbb+St } } } else { ## not indefinite really converged converged <- TRUE - #break - don't break until L and D made to match final Hp + # ... don't break until L and D made to match final Hp } } else ll0 <- ll1 ## step ok but not converged yet } else { ## step failed. 
+ ll <- llold ## restore old ll with lbb slot if (is.null(drop)) bdrop <- rep(FALSE,q) if (iconv && iter==1) { ## OK to fail on first step if apparently converged to start with converged <- TRUE ## Note: important to check if improvement possible even if apparently coef <- start ## converged, otherwise sp changes can lead to no sp objective change! } else { converged <- FALSE - ## NOTE: the threshold can be unrealistic if gradient can't be computed to this accuracy, e.g. - ## because of very large smoothing parameters - could estimate grad accuracy from machine zero - ## perturbation of grad calc, but perhaps too fussy. - warn[[length(warn)+1]] <- paste("gam.fit5 step failed: max magnitude relative grad =",max(abs(grad/drop(ll0)))) + ## NOTE: the threshold can be unrealistic if gradient or step can't be computed to this accuracy, e.g. + ## because of very large smoothing parameters - could estimate grad/step accuracy from machine zero + ## perturbation of grad/step calc, but perhaps too fussy. + coefp <- coef*(1+rep_len(c(-1,1),length(coef))*.Machine$double.eps^.9) + llp <- llf(y,x,coef,weights,family,offset=offset,deriv=1) + gradp <- llp$lb - St%*%coefp + err <- min(1e-3,kappaH*max(1,mean(abs(gradp-grad))/mean(abs(coefp-coef)))*.Machine$double.eps) + ## err is an estimate of the acheivable relative error, capped above at 1e-3 to ensure + ## this level of stability loss gets reported! + if (max(abs(grad/drop(ll0)))>max(err,control$epsilon*2)) warn[[length(warn)+1]] <- + paste("gam.fit5 step failed: max magnitude relative grad =",max(abs(grad/drop(ll0)))) } break ## no need to recompute L and D, so break now } @@ -1133,7 +1245,7 @@ } else fcoef <- coef dVkk <- d1l <- d2l <- d1bSb <- d2bSb <- d1b <- d2b <- d1ldetH <- d2ldetH <- d1b <- d2b <- NULL - + ncv <- scoreType=="NCV" if (deriv>0) { ## Implicit differentiation for derivs... m <- nSp @@ -1153,7 +1265,8 @@ ## Now call the family again to get first derivative of Hessian w.r.t ## smoothing parameters, in list d1H... - ll <- llf(y,x,coef,weights,family,offset=offset,deriv=3,d1b=d1b) + ll <- if (ncv) llf(y,x,coef,weights,family,offset=offset,deriv=3,d1b=d1b,ncv=TRUE) else + llf(y,x,coef,weights,family,offset=offset,deriv=3,d1b=d1b) # d1l <- colSums(ll$lb*d1b) # cancels @@ -1186,69 +1299,104 @@ } ## if (deriv > 1) } ## if (deriv > 0) - ## Compute the derivatives of log|H+S|... - if (deriv > 0) { - d1ldetH <- rep(0,m) - d1Hp <- list() - for (i in 1:m) { - A <- -ll$d1H[[i]] + Sl.mult(rp$Sl,diag(q),i)[!bdrop,!bdrop] - d1Hp[[i]] <- D*(backsolve(L,forwardsolve(t(L),(D*A)[piv,]))[ipiv,,drop=FALSE]) - d1ldetH[i] <- sum(diag(d1Hp[[i]])) + if (scoreType=="NCV") { + REML <- REML1 <- REML2 <- NULL + if (deriv==0) ll <- llf(y,x,coef,weights,family,offset=offset,deriv=1,d1b=d1b,ncv=TRUE) ## otherwise l1, l2 not returned + ncv <- family$ncv ## helps debugging! 
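## Editor's usage sketch (not part of the patch): the 'nei' list consumed here
## and in gam.fit3 appears to use the format of the leave-one-out default built
## immediately below: nei$k holds the indices dropped for each neighbourhood,
## nei$m the cumulative end of each neighbourhood within k, and nei$i/nei$mi
## the points predicted for each neighbourhood and their cumulative ends.
## Assuming the user-facing interface is gam(..., method = "NCV", nei = ...);
## check ?gam and ?NCV in 1.8-41 before relying on this:
library(mgcv)
set.seed(1)
dat <- gamSim(1, n = 200, verbose = FALSE)
n <- nrow(dat)
nei <- list(k = 1:n, m = 1:n, i = 1:n, mi = 1:n)  ## leave-one-out folds, spelled out
b <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = dat,
         method = "NCV", nei = nei)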
+ deriv1 <- if (deriv==0) 0 else 1 + ## create nei if null + if (is.null(nei)||is.null(nei$k)||is.null(nei$m)) nei <- list(i=1:nobs,mi=1:nobs,m=1:nobs,k=1:nobs) ## LOOCV + if (is.null(nei$i)) if (length(nei$m)==nobs) nei$mi <- nei$i <- 1:nobs else stop("unclear which points NCV neighbourhoods belong to") + if (length(nei$mi)!=length(nei$m)) stop("for NCV number of dropped and predicted neighbourhoods must match") + ## complete dH + if (deriv>0) { + for (i in 1:length(ll$d1H)) ll$d1H[[i]] <- ll$d1H[[i]] - Sl.mult(rp$Sl,diag(q),i)[!bdrop,!bdrop] } - } ## if (deriv > 0) + + R1 <- try(chol(t(Hp/D)/D),silent=TRUE) + ll$gamma <- gamma; + ## note: use of quadratic approx to NCV signalled by family$qapprox + #if (overlap||inherits(R1,"try-error")) + if (inherits(R1,"try-error")) { + ## get H (Hp?) and Hi + Hi <- t(D*chol2inv(L)[ipiv,ipiv])*D + ret <- ncv(x,y,weights,nei,coef,family,ll,H=t(Hp/D)/D,Hi=Hi,offset=offset,dH=ll$d1H, + db=d1b,deriv=deriv1) + warn[[length(warn)+1]] <- "NCV update positive definite check not possible" + } else { ## cholesky version + ret <- ncv(x,y,weights,nei,coef,family,ll,R=R1,offset=offset,dH=ll$d1H,db=d1b, + deriv=deriv1,nt=control$ncv.threads) + if (ret$error>0) warn[[length(warn)+1]] <- "some NCV updates not positive definite" + } + NCV <- ret$NCV + NCV1 <- ret$NCV1 + } else { ## REML required + NCV <- NCV1 <- NULL + ## Compute the derivatives of log|H+S|... + if (deriv > 0) { + d1ldetH <- rep(0,m) + d1Hp <- list() + for (i in 1:m) { + A <- -ll$d1H[[i]] + Sl.mult(rp$Sl,diag(q),i)[!bdrop,!bdrop] + d1Hp[[i]] <- D*(backsolve(L,forwardsolve(t(L),(D*A)[piv,]))[ipiv,,drop=FALSE]) + d1ldetH[i] <- sum(diag(d1Hp[[i]])) + } + } ## if (deriv > 0) - if (deriv > 1) { - d2ldetH <- matrix(0,m,m) - k <- 0 - for (i in 1:m) for (j in i:m) { - k <- k + 1 - d2ldetH[i,j] <- -sum(d1Hp[[i]]*t(d1Hp[[j]])) - llr$trHid2H[k] - if (i==j) { ## need to add term relating to smoothing penalty - A <- Sl.mult(rp$Sl,diag(q),i,full=TRUE)[!bdrop,!bdrop] - bind <- rowSums(abs(A))!=0 ## row/cols of non-zero block - A <- A[,bind,drop=FALSE] ## drop the zero columns - A <- D*(backsolve(L,forwardsolve(t(L),(D*A)[piv,]))[ipiv,,drop=FALSE]) - d2ldetH[i,j] <- d2ldetH[i,j] + sum(diag(A[bind,,drop=FALSE])) - } else d2ldetH[j,i] <- d2ldetH[i,j] - } - } ## if (deriv > 1) - - ## Compute derivs of b'Sb... - - if (deriv>0) { - Skb <- Sl.termMult(rp$Sl,fcoef,full=TRUE) - d1bSb <- rep(0,m) - for (i in 1:m) { - Skb[[i]] <- Skb[[i]][!bdrop] - d1bSb[i] <- sum(coef*Skb[[i]]) + if (deriv > 1) { + d2ldetH <- matrix(0,m,m) + k <- 0 + for (i in 1:m) for (j in i:m) { + k <- k + 1 + d2ldetH[i,j] <- -sum(d1Hp[[i]]*t(d1Hp[[j]])) - llr$trHid2H[k] + if (i==j) { ## need to add term relating to smoothing penalty + A <- Sl.mult(rp$Sl,diag(q),i,full=TRUE)[!bdrop,!bdrop] + bind <- rowSums(abs(A))!=0 ## row/cols of non-zero block + A <- A[,bind,drop=FALSE] ## drop the zero columns + A <- D*(backsolve(L,forwardsolve(t(L),(D*A)[piv,]))[ipiv,,drop=FALSE]) + d2ldetH[i,j] <- d2ldetH[i,j] + sum(diag(A[bind,,drop=FALSE])) + } else d2ldetH[j,i] <- d2ldetH[i,j] + } + } ## if (deriv > 1) + + ## Compute derivs of b'Sb... 
+ + if (deriv>0) { + Skb <- Sl.termMult(rp$Sl,fcoef,full=TRUE) + d1bSb <- rep(0,m) + for (i in 1:m) { + Skb[[i]] <- Skb[[i]][!bdrop] + d1bSb[i] <- sum(coef*Skb[[i]]) + } } - } - if (deriv>1) { - d2bSb <- matrix(0,m,m) - for (i in 1:m) { - Sd1b <- St%*%d1b[,i] - for (j in i:m) { - d2bSb[j,i] <- d2bSb[i,j] <- 2*sum( - d1b[,i]*Skb[[j]] + d1b[,j]*Skb[[i]] + d1b[,j]*Sd1b) + if (deriv>1) { + d2bSb <- matrix(0,m,m) + for (i in 1:m) { + Sd1b <- St%*%d1b[,i] + for (j in i:m) { + d2bSb[j,i] <- d2bSb[i,j] <- 2*sum( + d1b[,i]*Skb[[j]] + d1b[,j]*Skb[[i]] + d1b[,j]*Sd1b) + } + d2bSb[i,i] <- d2bSb[i,i] + sum(coef*Skb[[i]]) } - d2bSb[i,i] <- d2bSb[i,i] + sum(coef*Skb[[i]]) } - } - ## get grad and Hessian of REML score... - REML <- -as.numeric((ll$l - drop(t(coef)%*%St%*%coef)/2)/gamma + rp$ldetS/2 - ldetHp/2 + - Mp*(log(2*pi)/2)-log(gamma)/2) + ## get grad and Hessian of REML score... + REML <- -as.numeric((ll$l - drop(t(coef)%*%St%*%coef)/2)/gamma + rp$ldetS/2 - ldetHp/2 + + Mp*(log(2*pi)/2)-log(gamma)/2) - REML1 <- if (deriv<1) NULL else -as.numeric( # d1l # cancels + REML1 <- if (deriv<1) NULL else -as.numeric( # d1l # cancels - d1bSb/(2*gamma) + rp$ldet1/2 - d1ldetH/2 ) - + REML2 <- if (deriv<2) NULL else -( (d2l - d2bSb/2)/gamma + rp$ldet2/2 - d2ldetH/2 ) + } ## REML computations + if (control$trace) { cat("\niter =",iter," ll =",ll$l," REML =",REML," bSb =",t(coef)%*%St%*%coef/2,"\n") cat("log|S| =",rp$ldetS," log|H+S| =",ldetHp," n.drop =",length(drop),"\n") if (!is.null(REML1)) cat("REML1 =",REML1,"\n") } - REML2 <- if (deriv<2) NULL else -( (d2l - d2bSb/2)/gamma + rp$ldet2/2 - d2ldetH/2 ) + ## Get possibly multiple linear predictors lpi <- attr(x,"lpi") @@ -1280,7 +1428,7 @@ ret <- list(coefficients=coef,family=family,y=y,prior.weights=weights, fitted.values=fitted.values, linear.predictors=linear.predictors, scale.est=1, ### NOTE: needed by newton, but what is sensible here? - REML= REML,REML1= REML1,REML2=REML2, + REML= REML,REML1= REML1,REML2=REML2,NCV=NCV,NCV1=NCV1, rank=rank,aic = -2*ll$l, ## 2*edf needs to be added ##deviance = -2*ll$l, l= ll$l,## l1 =d1l,l2 =d2l, @@ -1396,7 +1544,7 @@ fit } ## efsud -gam.fit5.post.proc <- function(object,Sl,L,lsp0,S,off) { +gam.fit5.post.proc <- function(object,Sl,L,lsp0,S,off,gamma) { ## object is object returned by gam.fit5, Sl is penalty object, L maps working sp ## vector to full sp vector ## Computes: @@ -1417,6 +1565,10 @@ ipiv[piv] <- 1:p ## Vb0 <- crossprod(forwardsolve(t(object$L),diag(object$D,nrow=p)[piv,])[ipiv,]) + ## Bayes cov matrix with learning rate 1/gamma. Wrong - parameterization s.t. Vp*gamma is it + #Vl <- if (gamma==1) NULL else chol2inv(chol(-object$lbb/gamma+object$St)) + + ## need to pre-condition lbb before testing rank... lbb <- object$D*t(object$D*lbb) @@ -1547,13 +1699,13 @@ weights = rep(1, length(y)), start = NULL, offset = rep(0, length(y)),Mp,family = gaussian(), control = gam.control(),deriv=2,eps=1e-7,spe=1e-3, - Sl,gamma=1,...) + Sl,gamma=1,nei=nei,...) ## FD checking of derivatives for gam.fit5: a debugging routine { if (!deriv%in%c(1,2)) stop("deriv should be 1 or 2") if (control$epsilon>1e-9) control$epsilon <- 1e-9 ## first obtain the fit corresponding to sp... b <- gam.fit5(x=x,y=y,lsp=sp,Sl=Sl,weights=weights,offset=offset,deriv=deriv, - family=family,control=control,Mp=Mp,start=start,gamma=gamma) + family=family,control=control,Mp=Mp,start=start,gamma=gamma,nei=nei) ## now get the derivatives of the likelihood w.r.t. coefs... 
ll <- family$ll(y=y,X=x,coef=b$coefficients,wt=weights,family=family, deriv=1,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL) @@ -1585,9 +1737,9 @@ for (i in 1:M) { ## the smoothing parameter loop sp0 <- sp1 <- sp;sp1[i] <- sp[i] + spe/2;sp0[i] <- sp[i] - spe/2 b0 <- gam.fit5(x=x,y=y,lsp=sp0,Sl=Sl,weights=weights,offset=offset,deriv=1, - family=family,control=control,Mp=Mp,start=start,gamma=gamma) + family=family,control=control,Mp=Mp,start=start,gamma=gamma,nei=nei) b1 <- gam.fit5(x=x,y=y,lsp=sp1,Sl=Sl,weights=weights,offset=offset,deriv=1, - family=family,control=control,Mp=Mp,start=start,gamma=gamma) + family=family,control=control,Mp=Mp,start=start,gamma=gamma,nei=nei) fd.br[,i] <- (b1$coefficients - b0$coefficients)/spe if (!is.null(b$b2)) { for (j in i:M) { diff -Nru mgcv-1.8-40/R/gamlss.r mgcv-1.8-41/R/gamlss.r --- mgcv-1.8-40/R/gamlss.r 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/R/gamlss.r 2022-10-21 11:48:31.000000000 +0000 @@ -17,7 +17,7 @@ ## given l >= k >= j >= i structure then the block for index ## i,j,k,l starts at i4[i,j,k,l]*n+1, given symmetry over the indices. -trind.generator <- function(K=2) { +trind.generator <- function(K=2,ifunc=FALSE,reverse=!ifunc) { ## Generates index arrays for 'upper triangular' storage up to order 4 ## Suppose you fill an array using code like... ## m = 1 @@ -29,59 +29,180 @@ ## but for access we want no restriction on the indices. ## i4[i,j,k,l] produces the appropriate m for unrestricted ## indices. i3 and i2 do the same for 3d and 2d arrays. +## if ifunc==TRUE then rather than index arrays, index functions +## are returned, so e.g.i4(i,j,k,l) is equivalent to above. +## Index functions require less storage for high K. ## ixr will extract the unique elements from an x dimensional ## upper triangular array in the correct order. 
- i4 <- array(0,dim=c(K,K,K,K)) m.start <- 1 - m <- m.start - for (i in 1:K) for (j in i:K) for (k in j:K) for (l in k:K) { - i4[i,j,k,l] <- i4[i,j,l,k] <- i4[i,k,l,j] <- i4[i,k,j,l] <- i4[i,l,j,k] <- - i4[i,l,k,j] <- - i4[j,i,k,l] <- i4[j,i,l,k] <- i4[j,k,l,i] <- i4[j,k,i,l] <- i4[j,l,i,k] <- - i4[j,l,k,i] <- - i4[k,j,i,l] <- i4[k,j,l,i] <- i4[k,i,l,j] <- i4[k,i,j,l] <- i4[k,l,j,i] <- - i4[k,l,i,j] <- - i4[l,j,k,i] <- i4[l,j,i,k] <- i4[l,k,i,j] <- i4[l,k,j,i] <- i4[l,i,j,k] <- - i4[l,i,k,j] <- m - m <- m + 1 - } - - i3 <- array(0,dim=c(K,K,K)) - m <- m.start - for (j in 1:K) for (k in j:K) for (l in k:K) { - i3[j,k,l] <- i3[j,l,k] <- i3[k,l,j] <- i3[k,j,l] <- i3[l,j,k] <- - i3[l,k,j] <- m - m <- m + 1 - } - - i2 <- array(0,dim=c(K,K)) - m <- m.start - for (k in 1:K) for (l in k:K) { - i2[k,l] <- i2[l,k] <- m - m <- m + 1 - } + if (ifunc) { ## return index functions + eval(parse(text= paste("i2 <- function(i,j) {\n", + " if (i>j) {ii <- i;i <- j;j <- ii}\n", + " (i-1)*(",2*K+2,"-i)/2 +j-i+1\n}",sep=""))) + + eval(parse(text=paste("i3 <- function(i,j,k) {\n", + " if (i>j||j>k) { \n ii <- sort(c(i,j,k))\n", + " i <- ii[1];j <- ii[2];k <- ii[3]\n }\n", + " (i-1)*(",3*K*(K+1),"+(i-2)*(i-",3*(K+1),"))/6 + (j-i)*(",2*K+3,"-i-j)/2+k-j+1 \n}", + sep=""))) + + eval(parse(text=paste("i4 <- function(i,j,k,l) {\n", + " if (i>j||j>k||k>l) { \n ii <- sort(c(i,j,k,l))\n", + " i <- ii[1];j <- ii[2];k <- ii[3];l <- ii[4]\n }\n", + " i1 <- i-1;i2 <- i-2; i1i2 <- i1*i2/2\n", + " l-k+1 + (k-j)*(",2*K+3,"-j-k)/2 +", + " (j-i)*(3*(",K+1,"-i)^2+3*(",K+1,"-i) + (j-i-1)*(j+2*i-",3*K+5,"))/6 +\n", + " (i1*",K^3+3*K^2+2*K,"+i1i2*(",K+1,"*(2*i-3) - ",3*K^2+6*K+2,"-i1i2))/6\n}", + sep=""))) + } else { ## return index arrays + i4 <- array(0,dim=c(K,K,K,K)) + m <- m.start + for (i in 1:K) for (j in i:K) for (k in j:K) for (l in k:K) { + i4[i,j,k,l] <- i4[i,j,l,k] <- i4[i,k,l,j] <- i4[i,k,j,l] <- i4[i,l,j,k] <- + i4[i,l,k,j] <- i4[j,i,k,l] <- i4[j,i,l,k] <- i4[j,k,l,i] <- i4[j,k,i,l] <- + i4[j,l,i,k] <- i4[j,l,k,i] <- i4[k,j,i,l] <- i4[k,j,l,i] <- i4[k,i,l,j] <- + i4[k,i,j,l] <- i4[k,l,j,i] <- i4[k,l,i,j] <- i4[l,j,k,i] <- i4[l,j,i,k] <- + i4[l,k,i,j] <- i4[l,k,j,i] <- i4[l,i,j,k] <- i4[l,i,k,j] <- m + m <- m + 1 + } + + i3 <- array(0,dim=c(K,K,K)) + m <- m.start + for (j in 1:K) for (k in j:K) for (l in k:K) { + i3[j,k,l] <- i3[j,l,k] <- i3[k,l,j] <- i3[k,j,l] <- i3[l,j,k] <- + i3[l,k,j] <- m + m <- m + 1 + } + + i2 <- array(0,dim=c(K,K)) + m <- m.start + for (k in 1:K) for (l in k:K) { + i2[k,l] <- i2[l,k] <- m + m <- m + 1 + } + } ## now create the reverse indices... - m <- m.start - i4r <- rep(0,max(i4)) ## extracts the unique elements from a symmetric array in packing order. - for (i in 1:K) for (j in i:K) for (k in j:K) for (l in k:K) { - i4r[m] <- l + (k-1)*K + (j-1)*K^2 + (i-1)*K^3 - m <- m + 1 - } - m <- m.start - i3r <- rep(0,max(i3)) ## extracts the unique elements from a symmetric array in packing order. - for (j in 1:K) for (k in j:K) for (l in k:K) { - i3r[m] <- l + (k-1)*K + (j-1)*K^2 - m <- m + 1 - } - m <- m.start - i2r <- rep(0,max(i2)) ## extracts the unique elements from a symmetric array in packing order. - for (k in 1:K) for (l in k:K) { - i2r[m] <- l + (k-1)*K - m <- m + 1 - } + if (reverse) { + m <- m.start + maxi4 <- if (ifunc) i4(K,K,K,K) else i4[K,K,K,K] + i4r <- rep(0,maxi4) ## extracts the unique elements from a symmetric array in packing order. 
+ for (i in 1:K) for (j in i:K) for (k in j:K) for (l in k:K) { + i4r[m] <- l + (k-1)*K + (j-1)*K^2 + (i-1)*K^3 + m <- m + 1 + } + m <- m.start + maxi3 <- if (ifunc) i3(K,K,K) else i3[K,K,K] + i3r <- rep(0,maxi3) ## extracts the unique elements from a symmetric array in packing order. + for (j in 1:K) for (k in j:K) for (l in k:K) { + i3r[m] <- l + (k-1)*K + (j-1)*K^2 + m <- m + 1 + } + m <- m.start + maxi2 <- if (ifunc) i2(K,K) else i2[K,K] + i2r <- rep(0,maxi2) ## extracts the unique elements from a symmetric array in packing order. + for (k in 1:K) for (l in k:K) { + i2r[m] <- l + (k-1)*K + m <- m + 1 + } + } else i2r <- i3r <- i4r <- NULL list(i2=i2,i3=i3,i4=i4,i2r=i2r,i3r=i3r,i4r=i4r) } ## trind.generator +gamlss.ncv <- function(X,y,wt,nei,beta,family,llf,H=NULL,Hi=NULL,R=NULL,offset=NULL,dH=NULL,db=NULL,deriv=FALSE,nt=1) { +## computes the neighbourhood cross validation score and its derivative for a +## gamlss model. llf is what was returned by family$ll when ncv info requested. +## If derivs not required then ll must be called with deriv >=1, otherwise deriv >= 3. +## To enable NCV for a gamlss family: +## 1. the 'll' function must be modified to have an 'ncv' argument. When this is TRUE and +## deriv!=0 then ll should return l1, l2 and l3 the derivatives of the log likelihood +## w.r.t. the linear preditors (typically returned from gamlss.mueta). +## 2. the 'll' function must have an eta argument allowing the linear predictors to be +## supplied directly, rather than being computed from X and beta. +## 3. The family must contain an 'ncv' wrapper function, which simply calls this function. +## ... gaulss provides an example. + jj <- attr(X,"lpi") ## extract linear predictor index + nlp <- length(jj); n <- nrow(X) + if (deriv) { + nsp <- ncol(db) + deta <- matrix(0,n*nlp,nsp) + ind <- 1:n + for (i in 1:nlp) { + deta[ind,] <- X[,jj[[i]],drop=FALSE] %*% db[jj[[i]],,drop=FALSE] + ind <- ind + n + } + } else deta <- 0.0 + ## debug section + eta <- matrix(0,n,nlp) + for (i in 1:nlp) eta[,i] <- X[,jj[[i]],drop=FALSE] %*% beta[jj[[i]]] + ## end debug + nm <- length(nei$i) + eta.cv <- matrix(0,nm,nlp) + + deta.cv <- if (deriv) matrix(0,nm*nlp,nsp) else 0.0 + if (is.null(R)) { + cg.iter <- .Call(C_ncvls,X,jj,H,Hi,dH,llf$l1,llf$l2,llf$l3,nei$i-1,nei$mi,nei$m,nei$k-1,beta,eta.cv,deta.cv, + deta,db,deriv) + } else { + cg.iter <- .Call(C_Rncvls,X,jj,R,dH,llf$l1,llf$l2,llf$l3,nei$i-1,nei$mi,nei$m,nei$k-1,beta,eta.cv,deta.cv, + deta,db,deriv,.Machine$double.eps,nt) + } + if (!is.null(offset)) { + for (i in 1:ncol(eta.cv)) if (i <= length(offset)&&!is.null(offset[[i]])) eta.cv[,i] <- eta.cv[,i] + offset[[i]][nei$i] + } + ## ll must be set up to return l1..l3 as derivs w.r.t. 
linear predictors if ncv=TRUE + ncv1 <- NULL + gamma <- llf$gamma;qapprox <- family$qapprox + dev <- if (gamma!=1||qapprox) -sum(family$ll(y,X,beta,wt,family,offset,deriv=0,db,eta=eta,ncv=TRUE)$l0[nei$i]) else 0 + if (qapprox) { ## quadratic approximate version + ncv <- dev - gamma*sum(llf$l1[nei$i,]*(eta.cv-eta[nei$i,])) + k <- 0 + for (i in 1:nlp) for (j in i:nlp) { + k <- k + 1 + ncv <- ncv - 0.5*gamma*(1+(i!=j))*sum(llf$l2[nei$i,k]*(eta.cv[,i]-eta[nei$i,i])*(eta.cv[,j]-eta[nei$i,j])) ## symmetric term + } + if (deriv) { + ncv1 <- -colSums(as.numeric(llf$l1[nei$i,])*(deta.cv*gamma+(1-gamma)*deta)) + kk <- 0;jj <- 0 + for (j in 1:nlp) for (k in j:nlp) { + kk <- kk + 1 + ncv1 <- ncv1 - colSums(llf$l2[nei$i,kk]*((deta[1:nm+(k-1)*nm,] + deta.cv[1:nm+(k-1)*nm,])*(eta.cv[,j]-eta[nei$i,j]) + + (eta.cv[,k]-eta[nei$i,k])*(deta.cv[1:nm+(j-1)*nm,] - deta[nei$i+(j-1)*n,])))*gamma*.5 + + if (j!=k) ncv1 <- ncv1 - colSums(llf$l2[nei$i,kk]*((deta[1:nm+(j-1)*nm,] + deta.cv[1:nm+(j-1)*nm,])*(eta.cv[,k]-eta[nei$i,k]) + + (eta.cv[,j]-eta[nei$i,j])*(deta.cv[1:nm+(k-1)*nm,] - deta[nei$i+(k-1)*n,])))*gamma*.5 + for (l in k:nlp) { + jj <- jj + 1 + ncv1 <- ncv1 - (1+(j!=k)) * gamma*.5 * colSums( + llf$l3[nei$i,jj]*deta[nei$i+(l-1)*n,]*(eta.cv[,k]-eta[nei$i,k])*(eta.cv[,j]-eta[nei$i,j])) + if (l!=k) ncv1 <- ncv1 - (1+(l!=j&&j!=k)) * gamma * .5 * colSums( + llf$l3[nei$i,jj]*deta[nei$i+(k-1)*n,]*(eta.cv[,l]-eta[nei$i,l])*(eta.cv[,j]-eta[nei$i,j])) + if (l!=j) ncv1 <- ncv1 - (1+(l!=k&&j!=k)) * gamma * .5 * colSums( + llf$l3[nei$i,jj]*deta[nei$i+(j-1)*n,]*(eta.cv[,k]-eta[nei$i,k])*(eta.cv[,l]-eta[nei$i,l])) + } + } + } + } else { ## exact + offi <- offset + if (!is.null(offset)) for (i in 1:length(offset)) if (!is.null(offset[[i]])) offi[[i]] <- offset[[i]][nei$i] + ll <- family$ll(y[nei$i],X[nei$i,],beta,wt[nei$i],family,offi,deriv=1,db,eta=eta.cv,ncv=TRUE) + ncv <- -ll$l + ncv <- gamma*ncv - (gamma-1)*dev + if (deriv) { + dev1 <- ncv1 <- rep(0,nsp) + ind <- 1:nm; iin <- 1:n + for (i in 1:nlp) { + ncv1 <- ncv1 - colSums(ll$l1[,i]*deta.cv[ind,]) + if (gamma!=1) dev1 <- dev1 - colSums((llf$l1[,i]*deta[iin,])[nei$i,,drop=FALSE]) + ind <- ind + nm; iin <- iin + n + } + ncv1 <- gamma*ncv1 - (gamma-1)*dev1 + } + } + attr(ncv,"eta.cv") <- eta.cv + if (deriv) attr(ncv,"deta.cv") <- deta.cv + return(list(NCV=ncv,NCV1=ncv1,error=cg.iter)) +} ## gamlss.ncv + gamlss.etamu <- function(l1,l2,l3=NULL,l4=NULL,ig1,g2,g3=NULL,g4=NULL,i2,i3=NULL,i4=NULL,deriv=0) { ## lj is the array of jth order derivatives of l ## gj[,k] contains the jth derivatives for the link of the kth lp @@ -99,6 +220,7 @@ d1[,i] <- l1[,i]*ig1[,i] } + ifunc <- !is.array(i2) ## are index functions provided in place of index arrays? 
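## Editor's illustration (not part of the patch): the 'ifunc' flag set above
## lets this code accept either index arrays or the new index functions from
## trind.generator(K, ifunc = TRUE); both are expected to address the same
## element of the packed derivative storage, hence the recurring pattern
## 'mind <- if (ifunc) i2(i, j) else i2[i, j]' below. A quick check:
library(mgcv)
K <- 4
A  <- trind.generator(K)                 ## index arrays (previous behaviour)
Fn <- trind.generator(K, ifunc = TRUE)   ## index functions, less storage for large K
A$i2[3, 2] == Fn$i2(3, 2)                ## expected TRUE
A$i3[1, 4, 2] == Fn$i3(1, 4, 2)          ## expected TRUE
A$i4[2, 4, 1, 3] == Fn$i4(2, 4, 1, 3)    ## expected TRUE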
##n <- length(ig1[,1]) k <- 0 @@ -133,14 +255,16 @@ ## l3[,k] is derivative to transform mo <- max(ord) if (mo==3) { ## pure 3rd derivative transform - d3[,k] <- (l3[,k] - 3*l2[,i2[i,i]]*g2[,i]*ig1[,i] + + mind <- if (ifunc) i2(i,i) else i2[i,i] + d3[,k] <- (l3[,k] - 3*l2[,mind]*g2[,i]*ig1[,i] + l1[,i]*(3*g2[,i]^2*ig1[,i]^2 - g3[,i]*ig1[,i]))*ig1[,i]^3 } else if (mo==1) { ## all first derivative d3[,k] <- l3[,k]*ig1[,i]*ig1[,j]*ig1[,l] } else { ## 2,1 deriv k1 <- ii[ord==1] ## index of order 1 deriv k2 <- ii[ord==2] ## index of order 2 part - d3[,k] <- (l3[,k] - l2[,i2[k2,k1]]*g2[,k2]*ig1[,k2])* + mind <- if (ifunc) i2(k2,k1) else i2[k2,k1] + d3[,k] <- (l3[,k] - l2[,mind]*g2[,k2]*ig1[,k2])* ig1[,k1]*ig1[,k2]^2 } } ## 3rd order transform done @@ -163,8 +287,10 @@ ## l4[,k] is derivative to transform mo <- max(ord) if (mo==4) { ## pure 4th derivative transform - d4[,k] <- (l4[,k] - 6*l3[,i3[i,i,i]]*g2[,i]*ig1[,i] + - l2[,i2[i,i]]*(15*g2[,i]^2*ig1[,i]^2 - 4*g3[,i]*ig1[,i]) - + mi2 <- if (ifunc) i2(i,i) else i2[i,i] + mi3 <- if (ifunc) i3(i,i,i) else i3[i,i,i] + d4[,k] <- (l4[,k] - 6*l3[,mi3]*g2[,i]*ig1[,i] + + l2[,mi2]*(15*g2[,i]^2*ig1[,i]^2 - 4*g3[,i]*ig1[,i]) - l1[,i]*(15*g2[,i]^3*ig1[,i]^3 - 10*g2[,i]*g3[,i]*ig1[,i]^2 + g4[,i]*ig1[,i]))*ig1[,i]^4 } else if (mo==1) { ## all first derivative @@ -172,20 +298,26 @@ } else if (mo==3) { ## 3,1 deriv k1 <- ii[ord==1] ## index of order 1 deriv k3 <- ii[ord==3] ## index of order 3 part - d4[,k] <- (l4[,k] - 3*l3[,i3[k3,k3,k1]]*g2[,k3]*ig1[,k3] + - l2[,i2[k3,k1]]*(3*g2[,k3]^2*ig1[,k3]^2 - g3[,k3]*ig1[,k3]) + mi2 <- if (ifunc) i2(k3,k1) else i2[k3,k1] + mi3 <- if (ifunc) i3(k3,k3,k1) else i3[k3,k3,k1] + d4[,k] <- (l4[,k] - 3*l3[,mi3]*g2[,k3]*ig1[,k3] + + l2[,mi2]*(3*g2[,k3]^2*ig1[,k3]^2 - g3[,k3]*ig1[,k3]) )*ig1[,k1]*ig1[,k3]^3 } else { if (sum(ord==2)==2) { ## 2,2 k2a <- (ii[ord==2])[1];k2b <- (ii[ord==2])[2] - d4[,k] <- (l4[,k] - l3[,i3[k2a,k2b,k2b]]*g2[,k2a]*ig1[,k2a] - -l3[,i3[k2a,k2a,k2b]]*g2[,k2b]*ig1[,k2b] + - l2[,i2[k2a,k2b]]*g2[,k2a]*g2[,k2b]*ig1[,k2a]*ig1[,k2b] + mi2 <- if (ifunc) i2(k2a,k2b) else i2[k2a,k2b] + mi3 <- if (ifunc) i3(k2a,k2b,k2b) else i3[k2a,k2b,k2b] + mi3a <- if (ifunc) i3(k2a,k2a,k2b) else i3[k2a,k2a,k2b] + d4[,k] <- (l4[,k] - l3[,mi3]*g2[,k2a]*ig1[,k2a] + -l3[,mi3a]*g2[,k2b]*ig1[,k2b] + + l2[,mi2]*g2[,k2a]*g2[,k2b]*ig1[,k2a]*ig1[,k2b] )*ig1[,k2a]^2*ig1[,k2b]^2 } else { ## 2,1,1 k2 <- ii[ord==2] ## index of order 2 derivative k1a <- (ii[ord==1])[1];k1b <- (ii[ord==1])[2] - d4[,k] <- (l4[,k] - l3[,i3[k2,k1a,k1b]]*g2[,k2]*ig1[,k2] + mi3 <- if (ifunc) i3(k2,k1a,k1b) else i3[k2,k1a,k1b] + d4[,k] <- (l4[,k] - l3[,mi3]*g2[,k2]*ig1[,k2] )*ig1[,k1a]*ig1[,k1b]*ig1[,k2]^2 } } @@ -195,7 +327,7 @@ } # gamlss.etamu -gamlss.gH <- function(X,jj,l1,l2,i2,l3=0,i3=0,l4=0,i4=0,d1b=0,d2b=0,deriv=0,fh=NULL,D=NULL) { +gamlss.gH <- function(X,jj,l1,l2,i2,l3=0,i3=0,l4=0,i4=0,d1b=0,d2b=0,deriv=0,fh=NULL,D=NULL,sandwich=FALSE) { ## X[,jj[[i]]] is the ith model matrix. ## lj contains jth derivatives of the likelihood for each datum, ## columns are w.r.t. different combinations of parameters. @@ -219,6 +351,7 @@ p <- ncol(X);n <- nrow(X) } trHid2H <- d1H <- d2H <- NULL ## defaults + ifunc <- !is.array(i2) ## are index functions provided in place of index arrays? ## the gradient... lb <- rep(0,p) @@ -229,10 +362,16 @@ ## the Hessian... 
lbb <- if (sparse) Matrix(0,p,p) else matrix(0,p,p) + if (sandwich) { ## reset l2 so that Hessian becomes 'filling' for sandwich estimate + if (deriv>0) warning("sandwich requested with higher derivatives") + k <- 0; + for (i in 1:K) for (j in i:K) { k <- k + 1;l2[,k] <- l1[,i]*l1[,j] } + } for (i in 1:K) for (j in i:K) { ## A <- t(X[,jj[[i]],drop=FALSE])%*%(l2[,i2[i,j]]*X[,jj[[j]],drop=FALSE]) - A <- if (discrete) XWXd(X$Xd,w=l2[,i2[i,j]],k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,nthreads=1,drop=X$drop,lt=X$lpid[[i]],rt=X$lpid[[j]]) else - crossprod(X[,jj[[i]],drop=FALSE],l2[,i2[i,j]]*X[,jj[[j]],drop=FALSE]) + mi2 <- if (ifunc) i2(i,j) else i2[i,j] + A <- if (discrete) XWXd(X$Xd,w=l2[,mi2],k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,nthreads=1,drop=X$drop,lt=X$lpid[[i]],rt=X$lpid[[j]]) else + crossprod(X[,jj[[i]],drop=FALSE],l2[,mi2]*X[,jj[[j]],drop=FALSE]) lbb[jj[[i]],jj[[j]]] <- lbb[jj[[i]],jj[[j]]] + A if (j>i) lbb[jj[[j]],jj[[i]]] <- lbb[jj[[j]],jj[[i]]] + t(A) } @@ -278,7 +417,8 @@ for (l in 1:m) { ## sp loop v <- rep(0,n);ind <- 1:n for (q in 1:K) { ## diagonal accumulation loop - v <- v + l3[,i3[i,j,q]] * d1eta[ind,l] + mi3 <- if (ifunc) i3(i,j,q) else i3[i,j,q] + v <- v + l3[,mi3] * d1eta[ind,l] ind <- ind + n } XVX <- XWXd(X$Xd,w=v,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,nthreads=1,drop=X$drop,lt=X$lpid[[i]],rt=X$lpid[[j]]) @@ -335,7 +475,8 @@ for (l in 1:m) { ## sp loop v <- rep(0,n);ind <- 1:n for (q in 1:K) { ## diagonal accumulation loop - v <- v + l3[,i3[i,j,q]] * d1eta[ind,l] + mi3 <- if (ifunc) i3(i,j,q) else i3[i,j,q] + v <- v + l3[,mi3] * d1eta[ind,l] ind <- ind + n } mult <- if (i==j) 1 else 2 @@ -351,8 +492,9 @@ d1H[[l]] <- matrix(0,p,p) for (i in 1:K) for (j in i:K) { v <- rep(0,n);ind <- 1:n - for (q in 1:K) { - v <- v + l3[,i3[i,j,q]] * d1eta[ind,l] + for (q in 1:K) { + mi3 <- if (ifunc) i3(i,j,q) else i3[i,j,q] + v <- v + l3[,mi3] * d1eta[ind,l] ind <- ind + n } ## d1H[[l]][jj[[j]],jj[[i]]] <- @@ -395,10 +537,12 @@ for (i in 1:K) for (j in 1:K) { v <- rep(0,n);ind <- 1:n for (q in 1:K) { ## accumulate the diagonal matrix for X_i'diag(v)X_j - v <- v + d2eta[ind,kk]*l3[,i3[i,j,q]] + mi3 <- if (ifunc) i3(i,j,q) else i3[i,j,q] + v <- v + d2eta[ind,kk]*l3[,mi3] ins <- 1:n - for (s in 1:K) { - v <- v + d1eta[ind,k]*d1eta[ins,l]*l4[,i4[i,j,q,s]] + for (s in 1:K) { + mi4 <- if (ifunc) i4(i,j,q,s) else i4[i,j,q,s] + v <- v + d1eta[ind,k]*d1eta[ins,l]*l4[,mi4] ins <- ins + n } ind <- ind + n @@ -418,7 +562,7 @@ } ## if deriv>2 list(lb=lb,lbb=lbb,d1H=d1H,d2H=d2H,trHid2H=trHid2H) -} ## end of gamlss.gH +} ## end of gamlss.gH @@ -480,7 +624,11 @@ object$null.deviance <- sum(((object$y-mean(object$y))*object$fitted[,2])^2) }) - ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL) { + ncv <- function(X,y,wt,nei,beta,family,llf,H=NULL,Hi=NULL,R=NULL,offset=NULL,dH=NULL,db=NULL,deriv=FALSE,nt=1) { + gamlss.ncv(X,y,wt,nei,beta,family,llf,H=H,Hi=Hi,R=R,offset=offset,dH=dH,db=db,deriv=deriv,nt=nt) + } ## ncv + + ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL,eta=NULL,ncv=FALSE,sandwich=FALSE) { ## function defining the gamlss Gaussian model log lik. 
## N(mu,sigma^2) parameterized in terms of mu and log(sigma) ## deriv: 0 - eval @@ -491,18 +639,25 @@ if (!is.null(offset)) offset[[3]] <- 0 discrete <- is.list(X) jj <- attr(X,"lpi") ## extract linear predictor index - eta <- if (discrete) Xbd(X$Xd,coef,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,lt=X$lpid[[1]]) else X[,jj[[1]],drop=FALSE]%*%coef[jj[[1]]] - if (!is.null(offset[[1]])) eta <- eta + offset[[1]] + if (is.null(eta)) { + eta <- if (discrete) Xbd(X$Xd,coef,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,lt=X$lpid[[1]]) else X[,jj[[1]],drop=FALSE]%*%coef[jj[[1]]] + if (!is.null(offset[[1]])) eta <- eta + offset[[1]] + eta1 <- if (discrete) Xbd(X$Xd,coef,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,lt=X$lpid[[2]]) else X[,jj[[2]],drop=FALSE]%*%coef[jj[[2]]] + if (!is.null(offset[[2]])) eta1 <- eta1 + offset[[2]] + } else { ## eta supplied directly + eta1 <- eta[,2] + eta <- eta[,1] + } + mu <- family$linfo[[1]]$linkinv(eta) - eta1 <- if (discrete) Xbd(X$Xd,coef,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,lt=X$lpid[[2]]) else X[,jj[[2]],drop=FALSE]%*%coef[jj[[2]]] - if (!is.null(offset[[2]])) eta1 <- eta1 + offset[[2]] tau <- family$linfo[[2]]$linkinv(eta1) ## tau = 1/sig here n <- length(y) l1 <- matrix(0,n,2) ymu <- y-mu;ymu2 <- ymu^2;tau2 <- tau^2 - l <- sum(-.5 * ymu2 * tau2 - .5 * log(2*pi) + log(tau)) + l0 <- -.5 * ymu2 * tau2 - .5 * log(2*pi) + log(tau) + l <- sum(l0) if (deriv>0) { @@ -555,11 +710,19 @@ ## get the gradient and Hessian... ret <- gamlss.gH(X,jj,de$l1,de$l2,i2,l3=de$l3,i3=i3,l4=de$l4,i4=i4, - d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D) + d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D,sandwich=sandwich) + if (ncv) { + ret$l1 <- de$l1; ret$l2 = de$l2; ret$l3 = de$l3 + } } else ret <- list() - ret$l <- l; ret + ret$l <- l; ret$l0 <- l0; ret } ## end ll gaulss + sandwich <- function(y,X,coef,wt,family,offset=NULL) { + ## compute filling for sandwich estimate of cov matrix + ll(y,X,coef,wt,family,offset=NULL,deriv=1,sandwich=TRUE)$lbb + } + initialize <- expression({ ## idea is to regress g(y) on model matrix for mean, and then ## to regress the corresponding log absolute residuals on @@ -637,7 +800,7 @@ } ## rd - structure(list(family="gaulss",ll=ll,link=paste(link),nlp=2, + structure(list(family="gaulss",ll=ll,link=paste(link),ncv=ncv,nlp=2,sandwich=sandwich, tri = trind.generator(2), ## symmetric indices for accessing derivative arrays initialize=initialize,postproc=postproc,residuals=residuals, linfo = stats,rd=rd, ## link information list @@ -773,12 +936,14 @@ multinom$gamma <- log(multinom$gamma/sum(multinom$gamma)) object$null.deviance <- -2*sum(multinom$gamma[object$y+1]) }) + + ncv <- function(X,y,wt,nei,beta,family,llf,H=NULL,Hi=NULL,R=NULL,offset=NULL,dH=NULL,db=NULL,deriv=FALSE,nt=1) { + gamlss.ncv(X,y,wt,nei,beta,family,llf,H=H,Hi=Hi,R=R,offset=offset,dH=dH,db=db,deriv=deriv,nt=nt) + } ## ncv - ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL,eta=NULL) { + ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL,eta=NULL,ncv=FALSE,sandwich=FALSE) { ## Function defining the logistic multimomial model log lik. ## Assumption is that coding runs from 0..K, with 0 class having no l.p. - ## argument eta is for debugging only, and allows direct FD testing of the - ## derivatives w.r.t. eta. ## ... this matches binary log reg case... ## deriv: 0 - eval ## 1 - grad and Hess @@ -786,10 +951,10 @@ ## 3 - first deriv of Hess ## 4 - everything. 
n <- length(y) + jj <- attr(X,"lpi") ## extract linear predictor index if (is.null(eta)) { discrete <- is.list(X) - return.l <- FALSE - jj <- attr(X,"lpi") ## extract linear predictor index + ##return.l <- FALSE K <- length(jj) ## number of linear predictors eta <- matrix(1,n,K+1) ## linear predictor matrix (dummy 1's in first column) if (is.null(offset)) offset <- list() @@ -797,7 +962,7 @@ for (i in 1:K) if (is.null(offset[[i]])) offset[[i]] <- 0 for (i in 1:K) eta[,i+1] <- offset[[i]] + if (discrete) Xbd(X$Xd,coef,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc, drop=X$drop,lt=X$lpid[[i]]) else X[,jj[[i]],drop=FALSE]%*%coef[jj[[i]]] - } else { l2 <- 0;K <- ncol(eta);eta <- cbind(1,eta); return.l <- TRUE} + } else { l2 <- 0;K <- ncol(eta);eta <- cbind(1,eta)} ##; return.l <- TRUE} if (K!=family$nlp) stop("number of linear predictors doesn't match") y <- round(y) ## just in case @@ -871,16 +1036,22 @@ } } ## if deriv>3 - if (return.l) return(list(l=l0,l1=l1,l2=l2,l3=l3,l4=l4)) ## for testing... + ##if (return.l) return(list(l=l0,l1=l1,l2=l2,l3=l3,l4=l4)) ## for testing... if (deriv) { ## get the gradient and Hessian... ret <- gamlss.gH(X,jj,l1,l2,tri$i2,l3=l3,i3=tri$i3,l4=l4,i4=tri$i4, - d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D) + d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D,sandwich=sandwich) + if (ncv) { ret$l1=l1; ret$l2=l2; ret$l3=l3 } } else ret <- list() ret$l <- l; ret } ## end ll multinom + sandwich <- function(y,X,coef,wt,family,offset=NULL) { + ## compute filling for sandwich estimate of cov matrix + ll(y,X,coef,wt,family,offset=NULL,deriv=1,sandwich=TRUE)$lbb + } + rd <- function(mu,wt,scale) { ## simulate data given fitted linear predictor matrix in mu p <- exp(cbind(0,mu)) @@ -936,7 +1107,7 @@ }) ## initialize multinom structure(list(family="multinom",ll=ll,link=NULL,#paste(link), - nlp=round(K),rd=rd, + nlp=round(K),rd=rd,ncv=ncv,sandwich=sandwich, tri = trind.generator(K), ## symmetric indices for accessing derivative arrays initialize=initialize,postproc=postproc,residuals=residuals,predict=predict, linfo = stats, ## link information list @@ -1342,9 +1513,13 @@ object$null.deviance <- 2*(sum(ls(object$y)) - lnull) }) ## postproc + + ncv <- function(X,y,wt,nei,beta,family,llf,H=NULL,Hi=NULL,R=NULL,offset=NULL,dH=NULL,db=NULL,deriv=FALSE,nt=1) { + gamlss.ncv(X,y,wt,nei,beta,family,llf,H=H,Hi=Hi,R=R,offset=offset,dH=dH,db=db,deriv=deriv,nt=nt) + } ## ncv - ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL) { + ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL,eta=NULL,ncv=FALSE,sandwich=FALSE) { ## function defining the gamlss ZIP model log lik. ## First l.p. defines Poisson mean, given presence (lambda) ## Second l.p. defines probability of presence (p) @@ -1356,9 +1531,14 @@ if (is.null(offset)) offset <- list(0,0) else offset[[3]] <- 0 for (i in 1:2) if (is.null(offset[[i]])) offset[[i]] <- 0 jj <- attr(X,"lpi") ## extract linear predictor index - eta <- X[,jj[[1]],drop=FALSE]%*%coef[jj[[1]]] + offset[[1]] + if (is.null(eta)) { + eta <- X[,jj[[1]],drop=FALSE]%*%coef[jj[[1]]] + offset[[1]] + eta1 <- X[,jj[[2]],drop=FALSE]%*%coef[jj[[2]]] +offset[[2]] + } else { ## eta supplied + eta1 <- eta[,2] + eta <- eta[,1] + } lambda <- family$linfo[[1]]$linkinv(eta) - eta1 <- X[,jj[[2]],drop=FALSE]%*%coef[jj[[2]]] +offset[[2]] p <- family$linfo[[2]]$linkinv(eta1) ##n <- length(y) @@ -1394,11 +1574,19 @@ ## get the gradient and Hessian... 
ret <- gamlss.gH(X,jj,de$l1,de$l2,i2,l3=de$l3,i3=i3,l4=de$l4,i4=i4, - d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D) + d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D,sandwich=sandwich) + if (ncv) { + ret$l1 <- de$l1; ret$l2 = de$l2; ret$l3 = de$l3 + } } else ret <- list() ret$l <- sum(zl$l); ret } ## end ll for ZIP + sandwich <- function(y,X,coef,wt,family,offset=NULL) { + ## compute filling for sandwich estimate of cov matrix + ll(y,X,coef,wt,family,offset=NULL,deriv=1,sandwich=TRUE)$lbb + } + initialize <- expression({ ## for ZIP ## Idea is to regress binarized y on model matrix for p. ## Then downweight any y=0 with p<0.5 and regress g(y) on @@ -1448,7 +1636,7 @@ } }) ## initialize ziplss - structure(list(family="ziplss",ll=ll,link=paste(link),nlp=2, + structure(list(family="ziplss",ll=ll,link=paste(link),nlp=2,ncv=ncv,sandwich=sandwich, tri = trind.generator(2), ## symmetric indices for accessing derivative arrays initialize=initialize,postproc=postproc,residuals=residuals,rd=rd,predict=predict, linfo = stats, ## link information list @@ -1486,6 +1674,7 @@ stats[[i]]$d3link <- fam$d3link stats[[i]]$d4link <- fam$d4link } + if (link[[3]]=="logit") { ## shifted logit link to confine xi to (-1,.5) ## Smith '85 Biometrika shows that -1 limit needed for MLE consistency ## but would need -0.5 for normality... @@ -1526,8 +1715,12 @@ object$null.deviance <- NA }) + + ncv <- function(X,y,wt,nei,beta,family,llf,H=NULL,Hi=NULL,R=NULL,offset=NULL,dH=NULL,db=NULL,deriv=FALSE,nt=1) { + gamlss.ncv(X,y,wt,nei,beta,family,llf,H=H,Hi=Hi,R=R,offset=offset,dH=dH,db=db,deriv=deriv,nt=nt) + } ## ncv - ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL) { + ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL,eta=NULL,ncv=FALSE,sandwich=FALSE) { ## function defining the gamlss GEV model log lik. 
## deriv: 0 - eval ## 1 - grad and Hess @@ -1537,14 +1730,20 @@ if (!is.null(offset)) offset[[4]] <- 0 discrete <- is.list(X) jj <- attr(X,"lpi") ## extract linear predictor index - eta <- if (discrete) Xbd(X$Xd,coef,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,lt=X$lpid[[1]]) else X[,jj[[1]],drop=FALSE]%*%coef[jj[[1]]] - if (!is.null(offset[[1]])) eta <- eta + offset[[1]] + if (is.null(eta)) { + eta <- if (discrete) Xbd(X$Xd,coef,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,lt=X$lpid[[1]]) else X[,jj[[1]],drop=FALSE]%*%coef[jj[[1]]] + if (!is.null(offset[[1]])) eta <- eta + offset[[1]] + etar <- if (discrete) Xbd(X$Xd,coef,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,lt=X$lpid[[2]]) else X[,jj[[2]],drop=FALSE]%*%coef[jj[[2]]] ## log sigma + if (!is.null(offset[[2]])) etar <- etar + offset[[2]] + etax <- if (discrete) Xbd(X$Xd,coef,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,lt=X$lpid[[3]]) else X[,jj[[3]],drop=FALSE]%*%coef[jj[[3]]] ## shape parameter + if (!is.null(offset[[3]])) etax <- etax + offset[[3]] + } else { ## eta supplied + etar <- eta[,2] + etax <- eta[,3] + eta <- eta[,1] + } mu <- family$linfo[[1]]$linkinv(eta) ## mean - etar <- if (discrete) Xbd(X$Xd,coef,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,lt=X$lpid[[2]]) else X[,jj[[2]],drop=FALSE]%*%coef[jj[[2]]] ## log sigma - if (!is.null(offset[[2]])) etar <- etar + offset[[2]] rho <- family$linfo[[2]]$linkinv(etar) ## log sigma - etax <- if (discrete) Xbd(X$Xd,coef,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,lt=X$lpid[[3]]) else X[,jj[[3]],drop=FALSE]%*%coef[jj[[3]]] ## shape parameter - if (!is.null(offset[[3]])) etax <- etax + offset[[3]] xi <- family$linfo[[3]]$linkinv(etax) ## shape parameter ## Avoid xi == 0 - using a separate branch for xi==0 requires @@ -1580,8 +1779,8 @@ log.aa1 <- log1p(aa0) ## added aa1 <- aa0 + 1 # (xi*(y-mu))/exp1^rho+1; aa2 <- 1/xi; - l <- sum((-aa2*(1+xi)*log.aa1)-1/aa1^aa2-rho); - #if (length(ind)>0) cat(aa0[ind]," l = ",l,"\n") + l0 <- (-aa2*(1+xi)*log.aa1)-1/aa1^aa2-rho; + l <- sum(l0) if (deriv>0) { ## first derivatives m, r, x... @@ -1794,11 +1993,19 @@ ## get the gradient and Hessian... ret <- gamlss.gH(X,jj,de$l1,de$l2,i2,l3=de$l3,i3=i3,l4=de$l4,i4=i4, - d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D) + d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D,sandwich=sandwich) + if (ncv) { + ret$l1 <- de$l1; ret$l2 = de$l2; ret$l3 = de$l3 + } } else ret <- list() - ret$l <- l; ret + ret$l <- l; ret$l0 <- l0; ret } ## end ll gevlss + sandwich <- function(y,X,coef,wt,family,offset=NULL) { + ## compute filling for sandwich estimate of cov matrix + ll(y,X,coef,wt,family,offset=NULL,deriv=1,sandwich=TRUE)$lbb + } + initialize <- expression({ ## start out with xi close to zero. 
If xi==0 then ## mean is mu + sigma*gamma and var is sigma^2*pi^2/6 @@ -1899,7 +2106,7 @@ Fi.gev(runif(nrow(mu)),mu[,1],exp(mu[,2]),mu[,3]) } ## gevlss rd - structure(list(family="gevlss",ll=ll,link=paste(link),nlp=3, + structure(list(family="gevlss",ll=ll,link=paste(link),nlp=3,ncv=ncv,sandwich=sandwich, tri = trind.generator(3), ## symmetric indices for accessing derivative arrays initialize=initialize,postproc=postproc,residuals=residuals, linfo = stats, ## link information list @@ -1907,7 +2114,7 @@ ls=1, ## signals that ls not needed here rd=rd, available.derivs = 2, ## can use full Newton here - discrete.ok = TRUE + discrete.ok = TRUE,qapprox=TRUE ),class = c("general.family","extended.family","family")) } ## end gevlss @@ -2019,7 +2226,7 @@ object$null.deviance <- sum(pmax(2 * (object$y * tw.theta - tw.kappa) * object$prior.weights/exp(object$fitted.values[,3]),0)) }) - ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL) { + ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL,sandwich=FALSE) { ## function defining the gamlss Tweedie model log lik. ## deriv: 0 - eval ## 1 - grad and Hess @@ -2038,7 +2245,8 @@ ld <- ldTweedie(y,mu=mu,p=NA,phi=NA,rho=rho,theta=theta,a=a,b=b,all.derivs=TRUE) ## m, t, r ; mm, mt, mr, tt, tr, rr - l <- sum(ld[,1]) + l0 <- ld[,1] + l <- sum(l0) l1 <- cbind(ld[,7],ld[,4],ld[,2]) l2 <- cbind(ld[,8],ld[,9],ld[,10],ld[,5],ld[,6],ld[,3]) @@ -2061,11 +2269,16 @@ ## get the gradient and Hessian... ret <- gamlss.gH(X,jj,de$l1,de$l2,i2,l3=de$l3,i3=i3,l4=de$l4,i4=i4, - d1b=d1b,d2b=d2b,deriv=0,fh=fh,D=D) + d1b=d1b,d2b=d2b,deriv=0,fh=fh,D=D,sandwich=sandwich) } else ret <- list() - ret$l <- l; ret + ret$l <- l; ret$l0 <- l0; ret } ## end ll twlss + sandwich <- function(y,X,coef,wt,family,offset=NULL) { + ## compute filling for sandwich estimate of cov matrix + ll(y,X,coef,wt,family,offset=NULL,deriv=1,sandwich=TRUE)$lbb + } + initialize <- expression({ ## idea is to regress g(y) on model matrix for mean, and then ## to regress the corresponding log absolute scaled residuals on @@ -2109,7 +2322,7 @@ environment(ll) <- environment(residuals) <- env - structure(list(family="twlss",ll=ll,link=paste(link),nlp=3, + structure(list(family="twlss",ll=ll,link=paste(link),nlp=3,sandwich=sandwich, tri = trind.generator(3), ## symmetric indices for accessing derivative arrays initialize=initialize,postproc=postproc,residuals=residuals, linfo = stats, ## link information list @@ -2225,7 +2438,12 @@ object$null.deviance <- sum(((object$y-.my)/.my-log(object$y/.my))*exp(-object$fitted.values[,2]))*2 }) - ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL) { + ncv <- function(X,y,wt,nei,beta,family,llf,H=NULL,Hi=NULL,R=NULL,offset=NULL,dH=NULL,db=NULL,deriv=FALSE,nt=1) { + gamlss.ncv(X,y,wt,nei,beta,family,llf,H=H,Hi=Hi,R=R,offset=offset,dH=dH,db=db,deriv=deriv,nt=nt) + } ## ncv + + + ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL,eta=NULL,ncv=FALSE,sandwich=FALSE) { ## function defining the gamlss gamma model log lik. 
## deriv: 0 - eval ## 1 - grad and Hess @@ -2236,11 +2454,17 @@ if (!is.null(offset)) offset[[3]] <- 0 discrete <- is.list(X) jj <- attr(X,"lpi") ## extract linear predictor index - eta <- if (discrete) Xbd(X$Xd,coef,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,lt=X$lpid[[1]]) else X[,jj[[1]],drop=FALSE]%*%coef[jj[[1]]] - if (!is.null(offset[[1]])) eta <- eta + offset[[1]] ## log mu + if (is.null(eta)) { + eta <- if (discrete) Xbd(X$Xd,coef,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,lt=X$lpid[[1]]) else X[,jj[[1]],drop=FALSE]%*%coef[jj[[1]]] + if (!is.null(offset[[1]])) eta <- eta + offset[[1]] ## log mu + + etat <- if (discrete) Xbd(X$Xd,coef,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,lt=X$lpid[[2]]) else X[,jj[[2]],drop=FALSE]%*%coef[jj[[2]]] + if (!is.null(offset[[2]])) etat <- etat + offset[[2]] + } else { + etat <- eta[,2] + eta <- eta[,1] + } mu <- family$linfo[[1]]$linkinv(eta) ## mean - etat <- if (discrete) Xbd(X$Xd,coef,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,lt=X$lpid[[2]]) else X[,jj[[2]],drop=FALSE]%*%coef[jj[[2]]] - if (!is.null(offset[[2]])) etat <- etat + offset[[2]] th <- family$linfo[[2]]$linkinv(etat) ## log sigma eth <- exp(-th) ## 1/exp1^th; @@ -2250,7 +2474,8 @@ etlymt <- eth*(logy-mu-th) n <- length(y) - l <- sum(etlymt-logy-ethmuy-lgamma(eth)) ## l + l0 <- etlymt-logy-ethmuy-lgamma(eth) ## l + l <- sum(l0) if (deriv>0) { l1 <- matrix(0,n,2) @@ -2310,11 +2535,19 @@ ## get the gradient and Hessian... ret <- gamlss.gH(X,jj,de$l1,de$l2,i2,l3=de$l3,i3=i3,l4=de$l4,i4=i4, - d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D) + d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D,sandwich=sandwich) + if (ncv) { + ret$l1 <- de$l1; ret$l2 = de$l2; ret$l3 = de$l3 + } } else ret <- list() - ret$l <- l; ret + ret$l <- l; ret$l0 <- l0; ret } ## end ll gammals + sandwich <- function(y,X,coef,wt,family,offset=NULL) { + ## compute filling for sandwich estimate of cov matrix + ll(y,X,coef,wt,family,offset=NULL,deriv=1,sandwich=TRUE)$lbb + } + initialize <- expression({ ## regress X[,[jj[[1]]] on log(y) then X[,jj[[2]]] on log abs ## raw residuals. @@ -2431,7 +2664,7 @@ list(fit=gamma) } ## gammals predict - structure(list(family="gammals",ll=ll,link=paste(link),nlp=2, + structure(list(family="gammals",ll=ll,link=paste(link),nlp=2,ncv=ncv,sandwich=sandwich, tri = trind.generator(2), ## symmetric indices for accessing derivative arrays initialize=initialize,postproc=postproc,residuals=residuals, linfo = stats,rd=rd,predict=predict, ## link information list @@ -2543,7 +2776,12 @@ object$null.deviance <- NA }) - ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL) { + ncv <- function(X,y,wt,nei,beta,family,llf,H=NULL,Hi=NULL,R=NULL,offset=NULL,dH=NULL,db=NULL,deriv=FALSE,nt=1) { + gamlss.ncv(X,y,wt,nei,beta,family,llf,H=H,Hi=Hi,R=R,offset=offset,dH=dH,db=db,deriv=deriv,nt=nt) + } ## ncv + + + ll <- function(y,X,coef,wt,family,offset=NULL,deriv=0,d1b=0,d2b=0,Hp=NULL,rank=0,fh=NULL,D=NULL,eta=NULL,ncv=FALSE,sandwich=FALSE) { ## function defining the gamlss gamma model log lik. 
## deriv: 0 - eval ## 1 - grad and Hess @@ -2554,17 +2792,26 @@ if (!is.null(offset)) offset[[3]] <- 0 discrete <- is.list(X) jj <- attr(X,"lpi") ## extract linear predictor index - eta <- if (discrete) Xbd(X$Xd,coef,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,lt=X$lpid[[1]]) else X[,jj[[1]],drop=FALSE]%*%coef[jj[[1]]] - if (!is.null(offset[[1]])) eta <- eta + offset[[1]] ## mu + if (is.null(eta)) { + eta <- if (discrete) Xbd(X$Xd,coef,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,lt=X$lpid[[1]]) else X[,jj[[1]],drop=FALSE]%*%coef[jj[[1]]] + if (!is.null(offset[[1]])) eta <- eta + offset[[1]] ## mu + + etab <- if (discrete) Xbd(X$Xd,coef,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,lt=X$lpid[[2]]) else X[,jj[[2]],drop=FALSE]%*%coef[jj[[2]]] + if (!is.null(offset[[2]])) etab <- etab + offset[[2]] + } else { + etab <- eta[,2] + eta <- eta[,1] + } mu <- family$linfo[[1]]$linkinv(eta) ## mean - etab <- if (discrete) Xbd(X$Xd,coef,k=X$kd,ks=X$ks,ts=X$ts,dt=X$dt,v=X$v,qc=X$qc,drop=X$drop,lt=X$lpid[[2]]) else X[,jj[[2]],drop=FALSE]%*%coef[jj[[2]]] - if (!is.null(offset[[2]])) etab <- etab + offset[[2]] beta <- family$linfo[[2]]$linkinv(etab) ## log beta eb <- exp(-beta) z <- (y-mu)*eb ez <- exp(-z) - l <- sum(-beta - z - ez) + + l0 <- -beta - z - ez + l <- sum(l0) + n <- length(y) if (deriv>0) { @@ -2623,11 +2870,19 @@ ## get the gradient and Hessian... ret <- gamlss.gH(X,jj,de$l1,de$l2,i2,l3=de$l3,i3=i3,l4=de$l4,i4=i4, - d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D) + d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D,sandwich=sandwich) + if (ncv) { + ret$l1 <- de$l1; ret$l2 = de$l2; ret$l3 = de$l3 + } } else ret <- list() - ret$l <- l; ret + ret$l <- l;ret$l0 <- l0; ret } ## end ll gumbls + sandwich <- function(y,X,coef,wt,family,offset=NULL) { + ## compute filling for sandwich estimate of cov matrix + ll(y,X,coef,wt,family,offset=NULL,deriv=1,sandwich=TRUE)$lbb + } + initialize <- expression({ ## regress X[,[jj[[1]]] on y then X[,jj[[2]]] on ## log((y-mu)^2)/2 - 0.25 where mu is fit to y @@ -2749,7 +3004,7 @@ list(fit=gamma) } ## gumbls predict - structure(list(family="gumbls",ll=ll,link=paste(link),nlp=2, + structure(list(family="gumbls",ll=ll,link=paste(link),nlp=2,ncv=ncv,sandwich=sandwich, tri = trind.generator(2), ## symmetric indices for accessing derivative arrays initialize=initialize,postproc=postproc,residuals=residuals, linfo = stats,rd=rd,predict=predict, ## link information list @@ -2843,8 +3098,14 @@ } rsd } ## residuals - - ll <- function(y, X, coef, wt, family, offset = NULL, deriv=0, d1b=0, d2b=0, Hp=NULL, rank=0, fh=NULL, D=NULL) { + + ncv <- function(X,y,wt,nei,beta,family,llf,H=NULL,Hi=NULL,R=NULL,offset=NULL,dH=NULL,db=NULL,deriv=FALSE,nt=1) { + gamlss.ncv(X,y,wt,nei,beta,family,llf,H=H,Hi=Hi,R=R,offset=offset,dH=dH,db=db,deriv=deriv,nt=nt) + } ## ncv + + + ll <- function(y, X, coef, wt, family, offset = NULL, deriv=0, d1b=0, d2b=0, Hp=NULL, rank=0, fh=NULL, D=NULL, + eta=NULL,ncv=FALSE,sandwich=FALSE) { ## function defining the shash model log lik. 
## deriv: 0 - eval ## 1 - grad and Hess @@ -2898,12 +3159,17 @@ npar <- 4 n <- length(y) - - eta <- drop( X[ , jj[[1]], drop=FALSE] %*% coef[jj[[1]]] ) - eta1 <- drop( X[ , jj[[2]], drop=FALSE] %*% coef[jj[[2]]] ) - eta2 <- drop( X[ , jj[[3]], drop=FALSE] %*% coef[jj[[3]]] ) - eta3 <- drop( X[ , jj[[4]], drop=FALSE] %*% coef[jj[[4]]] ) - + if (is.null(eta)) { + eta <- drop( X[ , jj[[1]], drop=FALSE] %*% coef[jj[[1]]] ) + eta1 <- drop( X[ , jj[[2]], drop=FALSE] %*% coef[jj[[2]]] ) + eta2 <- drop( X[ , jj[[3]], drop=FALSE] %*% coef[jj[[3]]] ) + eta3 <- drop( X[ , jj[[4]], drop=FALSE] %*% coef[jj[[4]]] ) + } else { + eta1 <- eta[,2] + eta2 <- eta[,3] + eta3 <- eta[,4] + eta <- eta[,1] + } mu <- family$linfo[[1]]$linkinv( eta ) tau <- family$linfo[[2]]$linkinv( eta1 ) eps <- family$linfo[[3]]$linkinv( eta2 ) @@ -2919,8 +3185,9 @@ CC <- cosh( dTasMe ) SS <- sinh( dTasMe ) - l <- sum( - tau - 0.5*log(2*pi) + log(CC) - 0.5*.log1pexp(2*log(abs(z))) - 0.5*SS^2 - phiPen*phi^2 ) - + l0 <- - tau - 0.5*log(2*pi) + log(CC) - 0.5*.log1pexp(2*log(abs(z))) - 0.5*SS^2 - phiPen*phi^2 + l <- sum(l0) + if (deriv>0) { zsd <- z*sig*del @@ -3378,12 +3645,19 @@ ## get the gradient and Hessian... ret <- gamlss.gH(X,jj,de$l1,de$l2,I2,l3=de$l3,i3=I3,l4=de$l4,i4=I4, - d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D) - + d1b=d1b,d2b=d2b,deriv=deriv-1,fh=fh,D=D,sandwich=sandwich) + if (ncv) { + ret$l1 <- de$l1; ret$l2 = de$l2; ret$l3 = de$l3 + } } else ret <- list() - ret$l <- l; ret + ret$l <- l;ret$l0 <- l0; ret } ## end ll - + + sandwich <- function(y,X,coef,wt,family,offset=NULL) { + ## compute filling for sandwich estimate of cov matrix + ll(y,X,coef,wt,family,offset=NULL,deriv=1,sandwich=TRUE)$lbb + } + initialize <- expression({ ## idea is to regress g(y) on model matrix for mean, and then ## to regress the corresponding log absolute residuals on @@ -3477,7 +3751,7 @@ } - structure(list(family="shash",ll=ll, link=paste(link), nlp=npar, + structure(list(family="shash",ll=ll, link=paste(link), nlp=npar,ncv=ncv,sandwich=sandwich, tri = trind.generator(npar), ## symmetric indices for accessing derivative arrays initialize=initialize, #postproc=postproc, diff -Nru mgcv-1.8-40/R/gamm.r mgcv-1.8-41/R/gamm.r --- mgcv-1.8-40/R/gamm.r 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/R/gamm.r 2022-08-22 06:55:50.000000000 +0000 @@ -393,11 +393,13 @@ attr(random[[i]],"Xr.name") <- term.name attr(random[[i]],"Xr") <- X } else { ## gamm4 form --- whole sparse matrices - - Xr <- as(matrix(0,nrow(X),0),"dgCMatrix") + ## Xr <- as(matrix(0,nrow(X),0),"dgCMatrix") - deprecated, use... 
+ Xr <- as(as(as(matrix(0,nrow(X),0), "dMatrix"), "generalMatrix"), "CsparseMatrix") ii <- 0 for (j in 1:n.lev) { ## assemble full sparse model matrix - Xr <- cbind2(Xr,as(X*as.numeric(object$fac==object$flev[j]),"dgCMatrix")) + ## Xr <- cbind2(Xr,as(X*as.numeric(object$fac==object$flev[j]),"dgCMatrix")) - deprecated + Xr <- cbind2(Xr, + as(as(as(X*as.numeric(object$fac==object$flev[j]), "dMatrix"), "generalMatrix"), "CsparseMatrix")) pen.ind[indi+ii] <- i;ii <- ii + colx } random[[i]] <- if (is.null(object$Xb)) Xr else as(Xr,"matrix") diff -Nru mgcv-1.8-40/R/inla.r mgcv-1.8-41/R/inla.r --- mgcv-1.8-40/R/inla.r 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/R/inla.r 2022-07-06 11:02:55.000000000 +0000 @@ -430,6 +430,6 @@ } inla$reml <- reml } ## if nip - if (prog) cat("\n") + if (prog) close(prg) inla } ## ginla or gam inla newton enhanced (ginlane) diff -Nru mgcv-1.8-40/R/mgcv.r mgcv-1.8-41/R/mgcv.r --- mgcv-1.8-40/R/mgcv.r 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/R/mgcv.r 2022-10-20 20:00:29.000000000 +0000 @@ -1464,7 +1464,7 @@ -gam.outer <- function(lsp,fscale,family,control,method,optimizer,criterion,scale,gamma,G,start=NULL,...) +gam.outer <- function(lsp,fscale,family,control,method,optimizer,criterion,scale,gamma,G,start=NULL,nei=NULL,...) # function for smoothing parameter estimation by outer optimization. i.e. # P-IRLS scheme iterated to convergence for each trial set of smoothing # parameters. @@ -1478,11 +1478,6 @@ if (optimizer[2]%in%c("nlm.fd")) .Deprecated(msg=paste("optimizer",optimizer[2],"is deprecated, please use newton or bfgs")) -# if (optimizer[1]=="efs" && !inherits(family,"general.family")) { -# warning("Extended Fellner Schall only implemented for general families") -# optimizer <- c("outer","newton") -# } - if (length(lsp)==0) { ## no sp estimation to do -- run a fit instead optimizer[2] <- "no.sps" ## will cause gam2objective to be called, below } @@ -1495,7 +1490,7 @@ control$nlm$stepmax, ndigit = control$nlm$ndigit, gradtol = control$nlm$gradtol, steptol = control$nlm$steptol, iterlim = control$nlm$iterlim, G=G,family=family,control=control, - gamma=gamma,start=start,...) + gamma=gamma,start=start,nei=nei,...) lsp<-um$estimate object<-attr(full.score(lsp,G,family,control,gamma=gamma,...),"full.gam.object") object$gcv.ubre <- um$minimum @@ -1527,13 +1522,13 @@ family=family,weights=G$w,control=control,gamma=gamma,scale=scale,conv.tol=control$newton$conv.tol, maxNstep= control$newton$maxNstep,maxSstep=control$newton$maxSstep,maxHalf=control$newton$maxHalf, printWarn=FALSE,scoreType=criterion,null.coef=G$null.coef,start=start, - pearson.extra=G$pearson.extra,dev.extra=G$dev.extra,n.true=G$n.true,Sl=G$Sl,...) else + pearson.extra=G$pearson.extra,dev.extra=G$dev.extra,n.true=G$n.true,Sl=G$Sl,nei=nei,...) else b <- newton(lsp=lsp,X=G$X,y=G$y,Eb=G$Eb,UrS=G$UrS,L=G$L,lsp0=G$lsp0,offset=G$offset,U1=G$U1,Mp=G$Mp, family=family,weights=G$w,control=control,gamma=gamma,scale=scale,conv.tol=control$newton$conv.tol, maxNstep= control$newton$maxNstep,maxSstep=control$newton$maxSstep,maxHalf=control$newton$maxHalf, printWarn=FALSE,scoreType=criterion,null.coef=G$null.coef,start=start, pearson.extra=G$pearson.extra,dev.extra=G$dev.extra,n.true=G$n.true,Sl=G$Sl, - edge.correct=control$edge.correct,...) + edge.correct=control$edge.correct,,nei=nei,...) 
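## The coercion chain used above is the Matrix-recommended replacement for the
## deprecated one-step as(A, "dgCMatrix"); a minimal sketch with a throwaway
## dense matrix:
library(Matrix)
A  <- matrix(c(1, 0, 0, 2, 0, 3), 2, 3)
Xr <- as(as(as(A, "dMatrix"), "generalMatrix"), "CsparseMatrix")
class(Xr)  ## typically "dgCMatrix", matching the old direct coercion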
object <- b$object object$REML <- object$REML1 <- object$REML2 <- @@ -1546,7 +1541,7 @@ } else { ## methods calling gam.fit3 args <- list(X=G$X,y=G$y,Eb=G$Eb,UrS=G$UrS,offset=G$offset,U1=G$U1,Mp=G$Mp,family=family, weights=G$w,control=control,scoreType=criterion,gamma=gamma,scale=scale, - L=G$L,lsp0=G$lsp0,null.coef=G$null.coef,n.true=G$n.true,Sl=G$Sl,start=start) + L=G$L,lsp0=G$lsp0,null.coef=G$null.coef,n.true=G$n.true,Sl=G$Sl,start=start,nei=nei) if (optimizer[2]=="nlm") { b <- nlm(gam4objective, lsp, typsize = lsp, fscale = fscale, @@ -1578,12 +1573,28 @@ object$control <- control object$method <- method if (inherits(family,"general.family")) { - mv <- gam.fit5.post.proc(object,G$Sl,G$L,G$lsp0,G$S,G$off) + mv <- gam.fit5.post.proc(object,G$Sl,G$L,G$lsp0,G$S,G$off,gamma) ## object$coefficients <- Sl.initial.repara(G$Sl,object$coefficients,inverse=TRUE) - } else mv <- gam.fit3.post.proc(G$X,G$L,G$lsp0,G$S,G$off,object) + } else mv <- gam.fit3.post.proc(G$X,G$L,G$lsp0,G$S,G$off,object,gamma) object[names(mv)] <- mv + if (!is.null(nei)&&(criterion!="NCV"||nei$jackknife)) { ## returning NCV when other criterion used for sp selection, or computing perturbations + if (!is.null(nei$QNCV)&&nei$GNCV) family$qapprox <- TRUE + if (is.null(family$qapprox)) family$qapprox <- FALSE + lsp <- if (is.null(G$L)) log(object$sp) + G$lsp0 else G$L%*%log(object$sp)+G$lsp0 + if (object$scale.estimated && criterion %in% c("REML","ML","EFS")) lsp <- lsp[-length(lsp)] ## drop log scale estimate + if (is.null(nei$gamma)) nei$gamma <- 1 ## a major application of this NCV is to select gamma - so it must not itself change with gamma! + if (nei$jackknife) nei$jackknife <- 10 ## signal that cross-validated beta perturbations are required + b <- gam.fit3(x=G$X, y=G$y, sp=lsp,Eb=G$Eb,UrS=G$UrS, + offset = G$offset,U1=G$U1,Mp=G$Mp,family = family,weights=G$w,deriv=0, + control=control,gamma=nei$gamma, + scale=scale,printWarn=FALSE,start=start,scoreType="NCV",null.coef=G$null.coef, + pearson.extra=G$pearson.extra,dev.extra=G$dev.extra,n.true=G$n.true,Sl=G$Sl,nei=nei,...) + object$NCV <- as.numeric(b$NCV) + object$Vj <- attr(b$NCV,"Vj") + + } ## note: use of the following (Vc) in place of Vp appears to mess up p-values for smooths, ## but doesn't change r.e. p-values of course. #if (!is.null(mv$Vc)) object$Vc <- mv$Vc @@ -1598,7 +1609,7 @@ #object$R <- mv$R ## qr.R(sqrt(W)X) object$aic <- object$aic + 2*sum(object$edf) object$nsdf <- G$nsdf - object$K <- object$D1 <- object$D2 <- object$P <- object$P1 <- object$P2 <- + object$K <- object$D1 <- object$D2 <- object$P <- object$P1 <- object$P2 <- object$dw.drho <- object$GACV <- object$GACV1 <- object$GACV2 <- object$REML <- object$REML1 <- object$REML2 <- object$GCV<-object$GCV1<- object$GCV2 <- object$UBRE <-object$UBRE1 <- object$UBRE2 <- object$trA <- object$trA1<- object$trA2 <- object$alpha <- object$alpha1 <- object$scale.est <- NULL @@ -1625,15 +1636,28 @@ list(null.coef=null.coef,null.scale=null.scale) } -estimate.gam <- function (G,method,optimizer,control,in.out,scale,gamma,start=NULL,...) { +estimate.gam <- function (G,method,optimizer,control,in.out,scale,gamma,start=NULL,nei=NULL,...) { ## Do gam estimation and smoothness selection... 
- + + if (method %in% c("QNCV","NCV")||!is.null(nei)) { + optimizer <- c("outer","bfgs") + if (method=="QNCV") { method <- "NCV";G$family$qapprox <- TRUE } else G$family$qapprox <- FALSE + if (is.null(nei)) nei <- list(i=1:G$n,mi=1:G$n,m=1:G$n,k=1:G$n) ## LOOCV + if (is.null(nei$k)||is.null(nei$m)) nei$k <- nei$m <- nei$mi <- nei$i <- 1:G$n + if (is.null(nei$i)) if (length(nei$m)==G$n) nei$mi <- nei$i <- 1:G$n else stop("unclear which points NCV neighbourhoods belong to") + if (length(nei$mi)!=length(nei$m)) stop("for NCV number of dropped and predicted neighbourhoods must match") + if (is.null(nei$jackknife)) nei$jackknife <- FALSE + } + if (inherits(G$family,"extended.family")) { ## then there are some restrictions... - if (!(method%in%c("REML","ML"))) method <- "REML" + if (!(method%in%c("REML","ML","NCV"))) method <- "REML" if (optimizer[1]=="perf") optimizer <- c("outer","newton") if (inherits(G$family,"general.family")) { - - method <- "REML" ## any method you like as long as it's REML + if (!(method%in%c("REML","NCV"))||optimizer[1]=="efs") method <- "REML" + if (method=="NCV"&&is.null(G$family$ncv)) { + warning("family lacks a Neighbourhood Cross Validation method") + method <- "REML" + } G$Sl <- Sl.setup(G) ## prepare penalty sequence G$X <- Sl.initial.repara(G$Sl,G$X,both.sides=FALSE) ## re-parameterize accordingly @@ -1650,17 +1674,16 @@ if (!optimizer[1]%in%c("perf","outer","efs")) stop("unknown optimizer") if (optimizer[1]=="efs") method <- "REML" - if (!method%in%c("GCV.Cp","GACV.Cp","REML","P-REML","ML","P-ML")) stop("unknown smoothness selection criterion") + if (!method%in%c("GCV.Cp","GACV.Cp","REML","P-REML","ML","P-ML","NCV")) stop("unknown smoothness selection criterion") G$family <- fix.family(G$family) G$rS <- mini.roots(G$S,G$off,ncol(G$X),G$rank) - - if (method%in%c("REML","P-REML","ML","P-ML")) { - if (optimizer[1]=="perf") { - warning("Reset optimizer to outer/newton") - optimizer <- c("outer","newton") - } - reml <- TRUE - } else reml <- FALSE ## experimental insert + + reml <- method%in%c("REML","P-REML","ML","P-ML","NCV") + if ((reml||!is.null(nei)) && optimizer[1]=="perf") { + warning("Reset optimizer to outer/newton") + optimizer <- c("outer","newton") + } + Ssp <- totalPenaltySpace(G$S,G$H,G$off,ncol(G$X)) G$Eb <- Ssp$E ## balanced penalty square root for rank determination purposes G$U1 <- cbind(Ssp$Y,Ssp$Z) ## eigen space basis @@ -1673,7 +1696,7 @@ # is outer looping needed ? - outer.looping <- ((!G$am && (optimizer[1]!="perf"))||reml||method=="GACV.Cp") ## && length(G$S)>0 && sum(G$sp<0)!=0 + outer.looping <- ((!G$am && (optimizer[1]!="perf"))||reml||method=="GACV.Cp"||method=="NCV"||!is.null(nei)) ## && length(G$S)>0 && sum(G$sp<0)!=0 ## sort out exact sp selection criterion to use @@ -1685,7 +1708,7 @@ } else {G$sig2 <- scale} - if (reml) { ## then RE(ML) selection, but which variant? + if (reml||method=="NCV") { ## then RE(ML) selection, but which variant? criterion <- method if (fam.name == "binomial"||fam.name == "poisson") scale <- 1 if (inherits(G$family,"extended.family") && scale <=0) { @@ -1779,8 +1802,10 @@ if (is.null(G$family$get.null.coef)) get.null.coef(G,...) else G$family$get.null.coef(G,...) 
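## A minimal sketch of supplying 'nei' to gam(), based on the defaults and checks
## above (component meanings inferred from this code, not from documentation):
## k = indices of points dropped, m = cumulative end of each dropped neighbourhood
## within k, i = points predicted, mi = cumulative end of each prediction set within i.
library(mgcv)
n <- 100
nei <- list(k = 1:n, m = 1:n, i = 1:n, mi = 1:n)  ## written-out leave-one-out NCV
## hypothetical fit ('dat' with n rows is not defined here):
## b <- gam(y ~ s(x0) + s(x1), data = dat, method = "NCV", nei = nei)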
} if (fixedSteps>0&&is.null(in.out)) mgcv.conv <- object$mgcv.conv else mgcv.conv <- NULL - - if (criterion%in%c("REML","ML")&&scale<=0) { ## log(scale) to be estimated as a smoothing parameter + + scale.as.sp <- (criterion%in%c("REML","ML")||(criterion=="NCV"&&inherits(G$family,"extended.family")))&&scale<=0 + + if (scale.as.sp) { ## log(scale) to be estimated as a smoothing parameter if (fixedSteps>0) { log.scale <- log(sum(object$weights*object$residuals^2)/(G$n-sum(object$edf))) } else { @@ -1822,9 +1847,9 @@ object <- gam.outer(lsp,fscale=null.stuff$null.scale, ##abs(object$gcv.ubre)+object$sig2/length(G$y), family=G$family,control=control,criterion=criterion,method=method, - optimizer=optimizer,scale=scale,gamma=gamma,G=G,start=start,...) + optimizer=optimizer,scale=scale,gamma=gamma,G=G,start=start,nei=nei,...) - if (criterion%in%c("REML","ML")&&scale<=0) object$sp <- + if (scale.as.sp) object$sp <- object$sp[-length(object$sp)] ## drop scale estimate from sp array if (inherits(G$family,"extended.family")&&nth>0) object$sp <- object$sp[-(1:nth)] ## drop theta params @@ -1838,7 +1863,7 @@ if (!inherits(G$family,"extended.family")&&G$intercept&&any(G$offset!=0)) object$null.deviance <- glm(object$y~offset(G$offset),family=object$family,weights=object$prior.weights)$deviance - object$method <- criterion + object$method <- if (method=="NCV"&&G$family$qapprox) "QNCV" else criterion object$smooth<-G$smooth @@ -1940,7 +1965,8 @@ gam <- function(formula,family=gaussian(),data=list(),weights=NULL,subset=NULL,na.action,offset=NULL, method="GCV.Cp",optimizer=c("outer","newton"),control=list(),#gam.control(), scale=0,select=FALSE,knots=NULL,sp=NULL,min.sp=NULL,H=NULL,gamma=1,fit=TRUE, - paraPen=NULL,G=NULL,in.out=NULL,drop.unused.levels=TRUE,drop.intercept=NULL,discrete=FALSE,...) { + paraPen=NULL,G=NULL,in.out=NULL,drop.unused.levels=TRUE,drop.intercept=NULL, + nei=NULL,discrete=FALSE,...) { ## Routine to fit a GAM to some data. The model is stated in the formula, which is then ## interpreted to figure out which bits relate to smooth terms and which to parametric terms. ## Basic steps: @@ -1970,7 +1996,7 @@ cl <- match.call() # call needed in gam object for update to work mf <- match.call(expand.dots=FALSE) mf$formula <- gp$fake.formula - mf$family <- mf$control<-mf$scale<-mf$knots<-mf$sp<-mf$min.sp<-mf$H<-mf$select <- mf$drop.intercept <- + mf$family <- mf$control<-mf$scale<-mf$knots<-mf$sp<-mf$min.sp<-mf$H<-mf$select <- mf$drop.intercept <- mf$nei <- mf$gamma<-mf$method<-mf$fit<-mf$paraPen<-mf$G<-mf$optimizer <- mf$in.out <- mf$discrete <- mf$...<-NULL mf$drop.unused.levels <- drop.unused.levels mf[[1]] <- quote(stats::model.frame) ## as.name("model.frame") @@ -2085,7 +2111,7 @@ G$conv.tol <- control$mgcv.tol # tolerence for mgcv G$max.half <- control$mgcv.half # max step halving in Newton update mgcv - object <- estimate.gam(G,method,optimizer,control,in.out,scale,gamma,...) + object <- estimate.gam(G,method,optimizer,control,in.out,scale,gamma,nei=nei,...) if (!is.null(G$L)) { @@ -2121,6 +2147,9 @@ class(object) <- c("gam","glm","lm") if (is.null(object$deviance)) object$deviance <- sum(residuals(object,"deviance")^2) names(object$gcv.ubre) <- method + ## The following lines avoid potentially very large objects in hidden environments being stored + ## with fitted gam objects. The downside is that functions like 'termplot' that rely on searching in + ## the environment of the formula can fail... 
environment(object$formula) <- environment(object$pred.formula) <- environment(object$terms) <- environment(object$pterms) <- .GlobalEnv if (!is.null(object$model)) environment(attr(object$model,"terms")) <- .GlobalEnv @@ -2158,7 +2187,7 @@ invisible(x) } -gam.control <- function (nthreads=1,irls.reg=0.0,epsilon = 1e-7, maxit = 200, +gam.control <- function (nthreads=1,ncv.threads=1,irls.reg=0.0,epsilon = 1e-7, maxit = 200, mgcv.tol=1e-7,mgcv.half=15,trace =FALSE, rank.tol=.Machine$double.eps^0.5, nlm=list(),optim=list(),newton=list(),outerPIsteps=0, @@ -2176,7 +2205,8 @@ { scale.est <- match.arg(scale.est,c("fletcher","pearson","deviance")) if (!is.logical(edge.correct)&&(!is.numeric(edge.correct)||edge.correct<0)) stop( "edge.correct must be logical or a positive number") - if (!is.numeric(nthreads) || nthreads <1) stop("nthreads must be a positive integer") + if (!is.numeric(nthreads) || nthreads <1) stop("nthreads must be a positive integer") + if (!is.numeric(ncv.threads) || ncv.threads <1) stop("ncv.threads must be a positive integer") if (!is.numeric(irls.reg) || irls.reg <0.0) stop("IRLS regularizing parameter must be a non-negative number.") if (!is.numeric(epsilon) || epsilon <= 0) stop("value of epsilon must be > 0") @@ -2217,7 +2247,7 @@ optim$factr <- abs(optim$factr) if (efs.tol<=0) efs.tol <- .1 - list(nthreads=round(nthreads),irls.reg=irls.reg,epsilon = epsilon, maxit = maxit, + list(nthreads=round(nthreads),ncv.threads=round(ncv.threads),irls.reg=irls.reg,epsilon = epsilon, maxit = maxit, trace = trace, mgcv.tol=mgcv.tol,mgcv.half=mgcv.half, rank.tol=rank.tol,nlm=nlm, optim=optim,newton=newton,outerPIsteps=outerPIsteps, @@ -2884,6 +2914,17 @@ Xp <- model.matrix(Terms[[i]],object$model) mf <- newdata # needed in case of offset, below } + if (!is.null(terms)||!is.null(exclude)) { ## work out which parts of Xp to zero + assign <- attr(Xp,"assign") ## assign[i] is the term to which Xp[,i] relates + if (min(assign)==0&&("(Intercept)"%in%exclude||(!is.null(terms)&&!"(Intercept)"%in%terms))) Xp[,which(assign==0)] <- 0 + tlab <- attr(Terms[[i]],"term.labels") + ii <- which(assign%in%which(tlab%in%exclude)) + if (length(ii)) Xp[,ii] <- 0 + if (!is.null(terms)) { + ii <- which(assign%in%which(!tlab%in%terms)) + if (length(ii)) Xp[,ii] <- 0 + } + } offi <- attr(Terms[[i]],"offset") if (is.null(offi)) offs[[i]] <- 0 else { ## extract offset offs[[i]] <- mf[[names(attr(Terms[[i]],"dataClasses"))[offi+1]]] @@ -3363,6 +3404,32 @@ pr/nq } +packing.ind <- function(first,last,p,n) { +## Let A be a matrix with n rows, and B be a p by p matrix. +## A[first:last,first:last] is to be filled using B. If +## B is of the corect dimension then A[first:last,first:last] <-B +## but if p is a submultiple of (last-first+1) then the leading +## block diagonal of the block is to be filled repeatedly with +## B. In either case this routine returns an index ii such that +## A[ii] <- B fills the block appropriately + if (last-first == p-1) { + ii <- first:last + return(rep(ii,p) + n*rep(ii-1,each=p)) + } else { + k <- round((last-first+1)/p) + if (k*p!=last-first+1) stop("incorrect sub-block size") + ii <- rep(1:p,p) + n*rep(1:p-1,each=p) + p2 <- p*p + ind <- rep(0,p2*k) + jj <- 1:(p2) + for (i in 1:k) { + bs <- first + (i-1)*p -1 + ind[jj] <- ii + bs + bs*n + jj <- jj + p2 + } + return(ind) + } +} ## packing.ind recov <- function(b,re=rep(0,0),m=0) { ## b is a fitted gam object. 
re is an array of indices of @@ -3384,10 +3451,14 @@ k <- 1;S1 <- matrix(0,np,np) for (i in 1:length(b$smooth)) { ns <- length(b$smooth[[i]]$S) - ind <- b$smooth[[i]]$first.para:b$smooth[[i]]$last.para - if (ns>0) for (j in 1:ns) { - S1[ind,ind] <- S1[ind,ind] + sp[k]*b$smooth[[i]]$S[[j]] - k <- k + 1 + #ind <- b$smooth[[i]]$first.para:b$smooth[[i]]$last.para + if (ns>0) { + ii <- packing.ind(b$smooth[[i]]$first.para,b$smooth[[i]]$last.para,ncol(b$smooth[[i]]$S[[1]]),np) + for (j in 1:ns) { + #S1[ind,ind] <- S1[ind,ind] + sp[k]*b$smooth[[i]]$S[[j]] + S1[ii] <- S1[ii] + sp[k]*as.numeric(b$smooth[[i]]$S[[j]]) + k <- k + 1 + } } } LRB <- rbind(b$R,t(mroot(S1))) @@ -3425,12 +3496,17 @@ k <- 1 for (i in 1:length(b$smooth)) { ns <- length(b$smooth[[i]]$S) - ind <- map[b$smooth[[i]]$first.para:b$smooth[[i]]$last.para] + #ind <- map[b$smooth[[i]]$first.para:b$smooth[[i]]$last.para] is.random <- i%in%re - if (ns>0) for (j in 1:ns) { - if (is.random) S2[ind,ind] <- S2[ind,ind] + sp[k]*b$smooth[[i]]$S[[j]] else - S1[ind,ind] <- S1[ind,ind] + sp[k]*b$smooth[[i]]$S[[j]] - k <- k + 1 + if (ns>0) { + ii <- packing.ind(map[b$smooth[[i]]$first.para],map[b$smooth[[i]]$last.para],ncol(b$smooth[[i]]$S[[1]]),if (is.random) p2 else p1) + for (j in 1:ns) { + #if (is.random) S2[ind,ind] <- S2[ind,ind] + sp[k]*b$smooth[[i]]$S[[j]] else + # S1[ind,ind] <- S1[ind,ind] + sp[k]*b$smooth[[i]]$S[[j]] + if (is.random) S2[ii] <- S2[ii] + sp[k]*as.numeric(b$smooth[[i]]$S[[j]]) else + S1[ii] <- S1[ii] + sp[k]*as.numeric(b$smooth[[i]]$S[[j]]) + k <- k + 1 + } } } ## pseudoinvert S2 @@ -3479,10 +3555,10 @@ ## check that smooth penalty matrices are full size. ## e.g. "fs" type smooths estimated by gamm do not ## have full sized S matrices, and we can't compute - ## p-values here.... - if (ncol(b$smooth[[m]]$S[[1]]) != b$smooth[[m]]$last.para-b$smooth[[m]]$first.para+1) { - return(list(stat=NA,pval=NA,rank=NA)) - } + ## p-values here - actually we can see recov! + #if (ncol(b$smooth[[m]]$S[[1]]) != b$smooth[[m]]$last.para-b$smooth[[m]]$first.para+1) { + # return(list(stat=NA,pval=NA,rank=NA)) + #} ## find indices of random effects other than m rind <- rep(0,0) @@ -4103,12 +4179,36 @@ } ## end of gam.vcomp -vcov.gam <- function(object, freq = FALSE, dispersion = NULL,unconditional=FALSE, ...) +gam.sandwich <- function(b,freq=FALSE) { +## computes sandwich estimator of variance + B2 <- if (freq) 0 else b$Vp - b$Ve ## Bayes squared bias estimate + X <- model.matrix(b) + m <- nrow(X); m <- m/(m-sum(b$edf)) + if (inherits(b$family,"extended.family")) { + if (inherits(b$family,"general.family")) { + if (is.null(b$family$sandwich)) stop("no sandwich estimate available for this model") + Vs <- m*b$Vp%*%b$family$sandwich(b$y,X,b$coefficients,b$prior.weights,b$family,offset=attr(X,"offset"))%*%b$Vp + B2 + } else { + dd <- dDeta(b$y,b$fitted.values,b$prior.weights,b$family$getTheta(),b$family,deriv=0) + Vs <- m*b$Vp%*%crossprod(0.5/b$sig2*dd$Deta*X)%*%b$Vp + B2 + } + } else { ## exponential family + mu <- b$fitted.values + w <- b$family$mu.eta(b$linear.predictors)*(b$y - mu)/(b$sig2*b$family$variance(mu)) + Vs <- m*b$Vp%*%crossprod(w*X)%*%b$Vp + B2 + } + Vs +} ## gam.sandwich + + +vcov.gam <- function(object, sandwich=FALSE, freq = FALSE, dispersion = NULL,unconditional=FALSE, ...) 
## supplied by Henric Nilsson -{ if (freq) - vc <- object$Ve - else { - vc <- if (unconditional&&!is.null(object$Vc)) object$Vc else object$Vp +{ if (sandwich) vc <- gam.sandwich(object,freq) else { + if (freq) + vc <- object$Ve + else { + vc <- if (unconditional&&!is.null(object$Vc)) object$Vc else object$Vp + } } if (!is.null(dispersion)) vc <- dispersion * vc / object$sig2 diff -Nru mgcv-1.8-40/R/misc.r mgcv-1.8-41/R/misc.r --- mgcv-1.8-40/R/misc.r 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/R/misc.r 2022-09-26 09:10:45.000000000 +0000 @@ -1,10 +1,29 @@ ## (c) Simon N. Wood 2011-2019 ## Many of the following are simple wrappers for C functions +dpnorm <- function(x0,x1) { + ## Cancellation avoiding evaluation of pnorm(x1)-pnorm(x0) + ## first avoid 1-1 problems by exchanging and changing sign of double +ve + ii <- x1>0&x0>0 + d <- x0[ii];x0[ii] <- -x1[ii];x1[ii] <- -d + ## now deal with points that are so close that cancellation error + ## too large - might as well use density times interval width + ii <- abs(x1-x0) < sqrt(.Machine$double.eps)*dnorm((x1+x0)/2) + p <- x0; d <- x1[ii]-x0[ii]; m <- (x1[ii]+x0[ii])/2 + p[ii] <- dnorm(m)*d + p[!ii] <- pnorm(x1[!ii]) - pnorm(x0[!ii]) + p +} ## dpnorm + + "%.%" <- function(a,b) { if (inherits(a,"dgCMatrix")||inherits(b,"dgCMatrix")) - tensor.prod.model.matrix(list(as(a,"dgCMatrix"),as(b,"dgCMatrix"))) else - tensor.prod.model.matrix(list(as.matrix(a),as.matrix(b))) + tensor.prod.model.matrix(list( + as(as(as(a, "dMatrix"), "generalMatrix"), "CsparseMatrix"), + as(as(as(b, "dMatrix"), "generalMatrix"), "CsparseMatrix") + )) else tensor.prod.model.matrix(list(as.matrix(a),as.matrix(b))) +# tensor.prod.model.matrix(list(as(a,"dgCMatrix"),as(b,"dgCMatrix"))) else - deprecated +# tensor.prod.model.matrix(list(as.matrix(a),as.matrix(b))) } blas.thread.test <- function(n=1000,nt=4) { @@ -176,12 +195,16 @@ ## * if both NULL all terms are included, if only one is NULL then used for left and right. m <- unlist(lapply(X,nrow));p <- unlist(lapply(X,ncol)) nx <- length(X);nt <- length(ts) - n <- length(w);pt <- 0; - for (i in 1:nt) pt <- pt + prod(p[ts[i]:(ts[i]+dt[i]-1)]) - as.numeric(qc[i]>0) - + n <- length(w);ptfull <- pt <- 0; + for (i in 1:nt) { + fullsize <- prod(p[ts[i]:(ts[i]+dt[i]-1)]) + ptfull <- ptfull + fullsize + pt <- pt + fullsize - if (qc[i]>0) 1 else if (qc[i]<0) v[[i]][v[[i]][1]+2] else 0 + } if (inherits(X[[1]],"dgCMatrix")) { ## the marginals are sparse if (length(ar.stop)>1||ar.stop!=-1) warning("AR not available with sparse marginals") ## create list for passing to C + if (any(qc<0)) stop("sparse method for Kronecker product contrasts not implemented") m <- list(Xd=X,kd=k,ks=ks,v=v,ts=ts,dt=dt,qc=qc) m$off <- attr(X,"off"); m$r <- attr(X,"r") if (is.null(m$off)||is.null(m$r)) stop("reverse indices missing from sparse discrete marginals") @@ -224,8 +247,8 @@ } ## block oriented code... if (is.null(lt)&&is.null(lt)) { - #t0 <- system.time( - oo <- .C(C_XWXd0,XWX =as.double(rep(0,(pt+nt)^2)),X= as.double(unlist(X)),w=as.double(w), + #t0 <- system.time( ## BUG dodgy assumption about full sized XWX, based on one constraint per term!!! 
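## A minimal sketch of the cancellation avoidance in dpnorm() defined above: both
## limits are reflected to the lower tail when positive, and a density-times-width
## approximation is used when the limits nearly coincide.
dpn <- mgcv:::dpnorm       ## assuming the function stays internal to mgcv
dpn(8, 9)                  ## ~6.22e-16, computed as pnorm(-8) - pnorm(-9)
pnorm(9) - pnorm(8)        ## same quantity naively; most significant digits lost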
+ oo <- .C(C_XWXd0,XWX =as.double(rep(0,ptfull^2)),X= as.double(unlist(X)),w=as.double(w), k=as.integer(k-1),ks=as.integer(ks-1),m=as.integer(m),p=as.integer(p), n=as.integer(n), ns=as.integer(nx), ts=as.integer(ts-1), as.integer(dt), nt=as.integer(nt), v = as.double(unlist(v)),qc=as.integer(qc),nthreads=as.integer(nthreads), @@ -247,7 +270,7 @@ } else ncs <- length(rt) } #t0 <- system.time( - oo <- .C(C_XWXd1,XWX =as.double(rep(0,(pt+nt)^2)),X= as.double(unlist(X)),w=as.double(w), + oo <- .C(C_XWXd1,XWX =as.double(rep(0,ptfull^2)),X= as.double(unlist(X)),w=as.double(w), k=as.integer(k-1),ks=as.integer(ks-1),m=as.integer(m),p=as.integer(p), n=as.integer(n), ns=as.integer(nx), ts=as.integer(ts-1), as.integer(dt), nt=as.integer(nt), v = as.double(unlist(v)),qc=as.integer(qc),nthreads=as.integer(nthreads), @@ -285,7 +308,7 @@ ##for (i in 1:nt) pt <- pt + prod(p[ts[i]:(ts[i]+dt[i]-1)]) - as.numeric(qc[i]>0) if (is.null(lt)) { pt <- 0 - for (i in 1:nt) pt <- pt + prod(p[ts[i]:(ts[i]+dt[i]-1)]) - as.numeric(qc[i]>0) + for (i in 1:nt) pt <- pt + prod(p[ts[i]:(ts[i]+dt[i]-1)]) - if (qc[i]>0) 1 else if (qc[i]<0) v[[i]][v[[i]][1]+2] else 0 lt <- 1:nt } else { lpip <- attr(X,"lpip") ## list of coefs for each term @@ -777,6 +800,17 @@ } } ## temp.seed +mat.rowsum <- function(X,m,k) { +## Let X be n by p and m of length M. Produces an M by p matrix B +## where the ith row of B is the sum of the rows X[k[j],] where +## j = (m[i-1]+1):m[i]. m[0] is taken as 0. +## n <- 10;p <- 5;X <- matrix(runif(n*p),n,p) +## m <- c(3,5,8,11);k <- c(1,4,3,6,1,5,7,10,9,5,6) +## mgcv:::mat.rowsum(X,m,k) + if (max(k)>nrow(X)||min(k)<1) stop("index vector has invalid entries") + k <- k - 1 ## R to C index conversion + .Call(C_mrow_sum,X,m,k) +} ## mat.rowsum isa <- function(R,nt=1) { ## Finds the elements of (R'R)^{-1} on NZP(R+R'). @@ -797,3 +831,34 @@ A } ## AddBVB +minres <- function(R,u,b) { +## routine to solve (R'R-uu')x = b using minres algorithm. +## set.seed(0);n <- 100;p <- 20;X <- matrix(runif(n*p)-.5,n,p);R <- chol(crossprod(X));b <- runif(p);k <- 1; +## solve(crossprod(X[-k,]),b);mgcv:::minres(R,t(X[k,]),b) + x <- b; p <- length(b); + m <- if (is.matrix(u)) ncol(u) else 1 + work <- rep(0,p*(m+7)+m) + oo <- .C(C_minres,R=as.double(R), u=as.double(u),b=as.double(b), x=as.double(x), p=as.integer(p),m=as.integer(m),work=as.double(work)) + cat("\n niter : ",oo$m,"\n") + oo$x +} + +## following are wrappers for KP STZ constraints - intended for testing only + +Zb <- function(b0,v,qc,p,w) { + b1 <- rep(0,p) + oo <- .C(C_Zb,b1=as.double(b1),as.double(b0),as.double(v),as.integer(qc),as.integer(p),as.double(w)) + oo$b1 +} + +Ztb <- function(b0,v,qc,di,p,w) { + ## p is length(b0)/di + w <- rep(0,2*p) + M <- v[1] + pp <- p + for (i in 1:M) pp <- pp/v[i+1]; + p0 <- prod(v[1+1:M]-1)*pp + b1 <- rep(0,p0*di) + oo <- .C(C_Ztb,b1=as.double(b1),as.double(b0),as.double(v),as.integer(qc),as.integer(di),as.integer(p),as.double(w)) + oo$b1 +} \ No newline at end of file diff -Nru mgcv-1.8-40/R/plots.r mgcv-1.8-41/R/plots.r --- mgcv-1.8-40/R/plots.r 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/R/plots.r 2022-09-27 08:11:33.000000000 +0000 @@ -287,7 +287,7 @@ plot(fv, napredict(b$na.action, b$y), xlab="Fitted Values",ylab="Response",main="Response vs. Fitted Values",...) 
- gamm <- !(b$method%in%c("GCV","GACV","UBRE","REML","ML","P-ML","P-REML","fREML")) ## gamm `gam' object + gamm <- !(b$method%in%c("GCV","GACV","UBRE","REML","ML","P-ML","P-REML","fREML","NCV")) ## gamm `gam' object #if (is.null(.Platform$GUI) || .Platform$GUI != "RStudio") par(old.par) # return(invisible()) @@ -676,6 +676,95 @@ } ## end plot.mrf.smooth + +plot.sz.interaction <- function(x,P=NULL,data=NULL,label="",se1.mult=2,se2.mult=1, + partial.resids=FALSE,rug=TRUE,se=TRUE,scale=-1,n=100,n2=40,n3=3, + pers=FALSE,theta=30,phi=30,jit=FALSE,xlab=NULL,ylab=NULL,main=NULL, + ylim=NULL,xlim=NULL,too.far=0.1,shade=FALSE,shade.col="gray80", + shift=0,trans=I,by.resids=FALSE,scheme=0,...) { +## plot method for factor smooth interactions designed for models such as s(x) + s(fac,x) where +## the factor level dependent smooths are strictly deviations from the main effect smooth. + + nf2i <- function(nf,i) { + k <- length(nf) + kk <- rep(0,k) + i <- i-1 + for (j in k:1) { + kk[j] <- i %% nf[j] + 1 + i <- i %/% nf[j] + } + kk + } ## nf2i + + if (is.null(P)) { ## get plotting info + if (x$base$dim!=1) return(NULL) ## no method for base smooth dim > 1 + raw <- data[x$base$term][[1]] + xx <- seq(min(raw),max(raw),length=n) # generate x sequence for prediction + nf <- unlist(lapply(x$flev,length)) ## levels per grouping factor + dat <- data.frame(rep(xx,prod(nf))) + k <- length(x$flev) ## number of factors + for (i in 1:k) { + re <- if (i1) prod(nf[1:(i-1)]) else 1 + dat[,i+1] <- factor(rep(rep(x$flev[[i]],each=re*n),rs),levels=x$flev[[i]]) + } + names(dat) <- c(x$base$term,x$fterm) + if (x$by!="NA") { # deal with any by variables + dat[[x$by]] <- rep(1,n) + } + X <- PredictMat(x,dat) + if (is.null(xlab)) xlabel <- x$base$term else xlabel <- xlab + if (is.null(ylab)) ylabel <- label else ylabel <- ylab + return(list(X=X,scale=TRUE,se=TRUE,se.mult=se1.mult,raw=raw,xlab=xlabel,ylab=ylabel, + main="",x=xx,n=n,nf=nf)) + } else { ## produce the plot + nft <- prod(P$nf) ## total number of curves + if (scheme!=1) { + kol <- hcl.colors(nft,palette = "viridis", alpha = .33) ## CI + lkol <- hcl.colors(nft,palette = "viridis", alpha = .66) ## mode + tkol <- hcl.colors(nft,palette = "viridis", alpha = 1) ## label + } + xlim <- range(P$x);dx <- xlim[2]-xlim[1] + xt <- xlim[1] + (1:nft-.5)*dx/nft ## text locations + ind <- 1:P$n; mind <- P$n:1 + + if(is.null(ylim)) ylim <- trans(range(c(P$fit+P$se,P$fit-P$se))+shift) + + plot(P$x[ind],trans(P$fit[ind]+shift),ylim=ylim,xlab=P$xlab,ylab=P$ylab,type="n",...) 
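## A minimal sketch of the kind of model this plot method is for, assuming the new
## factor smooth interaction class is requested with bs = "sz" (see ?factor.smooth);
## 'dat', 'fac' and 'x' are hypothetical:
## b <- gam(y ~ s(x) + s(fac, x, bs = "sz"), data = dat)
## plot(b, pages = 1)  ## the s(fac,x) panel is drawn by plot.sz.interaction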
+ + nfac <- length(P$nf) ## number of factors + kk <- rep(0,nfac) ## factor level index vector + if (scheme==1) { + for (i in 1:nft) { + ul <- trans(P$fit[ind] + P$se[ind]+shift) + ll <- trans(P$fit[ind] - P$se[ind]+shift) + lines(P$x,ul,col="grey",lty=i);lines(P$x,ll,col="grey",lty=i) + ii <- P$x < xt[i] - dx/30 + yt <- approx(P$x,P$fit[ind],xt[i])$y + lines(P$x[ii],(P$fit[ind])[ii],lty=i,lwd=2) + text(xt[i],yt,paste(nf2i(P$nf,i),collapse=".")) + ii <- P$x > xt[i] + dx/30 + lines(P$x[ii],(P$fit[ind])[ii],lty=i,lwd=2) + ind <- ind + P$n; mind <- mind + P$n + } + } else { + for (i in 1:nft) { + ul <- trans(P$fit[ind] + P$se[ind]+shift) + ll <- trans(P$fit[mind] - P$se[mind]+shift) + polygon(c(P$x,P$x[P$n:1]),c(ul,ll),col=kol[i],border=kol[i]) + yt <- approx(P$x,P$fit[ind],xt[i])$y + ii <- P$x < xt[i] - dx/30 + lines(P$x[ii],(P$fit[ind])[ii],col=lkol[i]) + text(xt[i],yt,paste(nf2i(P$nf,i),collapse="."),col=tkol[i]) + ii <- P$x > xt[i] + dx/30 + lines(P$x[ii],(P$fit[ind])[ii],col=lkol[i]) + ind <- ind + P$n; mind <- mind + P$n + } + } + } +} ## end plot.sz.interaction + + plot.fs.interaction <- function(x,P=NULL,data=NULL,label="",se1.mult=2,se2.mult=1, partial.resids=FALSE,rug=TRUE,se=TRUE,scale=-1,n=100,n2=40,n3=3, pers=FALSE,theta=30,phi=30,jit=FALSE,xlab=NULL,ylab=NULL,main=NULL, @@ -1269,7 +1358,12 @@ meanL1 <- x$smooth[[i]]$meanL1 if (!is.null(meanL1)) X1 <- X1 / meanL1 X1[,first:last] <- P$X - se.fit <- sqrt(pmax(0,rowSums(as(X1%*%x$Vp,"matrix")*X1))) + lpi <- attr(x$formula,"lpi") + if (is.null(lpi)) se.fit <- sqrt(pmax(0,rowSums(as(X1%*%x$Vp,"matrix")*X1))) else { + ii <- rep(0,0) ## only include constant uncertainty from relevant linear predictors + for (q in 1:length(lpi)) if (any(first:last%in%lpi[[q]])) ii <- c(ii,lpi[[q]]) + se.fit <- sqrt(pmax(0,rowSums(as(X1[,ii]%*%x$Vp[ii,ii],"matrix")*X1[,ii]))) + } } else se.fit <- ## se in centred (or anyway unconstained) space only sqrt(pmax(0,rowSums(as(P$X%*%x$Vp[first:last,first:last,drop=FALSE],"matrix")*P$X))) if (!is.null(P$exclude)) se.fit[P$exclude] <- NA @@ -1439,7 +1533,7 @@ vis.gam <- function(x,view=NULL,cond=list(),n.grid=30,too.far=0,col=NA,color="heat", - contour.col=NULL,se=-1,type="link",plot.type="persp",zlim=NULL,nCol=50,...) + contour.col=NULL,se=-1,type="link",plot.type="persp",zlim=NULL,nCol=50,lp=1,...) # takes a gam object and plots 2D views of it, supply ticktype="detailed" to get proper axis anotation # (c) Simon N. Wood 23/2/03 { fac.seq<-function(fac,n.grid) @@ -1527,53 +1621,62 @@ newd[[view[1]]]<-v1 newd[[view[2]]]<-v2 # call predict.gam to get predictions..... 
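A hedged usage sketch for the new 'lp' argument added to the vis.gam argument list above: for a model with more than one linear predictor, lp selects which predictor is displayed. The fit 'b', its variables and the gaulss family choice below are illustrative assumptions, not taken from the package documentation.
## b <- gam(list(y ~ s(x) + s(z), ~ s(z)), family = gaulss(), data = dat)
## vis.gam(b, view = c("x","z"), plot.type = "contour", lp = 2) ## view the second (scale) linear predictor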
- if (type=="link") zlab<-paste("linear predictor") ## ignore codetools - else if (type=="response") zlab<-type + if (type=="link") zlab <- paste("linear predictor") ## ignore codetools + else if (type=="response") zlab <- type else stop("type must be \"link\" or \"response\"") fv <- predict.gam(x,newdata=newd,se.fit=TRUE,type=type) z <- fv$fit # store NA free copy now - if (too.far>0) # exclude predictions too far from data - { ex.tf <- exclude.too.far(v1,v2,x$model[,view[1]],x$model[,view[2]],dist=too.far) - fv$se.fit[ex.tf] <- fv$fit[ex.tf]<-NA + if (is.matrix(z)) { + lp <- min(ncol(z),max(1,round(lp))) + z <- z[,lp] ## retain selected linear predictor + fv$fit <- fv$fit[,lp] + fv$se.fit <- fv$se.fit[,lp] + } + if (too.far>0) { # exclude predictions too far from data + ex.tf <- exclude.too.far(v1,v2,x$model[,view[1]],x$model[,view[2]],dist=too.far) + fv$se.fit[ex.tf] <- fv$fit[ex.tf] <- NA } # produce a continuous scale in place of any factors - if (is.factor(m1)) - { m1<-as.numeric(m1);m1<-seq(min(m1)-0.5,max(m1)+0.5,length=n.grid) } - if (is.factor(m2)) - { m2<-as.numeric(m2);m2<-seq(min(m1)-0.5,max(m2)+0.5,length=n.grid) } - if (se<=0) - { old.warn<-options(warn=-1) - av<-matrix(c(0.5,0.5,rep(0,n.grid-1)),n.grid,n.grid-1) + if (is.factor(m1)) { + m1<-as.numeric(m1);m1<-seq(min(m1)-0.5,max(m1)+0.5,length=n.grid) + } + if (is.factor(m2)) { + m2<-as.numeric(m2);m2<-seq(min(m1)-0.5,max(m2)+0.5,length=n.grid) + } + if (se<=0) { + old.warn<-options(warn=-1) + av <- matrix(c(0.5,0.5,rep(0,n.grid-1)),n.grid,n.grid-1) options(old.warn) # z is without any exclusion of gridpoints, so that averaging works nicely max.z <- max(z,na.rm=TRUE) z[is.na(z)] <- max.z*10000 # make sure NA's don't mess it up - z<-matrix(z,n.grid,n.grid) # convert to matrix - surf.col<-t(av)%*%z%*%av # average over tiles + z <- matrix(z,n.grid,n.grid) # convert to matrix + surf.col <- t(av)%*%z%*%av # average over tiles surf.col[surf.col>max.z*2] <- NA # restore NA's # use only non-NA data to set colour limits - if (!is.null(zlim)) - { if (length(zlim)!=2||zlim[1]>=zlim[2]) stop("Something wrong with zlim") - min.z<-zlim[1] - max.z<-zlim[2] - } else - { min.z<-min(fv$fit,na.rm=TRUE) - max.z<-max(fv$fit,na.rm=TRUE) + if (!is.null(zlim)) { + if (length(zlim)!=2||zlim[1]>=zlim[2]) stop("Something wrong with zlim") + min.z <- zlim[1] + max.z <- zlim[2] + } else { + min.z <- min(fv$fit,na.rm=TRUE) + max.z <- max(fv$fit,na.rm=TRUE) } - surf.col<-surf.col-min.z - surf.col<-surf.col/(max.z-min.z) - surf.col<-round(surf.col*nCol) - con.col <-1 + if (min.z==max.z) {min.z <- min.z-1;max.z <- max.z + 1} + surf.col <- surf.col-min.z + surf.col <- surf.col/(max.z-min.z) + surf.col <- round(surf.col*nCol) + con.col <- 1 if (color=="heat") { pal<-heat.colors(nCol);con.col<-4;} else if (color=="topo") { pal<-topo.colors(nCol);con.col<-2;} else if (color=="cm") { pal<-cm.colors(nCol);con.col<-1;} else if (color=="terrain") { pal<-terrain.colors(nCol);con.col<-2;} else if (color=="gray"||color=="bw") {pal <- gray(seq(0.1,0.9,length=nCol));con.col<-1} else stop("color scheme not recognised") - if (is.null(contour.col)) contour.col<-con.col # default colour scheme - surf.col[surf.col<1]<-1;surf.col[surf.col>nCol]<-nCol # otherwise NA tiles can get e.g. -ve index - if (is.na(col)) col<-pal[as.array(surf.col)] - z<-matrix(fv$fit,n.grid,n.grid) + if (is.null(contour.col)) contour.col <- con.col # default colour scheme + surf.col[surf.col<1] <- 1; surf.col[surf.col>nCol] <- nCol # otherwise NA tiles can get e.g. 
-ve index + if (is.na(col)) col <- pal[as.array(surf.col)] + z <- matrix(fv$fit,n.grid,n.grid) if (plot.type=="contour") { stub <- paste(ifelse("xlab" %in% dnm, "" , ",xlab=view[1]"), ifelse("ylab" %in% dnm, "" , ",ylab=view[2]"), @@ -1614,14 +1717,14 @@ } if (!is.null(zlim)) { if (length(zlim)!=2||zlim[1]>=zlim[2]) stop("Something wrong with zlim") - min.z<-zlim[1] - max.z<-zlim[2] + min.z <- zlim[1] + max.z <- zlim[2] } else { - max.z<-max(fv$fit+fv$se.fit*se,na.rm=TRUE) - min.z<-min(fv$fit-fv$se.fit*se,na.rm=TRUE) + max.z <- max(fv$fit+fv$se.fit*se,na.rm=TRUE) + min.z <- min(fv$fit-fv$se.fit*se,na.rm=TRUE) zlim<-c(min.z,max.z) - } - z<-fv$fit-fv$se.fit*se;z<-matrix(z,n.grid,n.grid) + } + z <- fv$fit - fv$se.fit*se; z <- matrix(z,n.grid,n.grid) if (plot.type=="contour") warning("sorry no option for contouring with errors: try plot.gam") stub <- paste(ifelse("xlab" %in% dnm, "" , ",xlab=view[1]"), @@ -1635,7 +1738,7 @@ eval(parse(text=txt)) par(new=TRUE) # don't clean device - z<-fv$fit;z<-matrix(z,n.grid,n.grid) + z <- fv$fit; z <- matrix(z,n.grid,n.grid) txt <- paste("persp(m1,m2,z,col=col,zlim=zlim", ifelse("border" %in% dnm, "" ,",border=\"black\""), @@ -1643,13 +1746,12 @@ eval(parse(text=txt)) par(new=TRUE) # don't clean device - z<-fv$fit+se*fv$se.fit;z<-matrix(z,n.grid,n.grid) + z <- fv$fit+se*fv$se.fit; z <- matrix(z,n.grid,n.grid) txt <- paste("persp(m1,m2,z,col=col,zlim=zlim", ifelse("border" %in% dnm, "" ,",border=hi.col"), stub,sep="") eval(parse(text=txt)) - } } ## vis.gam diff -Nru mgcv-1.8-40/R/smooth.r mgcv-1.8-41/R/smooth.r --- mgcv-1.8-40/R/smooth.r 2022-03-22 09:38:40.000000000 +0000 +++ mgcv-1.8-41/R/smooth.r 2022-09-26 09:10:45.000000000 +0000 @@ -2116,7 +2116,7 @@ ## to use for smooths. Only one smoothing parameter for the whole term. ## If called from gamm, is set up for efficient computation by nesting ## smooth within factor. -## Unsuitable for tensor products. +## Unsuitable for tensor product margins. if (!is.null(attr(object,"gamm"))) gamm <- TRUE else ## signals call from gamm gamm <- FALSE @@ -2289,6 +2289,190 @@ X } ## Predict.matrix.fs.interaction +####################################################################### +# General smooth-factor interactions, constrained to be differences to +# a main effect smooth. +####################################################################### + +smooth.info.sz.smooth.spec <- function(object) { + object$tensor.possible <- TRUE ## signal that a tensor product construction is possible here + object +} + +smooth.construct.sz.smooth.spec <- function(object,data,knots) { +## Smooths in which one covariate is a factor. Generates a smooth +## for each level of the factor. Let b_{jk} be the kth coefficient +## of the jth smooth. Construction ensures that \sum_k b_{jk} = 0, +## for all j. Hence the smooths can be estimated in addition to an +## overall main effect. +## xt element specifies basis to use for smooths. 
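An illustrative sketch (simulated data, not run here) of how the class constructed below is intended to be used; the exact ordering of terms inside s() is unimportant, since the constructor locates the factor among the supplied covariates itself.
## fac <- factor(sample(letters[1:3], 400, replace = TRUE)); x <- runif(400)
## y <- sin(2*pi*x) + (as.numeric(fac) - 2)*x + rnorm(400)*0.3
## b <- gam(y ~ s(x) + s(x, fac, bs = "sz"))  ## main effect smooth + per level difference smooths
## plot(b, pages = 1)                          ## second term drawn by plot.sz.interaction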
+ + if (is.null(object$xt)) object$base.bs <- "tp" ## default smooth class + else if (is.list(object$xt)) { + if (is.null(object$xt$bs)) object$base.bs <- "tp" else + object$base.bs <- object$xt$bs + } else { + object$base.bs <- object$xt + object$xt <- NULL ## avoid messing up call to base constructor + } + object$base.bs <- paste(object$base.bs,".smooth.spec",sep="") + + fterm <- NULL ## identify the factor variables + for (i in 1:length(object$term)) if (is.factor(data[[object$term[i]]])) { + if (is.null(fterm)) fterm <- object$term[i] else fterm[length(fterm)+1] <- object$term[i] + } + + ## deal with no factor case, just base smooth constructor + if (is.null(fterm)) { + class(object) <- object$base.bs + return(smooth.construct(object,data,knots)) + } + + ## deal with factor only case, just transfer to "re" class + if (length(object$term)==length(fterm)) { + class(object) <- "re.smooth.spec" + return(smooth.construct(object,data,knots)) + } + + ## Now remove factor terms from data... + fac <- data[fterm] + data[fterm] <- NULL + k <- 0 + oterm <- object$term + + ## and strip it from the terms... + for (i in 1:object$dim) if (!object$term[i]%in%fterm) { + k <- k + 1 + object$term[k] <- object$term[i] + } + object$term <- object$term[1:k] + object$dim <- length(object$term) + + + ## call base constructor... + spec.class <- class(object) + class(object) <- object$base.bs + object <- smooth.construct(object,data,knots) + if (length(object$S)>1) stop("\"sz\" smooth cannot use a multiply penalized basis (wrong basis in xt)") + + ## save some base smooth information + + object$base <- list(bs=class(object),bs.dim=object$bs.dim, + rank=object$rank,null.space.dim=object$null.space.dim, + term=object$term,dim=object$dim) + object$term <- oterm ## restore original term list + object$dim <- length(object$term) + object$fterm <- fterm ## the factor names... + + ## Store the base model matrix/S in case user wants to convert to r.e. + object$Xb <- object$X + object$base$S <- object$S + + nf <- rep(0,length(fac)) + object$flev <- list() + + Xf <- list() + n <- nrow(object$X) + for (j in 1:length(fac)) { + object$flev[[j]] <- levels(fac[[j]]) + + ## construct the sum to zero contrast matrix, P, ... + nf[j] <- length(object$flev[[j]]) + + Xf[[j]] <- matrix(as.numeric(rep(object$flev[[j]],each=n)==fac[[j]]),n,nf[j]) ## factor matrix + } + Xf[[j+1]] <- object$X + ## duplicate model matrix columns, and penalties... 
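To make the duplication step below concrete, a small schematic (added for illustration; it follows from tensor.prod.model.matrix placing the factor index slowest):
## e.g. with one 2-level factor (levels a, b) and a 2-column base basis B, row i of
## X <- tensor.prod.model.matrix(Xf) is
##   f = a :  ( B[i,1], B[i,2],      0,      0 )
##   f = b :  (      0,      0, B[i,1], B[i,2] )
## i.e. each factor level gets its own copy of the base basis columns (and, below, its
## own copy of the base penalty), with the level index varying slowest.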
+ + p0 <- ncol(object$X) + p <- p0*prod(nf) + + X <- tensor.prod.model.matrix(Xf) + + ind <- 1:p0 + S <- list() + object$null.space.dim <- object$null.space.dim*prod(nf-1) + if (is.null(object$id)) { ## one penalty and one sp per smooth + for (i in 1:prod(nf)) { + S0 <- matrix(0,p,p) + S0[ind,ind] <- object$S[[1]] + S[[i]] <- S0 + ind <- ind + p0 + } + object$rank <- rep(object$rank,prod(nf)) + } else { ## one penalty, one sp + S0 <- matrix(0,p,p) + for (i in 1:prod(nf)) { + S0[ind,ind] <- S0[ind,ind] + object$S[[1]] + ind <- ind + p0 + } + S[[1]] <- S0 + object$rank <- prod(nf-1)*object$bs.dim -object$null.space.dim + } + + object$S <- S + object$X <- X + + object$bs.dim <-prod(nf-1)*object$bs.dim #ncol(object$X) + object$te.ok <- 0 + + + object$side.constrain <- FALSE ## don't apply side constraints - these are really random effects + + object$C <- c(0,nf) + object$plot.me <- TRUE + class(object) <- if ("tensor.smooth.spec"%in%spec.class) c("sz.interaction","tensor.smooth") else + "sz.interaction" + if ("tensor.smooth.spec"%in%spec.class) { + ## give object margins like a tensor product smooth... + ## need just enough for fitting and discrete prediction to work + object$margin <- list() + nf <- length(fterm) + for (i in 1:nf) { + form1 <- as.formula(paste("~",object$fterm[i],"-1")) + object$margin[[i]] <- list(X=Xf[[i]],term=fterm[i],form=form1,by="NA") + class(object$margin[[i]]) <- "random.effect" + } + object$margin[[nf+1]] <- object + object$margin[[nf+1]]$X <- Xf[[nf+1]] + object$margin[[nf+1]]$margin.only <- TRUE + object$margin[[nf+1]]$margin <- NULL + object$margin[[nf+1]]$term <- object$term[!object$term%in%object$fterm] + + } + object +} ## end of smooth.construct.sz.smooth.spec + + +Predict.matrix.sz.interaction <- function(object,data) { +# prediction method function for the zero mean smooth-factor interaction class + ## first remove factor from the data... + fac <- data[object$fterm] + data[object$fterm] <- NULL + + ## now get base prediction matrix... + class(object) <- object$base$bs + object$rank <- object$base$rank + object$null.space.dim <- object$base$null.space.dim + object$bs.dim <- object$base$bs.dim + object$term <- object$base$term + object$dim <- object$base$dim + Xb <- Predict.matrix(object,data) + if (!is.null(object$margin.only)) return(Xb) + n <- nrow(Xb) + Xf <- list() + for (j in 1:length(object$flev)) { + nf <- length(object$flev[[j]]) + Xf[[j]] <- matrix(as.numeric(rep(object$flev[[j]],each=n)==fac[[j]]),n,nf) ## factor matrix + } + Xf[[j+1]] <- Xb + X <- tensor.prod.model.matrix(Xf) + + X +} ## Predict.matrix.sz.interaction + + + ########################################## ## Adaptive smooth constructors start here @@ -3649,6 +3833,23 @@ return(list(data=dat,knots=knt)) } ## ExtractData +XZKr <- function(X,m) { +## postmultiplies X by contrast matrix constructed from Kronecker product +## of sequence of sum to zero contrasts and a final identity matrix. +## Returns transpose of result (since sometimes this is actually what's needed) +## Sum to zero contrasts are rbind(diag(m[i]-1),-1). See Fackler, PL +## (2019) ACM transactions on Mathematical Software 45(2) Article 22. 
+ p <- ncol(X)/prod(m) ## dimension of final identity matrix + n <- nrow(X) + for (i in 1:length(m)) { + dim(X) <- c(length(X)/m[i],m[i]) + X <- t(X[,1:(m[i]-1)]-X[,m[i]]) + } + dim(X) <- c(length(X)/p,p) + X <- t(X) + dim(X) <- c(length(X)/n,n) + X ## returns transpose of result +} ## XZKr ######################################################################### ## What follows are the wrapper functions that gam.setup actually @@ -3856,11 +4057,16 @@ ## if by variable is an ordered factor then first level is taken as a ## reference level, and smooths are only generated for the other levels ## this can help to ensure identifiability in complex models. - if (is.ordered(by)&&length(lev)>1) lev <- lev[-1] + if (is.ordered(by)&&length(lev)>1) lev <- lev[-1] + #sm$rank[length(sm$S)+1] <- ncol(sm$X) ## TEST CENTERING PENALTY + #sm$C <- matrix(0,0,1) ## TEST CENTERING PENALTY for (j in 1:length(lev)) { sml[[j]] <- sm ## replicate smooth for each factor level by.dum <- as.numeric(lev[j]==by) sml[[j]]$X <- by.dum*sm$X ## multiply model matrix by dummy for level + + #sml[[j]]$S[[length(sm$S)+1]] <- crossprod(sm$X[by.dum==1,]) ## TEST CENTERING PENALTY + sml[[j]]$by.level <- lev[j] ## store level sml[[j]]$label <- paste(sm$label,":",object$by,lev[j],sep="") if (!is.null(offs)) { @@ -3981,20 +4187,7 @@ sml[[i]]$null.space.dim <- max(0,sml[[i]]$null.space.dim - j) ## ... so qr.qy(attr(sm,"qrc"),c(rep(0,nrow(sm$C)),b)) gives original para.'s } ## end smooth list loop - } else { ## full null space created - # if (drop>0) { ## sweep and drop constraints - # qrc <- c(drop,as.numeric(sm$C)[-drop]) - # class(qrc) <- "sweepDrop" - # for (i in 1:length(sml)) { ## loop through smooth list - # ## sml[[i]]$X <- sweep(sml[[i]]$X[,-drop],2,qrc[-1]) - # sml[[i]]$X <- sml[[i]]$X[,-drop] - - # matrix(qrc[-1],nrow(sml[[i]]$X),ncol(sml[[i]]$X)-1,byrow=TRUE) - # if (length(sm$S)>0) - # for (l in 1:length(sm$S)) { # some smooths have > 1 penalty - # sml[[i]]$S[[l]]<-sml[[i]]$S[[l]][-drop,-drop] - # } - # } - # } else + } else { { ## full QR based approach qrc<-qr(t(sm$C)) for (i in 1:length(sml)) { ## loop through smooth list @@ -4024,6 +4217,23 @@ attr(sml[[i]],"nCons") <- 0; } } ## end else no constraints + } else if (length(sm$C)>1) { ## Kronecker product of sum-to-zero contrasts (first element unused to allow index for alternatives) + m <- sm$C[-1] ## contrast order + for (i in 1:length(sml)) { ## loop through smooth list + if (length(sm$S)>0) + for (l in 1:length(sm$S)) { # some smooths have > 1 penalty + sml[[i]]$S[[l]] <- XZKr(XZKr(sml[[i]]$S[[l]],m),m) + } + p <- ncol(sml[[i]]$X) + sml[[i]]$X <- t(XZKr(sml[[i]]$X,m)) + total.null.dim <- prod(m-1)*p/prod(m) + nc <- p - prod(m-1)*p/prod(m) + attr(sml[[i]],"nCons") <- nc + attr(sml[[i]],"qrc") <- c(sm$C,nc) ## unused, dim1, dim2, ..., n.cons + sml[[i]]$C <- NULL + ## NOTE: assumption here is that constructor returns rank, null.space.dim + ## and df, post constraint. + } } else if (sm$C>0) { ## set to zero constraints for (i in 1:length(sml)) { ## loop through smooth list if (length(sm$S)>0) @@ -4232,7 +4442,7 @@ if (j>0) { ## there were constraints to absorb - need to untransform k<-ncol(X) if (inherits(qrc,"qr")) { - indi <- attr(object,"indi") ## index of constrained parameters + indi <- attr(object,"indi") ## index of constrained parameters (only with QR constraints!) 
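The XZKr routine above never forms the contrast matrix explicitly. A dense check of what it computes, for verification only, using the documented ordering (first factor slowest, final identity block fastest); from outside the package the function is mgcv:::XZKr.
stz <- function(m) rbind(diag(m - 1), rep(-1, m - 1))  ## single sum-to-zero contrast
m <- c(3, 2); p <- 4                                    ## two factor dimensions, 4 base columns
Z <- diag(p); for (mi in rev(m)) Z <- kronecker(stz(mi), Z)
X <- matrix(runif(8 * prod(m) * p), 8)
range(t(XZKr(X, m)) - X %*% Z)                          ## should be numerically zero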
if (is.null(indi)) { if (sum(is.na(X))) { ind <- !is.na(rowSums(X)) @@ -4260,6 +4470,9 @@ ## Remainder are constants to be swept out of remaining columns ## Actually better handled first (see above) #X <- X[,-qrc[1],drop=FALSE] - matrix(qrc[-1],nrow(X),ncol(X)-1,byrow=TRUE) + } else if (length(qrc)>0) { ## Kronecker product of sum-to-zero contrasts + m <- qrc[-c(1,length(qrc))] ## contrast dimensions - less initial code and final number of constraints + X <- t(XZKr(X,m)) } else if (qrc>0) { ## simple set to zero constraint X <- X[,-qrc,drop=FALSE] } else if (qrc<0) { ## params sum to zero diff -Nru mgcv-1.8-40/src/discrete.c mgcv-1.8-41/src/discrete.c --- mgcv-1.8-40/src/discrete.c 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/src/discrete.c 2022-10-18 13:59:48.000000000 +0000 @@ -183,6 +183,109 @@ } /* Cdgemv */ +/* constraint application helper functions */ + +void Zb(double *b1,double *b0,double *v,int *qc, int *p,double *w) { +/* Form b1 = Z b0 where constraint matrix Z has more rows than columns. + b1 and b0 must be separate storage. + p is dim(b1) + qc > 0 for simple HH sum to zero constraint. + qc < 0 for Kronecker product of sum to zero contrasts. + v is either the householder vector, or a vector giving: + [the number of contrasts, the leading dimension of each, the total number of + linear constraints implied] + w is work space at least 2p long in the qc<0 case, but unused otherwise. + +*/ + double x,*p0,*p1,*p2,*p3,*w0,*w1,z; + int M,k0,pp,i,j,k,q,mk,p0m; + if (*qc>0) { + *b1 = 0.0;x=0.0; + for (p0 = b1+1,p1=b1+ *p,p2 = b0,p3=v+1;p00) { /* signals a rank 1 HH constraint */ + for (x=0.0,p0=b0,p1=v,p2=v + *p;p10) voff[i+1] = voff[i] + pt[i]; else voff[i+1] = voff[i]; /* start of ith v matrix */ + if (qc[i]==0) voff[i+1] = voff[i]; else if (qc[i]>0) voff[i+1] = voff[i] + pt[i]; else { /* start of ith v matrix */ + kk = (int) round(v[voff[i]]); /* number of contrasts in this KP contrast */ + voff[i+1] = voff[i] + kk + 2; + } if (maxp < pt[i]) maxp = pt[i]; //if (qc[i]<=0) tps[i+1] = tps[i] + pt[i]; /* where ith terms starts in param vector */ //else tps[i+1] = tps[i] + pt[i] - 1; /* there is a tensor constraint to apply - reducing param count*/ @@ -460,8 +568,12 @@ } for (kk=j=0;j<*ncs;j++) { /* get the offsets for the returned terms in the output */ i = cs[j];tps[i] = kk; - if (qc[i]<=0) kk += pt[i]; /* where cth terms starts in param vector */ - else kk += pt[i] - 1; /* there is a tensor constraint to apply - reducing param count*/ + if (qc[i]==0) kk += pt[i]; /* where cth terms starts in param vector */ + else if (qc[i]>0) kk += pt[i] - 1; else { /* there is a tensor constraint to apply - reducing param count*/ + /* Kronecker product of sum-to-zero contrasts */ + q = (int) round(v[voff[i]]); /* number of contrasts */ + kk += pt[i] - (int) round(v[voff[i]+q+1]); /* subtracting number of constraints */ + } } tps[*nt] = kk; // Rprintf("\n pt:"); @@ -469,7 +581,7 @@ //Rprintf("\n nt = %d ncs = %d cs, tps[cs]:\n",*nt,*ncs); //for (i=0;i< *ncs;i++) Rprintf(" %d %d",cs[i],tps[cs[i]]); /* now form the product term by term... */ - i = *n; if (i 0 then cs contains the subset of terms (blocks of model matrix columns) to include. 
*/ - double *Wy,*p0,*p1,*p2,*p3,*Xy0,*work,*work1,x; + double *Wy,*p0,*p1,*p2,*Xy0,*work,*work1,*work2; ptrdiff_t i,j,*off,*voff; int *tps,maxm=0,maxp=0,one=1,zero=0,*pt,add,q,kk,n_XWy; if (*ar_stop>=0) { /* model has AR component, requiring sqrt(weights) */ @@ -693,7 +807,10 @@ if (j==0) pt[i] = p[q]; else pt[i] *= p[q]; /* term dimension */ if (maxm0) voff[i+1] = voff[i] + pt[i]; else voff[i+1] = voff[i]; /* start of ith Q matrix */ + if (qc[i]==0) voff[i+1] = voff[i]; else if (qc[i]>0) voff[i+1] = voff[i] + pt[i]; else { /* start of ith Q matrix */ + kk = (int) round(v[voff[i]]); /* number of contrasts in this KP contrast */ + voff[i+1] = voff[i] + kk + 2; + } if (maxp < pt[i]) maxp=pt[i]; //if (qc[i]<=0) tps[i+1] = tps[i] + pt[i]; /* where ith terms starts in param vector */ //else tps[i+1] = tps[i] + pt[i] - 1; /* there is a tensor constraint to apply - reducing param count*/ @@ -704,8 +821,12 @@ } for (kk=j=0;j<*ncs;j++) { /* get the offsets for the returned terms in the output */ i = cs[j];tps[i] = kk; - if (qc[i]<=0) kk += pt[i]; /* where cth terms starts in param vector */ - else kk += pt[i] - 1; /* there is a tensor constraint to apply - reducing param count*/ + if (qc[i]==0) kk += pt[i]; /* where cth terms starts in param vector */ + else if (qc[i]>0) kk += pt[i] - 1; /* there is a tensor constraint to apply - reducing param count*/ + else { /* Kronecker product of sum-to-zero contrasts */ + q = (int) round(v[voff[i]]); /* number of contrasts */ + kk += pt[i] - (int) round(v[voff[i]+q+1]); /* subtracting number of constraints */ + } } /* kk is number of rows of XWy, at this point */ n_XWy = kk; @@ -713,6 +834,7 @@ Xy0 = (double *) CALLOC((size_t)maxp,sizeof(double)); work = (double *) CALLOC((size_t)*n,sizeof(double)); work1 = (double *) CALLOC((size_t)maxm,sizeof(double)); + work2 = (double *) CALLOC((size_t)maxp * 2,sizeof(double)); Wy = (double *) CALLOC((size_t)*n,sizeof(double)); /* Wy */ for (j=0;j<*cy;j++) { /* loop over columns of y */ @@ -732,11 +854,12 @@ tensorXty(Xy0,work,work1,Wy,X+off[ts[i]],m+ts[i],p+ts[i],dt+i,k,n,&add,ks+ts[i],&q); add=1; } - if (qc[i]>0) { /* there is a constraint to apply Z'Xy0: form Q'Xy0 and discard first row... */ + if (qc[i]!=0) { /* there is a constraint to apply Z'Xy0: form Q'Xy0 and discard first row... */ /* Q' = I - vv' */ - for (x=0.0,p0=Xy0,p1=p0 + pt[i],p2=v+voff[i];p00) voff[i+1] = voff[i] + pt[i]; else voff[i+1] = voff[i]; /* start of ith v vector */ + } + if (qc[i]==0) voff[i+1] = voff[i]; else if (qc[i]>0) voff[i+1] = voff[i] + pt[i]; else { + si = (int) round(v[voff[i]]); /* number of contrasts in this KP contrast */ + voff[i+1] = voff[i] + si + 2; + } /* start of ith v vector */ if (maxp0) tps[i+1] = tps[i] + pt[i] - 1; /* there is a tensor constraint to apply - reducing param count*/ + else { /* Kronecker product of sum to zero contrasts */ + si = (int) round(v[voff[i]]); /* number of contrasts */ + tps[i+1] = tps[i] + pt[i] - (int) round(v[voff[i]+si+1]); /* subtracting number of constraints */ + } tpsu[i+1] = tpsu[i] + pt[i]; /* where ith term starts in unconstrained param vector */ } qi = 6 * *n; /* integer work space */ // maxm and maxmp only used here... 
//q = 6 * *n + maxm + maxm * maxmp; /* note that we never allocate a W accumulation matrix with more than n elements */ - //work = (double *)CALLOC((size_t)q * *nthreads,sizeof(double)); + worki = (int *)CALLOC((size_t)qi * *nthreads,sizeof(int)); mmp = maxp;mmp = mmp*mmp; ptot = tps[*nt]; /* total number of parameters */ @@ -1542,16 +1675,21 @@ /* NOTE: above will write directly to oversized XWX, then have constraints applied post-hoc. */ } /* block loop */ + FREE(work); + work = (double *)CALLOC((size_t) 2*nxwx,sizeof(double)); /* working for Ztb */ + /* now XWX contains the unconstrained X'WX, but the constraints have to be applied to blocks involving tensor products */ for (r=0;r < *nt;r++) for (c=r;c< *nt;c++) { /* if Xr is tensor, may need to apply constraint */ - if (dt[r]>1&&qc[r]>0) { /* first term is a tensor with a constraint */ + if (dt[r]>1&&qc[r]!=0) { /* first term is a tensor with a constraint */ /* col by col form (I-vv')xwx, dropping first row... */ + /* col by col form Z' xwx where Z is constraint matrix */ for (j=0;j1&&qc[c]>0) { /* Xc term is a tensor with a constraint */ + if (dt[c]>1&&qc[c]!=0) { /* Xc term is a tensor with a constraint */ /* row by row form xwx(I-vv') dropping first col... */ + /* row by row form xwx Z, where Z is constraint matrix */ for (j=0;j0) voff[i+1] = voff[i] + pt[i]; else voff[i+1] = voff[i]; /* start of ith v vector */ + if (qc[i]==0) voff[i+1] = voff[i]; else if (qc[i]>0) voff[i+1] = voff[i] + pt[i]; else { + ri = (int) round(v[voff[i]]); /* number of contrasts in this KP contrast */ + voff[i+1] = voff[i] + ri + 2; + } /* start of ith v vector */ if (maxp0) kk += pt[r] - 1; /* there is a tensor constraint to apply - reducing param count*/ + else { /* Kronecker product of sum-to-zero contrasts */ + ri = (int) round(v[voff[r]]); /* number of contrasts */ + kk += pt[r] - (int) round(v[voff[r]+ri+1]); /* subtracting number of constraints */ + } i += pt[r]; /* where rth term starts in unconstrained param vector */ } ptot = kk;//tpsr[*nrs]; /* rows of computed XWX post constraint */ @@ -1699,8 +1841,12 @@ for (kk=i=j=0;j<*ncs;j++) { c = cs[j]; tpsc[c] = kk;tpsuc[c] = i; - if (qc[c]<=0) kk += pt[c]; /* where cth terms starts in param vector */ - else kk += pt[c] - 1; /* there is a tensor constraint to apply - reducing param count*/ + if (qc[c]==0) kk += pt[c]; /* where cth terms starts in param vector */ + else if (qc[c]>0) kk += pt[c] - 1; /* there is a tensor constraint to apply - reducing param count*/ + else { /* Kronecker product of sum-to-zero contrasts */ + ri = (int) round(v[voff[c]]); /* number of contrasts */ + kk += pt[c] - (int) round(v[voff[c]+ri+1]); /* subtracting number of constraints */ + } i += pt[c]; /* where cth term starts in unconstrained param vector */ } @@ -1803,22 +1949,27 @@ /* NOTE: above will write directly to oversized XWX, then have constraints applied post-hoc. 
*/ } /* block loop */ + FREE(work); + work = (double *)CALLOC((size_t) 2*nxwx,sizeof(double)); /* working for Ztb */ + /* now XWX contains the unconstrained X'WX, but the constraints have to be applied to blocks involving tensor products */ - for (ri=0;ri < *nrs;ri++) { + for (ri=0;ri < *nrs;ri++) { /* loop over required block rows */ if (symmetric) ci=ri; else ci = 0; - for (;ci< *ncs;ci++) { + for (;ci< *ncs;ci++) { /* and over required block cols */ /* if Xr is tensor, may need to apply constraint */ r = rs[ri];c = cs[ci]; - if (dt[r]>1&&qc[r]>0) { /* first term is a tensor with a constraint */ + if (dt[r]>1&&qc[r]!=0) { /* first term is a tensor with a constraint */ /* col by col form (I-vv')xwx, dropping first row... */ + /* col by col form Z' xwx where Z is constraint matrix */ for (j=0;j1&&qc[c]>0) { /* Xc term is a tensor with a constraint */ + if (dt[c]>1&&qc[c]!=0) { /* Xc term is a tensor with a constraint */ /* row by row form xwx(I-vv') dropping first col... */ + /* row by row form xwx Z, where Z is constraint matrix */ for (j=0;j0) { - i=0;mgcv_mmult(work,Rh,b1,&i,&i,&rank,M,&rank); /* Rh db/drho */ + /* note that theta dependencies are stored before sp dependencies in b1 etc */ + if (*M>0) { + i=0;mgcv_mmult(work,Rh,b1 + *n_theta * rank,&i,&i,&rank,M,&rank); /* Rh db/drho */ /* Now obtain dVkk = db'/drho Rh' Rh db/drho ... */ getXtX(dVkk,work,&rank,M); } @@ -2361,6 +2361,7 @@ * sp is an M array of smoothing parameters (NOT log smoothing parameters) * z, w and wf are n-vectors of the pseudodata iterative newton weights and iterative fisher weights (only if `fisher' is zero) + * w1 is deriv of w w.r.t. log sp if REML=1 and deriv of wf w.r.t. log sp otherwise * p_weights is an n-vector of prior weights (as opposed to the iterative weights in w) * mu and y are n-vectors of the fitted values and data. 
* g1,g2,g3,g4 are the n-vectors of the link derivatives diff -Nru mgcv-1.8-40/src/init.c mgcv-1.8-41/src/init.c --- mgcv-1.8-40/src/init.c 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/src/init.c 2022-09-23 13:49:19.000000000 +0000 @@ -30,6 +30,11 @@ {"stmm",(DL_FUNC)&stmm,1}, {"AddBVB",(DL_FUNC)&AddBVB,3}, {"isa1p",(DL_FUNC)&isa1p,3}, + {"mrow_sum",(DL_FUNC)&mrow_sum,3}, + {"ncv",(DL_FUNC)&ncv,17}, + {"Rncv",(DL_FUNC)&Rncv,19}, + {"ncvls",(DL_FUNC)&ncvls,18}, + {"Rncvls",(DL_FUNC)&Rncvls,19}, {NULL, NULL, 0} }; @@ -38,7 +43,7 @@ {"davies",(DL_FUNC) davies,10}, {"tri_chol",(DL_FUNC) tri_chol,4}, {"diagXVXt", (DL_FUNC) &diagXVXt,21}, - {"XWXd", (DL_FUNC) &XWXd,18}, + // {"XWXd", (DL_FUNC) &XWXd,18}, {"XWXd0", (DL_FUNC) &XWXd0,18}, {"XWXd1", (DL_FUNC) &XWXd1,22}, {"XWyd", (DL_FUNC) &XWyd,21}, @@ -95,6 +100,9 @@ {"mgcv_pqr",(DL_FUNC)&mgcv_pqr,6}, {"getRpqr",(DL_FUNC)&getRpqr,6}, {"mgcv_pqrqy",(DL_FUNC)&mgcv_pqrqy,8}, + {"minres",(DL_FUNC)&minres,7}, + {"Zb",(DL_FUNC)&Zb,6}, + {"Ztb",(DL_FUNC)&Ztb,7}, {NULL, NULL, 0} }; @@ -102,7 +110,7 @@ { R_registerRoutines(dll, CEntries, CallMethods, NULL, NULL); R_useDynamicSymbols(dll, FALSE); - R_RegisterCCallable("mgcv","mgcv_pmmult2", (DL_FUNC) &mgcv_pmmult2); + R_RegisterCCallable("mgcv","mgcv_pmmult2", (DL_FUNC) &mgcv_pmmult2); // allows calling from other packages R_RegisterCCallable("mgcv","pls_fit1", (DL_FUNC) &pls_fit1); R_RegisterCCallable("mgcv","gdi2", (DL_FUNC) &gdi2); } diff -Nru mgcv-1.8-40/src/Makevars mgcv-1.8-41/src/Makevars --- mgcv-1.8-40/src/Makevars 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/src/Makevars 2022-07-06 11:02:55.000000000 +0000 @@ -2,5 +2,8 @@ PKG_CFLAGS = $(SHLIB_OPENMP_CFLAGS) ## *Both* the above must be *uncommented* for release -#PKG_CFLAGS = -Wall -pedantic $(SHLIB_OPENMP_CFLAGS) +#PKG_CFLAGS = -g -O0 -Wall -pedantic $(SHLIB_OPENMP_CFLAGS) +## Actually it now seems that you need to reset optimization flag in +## /usr/local/lib/R/etc/Makeconf +## This file can add flags but not modify what's in the above! ## `#' out previous line for release (but not without uncommenting openMP) diff -Nru mgcv-1.8-40/src/mat.c mgcv-1.8-41/src/mat.c --- mgcv-1.8-40/src/mat.c 2022-03-22 09:38:40.000000000 +0000 +++ mgcv-1.8-41/src/mat.c 2022-07-06 11:02:55.000000000 +0000 @@ -458,6 +458,34 @@ B, &lda,C, &ldb,&beta, A, &ldc FCONE FCONE); } /* end mgcv_mmult */ +SEXP mrow_sum(SEXP x,SEXP M, SEXP K) { +/* X is n by p matrix, m and k are integer vectors + B is m[length(m)-1] by p output matrix. 
+ B[i,] is sum of X[k[j],] for j in m[i-1]:(m[i]-1) (m[-1]=0) + .Called from mat.rowsum +*/ + int i,j,p,n,nm,*m,*k,*kp,*p1; + double *X,xx,*B; + SEXP b; + nm = length(M); + X = REAL(x); + M = PROTECT(coerceVector(M,INTSXP)); + K = PROTECT(coerceVector(K,INTSXP)); /* otherwise R might be storing as double on entry */ + m = INTEGER(M); k = INTEGER(K); + p = ncols(x); n = nrows(x); + b = PROTECT(allocMatrix(REALSXP,nm,p)); + B = REAL(b); + for (j=0;jM[4*n][j]=0.0; if (up) A->M[4*n+lo][n]=-1.0; else A->M[4*n+lo][0]=-1.0; - b->V[4*n+lo]=upper; + b->V[4*n+lo] = -upper; } freemat(D); freemat(h); diff -Nru mgcv-1.8-40/src/mgcv.h mgcv-1.8-41/src/mgcv.h --- mgcv-1.8-40/src/mgcv.h 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/src/mgcv.h 2022-09-23 13:05:29.000000000 +0000 @@ -126,9 +126,9 @@ double *dH,int *deriv,int *nsp,int *nt); /* discretized covariate methods */ -void XWXd(double *XWX,double *X,double *w,int *k,int *ks, int *m,int *p, int *n, int *nx, - int *ts, int *dt, int *nt,double *v,int *qc,int *nthreads,int *ar_stop, - int *ar_row,double *ar_weights); +//void XWXd(double *XWX,double *X,double *w,int *k,int *ks, int *m,int *p, int *n, int *nx, +// int *ts, int *dt, int *nt,double *v,int *qc,int *nthreads,int *ar_stop, +// int *ar_row,double *ar_weights); void XWXd0(double *XWX,double *X,double *w,int *k,int *ks, int *m,int *p, int *n, int *nx, int *ts, int *dt, int *nt,double *v,int *qc,int *nthreads,int *ar_stop, int *ar_row,double *ar_weights); @@ -211,6 +211,17 @@ SEXP mgcv_Rpbacksolve(SEXP R, SEXP B,SEXP NT); SEXP mgcv_Rpcross(SEXP A, SEXP NT,SEXP NB); SEXP mgcv_madi(SEXP a, SEXP b,SEXP ind,SEXP diag); +SEXP mrow_sum(SEXP x,SEXP M, SEXP K); +SEXP ncv(SEXP x, SEXP hi, SEXP W1, SEXP W2, SEXP DB, SEXP DW, SEXP rS, SEXP IND, SEXP MI,SEXP M, + SEXP K, SEXP BETA, SEXP SP, SEXP ETA, SEXP DETA,SEXP DLET,SEXP DERIV); +SEXP Rncv(SEXP x, SEXP r, SEXP W1, SEXP W2, SEXP DB, SEXP DW, SEXP rS, SEXP IND, SEXP MI, SEXP M, SEXP K,SEXP BETA, SEXP SP, SEXP ETA, + SEXP DETA,SEXP DLET,SEXP DERIV,SEXP EPS,SEXP NT); +SEXP ncvls(SEXP x,SEXP JJ,SEXP h,SEXP hi,SEXP dH,SEXP L1, SEXP L2,SEXP L3,SEXP IND, SEXP MI, SEXP M, SEXP K,SEXP BETA, + SEXP ETACV,SEXP DETACV,SEXP DETA,SEXP DB,SEXP DERIV); +SEXP Rncvls(SEXP x,SEXP JJ,SEXP R1,SEXP dH,SEXP L1, SEXP L2,SEXP L3,SEXP IND, SEXP MI, SEXP M, SEXP K,SEXP BETA, + SEXP ETACV,SEXP DETACV,SEXP DETA,SEXP DB,SEXP DERIV,SEXP EPS,SEXP NT); +void chol_up(double *R,double *u, int *n,int *up,double *eps); +void minres(double *R, double *u,double *b, double *x, int *p,int *m,double *work); /* sparse matrix routines */ SEXP isa1p(SEXP L,SEXP S,SEXP NT); @@ -274,4 +285,7 @@ double *diagA,double *lb,int *n,double *tol); void sspl_mapply(double *y,double *x,double *w,double *U,double *V,int *n,int *nf,double *tol,int *m); - +/* just for testing */ +void Zb(double *b1,double *b0,double *v,int *qc, int *p,double *w); +void Ztb(double *b1,double *b0,double *v,int *qc,int *di, int *p,double *w); + diff -Nru mgcv-1.8-40/src/misc.c mgcv-1.8-41/src/misc.c --- mgcv-1.8-40/src/misc.c 2022-02-01 10:18:29.000000000 +0000 +++ mgcv-1.8-41/src/misc.c 2022-07-06 11:02:55.000000000 +0000 @@ -758,4 +758,3 @@ mgcv:::rwMatrix(stop,row,w,X) */ - diff -Nru mgcv-1.8-40/src/ncv.c mgcv-1.8-41/src/ncv.c --- mgcv-1.8-40/src/ncv.c 1970-01-01 00:00:00.000000000 +0000 +++ mgcv-1.8-41/src/ncv.c 2022-10-18 13:59:48.000000000 +0000 @@ -0,0 +1,1621 @@ +/* Copyright (C) 2022 Simon N. 
Wood simon.wood@r-project.org + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. +(www.gnu.org/copyleft/gpl.html) + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, +USA. */ + +#include +#include +#include + +#ifdef _OPENMP // needs to precede R.h (and mgcv.h) +#include +#endif + +//#include +//#include +//#include +#include +#include "mgcv.h" + + +void minres0(double *R, double *u,double *b, double *x, int *p,int *m) { +/* Brute force alternative to minres for testing purposes */ + double *A,xx,zz,*work,workq; + int p2,j,one=1,*ipiv,lwork=-1; + char ntrans = 'N',trans='T',uplo='U',diag='N',side='L'; + p2 = *p * *p; + A = (double *)CALLOC((size_t) p2,sizeof(double)); + ipiv = (int *)CALLOC((size_t) *p,sizeof(int)); + for (j=0;jbmax) bmax = c1; /* find max abs b for convergence testing */ + } + c1 = -1.0;c2=1.0; + F77_CALL(dgemv)(&ntrans,&n,&n,&c1,A,&n,x,&one,&c2,r,&one FCONE); /* r = b - Ax */ + c1 = 0.0; + F77_CALL(dgemv)(&ntrans,&n,&n,&c2,Mi,&n,r,&one,&c1,z,&one FCONE); /* z = Mi r */ + for (i=0;irmax) rmax = fabs(r1[i]); + } + if (rmax < tol*bmax) break; + F77_CALL(dgemv)(&ntrans,&n,&n,&c1,Mi,&n,r1,&one,&c2,z1,&one FCONE); /* z1 = Mi r1 */ + for (r1z1=0.0,i=0;imaxn) maxn = i-ii; ii = i; + } + Xi = (double *)CALLOC((size_t) p*maxn,sizeof(double)); /* holds sub-matrix removed for this neighbourhood */ + wXi = (double *)CALLOC((size_t) p*maxn,sizeof(double)); /* equivalent pre-multiplied by diag(w2) */ + dwXi = (double *)CALLOC((size_t) p*maxn,sizeof(double)); /* equivalent pre-multiplied by d diag(w2)/d rho_j */ + Hd = (double *)CALLOC((size_t) p2,sizeof(double)); + dwX = (double *)CALLOC((size_t) p*n,sizeof(double)); + /* create Hessian X'diag(w2)X + S_lambda... */ + for (xip0 = X,xip=dwX,q=0;q0) { /* derivarives of Hessian, dH/drho_j, needed */ + db=REAL(DB);dw=REAL(DW);dlet = REAL(DLET); + dH = (double *)CALLOC((size_t) p2*(nsp+nth),sizeof(double)); + for (j=0;j=nth) { /* it's a smoothing parameter */ + S = VECTOR_ELT(rS, j-nth); /* Writing R Extensions 5.9.6 */ + rSj = REAL(S);q = ncols(S); + F77_CALL(dgemm)(&ntrans,&trans,&p,&p,&q,sp+j-nth,rSj,&p,rSj,&p,&xx,dH+j*p2,&p FCONE FCONE); /* X'diag(dw[,j])X + lambda_j S_j */ + } + } + } else if (deriv<0) dlet = REAL(DLET); /* storage for returning coeff changes per fold */ + FREE(dwX); + for (io=ii=0,i=0;ierror) error=kk; + /* now create the linear predictors for target points */ + for (;io0) for (l=0;lerror) error=kk; + for (io=io0;io0) FREE(dH); + PROTECT(kr=allocVector(INTSXP,1)); + INTEGER(kr)[0] = error; /* max CG iterations used */ + UNPROTECT(5); + return(kr); +} /* ncv */ + + + + +SEXP Rncv(SEXP x, SEXP r, SEXP W1, SEXP W2, SEXP DB, SEXP DW, SEXP rS, SEXP IND, SEXP MI, SEXP M, SEXP K,SEXP BETA, SEXP SP, SEXP ETA, + SEXP DETA,SEXP DLET,SEXP DERIV,SEXP EPS,SEXP NT) { +/* Neighbourhood cross validation function, based on updating the Cholesky factor of the Hessian, rather than CG. 
+ This is still O(np^2), but has the advantage of detecting any Hessian that is not positive definite. + + OMP parallel version - scaling reasonable, as irreducibly level 2 dominated. + + Return: eta - eta[i] is linear predictor of y[ind[i]] when y[ind[i]] and its neighbours are omitted from fit + deta - deta[i,j] is derivative of eta[ind[i]] w.r.t. log smoothing parameter j. + Input: X - n by p model matrix. R chol factor of penalized Hessian. w1 = w1[i] X[i,j] is dl_i/dbeta_j to within a scale parameter. + w2 - -X'diag(w2)X is Hessian of log likelihood to within a scale parameter. db - db[i,j] is dbeta_i/d rho_j where rho_j is a log s.p. or + possibly other parameter. dw - dw[i,j] is dw2[i]/drho_j. rS[[i]] %*% t(rS[[i]]) is ith smoothing penalty matrix. + k[m[i-1]:(m[i])] index the points in the ith neighbourhood. m[-1]=0 by convention. + Similarly ind[mi[i-1]:mi[i]] index the points whose linear predictors are to be predicted on dropping of the ith neighbourhood. + beta - model coefficients (eta=X beta if nothing dropped). sp the smoothing parameters. deriv==0 + for no derivative calculations, deriv>0 to obtain first derivatives. deriv < 0 to compute NCV score without derivatives + and return perturbations of beta in columns of DLET. + + Basic idea: to approximate the linear predictor on omission of the neighbours of each point in turn, a single Newton step is taken from the full fit + beta, using the gradient and Hessian implied by omitting the neighbours. To keep the cost at O(np^2) an O(p^2) update of the Cholesky + factor is made to obtain the Cholesky factor with dropped observations, enabling O(p^2) solution for the updated parameter. + + The gradient of this step w.r.t. each smoothing parameter can also be obtained, again using the updated Cholesky factor to avoid + O(p^3) cost for each obs. + + If the updated Hessian is not positive definite the Cholesky update will detect this, and the routine falls back on Woodbury or minres. + + A point can be predicted several times with different omitted neighbourhoods. ind[i] is the point being predicted and eta[i] its prediction. + LOOCV is recovered if ind = 0:(n-1) and each point's neighbourhood is just itself.
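As a concrete illustration of the single Newton step described above, the following self-contained R sketch treats the simplest case of a Gaussian likelihood with a fixed smoothing parameter, for which the step is exact; variable names here are illustrative only and do not map onto the C arguments (at the R level the criterion is selected via gam(..., method = "NCV")).
set.seed(1); n <- 50; p <- 5
X <- matrix(runif(n*p), n, p); y <- rnorm(n)
S <- diag(p); lambda <- 2
H <- crossprod(X) + lambda*S               ## penalized Hessian at the full fit
beta <- solve(H, crossprod(X, y))          ## full-data penalized fit
i <- 3; nbi <- c(2,3,4)                    ## predict point 3, dropping neighbourhood {2,3,4}
Hd <- H - crossprod(X[nbi,,drop=FALSE])    ## downdated Hessian
g <- crossprod(X[nbi,,drop=FALSE], y[nbi] - X[nbi,,drop=FALSE] %*% beta)
bd <- beta - solve(Hd, g)                  ## single Newton step away from the full fit
range(bd - solve(Hd, crossprod(X[-nbi,,drop=FALSE], y[-nbi]))) ## agrees with an exact refit
eta.i <- drop(X[i,] %*% bd)                ## NCV linear predictor for point i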
+ */ + SEXP S,kr; + int maxn,i,nsp,n,p,*m,*k,j,l,ii,i0,ki,q,p2,one=1,deriv,*error,jj,nm,*ind,nth,*mi,io,io0,no,pdef,nddbuf,nwork = 0, + nt,tid=0,pmaxn,*iwork=NULL,use_minres=1,niwork=0; + double *X,*g,*g1,*gp,*p1,*R0,*R,*Xi,xx,*xip,*xip0,z,w1ki,w2ki,*wXi,*d,*w1,*w2,*eta,*p0,*p3,*ddbuf,*Rb,*work=NULL, + *deta,*beta,*dg,*dgp,*dwX=NULL,*wp,*wp1,*db=NULL,*dw=NULL,*rSj,*sp,*d1,*dbp,*dH=NULL,*xp,*wxp,*bp,*bp1,*dwXi,*dlet=NULL,*dp,eps,alpha; + char trans = 'T',ntrans = 'N',uplo='U',diag='N'; + M = PROTECT(coerceVector(M,INTSXP)); + MI = PROTECT(coerceVector(MI,INTSXP)); + IND = PROTECT(coerceVector(IND,INTSXP)); + K = PROTECT(coerceVector(K,INTSXP)); /* otherwise R might be storing as double on entry */ + deriv = asInteger(DERIV); + nt = asInteger(NT); + mi = INTEGER(MI);m = INTEGER(M); k = INTEGER(K);ind = INTEGER(IND); + nsp = length(rS); + nth = ncols(DETA)-nsp; /* how many non-sp parameters are there - first cols of db and dw relate to these */ + sp = REAL(SP); + w1=REAL(W1);w2=REAL(W2); + X = REAL(x);beta = REAL(BETA); + R=REAL(r);eta = REAL(ETA);deta = REAL(DETA); + eps = asReal(EPS); + p = ncols(x); n = nrows(x);p2=p*p; + no = length(IND); /* number of output lp values */ + nm = length(M); /* number of elements in cross validated eta - need not be n*/ + #ifndef _OPENMP + nt = 1; + #endif + g = (double *)CALLOC((size_t) 3*p*nt,sizeof(double)); + g1 = g + p*nt;dg = g1 + p*nt; + d = (double *)CALLOC((size_t) 2*p*nt,sizeof(double)); /* perturbation to beta on dropping y_i and its neighbours */ + d1 = d + p*nt; + /* need to know largest neighbourhood */ + maxn = ii = 0; + for (j=0;jmaxn) maxn = i-ii; ii = i; + } + pmaxn = p*maxn; + + Xi = (double *)CALLOC((size_t) pmaxn*nt,sizeof(double)); /* holds sub-matrix removed for this neighbourhood */ + wXi = (double *)CALLOC((size_t) pmaxn*nt,sizeof(double)); /* equivalent pre-multiplied by diag(w2) */ + dwXi = (double *)CALLOC((size_t) pmaxn*nt,sizeof(double)); /* equivalent pre-multiplied by d diag(w2)/d rho_j */ + R0 = (double *)CALLOC((size_t) p2*nt,sizeof(double));Rb = (double *)CALLOC((size_t) p2*nt,sizeof(double)); + ddbuf = (double *)CALLOC((size_t) pmaxn*nt,sizeof(double)); /* buffer for downdates that spoil +ve def */ + if (use_minres) { + nwork = p*(maxn+7)+maxn; + } else { + niwork = maxn; + iwork = (int *)CALLOC((size_t)niwork*nt,sizeof(int)); + /* workspace query (d not referenced) ... 
*/ + nwork = -1;z = 1.0;i=1; + F77_CALL(dsysv)(&uplo,&niwork,&i,d,&niwork,iwork,d,&niwork,&xx,&nwork,&j FCONE); + nwork = (int) ceil(xx); + nwork += maxn*(p+1+maxn); + } + work = (double *)CALLOC((size_t) nwork*nt,sizeof(double)); + error = (int *)CALLOC((size_t) nt,sizeof(int)); + xx=1.0;z=0.0; + + if (deriv>0) { /* derivarives of Hessian, dH/drho_j, needed */ + dwX = (double *)CALLOC((size_t) p*n,sizeof(double)); + db=REAL(DB);dw=REAL(DW);dlet = REAL(DLET); + dH = (double *)CALLOC((size_t) p2*(nsp+nth),sizeof(double)); + for (j=0;j=nth) { /* it's a smoothing parameter */ + S = VECTOR_ELT(rS, j-nth); /* Writing R Extensions 5.9.6 */ + rSj = REAL(S);q = ncols(S); + F77_CALL(dgemm)(&ntrans,&trans,&p,&p,&q,sp+j-nth,rSj,&p,rSj,&p,&xx,dH+j*p2,&p FCONE FCONE); /* X'diag(dw[,j])X + lambda_j S_j */ + } + } + FREE(dwX); + } else if (deriv<0) dlet = REAL(DLET); /* for storing the coefficient changes for each fold */ + + + #ifdef _OPENMP + /* schedule: static, dynamic or guided - seems hard to do better than guided */ +#pragma omp parallel for schedule(guided) private(i,ii,io,i0,io0,j,jj,l,ki,q,alpha,nddbuf,pdef,xx,z,wxp,xip,xp,xip0,p0,p1,p3,dp,gp,dgp,bp,bp1,wp,dbp,w1ki,w2ki,tid) num_threads(nt) + #endif + for (i=0;i0) for (l=0;l0) FREE(dH); + FREE(work);if (!use_minres) FREE(iwork); + for (j=0,i=0;imaxn) maxn = i-ii; ii = i; + } + Xi = (double *)CALLOC((size_t) p*maxn,sizeof(double)); /* holds sub-matrix removed for this neighbourhood */ + wXi = (double *)CALLOC((size_t) p*maxn,sizeof(double)); /* equivalent pre-multiplied by diag(w2) */ + dwXi = (double *)CALLOC((size_t) p*maxn,sizeof(double)); /* equivalent pre-multiplied by d diag(w2)/d rho_j */ + R0 = (double *)CALLOC((size_t) p2,sizeof(double));Rb = (double *)CALLOC((size_t) p2,sizeof(double)); + dwX = (double *)CALLOC((size_t) p*n,sizeof(double)); + ddbuf = (double *)CALLOC((size_t) p*maxn,sizeof(double)); /* buffer for downdates that spoil +ve def */ + + xx=1.0;z=0.0; + + if (deriv) { /* derivarives of Hessian, dH/drho_j, needed */ + db=REAL(DB);dw=REAL(DW);dlet = REAL(DLET); + dH = (double *)CALLOC((size_t) p2*(nsp+nth),sizeof(double)); + for (j=0;j=nth) { /* it's a smoothing parameter */ + S = VECTOR_ELT(rS, j-nth); /* Writing R Extensions 5.9.6 */ + rSj = REAL(S);q = ncols(S); + F77_CALL(dgemm)(&ntrans,&trans,&p,&p,&q,sp+j-nth,rSj,&p,rSj,&p,&xx,dH+j*p2,&p FCONE FCONE); /* X'diag(dw[,j])X + lambda_j S_j */ + } + } + } + FREE(dwX); + for (io=ii=0,i=0;ij) { ii=j;j=i;i=ii;} + ii = (i*(2*K-i+1))/2 + j-i; + return(ii); +} /* i2f */ + +static inline int i3f(int i,int j,int k,int K) { +/* Suppose we fill an array... + for (m=i=0;i=j&&j>=i)) { + if (i>j) {ii=j;j=i;i=ii;} + if (j>k) {ii=j;j=k;k=ii;} + } + ii = (i*(3*K*(K+1)+(i-1)*(i-3*K-2)))/6 + ((j-i)*(2*K+1-i-j))/2+k-j; + return(ii); +} /* i3f */ + +SEXP ncvls(SEXP x,SEXP JJ,SEXP h,SEXP hi,SEXP dH,SEXP L1, SEXP L2,SEXP L3,SEXP IND, SEXP MI, SEXP M, SEXP K,SEXP BETA, + SEXP ETACV,SEXP DETACV,SEXP DETA,SEXP DB,SEXP DERIV) { +/* This computes the NCV for GAMLSS families. X[,jj[[i]]] is the model matrix for the ith linear predictor. + H is the penalized Hessian, and Hi its inverse (or an approximation to it since its used as a pre-conditioner). + dH[[i]] is the derivarive of H w.r.t. log(sp[i]); k[m[i-1]:(m[i])] index the neighbours of the ind[i]th point + (including ind[i],usually), m[-1]=0 by convention. beta - model coefficients. + lj contains jth derivatives of the likelihood w.r.t. the lp for each datum. + deta and dbeta are matrices with derivaives of the lp's and coefs in their cols. 
deta has the lps stacked in each + column. + The perturbed etas will be returned in etacv: if nm is the length of ind,then eta[q*nm+i] is the ith element of + qth perturbed linear predictor. + The derivatives of the perturbed linear predictors are in deta: detacv[q*nm+i + l*(np*nlp)]] is the ith element of + deriv of qth lp w.r.t. lth log sp. + BUG? Offset handling!! +*/ + double *X,*H,*Hi,*l1,*l2,*l3=NULL,*beta,*g,*Hp,xx,z,*d,*d1,*cgwork,*eta,*deta,v,*db=NULL,*dbp,*detacv,*dh; + int **jj,*jjl,*jjq,*ind,*m,*k,n,p,nm,nlp,*plp,ii,i,j,i0,i1,l,ln,ki,p2,q,r,l2i,one=1,kk,nsp,iter1=0,iter=0,deriv,*mi,io,io0,no; + SEXP JJp,kr,DH; + char ntrans = 'N'; + p = length(BETA);p2 = p*p; + n = nrows(x);deriv = asInteger(DERIV); + M = PROTECT(coerceVector(M,INTSXP)); + MI = PROTECT(coerceVector(MI,INTSXP)); + IND = PROTECT(coerceVector(IND,INTSXP)); + K = PROTECT(coerceVector(K,INTSXP)); /* otherwise R might be storing as double on entry */ + mi = INTEGER(MI); m = INTEGER(M); k = INTEGER(K);ind = INTEGER(IND); + l1 = REAL(L1);l2=REAL(L2); + nm = length(M);nlp = length(JJ);nsp = length(dH);no=length(IND); + eta = REAL(ETACV);H=REAL(h);Hi = REAL(hi); + beta = REAL(BETA); + + if (deriv) { + l3=REAL(L3);deta=REAL(DETA);detacv=REAL(DETACV);db = REAL(DB); + } + /* unpack the jj indices to here in order to avoid repeated list lookups withn loop */ + jj = (int **)CALLOC((size_t) nlp,sizeof(int *)); + plp = (int *)CALLOC((size_t) nlp,sizeof(int)); + for (l=0;ll) Hp[jjl[j]*p+jjq[r]] += xx; + } + } + } + } /* lp loop */ + } /* neighbour loop */ + xx = 0.0;z=1.0; + F77_CALL(dgemv)(&ntrans,&p,&p,&z,Hi,&p,g,&one,&xx,d,&one FCONE); /* initial step Hi g */ + kk=CG(Hp,Hi,g,d,p,1e-13,cgwork); /* d is approx change in beta caused by dropping y_i and its neighbours */ + if (kk<0) { + Rprintf("npd! ");kk = -kk; + } + + if (iter < kk) iter=kk; + /* now create the linear predictors for the ith point */ + for (;iomaxn) maxn = i-ii; ii = i; + } + + + X = REAL(x); + g = (double *)CALLOC((size_t) 3*p+nlp,sizeof(double)); /* gradient change */ + d = g+p; /* change in beta */ + d1 = d + p; /* deriv of above */ + b = d1 + p; /* multipliers on spurious leading diagonal blocks */ + R0 = (double *)CALLOC((size_t) p2,sizeof(double)); /* chol factor of perturbed Hessian */ + Rb = (double *)CALLOC((size_t) p2,sizeof(double)); /* back up of chol factor of perturbed Hessian, in case of downdate failures */ + for (io=ii=0,i=0;i0) { /* add or subtract correction */ + j=1; + } else { /* subtract */ + for (p0=Rb,p3=R0,j=0;jmaxn) maxn = i-ii; ii = i; + } + #ifdef _OPENMP + nt = asInteger(NT); + #endif + + X = REAL(x); + g = (double *)CALLOC((size_t) (3*p+nlp)*nt,sizeof(double)); /* gradient change */ + d = g + p*nt; /* change in beta */ + d1 = d + p*nt; /* deriv of above */ + b = d1 + p*nt; /* multipliers on spurious leading diagonal blocks - length nlp*nt */ + R0 = (double *)CALLOC((size_t) p2*nt,sizeof(double)); /* chol factor of perturbed Hessian */ + Rb = (double *)CALLOC((size_t) p2*nt,sizeof(double)); /* back up of chol factor of perturbed Hessian, in case of downdate failures */ + if (use_minres) { + nwork = p*(maxn*nlp+7)+maxn; + } else { + niwork = maxn*nlp; /* max rank of low rank downdate */ + iwork = (int *)CALLOC((size_t)niwork*nt,sizeof(int)); + /* workspace query (d not referenced) ... 
*/ + nwork = -1;z = 1.0;i=1; + F77_CALL(dsysv)(&uplo,&niwork,&i,d,&niwork,iwork,d,&niwork,&xx,&nwork,&j FCONE); + nwork = (int) ceil(xx); + nwork += maxn*nlp*(p+1+maxn*nlp); + } + work = (double *)CALLOC((size_t) nwork*nt,sizeof(double)); + /* need to create buffer for skipped down-dates */ + buffer_size = p*maxn*nlp; + ddbuf = (double *)CALLOC((size_t) buffer_size*nt,sizeof(double)); + error = (int *)CALLOC((size_t)nt,sizeof(int)); + #ifdef _OPENMP + /* schedule: static, dynamic or guided - seems hard to do better than guided */ +#pragma omp parallel for schedule(guided) private(i,ii,i0,io,io0,tid,l,pdef,nddbuf,ki,j,l2i,jjl,ln,alpha0,jjq,alpha,xx,p0,p3,q,i1,DH,dh,z,v,r,kk,dbp) num_threads(nt) + #endif + for (i=0;i0) { /* add or subtract correction */ + j=1; + } else { /* subtract */ + for (p0=Rb+p2*tid,p3=R0+tid*p2,j=0;jmaxn) maxn = i-ii; ii = i; + } + #ifdef _OPENMP + nt = asInteger(NT); + #endif + + X = REAL(x); + g = (double *)CALLOC((size_t) (3*p+nlp)*nt,sizeof(double)); /* gradient change */ + d = g + p*nt; /* change in beta */ + d1 = d + p*nt; /* deriv of above */ + b = d1 + p*nt; /* multipliers on spurious leading diagonal blocks - length nlp*nt */ + R0 = (double *)CALLOC((size_t) p2*nt,sizeof(double)); /* chol factor of perturbed Hessian */ + Rb = (double *)CALLOC((size_t) p2*nt,sizeof(double)); /* back up of chol factor of perturbed Hessian, in case of downdate failures */ + niwork = maxn*nlp*(nlp+1)/2; /* max rank of low rank downdate */ + if (use_minres) { + nwork = p*(niwork+7)+niwork; + } else { + iwork = (int *)CALLOC((size_t)niwork*nt,sizeof(int)); + /* workspace query (d not referenced) ... */ + nwork = -1;z = 1.0;i=1; + F77_CALL(dsysv)(&uplo,&niwork,&i,d,&niwork,iwork,d,&niwork,&xx,&nwork,&j FCONE); + nwork = (int) ceil(xx); + nwork += niwork*(p+1+niwork); + } + work = (double *)CALLOC((size_t) nwork*nt,sizeof(double)); + /* need to create buffer for skipped down-dates */ + buffer_size = p*niwork; + ddbuf = (double *)CALLOC((size_t) buffer_size*nt,sizeof(double)); + error = (int *)CALLOC((size_t)nt,sizeof(int)); + #ifdef _OPENMP + /* schedule: static, dynamic or guided - seems hard to do better than guided */ +#pragma omp parallel for schedule(guided) private(i,ii,i0,io,io0,tid,l,pdef,nddbuf,ki,j,l2i,jjl,ln,alpha0,jjq,alpha,xx,p0,p3,q,i1,DH,dh,z,v,r,kk,dbp) num_threads(nt) + #endif + for (i=0;i0)||(kk&&alpha<0)) { /* updates only on first pass, downdates only on second */ + xx = sqrt(fabs(alpha)); + for (p0=d+p*tid,j=0;j0) j=1; else { + for (p0=Rb+p2*tid,p3=R0+tid*p2,j=0;j0)||(kk&&alpha<0)) { /* updates only on first pass, downdates only on second */ + xx = sqrt(fabs(alpha));for (p0=d+tid*p,j=0;j0) { /* add or subtract correction? */ + j=1; /* add */ + } else { /* subtract */ + for (p0=Rb+p2*tid,p3=R0+tid*p2,j=0;jr,sizeof(int)); /* I[i] is the row of Ain containing ith active constraint */ fixed=(int *)CALLOC((size_t) p->r,sizeof(int)); /* fixed[i] is set to 1 when the corresponding inequality constraint is to be left in regardless of l.m. 
estimate */ @@ -456,9 +456,7 @@ Pd=initmat(y->r,1);pz=initmat(p->r,1);pk=initmat(p->r,1); tk=0; /* The number of inequality constraints currently active */ /*printf("\nLSQ");*/ - while(1) - { iter++; - /* Form Pd=Py-PXp and minimize ||R pz - Pd|| */ + while(1) { /* Form Pd=Py-PXp and minimize ||R pz - Pd|| */ vmult(&PX,p,&Pd,0); /* Pd = PXp */ for (i=0;ir-tk-Af->r; /* Restrict attention to QR factor of PXZ */ @@ -512,7 +510,7 @@ } } } -} +} /* QPCLS */ void PCLS(matrix *X,matrix *p,matrix *y,matrix *w,matrix *Ain,matrix *b, @@ -547,7 +545,7 @@ { int i,j,k,n; matrix z,F,W,Z,B; - double x,xx,*p1,*C; + double *p1,*C; /* form transformed data vector z */ if (m>0) z=initmat(y->r+p->r,1);else z=initmat(y->r,1); @@ -576,8 +574,9 @@ QPCLS(&Z,&F,p,&z,Ain,b,Af,active); /* note that at present Z is full not HH */ /* working out value of objective at minimum */ B=initmat(z.r,1);matmult(B,F,*p,0,0); - xx=0.0;for (i=0;inzmax) nzmax=j; /* largest amount of NZ storage required by any eleemnt of Xs */ + if (j>nzmax) nzmax=j; /* largest amount of NZ storage required by any element of Xs */ /* now Cache its transpose... */ spalloc(Xt+i,Xs[i].m,j); cs_trans(Xs[i].p,Xs[i].i,Xs[i].x,Xt[i].p,Xt[i].i,Xt[i].x,iwork,Xs[i].m,Xs[i].c);