diff -Nru r-cran-pscbs-0.63.0/cran-comments.md r-cran-pscbs-0.64.0/cran-comments.md --- r-cran-pscbs-0.63.0/cran-comments.md 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/cran-comments.md 2018-08-12 21:30:44.000000000 +0000 @@ -1,39 +1,43 @@ -# CRAN submission PSCBS 0.63.0 +# CRAN submission PSCBS 0.64.0 -on 2017-06-27 +on 2018-08-12 -I've verified that this submission causes no issues for any of the 11 reverse (recursive) package dependencies available on CRAN and Bioconductor. +I've verified that this submission causes no issues for any of the 4 reverse package dependencies available on CRAN and Bioconductor. Thanks in advance +## Notes not sent to CRAN + ### R CMD check --as-cran validation The package has been verified using `R CMD check --as-cran` on: -* Platform x86_64-apple-darwin13.4.0 (64-bit) [Travis CI]: - - R version 3.3.3 (2017-03-06) - - R version 3.4.0 (2017-04-21) - +* Platform x86_64-apple-darwin15.6.0 (64-bit) [Travis CI]: + - R version 3.4.4 (2018-03-15) + - R version 3.5.0 (2018-04-23) + * Platform x86_64-unknown-linux-gnu (64-bit) [Travis CI]: - - R version 3.3.3 (2017-03-06) - - R version 3.4.0 (2017-04-21) - - R Under development (unstable) (2017-06-27 r72858) + - R version 3.4.4 (2017-01-27) + - R version 3.5.0 (2017-01-27) + - R Under development (unstable) (2018-08-12 r75113) * Platform x86_64-pc-linux-gnu (64-bit): - R version 3.2.0 (2015-04-16) - - R version 3.2.5 (2016-04-14) - - R version 3.4.0 (2017-04-21) - -* Platform i686-pc-linux-gnu (32-bit): + - R version 3.3.0 (2016-05-03) - R version 3.4.0 (2017-04-21) * Platform i386-w64-mingw32 (32-bit) [Appveyor CI]: - - R version 3.4.0 (2017-04-21) + - R version 3.5.1 (2018-07-02) * Platform x86_64-w64-mingw32/x64 (64-bit) [Appveyor CI]: - - R version 3.4.0 (2017-04-21) + - R version 3.5.1 (2018-07-02) * Platform x86_64-w64-mingw32/x64 (64-bit) [win-builder]: - - R version 3.4.0 (2017-04-21) - - R Under development (unstable) (2017-06-26 r72857) + - R version 3.5.1 (2018-07-02) + - R Under development (unstable) (2018-08-11 r75106) + +The following setups were skipped due to non-availability: + +* Platform i686-pc-linux-gnu (32-bit): + - R version 3.4.4 (2018-03-15) diff -Nru r-cran-pscbs-0.63.0/debian/changelog r-cran-pscbs-0.64.0/debian/changelog --- r-cran-pscbs-0.63.0/debian/changelog 2018-06-18 05:37:02.000000000 +0000 +++ r-cran-pscbs-0.64.0/debian/changelog 2018-08-13 08:04:34.000000000 +0000 @@ -1,3 +1,11 @@ +r-cran-pscbs (0.64.0-1) unstable; urgency=medium + + * Team upload. + * New upstream version + * Standards-Version: 4.2.0 + + -- Andreas Tille Mon, 13 Aug 2018 10:04:34 +0200 + r-cran-pscbs (0.63.0-2) unstable; urgency=medium * Team upload. diff -Nru r-cran-pscbs-0.63.0/debian/control r-cran-pscbs-0.64.0/debian/control --- r-cran-pscbs-0.63.0/debian/control 2018-06-18 05:37:02.000000000 +0000 +++ r-cran-pscbs-0.64.0/debian/control 2018-08-13 08:04:34.000000000 +0000 @@ -15,7 +15,7 @@ r-bioc-dnacopy, r-cran-listenv, r-cran-future (>= 1.5.0) -Standards-Version: 4.1.4 +Standards-Version: 4.2.0 Vcs-Browser: https://salsa.debian.org/r-pkg-team/r-cran-pscbs Vcs-Git: https://salsa.debian.org/r-pkg-team/r-cran-pscbs.git Homepage: https://cran.r-project.org/package=PSCBS diff -Nru r-cran-pscbs-0.63.0/DESCRIPTION r-cran-pscbs-0.64.0/DESCRIPTION --- r-cran-pscbs-0.63.0/DESCRIPTION 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/DESCRIPTION 2018-08-12 21:30:44.000000000 +0000 @@ -1,5 +1,5 @@ Package: PSCBS -Version: 0.63.0 +Version: 0.64.0 Depends: R (>= 3.2.0), utils @@ -24,7 +24,6 @@ Recommended: Hmisc VignetteBuilder: R.rsp -Date: 2017-06-27 Title: Analysis of Parent-Specific DNA Copy Numbers Authors@R: c( person("Henrik", "Bengtsson", role=c("aut", "cre", "cph"), diff -Nru r-cran-pscbs-0.63.0/.gitignore r-cran-pscbs-0.64.0/.gitignore --- r-cran-pscbs-0.63.0/.gitignore 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/.gitignore 2018-08-12 21:30:44.000000000 +0000 @@ -16,4 +16,7 @@ .future .ghi .issues - +.make +revdep/data.sqlite +revdep/checks/* +revdep/library/* diff -Nru r-cran-pscbs-0.63.0/incl/OlshenA_etal_2011.Rd r-cran-pscbs-0.64.0/incl/OlshenA_etal_2011.Rd --- r-cran-pscbs-0.63.0/incl/OlshenA_etal_2011.Rd 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/incl/OlshenA_etal_2011.Rd 2018-08-12 21:30:44.000000000 +0000 @@ -1 +1 @@ -A.B. Olshen, H. Bengtsson, P. Neuvial, P.T. Spellman, R.A. Olshen, V.E. Seshan, \emph{Parent-specific copy number in paired tumor-normal studies using circular binary segmentation}, Bioinformatics, 2011 +A.B. Olshen, H. Bengtsson, P. Neuvial, P.T. Spellman, R.A. Olshen, V.E. Seshan, \emph{Parent-specific copy number in paired tumor-normal studies using circular binary segmentation}, Bioinformatics, 2011 diff -Nru r-cran-pscbs-0.63.0/incl/OlshenVenkatraman_2004.Rd r-cran-pscbs-0.64.0/incl/OlshenVenkatraman_2004.Rd --- r-cran-pscbs-0.63.0/incl/OlshenVenkatraman_2004.Rd 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/incl/OlshenVenkatraman_2004.Rd 2018-08-12 21:30:44.000000000 +0000 @@ -1 +1 @@ -A.B. Olshen, E.S. Venkatraman (aka Venkatraman E. Seshan), R. Lucito and M. Wigler, \emph{Circular binary segmentation for the analysis of array-based DNA copy number data}, Biostatistics, 2004 +A.B. Olshen, E.S. Venkatraman (aka Venkatraman E. Seshan), R. Lucito and M. Wigler, \emph{Circular binary segmentation for the analysis of array-based DNA copy number data}, Biostatistics, 2004 diff -Nru r-cran-pscbs-0.63.0/incl/segmentByCBS,knownSegments.Rex r-cran-pscbs-0.64.0/incl/segmentByCBS,knownSegments.Rex --- r-cran-pscbs-0.63.0/incl/segmentByCBS,knownSegments.Rex 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/incl/segmentByCBS,knownSegments.Rex 2018-08-12 21:30:44.000000000 +0000 @@ -18,8 +18,8 @@ w[650:800] <- 0.001 -subplots(6, ncol=1) -par(mar=c(2,1,1,1)+0.1); +R.utils::subplots(6, ncol=1) +par(mar=c(2,1,1,1)+0.1) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Segmentation @@ -88,7 +88,6 @@ print(fit5) plotTracks(fit5) abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3) -stopifnot(nbrOfSegments(fit5) == nrow(knownSegments)); # One can also force a separator between two segments by setting diff -Nru r-cran-pscbs-0.63.0/incl/segmentByCBS,tests.Rex r-cran-pscbs-0.64.0/incl/segmentByCBS,tests.Rex --- r-cran-pscbs-0.63.0/incl/segmentByCBS,tests.Rex 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/incl/segmentByCBS,tests.Rex 2018-08-12 21:30:44.000000000 +0000 @@ -65,7 +65,7 @@ # Appending CBS results fit1 <- segmentByCBS(y, chromosome=1, x=x) fit2 <- segmentByCBS(y, chromosome=2, x=x) -fit <- append(fit1, fit2) +fit <- c(fit1, fit2) print(fit) plotTracks(fit, subset=NULL, lwd=2, Clim=c(-3,3)) diff -Nru r-cran-pscbs-0.63.0/incl/VenkatramanOlshen_2007.Rd r-cran-pscbs-0.64.0/incl/VenkatramanOlshen_2007.Rd --- r-cran-pscbs-0.63.0/incl/VenkatramanOlshen_2007.Rd 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/incl/VenkatramanOlshen_2007.Rd 2018-08-12 21:30:44.000000000 +0000 @@ -1 +1 @@ -E.S. Venkatraman and A.B. Olshen, \emph{A faster circular binary segmentation algorithm for the analysis of array CGH data}, Bioinformatics, 2007 +E.S. Venkatraman and A.B. Olshen, \emph{A faster circular binary segmentation algorithm for the analysis of array CGH data}, Bioinformatics, 2007 diff -Nru r-cran-pscbs-0.63.0/INSTALL.md r-cran-pscbs-0.64.0/INSTALL.md --- r-cran-pscbs-0.63.0/INSTALL.md 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/INSTALL.md 2018-08-12 21:30:44.000000000 +0000 @@ -10,12 +10,12 @@ same. To reset to non-parallel processing, use `future::plan("sequential")`. To configure this automatically whenever the package is loaded, see -future vignette '[A Future for R: Controlling Default Future Strategy](https://cran.r-project.org/web/packages/future/vignettes/future-4-startup.html)'. +future vignette '[A Future for R: Controlling Default Future Strategy](https://cran.r-project.org/web/packages/future/vignettes/future-5-startup.html)'. <% } %> ## Installation -R package <%=pkg()%> is available on [CRAN](http://cran.r-project.org/package=<%=pkg()%>) and can be installed in R as: +R package <%=pkg()%> is available on [CRAN](https://cran.r-project.org/package=<%=pkg()%>) and can be installed in R as: ```r install.packages('<%=pkg()%>') ``` diff -Nru r-cran-pscbs-0.63.0/Makefile r-cran-pscbs-0.64.0/Makefile --- r-cran-pscbs-0.63.0/Makefile 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/Makefile 2018-08-12 21:30:44.000000000 +0000 @@ -1 +1 @@ -include .make/Makefile +include .make/Makefile diff -Nru r-cran-pscbs-0.63.0/man/append.AbstractCBS.Rd r-cran-pscbs-0.64.0/man/append.AbstractCBS.Rd --- r-cran-pscbs-0.63.0/man/append.AbstractCBS.Rd 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/man/append.AbstractCBS.Rd 2018-08-12 21:30:44.000000000 +0000 @@ -20,7 +20,7 @@ } \usage{ -\method{append}{AbstractCBS}(...) +\method{append}{AbstractCBS}(x, other, addSplit=TRUE, ...) } \arguments{ diff -Nru r-cran-pscbs-0.63.0/man/append.CBS.Rd r-cran-pscbs-0.64.0/man/append.CBS.Rd --- r-cran-pscbs-0.63.0/man/append.CBS.Rd 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/man/append.CBS.Rd 1970-01-01 00:00:00.000000000 +0000 @@ -1,42 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Do not modify this file since it was automatically generated from: -% -% CBS.RESTRUCT.R -% -% by the Rdoc compiler part of the R.oo package. -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -\name{append.CBS} -\alias{append.CBS} -\alias{CBS.append} -\alias{append,CBS-method} - -\title{Appends one segmentation result to another} - -\description{ - Appends one segmentation result to another. -} - -\usage{ -\method{append}{CBS}(x, other, addSplit=TRUE, ...) -} - -\arguments{ - \item{x, other}{The two \code{\link{CBS}} objects to be combined.} - \item{other}{A \code{\link{PSCBS}} object.} - \item{addSplit}{If \code{\link[base:logical]{TRUE}}, a "divider" is added between chromosomes.} - \item{...}{Not used.} -} - -\value{ - Returns a \code{\link{CBS}} object of the same class as argument \code{x}. -} - -\author{Henrik Bengtsson} - -\seealso{ - For more information see \code{\link{CBS}}. -} -\keyword{internal} -\keyword{methods} diff -Nru r-cran-pscbs-0.63.0/man/append.PSCBS.Rd r-cran-pscbs-0.64.0/man/append.PSCBS.Rd --- r-cran-pscbs-0.63.0/man/append.PSCBS.Rd 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/man/append.PSCBS.Rd 1970-01-01 00:00:00.000000000 +0000 @@ -1,42 +0,0 @@ -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -% Do not modify this file since it was automatically generated from: -% -% PSCBS.RESTRUCT.R -% -% by the Rdoc compiler part of the R.oo package. -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -\name{append.PSCBS} -\alias{append.PSCBS} -\alias{PSCBS.append} -\alias{append,PSCBS-method} - -\title{Appends one segmentation result to another} - -\description{ - Appends one segmentation result to another. -} - -\usage{ -\method{append}{PSCBS}(x, other, addSplit=TRUE, ...) -} - -\arguments{ - \item{x, other}{The two \code{\link{PSCBS}} objects to be combined.} - \item{other}{A \code{\link{PSCBS}} object.} - \item{addSplit}{If \code{\link[base:logical]{TRUE}}, a "divider" is added between chromosomes.} - \item{...}{Not used.} -} - -\value{ - Returns a \code{\link{PSCBS}} object of the same class as argument \code{x}. -} - -\author{Henrik Bengtsson} - -\seealso{ - For more information see \code{\link{PSCBS}}. -} -\keyword{internal} -\keyword{methods} diff -Nru r-cran-pscbs-0.63.0/man/callOutliers.CBS.Rd r-cran-pscbs-0.64.0/man/callOutliers.CBS.Rd --- r-cran-pscbs-0.63.0/man/callOutliers.CBS.Rd 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/man/callOutliers.CBS.Rd 2018-08-12 21:30:44.000000000 +0000 @@ -37,7 +37,7 @@ } \section{The UCSF caller}{ - If \code{method == "ucsf-mad"}, then loci are called using [1]; + If \code{method == "ucsf-mad"}, then loci are called using [1] "Finally, to identify single technical or biological outliers such as high level amplifications, the presence of the outliers within a segment was allowed by assigning the original observed log2ratio diff -Nru r-cran-pscbs-0.63.0/man/CBS.Rd r-cran-pscbs-0.64.0/man/CBS.Rd --- r-cran-pscbs-0.63.0/man/CBS.Rd 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/man/CBS.Rd 2018-08-12 21:30:44.000000000 +0000 @@ -47,8 +47,8 @@ \section{Fields and Methods}{ \bold{Methods:}\cr \tabular{rll}{ - \tab \code{append} \tab -\cr \tab \code{as} \tab -\cr + \tab \code{c} \tab -\cr \tab \code{estimateStandardDeviation} \tab -\cr \tab \code{plotTracks} \tab -\cr \tab \code{pruneBySdUndo} \tab -\cr diff -Nru r-cran-pscbs-0.63.0/man/c.CBS.Rd r-cran-pscbs-0.64.0/man/c.CBS.Rd --- r-cran-pscbs-0.63.0/man/c.CBS.Rd 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-pscbs-0.64.0/man/c.CBS.Rd 2018-08-12 21:30:44.000000000 +0000 @@ -0,0 +1,41 @@ +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +% Do not modify this file since it was automatically generated from: +% +% CBS.RESTRUCT.R +% +% by the Rdoc compiler part of the R.oo package. +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +\name{c.CBS} +\alias{c.CBS} +\alias{CBS.c} +\alias{c,CBS-method} +\alias{c.PSCBS} + +\title{Concatenates segmentation results} + +\description{ + Concatenates segmentation results. +} + +\usage{ +\method{c}{CBS}(..., addSplit=TRUE) +} + +\arguments{ + \item{\dots}{One or more \code{\link{AbstractCBS}} objects to be combined.} + \item{addSplit}{If \code{\link[base:logical]{TRUE}}, a "divider" is added between chromosomes.} +} + +\value{ + Returns an \code{\link{AbstractCBS}} object of the same class in \dots. +} + +\author{Henrik Bengtsson} + +\seealso{ + For more information see \code{\link{CBS}}. +} +\keyword{internal} +\keyword{methods} diff -Nru r-cran-pscbs-0.63.0/man/NonPairedPSCBS.Rd r-cran-pscbs-0.64.0/man/NonPairedPSCBS.Rd --- r-cran-pscbs-0.63.0/man/NonPairedPSCBS.Rd 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/man/NonPairedPSCBS.Rd 2018-08-12 21:30:44.000000000 +0000 @@ -50,7 +50,7 @@ \bold{Methods inherited from PSCBS}:\cr -append, as.data.frame, drawChangePoints, extractChromosomes, extractWIG, getLocusData, getLocusSignalNames, getSegmentTrackPrefixes, isLocallyPhased, isSegmentSplitter, normalizeTotalCNs, writeSegments +as.data.frame, c, drawChangePoints, extractChromosomes, extractWIG, getLocusData, getLocusSignalNames, getSegmentTrackPrefixes, isLocallyPhased, isSegmentSplitter, normalizeTotalCNs, writeSegments \bold{Methods inherited from AbstractCBS}:\cr adjustPloidyScale, all.equal, append, as.data.frame, clearCalls, drawChangePoints, drawKnownSegments, dropChangePoint, dropChangePoints, dropRegion, dropRegions, extractCNs, extractChromosome, extractChromosomes, extractRegions, extractSegments, extractWIG, getChangePoints, getChromosomeOffsets, getChromosomeRanges, getChromosomes, getLocusData, getLocusSignalNames, getMeanEstimators, getSampleName, getSegmentSizes, getSegmentTrackPrefixes, getSegments, load, mergeThreeSegments, mergeTwoSegments, nbrOfChangePoints, nbrOfChromosomes, nbrOfLoci, nbrOfSegments, normalizeTotalCNs, ploidy, ploidy<-, plotTracks, print, pruneByDP, pruneByHClust, renameChromosomes, report, resegment, resetSegments, sampleCNs, sampleName, sampleName<-, save, seqOfSegmentsByDP, setLocusData, setMeanEstimators, setPloidy, setSampleName, setSegments, shiftTCN, tileChromosomes, updateMeans, writeWIG diff -Nru r-cran-pscbs-0.63.0/man/PairedPSCBS.Rd r-cran-pscbs-0.64.0/man/PairedPSCBS.Rd --- r-cran-pscbs-0.63.0/man/PairedPSCBS.Rd 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/man/PairedPSCBS.Rd 2018-08-12 21:30:44.000000000 +0000 @@ -68,7 +68,7 @@ \bold{Methods inherited from PSCBS}:\cr -append, as.data.frame, drawChangePoints, extractChromosomes, extractWIG, getLocusData, getLocusSignalNames, getSegmentTrackPrefixes, isLocallyPhased, isSegmentSplitter, normalizeTotalCNs, writeSegments +as.data.frame, c, drawChangePoints, extractChromosomes, extractWIG, getLocusData, getLocusSignalNames, getSegmentTrackPrefixes, isLocallyPhased, isSegmentSplitter, normalizeTotalCNs, writeSegments \bold{Methods inherited from AbstractCBS}:\cr adjustPloidyScale, all.equal, append, as.data.frame, clearCalls, drawChangePoints, drawKnownSegments, dropChangePoint, dropChangePoints, dropRegion, dropRegions, extractCNs, extractChromosome, extractChromosomes, extractRegions, extractSegments, extractWIG, getChangePoints, getChromosomeOffsets, getChromosomeRanges, getChromosomes, getLocusData, getLocusSignalNames, getMeanEstimators, getSampleName, getSegmentSizes, getSegmentTrackPrefixes, getSegments, load, mergeThreeSegments, mergeTwoSegments, nbrOfChangePoints, nbrOfChromosomes, nbrOfLoci, nbrOfSegments, normalizeTotalCNs, ploidy, ploidy<-, plotTracks, print, pruneByDP, pruneByHClust, renameChromosomes, report, resegment, resetSegments, sampleCNs, sampleName, sampleName<-, save, seqOfSegmentsByDP, setLocusData, setMeanEstimators, setPloidy, setSampleName, setSegments, shiftTCN, tileChromosomes, updateMeans, writeWIG diff -Nru r-cran-pscbs-0.63.0/man/pruneByDP.AbstractCBS.Rd r-cran-pscbs-0.64.0/man/pruneByDP.AbstractCBS.Rd --- r-cran-pscbs-0.63.0/man/pruneByDP.AbstractCBS.Rd 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/man/pruneByDP.AbstractCBS.Rd 2018-08-12 21:30:44.000000000 +0000 @@ -37,7 +37,7 @@ \examples{\dontrun{ # Drop two segments - fitP <- pruneByDP(fit, nbrOfSegments=-2); + fitP <- pruneByDP(fit, nbrOfSegments=-2) }} \author{Henrik Bengtsson, Pierre Neuvial} diff -Nru r-cran-pscbs-0.63.0/man/pruneByHClust.AbstractCBS.Rd r-cran-pscbs-0.64.0/man/pruneByHClust.AbstractCBS.Rd --- r-cran-pscbs-0.63.0/man/pruneByHClust.AbstractCBS.Rd 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/man/pruneByHClust.AbstractCBS.Rd 2018-08-12 21:30:44.000000000 +0000 @@ -38,7 +38,7 @@ } \examples{\dontrun{ - fitP <- pruneByHClust(fit, h=0.25); + fitP <- pruneByHClust(fit, h=0.25) }} \author{Henrik Bengtsson} diff -Nru r-cran-pscbs-0.63.0/man/PSCBS.Rd r-cran-pscbs-0.64.0/man/PSCBS.Rd --- r-cran-pscbs-0.63.0/man/PSCBS.Rd 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/man/PSCBS.Rd 2018-08-12 21:30:44.000000000 +0000 @@ -45,7 +45,7 @@ \section{Fields and Methods}{ \bold{Methods:}\cr \tabular{rll}{ - \tab \code{append} \tab -\cr + \tab \code{c} \tab -\cr \tab \code{isLocallyPhased} \tab -\cr \tab \code{normalizeTotalCNs} \tab -\cr \tab \code{writeSegments} \tab -\cr diff -Nru r-cran-pscbs-0.63.0/man/segmentByCBS.Rd r-cran-pscbs-0.64.0/man/segmentByCBS.Rd --- r-cran-pscbs-0.63.0/man/segmentByCBS.Rd 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/man/segmentByCBS.Rd 2018-08-12 21:30:44.000000000 +0000 @@ -205,7 +205,7 @@ # Appending CBS results fit1 <- segmentByCBS(y, chromosome=1, x=x) fit2 <- segmentByCBS(y, chromosome=2, x=x) -fit <- append(fit1, fit2) +fit <- c(fit1, fit2) print(fit) plotTracks(fit, subset=NULL, lwd=2, Clim=c(-3,3)) diff -Nru r-cran-pscbs-0.63.0/man/segmentByPairedPSCBS.Rd r-cran-pscbs-0.64.0/man/segmentByPairedPSCBS.Rd --- r-cran-pscbs-0.63.0/man/segmentByPairedPSCBS.Rd 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/man/segmentByPairedPSCBS.Rd 2018-08-12 21:30:44.000000000 +0000 @@ -27,8 +27,8 @@ rho=NULL, chromosome=0, x=NULL, alphaTCN=0.009, alphaDH=0.001, undoTCN=0, undoDH=0, ..., avgTCN=c("mean", "median"), avgDH=c("mean", "median"), flavor=c("tcn&dh", "tcn,dh", "sqrt(tcn),dh", "sqrt(tcn)&dh", "tcn"), tbn=is.null(rho), - preserveScale=getOption("PSCBS/preserveScale", FALSE), joinSegments=TRUE, - knownSegments=NULL, dropMissingCT=TRUE, seed=NULL, verbose=FALSE) + joinSegments=TRUE, knownSegments=NULL, dropMissingCT=TRUE, seed=NULL, verbose=FALSE, + preserveScale=FALSE) } \arguments{ @@ -78,8 +78,6 @@ calling algorithm to be used.} \item{tbn}{If \code{\link[base:logical]{TRUE}}, \code{betaT} is normalized before segmentation using the TumorBoost method [2], otherwise not.} - \item{preserveScale}{Passed to \code{\link[aroma.light]{normalizeTumorBoost}}, - which is only called if \code{tbn} is \code{\link[base:logical]{TRUE}}.} \item{joinSegments}{If \code{\link[base:logical]{TRUE}}, there are no gaps between neighboring segments. If \code{\link[base:logical]{FALSE}}, the boundaries of a segment are defined by the support @@ -98,6 +96,8 @@ set before calling the segmentation method. The random seed is set to its original state when exiting. If \code{\link[base]{NULL}}, it is not set.} \item{verbose}{See \code{\link[R.utils]{Verbose}}.} + \item{preserveScale}{\emph{Deprecated and ignored + will give a warning if specified.}} } \value{ diff -Nru r-cran-pscbs-0.63.0/NAMESPACE r-cran-pscbs-0.64.0/NAMESPACE --- r-cran-pscbs-0.63.0/NAMESPACE 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/NAMESPACE 2018-08-12 21:30:44.000000000 +0000 @@ -149,10 +149,10 @@ # CBS S3method("all.equal", "CBS") -S3method("append", "CBS") S3method("as.character", "CBS") S3method("as.data.frame", "CBS") S3method("as.DNAcopy", "CBS") +S3method("c", "CBS") S3method("callAmplifications", "CBS") S3method("callArms", "CBS") S3method("callGainsAndLosses", "CBS") @@ -232,7 +232,6 @@ S3method("segmentByPairedPSCBS", "data.frame") # default -S3method("append", "default") S3method("callAllelicBalance", "default") S3method("callSegmentationOutliers", "default") S3method("dropSegmentationOutliers", "default") @@ -275,7 +274,6 @@ S3method("arrowsC1C2", "PairedPSCBS") S3method("arrowsDeltaC1C2", "PairedPSCBS") S3method("bootstrapCIs", "PairedPSCBS") -S3method("bootstrapDHByRegion", "PairedPSCBS") S3method("bootstrapSegmentsAndChangepoints", "PairedPSCBS") S3method("bootstrapTCNandDHByRegion", "PairedPSCBS") S3method("calcStatsForCopyNeutralABs", "PairedPSCBS") @@ -351,8 +349,8 @@ S3method("updateMeansTogether", "PairedPSCBS") # PSCBS -S3method("append", "PSCBS") S3method("as.data.frame", "PSCBS") +S3method("c", "PSCBS") S3method("drawChangePoints", "PSCBS") S3method("extractChromosomes", "PSCBS") S3method("getChangePoints", "PSCBS") diff -Nru r-cran-pscbs-0.63.0/NEWS r-cran-pscbs-0.64.0/NEWS --- r-cran-pscbs-0.63.0/NEWS 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/NEWS 2018-08-12 21:30:44.000000000 +0000 @@ -1,6 +1,34 @@ Package: PSCBS ============== +Version: 0.64.0 [2018-08-12] + +NEW FEATURES: + + o Added c() for CBS and PSCBS objects. + +PERFORMANCE: + + o segmentByCBS() no longer performs garbage collection, which happened + indirectly via a system.time() call that does GC by default. + +BUG FIXES: + + o plotTrack() for CBS objects would produce error "Argument 'Clim' is not a + vector: NULL" when the signal type was unknown. Now it will assume + (non-logged) copy-number ratios so it can choose a default 'Clim' range. + +DEPRECATED AND DEFUNCT: + + o Removed bootstrapDHByRegion() which was defunct since 0.44.0 (Feb 2015). + + o append(x, y) for CBS and PSCBS objects is deprecated; use c(x, y) instead. + + o Usage of argument 'preserveScale' for segmentByPairedPSCBS() is now + deprecated and ignored. It's value is now fixed to FALSE, which has been + the default since PSCBS 0.50.0 (Oct 2015). + + Version: 0.63.0 [2017-06-27] SIGNIFICANT CHANGES: diff -Nru r-cran-pscbs-0.63.0/R/006.fixVarArgs.R r-cran-pscbs-0.64.0/R/006.fixVarArgs.R --- r-cran-pscbs-0.63.0/R/006.fixVarArgs.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/006.fixVarArgs.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,16 +1,4 @@ -append <- function(...) UseMethod("append"); +append <- function(...) UseMethod("append") setMethodS3("append", "default", function(...) { - base::append(...); + base::append(...) }) - - -############################################################################ -# HISTORY: -# 2013-10-14 [HB] -# o ROBUSTNESS: The overriding of append() to become a generic -# function does now call base::append() in the default, instead -# of copy the latter. All this will eventually be removed, -# when proper support for c, [, [[ etc. has been added everywhere. -# 2010-10-02 -# o Created to please R CMD check. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/999.DEPRECATED.R r-cran-pscbs-0.64.0/R/999.DEPRECATED.R --- r-cran-pscbs-0.63.0/R/999.DEPRECATED.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/999.DEPRECATED.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,10 +1,10 @@ ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +## DEPRECATED +## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ## DEFUNCT ## - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -## Defunct since v0.41.0 (2015-03-30) -setMethodS3("bootstrapDHByRegion", "PairedPSCBS", function(fit, B=100, statsFcn=function(x) quantile(x, probs=c(0.025, 0.050, 0.95, 0.975)), by=c("betaTN", "betaT"), ..., force=FALSE, verbose=FALSE) { - .Defunct("bootstrapTCNandDHByRegion"); -}, deprecated=TRUE) # bootstrapDHByRegion() diff -Nru r-cran-pscbs-0.63.0/R/999.NonDocumentedObjects.R r-cran-pscbs-0.64.0/R/999.NonDocumentedObjects.R --- r-cran-pscbs-0.63.0/R/999.NonDocumentedObjects.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/999.NonDocumentedObjects.R 2018-08-12 21:30:44.000000000 +0000 @@ -20,9 +20,3 @@ # # @keyword internal #*/########################################################################### - -############################################################################ -# HISTORY: -# 2005-05-15 -# o Created to please R CMD check. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/AbstractCBS.clearCalls.R r-cran-pscbs-0.64.0/R/AbstractCBS.clearCalls.R --- r-cran-pscbs-0.63.0/R/AbstractCBS.clearCalls.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/AbstractCBS.clearCalls.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,30 +1,30 @@ -# Removes all segment calls and corresponding parameter estimates. -setMethodS3("clearCalls", "AbstractCBS", function(fit, ...) { - segs <- fit$output; - params <- fit$params; - - # Drop all calls - excl <- grep("Call$", colnames(segs)); - if (length(excl) > 0L) { - segs <- segs[,-excl]; - } - - # Drop all call parameters (AD HOC!) - for (ff in c("deltaROH", "deltaAB", "deltaLOH")) { - params[[ff]] <- NULL; - } - - fit$output <- segs; - fit$params <- params; - - invisible(fit); -}, protected=TRUE) - - - -############################################################################## -# HISTORY -# 2013-10-24 -# o Added clearCalls() for AbstractCBS. -# o Created. -############################################################################## +# Removes all segment calls and corresponding parameter estimates. +setMethodS3("clearCalls", "AbstractCBS", function(fit, ...) { + segs <- fit$output + params <- fit$params + + # Drop all calls + excl <- grep("Call$", colnames(segs)) + if (length(excl) > 0L) { + segs <- segs[,-excl] + } + + # Drop all call parameters (AD HOC!) + for (ff in c("deltaROH", "deltaAB", "deltaLOH")) { + params[[ff]] <- NULL + } + + fit$output <- segs + fit$params <- params + + invisible(fit) +}, protected=TRUE) + + + +############################################################################## +# HISTORY +# 2013-10-24 +# o Added clearCalls() for AbstractCBS. +# o Created. +############################################################################## diff -Nru r-cran-pscbs-0.63.0/R/AbstractCBS.HCLUST.R r-cran-pscbs-0.64.0/R/AbstractCBS.HCLUST.R --- r-cran-pscbs-0.63.0/R/AbstractCBS.HCLUST.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/AbstractCBS.HCLUST.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,263 +1,235 @@ -###########################################################################/** -# @set "class=AbstractCBS" -# @RdocMethod updateMeansTogether -# @alias updateMeansTogether.CBS -# @alias updateMeansTogether.PairedPSCBS -# -# @title "Updates the CN mean levels jointly in sets of segments" -# -# \description{ -# @get "title" as if they were one large segment. -# The locus-level data is not updated/modified. -# } -# -# @synopsis -# -# \arguments{ -# \item{idxList}{A @list, where each element is an @integer @vector -# specifying segment indices of segments for which the mean levels -# should be calculated jointly.} -# \item{...}{Not used.} -# \item{verbose}{See @see "R.utils::Verbose".} -# } -# -# \value{ -# Returns an object of the same class. -# } -# -# @author "HB" -# -# \seealso{ -# This method is utilized by @seemethod "pruneByHClust". -# } -# -# @keyword internal -#*/########################################################################### -setMethodS3("updateMeansTogether", "AbstractCBS", abstract=TRUE, private=TRUE); - - -###########################################################################/** -# @set "class=AbstractCBS" -# @RdocMethod hclustCNs -# -# @title "Performs a hierarchical clustering of the CN mean levels" -# -# \description{ -# @get "title". -# } -# -# @synopsis -# -# \arguments{ -# \item{size}{Argument passed to @seemethod "sampleCNs".} -# \item{distMethod, hclustMethod}{Argument \code{method} for -# @see "stats::dist" and "stats::hclust", respectively.} -# \item{...}{Not used.} -# \item{verbose}{See @see "R.utils::Verbose".} -# } -# -# \value{ -# Returns a \code{hclust} object as returned by @see "stats::hclust". -# } -# -# @author -# -# \seealso{ -# This method is utilized by @seemethod "pruneByHClust". -# } -# -# @keyword internal -#*/########################################################################### -setMethodS3("hclustCNs", "AbstractCBS", function(fit, size=NULL, distMethod="euclidean", hclustMethod="ward.D", ..., verbose=FALSE) { - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); - if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); - } - - - verbose && enter(verbose, "Hierarchical clustering of segmented copy numbers"); - verbose && enter(verbose, "Extracting/sampling CNs"); - C <- sampleCNs(fit, size=size, splitters=FALSE); - verbose && str(verbose, C); - - # Drop also segments with no data points - ok <- !is.na(C); - ok <- rowAlls(ok); - C <- C[ok,,drop=FALSE]; - verbose && str(verbose, C); - verbose && exit(verbose); - - verbose && enter(verbose, "Calculating distance matrix"); - D <- stats::dist(C, method=distMethod); - verbose && str(verbose, D); - verbose && exit(verbose); - - verbose && enter(verbose, "Clustering"); - - # TODO: Do *weighted* hierarchical clustering - tree <- stats::hclust(D, method=hclustMethod); - verbose && str(verbose, tree); - verbose && exit(verbose); - - verbose && exit(verbose); - - tree; -}, private=TRUE) # hclustCNs() - - - -###########################################################################/** -# @RdocMethod pruneByHClust -# -# @title "Prunes the CN profile by pruning and merging through hierarchical clustering" -# -# \description{ -# @get "title". -# } -# -# @synopsis -# -# \arguments{ -# \item{...}{Arguments passed to @see "stats::cutree", -# particularly either of thresholds \code{h} or \code{k}.} -# \item{size, distMethod, hclustMethod}{Arguments (as well as -# some of \code{...}) passed to @seemethod "hclustCNs".} -# \item{merge}{If @TRUE, consecutive segments that belong to the -# same PSCN cluster will be merged into one large segment.} -# \item{update}{If @TRUE, segment means are updated afterwards, otherwise not.} -# \item{verbose}{See @see "R.utils::Verbose".} -# } -# -# \value{ -# Returns a pruned object of the same class. -# } -# -# \examples{\dontrun{ -# fitP <- pruneByHClust(fit, h=0.25); -# }} -# -# @author -# -# @keyword internal -#*/########################################################################### -setMethodS3("pruneByHClust", "AbstractCBS", function(fit, ..., size=NULL, distMethod="euclidean", hclustMethod="ward.D", merge=TRUE, update=TRUE, verbose=FALSE) { - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); - if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); - } - - - verbose && enter(verbose, "Prune segments by hierarchical clustering"); - verbose && cat(verbose, "Clustering arguments:"); - verbose && str(verbose, c(list(size=size, distMethod=distMethod, hclustMethod=hclustMethod), list(...))); - - verbose && enter(verbose, "Clustering"); - tree <- hclustCNs(fit, size=size, distMethod=distMethod, - hclustMethod=hclustMethod, ..., verbose=less(verbose,5)); - verbose && print(verbose, tree); - verbose && exit(verbose); - - verbose && enter(verbose, "Cutting tree"); - verbose && cat(verbose, "Cutting arguments:"); - verbose && str(verbose, c(list(tree=tree), list(...))); - p <- cutree(tree, ...); - verbose && str(verbose, p); - - # Group segments - idxList <- by(names(p), p, FUN=function(x) { - idxs <- as.integer(as.character(x)); - idxs <- sort(unique(idxs)); - list(idxs); - }); - verbose && str(verbose, idxList); - - verbose && exit(verbose); - - - # Dropping previous segment calls and quantile mean-level estimates. - fit <- resetSegments(fit); - - verbose && enter(verbose, "Merging mean levels of clustered segments"); - fit <- updateMeansTogether(fit, idxList=idxList, verbose=less(verbose, 10)); - verbose && exit(verbose); - - if (merge) { - verbose && enter(verbose, "Merging neighboring segments within each cluster"); - lefts <- c(); - for (ii in seq_along(idxList)) { - verbose && enter(verbose, sprintf("Cluster #%d of %d", ii, length(idxList))); - idxs <- idxList[[ii]]; - verbose && cat(verbose, "Segments in cluster:"); - verbose && str(verbose, idxs); - - # Indices to segments to merge - leftsII <- idxs[which(diff(idxs) == 1L)]; - verbose && cat(verbose, "Left indices of neighboring segments:"); - verbose && str(verbose, leftsII); - - lefts <- c(lefts, leftsII); - verbose && exit(verbose); - } # for (ii ...) - - lefts <- sort(unique(lefts)); - verbose && cat(verbose, "Left indices of segments to be merged:"); - verbose && str(verbose, lefts); - verbose && exit(verbose); - - verbose && enter(verbose, "Merging segments"); - lefts <- rev(lefts); - for (ii in seq_along(lefts)) { - fit <- mergeTwoSegments(fit, left=lefts[ii], update=FALSE); - } # for (ii ...) - verbose && exit(verbose); - } # if (merge) - - if (update) { - verbose && enter(verbose, "Updating segment means"); -## fit <- updateBoundaries(fit, verbose=less(verbose, 50)); - fit <- updateMeans(fit, verbose=less(verbose, 50)); - verbose && exit(verbose); - } - - verbose && exit(verbose); - - fit; -}, protected=TRUE) # pruneByHClust() - - -############################################################################ -# HISTORY: -# 2014-01-12 -# o CLEANUP: Renamed variable 'h' to 'tree' in pruneByHClust(), because -# it could easily be misinterpreted as argument 'h' to cutree(). -# 2013-09-18 -# o WORKAROUND: For R v2.15.3 and before, we need to attach the 'methods' -# package, otherwise we get 'Error in rowAlls(ok) : could not find -# function "loadMethod"' below. This seems to be a bug in R. -# 2013-02-05 -# o Now pruneByHClust() drops any existing segment calls and quantile -# mean-level estimates. -# 2011-12-06 -# o Now pruneByHClust(..., update=TRUE) for AbstractCBS updates the -# mean levels of the merged segments at the end. -# 2011-11-28 -# o Added abstract updateMeansTogether() for AbstractCBS. -# o Dropped kmeansCNs() stub. -# o Added Rdoc comments. -# o Now hclustCNs() also handles segments with missing (C1,C2) levels, -# which for instance can happen after calling ROH. -# 2011-10-14 -# o Implemented hclustCNs() and pruneByHClust() for AbstractCBS. -# o Implemented extractCNs() for PairedPSCBS. -# o Created. -############################################################################ +###########################################################################/** +# @set "class=AbstractCBS" +# @RdocMethod updateMeansTogether +# @alias updateMeansTogether.CBS +# @alias updateMeansTogether.PairedPSCBS +# +# @title "Updates the CN mean levels jointly in sets of segments" +# +# \description{ +# @get "title" as if they were one large segment. +# The locus-level data is not updated/modified. +# } +# +# @synopsis +# +# \arguments{ +# \item{idxList}{A @list, where each element is an @integer @vector +# specifying segment indices of segments for which the mean levels +# should be calculated jointly.} +# \item{...}{Not used.} +# \item{verbose}{See @see "R.utils::Verbose".} +# } +# +# \value{ +# Returns an object of the same class. +# } +# +# @author "HB" +# +# \seealso{ +# This method is utilized by @seemethod "pruneByHClust". +# } +# +# @keyword internal +#*/########################################################################### +setMethodS3("updateMeansTogether", "AbstractCBS", abstract=TRUE, private=TRUE) + + +###########################################################################/** +# @set "class=AbstractCBS" +# @RdocMethod hclustCNs +# +# @title "Performs a hierarchical clustering of the CN mean levels" +# +# \description{ +# @get "title". +# } +# +# @synopsis +# +# \arguments{ +# \item{size}{Argument passed to @seemethod "sampleCNs".} +# \item{distMethod, hclustMethod}{Argument \code{method} for +# @see "stats::dist" and "stats::hclust", respectively.} +# \item{...}{Not used.} +# \item{verbose}{See @see "R.utils::Verbose".} +# } +# +# \value{ +# Returns a \code{hclust} object as returned by @see "stats::hclust". +# } +# +# @author +# +# \seealso{ +# This method is utilized by @seemethod "pruneByHClust". +# } +# +# @keyword internal +#*/########################################################################### +setMethodS3("hclustCNs", "AbstractCBS", function(fit, size=NULL, distMethod="euclidean", hclustMethod="ward.D", ..., verbose=FALSE) { + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Validate arguments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Argument 'verbose': + verbose <- Arguments$getVerbose(verbose) + if (verbose) { + pushState(verbose) + on.exit(popState(verbose)) + } + + + verbose && enter(verbose, "Hierarchical clustering of segmented copy numbers") + verbose && enter(verbose, "Extracting/sampling CNs") + C <- sampleCNs(fit, size=size, splitters=FALSE) + verbose && str(verbose, C) + + # Drop also segments with no data points + ok <- !is.na(C) + ok <- rowAlls(ok) + C <- C[ok,,drop=FALSE] + verbose && str(verbose, C) + verbose && exit(verbose) + + verbose && enter(verbose, "Calculating distance matrix") + D <- stats::dist(C, method=distMethod) + verbose && str(verbose, D) + verbose && exit(verbose) + + verbose && enter(verbose, "Clustering") + + # TODO: Do *weighted* hierarchical clustering + tree <- stats::hclust(D, method=hclustMethod) + verbose && str(verbose, tree) + verbose && exit(verbose) + + verbose && exit(verbose) + + tree +}, private=TRUE) # hclustCNs() + + + +###########################################################################/** +# @RdocMethod pruneByHClust +# +# @title "Prunes the CN profile by pruning and merging through hierarchical clustering" +# +# \description{ +# @get "title". +# } +# +# @synopsis +# +# \arguments{ +# \item{...}{Arguments passed to @see "stats::cutree", +# particularly either of thresholds \code{h} or \code{k}.} +# \item{size, distMethod, hclustMethod}{Arguments (as well as +# some of \code{...}) passed to @seemethod "hclustCNs".} +# \item{merge}{If @TRUE, consecutive segments that belong to the +# same PSCN cluster will be merged into one large segment.} +# \item{update}{If @TRUE, segment means are updated afterwards, otherwise not.} +# \item{verbose}{See @see "R.utils::Verbose".} +# } +# +# \value{ +# Returns a pruned object of the same class. +# } +# +# \examples{\dontrun{ +# fitP <- pruneByHClust(fit, h=0.25) +# }} +# +# @author +# +# @keyword internal +#*/########################################################################### +setMethodS3("pruneByHClust", "AbstractCBS", function(fit, ..., size=NULL, distMethod="euclidean", hclustMethod="ward.D", merge=TRUE, update=TRUE, verbose=FALSE) { + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Validate arguments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Argument 'verbose': + verbose <- Arguments$getVerbose(verbose) + if (verbose) { + pushState(verbose) + on.exit(popState(verbose)) + } + + + verbose && enter(verbose, "Prune segments by hierarchical clustering") + verbose && cat(verbose, "Clustering arguments:") + verbose && str(verbose, c(list(size=size, distMethod=distMethod, hclustMethod=hclustMethod), list(...))) + + verbose && enter(verbose, "Clustering") + tree <- hclustCNs(fit, size=size, distMethod=distMethod, + hclustMethod=hclustMethod, ..., verbose=less(verbose,5)) + verbose && print(verbose, tree) + verbose && exit(verbose) + + verbose && enter(verbose, "Cutting tree") + verbose && cat(verbose, "Cutting arguments:") + verbose && str(verbose, c(list(tree=tree), list(...))) + p <- cutree(tree, ...) + verbose && str(verbose, p) + + # Group segments + idxList <- by(names(p), p, FUN=function(x) { + idxs <- as.integer(as.character(x)) + idxs <- sort(unique(idxs)) + list(idxs) + }) + verbose && str(verbose, idxList) + + verbose && exit(verbose) + + + # Dropping previous segment calls and quantile mean-level estimates. + fit <- resetSegments(fit) + + verbose && enter(verbose, "Merging mean levels of clustered segments") + fit <- updateMeansTogether(fit, idxList=idxList, verbose=less(verbose, 10)) + verbose && exit(verbose) + + if (merge) { + verbose && enter(verbose, "Merging neighboring segments within each cluster") + lefts <- c() + for (ii in seq_along(idxList)) { + verbose && enter(verbose, sprintf("Cluster #%d of %d", ii, length(idxList))) + idxs <- idxList[[ii]] + verbose && cat(verbose, "Segments in cluster:") + verbose && str(verbose, idxs) + + # Indices to segments to merge + leftsII <- idxs[which(diff(idxs) == 1L)] + verbose && cat(verbose, "Left indices of neighboring segments:") + verbose && str(verbose, leftsII) + + lefts <- c(lefts, leftsII) + verbose && exit(verbose) + } # for (ii ...) + + lefts <- sort(unique(lefts)) + verbose && cat(verbose, "Left indices of segments to be merged:") + verbose && str(verbose, lefts) + verbose && exit(verbose) + + verbose && enter(verbose, "Merging segments") + lefts <- rev(lefts) + for (ii in seq_along(lefts)) { + fit <- mergeTwoSegments(fit, left=lefts[ii], update=FALSE) + } # for (ii ...) + verbose && exit(verbose) + } # if (merge) + + if (update) { + verbose && enter(verbose, "Updating segment means") +## fit <- updateBoundaries(fit, verbose=less(verbose, 50)) + fit <- updateMeans(fit, verbose=less(verbose, 50)) + verbose && exit(verbose) + } + + verbose && exit(verbose) + + fit +}, protected=TRUE) # pruneByHClust() diff -Nru r-cran-pscbs-0.63.0/R/AbstractCBS.PLOT.R r-cran-pscbs-0.64.0/R/AbstractCBS.PLOT.R --- r-cran-pscbs-0.63.0/R/AbstractCBS.PLOT.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/AbstractCBS.PLOT.R 2018-08-12 21:30:44.000000000 +0000 @@ -24,40 +24,29 @@ # @seeclass # } #*/########################################################################### -setMethodS3("plotTracks", "AbstractCBS", abstract=TRUE); +setMethodS3("plotTracks", "AbstractCBS", abstract=TRUE) -setMethodS3("tileChromosomes", "AbstractCBS", abstract=TRUE, protected=TRUE); +setMethodS3("tileChromosomes", "AbstractCBS", abstract=TRUE, protected=TRUE) -setMethodS3("drawChangePoints", "AbstractCBS", abstract=TRUE, protected=TRUE); +setMethodS3("drawChangePoints", "AbstractCBS", abstract=TRUE, protected=TRUE) setMethodS3("drawKnownSegments", "AbstractCBS", function(fit, col="#aaaaaa", ..., xScale=1e-6) { - segs <- fit$params$knownSegments; + segs <- fit$params$knownSegments # Nothing todo? if (is.null(segs)) { - return(); + return() } # Workaround from the fact that extractChromosomes() does not drop # known segments. /HB 2013-03-21 - chromosome <- NULL; rm(list="chromosome"); # To please R CMD check. - segs <- subset(segs, chromosome %in% getChromosomes(fit)); - xStarts <- segs[,"start"]; - xEnds <- segs[,"end"]; - xs <- sort(unique(c(xStarts, xEnds))); - abline(v=xScale*xs, col=col, ...); + chromosome <- NULL; rm(list="chromosome") # To please R CMD check. + segs <- subset(segs, chromosome %in% getChromosomes(fit)) + xStarts <- segs[,"start"] + xEnds <- segs[,"end"] + xs <- sort(unique(c(xStarts, xEnds))) + abline(v=xScale*xs, col=col, ...) }, protected=TRUE) - - -############################################################################ -# HISTORY: -# 2013-03-21 -# o Added drawKnownSegments(). -# 2011-12-03 -# o Added drawChangePoints(). -# 2011-10-02 -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/AbstractCBS.PRUNE.R r-cran-pscbs-0.64.0/R/AbstractCBS.PRUNE.R --- r-cran-pscbs-0.63.0/R/AbstractCBS.PRUNE.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/AbstractCBS.PRUNE.R 2018-08-12 21:30:44.000000000 +0000 @@ -8,278 +8,279 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'by': - by <- Arguments$getCharacters(by); - data <- getLocusData(fit); - fields <- colnames(data); - missing <- fields[!is.element(by, fields)]; + by <- Arguments$getCharacters(by) + data <- getLocusData(fit) + fields <- colnames(data) + missing <- fields[!is.element(by, fields)] if (length(missing) > 0) { - throw("Argument 'by' specifies one or more non-existing locus data fields: ", paste(missing, collapse=", ")); + throw("Argument 'by' specifies one or more non-existing locus data fields: ", paste(missing, collapse=", ")) } # Argument 'shift': - shift <- Arguments$getNumeric(shift); + shift <- Arguments$getNumeric(shift) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Identifying optimal sets of segments via dynamic programming"); + verbose && enter(verbose, "Identifying optimal sets of segments via dynamic programming") # Assert that known segments was used - knownSegments <- fit$params$knownSegments; + knownSegments <- fit$params$knownSegments if (nrow(knownSegments) == 0L) { - chromosome <- getChromosomes(fit); - knownSegments <- data.frame(chromosome=chromosome, start=-Inf, end=+Inf); + chromosome <- getChromosomes(fit) + knownSegments <- data.frame(chromosome=chromosome, start=-Inf, end=+Inf) } - chrOffsets <- getChromosomeOffsets(fit); + chrOffsets <- getChromosomeOffsets(fit) # Sanity check - stopifnot(all(is.finite(chrOffsets))); + .stop_if_not(all(is.finite(chrOffsets))) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Shift every other non-empty region # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Shifting TCN levels for every second segment"); + verbose && enter(verbose, "Shifting TCN levels for every second segment") - segPrefix <- getSegmentTrackPrefixes(fit)[1L]; - segKeys <- toCamelCase(paste(segPrefix, c("start", "end"))); - segRowsKey <- toCamelCase(paste(segPrefix, "seg rows")); + segPrefix <- getSegmentTrackPrefixes(fit)[1L] + segKeys <- toCamelCase(paste(segPrefix, c("start", "end"))) + segRowsKey <- toCamelCase(paste(segPrefix, "seg rows")) - verbose && enter(verbose, "Split up into non-empty independent regions"); - chromosomes <- getChromosomes(fit); + verbose && enter(verbose, "Split up into non-empty independent regions") + chromosomes <- getChromosomes(fit) - fitList <- list(); + fitList <- list() for (jj in seq_along(chromosomes)) { - chr <- chromosomes[jj]; - verbose && enter(verbose, sprintf("Chromosome #%d ('%s') of %d", jj, chr, length(chromosomes))); + chr <- chromosomes[jj] + verbose && enter(verbose, sprintf("Chromosome #%d ('%s') of %d", jj, chr, length(chromosomes))) # Subset segmentation results on this chromosome - fitJJ <- extractChromosome(fit, chr); - verbose && cat(verbose, "Number of loci on chromosome: ", nbrOfLoci(fitJJ)); + fitJJ <- extractChromosome(fit, chr) + verbose && cat(verbose, "Number of loci on chromosome: ", nbrOfLoci(fitJJ)) # Nothing to do and nothing to add? if (nbrOfLoci(fitJJ) == 0L) { - verbose && exit(verbose); - next; + verbose && exit(verbose) + next } # Find known segments on this chromosome - knownSegmentsJJ <- subset(knownSegments, chromosome == chr); - verbose && cat(verbose, "Known segments on chromosome:"); - verbose && print(verbose, knownSegmentsJJ); + knownSegmentsJJ <- subset(knownSegments, chromosome == chr) + verbose && cat(verbose, "Known segments on chromosome:") + verbose && print(verbose, knownSegmentsJJ) # Nothing to do? if (nrow(knownSegmentsJJ) == 0L) { - fitList <- append(fitList, list(fitJJ)); - verbose && exit(verbose); - next; + fitList <- append(fitList, list(fitJJ)) + verbose && exit(verbose) + next } # Get the segments on this chromosome - segsJJ <- getSegments(fitJJ); + segsJJ <- getSegments(fitJJ) # Extract the individual known segments on this chromosome - fitListJJ <- list(); + fitListJJ <- list() for (kk in seq_len(nrow(knownSegmentsJJ))) { - verbose && enter(verbose, sprintf("Known segment #%d of %d", kk, nrow(knownSegmentsJJ))); - seg <- knownSegmentsJJ[kk,]; - verbose && print(verbose, seg); - - start <- seg$start; - end <- seg$end; - idxStart <- min(which(segsJJ[[segKeys[1]]] >= start)); - idxEnd <- max(which(segsJJ[[segKeys[2]]] <= end)); - idxs <- idxStart:idxEnd; + verbose && enter(verbose, sprintf("Known segment #%d of %d", kk, nrow(knownSegmentsJJ))) + seg <- knownSegmentsJJ[kk,] + verbose && print(verbose, seg) + + start <- seg$start + end <- seg$end + idxStart <- min(which(segsJJ[[segKeys[1]]] >= start)) + idxEnd <- max(which(segsJJ[[segKeys[2]]] <= end)) + idxs <- idxStart:idxEnd # Extract the particular known segment - fitKK <- extractSegments(fitJJ, idxs); + fitKK <- extractSegments(fitJJ, idxs) # Only add if it has loci if (nbrOfLoci(fitKK) > 0L) { - fitListJJ <- append(fitListJJ, list(fitKK)); + fitListJJ <- append(fitListJJ, list(fitKK)) } - fitKK <- NULL; # Not needed anymore - verbose && exit(verbose); + fitKK <- NULL # Not needed anymore + verbose && exit(verbose) } # for (kk ...) # Append - fitList <- append(fitList, fitListJJ); - fitListJJ <- NULL; # Not needed anymore + fitList <- append(fitList, fitListJJ) + fitListJJ <- NULL # Not needed anymore - verbose && exit(verbose); + verbose && exit(verbose) } # for (jj ...) - nbrOfRegions <- length(fitList); - verbose && cat(verbose, "Number of independent non-empty regions: ", nbrOfRegions); - verbose && exit(verbose); + nbrOfRegions <- length(fitList) + verbose && cat(verbose, "Number of independent non-empty regions: ", nbrOfRegions) + verbose && exit(verbose) - verbose && enter(verbose, "Shift every other region"); + verbose && enter(verbose, "Shift every other region") for (jj in seq(from=1L, to=nbrOfRegions, by=2L)) { - fitJJ <- fitList[[jj]]; - fitJJ <- shiftTCN(fitJJ, shift=shift); - fitList[[jj]] <- fitJJ; + fitJJ <- fitList[[jj]] + fitJJ <- shiftTCN(fitJJ, shift=shift) + fitList[[jj]] <- fitJJ } # for (jj ...) - verbose && exit(verbose); + verbose && exit(verbose) - verbose && enter(verbose, "Merge"); - fitT <- Reduce(function(a, b) append(a,b, addSplit=FALSE), fitList); + verbose && enter(verbose, "Merge") + ## former Reduce() w/ append(a, b, addSplit = FALSE) + fitT <- do.call(c, args = c(fitList, addSplit = FALSE)) # Sanity check -## stopifnot(nbrOfSegments(fitT) == nbrOfSegments(fit)); # Not true anymore - verbose && exit(verbose); - fitList <- NULL; # Not needed anymore - - segsT <- getSegments(fitT); - verbose && print(verbose, tail(segsT)); - verbose && exit(verbose); +## .stop_if_not(nbrOfSegments(fitT) == nbrOfSegments(fit)) # Not true anymore + verbose && exit(verbose) + fitList <- NULL # Not needed anymore + + segsT <- getSegments(fitT) + verbose && print(verbose, tail(segsT)) + verbose && exit(verbose) - fit <- fitT; + fit <- fitT # Not needed anymore - fitT <- knownSegments <- NULL; + fitT <- knownSegments <- NULL # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract signals for DP # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Extracting signals for dynamic programming"); + verbose && enter(verbose, "Extracting signals for dynamic programming") - fit <- tileChromosomes(fit); + fit <- tileChromosomes(fit) # Locus-level signals - data <- getLocusData(fit); - Y <- as.matrix(data[,by,drop=FALSE]); - verbose && print(verbose, summary(Y)); + data <- getLocusData(fit) + Y <- as.matrix(data[,by,drop=FALSE]) + verbose && print(verbose, summary(Y)) # "DP" change-point indices (excluding the two outer/boundary ones) - segRows <- fit[[segRowsKey]]; - segIdxs <- seq_len(nrow(segRows)-1L); - cpIdxs <- segRows$endRow[segIdxs]; + segRows <- fit[[segRowsKey]] + segIdxs <- seq_len(nrow(segRows)-1L) + cpIdxs <- segRows$endRow[segIdxs] - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Dynamic-programming segmention pruning # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Dynamic programming"); + verbose && enter(verbose, "Dynamic programming") - verbose && cat(verbose, "Number of \"DP\" change points: ", length(cpIdxs)); - verbose && str(verbose, cpIdxs); + verbose && cat(verbose, "Number of \"DP\" change points: ", length(cpIdxs)) + verbose && str(verbose, cpIdxs) - res <- seqOfSegmentsByDP(Y, candidatechangepoints=cpIdxs, ...); - verbose && str(verbose, res); + res <- seqOfSegmentsByDP(Y, candidatechangepoints=cpIdxs, ...) + verbose && str(verbose, res) # Sanity checks - jumpList <- res$jump; - lastJump <- jumpList[[length(jumpList)]]; - stopifnot(identical(cpIdxs, as.integer(lastJump))); + jumpList <- res$jump + lastJump <- jumpList[[length(jumpList)]] + .stop_if_not(identical(cpIdxs, as.integer(lastJump))) - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Adjustments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Excluding cases where known segments no longer correct"); + verbose && enter(verbose, "Excluding cases where known segments no longer correct") - verbose && cat(verbose, "Number of independent non-empty regions: ", nbrOfRegions); + verbose && cat(verbose, "Number of independent non-empty regions: ", nbrOfRegions) # Drop the K first - nbrOfCPs <- nbrOfRegions - 1L; + nbrOfCPs <- nbrOfRegions - 1L if (nbrOfCPs > 0L) { - K <- nbrOfCPs - 1L; # Don't count the flat segmentation - jumpList <- jumpList[-(1:K)]; + K <- nbrOfCPs - 1L # Don't count the flat segmentation + jumpList <- jumpList[-(1:K)] } - verbose && str(verbose, jumpList); - verbose && exit(verbose); + verbose && str(verbose, jumpList) + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract the possible sets of segments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose <- less(verbose, 20); - verbose && enter(verbose, "Converting to physical segments"); + verbose <- less(verbose, 20) + verbose && enter(verbose, "Converting to physical segments") - segs <- getSegments(fit, splitters=TRUE, addGaps=FALSE); - segs <- segs[,c("chromosome", segKeys)]; + segs <- getSegments(fit, splitters=TRUE, addGaps=FALSE) + segs <- segs[,c("chromosome", segKeys)] jumpIdxList <- lapply(jumpList, FUN=function(idxs) { - match(idxs, table=cpIdxs); - }); + match(idxs, table=cpIdxs) + }) # Sanity check - stopifnot(identical(seq_along(cpIdxs), jumpIdxList[[length(jumpIdxList)]])); + .stop_if_not(identical(seq_along(cpIdxs), jumpIdxList[[length(jumpIdxList)]])) - chrs <- segs$chromosome; - starts <- segs[[segKeys[1]]]; - ends <- segs[[segKeys[2]]]; - nsegs <- nrow(segs); - segList <- vector("list", length=length(jumpIdxList)); + chrs <- segs$chromosome + starts <- segs[[segKeys[1]]] + ends <- segs[[segKeys[2]]] + nsegs <- nrow(segs) + segList <- vector("list", length=length(jumpIdxList)) for (kk in seq_along(segList)) { - verbose && enter(verbose, sprintf("Sequence #%d of %d", kk, length(segList))); - idxs <- jumpIdxList[[kk]]; - verbose && cat(verbose, "Change point indices:"); - verbose && str(verbose, idxs); - - chrsKK <- chrs[idxs]; - chr <- chrsKK[1]; -# stopifnot(all(chrsKK == chr)); - chrsKK <- c(chrsKK, chrsKK[length(chrsKK)]); - startsKK <- starts[c(1L, idxs+1L)]; - endsKK <- ends[c(idxs, nsegs)]; - verbose && cat(verbose, "Chromosomes:"); - verbose && str(verbose, chrsKK); - verbose && cat(verbose, "Starts:"); - verbose && str(verbose, startsKK); - verbose && cat(verbose, "Ends:"); - verbose && str(verbose, endsKK); - - - offsetsKK <- chrOffsets[chrsKK]; - startsKK <- startsKK - offsetsKK; - endsKK <- endsKK - offsetsKK; - segsKK <- data.frame(chromosome=chrsKK, start=startsKK, end=endsKK); - verbose && print(verbose, tail(segsKK)); - segList[[kk]] <- segsKK; + verbose && enter(verbose, sprintf("Sequence #%d of %d", kk, length(segList))) + idxs <- jumpIdxList[[kk]] + verbose && cat(verbose, "Change point indices:") + verbose && str(verbose, idxs) + + chrsKK <- chrs[idxs] + chr <- chrsKK[1] +# .stop_if_not(all(chrsKK == chr)) + chrsKK <- c(chrsKK, chrsKK[length(chrsKK)]) + startsKK <- starts[c(1L, idxs+1L)] + endsKK <- ends[c(idxs, nsegs)] + verbose && cat(verbose, "Chromosomes:") + verbose && str(verbose, chrsKK) + verbose && cat(verbose, "Starts:") + verbose && str(verbose, startsKK) + verbose && cat(verbose, "Ends:") + verbose && str(verbose, endsKK) + + + offsetsKK <- chrOffsets[chrsKK] + startsKK <- startsKK - offsetsKK + endsKK <- endsKK - offsetsKK + segsKK <- data.frame(chromosome=chrsKK, start=startsKK, end=endsKK) + verbose && print(verbose, tail(segsKK)) + segList[[kk]] <- segsKK - verbose && exit(verbose); + verbose && exit(verbose) } # for (kk ...) - verbose && exit(verbose); - verbose <- more(verbose, 20); + verbose && exit(verbose) + verbose <- more(verbose, 20) # Sanity check - lastSegs <- segList[[length(segList)]]; -# stopifnot(identical(lastSegs, segs)); - verbose && str(verbose, segList); - - nbrOfCPsSeq <- sapply(jumpList, FUN=length); - verbose && cat(verbose, "Sequence of number of \"DP\" change points:"); - verbose && print(verbose, nbrOfCPsSeq); - - nbrOfSegsSeq <- sapply(segList, FUN=nrow); - verbose && cat(verbose, "Sequence of number of segments:"); - verbose && print(verbose, nbrOfSegsSeq); - - nbrOfChangePointsSeq <- nbrOfSegsSeq - nbrOfRegions; - verbose && cat(verbose, "Sequence of number of \"discovered\" change points:"); - verbose && print(verbose, nbrOfChangePointsSeq); + lastSegs <- segList[[length(segList)]] +# .stop_if_not(identical(lastSegs, segs)) + verbose && str(verbose, segList) + + nbrOfCPsSeq <- sapply(jumpList, FUN=length) + verbose && cat(verbose, "Sequence of number of \"DP\" change points:") + verbose && print(verbose, nbrOfCPsSeq) + + nbrOfSegsSeq <- sapply(segList, FUN=nrow) + verbose && cat(verbose, "Sequence of number of segments:") + verbose && print(verbose, nbrOfSegsSeq) + + nbrOfChangePointsSeq <- nbrOfSegsSeq - nbrOfRegions + verbose && cat(verbose, "Sequence of number of \"discovered\" change points:") + verbose && print(verbose, nbrOfChangePointsSeq) - stopifnot(nbrOfSegsSeq == nbrOfCPsSeq + 1L); - stopifnot(nbrOfSegsSeq[1L] == nbrOfRegions); + .stop_if_not(all(nbrOfSegsSeq == nbrOfCPsSeq + 1L)) + .stop_if_not(nbrOfSegsSeq[1L] == nbrOfRegions) segList <- lapply(segList, FUN=function(seg) { - attr(seg, "nbrOfChangePoints") <- nrow(seg) - nbrOfRegions; - seg; - }); + attr(seg, "nbrOfChangePoints") <- nrow(seg) - nbrOfRegions + seg + }) - K <- (nbrOfRegions-1L); + K <- (nbrOfRegions-1L) modelFit <- list( nbrOfSegments=nbrOfSegsSeq, nbrOfChangePoints=nbrOfSegsSeq - nbrOfRegions, @@ -288,12 +289,12 @@ rse=res$rse[-(1:K)], V=res$V[-(1:K),-(1:K),drop=FALSE], seqOfSegmentsByDP=res - ); - attr(segList, "modelFit") <- modelFit; + ) + attr(segList, "modelFit") <- modelFit - verbose && exit(verbose); + verbose && exit(verbose) - segList; + segList }, protected=TRUE) # seqOfSegmentsByDP() @@ -314,82 +315,82 @@ ### Bioinformatics). Default=0.5 ... ){ - n <- dim(Y)[1]; - p <- dim(Y)[2]; - kmaxmin <- floor(n/4); + n <- dim(Y)[1] + p <- dim(Y)[2] + kmaxmin <- floor(n/4) if (kmax == 0 || kmax > length(candidatechangepoints)) { - kmax <- min(length(candidatechangepoints), kmaxmin); + kmax <- min(length(candidatechangepoints), kmaxmin) } if (kmax > kmaxmin) { - sprintf('warning : not enough points to optimize the number of the change-points up to %s\n', kmax); - kmax <- kmaxmin; - sprintf('Set the maximum number of change-points to %s\n', kmax); + sprintf('warning : not enough points to optimize the number of the change-points up to %s\n', kmax) + kmax <- kmaxmin + sprintf('Set the maximum number of change-points to %s\n', kmax) } # Compute boundaries of the smallest intervals considered - b <- sort(union(0, union(n, candidatechangepoints))); - k <- length(b) - 1; # k is the number of such intervals + b <- sort(union(0, union(n, candidatechangepoints))) + k <- length(b) - 1 # k is the number of such intervals # Compute the k*k matrix J such that J[i,j] for i<=j is the RSE when intervales i to j are merged - J <- matrix(numeric(k*k), ncol=k); + J <- matrix(numeric(k*k), ncol=k) # How should NAs be handled?!? - Yz <- Y; - Yz[is.na(Yz)] <- 0; - s <- rbind(rep(0, times=p), colCumsums(Yz)); - v <- c(0, cumsum(rowSums(Y^2, na.rm=TRUE))); + Yz <- Y + Yz[is.na(Yz)] <- 0 + s <- rbind(rep(0, times=p), colCumsums(Yz)) + v <- c(0, cumsum(rowSums(Y^2, na.rm=TRUE))) for (i in 1:k) { for (j in i:k) { - Istart <- b[i] + 1; - Iend <- b[j+1]; - J[i,j] <- v[Iend+1] - v[Istart] - sum((s[Iend+1,]-s[Istart,])^2, na.rm=TRUE)/(Iend-Istart+1); + Istart <- b[i] + 1 + Iend <- b[j+1] + J[i,j] <- v[Iend+1] - v[Istart] - sum((s[Iend+1,]-s[Istart,])^2, na.rm=TRUE)/(Iend-Istart+1) } # for (j ...) } # for (i ...) # Dynamic programming - V <- matrix(numeric((kmax+1)*k), ncol=k); # V[i,j] is the best RSE for segmenting intervals 1 to j with at most i-1 change points - jump <- matrix(numeric(kmax*k), ncol=k); + V <- matrix(numeric((kmax+1)*k), ncol=k) # V[i,j] is the best RSE for segmenting intervals 1 to j with at most i-1 change points + jump <- matrix(numeric(kmax*k), ncol=k) # With no change points, V[i,j] is juste the precomputed RSE for intervals 1 to j - V[1,] <- J[1,]; + V[1,] <- J[1,] # Then we apply the recursive formula for (ki in 1:kmax) { for (j in (ki+1):k) { - val <- min(V[ki,ki:(j-1)] + t(J[(ki+1):j, j])); - ind <- which.min(V[ki,ki:(j-1)] + t(J[(ki+1):j, j])); - V[ki+1,j] <- val; - jump[ki,j] <- ind + ki-1; + val <- min(V[ki,ki:(j-1)] + t(J[(ki+1):j, j])) + ind <- which.min(V[ki,ki:(j-1)] + t(J[(ki+1):j, j])) + V[ki+1,j] <- val + jump[ki,j] <- ind + ki-1 } # for (j ...) } # for (ki ...) # Optimal segmentation - res.jump <- list(); + res.jump <- list() for (ki in 1:kmax) { - res.jump[[ki]] <- numeric(ki); - res.jump[[ki]][ki] <- jump[ki,k]; + res.jump[[ki]] <- numeric(ki) + res.jump[[ki]][ki] <- jump[ki,k] if (ki != 1) { for (i in seq(from=(ki-1), to=1, by=-1)) { - res.jump[[ki]][i] <- jump[i, res.jump[[ki]][i+1]]; + res.jump[[ki]][i] <- jump[i, res.jump[[ki]][i+1]] } } } # for (ki ...) # Convert back the index of the interval to the last position before the jump - rightlimit <- b[2:length(b)]; + rightlimit <- b[2:length(b)] for (ki in 1:kmax) { - res.jump[[ki]] <- rightlimit[res.jump[[ki]]]; + res.jump[[ki]] <- rightlimit[res.jump[[ki]]] } # for (ki ...) # RSE as a function of number of change-points - res.rse <- V[,k]; + res.rse <- V[,k] # Optimal number of change points - options(warn=-1); - J <- log(res.rse); - Km <- length(J); - Jtild <- (J[Km]-J)/(J[Km]-J[1])*(Km-1)+1; # Normalize - res.kbest <- max(which(diff(diff(Jtild)) > threshold)) + 1; - #if((res.kbest) == -Inf) { res.kbest <- 1 }; - return(list(jump=res.jump, rse=res.rse, kbest=res.kbest, V=V)); + options(warn=-1) + J <- log(res.rse) + Km <- length(J) + Jtild <- (J[Km]-J)/(J[Km]-J[1])*(Km-1)+1 # Normalize + res.kbest <- max(which(diff(diff(Jtild)) > threshold)) + 1 + #if((res.kbest) == -Inf) { res.kbest <- 1 } + return(list(jump=res.jump, rse=res.rse, kbest=res.kbest, V=V)) ### \item{res.jump{i}}{a i*1 vector of change-point positions for the i-th lambda value (i depends on lambda). i varies between 1 and kmax} ### \item{res.rse}{a (kmax+1)-dimensional vector of residual squared error} ### \item{res.kbest}{the number of selected change-points} @@ -424,7 +425,7 @@ # # \examples{\dontrun{ # # Drop two segments -# fitP <- pruneByDP(fit, nbrOfSegments=-2); +# fitP <- pruneByDP(fit, nbrOfSegments=-2) # }} # # @author "HB, PN" @@ -439,92 +440,70 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Some pre-extraction # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - knownSegments <- fit$params$knownSegments; + knownSegments <- fit$params$knownSegments if (nrow(knownSegments) == 0L) { - chromosome <- getChromosomes(fit); - knownSegments <- data.frame(chromosome=chromosome, start=-Inf, end=+Inf); + chromosome <- getChromosomes(fit) + knownSegments <- data.frame(chromosome=chromosome, start=-Inf, end=+Inf) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'nbrOfSegments': - nbrOfSegments <- Arguments$getInteger(nbrOfSegments); + nbrOfSegments <- Arguments$getInteger(nbrOfSegments) # Specifying number of change points *to drop*? if (nbrOfSegments < 0L) { - nbrOfCPsToDrop <- -nbrOfSegments; - nbrOfSegments <- nbrOfSegments(fit, splitters=FALSE) - nbrOfCPsToDrop; + nbrOfCPsToDrop <- -nbrOfSegments + nbrOfSegments <- nbrOfSegments(fit, splitters=FALSE) - nbrOfCPsToDrop } if (nbrOfSegments < nrow(knownSegments)) { - throw("Argument 'nbrOfSegments' is less than number of \"known\" segments: ", nbrOfSegments, " < ", nrow(knownSegments)); + throw("Argument 'nbrOfSegments' is less than number of \"known\" segments: ", nbrOfSegments, " < ", nrow(knownSegments)) } if (nbrOfSegments > nbrOfSegments(fit, splitters=FALSE)) { - throw("Argument 'nbrOfSegments' is greater than the number of \"found\" segments: ", nbrOfSegments, " > ", nbrOfSegments(fit, splitters=FALSE)); + throw("Argument 'nbrOfSegments' is greater than the number of \"found\" segments: ", nbrOfSegments, " > ", nbrOfSegments(fit, splitters=FALSE)) } # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Prune segments by dynamical programming"); + verbose && enter(verbose, "Prune segments by dynamical programming") # Nothing to do? if (nbrOfSegments == nbrOfSegments(fit, splitters=FALSE)) { - verbose && cat(verbose, "No need for pruning, because the number of \"target\" segments is the same as the number of \"found\" segments: ", nbrOfSegments, " == ", nbrOfSegments(fit, splitters=FALSE)); - return(fit); - verbose && exit(verbose); + verbose && cat(verbose, "No need for pruning, because the number of \"target\" segments is the same as the number of \"found\" segments: ", nbrOfSegments, " == ", nbrOfSegments(fit, splitters=FALSE)) + return(fit) + verbose && exit(verbose) } - verbose && cat(verbose, "Target number of segments: ", nbrOfSegments); + verbose && cat(verbose, "Target number of segments: ", nbrOfSegments) - verbose && enter(verbose, "Dynamic programming"); - segList <- seqOfSegmentsByDP(fit, ...); + verbose && enter(verbose, "Dynamic programming") + segList <- seqOfSegmentsByDP(fit, ...) # Select the one with expected number of segments - nbrOfCPs <- sapply(segList, FUN=nrow); - verbose && printf(verbose, "Range of number of CPs among solutions: [%d,%d]\n", min(nbrOfCPs), max(nbrOfCPs)); + nbrOfCPs <- sapply(segList, FUN=nrow) + verbose && printf(verbose, "Range of number of CPs among solutions: [%d,%d]\n", min(nbrOfCPs), max(nbrOfCPs)) - keep <- which(nbrOfCPs == nbrOfSegments); - stopifnot(length(keep) == 1L); + keep <- which(nbrOfCPs == nbrOfSegments) + .stop_if_not(length(keep) == 1L) - knownSegments <- segList[[keep]]; - verbose && printf(verbose, "Solution with %d segments:\n", nbrOfSegments); - verbose && print(verbose, knownSegments); - verbose && exit(verbose); + knownSegments <- segList[[keep]] + verbose && printf(verbose, "Solution with %d segments:\n", nbrOfSegments) + verbose && print(verbose, knownSegments) + verbose && exit(verbose) - verbose && enter(verbose, "Rebuilding segmentation results"); - fitDP <- resegment(fit, knownSegments=knownSegments, undoTCN=+Inf, undoDH=+Inf, verbose=less(verbose, 10)); - verbose && print(verbose, fitDP); - verbose && exit(verbose); + verbose && enter(verbose, "Rebuilding segmentation results") + fitDP <- resegment(fit, knownSegments=knownSegments, undoTCN=+Inf, undoDH=+Inf, verbose=less(verbose, 10)) + verbose && print(verbose, fitDP) + verbose && exit(verbose) - verbose && exit(verbose); + verbose && exit(verbose) - fitDP; + fitDP }, protected=TRUE) # pruneByDP() - - - - -############################################################################ -# HISTORY: -# 2012-09-21 -# o Now seqOfSegmentsByDP() return a 'modelFit' attribute. -# 2012-09-20 -# o BUG FIX: seqOfSegmentsByDP() for AbstractCBS would not handle empty -# segments, which could occur if 'knownSegments' for instance included -# centromere gaps. -# 2012-09-14 -# o Added pruneByDP(..., nbrOfSegments) for AbstractCBS. -# o Added seqOfSegmentsByDP() which is a matrix version of dpseg() from -# GFLseg v0.1.6 by Morgane Pierre-Jean and Pierre Neuvial, which in turn -# ported it from the Matlab GPL source code by the original authors ???. -# o Generalized seqOfSegmentsByDP() to work for AbstractCBS. -# 2012-09-13 -# o Added seqOfSegmentsByDP() for PairedPSCBS. -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/AbstractCBS.R r-cran-pscbs-0.64.0/R/AbstractCBS.R --- r-cran-pscbs-0.63.0/R/AbstractCBS.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/AbstractCBS.R 2018-08-12 21:30:44.000000000 +0000 @@ -29,20 +29,20 @@ setConstructorS3("AbstractCBS", function(fit=list(), sampleName=fit$sampleName, ...) { # Argument 'sampleName': if (!is.null(sampleName)) { - sampleName <- Arguments$getCharacter(sampleName); + sampleName <- Arguments$getCharacter(sampleName) } - fit$sampleName <- sampleName; - extend(fit, "AbstractCBS"); + fit$sampleName <- sampleName + extend(fit, "AbstractCBS") }) setMethodS3("print", "AbstractCBS", function(x, ...) { # To please R CMD check - fit <- x; + fit <- x - segs <- getSegments(fit, simplify=TRUE, ...); - print(segs); + segs <- getSegments(fit, simplify=TRUE, ...) + print(segs) }, protected=TRUE) @@ -51,64 +51,64 @@ setMethodS3("all.equal", "AbstractCBS", function(target, current, check.attributes=FALSE, ...) { # NOTE: Here we cannot trust argument '...', because it may contain # copies of 'target' and 'current' - args <- list(...); - drop <- integer(0L); + args <- list(...) + drop <- integer(0L) for (kk in seq_along(args)) { - if (identical(args[[kk]], target)) drop <- c(drop, kk); - if (identical(args[[kk]], current)) drop <- c(drop, kk); + if (identical(args[[kk]], target)) drop <- c(drop, kk) + if (identical(args[[kk]], current)) drop <- c(drop, kk) } if (length(drop) > 0L) { - args <- args[-drop]; - str(args); -# assign("...", args, inherits=FALSE); + args <- args[-drop] + str(args) +# assign("...", args, inherits=FALSE) } - args <- list(...); + args <- list(...) # Compare class attributes - res <- all.equal(class(target), class(current)); + res <- all.equal(class(target), class(current)) if (!isTRUE(res)) { - return(res); + return(res) } # Compare locus-level data - dataT <- getLocusData(target); - dataC <- getLocusData(current); - res <- all.equal(dataT, dataC, check.attributes=check.attributes); + dataT <- getLocusData(target) + dataC <- getLocusData(current) + res <- all.equal(dataT, dataC, check.attributes=check.attributes) if (!isTRUE(res)) { - attr(res, "what") <- "getLocusData()"; - return(res); + attr(res, "what") <- "getLocusData()" + return(res) } # Compare segments - dataT <- getSegments(target); - dataC <- getSegments(current); - res <- all.equal(dataT, dataC, check.attributes=check.attributes); + dataT <- getSegments(target) + dataC <- getSegments(current) + res <- all.equal(dataT, dataC, check.attributes=check.attributes) if (!isTRUE(res)) { - attr(res, "what") <- "getSegments()"; - return(res); + attr(res, "what") <- "getSegments()" + return(res) } # Compare field names - fieldsT <- names(target); - fieldsC <- names(current); - res <- all.equal(fieldsT, fieldsC, check.attributes=check.attributes); + fieldsT <- names(target) + fieldsC <- names(current) + res <- all.equal(fieldsT, fieldsC, check.attributes=check.attributes) if (!isTRUE(res)) { - attr(res, "what") <- "names"; - return(res); + attr(res, "what") <- "names" + return(res) } # Compare other fields for (key in fieldsT) { - dataT <- target[[key]]; - dataC <- current[[key]]; - res <- all.equal(dataT, dataC, check.attributes=check.attributes); + dataT <- target[[key]] + dataC <- current[[key]] + res <- all.equal(dataT, dataC, check.attributes=check.attributes) if (!isTRUE(res)) { - attr(res, "what") <- sprintf("[[\"%s\"]]", key); - return(res); + attr(res, "what") <- sprintf("[[\"%s\"]]", key) + return(res) } } # for (key ...) - return(TRUE); + return(TRUE) }, protected=TRUE) @@ -141,7 +141,7 @@ # } #*/########################################################################### setMethodS3("save", "AbstractCBS", function(this, ...) { - saveObject(this, ...); + saveObject(this, ...) }) @@ -174,26 +174,26 @@ # } #*/########################################################################### setMethodS3("load", "AbstractCBS", function(static, ...) { - object <- loadObject(...); + object <- loadObject(...) # Patch for changes in class structure in PSCBS v0.13.2 -> v0.13.3. if (!inherits(object, "AbstractCBS")) { if (inherits(object, "CBS")) { - class(object) <- c(class(object), "AbstractCBS"); - warning("Added 'AbstractCBS' to the class hierarchy of the loaded ", class(object)[1], " object."); + class(object) <- c(class(object), "AbstractCBS") + warning("Added 'AbstractCBS' to the class hierarchy of the loaded ", class(object)[1], " object.") } else if (inherits(object, "PairedPSCBS")) { - class(object) <- c(class(object), "AbstractCBS"); - warning("Added 'AbstractCBS' to the class hierarchy of the loaded ", class(object)[1], " object."); + class(object) <- c(class(object), "AbstractCBS") + warning("Added 'AbstractCBS' to the class hierarchy of the loaded ", class(object)[1], " object.") } } # Sanity check if (!inherits(object, class(static)[1])) { throw("Loaded an object from file, but it does not inherit from ", - class(static)[1], " as expected: ", hpaste(class(object))); + class(static)[1], " as expected: ", hpaste(class(object))) } - object; + object }, static=TRUE) @@ -226,15 +226,15 @@ # } #*/########################################################################### setMethodS3("getSampleName", "AbstractCBS", function(fit, ...) { - name <- fit$sampleName; + name <- fit$sampleName if (is.null(name)) { - name <- as.character(NA); + name <- as.character(NA) } - name; + name }, protected=TRUE) setMethodS3("sampleName", "AbstractCBS", function(fit, ...) { - getSampleName(fit); + getSampleName(fit) }, protected=TRUE) @@ -269,20 +269,20 @@ #*/########################################################################### setMethodS3("setSampleName", "AbstractCBS", function(fit, name, ...) { # Argument 'value': - name <- Arguments$getCharacter(name); + name <- Arguments$getCharacter(name) - fit$sampleName <- name; + fit$sampleName <- name - invisible(fit); + invisible(fit) }, protected=TRUE) setMethodS3("sampleName<-", "AbstractCBS", function(x, value) { - setSampleName(x, value); + setSampleName(x, value) }, protected=TRUE, addVarArgs=FALSE) "sampleName<-" <- function(x, value) { - UseMethod("sampleName<-"); + UseMethod("sampleName<-") } @@ -317,25 +317,25 @@ # @seeclass # } #*/########################################################################### -setMethodS3("getLocusData", "AbstractCBS", abstract=TRUE); +setMethodS3("getLocusData", "AbstractCBS", abstract=TRUE) setMethodS3("setLocusData", "AbstractCBS", function(fit, loci, ...) { # Argument 'loci': - loci <- Arguments$getInstanceOf(loci, "data.frame"); - nbrOfLoci <- nbrOfLoci(fit); + loci <- Arguments$getInstanceOf(loci, "data.frame") + nbrOfLoci <- nbrOfLoci(fit) if (nrow(loci) != nbrOfLoci) { - throw("Cannot set locus-level data. The number of loci to be set differ from the existing number of loci: ", nrow(loci), " != ", nbrOfLoci); + throw("Cannot set locus-level data. The number of loci to be set differ from the existing number of loci: ", nrow(loci), " != ", nbrOfLoci) } - fit$data <- loci; + fit$data <- loci - invisible(fit); + invisible(fit) }, protected=TRUE) -setMethodS3("getLocusSignalNames", "AbstractCBS", abstract=TRUE, protected=TRUE); +setMethodS3("getLocusSignalNames", "AbstractCBS", abstract=TRUE, protected=TRUE) -setMethodS3("getSegmentTrackPrefixes", "AbstractCBS", abstract=TRUE, protected=TRUE); +setMethodS3("getSegmentTrackPrefixes", "AbstractCBS", abstract=TRUE, protected=TRUE) ###########################################################################/** @@ -364,8 +364,8 @@ # } #*/########################################################################### setMethodS3("nbrOfLoci", "AbstractCBS", function(fit, splitters=FALSE, ...) { - data <- getLocusData(fit, splitters=splitters, ...); - nrow(data); + data <- getLocusData(fit, splitters=splitters, ...) + nrow(data) }) @@ -401,7 +401,7 @@ # @seeclass # } #*/########################################################################### -setMethodS3("getSegments", "AbstractCBS", abstract=TRUE); +setMethodS3("getSegments", "AbstractCBS", abstract=TRUE) setMethodS3("setSegments", "AbstractCBS", function(fit, segments, splitters=TRUE, ...) { @@ -418,7 +418,7 @@ }, protected=TRUE) -setMethodS3("getChangePoints", "AbstractCBS", abstract=TRUE); +setMethodS3("getChangePoints", "AbstractCBS", abstract=TRUE) @@ -454,25 +454,25 @@ #*/########################################################################### setMethodS3("resetSegments", "AbstractCBS", function(fit, ...) { segs <- getSegments(fit, splitters=TRUE) - names <- colnames(segs); + names <- colnames(segs) - excl <- NULL; + excl <- NULL # Drop all quantile mean level estimates (from bootstrapping) - idxs <- grep("_[0-9.]*[%]$", names); - excl <- c(excl, idxs); + idxs <- grep("_[0-9.]*[%]$", names) + excl <- c(excl, idxs) # Drop all calls - idxs <- grep("Call$", names); - excl <- c(excl, idxs); + idxs <- grep("Call$", names) + excl <- c(excl, idxs) - excl <- unique(excl); + excl <- unique(excl) if (length(excl) > 0L) { - segs <- segs[,-excl]; + segs <- segs[,-excl] } fit <- setSegments(fit, segs, splitters=TRUE) - invisible(fit); + invisible(fit) }, protected=TRUE) @@ -505,7 +505,7 @@ # } #*/########################################################################### setMethodS3("nbrOfSegments", "AbstractCBS", function(this, splitters=FALSE, ...) { - nrow(getSegments(this, splitters=splitters, ...)); + nrow(getSegments(this, splitters=splitters, ...)) }) @@ -539,17 +539,17 @@ # } #*/########################################################################### setMethodS3("nbrOfChangePoints", "AbstractCBS", function(fit, ignoreGaps=FALSE, dropEmptySegments=TRUE, ...) { - segs <- getSegments(fit, splitters=TRUE, addGaps=!ignoreGaps); + segs <- getSegments(fit, splitters=TRUE, addGaps=!ignoreGaps) if (dropEmptySegments) { - prefix <- getSegmentTrackPrefixes(fit); + prefix <- getSegmentTrackPrefixes(fit) keys <- sapply(prefix, FUN=function(x) { - toCamelCase(paste(c(x, "nbr of loci"), collapse=" ")); - }); - counts <- as.matrix(segs[,keys]); - counts <- rowSums(counts, na.rm=TRUE); - segs$chromosome[counts == 0L] <- NA; + toCamelCase(paste(c(x, "nbr of loci"), collapse=" ")) + }) + counts <- as.matrix(segs[,keys]) + counts <- rowSums(counts, na.rm=TRUE) + segs$chromosome[counts == 0L] <- NA } - sum(!is.na(diff(segs$chromosome))); + sum(!is.na(diff(segs$chromosome))) }) @@ -582,7 +582,7 @@ # } #*/########################################################################### setMethodS3("as.data.frame", "AbstractCBS", function(x, ...) { - getSegments(x, ...); + getSegments(x, ...) }, protected=TRUE) @@ -615,15 +615,15 @@ # } #*/########################################################################### setMethodS3("getChromosomes", "AbstractCBS", function(this, ...) { - segs <- getSegments(this, ...); - chromosomes <- sort(unique(segs$chromosome), na.last=TRUE); + segs <- getSegments(this, ...) + chromosomes <- sort(unique(segs$chromosome), na.last=TRUE) # Drop NA dividers if (length(chromosomes) > 1) { - chromosomes <- chromosomes[!is.na(chromosomes)]; + chromosomes <- chromosomes[!is.na(chromosomes)] } - chromosomes; + chromosomes }) @@ -654,44 +654,44 @@ # } #*/########################################################################### setMethodS3("nbrOfChromosomes", "AbstractCBS", function(this, ...) { - length(getChromosomes(this, ...)); + length(getChromosomes(this, ...)) }) setMethodS3("getSegmentSizes", "AbstractCBS", function(fit, by=c("length", "count"), ...) { - by <- match.arg(by); + by <- match.arg(by) if (by == "length") { - prefix <- getSegmentTrackPrefixes(fit)[1]; - keys <- toCamelCase(paste(prefix, " ", c("start", "end"))); + prefix <- getSegmentTrackPrefixes(fit)[1] + keys <- toCamelCase(paste(prefix, " ", c("start", "end"))) } else if (by == "count") { - keys <- "nbrOfLoci"; + keys <- "nbrOfLoci" } - data <- getSegments(fit, ...)[,keys]; + data <- getSegments(fit, ...)[,keys] if (by == "length") { - res <- data[[2L]]-data[[1L]]+1L; + res <- data[[2L]]-data[[1L]]+1L } else if (by == "count") { - res <- data[[1L]]; + res <- data[[1L]] } - res; + res }) -setMethodS3("extractCNs", "AbstractCBS", abstract=TRUE); +setMethodS3("extractCNs", "AbstractCBS", abstract=TRUE) setMethodS3("sampleCNs", "AbstractCBS", function(fit, size=NULL, ...) { - data <- extractCNs(fit, ...); + data <- extractCNs(fit, ...) if (!is.null(size)) { - sizes <- getSegmentSizes(fit, ...); + sizes <- getSegmentSizes(fit, ...) # Sanity check - stopifnot(length(sizes) == nrow(data)); - idxs <- sample(nrow(data), size=size, replace=TRUE, prob=sizes); - data <- data[idxs,,drop=FALSE]; + .stop_if_not(length(sizes) == nrow(data)) + idxs <- sample(nrow(data), size=size, replace=TRUE, prob=sizes) + data <- data[idxs,,drop=FALSE] } - data; + data }) ###########################################################################/** @@ -721,91 +721,91 @@ # # @keyword internal #*/########################################################################### -setMethodS3("updateMeans", "AbstractCBS", abstract=TRUE, protected=TRUE); +setMethodS3("updateMeans", "AbstractCBS", abstract=TRUE, protected=TRUE) setMethodS3("getMeanEstimators", "AbstractCBS", function(fit, which=NULL, default=mean, ...) { - estList <- fit$params$meanEstimators; + estList <- fit$params$meanEstimators if (is.null(estList)) { - estList <- list(); + estList <- list() } - if (is.null(which)) which <- names(estList); + if (is.null(which)) which <- names(estList) for (key in which) { - fcn <- estList[[key]]; + fcn <- estList[[key]] if (is.null(fcn)) { - fcn <- default; + fcn <- default } else if (is.character(fcn)) { - fcn <- get(fcn, mode="function"); + fcn <- get(fcn, mode="function") } - estList[[key]] <- fcn; + estList[[key]] <- fcn } - estList; + estList }, protected=TRUE) setMethodS3("setMeanEstimators", "AbstractCBS", function(fit, ...) { - estList <- fit$params$meanEstimators; + estList <- fit$params$meanEstimators if (is.null(estList)) { - estList <- list(); + estList <- list() } - args <- list(...); + args <- list(...) # Nothing todo? if (length(args) == 0L) { - return(invisible(fit)); + return(invisible(fit)) } - keys <- names(args); + keys <- names(args) if (is.null(keys)) { - throw("Estimators arguments must be named."); + throw("Estimators arguments must be named.") } for (key in keys) { - fcn <- args[[key]]; + fcn <- args[[key]] if (is.function(fcn)) { } else if (is.character(fcn)) { if (!exists(fcn, mode="function")) { - throw(sprintf("No such '%s' estimator function: %s", key, fcn)); + throw(sprintf("No such '%s' estimator function: %s", key, fcn)) } } else { - throw(sprintf("Estimator argument '%s' must be a function or character string: %s", key, mode(fcn))); + throw(sprintf("Estimator argument '%s' must be a function or character string: %s", key, mode(fcn))) } - estList[[key]] <- fcn; + estList[[key]] <- fcn } - fit$params$meanEstimators <- estList; + fit$params$meanEstimators <- estList - invisible(fit); + invisible(fit) }, protected=TRUE) -setMethodS3("resegment", "AbstractCBS", abstract=TRUE, protected=TRUE); +setMethodS3("resegment", "AbstractCBS", abstract=TRUE, protected=TRUE) -setMethodS3("getChromosomeRanges", "AbstractCBS", abstract=TRUE, protected=TRUE); +setMethodS3("getChromosomeRanges", "AbstractCBS", abstract=TRUE, protected=TRUE) setMethodS3("getChromosomeOffsets", "AbstractCBS", function(fit, resolution=1e6, ...) { # Argument 'resolution': if (!is.null(resolution)) { - resolution <- Arguments$getDouble(resolution, range=c(1,Inf)); + resolution <- Arguments$getDouble(resolution, range=c(1,Inf)) } - data <- getChromosomeRanges(fit, ...); - splits <- data[,"start"] + data[,"length"]; + data <- getChromosomeRanges(fit, ...) + splits <- data[,"start"] + data[,"length"] if (!is.null(resolution)) { - splits <- ceiling(splits / resolution); - splits <- resolution * splits; + splits <- ceiling(splits / resolution) + splits <- resolution * splits } - offsets <- c(0L, cumsum(splits)); - names(offsets) <- c(rownames(data), NA); + offsets <- c(0L, cumsum(splits)) + names(offsets) <- c(rownames(data), NA) - offsets; + offsets }, protected=TRUE) # getChromosomeOffsets() @@ -851,41 +851,41 @@ # @keyword internal #*/########################################################################### setMethodS3("ploidy", "AbstractCBS", function(fit, ...) { - ploidy <- fit$params$ploidy; - if (is.null(ploidy)) ploidy <- 2L; - ploidy; + ploidy <- fit$params$ploidy + if (is.null(ploidy)) ploidy <- 2L + ploidy }) setMethodS3("ploidy<-", "AbstractCBS", function(fit, value) { - fit <- setPloidy(fit, ploidy=value, update=TRUE); - invisible(fit); + fit <- setPloidy(fit, ploidy=value, update=TRUE) + invisible(fit) }) "ploidy<-" <- function(fit, value) { - UseMethod("ploidy<-"); + UseMethod("ploidy<-") } setMethodS3("setPloidy", "AbstractCBS", function(fit, ploidy=2L, update=TRUE, ...) { # Argument 'ploidy': - ploidy <- Arguments$getInteger(ploidy, range=c(1,Inf)); + ploidy <- Arguments$getInteger(ploidy, range=c(1,Inf)) if (update) { # Calculate rescaling factor - oldPloidy <- ploidy(fit); - scale <- ploidy / oldPloidy; + oldPloidy <- ploidy(fit) + scale <- ploidy / oldPloidy # Nothing todo? if (scale != 1) { - fit <- adjustPloidyScale(fit, scale=scale, ...); + fit <- adjustPloidyScale(fit, scale=scale, ...) } } - fit$params$ploidy <- ploidy; - invisible(fit); + fit$params$ploidy <- ploidy + invisible(fit) }, protected=TRUE) -setMethodS3("adjustPloidyScale", "AbstractCBS", abstract=TRUE); +setMethodS3("adjustPloidyScale", "AbstractCBS", abstract=TRUE) ###########################################################################/** @@ -916,48 +916,3 @@ # } #*/########################################################################### setMethodS3("normalizeTotalCNs", "AbstractCBS", abstract=TRUE) - - -############################################################################ -# HISTORY: -# 2013-11-05 -# o Added basic implementations of setLocusData() and setSegments() -# for AbstractCBS. -# 2013-10-20 -# o Added abstract getChangePoints(). -# 2013-05-07 -# o Added ploidy() and ploidy()<- for AbstractCBS. -# 2013-02-01 -# o Added resetSegments() for AbstractCBS, which drops extra segments -# columns (e.g. bootstrap statisistics and calls) except those -# obtained from the segment algorithm. -# 2013-01-15 -# o Added get-/setMeanEstimators() for AbstractCBS. -# 2012-09-21 -# o Now nbrOfChangePoints() for AbstractCBS calculates only change points -# of connected neighboring segments. -# 2012-09-14 -# o GENERALIZATION: Added getSegmentSizes() for AbstractCBS. -# o GENERALIZATION: Added getChromosomeOffsets() for AbstractCBS. -# 2012-09-13 -# o Updated all.equal() for AbstractCBS to compare locus-level data, -# segments, and other fields. -# 2012-06-03 -# o DOCUMENTATION: Added Rd help for updateMeans(). -# 2011-12-03 -# o Now print() for AbstractCBS returns getSegments(..., simplify=TRUE). -# 2011-11-17 -# o Added resegment() for AbstractCBS. -# 2011-10-30 -# o Added save() and load() methods to AbstractCBS. -# 2011-10-16 -# o Added sampleCNs() for AbstractCBS. -# o Added abstract getSegmentSizes() for AbstractCBS. -# o Added abstract extractCNs() for AbstractCBS. -# 2011-10-08 -# o Added abstract updateMeans() for AbstractCBS. -# o Added all.equal() for AbstractCBS. -# o Added nbrOfChangePoints() for AbstractCBS. -# 2011-10-02 -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/AbstractCBS.REPORT.R r-cran-pscbs-0.64.0/R/AbstractCBS.REPORT.R --- r-cran-pscbs-0.63.0/R/AbstractCBS.REPORT.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/AbstractCBS.REPORT.R 2018-08-12 21:30:44.000000000 +0000 @@ -37,55 +37,55 @@ # @keyword internal #*/########################################################################### setMethodS3("report", "AbstractCBS", function(fit, sampleName=getSampleName(fit), studyName, ..., rspTags=NULL, rootPath="reports/", .filename="*", skip=TRUE, envir=new.env(), verbose=FALSE) { - use("R.rsp (>= 0.20.0)"); + use("R.rsp (>= 0.20.0)") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'sampleName': - sampleName <- Arguments$getCharacter(sampleName); + sampleName <- Arguments$getCharacter(sampleName) if (is.na(sampleName)) { - throw("Cannot generate report. Argument 'sampleName' is non-valid or missing."); + throw("Cannot generate report. Argument 'sampleName' is non-valid or missing.") } # Argument 'studyName': if (missing(studyName)) { - throw("Cannot generate report. Argument 'studyName' is missing."); + throw("Cannot generate report. Argument 'studyName' is missing.") } - studyName <- Arguments$getCharacter(studyName); + studyName <- Arguments$getCharacter(studyName) if (is.na(studyName)) { - throw("Cannot generate report. Argument 'studyName' is non-valid."); + throw("Cannot generate report. Argument 'studyName' is non-valid.") } # Argument 'rspTags': if (!is.null(rspTags)) { - rspTags <- Arguments$getCharacters(rspTags); - rspTags <- unlist(strsplit(rspTags, split=",", fixed=TRUE)); - rspTags <- rspTags[nchar(rspTags) > 0L]; + rspTags <- Arguments$getCharacters(rspTags) + rspTags <- unlist(strsplit(rspTags, split=",", fixed=TRUE)) + rspTags <- rspTags[nchar(rspTags) > 0L] } # Argument 'rootPath': - rootPath <- Arguments$getWritablePath(rootPath); + rootPath <- Arguments$getWritablePath(rootPath) # Argument '.filename': if (!is.null(.filename)) { - .filename <- Arguments$getCharacter(.filename, useNames=TRUE); + .filename <- Arguments$getCharacter(.filename, useNames=TRUE) } # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Generating CBS report"); + verbose && enter(verbose, "Generating CBS report") - verbose && cat(verbose, "Sample name: ", sampleName); - verbose && cat(verbose, "Number of chromosomes: ", nbrOfChromosomes(fit)); - verbose && cat(verbose, "Number of segments: ", nbrOfSegments(fit)); + verbose && cat(verbose, "Sample name: ", sampleName) + verbose && cat(verbose, "Number of chromosomes: ", nbrOfChromosomes(fit)) + verbose && cat(verbose, "Number of segments: ", nbrOfSegments(fit)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Report template arguments @@ -99,68 +99,68 @@ Clim = c(0,2*ploidy(fit)), Blim = c(0,1), figForce = FALSE - ); + ) # Override with user arguments - userArgs <- list(...); + userArgs <- list(...) for (key in names(userArgs)) { - rspArgs[[key]] <- userArgs[[key]]; + rspArgs[[key]] <- userArgs[[key]] } if (is.null(rspArgs$reportPath)) { - rspArgs$reportPath <- file.path(rootPath, rspArgs$studyName); + rspArgs$reportPath <- file.path(rootPath, rspArgs$studyName) } - rspArgs$reportPath <- Arguments$getWritablePath(rspArgs$reportPath); - verbose && cat(verbose, "Report root path: ", rspArgs$reportPath); + rspArgs$reportPath <- Arguments$getWritablePath(rspArgs$reportPath) + verbose && cat(verbose, "Report root path: ", rspArgs$reportPath) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Linking to report files # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Linking to report files"); + verbose && enter(verbose, "Linking to report files") # Directory where all report templates lives - srcPath <- "templates"; + srcPath <- "templates" # If missing, default to one that comes with PSCBS/templates/ if (!isDirectory(srcPath)) { - srcPath <- system.file("templates", package="PSCBS"); + srcPath <- system.file("templates", package="PSCBS") } - srcPath <- file.path(srcPath, "rsp"); - srcPath <- Arguments$getReadablePath(srcPath); - verbose && cat(verbose, "Source path: ", srcPath); + srcPath <- file.path(srcPath, "rsp") + srcPath <- Arguments$getReadablePath(srcPath) + verbose && cat(verbose, "Source path: ", srcPath) - filename <- .filename; + filename <- .filename # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Create file links to the main RSP report template # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Main RSP template"); + verbose && enter(verbose, "Main RSP template") # Construct the filename of the main RSP file to compile, iff missing. if (filename == "*") { - className <- class(fit)[1]; - fullname <- paste(c(className, rspTags), collapse=","); - filename <- sprintf("%s,report.tex.rsp", fullname); + className <- class(fit)[1] + fullname <- paste(c(className, rspTags), collapse=",") + filename <- sprintf("%s,report.tex.rsp", fullname) } - rspPathname <- file.path(srcPath, filename); - verbose && cat(verbose, "RSP report template: ", rspPathname); - rspPathname <- Arguments$getReadablePathname(rspPathname); - - destFilename <- sprintf("%s,%s", sampleName, filename); - destPathname <- filePath(rspArgs$reportPath, destFilename); - target <- rspPathname; - link <- destPathname; + rspPathname <- file.path(srcPath, filename) + verbose && cat(verbose, "RSP report template: ", rspPathname) + rspPathname <- Arguments$getReadablePathname(rspPathname) + + destFilename <- sprintf("%s,%s", sampleName, filename) + destPathname <- filePath(rspArgs$reportPath, destFilename) + target <- rspPathname + link <- destPathname if (!isFile(link)) { - verbose && cat(verbose, "Adding link: ", link, " -> ", target); - createLink(link=link, target=target); + verbose && cat(verbose, "Adding link: ", link, " -> ", target) + createLink(link=link, target=target) } # Sanity check - stopifnot(isFile(link)); + .stop_if_not(isFile(link)) - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Skip? @@ -175,7 +175,7 @@ filename <- sprintf("%s.%s", fullname, ext) pathname <- file.path(rspArgs$reportPath, filename) pathname <- getAbsolutePath(pathname) - verbose && cat(verbose, "Expected output pathname: ", pathname); + verbose && cat(verbose, "Expected output pathname: ", pathname) if (isFile(pathname)) { verbose && cat(verbose, "Already exists: Skipping.") report <- R.rsp::RspFileProduct(pathname) @@ -187,114 +187,76 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Create file links to all LaTeX include files # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "All LaTeX files"); + verbose && enter(verbose, "All LaTeX files") - files <- list.files(path=srcPath, pattern="[.](bib|bst|cls|sty|tex)$", full.names=TRUE, recursive=FALSE); - files <- files[file_test("-f", files)]; + files <- list.files(path=srcPath, pattern="[.](bib|bst|cls|sty|tex)$", full.names=TRUE, recursive=FALSE) + files <- files[file_test("-f", files)] if (length(files) > 0L) { - verbose && cat(verbose, "Number of such files found: ", length(files)); - verbose && print(verbose, files); + verbose && cat(verbose, "Number of such files found: ", length(files)) + verbose && print(verbose, files) for (kk in seq_along(files)) { - target <- files[kk]; - link <- filePath(rspArgs$reportPath, basename(files[kk])); + target <- files[kk] + link <- filePath(rspArgs$reportPath, basename(files[kk])) if (!isFile(link)) { - verbose && cat(verbose, "Adding link: ", link, " -> ", target); - createLink(link=link, target=target); + verbose && cat(verbose, "Adding link: ", link, " -> ", target) + createLink(link=link, target=target) } # Sanity check - stopifnot(isFile(link)); + .stop_if_not(isFile(link)) } } else { - verbose && cat(verbose, "No such files found."); + verbose && cat(verbose, "No such files found.") } - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Create file links to all 'incl.*' subdirectories # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "All 'incl.*' subdirectories"); + verbose && enter(verbose, "All 'incl.*' subdirectories") - dirs <- list.files(srcPath, pattern="^incl", full.names=TRUE, recursive=FALSE); - dirs <- dirs[file_test("-d", dirs)]; + dirs <- list.files(srcPath, pattern="^incl", full.names=TRUE, recursive=FALSE) + dirs <- dirs[file_test("-d", dirs)] if (length(dirs) > 0L) { - verbose && cat(verbose, "Number of such directories found: ", length(dirs)); - verbose && print(verbose, dirs); + verbose && cat(verbose, "Number of such directories found: ", length(dirs)) + verbose && print(verbose, dirs) for (kk in seq_along(dirs)) { - target <- dirs[kk]; - link <- filePath(rspArgs$reportPath, basename(dirs[kk])); + target <- dirs[kk] + link <- filePath(rspArgs$reportPath, basename(dirs[kk])) if (!isDirectory(link)) { - verbose && cat(verbose, "Adding link: ", link, " -> ", target); - createLink(link=link, target=target); + verbose && cat(verbose, "Adding link: ", link, " -> ", target) + createLink(link=link, target=target) } # Sanity check - stopifnot(isDirectory(link)); + .stop_if_not(isDirectory(link)) } } else { - verbose && cat(verbose, "No such directories found."); + verbose && cat(verbose, "No such directories found.") } - verbose && exit(verbose); + verbose && exit(verbose) - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Build reports # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Processing RSP template"); - rspArgs$figPath <- "figures/"; - args <- c(list(rspArgs=rspArgs), rspArgs); - report <- R.rsp::rfile(destPathname, workdir=rspArgs$reportPath, args=args, envir=envir, verbose=verbose); - verbose && exit(verbose); + verbose && enter(verbose, "Processing RSP template") + rspArgs$figPath <- "figures/" + args <- c(list(rspArgs=rspArgs), rspArgs) + report <- R.rsp::rfile(destPathname, workdir=rspArgs$reportPath, args=args, envir=envir, verbose=verbose) + verbose && exit(verbose) - verbose && cat(verbose, "Final report: ", getRelativePath(report)); + verbose && cat(verbose, "Final report: ", getRelativePath(report)) - verbose && exit(verbose); + verbose && exit(verbose) - report; + report }, protected=TRUE) - - - -############################################################################ -# HISTORY: -# 2013-03-21 -# o Now report() uses file links instead of copying template files. -# It also links to all LaTeX related files and all directories -# named '^incl.*'. -# 2013-03-09 -# o Now report() also included files listed in the optional file -# '.install_extras' of the source RSP template directory. -# The same filename is used by 'R CMD build/check' for including -# extra vignette source files. -# 2012-09-18 -# o Added argument 'force' to report() for AbstractCBS. This will -# copy the RSP template files again, although they are already in -# reports/ output directory. -# o Now report(fit, ..., rspTags) for AbstractCBS looks for the RSP -# template named (,),report.tex.rsp, where -# className is class(fit)[1] and argument 'rspTags' is an optional -# comma-separated character string/vector. -# o Now report() for AbstractCBS looks for the RSP template in templates/, -# and as a backup in templates,PSCBS/. If the latter does not exist, -# it is automatically created as a soft link to templates/ of the -# PSCBS package. This allows anyone to create their own customized -# copy (in templates/) of the default PSCBS RSP report. -# 2012-05-30 -# o Now report() gives more a informative error message if arguments -# 'sampleName' or 'studyName' are non-valid or missing. -# 2012-04-20 -# o Added argument '.filenames'. -# o Created from former PairedPSCBS.REPORT.R, which history as below. -# 2012-02-27 -# o Added Rdoc help. -# o Added report() for PairedPSCBS. -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/AbstractCBS.RESTRUCT.R r-cran-pscbs-0.64.0/R/AbstractCBS.RESTRUCT.R --- r-cran-pscbs-0.63.0/R/AbstractCBS.RESTRUCT.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/AbstractCBS.RESTRUCT.R 2018-08-12 21:30:44.000000000 +0000 @@ -90,8 +90,12 @@ # @seeclass # } #*/########################################################################### -setMethodS3("append", "AbstractCBS", abstract=TRUE); - +setMethodS3("append", "AbstractCBS", function(x, other, addSplit = TRUE, ...) { + new <- if (addSplit) "c(x, other, addSplit = TRUE)" else "c(x, other)" + old <- sprintf("append.%s()", class(x)[1]) + .Deprecated(new = new, old = old) + c(x, other, addSplit = addSplit) +}) setMethodS3("renameChromosomes", "AbstractCBS", function(fit, from, to, ...) { @@ -99,61 +103,61 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'from' & 'to': - from <- Arguments$getIntegers(from, disallow=c("NaN", "Inf")); - n <- length(from); - to <- Arguments$getIntegers(to, disallow=c("NaN", "Inf"), length=c(n,n)); + from <- Arguments$getIntegers(from, disallow=c("NaN", "Inf")) + n <- length(from) + to <- Arguments$getIntegers(to, disallow=c("NaN", "Inf"), length=c(n,n)) # Nothing to do? if (n == 0) { - return(fit); + return(fit) } - data <- getLocusData(fit); - segs <- getSegments(fit, splitters=TRUE, simplify=FALSE); - knownSegments <- fit$params$knownSegments; + data <- getLocusData(fit) + segs <- getSegments(fit, splitters=TRUE, simplify=FALSE) + knownSegments <- fit$params$knownSegments for (cc in seq_len(n)) { - chr <- from[cc]; - chrN <- to[cc]; - data$chromosome[data$chromosome == chr] <- chrN; - segs$chromosome[segs$chromosome == chr] <- chrN; - knownSegments$chromosome[knownSegments$chromosome == chr] <- chrN; + chr <- from[cc] + chrN <- to[cc] + data$chromosome[data$chromosome == chr] <- chrN + segs$chromosome[segs$chromosome == chr] <- chrN + knownSegments$chromosome[knownSegments$chromosome == chr] <- chrN } # for (cc ...) - fit <- setLocusData(fit, data); - fit <- setSegments(fit, segs); - fit$params$knownSegments <- knownSegments; + fit <- setLocusData(fit, data) + fit <- setSegments(fit, segs) + fit$params$knownSegments <- knownSegments - fit; + fit }, protected=TRUE) # renameChromosomes() -setMethodS3("extractChromosomes", "AbstractCBS", abstract=TRUE, protected=TRUE); +setMethodS3("extractChromosomes", "AbstractCBS", abstract=TRUE, protected=TRUE) setMethodS3("extractChromosome", "AbstractCBS", function(x, chromosome, ...) { # To please R CMD check - this <- x; + this <- x # Argument 'chromosome': - chromosome <- Arguments$getInteger(chromosome, disallow=c("NaN", "Inf")); + chromosome <- Arguments$getInteger(chromosome, disallow=c("NaN", "Inf")) - extractChromosomes(this, chromosomes=chromosome, ...); + extractChromosomes(this, chromosomes=chromosome, ...) }, protected=TRUE) -setMethodS3("extractSegments", "AbstractCBS", abstract=TRUE, protected=TRUE); +setMethodS3("extractSegments", "AbstractCBS", abstract=TRUE, protected=TRUE) setMethodS3("extractSegment", "AbstractCBS", function(this, idx, ...) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'region': - idx <- Arguments$getIndex(idx, max=nbrOfSegments(this, splitters=TRUE)); + idx <- Arguments$getIndex(idx, max=nbrOfSegments(this, splitters=TRUE)) - extractSegments(this, idxs=idx, ...); + extractSegments(this, idxs=idx, ...) }, private=TRUE) # extractSegment() @@ -161,44 +165,44 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nbrOfSegments <- nbrOfSegments(this, splitters=TRUE); + nbrOfSegments <- nbrOfSegments(this, splitters=TRUE) # Argument 'regions': - regions <- Arguments$getIndices(regions, max=nbrOfSegments); + regions <- Arguments$getIndices(regions, max=nbrOfSegments) # Argument 'H': - H <- Arguments$getInteger(H, range=c(0,Inf)); + H <- Arguments$getInteger(H, range=c(0,Inf)) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Extract regions of a certain length"); + verbose && enter(verbose, "Extract regions of a certain length") - verbose && cat(verbose, "Left-most segments of regions to be extracted:"); - verbose && str(verbose, regions); - verbose && cat(verbose, "Number of segments in each region: ", H); + verbose && cat(verbose, "Left-most segments of regions to be extracted:") + verbose && str(verbose, regions) + verbose && cat(verbose, "Number of segments in each region: ", H) # Identify segments to keep - Hs <- seq_len(H); - regions <- regions - 1L; - regions <- as.list(regions); - segments <- lapply(regions, FUN=function(region) region + Hs); - segments <- unlist(segments, use.names=FALSE); - segments <- sort(unique(segments)); - verbose && cat(verbose, "Final set of segments to be extracted:"); - verbose && str(verbose, segments); + Hs <- seq_len(H) + regions <- regions - 1L + regions <- as.list(regions) + segments <- lapply(regions, FUN=function(region) region + Hs) + segments <- unlist(segments, use.names=FALSE) + segments <- sort(unique(segments)) + verbose && cat(verbose, "Final set of segments to be extracted:") + verbose && str(verbose, segments) - res <- extractSegments(this, idxs=segments, ...); + res <- extractSegments(this, idxs=segments, ...) - verbose && exit(verbose); + verbose && exit(verbose) - res; + res }, protected=TRUE) # extractRegions() @@ -208,9 +212,9 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'region': - region <- Arguments$getIndex(region, max=nbrOfSegments(this, splitters=TRUE)); + region <- Arguments$getIndex(region, max=nbrOfSegments(this, splitters=TRUE)) - extractRegions(this, regions=region, ...); + extractRegions(this, regions=region, ...) }, private=TRUE) # extractRegion() @@ -250,16 +254,16 @@ # @seeclass # } #*/########################################################################### -setMethodS3("mergeTwoSegments", "AbstractCBS", abstract=TRUE, protected=TRUE); +setMethodS3("mergeTwoSegments", "AbstractCBS", abstract=TRUE, protected=TRUE) setMethodS3("dropChangePoint", "AbstractCBS", function(fit, idx, ...) { # Argument 'idx': -## max <- nbrOfChangePoints(fit, splitters=TRUE, ...); - max <- nbrOfSegments(fit, splitters=TRUE, ...) - 1L; - idx <- Arguments$getIndex(idx, max=max); +## max <- nbrOfChangePoints(fit, splitters=TRUE, ...) + max <- nbrOfSegments(fit, splitters=TRUE, ...) - 1L + idx <- Arguments$getIndex(idx, max=max) - mergeTwoSegments(fit, left=idx, ...); + mergeTwoSegments(fit, left=idx, ...) }, protected=TRUE) @@ -300,32 +304,32 @@ #*/########################################################################### setMethodS3("dropChangePoints", "AbstractCBS", function(fit, idxs, update=TRUE, ...) { # Assert that there is only one chromosome - chrs <- getChromosomes(fit); + chrs <- getChromosomes(fit) if (length(chrs) > 1) { - throw("dropChangePoints() only support single-chromosome data: ", hpaste(chrs)); + throw("dropChangePoints() only support single-chromosome data: ", hpaste(chrs)) } # Argument 'idxs': -## max <- nbrOfChangePoints(fit, splitters=TRUE, ...); - max <- nbrOfSegments(fit, splitters=TRUE, ...) - 1L; - idxs <- Arguments$getIndices(idxs, max=max); +## max <- nbrOfChangePoints(fit, splitters=TRUE, ...) + max <- nbrOfSegments(fit, splitters=TRUE, ...) - 1L + idxs <- Arguments$getIndices(idxs, max=max) # Drop change points one by one - idxs <- unique(idxs); - idxs <- sort(idxs, decreasing=TRUE); + idxs <- unique(idxs) + idxs <- sort(idxs, decreasing=TRUE) for (ii in seq_along(idxs)) { - idx <- idxs[ii]; - updateI <- update && (ii == length(idxs)); - fit <- dropChangePoint(fit, idx=idx, update=updateI, ...); + idx <- idxs[ii] + updateI <- update && (ii == length(idxs)) + fit <- dropChangePoint(fit, idx=idx, update=updateI, ...) } # Update segment statistics? if (update) { - fit <- updateMeans(fit, ...); + fit <- updateMeans(fit, ...) } - fit; + fit }, protected=TRUE) @@ -360,21 +364,21 @@ #*/########################################################################### setMethodS3("mergeThreeSegments", "AbstractCBS", function(fit, middle, ...) { # Argument 'middle': - S <- nbrOfSegments(fit, splitters=TRUE); - middle <- Arguments$getIndex(middle, range=c(2L, S)); + S <- nbrOfSegments(fit, splitters=TRUE) + middle <- Arguments$getIndex(middle, range=c(2L, S)) # Assert that the three segments are on the same chromosome - idxs <- middle + c(-1L, 0L, +1L); - fitT <- extractSegments(fit, idxs); - chrs <- getChromosomes(fitT); + idxs <- middle + c(-1L, 0L, +1L) + fitT <- extractSegments(fit, idxs) + chrs <- getChromosomes(fitT) if (length(chrs) != 1L) { - throw("Argument 'middle' specifies a segment that is at the very end of a chromosome: ", middle); + throw("Argument 'middle' specifies a segment that is at the very end of a chromosome: ", middle) } - fitT <- NULL; # Not needed anymore + fitT <- NULL # Not needed anymore - fit <- mergeTwoSegments(fit, left=middle, ...); - fit <- mergeTwoSegments(fit, left=middle-1L, ...); - fit; + fit <- mergeTwoSegments(fit, left=middle, ...) + fit <- mergeTwoSegments(fit, left=middle-1L, ...) + fit }) # mergeThreeSegments() @@ -422,140 +426,95 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nbrOfSegments <- nbrOfSegments(this, splitters=TRUE); + nbrOfSegments <- nbrOfSegments(this, splitters=TRUE) # Argument 'regions': - regions <- Arguments$getIndices(regions, max=nbrOfSegments); + regions <- Arguments$getIndices(regions, max=nbrOfSegments) # Argument 'H': - H <- Arguments$getInteger(H, range=c(0,Inf)); + H <- Arguments$getInteger(H, range=c(0,Inf)) # Argument 'asMissing': - asMissing <- Arguments$getLogical(asMissing); + asMissing <- Arguments$getLogical(asMissing) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Dropping regions of a certain length"); + verbose && enter(verbose, "Dropping regions of a certain length") - verbose && cat(verbose, "Left-most segments of regions to be dropped:"); - verbose && str(verbose, regions); - verbose && cat(verbose, "Number of segments in each region: ", H); + verbose && cat(verbose, "Left-most segments of regions to be dropped:") + verbose && str(verbose, regions) + verbose && cat(verbose, "Number of segments in each region: ", H) # Nothing to do? if (H == 0) { - verbose && cat(verbose, "Nothing to do. No segments will be dropped."); - verbose && exit(verbose); - return(this); + verbose && cat(verbose, "Nothing to do. No segments will be dropped.") + verbose && exit(verbose) + return(this) } # Identify segments to drop - Hs <- seq_len(H); - regions <- regions - 1L; - regions <- as.list(regions); - regions <- lapply(regions, FUN=function(region) region + Hs); - regions <- unlist(regions, use.names=FALSE); - regions <- sort(unique(regions)); - verbose && cat(verbose, "Final set of segments to be dropped:"); - verbose && str(verbose, regions); + Hs <- seq_len(H) + regions <- regions - 1L + regions <- as.list(regions) + regions <- lapply(regions, FUN=function(region) region + Hs) + regions <- unlist(regions, use.names=FALSE) + regions <- sort(unique(regions)) + verbose && cat(verbose, "Final set of segments to be dropped:") + verbose && str(verbose, regions) # Identify segments to keep - allRegions <- seq_len(nbrOfSegments); - keepSegments <- setdiff(allRegions, regions); - verbose && cat(verbose, "Final set of segments to be kept:"); - verbose && str(verbose, keepSegments); + allRegions <- seq_len(nbrOfSegments) + keepSegments <- setdiff(allRegions, regions) + verbose && cat(verbose, "Final set of segments to be kept:") + verbose && str(verbose, keepSegments) - dropped <- extractRegions(this, regions=regions, ...); - res <- this; + dropped <- extractRegions(this, regions=regions, ...) + res <- this if (length(regions) > 0) { if (asMissing) { - segs <- getSegments(res, splitters=TRUE); - pattern <- "(chromosome|id|start|end)$"; + segs <- getSegments(res, splitters=TRUE) + pattern <- "(chromosome|id|start|end)$" # TODO/AD HOC: Should be class specific /HB 2011-10-17 - pattern <- "(chromosome|id)$"; - excl <- grep(pattern, colnames(segs), ignore.case=TRUE, invert=TRUE); - segs[regions,excl] <- NA; - res$output <- segs; + pattern <- "(chromosome|id)$" + excl <- grep(pattern, colnames(segs), ignore.case=TRUE, invert=TRUE) + segs[regions,excl] <- NA + res$output <- segs # TODO/AD HOC: Should be class specific /HB 2011-10-17 for (ff in grep("segRows", names(res), ignore.case=TRUE, value=TRUE)) { - res[[ff]][regions,] <- NA; + res[[ff]][regions,] <- NA } } else { - res <- extractRegions(res, regions=keepSegments, ...); + res <- extractRegions(res, regions=keepSegments, ...) } } - res$dropped <- dropped; + res$dropped <- dropped # Sanity check if (asMissing) { - stopifnot(nbrOfSegments(res, splitters=TRUE) == nbrOfSegments(this, splitters=TRUE)); + .stop_if_not(nbrOfSegments(res, splitters=TRUE) == nbrOfSegments(this, splitters=TRUE)) } else { - stopifnot(nbrOfSegments(res, splitters=TRUE) + length(regions) == nbrOfSegments(this, splitters=TRUE)); + .stop_if_not(nbrOfSegments(res, splitters=TRUE) + length(regions) == nbrOfSegments(this, splitters=TRUE)) } - verbose && exit(verbose); + verbose && exit(verbose) - res; + res }, protected=TRUE) setMethodS3("dropRegion", "AbstractCBS", function(fit, region, ...) { # Argument 'region': - region <- Arguments$getIndex(region); + region <- Arguments$getIndex(region) - dropRegions(fit, regions=region, ...); + dropRegions(fit, regions=region, ...) }, protected=TRUE) -setMethodS3("shiftTCN", "AbstractCBS", abstract=TRUE, protected=TRUE); - - - - -############################################################################ -# HISTORY: -# 2013-04-20 [HB] -# o CLEANUP: Removed previously deprecated methods for AbstractCBS. -# 2013-03-21 [HB] -# o SPEEDUP: Made dropChangePoints() faster by only updating the segment -# statistics/means at the very end. -# o BUG FIX: dropChangePoint[s]() for AbstractCBS would not allow to -# drop the change points at the very end, if segmentation where done -# with known segments/gaps and/or empty segments. -# 2012-09-13 -# o Now renameChromosomes() also adjusts 'knownSegments'. -# o Added shiftTCN(). -# 2012-02-27 -# o Added renameChromosomes() to AbstractCBS. -# 2012-02-25 -# o Added dropChangePoints() for AbstractCBS. -# 2011-11-17 -# o FIX: extractRegions() for AbstractCBS would also show verbose output. -# 2011-11-04 -# o BUG FIX: extractSegment() for AbstractCBS would give an error, because -# it called itself instead of extractSegments(). -# 2011-10-21 -# o Added mergeThreeSegments() to AbstractCBS. -# 2011-10-17 -# o Added argument 'asMissing' to dropRegions() for AbstractCBS. -# 2011-10-14 -# o Added implementation of extractRegions() for AbstractCBS, which -# utilizes extractSegments(). -# o Added abstract extractSegments() and extractSegment() for AbstractCBS. -# 2011-10-10 -# o Added extractRegion()/dropRegion() and extractRegions()/dropRegions() -# for AbstractCBS, where former ones are wrappers for the latter ones. -# o Added dropChangePoint() for AbstractCBS, which is just a -# "name wrapper" for mergeTwoSegments(). -# 2011-10-08 -# o Added abstract updateMeans() for AbstractCBS. -# o Added abstract mergeTwoSegments() for AbstractCBS. -# 2011-10-02 -# o Created. -############################################################################ +setMethodS3("shiftTCN", "AbstractCBS", abstract=TRUE, protected=TRUE) diff -Nru r-cran-pscbs-0.63.0/R/callSegmentationOutliers.R r-cran-pscbs-0.64.0/R/callSegmentationOutliers.R --- r-cran-pscbs-0.63.0/R/callSegmentationOutliers.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/callSegmentationOutliers.R 2018-08-12 21:30:44.000000000 +0000 @@ -61,189 +61,167 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'y': - disallow <- c("Inf"); - y <- Arguments$getDoubles(y, disallow=disallow); - nbrOfLoci <- length(y); + disallow <- c("Inf") + y <- Arguments$getDoubles(y, disallow=disallow) + nbrOfLoci <- length(y) - length2 <- rep(nbrOfLoci, times=2L); + length2 <- rep(nbrOfLoci, times=2L) # Argument 'chromosome': - disallow <- c("NaN", "Inf"); - chromosome <- Arguments$getIntegers(chromosome, range=c(0,Inf), disallow=disallow); + disallow <- c("NaN", "Inf") + chromosome <- Arguments$getIntegers(chromosome, range=c(0,Inf), disallow=disallow) if (length(chromosome) == 1L) { - chromosome <- rep(chromosome, times=nbrOfLoci); + chromosome <- rep(chromosome, times=nbrOfLoci) } else { - chromosome <- Arguments$getVector(chromosome, length=length2); + chromosome <- Arguments$getVector(chromosome, length=length2) } # Argument 'x': if (!is.null(x)) { - disallow <- c("Inf"); - x <- Arguments$getDoubles(x, length=length2, disallow=disallow); + disallow <- c("Inf") + x <- Arguments$getDoubles(x, length=length2, disallow=disallow) } # Argument 'method': - method <- match.arg(method); + method <- match.arg(method) - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Identifying outliers"); - uChromosomes <- sort(unique(chromosome)); - nbrOfChromosomes <- length(uChromosomes); - verbose && cat(verbose, "Number of chromosomes: ", nbrOfChromosomes); - verbose && cat(verbose, "Number of loci: ", nbrOfLoci); - verbose && cat(verbose, "Detection method: ", method); + verbose && enter(verbose, "Identifying outliers") + uChromosomes <- sort(unique(chromosome)) + nbrOfChromosomes <- length(uChromosomes) + verbose && cat(verbose, "Number of chromosomes: ", nbrOfChromosomes) + verbose && cat(verbose, "Number of loci: ", nbrOfLoci) + verbose && cat(verbose, "Detection method: ", method) # Allocate result vector - isOutlier <- logical(nbrOfLoci); + isOutlier <- logical(nbrOfLoci) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Filter missing data # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Identifying loci with non-missing data"); - keep <- (!is.na(x) & !is.na(y)); + verbose && enter(verbose, "Identifying loci with non-missing data") + keep <- (!is.na(x) & !is.na(y)) if (!is.null(chromosome)) { - keep <- (keep & !is.na(chromosome)); + keep <- (keep & !is.na(chromosome)) } - keep <- which(keep); - chromosome <- chromosome[keep]; - x <- x[keep]; - y <- y[keep]; - nbrOfLoci <- length(x); - verbose && cat(verbose, "Number of loci with non-missing data: ", nbrOfLoci); - verbose && exit(verbose); + keep <- which(keep) + chromosome <- chromosome[keep] + x <- x[keep] + y <- y[keep] + nbrOfLoci <- length(x) + verbose && cat(verbose, "Number of loci with non-missing data: ", nbrOfLoci) + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # For each chromosome # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - isOutlierT <- logical(nbrOfLoci); + isOutlierT <- logical(nbrOfLoci) for (kk in seq_along(uChromosomes)) { - chr <- uChromosomes[kk]; - verbose && enter(verbose, sprintf("Chromosome #%d ('Chr%02d') of %d", kk, chr, length(uChromosomes))); - keepKK <- which(chromosome == chr); - nbrOfLociKK <- length(keepKK); - verbose && cat(verbose, "Number of loci on chromosome: ", nbrOfLociKK); + chr <- uChromosomes[kk] + verbose && enter(verbose, sprintf("Chromosome #%d ('Chr%02d') of %d", kk, chr, length(uChromosomes))) + keepKK <- which(chromosome == chr) + nbrOfLociKK <- length(keepKK) + verbose && cat(verbose, "Number of loci on chromosome: ", nbrOfLociKK) # Extract data - yKK <- y[keepKK]; - xKK <- x[keepKK]; - chromosomeKK <- chromosome[keepKK]; + yKK <- y[keepKK] + xKK <- x[keepKK] + chromosomeKK <- chromosome[keepKK] # Order loci along chromosome - o <- order(xKK); - xKK <- xKK[o]; - yKK <- yKK[o]; - chromosomeKK <- chromosomeKK[o]; - keepKK <- keepKK[o]; - o <- NULL; # Not needed anymore + o <- order(xKK) + xKK <- xKK[o] + yKK <- yKK[o] + chromosomeKK <- chromosomeKK[o] + keepKK <- keepKK[o] + o <- NULL # Not needed anymore # Supress all warnings, in order to avoid warnings by DNAcopy::CNA() # on "array has repeated maploc positions". Ideally we should filter # just those out. /HB 2013-10-22 suppressWarnings({ - dataKK <- CNA(genomdat=yKK, chrom=chromosomeKK, maploc=xKK, sampleid="y", presorted=TRUE); - }); - chromosomeKK <- xKK <- NULL; # Not needed anymore + dataKK <- CNA(genomdat=yKK, chrom=chromosomeKK, maploc=xKK, sampleid="y", presorted=TRUE) + }) + chromosomeKK <- xKK <- NULL # Not needed anymore - yKKs <- smooth.CNA(dataKK, ...)$y; - dataKK <- NULL; # Not needed anymore + yKKs <- smooth.CNA(dataKK, ...)$y + dataKK <- NULL # Not needed anymore # Sanity check - stopifnot(length(yKKs) == nbrOfLociKK); - outliersKK <- which(yKKs != yKK); - yKKs <- yKK <- NULL; # Not needed anymore + .stop_if_not(length(yKKs) == nbrOfLociKK) + outliersKK <- which(yKKs != yKK) + yKKs <- yKK <- NULL # Not needed anymore - nbrOfOutliers <- length(outliersKK); - verbose && cat(verbose, "Number of outliers: ", nbrOfOutliers); + nbrOfOutliers <- length(outliersKK) + verbose && cat(verbose, "Number of outliers: ", nbrOfOutliers) - outliers <- keepKK[outliersKK]; - keepKK <- outliersKK <- NULL; # Not needed anymore + outliers <- keepKK[outliersKK] + keepKK <- outliersKK <- NULL # Not needed anymore - isOutlierT[outliers] <- TRUE; - outliers <- NULL; # Not needed anymore + isOutlierT[outliers] <- TRUE + outliers <- NULL # Not needed anymore - verbose && exit(verbose); + verbose && exit(verbose) } # for (kk ...) - chromosome <- x <- y <- NULL; # Not needed anymore + chromosome <- x <- y <- NULL # Not needed anymore - isOutlier[keep] <- isOutlierT; - isOutlierT <- keep <- NULL; # Not needed anymore + isOutlier[keep] <- isOutlierT + isOutlierT <- keep <- NULL # Not needed anymore - nbrOfOutliers <- sum(isOutlier, na.rm=TRUE); - verbose && cat(verbose, "Total number of outliers: ", nbrOfOutliers); + nbrOfOutliers <- sum(isOutlier, na.rm=TRUE) + verbose && cat(verbose, "Total number of outliers: ", nbrOfOutliers) - verbose && exit(verbose); + verbose && exit(verbose) - isOutlier; + isOutlier }) # callSegmentationOutliers() setMethodS3("callSegmentationOutliers", "data.frame", function(y, ...) { - data <- y; + data <- y # Get either CBS or PSCBS total CN signals. - y <- data$y; + y <- data$y if (is.null(y)) { - y <- data$CT; + y <- data$CT } - callSegmentationOutliers(y=y, chromosome=data$chromosome, x=data$x, ...); + callSegmentationOutliers(y=y, chromosome=data$chromosome, x=data$x, ...) }) # callSegmentationOutliers() setMethodS3("dropSegmentationOutliers", "default", function(y, ...) { - isOutlier <- callSegmentationOutliers(y, ...); - y[isOutlier] <- NA_real_; - isOutlier <- NULL; # Not needed anymore - y; + isOutlier <- callSegmentationOutliers(y, ...) + y[isOutlier] <- NA_real_ + isOutlier <- NULL # Not needed anymore + y }) setMethodS3("dropSegmentationOutliers", "data.frame", function(y, ...) { - data <- y; + data <- y - isOutlier <- callSegmentationOutliers(data, ...); + isOutlier <- callSegmentationOutliers(data, ...) # Update either CBS or PSCBS total CN signals. - key <- "CT"; + key <- "CT" if (!is.element(key, colnames(data))) { - key <- "y"; + key <- "y" } - data[[key]][isOutlier] <- NA_real_; + data[[key]][isOutlier] <- NA_real_ - isOutlier <- NULL; # Not needed anymore + isOutlier <- NULL # Not needed anymore - data; + data }) - - -############################################################################ -# HISTORY: -# 2014-02-04 -# o Now retrieving local copies on DNAcopy functions up front. -# 2013-12-04 -# o DOCUMENTATION: Now {call|drop}SegmentationOutliers() are documented -# as generic functions. -# o Now {call|drop}SegmentationOutliers() drops allocated memory faster. -# o Added Rdoc for dropSegmentationOutliers(). -# 2011-11-23 -# o Added callSegmentationOutliers() and dropSegmentationOutliers() -# for data frames. -# 2011-05-31 -# o Now explicitly using DNAcopy::nnn() to call DNAcopy functions. -# 2010-11-27 -# o Added dropSegmentationOutliers() which sets outliers to missing values. -# o Added callSegmentationOutliers(), which utilizes the detection method -# of DNAcopy::smooth.CNA() as suggested by ABO. -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/CBS.CALL.R r-cran-pscbs-0.64.0/R/CBS.CALL.R --- r-cran-pscbs-0.63.0/R/CBS.CALL.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/CBS.CALL.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,1451 +1,1365 @@ -###########################################################################/** -# @set "class=CBS" -# @RdocMethod callGainsAndLosses -# -# @title "Calls gains and losses" -# -# \description{ -# @get "title". -# } -# -# @synopsis -# -# \arguments{ -# \item{adjust}{A positive scale factor adjusting the sensitivity of the -# caller, where a value less (greater) than 1.0 makes the caller -# less (more) sensitive.} -# \item{method}{A @character string specifying the calling algorithm to use.} -# \item{...}{Additional/optional arguments used to override the default -# parameters used by the caller.} -# } -# -# \value{ -# Returns a @see "PSCBS::CBS" object where @logical columns -# 'lossCall' and 'gainCall' have been appended to the segmentation table. -# } -# -# \section{The UCSF caller}{ -# If \code{method == "ucsf-mad"}, then segments are called using [1], i.e. -# a segment is called gained or lost if its segment level is -# at least two standard deviations away from the median segment level -# on Chr1-22, where standard deviation is estimated using MAD. -# Then same is done for \code{method == "ucsf-dmad"} with the difference -# that the standard deviation is estimated using a robust first order -# variance estimator. -# } -# -# \examples{ -# @include "../incl/segmentByCBS.Rex" -# @include "../incl/segmentByCBS,calls.Rex" -# } -# -# @author "HB" -# -# \references{ -# [1] Fridlyand et al. \emph{Breast tumor copy number aberration -# phenotypes and genomic instability}, BMC Cancer, 2006. \cr -# } -# -# \seealso{ -# @seemethod "callAmplifications". -# @seemethod "callOutliers". -# @seeclass -# } -# -# @keyword internal -#*/########################################################################### -setMethodS3("callGainsAndLosses", "CBS", function(fit, adjust=1.0, method=c("ucsf-mad", "ucsf-dmad"), ..., verbose=FALSE) { - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'adjust': - adjust <- Arguments$getDouble(adjust, range=c(0, Inf)); - - # Argument 'method': - method <- match.arg(method); - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); - if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); - } - - - verbose && enter(verbose, "Calling segments that are gained or lost"); - - userArgs <- list(...); - - params <- list(); - - # Allocate calls - naValue <- as.logical(NA); - nbrOfSegments <- nbrOfSegments(fit, splitters=TRUE); - segs <- getSegments(fit, splitters=TRUE); - nbrOfRows <- nrow(segs); - gainCalls <- lossCalls <- rep(naValue, times=nbrOfRows); - - verbose && cat(verbose, "Number of segments to be called: ", nbrOfSegments); - verbose && cat(verbose, "Call method: ", method); - - if (is.element(method, c("ucsf-mad", "ucsf-dmad"))) { - # Default arguments - args <- list( - chromosomes = intersect(getChromosomes(fit), c(0L, 1:22)), - scale = 2.0 - ); - - # Override by (optional) user-specified arguments - for (key in names(userArgs)) { - args[[key]] <- userArgs[[key]]; - } - - # Extract arguments - chromosomes <- args$chromosomes; - scale <- args$scale; - - # Argument check - chromosomes <- Arguments$getVector(chromosomes, lengths=c(1,Inf)); - scale <- Arguments$getDouble(scale, range=c(0,Inf)); - - # Estimate the whole-genome standard deviation of the TCNs - if (method == "ucsf-mad") { - sigma <- estimateStandardDeviation(fit, chromosomes=chromosomes, - method="res", estimator="mad"); - sigmaKey <- "sigmaMAD"; - } else if (method == "ucsf-dmad") { - sigma <- estimateStandardDeviation(fit, chromosomes=chromosomes, - method="diff", estimator="mad"); - sigmaKey <- "sigmaDelta"; - } else { - throw("INTERNAL ERROR: Unknown method: ", method); - } - - # Sanity check - sigma <- Arguments$getDouble(sigma, range=c(0,Inf)); - - # Calculate the threshold - tau <- scale * sigma; - - # Make more or less sensitive - tau <- adjust * tau; - - verbose && cat(verbose, "Call parameters:"); - verbose && str(verbose, list(sigma=sigma, scale=scale, adjust=adjust)); - - # Calculate segment levels using the median estimator - fitT <- updateMeans(fit, avg="median") - segsT <- getSegments(fitT, splitters=TRUE); - mu <- segsT$mean; - fitT <- segsT <- NULL; # Not needed anymore - - # The median segmented level - muR <- median(mu, na.rm=TRUE); - - # The threshold for losses - tauLoss <- muR - tau; - - # The threshold for gains - tauGain <- muR + tau; - - # Call - lossCalls <- (mu <= tauLoss); # Losses - gainCalls <- (mu >= tauGain); # Gains - - # Call parameters used - params$method <- method; - params$adjust <- adjust; - params[[sigmaKey]] <- sigma; - params$scale <- scale; - params$muR <- muR; - params$tau <- tau; - params$tauLoss <- tauLoss; - params$tauGain <- tauGain; - } - - verbose && cat(verbose, "Number of called segments: ", length(lossCalls)); - - - # Sanity check - stopifnot(length(lossCalls) == nbrOfRows); - stopifnot(length(gainCalls) == nbrOfRows); - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Update 'DNAcopy' object - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # (a) segmentation table - segs <- getSegments(fit, splitters=TRUE); - segs$lossCall <- lossCalls; - segs$gainCall <- gainCalls; - fit$output <- segs; - - # (b) parameters - allParams <- fit$params; - if (is.null(allParams)) { - allParams <- list(); - } - allParams$callGainsAndLosses <- params; - fit$params <- allParams; - - verbose && exit(verbose); - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Return the updated 'CBS' object. - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - fit; -}, private=TRUE) # callGainsAndLosses() - - - - - -###########################################################################/** -# @RdocMethod callAmplifications -# -# @title "Calls (focal) amplifications" -# -# \description{ -# @get "title". -# } -# -# @synopsis -# -# \arguments{ -# \item{adjust}{A positive scale factor adjusting the sensitivity of the -# caller, where a value less (greater) than 1.0 makes the caller -# less (more) sensitive.} -# \item{maxLength}{A @double scalar specifying the maximum length of a segment -# in order for it to be considered a focal amplification.} -# \item{method}{A @character string specifying the calling algorithm to use.} -# \item{...}{Additional/optional arguments used to override the default -# parameters used by the caller.} -# \item{verbose}{@see "R.utils::Verbose".} -# } -# -# \value{ -# Returns a @see "PSCBS::CBS" object where @logical column -# 'amplificationCall' has been appended to the segmentation table. -# } -# -# \section{The UCSF caller}{ -# If \code{method == "ucsf-exp"}, then segments are called using [1], i.e. -# a segment is called an amplification if ... -# } -# -# @author -# -# \references{ -# [1] Fridlyand et al. \emph{Breast tumor copy number aberration -# phenotypes and genomic instability}, BMC Cancer, 2006. \cr -# } -# -# \seealso{ -# @seemethod "callGainsAndLosses". -# @seemethod "callOutliers". -# @seeclass -# } -# -# @keyword internal -#*/########################################################################### -setMethodS3("callAmplifications", "CBS", function(fit, adjust=1.0, maxLength=20e6, method=c("ucsf-exp"), ..., verbose=FALSE) { - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'adjust': - adjust <- Arguments$getDouble(adjust, range=c(0, Inf)); - - # Argument 'maxLength': - maxLength <- Arguments$getDouble(maxLength, range=c(0, Inf)); - - # Argument 'method': - method <- match.arg(method); - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); - if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); - } - - - verbose && enter(verbose, "Calling segments that are amplified"); - - userArgs <- list(...); - - params <- list(); - - # Allocate calls - naValue <- as.logical(NA); - nbrOfSegments <- nbrOfSegments(fit, splitters=TRUE); - calls <- rep(naValue, times=nbrOfSegments); - - verbose && cat(verbose, "Number of segments to be called: ", nbrOfSegments); - verbose && cat(verbose, "Call method: ", method); - - if (method == "ucsf-exp") { - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Call arguments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Default arguments - args <- list( - minLevel = 0.0, - lambda = 1.0, - degree = 3 - ); - - # Override by (optional) user-specified arguments - for (key in names(userArgs)) { - args[[key]] <- userArgs[[key]]; - } - - # Extract arguments - minLevel <- args$minLevel; - lambda <- args$lambda; - degree <- args$degree; - - # Validate arguments - minLevel <- Arguments$getDouble(minLevel, range=c(-Inf, Inf)); - lambda <- Arguments$getDouble(lambda, range=c(0, Inf)); - degree <- Arguments$getDouble(degree, range=c(1, Inf)); - - verbose && cat(verbose, "Call parameters:"); - verbose && str(verbose, list(minLevel=minLevel, lambda=lambda, - degree=degree)); - - segs <- getSegments(fit, splitters=TRUE); - - verbose && cat(verbose, "Segments:"); - verbose && str(verbose, segs); - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Rule #1: Only consider segments that are short enough - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # The lengths (in bp) of the segments - start <- segs$start; - end <- segs$end; - length <- end - start; ## + 1L; - keep1 <- (length <= maxLength); - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Rule #2: Only consider segments that have a mean level - # that is large enough. - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # The mean levels of the segments - mu <- segs$mean; - keep2 <- (mu >= minLevel); - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Rule #3: Only consider segments that have a mean level - # that is much larger than either of the - # flanking segments. - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # The mean levels of the flanking segments - muL <- c(NA, mu[-nbrOfSegments]); - muR <- c(mu[-1], NA); - - # The difference in mean levels to the flanking segments - deltaL <- mu - muL; - deltaR <- mu - muR; - - # The maximum difference to either of the flanking segments - delta <- pmax(deltaL, deltaR, na.rm=TRUE); - - # The threshold for calling segments amplified - tau <- exp(-lambda * mu^degree); - - # Make more or less sensitive - tau <- adjust * tau; - - keep3 <- (delta >= tau); - - # Amplification calls - calls <- (keep1 & keep2 & keep3); - - # Call parameters used - params$method <- method; - params$adjust <- adjust; - params$maxLength <- maxLength; - params$minLevel <- minLevel; - params$lambda <- lambda; - params$degree <- degree; - params$tau <- tau; - } - - verbose && cat(verbose, "Number of called segments: ", length(calls)); - - # Sanity check - stopifnot(length(calls) == nbrOfSegments); - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Update 'DNAcopy' object - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # (a) segmentation table - segs <- getSegments(fit, splitters=TRUE); - segs$amplificationCall <- calls; - fit$output <- segs; - - # (b) parameters - allParams <- fit$params; - if (is.null(allParams)) { - allParams <- list(); - } - allParams$callAmplifications <- params; - fit$params <- allParams; - - - verbose && exit(verbose); - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Return the updated 'CBS' object. - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - fit; -}, private=TRUE) # callAmplifications() - - - -###########################################################################/** -# @RdocMethod callOutliers -# -# @title "Calls outliers" -# -# \description{ -# @get "title". -# } -# -# @synopsis -# -# \arguments{ -# \item{adjust}{A positive scale factor adjusting the sensitivity of the -# caller, where a value less (greater) than 1.0 makes the caller -# less (more) sensitive.} -# \item{method}{A @character string specifying the calling algorithm to use.} -# \item{...}{Additional/optional arguments used to override the default -# parameters used by the caller.} -# } -# -# \value{ -# Returns a @see "PSCBS::CBS" object where @logical columns -# 'negOutlierCall' and 'posOutlierCall' have been appended -# to the segmentation table. -# } -# -# \section{The UCSF caller}{ -# If \code{method == "ucsf-mad"}, then loci are called using [1]; -# "Finally, to identify single technical or biological outliers such -# as high level amplifications, the presence of the outliers within -# a segment was allowed by assigning the original observed log2ratio -# to the clones for which the observed values were more than four -# tumor-specific MAD away from the smoothed values." [1; Suppl. Mat.] -# } -# -# @author "HB" -# -# \references{ -# [1] Fridlyand et al. \emph{Breast tumor copy number aberration -# phenotypes and genomic instability}, BMC Cancer, 2006. \cr -# } -# -# \seealso{ -# @seemethod "callGainsAndLosses". -# @seemethod "callAmplifications". -# @seeclass -# } -# -# @keyword internal -#*/########################################################################### -setMethodS3("callOutliers", "CBS", function(fit, adjust=1.0, method=c("ucsf-mad"), ...) { - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'adjust': - adjust <- Arguments$getDouble(adjust, range=c(0, Inf)); - - # Argument 'method': - method <- match.arg(method); - - - userArgs <- list(...); - - params <- list(); - - # Allocate calls - nbrOfLoci <- nbrOfLoci(fit); - naValue <- as.logical(NA); - negOutlierCall <- posOutlierCall <- rep(naValue, times=nbrOfLoci); - - if (method == "ucsf-mad") { - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Call arguments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Default arguments - args <- list( - scale = 4.0 - ); - - # Override by (optional) user-specified arguments - for (key in names(userArgs)) { - args[[key]] <- userArgs[[key]]; - } - - # Extract arguments - scale <- args$scale; - - # Validate arguments - scale <- Arguments$getDouble(scale, range=c(0, Inf)); - - - # Genomic annotations - data <- getLocusData(fit); - chromosome <- data$chromosome; - x <- data$x; - - # CN signals - y <- data[,3]; - - # Segmented CN signals - yS <- extractSegmentMeansByLocus(fit); - - # CN residuals (relative to segment means) - dy <- y - yS; - - segs <- getSegments(fit, splitters=TRUE); - - # Allocate per-segment SD estimates - nbrOfSegments <- nbrOfSegments(fit); - naValue <- NA_real_; - sds <- rep(naValue, times=nbrOfSegments); - - naValue <- NA_real_; - for (ss in seq_len(nbrOfSegments)) { - seg <- segs[ss,]; - - # Identify loci in current segment - idxs <- which(seg$chromosome == chromosome & - seg$start <= x & x <= seg$end); - - # Sanity check - idxs <- Arguments$getIndices(idxs, max=nbrOfLoci); - - # Extract CN residuals - dySS <- dy[idxs]; - - # Calculate MAD for segment - sdSS <- mad(dySS, na.rm=TRUE); - - # Threshold for outliers - tau <- scale * sdSS; - - # Make more or less sensitive - tau <- adjust * tau; - - # Call outliers - naValue <- as.logical(NA); - callsSS <- rep(naValue, times=length(dySS)); - callsSS[-tau <= dySS & dySS <= +tau] <- 0L; - callsSS[dySS > +tau] <- +1L; - callsSS[dySS < -tau] <- -1L; - - # Record - negOutlierCall[idxs] <- (callsSS < 0L); - posOutlierCall[idxs] <- (callsSS > 0L); - - sds[ss] <- sdSS; - } # for (ss ...) - - params$method <- method; - params$adjust <- adjust; - params$scale <- scale; - params$sds <- sds; - } - - - # Sanity check - stopifnot(length(negOutlierCall) == nbrOfLoci); - stopifnot(length(posOutlierCall) == nbrOfLoci); - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Update 'DNAcopy' object - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # (a) segmentation table - data <- getLocusData(fit); - data$negOutlierCall <- negOutlierCall; - data$posOutlierCall <- posOutlierCall; - fit$data <- data; - - # (b) parameters - allParams <- fit$params; - if (is.null(allParams)) { - allParams <- list(); - } - allParams$callOutliers <- params; - fit$params <- allParams; - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Return the updated 'CBS' object. - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - fit; -}, private=TRUE) # callOutliers() - - - -setMethodS3("extractCallsByLocus", "CBS", function(fit, ...) { - # Extract locus data - data <- getLocusData(fit, ...); - - nbrOfLoci <- nrow(data); - - # Extract segment data - segs <- getSegments(fit, splitters=TRUE); - - # Identify segment calls - callCols <- grep("Call$", colnames(segs)); - nbrOfCalls <- length(callCols); - - - chromosome <- data$chromosome; - x <- data$x; - y <- data[,3]; - - # Allocate locus calls - naValue <- as.logical(NA); - callsL <- matrix(naValue, nrow=nbrOfLoci, ncol=nbrOfCalls); - colnames(callsL) <- colnames(segs)[callCols]; - callsL <- as.data.frame(callsL); - - # For each segment... - for (ss in seq_len(nrow(segs))) { - seg <- segs[ss,]; - idxs <- which(chromosome == seg$chromosome & - seg$start <= x & x <= seg$end); - idxs <- Arguments$getIndices(idxs, max=nbrOfLoci); - # Sanity check -## stopifnot(length(idxs) == seg$nbrOfLoci); - - callsSS <- seg[callCols]; - for (cc in seq_len(nbrOfCalls)) { - callsL[idxs,cc] <- callsSS[,cc]; - } - } # for (ss ...) - - # The calls for loci that have missing annotations or observations, - # should also be missing, i.e. NA. - nok <- (is.na(chromosome) | is.na(x) | is.na(y)); - callsL[nok,] <- as.logical(NA); - - # Sanity check - stopifnot(nrow(callsL) == nbrOfLoci); - stopifnot(ncol(callsL) == nbrOfCalls); - - callsL; -}, private=TRUE) # extractCallsByLocus() - - - -###########################################################################/** -# @RdocMethod getCallStatistics -# -# @title "Calculates various call statistics per chromosome" -# -# \description{ -# @get "title". -# } -# -# @synopsis -# -# \arguments{ -# \item{regions}{An optional @data.frame with columns "chromosome", -# "start", and "end" specifying the regions of interest to calculate -# statistics for. If @NULL, all of the genome is used.} -# \item{shrinkRegions}{If @TRUE, regions are shrunk to the support of -# the data.} -# \item{...}{Not used.} -# \item{verbose}{@see "R.utils::Verbose".} -# } -# -# \value{ -# Returns a CxK @data.frame, where C is the number of regions that -# meet the criteria setup by argument \code{regions} -# and (K-4)/2 is the number of call types. -# The first column is the chromosome index, the second and the third -# are the first and last position, and the fourth the length -# (=last-first+1) of the chromosome. -# The following columns contains call summaries per chromosome. -# For each chromosome and call type, the total length of such calls -# on that chromosome is reported together how large of a fraction -# of the chromosome such calls occupy. -# } -# -# \details{ -# The estimators implemented here are based solely on the -# segmentation results, which is very fast. -# In the original proposal by Fridlyand et al. [1], the authors -# estimates the parameters by converting segment-level calls back -# to locus-level calls and there do the calculations. -# The difference between the two approaches should be minor, -# particularly for large density arrays. -# } -# -# @author "HB" -# -# \references{ -# [1] Fridlyand et al. \emph{Breast tumor copy number aberration -# phenotypes and genomic instability}, BMC Cancer, 2006. \cr -# } -# -# \seealso{ -# @seeclass -# } -# -# @keyword internal -#*/########################################################################### -setMethodS3("getCallStatistics", "CBS", function(fit, regions=NULL, shrinkRegions=TRUE, ..., verbose=FALSE) { - # To please R CMD check, cf. subset() - chromosome <- NULL; rm(list="chromosome"); - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'regions': - if (is.null(regions)) { - # Get chromosome lengths - regions <- getChromosomeRanges(fit)[,c("chromosome", "start", "end")]; - } - regions <- as.data.frame(regions); - stopifnot(all(is.element(c("chromosome", "start", "end"), colnames(regions)))); - stopifnot(!any(duplicated(regions$chromosome))); - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); - if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); - } - - - verbose && enter(verbose, "Calculating call statistics"); - segs <- getSegments(fit, splitters=FALSE); - callTypes <- grep("Call$", colnames(segs), value=TRUE); - verbose && cat(verbose, "Call types: ", hpaste(callTypes)); - if (length(callTypes) == 0) { - throw("Cannot calculate call statistics. No calls have been made."); - } - - verbose && cat(verbose, "Regions of interest:"); - verbose && str(verbose, regions); - - - verbose && enter(verbose, "Filtering out segments within the requested regions"); - - # Filter out segments within the requested regions - segsT <- NULL; - verbose && cat(verbose, "Number of segments (before): ", nrow(segs)); - - for (rr in seq_len(nrow(regions))) { - regionRR <- regions[rr,]; - chrRR <- regionRR[,"chromosome"]; - startRR <- regionRR[,"start"]; - endRR <- regionRR[,"end"]; - if (is.na(chrRR) || is.na(startRR) || is.na(endRR)) { - next; - } - - verbose && enter(verbose, sprintf("Region #%d of %d", rr, nrow(regions))); - - # Select regions that (at least) overlapping with the region - segsRR <- subset(segs, chromosome == chrRR & start <= endRR & end >= startRR); - - verbose && cat(verbose, "Number of segments within region: ", nrow(segsRR)); - - # Special case - if (nrow(segsRR) == 0) { - segsRR <- segs[1,][NA,]; - segsRR$chromosome <- chrRR; - segsRR$start <- startRR; - segsRR$end <- endRR; - segsRR$nbrOfLoci <- 0L; - } - - if (shrinkRegions) { - range <- range(c(segsRR$start, segsRR$end), na.rm=TRUE); - startRR <- max(startRR, range[1], na.rm=TRUE); - endRR <- min(endRR, range[2], na.rm=TRUE); - regions[rr,"end"] <- endRR; - regions[rr,"start"] <- startRR; - } - - # Adjust ranges - segsRR$start[segsRR$start < startRR] <- startRR; - segsRR$end[segsRR$end > endRR] <- endRR; - - segsRR$fullLength <- endRR - startRR; ## + 1L; - - segsT <- rbind(segsT, segsRR); - - verbose && exit(verbose); - } # for (rr ...) - - segs <- segsT; - - # Order by chromosome - o <- order(segs$chromosome); - segs <- segs[o,]; - - verbose && cat(verbose, "Number of segments (after): ", nrow(segs)); - verbose && str(verbose, segs); - - verbose && exit(verbose); - - - verbose && enter(verbose, "Calculating total length per call and chromosome"); - # Sum length of calls per type and chromosome - segs$length <- segs[,"end"] - segs[,"start"]; ## + 1L; - res <- lapply(callTypes, FUN=function(type) { - coeffs <- as.integer(segs[,type]); - lens <- coeffs * segs$length; - lens <- by(lens, INDICES=segs$chromosome, FUN=sum, na.rm=TRUE); - as.vector(lens); - }); - names(res) <- gsub("Call$", "Length", callTypes); - res1 <- as.data.frame(res); - verbose && str(verbose, res); - verbose && exit(verbose); - - # Extract selected regions - idxs <- match(unique(segs$chromosome), regions$chromosome); - regionsT <- regions[idxs,]; - - # Sanity check - stopifnot(nrow(regionsT) == nrow(res1)); - - - verbose && enter(verbose, "Calculating fractions per region"); - # Calculate lengths - regionsT$length <- regionsT[,"end"] - regionsT[,"start"]; ## + 1L; - stopifnot(all(regionsT$length >= 0)); - - res2 <- res1 / regionsT[,"length"]; - names(res2) <- gsub("Call$", "Fraction", callTypes); - verbose && exit(verbose); - - res3 <- cbind(res1, res2); - - res <- regionsT; - if (nrow(res3) > 0) { - res <- cbind(res, res3); - } - rownames(res) <- NULL; - - res <- cbind(label=I(sprintf("chr%d", res[,"chromosome"])), res); - - # Sanity checks - resT <- res[,grep("Fraction", colnames(res))]; - for (key in colnames(resT)) { - rho <- resT[,key]; - stopifnot(all(rho >= 0, na.rm=TRUE)); - stopifnot(all(rho <= 1, na.rm=TRUE)); - } - - stopifnot(nrow(res) == nrow(regions)); - - verbose && str(verbose, res); - verbose && exit(verbose); - - res; -}, protected=TRUE) # getCallStatistics() - - - -###########################################################################/** -# @RdocMethod getFractionOfGenomeLost -# @aliasmethod getFractionOfGenomeGained -# @aliasmethod getFractionOfGenomeAltered -# @aliasmethod getFGL -# @aliasmethod getFGG -# @aliasmethod getFGA -# -# @title "Calculates the fraction of the genome lost, gained, or aberrant either way" -# -# \description{ -# @get "title" (in sense of total copy numbers), -# using definitions closely related to those presented in [1]. -# } -# -# @synopsis -# -# \arguments{ -# \item{...}{Not used.} -# } -# -# \value{ -# Returns a @double in [0,1]. -# } -# -# @author "HB" -# -# \references{ -# [1] Fridlyand et al. \emph{Breast tumor copy number aberration -# phenotypes and genomic instability}, BMC Cancer, 2006. \cr -# } -# -# \seealso{ -# Internally, @seemethod "getCallStatistics" is used. -# @seeclass -# } -# -# @keyword internal -#*/########################################################################### -setMethodS3("getFractionOfGenomeLost", "CBS", function(fit, ...) { - stats <- getCallStatistics(fit, ...); - mean(stats$lossFraction, na.rm=TRUE); -}, protected=TRUE) - -setMethodS3("getFractionOfGenomeGained", "CBS", function(fit, ...) { - stats <- getCallStatistics(fit, ...); - mean(stats$gainFraction, na.rm=TRUE); -}, protected=TRUE) - -setMethodS3("getFractionOfGenomeAltered", "CBS", function(fit, ...) { - getFractionOfGenomeLost(fit, ...) + getFractionOfGenomeGained(fit, ...); -}, protected=TRUE) - -# Shortcuts -setMethodS3("getFGL", "CBS", function(fit, ...) { - getFractionOfGenomeLost(fit, ...); -}, protected=TRUE) - -setMethodS3("getFGG", "CBS", function(fit, ...) { - getFractionOfGenomeGained(fit, ...); -}, protected=TRUE) - -setMethodS3("getFGA", "CBS", function(fit, ...) { - getFractionOfGenomeAltered(fit, ...); -}, protected=TRUE) - - - - -setMethodS3("isWholeChromosomeGained", "CBS", function(fit, minFraction=0.99, ...) { - # Argument 'minFraction': - minFraction <- Arguments$getDouble(minFraction, range=c(0,1)); - - stats <- getCallStatistics(fit, ...); - calls <- stats$gainFraction; - if (is.null(calls)) { - return(rep(NA, times=nbrOfChromosomes(fit))); - } - - res <- (calls >= minFraction); - names(res) <- stats$chromosome; - attr(res, "minFraction") <- minFraction; - - res; -}, protected=TRUE) # isWholeChromosomeGained() - - -setMethodS3("isWholeChromosomeLost", "CBS", function(fit, minFraction=0.99, ...) { - # Argument 'minFraction': - minFraction <- Arguments$getDouble(minFraction, range=c(0,1)); - - stats <- getCallStatistics(fit, ...); - calls <- stats$lossFraction; - if (is.null(calls)) { - return(rep(NA, times=nbrOfChromosomes(fit))); - } - - res <- (calls >= minFraction); - names(res) <- stats$chromosome; - attr(res, "minFraction") <- minFraction; - - res; -}, protected=TRUE) # isWholeChromosomeLost() - - -setMethodS3("nbrOfLosses", "CBS", function(fit, ...) { - stats <- getSegments(fit, ...); - calls <- stats$lossCall; - if (is.null(calls)) { - return(NA_integer_); - } - sum(calls, na.rm=TRUE); -}, protected=TRUE) - - -setMethodS3("nbrOfGains", "CBS", function(fit, ...) { - stats <- getSegments(fit, ...); - calls <- stats$gainCall; - if (is.null(calls)) { - return(NA_integer_); - } - sum(calls, na.rm=TRUE); -}, protected=TRUE) - - -setMethodS3("nbrOfAmplifications", "CBS", function(fit, ...) { - stats <- getSegments(fit, ...); - calls <- stats$amplificationCall; - if (is.null(calls)) { - return(NA_integer_); - } - sum(calls, na.rm=TRUE); -}, protected=TRUE) - - -setMethodS3("getCallStatisticsByArms", "CBS", function(fit, genomeData, ...) { - # To please/trick R CMD check - chromosome <- x <- NULL; rm(list=c("chromosome", "x")); - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'genomeData': - genomeData <- as.data.frame(genomeData); - - - - # Subset 'regions' by chromosomes segmented - keep <- is.element(genomeData$chromosome, getChromosomes(fit)); - genomeData <- genomeData[keep,]; - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # p-arm - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - regions <- getChromosomeRanges(fit); - regions$end <- genomeData$centroStart; - regions$start <- pmin(regions$start, regions$end); - - # Shrink regions - for (rr in seq_len(nrow(regions))) { - chr <- regions[rr,"chromosome"]; - x0 <- regions[rr,"start"]; - x1 <- regions[rr,"end"]; - xs <- subset(fit$data, chromosome == chr & x0 <= x & x <= x1)$x; - if (length(xs) > 0) { - range <- range(xs, na.rm=TRUE); - x0 <- max(c(x0, range[1]), na.rm=TRUE); - x1 <- min(c(x1, range[2]), na.rm=TRUE); - regions[rr,"start"] <- x0; - regions[rr,"end"] <- x1; - } - } # for (rr ...) - regions[,"length"] <- regions[,"end"] - regions[,"start"]; ## + 1L; - callStats <- getCallStatistics(fit, regions=regions); - callStats$label <- sprintf("%sp", callStats$label); - callStatsP <- callStats; - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # q-arm - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - regions <- getChromosomeRanges(fit); - regions$start <- genomeData$centroEnd; - regions$end <- pmax(regions$end, regions$start); - - # Shrink regions - for (rr in seq_len(nrow(regions))) { - chr <- regions[rr,"chromosome"]; - x0 <- regions[rr,"start"]; - x1 <- regions[rr,"end"]; - xs <- subset(fit$data, chromosome == chr & x0 <= x & x <= x1)$x; - if (length(xs) > 0) { - range <- range(xs, na.rm=TRUE); - x0 <- max(c(x0, range[1]), na.rm=TRUE); - x1 <- min(c(x1, range[2]), na.rm=TRUE); - regions[rr,"start"] <- x0; - regions[rr,"end"] <- x1; - } - } # for (rr ...) - regions[,"length"] <- regions[,"end"] - regions[,"start"]; ## + 1L; - - callStats <- getCallStatistics(fit, regions=regions); - callStats$label <- sprintf("%sq", callStats$label); - callStatsQ <- callStats; - - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Merge - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - callStats <- rbind(callStatsP, callStatsQ); - - # Not needed anymore - regions <- callStatsP <- callStatsQ <- NULL; - - # Reorder - o <- order(callStats$chromosome, callStats$start); - callStats <- callStats[o,]; - - callStats; -}, protected=TRUE); # getCallStatisticsByArms() - - -setMethodS3("callArms", "CBS", function(fit, genomeData, minFraction=0.95, ...) { - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'minFraction': - minFraction <- Arguments$getDouble(minFraction, range=c(0,1)); - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # p-arm - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - callStats <- getCallStatisticsByArms(fit, genomeData=genomeData); - - callTypes <- grep("Fraction", colnames(callStats), value=TRUE); - callTypes <- gsub("Fraction", "", callTypes); - - keys <- sprintf("%sFraction", callTypes); - rhos <- callStats[,keys]; - calls <- (rhos >= minFraction); - colnames(calls) <- sprintf("%sCall", callTypes); - - callStats <- cbind(callStats, calls); - - callStats; -}, protected=TRUE); # callArms() - - - - -###########################################################################/** -# @RdocMethod mergeNonCalledSegments -# -# @title "Merge neighboring segments that are not called" -# -# \description{ -# @get "title" -# } -# -# @synopsis -# -# \arguments{ -# \item{...}{Not used.} -# \item{verbose}{@see "R.utils::Verbose".} -# } -# -# \value{ -# Returns an object of the same class -# with the same of fewer number of segments. -# } -# -# @author "HB" -# -# \seealso{ -# @seeclass -# } -# -# @keyword internal -#*/########################################################################### -setMethodS3("mergeNonCalledSegments", "CBS", function(fit, ..., verbose=FALSE) { - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); - if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); - } - - verbose && enter(verbose, "Merging neighboring segments that are not called"); - - # Identify call columns - segs <- getSegments(fit, splitters=TRUE); - keep <- grep("Call$", colnames(segs)); - nbrOfCalls <- length(keep); - - # Sanity check - stopifnot(nbrOfCalls > 0); - - chromosomes <- getChromosomes(fit); - fitList <- list(); - for (cc in seq_along(chromosomes)) { - chromosome <- chromosomes[cc]; - verbose && enter(verbose, sprintf("Chromosome #%d ('%s') of %d", cc, chromosome, length(chromosomes))); - - - fitCC <- extractChromosome(fit, chromosome=chromosome); - n0 <- nbrOfSegments(fitCC); - - # Until no more neighboring non-called segments exists - while (TRUE) { - segs <- getSegments(fitCC, splitters=TRUE); - calls <- as.matrix(segs[,keep]); - - # Find two neighboring segments that are not called - isCalled <- rowAnys(calls, na.rm=TRUE); - verbose && printf(verbose, "Number of segments not called: %d of %d\n", sum(!isCalled, na.rm=TRUE), length(isCalled)); - - notCalled <- which(!isCalled); - delta <- diff(notCalled); - left <- notCalled[which(delta == 1)[1]]; - - # No more segments to merge? - if (is.na(left)) { - break; - } - - fitCC <- mergeTwoSegments(fitCC, left=left); - } # while (...) - - n1 <- nbrOfSegments(fitCC); - verbose && printf(verbose, "Number of segments merged: %d of %d\n", n0-n1, n0); - - fitList[[cc]] <- fitCC; - verbose && exit(verbose); - } # for (cc ...) - - verbose && enter(verbose, "Building result"); - res <- Reduce(append, fitList); - verbose && exit(verbose); - - verbose && exit(verbose); - - res; -}, protected=TRUE); # mergeNonCalledSegments() - - -setMethodS3("estimateDeltaCN", "CBS", function(fit, flavor=c("density(TCN)", "density(dTCN)", "dTCN"), adjust=0.3, ..., verbose=FALSE) { - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'flavor': - flavor <- match.arg(flavor); - - # Argument 'adjust': - adjust <- Arguments$getDouble(adjust, range=c(0,10)); - - - if (flavor == "density(TCN)") { - # Get segment mean levels - segs <- getSegments(fit, splitters=FALSE); - x <- segs$mean; - w <- segs$nbrOfLoci; - - # Drop missing values - keep <- is.finite(x) & is.finite(w); - x <- x[keep]; - w <- w[keep]; - keep <- NULL; # Not needed anymore - - # Normalize weights - w <- w / sum(w, na.rm=TRUE); - - # Estimate density - d <- density(x, weights=w, adjust=adjust); - - w <- NULL; # Not needed anymore - - # Find peaks - pv <- findPeaksAndValleys(d, ...); - type <- NULL; rm(list="type"); # To please R CMD check - p <- subset(pv, type == "peak"); - px <- p$x; - pw <- p$density; - - # Distance between peaks - dx <- diff(px); - # Weights "between" peaks (AD HOC: sum up peak weights) - dw <- pw[-length(pw)] + pw[-1L]; - - deltaCN <- weighted.mean(dx, w=dw); - } else if (flavor == "density(dTCN)") { - # Get change-point magnitudes - x <- getChangePoints(fit)[[1L]]; - x <- abs(x); - - # Drop missing values - keep <- is.finite(x); - x <- x[keep]; - keep <- NULL; # Not needed anymore - - - # Estimate density - d <- density(x, adjust=adjust); - - # Find peaks - pv <- findPeaksAndValleys(d, ...); - type <- NULL; rm(list="type"); # To please R CMD check - p <- subset(pv, type == "peak"); - px <- p$x; - pw <- p$density; - - # Distance between peaks - dx <- diff(px); - # Weights "between" peaks (AD HOC: sum up peak weights) - dw <- pw[-length(pw)] + pw[-1L]; - - throw("Still not implemented."); - } else if (flavor == "dTCN") { - # Get change-point magnitudes - x <- getChangePoints(fit)[[1L]]; - x <- abs(x); - - # Drop missing values - keep <- is.finite(x); - x <- x[keep]; - keep <- NULL; # Not needed anymore - - deltaCN <- median(x); - } - - # Sanity check - deltaCN <- Arguments$getDouble(deltaCN, range=c(0, Inf)); - - deltaCN; -}, protected=TRUE) - - - -setMethodS3("encodeCalls", "data.frame", function(calls, flavor="UCSF", ...) { - # Argument 'calls': - stopifnot(all(is.element(c("chromosome", "x"), colnames(calls)))); - stopifnot(all(is.element(c("lossCall", "gainCall"), colnames(calls)))); - - # Argument 'flavor': - flavor <- match.arg(flavor); - - calls0 <- calls; - - # Allocate - calls <- rep(NA_real_, times=nrow(calls0)); - - # Encode loss, neutral and gain (required) - calls[!calls0$gainCall & !calls0$lossCall] <- 0; - calls[calls0$gainCall] <- +1; - calls[calls0$lossCall] <- -1; - - # Encode amplifications, if any/called. - idxs <- which(calls0$amplificationCall); - calls[idxs] <- +9; - - # Encode negative and positive outliers, if any/called. - idxs <- which(calls0$negOutlierCall); - calls[idxs] <- calls[idxs] - 0.1; - - idxs <- which(calls0$posOutlierCall); - calls[idxs] <- calls[idxs] + 0.1; - - calls; -}, protected=TRUE) # encodeCalls() - - -setMethodS3("callGLAO", "CBS", function(fit, ..., verbose=FALSE) { - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); - if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); - } - - verbose && enter(verbose, "Call gains, losses, amplifications and (negative and positive) outliers"); - verbose && cat(verbose, "Number of segments: ", nbrOfSegments(fit)); - - # Call segments - fitC <- callGainsAndLosses(fit, ..., verbose=verbose); - fitC <- callAmplifications(fitC, ..., verbose=verbose); - - # Call loci, i.e. locus-level negative and positive outliers - fitC <- callOutliers(fitC, ..., verbose=verbose); - verbose && print(verbose, fitC); - - verbose && exit(verbose); - - fitC; -}, protected=TRUE) # callGLAO() - - -############################################################################ -# HISTORY: -# 2013-12-17 -# o Added argument 'flavor' to estimateDeltaCN() for CBS, which specifies -# the type of estimator to use. -# 2013-11-27 -# o Added callGLAO() for CBS. -# o Added encodeCalls() for 'data.frame' object returned by -# getLocusData(..., addCalls=TRUE). -# 2013-11-23 -# o BUG FIX: estimateDeltaCN() assumed aroma.light was loaded. -# 2013-11-14 -# o Added estimateDeltaCN() for CBS. -# o BUG FIX: callGainsAndLosses() for CBS would not estimate the median -# median CN level correctly if there were "empty" segments (e.g. gaps). -# This was/is due to a bug in segments.summary() of the DNAcopy package. -# Instead, we are now calculating the segment median levels ourselves. -# 2012-01-24 -# o ROBUSTNESS: Now getCallStatistics() for CBS asserts that calls have -# been made. If not, an exception is thrown. -# 2011-12-13 -# o Added "ucsf-dmad" to argument 'method' for callGainsAndLosses() of CBS. -# 2011-12-12 -# o Now extractCallsByLocus() for CBS passes arguments -# '...' to getLocusData(). -# 2011-10-23 -# o BUG FIX: getCallStatisticsByArms() for CBS would thrown a error if -# argument 'genomeData' did not contain exactly the same chromosomes -# as in the CBS object. -# o BUG FIX: The length of a segment must be defined as 'end-start' and -# not 'end-start+1' so that the the total length of all segments -# adds up correctly. -# o Added verbose output to callGainsAndLosses() and callAmplifications(). -# o BUG FIX: callAmplifications() for CBS generated an error, if -# more than one chromosome were called. -# 2011-10-08 -# o Added mergeNonCalledSegments() for CBS. -# 2011-10-07 -# o Now getCallStatistics() for CBS always return statistics for -# all regions requested, even empty ones. -# o Now getCallStatistics() for CBS also returns a 'label' column. -# o Added getCallStatisticsByArms() and callArms() for CBS. -# 2011-10-06 -# o Added optional argument 'regions' to getCallStatistics() for CBS. -# o Now getCallStatistics() for CBS also returns 'start' and 'end' -# position of each chromosome. -# 2011-10-03 -# o DOCUMENTATION: Added more help pages. -# 2011-10-02 -# o DOCUMENTATION: Added an Rdoc help page for getFractionOfGenomeLost(), -# getFractionOfGenomeGained(), getFractionOfGenomeAltered(), getFGL(), -# getFGG() and getFGA(). -# 2011-09-05 -# o Added getCallStatistics() for CBS. -# 2011-09-04 -# o Added extractCallsByLocus() for CBS. -# o Adopted the calling methods from ditto of the DNAcopy class. -# 2011-09-01 -# o Now callGainsAndLosses() returns a DNAcopy where the segmentation -# table has the new column 'tcnCall'. -# 2011-08-19 -# o Added argument 'callParams' to plotTracks() for DNAcopy. -# 2011-07-24 -# o Added callOutliers(). -# 2011-07-21 -# o Now amplified segments are also highlighted. -# 2011-07-20 -# o Added callAmplifications(). -# 2011-07-20 -# o Now callGainsAndLosses() estimates the noise level on autosomes only. -# o Now callGainsAndLosses() returns parameters used. -# o Updated callGainsAndLosses() to estimate the std. dev. as the -# MAD of the *residuals* (not the absolute) values. -# o Added support for estimateStandardDeviation(..., method="res"). -# o Added extractSegmentMeansByLocus(). -# o Added drawCentromeres(). -# 2011-07-18 -# o Added getSampleNames(). -# o Added plotTracks() for DNAcopy. -# o Added callGainsAndLosses() to DNAcopy objects. -# o Added nbrOfSegments(), nbrOfLoci() and nbrOfSamples(). -# 2011-07-17 -# o Added estimateStandardDeviation() to DNAcopy objects. -############################################################################ +###########################################################################/** +# @set "class=CBS" +# @RdocMethod callGainsAndLosses +# +# @title "Calls gains and losses" +# +# \description{ +# @get "title". +# } +# +# @synopsis +# +# \arguments{ +# \item{adjust}{A positive scale factor adjusting the sensitivity of the +# caller, where a value less (greater) than 1.0 makes the caller +# less (more) sensitive.} +# \item{method}{A @character string specifying the calling algorithm to use.} +# \item{...}{Additional/optional arguments used to override the default +# parameters used by the caller.} +# } +# +# \value{ +# Returns a @see "PSCBS::CBS" object where @logical columns +# 'lossCall' and 'gainCall' have been appended to the segmentation table. +# } +# +# \section{The UCSF caller}{ +# If \code{method == "ucsf-mad"}, then segments are called using [1], i.e. +# a segment is called gained or lost if its segment level is +# at least two standard deviations away from the median segment level +# on Chr1-22, where standard deviation is estimated using MAD. +# Then same is done for \code{method == "ucsf-dmad"} with the difference +# that the standard deviation is estimated using a robust first order +# variance estimator. +# } +# +# \examples{ +# @include "../incl/segmentByCBS.Rex" +# @include "../incl/segmentByCBS,calls.Rex" +# } +# +# @author "HB" +# +# \references{ +# [1] Fridlyand et al. \emph{Breast tumor copy number aberration +# phenotypes and genomic instability}, BMC Cancer, 2006. \cr +# } +# +# \seealso{ +# @seemethod "callAmplifications". +# @seemethod "callOutliers". +# @seeclass +# } +# +# @keyword internal +#*/########################################################################### +setMethodS3("callGainsAndLosses", "CBS", function(fit, adjust=1.0, method=c("ucsf-mad", "ucsf-dmad"), ..., verbose=FALSE) { + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Validate arguments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Argument 'adjust': + adjust <- Arguments$getDouble(adjust, range=c(0, Inf)) + + # Argument 'method': + method <- match.arg(method) + + # Argument 'verbose': + verbose <- Arguments$getVerbose(verbose) + if (verbose) { + pushState(verbose) + on.exit(popState(verbose)) + } + + + verbose && enter(verbose, "Calling segments that are gained or lost") + + userArgs <- list(...) + + params <- list() + + # Allocate calls + naValue <- as.logical(NA) + nbrOfSegments <- nbrOfSegments(fit, splitters=TRUE) + segs <- getSegments(fit, splitters=TRUE) + nbrOfRows <- nrow(segs) + gainCalls <- lossCalls <- rep(naValue, times=nbrOfRows) + + verbose && cat(verbose, "Number of segments to be called: ", nbrOfSegments) + verbose && cat(verbose, "Call method: ", method) + + if (is.element(method, c("ucsf-mad", "ucsf-dmad"))) { + # Default arguments + args <- list( + chromosomes = intersect(getChromosomes(fit), c(0L, 1:22)), + scale = 2.0 + ) + + # Override by (optional) user-specified arguments + for (key in names(userArgs)) { + args[[key]] <- userArgs[[key]] + } + + # Extract arguments + chromosomes <- args$chromosomes + scale <- args$scale + + # Argument check + chromosomes <- Arguments$getVector(chromosomes, lengths=c(1,Inf)) + scale <- Arguments$getDouble(scale, range=c(0,Inf)) + + # Estimate the whole-genome standard deviation of the TCNs + if (method == "ucsf-mad") { + sigma <- estimateStandardDeviation(fit, chromosomes=chromosomes, + method="res", estimator="mad") + sigmaKey <- "sigmaMAD" + } else if (method == "ucsf-dmad") { + sigma <- estimateStandardDeviation(fit, chromosomes=chromosomes, + method="diff", estimator="mad") + sigmaKey <- "sigmaDelta" + } else { + throw("INTERNAL ERROR: Unknown method: ", method) + } + + # Sanity check + sigma <- Arguments$getDouble(sigma, range=c(0,Inf)) + + # Calculate the threshold + tau <- scale * sigma + + # Make more or less sensitive + tau <- adjust * tau + + verbose && cat(verbose, "Call parameters:") + verbose && str(verbose, list(sigma=sigma, scale=scale, adjust=adjust)) + + # Calculate segment levels using the median estimator + fitT <- updateMeans(fit, avg="median") + segsT <- getSegments(fitT, splitters=TRUE) + mu <- segsT$mean + fitT <- segsT <- NULL # Not needed anymore + + # The median segmented level + muR <- median(mu, na.rm=TRUE) + + # The threshold for losses + tauLoss <- muR - tau + + # The threshold for gains + tauGain <- muR + tau + + # Call + lossCalls <- (mu <= tauLoss) # Losses + gainCalls <- (mu >= tauGain) # Gains + + # Call parameters used + params$method <- method + params$adjust <- adjust + params[[sigmaKey]] <- sigma + params$scale <- scale + params$muR <- muR + params$tau <- tau + params$tauLoss <- tauLoss + params$tauGain <- tauGain + } + + verbose && cat(verbose, "Number of called segments: ", length(lossCalls)) + + + # Sanity check + .stop_if_not(length(lossCalls) == nbrOfRows) + .stop_if_not(length(gainCalls) == nbrOfRows) + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Update 'DNAcopy' object + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # (a) segmentation table + segs <- getSegments(fit, splitters=TRUE) + segs$lossCall <- lossCalls + segs$gainCall <- gainCalls + fit$output <- segs + + # (b) parameters + allParams <- fit$params + if (is.null(allParams)) { + allParams <- list() + } + allParams$callGainsAndLosses <- params + fit$params <- allParams + + verbose && exit(verbose) + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Return the updated 'CBS' object. + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + fit +}, private=TRUE) # callGainsAndLosses() + + + + + +###########################################################################/** +# @RdocMethod callAmplifications +# +# @title "Calls (focal) amplifications" +# +# \description{ +# @get "title". +# } +# +# @synopsis +# +# \arguments{ +# \item{adjust}{A positive scale factor adjusting the sensitivity of the +# caller, where a value less (greater) than 1.0 makes the caller +# less (more) sensitive.} +# \item{maxLength}{A @double scalar specifying the maximum length of a segment +# in order for it to be considered a focal amplification.} +# \item{method}{A @character string specifying the calling algorithm to use.} +# \item{...}{Additional/optional arguments used to override the default +# parameters used by the caller.} +# \item{verbose}{@see "R.utils::Verbose".} +# } +# +# \value{ +# Returns a @see "PSCBS::CBS" object where @logical column +# 'amplificationCall' has been appended to the segmentation table. +# } +# +# \section{The UCSF caller}{ +# If \code{method == "ucsf-exp"}, then segments are called using [1], i.e. +# a segment is called an amplification if ... +# } +# +# @author +# +# \references{ +# [1] Fridlyand et al. \emph{Breast tumor copy number aberration +# phenotypes and genomic instability}, BMC Cancer, 2006. \cr +# } +# +# \seealso{ +# @seemethod "callGainsAndLosses". +# @seemethod "callOutliers". +# @seeclass +# } +# +# @keyword internal +#*/########################################################################### +setMethodS3("callAmplifications", "CBS", function(fit, adjust=1.0, maxLength=20e6, method=c("ucsf-exp"), ..., verbose=FALSE) { + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Validate arguments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Argument 'adjust': + adjust <- Arguments$getDouble(adjust, range=c(0, Inf)) + + # Argument 'maxLength': + maxLength <- Arguments$getDouble(maxLength, range=c(0, Inf)) + + # Argument 'method': + method <- match.arg(method) + + # Argument 'verbose': + verbose <- Arguments$getVerbose(verbose) + if (verbose) { + pushState(verbose) + on.exit(popState(verbose)) + } + + + verbose && enter(verbose, "Calling segments that are amplified") + + userArgs <- list(...) + + params <- list() + + # Allocate calls + naValue <- as.logical(NA) + nbrOfSegments <- nbrOfSegments(fit, splitters=TRUE) + calls <- rep(naValue, times=nbrOfSegments) + + verbose && cat(verbose, "Number of segments to be called: ", nbrOfSegments) + verbose && cat(verbose, "Call method: ", method) + + if (method == "ucsf-exp") { + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Call arguments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Default arguments + args <- list( + minLevel = 0.0, + lambda = 1.0, + degree = 3 + ) + + # Override by (optional) user-specified arguments + for (key in names(userArgs)) { + args[[key]] <- userArgs[[key]] + } + + # Extract arguments + minLevel <- args$minLevel + lambda <- args$lambda + degree <- args$degree + + # Validate arguments + minLevel <- Arguments$getDouble(minLevel, range=c(-Inf, Inf)) + lambda <- Arguments$getDouble(lambda, range=c(0, Inf)) + degree <- Arguments$getDouble(degree, range=c(1, Inf)) + + verbose && cat(verbose, "Call parameters:") + verbose && str(verbose, list(minLevel=minLevel, lambda=lambda, + degree=degree)) + + segs <- getSegments(fit, splitters=TRUE) + + verbose && cat(verbose, "Segments:") + verbose && str(verbose, segs) + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Rule #1: Only consider segments that are short enough + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # The lengths (in bp) of the segments + start <- segs$start + end <- segs$end + length <- end - start ## + 1L + keep1 <- (length <= maxLength) + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Rule #2: Only consider segments that have a mean level + # that is large enough. + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # The mean levels of the segments + mu <- segs$mean + keep2 <- (mu >= minLevel) + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Rule #3: Only consider segments that have a mean level + # that is much larger than either of the + # flanking segments. + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # The mean levels of the flanking segments + muL <- c(NA, mu[-nbrOfSegments]) + muR <- c(mu[-1], NA) + + # The difference in mean levels to the flanking segments + deltaL <- mu - muL + deltaR <- mu - muR + + # The maximum difference to either of the flanking segments + delta <- pmax(deltaL, deltaR, na.rm=TRUE) + + # The threshold for calling segments amplified + tau <- exp(-lambda * mu^degree) + + # Make more or less sensitive + tau <- adjust * tau + + keep3 <- (delta >= tau) + + # Amplification calls + calls <- (keep1 & keep2 & keep3) + + # Call parameters used + params$method <- method + params$adjust <- adjust + params$maxLength <- maxLength + params$minLevel <- minLevel + params$lambda <- lambda + params$degree <- degree + params$tau <- tau + } + + verbose && cat(verbose, "Number of called segments: ", length(calls)) + + # Sanity check + .stop_if_not(length(calls) == nbrOfSegments) + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Update 'DNAcopy' object + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # (a) segmentation table + segs <- getSegments(fit, splitters=TRUE) + segs$amplificationCall <- calls + fit$output <- segs + + # (b) parameters + allParams <- fit$params + if (is.null(allParams)) { + allParams <- list() + } + allParams$callAmplifications <- params + fit$params <- allParams + + + verbose && exit(verbose) + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Return the updated 'CBS' object. + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + fit +}, private=TRUE) # callAmplifications() + + + +###########################################################################/** +# @RdocMethod callOutliers +# +# @title "Calls outliers" +# +# \description{ +# @get "title". +# } +# +# @synopsis +# +# \arguments{ +# \item{adjust}{A positive scale factor adjusting the sensitivity of the +# caller, where a value less (greater) than 1.0 makes the caller +# less (more) sensitive.} +# \item{method}{A @character string specifying the calling algorithm to use.} +# \item{...}{Additional/optional arguments used to override the default +# parameters used by the caller.} +# } +# +# \value{ +# Returns a @see "PSCBS::CBS" object where @logical columns +# 'negOutlierCall' and 'posOutlierCall' have been appended +# to the segmentation table. +# } +# +# \section{The UCSF caller}{ +# If \code{method == "ucsf-mad"}, then loci are called using [1] +# "Finally, to identify single technical or biological outliers such +# as high level amplifications, the presence of the outliers within +# a segment was allowed by assigning the original observed log2ratio +# to the clones for which the observed values were more than four +# tumor-specific MAD away from the smoothed values." [1; Suppl. Mat.] +# } +# +# @author "HB" +# +# \references{ +# [1] Fridlyand et al. \emph{Breast tumor copy number aberration +# phenotypes and genomic instability}, BMC Cancer, 2006. \cr +# } +# +# \seealso{ +# @seemethod "callGainsAndLosses". +# @seemethod "callAmplifications". +# @seeclass +# } +# +# @keyword internal +#*/########################################################################### +setMethodS3("callOutliers", "CBS", function(fit, adjust=1.0, method=c("ucsf-mad"), ...) { + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Validate arguments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Argument 'adjust': + adjust <- Arguments$getDouble(adjust, range=c(0, Inf)) + + # Argument 'method': + method <- match.arg(method) + + + userArgs <- list(...) + + params <- list() + + # Allocate calls + nbrOfLoci <- nbrOfLoci(fit) + naValue <- as.logical(NA) + negOutlierCall <- posOutlierCall <- rep(naValue, times=nbrOfLoci) + + if (method == "ucsf-mad") { + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Call arguments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Default arguments + args <- list( + scale = 4.0 + ) + + # Override by (optional) user-specified arguments + for (key in names(userArgs)) { + args[[key]] <- userArgs[[key]] + } + + # Extract arguments + scale <- args$scale + + # Validate arguments + scale <- Arguments$getDouble(scale, range=c(0, Inf)) + + + # Genomic annotations + data <- getLocusData(fit) + chromosome <- data$chromosome + x <- data$x + + # CN signals + y <- data[,3] + + # Segmented CN signals + yS <- extractSegmentMeansByLocus(fit) + + # CN residuals (relative to segment means) + dy <- y - yS + + segs <- getSegments(fit, splitters=TRUE) + + # Allocate per-segment SD estimates + nbrOfSegments <- nbrOfSegments(fit) + naValue <- NA_real_ + sds <- rep(naValue, times=nbrOfSegments) + + naValue <- NA_real_ + for (ss in seq_len(nbrOfSegments)) { + seg <- segs[ss,] + + # Identify loci in current segment + idxs <- which(seg$chromosome == chromosome & + seg$start <= x & x <= seg$end) + + # Sanity check + idxs <- Arguments$getIndices(idxs, max=nbrOfLoci) + + # Extract CN residuals + dySS <- dy[idxs] + + # Calculate MAD for segment + sdSS <- mad(dySS, na.rm=TRUE) + + # Threshold for outliers + tau <- scale * sdSS + + # Make more or less sensitive + tau <- adjust * tau + + # Call outliers + naValue <- as.logical(NA) + callsSS <- rep(naValue, times=length(dySS)) + callsSS[-tau <= dySS & dySS <= +tau] <- 0L + callsSS[dySS > +tau] <- +1L + callsSS[dySS < -tau] <- -1L + + # Record + negOutlierCall[idxs] <- (callsSS < 0L) + posOutlierCall[idxs] <- (callsSS > 0L) + + sds[ss] <- sdSS + } # for (ss ...) + + params$method <- method + params$adjust <- adjust + params$scale <- scale + params$sds <- sds + } + + + # Sanity check + .stop_if_not(length(negOutlierCall) == nbrOfLoci) + .stop_if_not(length(posOutlierCall) == nbrOfLoci) + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Update 'DNAcopy' object + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # (a) segmentation table + data <- getLocusData(fit) + data$negOutlierCall <- negOutlierCall + data$posOutlierCall <- posOutlierCall + fit$data <- data + + # (b) parameters + allParams <- fit$params + if (is.null(allParams)) { + allParams <- list() + } + allParams$callOutliers <- params + fit$params <- allParams + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Return the updated 'CBS' object. + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + fit +}, private=TRUE) # callOutliers() + + + +setMethodS3("extractCallsByLocus", "CBS", function(fit, ...) { + # Extract locus data + data <- getLocusData(fit, ...) + + nbrOfLoci <- nrow(data) + + # Extract segment data + segs <- getSegments(fit, splitters=TRUE) + + # Identify segment calls + callCols <- grep("Call$", colnames(segs)) + nbrOfCalls <- length(callCols) + + + chromosome <- data$chromosome + x <- data$x + y <- data[,3] + + # Allocate locus calls + naValue <- as.logical(NA) + callsL <- matrix(naValue, nrow=nbrOfLoci, ncol=nbrOfCalls) + colnames(callsL) <- colnames(segs)[callCols] + callsL <- as.data.frame(callsL) + + # For each segment... + for (ss in seq_len(nrow(segs))) { + seg <- segs[ss,] + idxs <- which(chromosome == seg$chromosome & + seg$start <= x & x <= seg$end) + idxs <- Arguments$getIndices(idxs, max=nbrOfLoci) + # Sanity check +## .stop_if_not(length(idxs) == seg$nbrOfLoci) + + callsSS <- seg[callCols] + for (cc in seq_len(nbrOfCalls)) { + callsL[idxs,cc] <- callsSS[,cc] + } + } # for (ss ...) + + # The calls for loci that have missing annotations or observations, + # should also be missing, i.e. NA. + nok <- (is.na(chromosome) | is.na(x) | is.na(y)) + callsL[nok,] <- as.logical(NA) + + # Sanity check + .stop_if_not(nrow(callsL) == nbrOfLoci) + .stop_if_not(ncol(callsL) == nbrOfCalls) + + callsL +}, private=TRUE) # extractCallsByLocus() + + + +###########################################################################/** +# @RdocMethod getCallStatistics +# +# @title "Calculates various call statistics per chromosome" +# +# \description{ +# @get "title". +# } +# +# @synopsis +# +# \arguments{ +# \item{regions}{An optional @data.frame with columns "chromosome", +# "start", and "end" specifying the regions of interest to calculate +# statistics for. If @NULL, all of the genome is used.} +# \item{shrinkRegions}{If @TRUE, regions are shrunk to the support of +# the data.} +# \item{...}{Not used.} +# \item{verbose}{@see "R.utils::Verbose".} +# } +# +# \value{ +# Returns a CxK @data.frame, where C is the number of regions that +# meet the criteria setup by argument \code{regions} +# and (K-4)/2 is the number of call types. +# The first column is the chromosome index, the second and the third +# are the first and last position, and the fourth the length +# (=last-first+1) of the chromosome. +# The following columns contains call summaries per chromosome. +# For each chromosome and call type, the total length of such calls +# on that chromosome is reported together how large of a fraction +# of the chromosome such calls occupy. +# } +# +# \details{ +# The estimators implemented here are based solely on the +# segmentation results, which is very fast. +# In the original proposal by Fridlyand et al. [1], the authors +# estimates the parameters by converting segment-level calls back +# to locus-level calls and there do the calculations. +# The difference between the two approaches should be minor, +# particularly for large density arrays. +# } +# +# @author "HB" +# +# \references{ +# [1] Fridlyand et al. \emph{Breast tumor copy number aberration +# phenotypes and genomic instability}, BMC Cancer, 2006. \cr +# } +# +# \seealso{ +# @seeclass +# } +# +# @keyword internal +#*/########################################################################### +setMethodS3("getCallStatistics", "CBS", function(fit, regions=NULL, shrinkRegions=TRUE, ..., verbose=FALSE) { + # To please R CMD check, cf. subset() + chromosome <- NULL; rm(list="chromosome") + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Validate arguments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Argument 'regions': + if (is.null(regions)) { + # Get chromosome lengths + regions <- getChromosomeRanges(fit)[,c("chromosome", "start", "end")] + } + regions <- as.data.frame(regions) + .stop_if_not(all(is.element(c("chromosome", "start", "end"), colnames(regions)))) + .stop_if_not(!any(duplicated(regions$chromosome))) + + # Argument 'verbose': + verbose <- Arguments$getVerbose(verbose) + if (verbose) { + pushState(verbose) + on.exit(popState(verbose)) + } + + + verbose && enter(verbose, "Calculating call statistics") + segs <- getSegments(fit, splitters=FALSE) + callTypes <- grep("Call$", colnames(segs), value=TRUE) + verbose && cat(verbose, "Call types: ", hpaste(callTypes)) + if (length(callTypes) == 0) { + throw("Cannot calculate call statistics. No calls have been made.") + } + + verbose && cat(verbose, "Regions of interest:") + verbose && str(verbose, regions) + + + verbose && enter(verbose, "Filtering out segments within the requested regions") + + # Filter out segments within the requested regions + segsT <- NULL + verbose && cat(verbose, "Number of segments (before): ", nrow(segs)) + + for (rr in seq_len(nrow(regions))) { + regionRR <- regions[rr,] + chrRR <- regionRR[,"chromosome"] + startRR <- regionRR[,"start"] + endRR <- regionRR[,"end"] + if (is.na(chrRR) || is.na(startRR) || is.na(endRR)) { + next + } + + verbose && enter(verbose, sprintf("Region #%d of %d", rr, nrow(regions))) + + # Select regions that (at least) overlapping with the region + segsRR <- subset(segs, chromosome == chrRR & start <= endRR & end >= startRR) + + verbose && cat(verbose, "Number of segments within region: ", nrow(segsRR)) + + # Special case + if (nrow(segsRR) == 0) { + segsRR <- segs[1,][NA,] + segsRR$chromosome <- chrRR + segsRR$start <- startRR + segsRR$end <- endRR + segsRR$nbrOfLoci <- 0L + } + + if (shrinkRegions) { + range <- range(c(segsRR$start, segsRR$end), na.rm=TRUE) + startRR <- max(startRR, range[1], na.rm=TRUE) + endRR <- min(endRR, range[2], na.rm=TRUE) + regions[rr,"end"] <- endRR + regions[rr,"start"] <- startRR + } + + # Adjust ranges + segsRR$start[segsRR$start < startRR] <- startRR + segsRR$end[segsRR$end > endRR] <- endRR + + segsRR$fullLength <- endRR - startRR ## + 1L + + segsT <- rbind(segsT, segsRR) + + verbose && exit(verbose) + } # for (rr ...) + + segs <- segsT + + # Order by chromosome + o <- order(segs$chromosome) + segs <- segs[o,] + + verbose && cat(verbose, "Number of segments (after): ", nrow(segs)) + verbose && str(verbose, segs) + + verbose && exit(verbose) + + + verbose && enter(verbose, "Calculating total length per call and chromosome") + # Sum length of calls per type and chromosome + segs$length <- segs[,"end"] - segs[,"start"] ## + 1L + res <- lapply(callTypes, FUN=function(type) { + coeffs <- as.integer(segs[,type]) + lens <- coeffs * segs$length + lens <- by(lens, INDICES=segs$chromosome, FUN=sum, na.rm=TRUE) + as.vector(lens) + }) + names(res) <- gsub("Call$", "Length", callTypes) + res1 <- as.data.frame(res) + verbose && str(verbose, res) + verbose && exit(verbose) + + # Extract selected regions + idxs <- match(unique(segs$chromosome), regions$chromosome) + regionsT <- regions[idxs,] + + # Sanity check + .stop_if_not(nrow(regionsT) == nrow(res1)) + + + verbose && enter(verbose, "Calculating fractions per region") + # Calculate lengths + regionsT$length <- regionsT[,"end"] - regionsT[,"start"] ## + 1L + .stop_if_not(all(regionsT$length >= 0)) + + res2 <- res1 / regionsT[,"length"] + names(res2) <- gsub("Call$", "Fraction", callTypes) + verbose && exit(verbose) + + res3 <- cbind(res1, res2) + + res <- regionsT + if (nrow(res3) > 0) { + res <- cbind(res, res3) + } + rownames(res) <- NULL + + res <- cbind(label=I(sprintf("chr%d", res[,"chromosome"])), res) + + # Sanity checks + resT <- res[,grep("Fraction", colnames(res))] + for (key in colnames(resT)) { + rho <- resT[,key] + .stop_if_not(all(rho >= 0, na.rm=TRUE)) + .stop_if_not(all(rho <= 1, na.rm=TRUE)) + } + + .stop_if_not(nrow(res) == nrow(regions)) + + verbose && str(verbose, res) + verbose && exit(verbose) + + res +}, protected=TRUE) # getCallStatistics() + + + +###########################################################################/** +# @RdocMethod getFractionOfGenomeLost +# @aliasmethod getFractionOfGenomeGained +# @aliasmethod getFractionOfGenomeAltered +# @aliasmethod getFGL +# @aliasmethod getFGG +# @aliasmethod getFGA +# +# @title "Calculates the fraction of the genome lost, gained, or aberrant either way" +# +# \description{ +# @get "title" (in sense of total copy numbers), +# using definitions closely related to those presented in [1]. +# } +# +# @synopsis +# +# \arguments{ +# \item{...}{Not used.} +# } +# +# \value{ +# Returns a @double in [0,1]. +# } +# +# @author "HB" +# +# \references{ +# [1] Fridlyand et al. \emph{Breast tumor copy number aberration +# phenotypes and genomic instability}, BMC Cancer, 2006. \cr +# } +# +# \seealso{ +# Internally, @seemethod "getCallStatistics" is used. +# @seeclass +# } +# +# @keyword internal +#*/########################################################################### +setMethodS3("getFractionOfGenomeLost", "CBS", function(fit, ...) { + stats <- getCallStatistics(fit, ...) + mean(stats$lossFraction, na.rm=TRUE) +}, protected=TRUE) + +setMethodS3("getFractionOfGenomeGained", "CBS", function(fit, ...) { + stats <- getCallStatistics(fit, ...) + mean(stats$gainFraction, na.rm=TRUE) +}, protected=TRUE) + +setMethodS3("getFractionOfGenomeAltered", "CBS", function(fit, ...) { + getFractionOfGenomeLost(fit, ...) + getFractionOfGenomeGained(fit, ...) +}, protected=TRUE) + +# Shortcuts +setMethodS3("getFGL", "CBS", function(fit, ...) { + getFractionOfGenomeLost(fit, ...) +}, protected=TRUE) + +setMethodS3("getFGG", "CBS", function(fit, ...) { + getFractionOfGenomeGained(fit, ...) +}, protected=TRUE) + +setMethodS3("getFGA", "CBS", function(fit, ...) { + getFractionOfGenomeAltered(fit, ...) +}, protected=TRUE) + + + + +setMethodS3("isWholeChromosomeGained", "CBS", function(fit, minFraction=0.99, ...) { + # Argument 'minFraction': + minFraction <- Arguments$getDouble(minFraction, range=c(0,1)) + + stats <- getCallStatistics(fit, ...) + calls <- stats$gainFraction + if (is.null(calls)) { + return(rep(NA, times=nbrOfChromosomes(fit))) + } + + res <- (calls >= minFraction) + names(res) <- stats$chromosome + attr(res, "minFraction") <- minFraction + + res +}, protected=TRUE) # isWholeChromosomeGained() + + +setMethodS3("isWholeChromosomeLost", "CBS", function(fit, minFraction=0.99, ...) { + # Argument 'minFraction': + minFraction <- Arguments$getDouble(minFraction, range=c(0,1)) + + stats <- getCallStatistics(fit, ...) + calls <- stats$lossFraction + if (is.null(calls)) { + return(rep(NA, times=nbrOfChromosomes(fit))) + } + + res <- (calls >= minFraction) + names(res) <- stats$chromosome + attr(res, "minFraction") <- minFraction + + res +}, protected=TRUE) # isWholeChromosomeLost() + + +setMethodS3("nbrOfLosses", "CBS", function(fit, ...) { + stats <- getSegments(fit, ...) + calls <- stats$lossCall + if (is.null(calls)) { + return(NA_integer_) + } + sum(calls, na.rm=TRUE) +}, protected=TRUE) + + +setMethodS3("nbrOfGains", "CBS", function(fit, ...) { + stats <- getSegments(fit, ...) + calls <- stats$gainCall + if (is.null(calls)) { + return(NA_integer_) + } + sum(calls, na.rm=TRUE) +}, protected=TRUE) + + +setMethodS3("nbrOfAmplifications", "CBS", function(fit, ...) { + stats <- getSegments(fit, ...) + calls <- stats$amplificationCall + if (is.null(calls)) { + return(NA_integer_) + } + sum(calls, na.rm=TRUE) +}, protected=TRUE) + + +setMethodS3("getCallStatisticsByArms", "CBS", function(fit, genomeData, ...) { + # To please/trick R CMD check + chromosome <- x <- NULL; rm(list=c("chromosome", "x")) + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Validate arguments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Argument 'genomeData': + genomeData <- as.data.frame(genomeData) + + + + # Subset 'regions' by chromosomes segmented + keep <- is.element(genomeData$chromosome, getChromosomes(fit)) + genomeData <- genomeData[keep,] + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # p-arm + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + regions <- getChromosomeRanges(fit) + regions$end <- genomeData$centroStart + regions$start <- pmin(regions$start, regions$end) + + # Shrink regions + for (rr in seq_len(nrow(regions))) { + chr <- regions[rr,"chromosome"] + x0 <- regions[rr,"start"] + x1 <- regions[rr,"end"] + xs <- subset(fit$data, chromosome == chr & x0 <= x & x <= x1)$x + if (length(xs) > 0) { + range <- range(xs, na.rm=TRUE) + x0 <- max(c(x0, range[1]), na.rm=TRUE) + x1 <- min(c(x1, range[2]), na.rm=TRUE) + regions[rr,"start"] <- x0 + regions[rr,"end"] <- x1 + } + } # for (rr ...) + regions[,"length"] <- regions[,"end"] - regions[,"start"] ## + 1L + callStats <- getCallStatistics(fit, regions=regions) + callStats$label <- sprintf("%sp", callStats$label) + callStatsP <- callStats + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # q-arm + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + regions <- getChromosomeRanges(fit) + regions$start <- genomeData$centroEnd + regions$end <- pmax(regions$end, regions$start) + + # Shrink regions + for (rr in seq_len(nrow(regions))) { + chr <- regions[rr,"chromosome"] + x0 <- regions[rr,"start"] + x1 <- regions[rr,"end"] + xs <- subset(fit$data, chromosome == chr & x0 <= x & x <= x1)$x + if (length(xs) > 0) { + range <- range(xs, na.rm=TRUE) + x0 <- max(c(x0, range[1]), na.rm=TRUE) + x1 <- min(c(x1, range[2]), na.rm=TRUE) + regions[rr,"start"] <- x0 + regions[rr,"end"] <- x1 + } + } # for (rr ...) + regions[,"length"] <- regions[,"end"] - regions[,"start"] ## + 1L + + callStats <- getCallStatistics(fit, regions=regions) + callStats$label <- sprintf("%sq", callStats$label) + callStatsQ <- callStats + + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Merge + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + callStats <- rbind(callStatsP, callStatsQ) + + # Not needed anymore + regions <- callStatsP <- callStatsQ <- NULL + + # Reorder + o <- order(callStats$chromosome, callStats$start) + callStats <- callStats[o,] + + callStats +}, protected=TRUE) # getCallStatisticsByArms() + + +setMethodS3("callArms", "CBS", function(fit, genomeData, minFraction=0.95, ...) { + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Validate arguments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Argument 'minFraction': + minFraction <- Arguments$getDouble(minFraction, range=c(0,1)) + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # p-arm + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + callStats <- getCallStatisticsByArms(fit, genomeData=genomeData) + + callTypes <- grep("Fraction", colnames(callStats), value=TRUE) + callTypes <- gsub("Fraction", "", callTypes) + + keys <- sprintf("%sFraction", callTypes) + rhos <- callStats[,keys] + calls <- (rhos >= minFraction) + colnames(calls) <- sprintf("%sCall", callTypes) + + callStats <- cbind(callStats, calls) + + callStats +}, protected=TRUE) # callArms() + + + + +###########################################################################/** +# @RdocMethod mergeNonCalledSegments +# +# @title "Merge neighboring segments that are not called" +# +# \description{ +# @get "title" +# } +# +# @synopsis +# +# \arguments{ +# \item{...}{Not used.} +# \item{verbose}{@see "R.utils::Verbose".} +# } +# +# \value{ +# Returns an object of the same class +# with the same of fewer number of segments. +# } +# +# @author "HB" +# +# \seealso{ +# @seeclass +# } +# +# @keyword internal +#*/########################################################################### +setMethodS3("mergeNonCalledSegments", "CBS", function(fit, ..., verbose=FALSE) { + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Validate arguments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Argument 'verbose': + verbose <- Arguments$getVerbose(verbose) + if (verbose) { + pushState(verbose) + on.exit(popState(verbose)) + } + + verbose && enter(verbose, "Merging neighboring segments that are not called") + + # Identify call columns + segs <- getSegments(fit, splitters=TRUE) + keep <- grep("Call$", colnames(segs)) + nbrOfCalls <- length(keep) + + # Sanity check + .stop_if_not(nbrOfCalls > 0) + + chromosomes <- getChromosomes(fit) + fitList <- list() + for (cc in seq_along(chromosomes)) { + chromosome <- chromosomes[cc] + verbose && enter(verbose, sprintf("Chromosome #%d ('%s') of %d", cc, chromosome, length(chromosomes))) + + + fitCC <- extractChromosome(fit, chromosome=chromosome) + n0 <- nbrOfSegments(fitCC) + + # Until no more neighboring non-called segments exists + while (TRUE) { + segs <- getSegments(fitCC, splitters=TRUE) + calls <- as.matrix(segs[,keep]) + + # Find two neighboring segments that are not called + isCalled <- rowAnys(calls, na.rm=TRUE) + verbose && printf(verbose, "Number of segments not called: %d of %d\n", sum(!isCalled, na.rm=TRUE), length(isCalled)) + + notCalled <- which(!isCalled) + delta <- diff(notCalled) + left <- notCalled[which(delta == 1)[1]] + + # No more segments to merge? + if (is.na(left)) { + break + } + + fitCC <- mergeTwoSegments(fitCC, left=left) + } # while (...) + + n1 <- nbrOfSegments(fitCC) + verbose && printf(verbose, "Number of segments merged: %d of %d\n", n0-n1, n0) + + fitList[[cc]] <- fitCC + verbose && exit(verbose) + } # for (cc ...) + + verbose && enter(verbose, "Building result") + res <- Reduce(append, fitList) + verbose && exit(verbose) + + verbose && exit(verbose) + + res +}, protected=TRUE) # mergeNonCalledSegments() + + +setMethodS3("estimateDeltaCN", "CBS", function(fit, flavor=c("density(TCN)", "density(dTCN)", "dTCN"), adjust=0.3, ..., verbose=FALSE) { + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Validate arguments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Argument 'flavor': + flavor <- match.arg(flavor) + + # Argument 'adjust': + adjust <- Arguments$getDouble(adjust, range=c(0,10)) + + + if (flavor == "density(TCN)") { + # Get segment mean levels + segs <- getSegments(fit, splitters=FALSE) + x <- segs$mean + w <- segs$nbrOfLoci + + # Drop missing values + keep <- is.finite(x) & is.finite(w) + x <- x[keep] + w <- w[keep] + keep <- NULL # Not needed anymore + + # Normalize weights + w <- w / sum(w, na.rm=TRUE) + + # Estimate density + d <- density(x, weights=w, adjust=adjust) + + w <- NULL # Not needed anymore + + # Find peaks + pv <- findPeaksAndValleys(d, ...) + type <- NULL; rm(list="type") # To please R CMD check + p <- subset(pv, type == "peak") + px <- p$x + pw <- p$density + + # Distance between peaks + dx <- diff(px) + # Weights "between" peaks (AD HOC: sum up peak weights) + dw <- pw[-length(pw)] + pw[-1L] + + deltaCN <- weighted.mean(dx, w=dw) + } else if (flavor == "density(dTCN)") { + # Get change-point magnitudes + x <- getChangePoints(fit)[[1L]] + x <- abs(x) + + # Drop missing values + keep <- is.finite(x) + x <- x[keep] + keep <- NULL # Not needed anymore + + + # Estimate density + d <- density(x, adjust=adjust) + + # Find peaks + pv <- findPeaksAndValleys(d, ...) + type <- NULL; rm(list="type") # To please R CMD check + p <- subset(pv, type == "peak") + px <- p$x + pw <- p$density + + # Distance between peaks + dx <- diff(px) + # Weights "between" peaks (AD HOC: sum up peak weights) + dw <- pw[-length(pw)] + pw[-1L] + + throw("Still not implemented.") + } else if (flavor == "dTCN") { + # Get change-point magnitudes + x <- getChangePoints(fit)[[1L]] + x <- abs(x) + + # Drop missing values + keep <- is.finite(x) + x <- x[keep] + keep <- NULL # Not needed anymore + + deltaCN <- median(x) + } + + # Sanity check + deltaCN <- Arguments$getDouble(deltaCN, range=c(0, Inf)) + + deltaCN +}, protected=TRUE) + + + +setMethodS3("encodeCalls", "data.frame", function(calls, flavor="UCSF", ...) { + # Argument 'calls': + .stop_if_not(all(is.element(c("chromosome", "x"), colnames(calls)))) + .stop_if_not(all(is.element(c("lossCall", "gainCall"), colnames(calls)))) + + # Argument 'flavor': + flavor <- match.arg(flavor) + + calls0 <- calls + + # Allocate + calls <- rep(NA_real_, times=nrow(calls0)) + + # Encode loss, neutral and gain (required) + calls[!calls0$gainCall & !calls0$lossCall] <- 0 + calls[calls0$gainCall] <- +1 + calls[calls0$lossCall] <- -1 + + # Encode amplifications, if any/called. + idxs <- which(calls0$amplificationCall) + calls[idxs] <- +9 + + # Encode negative and positive outliers, if any/called. + idxs <- which(calls0$negOutlierCall) + calls[idxs] <- calls[idxs] - 0.1 + + idxs <- which(calls0$posOutlierCall) + calls[idxs] <- calls[idxs] + 0.1 + + calls +}, protected=TRUE) # encodeCalls() + + +setMethodS3("callGLAO", "CBS", function(fit, ..., verbose=FALSE) { + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Validate arguments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Argument 'verbose': + verbose <- Arguments$getVerbose(verbose) + if (verbose) { + pushState(verbose) + on.exit(popState(verbose)) + } + + verbose && enter(verbose, "Call gains, losses, amplifications and (negative and positive) outliers") + verbose && cat(verbose, "Number of segments: ", nbrOfSegments(fit)) + + # Call segments + fitC <- callGainsAndLosses(fit, ..., verbose=verbose) + fitC <- callAmplifications(fitC, ..., verbose=verbose) + + # Call loci, i.e. locus-level negative and positive outliers + fitC <- callOutliers(fitC, ..., verbose=verbose) + verbose && print(verbose, fitC) + + verbose && exit(verbose) + + fitC +}, protected=TRUE) # callGLAO() diff -Nru r-cran-pscbs-0.63.0/R/CBS.EXTS.R r-cran-pscbs-0.64.0/R/CBS.EXTS.R --- r-cran-pscbs-0.63.0/R/CBS.EXTS.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/CBS.EXTS.R 2018-08-12 21:30:44.000000000 +0000 @@ -34,34 +34,34 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- fit$data; - sample <- Arguments$getIndex(sample, max=ncol(data)-2L); + data <- fit$data + sample <- Arguments$getIndex(sample, max=ncol(data)-2L) - sampleName <- colnames(data)[sample+2L]; + sampleName <- colnames(data)[sample+2L] # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setup the 'data' field # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- fit$data; - rownames <- rownames(data); + data <- fit$data + rownames <- rownames(data) data <- data.frame( chromosome = data$chrom, x = data$maploc, y = data[,sample+2L,drop=TRUE], stringsAsFactors=FALSE - ); - rownames(data) <- rownames; + ) + rownames(data) <- rownames # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setup the 'output' field # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - output <- fit$output; - ID <- NULL; rm(list="ID"); # To please R CMD check - output <- subset(output, ID == sampleName); - rownames <- rownames(output); + output <- fit$output + ID <- NULL; rm(list="ID") # To please R CMD check + output <- subset(output, ID == sampleName) + rownames <- rownames(output) output <- data.frame( chromosome = output$chrom, @@ -70,124 +70,124 @@ nbrOfLoci = as.integer(output$num.mark), mean = output$seg.mean, stringsAsFactors=FALSE - ); - rownames(output) <- rownames; + ) + rownames(output) <- rownames # Add chromosome splitter - ats <- which(diff(output$chromosome) != 0) + 1L; + ats <- which(diff(output$chromosome) != 0) + 1L if (length(ats) > 0) { - idxs <- seq_len(nrow(output)); - values <- rep(NA_integer_, times=length(ats)); - expand <- insert(idxs, ats=ats, values=values); # R.utils::insert() - output <- output[expand,]; - rownames(output) <- NULL; + idxs <- seq_len(nrow(output)) + values <- rep(NA_integer_, times=length(ats)) + expand <- insert(idxs, ats=ats, values=values) # R.utils::insert() + output <- output[expand,] + rownames(output) <- NULL } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setup up 'CBS' object # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (sampleName == "") sampleName <- as.character(NA); - res <- list(); - res$sampleName <- sampleName; - res$data <- data; - res$output <- output; - res$params <- list(); - class(res) <- class(CBS()); + if (sampleName == "") sampleName <- as.character(NA) + res <- list() + res$sampleName <- sampleName + res$data <- data + res$output <- output + res$params <- list() + class(res) <- class(CBS()) - res; + res }) # as.CBS() setMethodS3("extractTotalCNs", "CBS", function(fit, ...) { - data <- getSegments(fit, ...); - data[,c("mean", "nbrOfLoci"), drop=FALSE]; + data <- getSegments(fit, ...) + data[,c("mean", "nbrOfLoci"), drop=FALSE] }, protected=TRUE) setMethodS3("extractCNs", "CBS", function(fit, ...) { - data <- extractTotalCNs(fit, ...); - data <- data[,c("mean"), drop=FALSE]; - data <- as.matrix(data); - data; + data <- extractTotalCNs(fit, ...) + data <- data[,c("mean"), drop=FALSE] + data <- as.matrix(data) + data }, protected=TRUE) setMethodS3("extractChromosomes", "CBS", function(x, chromosomes, ...) { # To please R CMD check - this <- x; + this <- x # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'chromosomes': - disallow <- c("NaN", "Inf"); - chromosomes <- Arguments$getIntegers(chromosomes, range=c(0,Inf), disallow=disallow); - stopifnot(all(is.element(chromosomes, getChromosomes(this)))); + disallow <- c("NaN", "Inf") + chromosomes <- Arguments$getIntegers(chromosomes, range=c(0,Inf), disallow=disallow) + .stop_if_not(all(is.element(chromosomes, getChromosomes(this)))) # Always extract in order - chromosomes <- unique(chromosomes); - chromosomes <- sort(chromosomes); + chromosomes <- unique(chromosomes) + chromosomes <- sort(chromosomes) # Allocate results - res <- this; + res <- this # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Locus data # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - chromosome <- NULL; rm(list="chromosome"); # To please R CMD check - data <- getLocusData(this); - class <- class(data); - class(data) <- "data.frame"; - data <- subset(data, chromosome %in% chromosomes); - class(data) <- class; - res$data <- data; + chromosome <- NULL; rm(list="chromosome") # To please R CMD check + data <- getLocusData(this) + class <- class(data) + class(data) <- "data.frame" + data <- subset(data, chromosome %in% chromosomes) + class(data) <- class + res$data <- data # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Segmentation data # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Identify rows to subset - rows <- which(is.element(res$output$chromosome, chromosomes)); + rows <- which(is.element(res$output$chromosome, chromosomes)) for (field in c("output", "segRows")) { - res[[field]] <- res[[field]][rows,,drop=FALSE]; + res[[field]] <- res[[field]][rows,,drop=FALSE] } # Identify chromosome offsets - data <- getLocusData(this); - chrStarts <- match(getChromosomes(this), data$chromosome); - chrEnds <- c(chrStarts[-1]-1L, nrow(data)); - chrLengths <- chrEnds - chrStarts + 1L; - - chrLengthsExcl <- chrLengths; - - keep <- match(chromosomes, getChromosomes(this)); - chrLengthsExcl[keep] <- 0L; - cumChrLengthsExcl <- cumsum(chrLengthsExcl); + data <- getLocusData(this) + chrStarts <- match(getChromosomes(this), data$chromosome) + chrEnds <- c(chrStarts[-1]-1L, nrow(data)) + chrLengths <- chrEnds - chrStarts + 1L + + chrLengthsExcl <- chrLengths + + keep <- match(chromosomes, getChromosomes(this)) + chrLengthsExcl[keep] <- 0L + cumChrLengthsExcl <- cumsum(chrLengthsExcl) - shifts <- cumChrLengthsExcl[keep]; - stopifnot(all(is.finite(shifts))); + shifts <- cumChrLengthsExcl[keep] + .stop_if_not(all(is.finite(shifts))) # Adjust indices for (cc in seq_along(chromosomes)) { - chromosome <- chromosomes[cc]; - shift <- shifts[cc]; + chromosome <- chromosomes[cc] + shift <- shifts[cc] # Nothing to do? - if (shift == 0) next; + if (shift == 0) next for (field in c("segRows")) { - segRows <- res[[field]]; - rows <- which(res$output$chromosome == chromosome); - segRows[rows,] <- segRows[rows,] - shift; - res[[field]] <- segRows; + segRows <- res[[field]] + rows <- which(res$output$chromosome == chromosome) + segRows[rows,] <- segRows[rows,] - shift + res[[field]] <- segRows } } - res; + res }, protected=TRUE) setMethodS3("subset", "CBS", function(x, chromlist=NULL, ...) { - extractChromosomes(x, chromosomes=chromlist, ...); + extractChromosomes(x, chromosomes=chromlist, ...) }, private=TRUE) @@ -221,37 +221,37 @@ # @keyword internal #*/########################################################################### setMethodS3("extractSegmentMeansByLocus", "CBS", function(fit, ...) { - data <- getLocusData(fit, ...); - chromosome <- data$chromosome; - x <- data$x; - y <- data[,3]; - - segs <- getSegments(fit); - nbrOfSegments <- nrow(segs); - nbrOfLoci <- nrow(data); + data <- getLocusData(fit, ...) + chromosome <- data$chromosome + x <- data$x + y <- data[,3] + + segs <- getSegments(fit) + nbrOfSegments <- nrow(segs) + nbrOfLoci <- nrow(data) # Get mean estimators - estList <- getMeanEstimators(fit, "y"); - avgY <- estList$y; + estList <- getMeanEstimators(fit, "y") + avgY <- estList$y - yS <- y; + yS <- y for (ss in seq_len(nbrOfSegments)) { - seg <- segs[ss,]; + seg <- segs[ss,] idxs <- which(seg$chromosome == chromosome & - seg$start <= x & x <= seg$end); - idxs <- Arguments$getIndices(idxs, max=nbrOfLoci); + seg$start <= x & x <= seg$end) + idxs <- Arguments$getIndices(idxs, max=nbrOfLoci) - ySS <- y[idxs]; - ok <- is.finite(ySS); + ySS <- y[idxs] + ok <- is.finite(ySS) # Sanity check - ## stopifnot(sum(ok) == seg$nbrOfLoci); # Not dealing with ties + ## .stop_if_not(sum(ok) == seg$nbrOfLoci) # Not dealing with ties - mu <- avgY(ySS[ok]); - yS[idxs] <- mu; + mu <- avgY(ySS[ok]) + yS[idxs] <- mu } # for (ss ...) - yS; + yS }, private=TRUE) # extractSegmentMeansByLocus() @@ -294,7 +294,7 @@ #*/########################################################################### setMethodS3("estimateStandardDeviation", "CBS", function(fit, chromosomes=NULL, method=c("diff", "res", "abs", "DNAcopy"), estimator=c("mad", "sd"), na.rm=TRUE, weights=NULL, ...) { # Local copies of DNAcopy functions - DNAcopy_trimmed.variance <- .use("trimmed.variance", package="DNAcopy"); + DNAcopy_trimmed.variance <- .use("trimmed.variance", package="DNAcopy") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -305,170 +305,126 @@ } # Argument 'method': - method <- match.arg(method); + method <- match.arg(method) # Argument 'estimator': - estimator <- match.arg(estimator); + estimator <- match.arg(estimator) # Argument 'weights': if (!is.null(weights)) { - nbrOfLoci <- nbrOfLoci(fit); + nbrOfLoci <- nbrOfLoci(fit) weights <- Arguments$getNumerics(weights, range=c(0,Inf), - length=rep(nbrOfLoci, times=2)); + length=rep(nbrOfLoci, times=2)) } # Get the estimator function if (!is.null(weights)) { - estimator <- sprintf("weighted %s", estimator); - estimator <- R.utils::toCamelCase(estimator); + estimator <- sprintf("weighted %s", estimator) + estimator <- R.utils::toCamelCase(estimator) } if (method == "DNAcopy") { estimatorFcn <- function(y, trim=0.025, ...) { - sigma2 <- DNAcopy_trimmed.variance(y, trim=trim); - sqrt(sigma2); + sigma2 <- DNAcopy_trimmed.variance(y, trim=trim) + sqrt(sigma2) } } else { - estimatorFcn <- get(estimator, mode="function"); + estimatorFcn <- get(estimator, mode="function") } # Subset by chromosomes? if (!is.null(chromosomes)) { - fit <- extractChromosomes(fit, chromosomes=chromosomes); + fit <- extractChromosomes(fit, chromosomes=chromosomes) } - nbrOfLoci <- nbrOfLoci(fit); + nbrOfLoci <- nbrOfLoci(fit) # Nothing to do? if (nbrOfLoci <= 1) { - sigma <- NA_real_; - attr(sigma, "nbrOfLoci") <- nbrOfLoci; - attr(sigma, "df") <- NA_integer_; - return(sigma); + sigma <- NA_real_ + attr(sigma, "nbrOfLoci") <- nbrOfLoci + attr(sigma, "df") <- NA_integer_ + return(sigma) } - data <- getLocusData(fit); - y <- data[,3]; + data <- getLocusData(fit) + y <- data[,3] if (method == "diff") { - dy <- diff(y); + dy <- diff(y) # Weighted estimator? if (!is.null(weights)) { # Calculate weights per pair - weights <- (weights[1:(nbrOfLoci-1)]+weights[2:nbrOfLoci])/2; - sigma <- estimatorFcn(dy, w=weights, na.rm=na.rm)/sqrt(2); + weights <- (weights[1:(nbrOfLoci-1)]+weights[2:nbrOfLoci])/2 + sigma <- estimatorFcn(dy, w=weights, na.rm=na.rm)/sqrt(2) } else { - sigma <- estimatorFcn(dy, na.rm=na.rm)/sqrt(2); + sigma <- estimatorFcn(dy, na.rm=na.rm)/sqrt(2) } - df <- length(dy); + df <- length(dy) } else if (method == "res") { - yS <- extractSegmentMeansByLocus(fit); - dy <- y - yS; + yS <- extractSegmentMeansByLocus(fit) + dy <- y - yS if (!is.null(weights)) { - sigma <- estimatorFcn(dy, w=weights, na.rm=na.rm); + sigma <- estimatorFcn(dy, w=weights, na.rm=na.rm) } else { - sigma <- estimatorFcn(dy, na.rm=na.rm); + sigma <- estimatorFcn(dy, na.rm=na.rm) } - df <- length(dy); + df <- length(dy) } else if (method == "abs") { if (!is.null(weights)) { - sigma <- estimatorFcn(y, w=weights, na.rm=na.rm); + sigma <- estimatorFcn(y, w=weights, na.rm=na.rm) } else { - sigma <- estimatorFcn(y, na.rm=na.rm); + sigma <- estimatorFcn(y, na.rm=na.rm) } - df <- length(y); + df <- length(y) } else if (method == "DNAcopy") { if (na.rm) { - y <- y[!is.na(y)]; + y <- y[!is.na(y)] } - sigma <- estimatorFcn(y, ...); - df <- length(y); + sigma <- estimatorFcn(y, ...) + df <- length(y) } else { - throw("Method no implemented: ", method); + throw("Method no implemented: ", method) } - attr(sigma, "nbrOfLoci") <- nbrOfLoci; - attr(sigma, "df") <- df; + attr(sigma, "nbrOfLoci") <- nbrOfLoci + attr(sigma, "df") <- df - sigma; + sigma }) # estimateStandardDeviation() setMethodS3("getChromosomeRanges", "CBS", function(fit, ...) { # To please R CMD check, cf. subset() - chromosome <- NULL; rm(list="chromosome"); + chromosome <- NULL; rm(list="chromosome") - segs <- getSegments(fit, splitter=FALSE); - chromosomes <- sort(unique(segs$chromosome)); + segs <- getSegments(fit, splitter=FALSE) + chromosomes <- sort(unique(segs$chromosome)) # Allocate - naValue <- NA_real_; - res <- matrix(naValue, nrow=length(chromosomes), ncol=3); - rownames(res) <- chromosomes; - colnames(res) <- c("start", "end", "length"); + naValue <- NA_real_ + res <- matrix(naValue, nrow=length(chromosomes), ncol=3) + rownames(res) <- chromosomes + colnames(res) <- c("start", "end", "length") # Get start and end of each chromosome. for (ii in seq_len(nrow(res))) { - chr <- chromosomes[ii]; - segsII <- subset(segs, chromosome == chr); - res[ii,"start"] <- min(segsII$start, na.rm=TRUE); - res[ii,"end"] <- max(segsII$end, na.rm=TRUE); + chr <- chromosomes[ii] + segsII <- subset(segs, chromosome == chr) + res[ii,"start"] <- min(segsII$start, na.rm=TRUE) + res[ii,"end"] <- max(segsII$end, na.rm=TRUE) } # for (ii ...) - res[,"length"] <- res[,"end"] - res[,"start"] + 1L; + res[,"length"] <- res[,"end"] - res[,"start"] + 1L # Sanity check - stopifnot(nrow(res) == length(chromosomes)); + .stop_if_not(nrow(res) == length(chromosomes)) - res <- as.data.frame(res); - res <- cbind(chromosome=chromosomes, res); + res <- as.data.frame(res) + res <- cbind(chromosome=chromosomes, res) - res; + res }, protected=TRUE) # getChromosomeRanges() - - - -############################################################################ -# HISTORY: -# 2012-06-03 -# o BUG FIT: The recent updates of as.CBS() for DNAcopy would not work -# for samples with name ''. -# 2012-05-30 -# o BUG FIX: as.CNA() for DNAcopy added incorrect chromosome splitters. -# o BUG FIX: as.CNA() for DNAcopy would ignore argument 'sample' and -# always return the first sample. -# 2011-12-12 -# o Now extractSegmentMeansByLocus() for CBS passes arguments -# '...' to getLocusData(). -# 2011-11-28 -# o extractCNs() for CBS would not return a matrix but a data.frame. -# o BUG FIX: extractTotalCNs() for CBS would give an error. -# 2011-11-15 -# o Added method="DNAcopy" to estimateStandardDeviation() for CBS, which -# estimates the std. dev. using DNAcopy:::trimmed.variance(). -# 2011-10-16 -# o Added extractTotalCNs() for CBS. -# o Implemented extractCNs() for CBS. -# 2011-10-08 -# o BUG FIX: The object returned by as.CBS() of DNAcopy did not have the -# correct class hierarchy. -# 2011-10-06 -# o Now getChromosomeRanges() of CBS returns a data.frame instead of -# a matrix, and first column is now 'chromosome'. -# 2011-09-05 -# o Added getChromosomeRanges() for CBS. -# 2011-09-04 -# o Added estimateStandardDeviation() for CBS. -# o Added extractSegmentMeansByLocus() for CBS. -# 2011-09-03 -# o Added as.CBS() for DNAcopy to coerce a DNAcopy object to a CBS object. -# 2011-09-02 -# o Added extractByChromosomes() for CBS. -# o Added subset() for CBS for backward compatibility. -# o Added nbrOfLoci(), getChromosomes() and getSampleNames() for CBS. -# 2010-11-19 -# o Added append() for CBS objects. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/CBS.IO.R r-cran-pscbs-0.64.0/R/CBS.IO.R --- r-cran-pscbs-0.63.0/R/CBS.IO.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/CBS.IO.R 2018-08-12 21:30:44.000000000 +0000 @@ -3,52 +3,52 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'name' and 'tags': - name <- Arguments$getCharacter(name); - tags <- Arguments$getCharacters(tags); + name <- Arguments$getCharacter(name) + tags <- Arguments$getCharacters(tags) # Argument 'ext': - ext <- Arguments$getCharacter(ext); + ext <- Arguments$getCharacter(ext) # Arguments 'path': - path <- Arguments$getWritablePath(path); + path <- Arguments$getWritablePath(path) # Argument 'nbrOfDecimals': - nbrOfDecimals <- Arguments$getInteger(nbrOfDecimals); + nbrOfDecimals <- Arguments$getInteger(nbrOfDecimals) - fullname <- paste(c(name, tags), collapse=","); - filename <- sprintf("%s.%s", fullname, ext); - pathname <- Arguments$getWritablePathname(filename, path=path, mustNotExist=(!overwrite && !skip)); + fullname <- paste(c(name, tags), collapse=",") + filename <- sprintf("%s.%s", fullname, ext) + pathname <- Arguments$getWritablePathname(filename, path=path, mustNotExist=(!overwrite && !skip)) # File already exists? if (isFile(pathname)) { # Skip? if (skip) { - return(pathname); + return(pathname) } # Overwrite! - file.remove(pathname); + file.remove(pathname) } # Write to temporary file - pathnameT <- pushTemporaryFile(pathname); + pathnameT <- pushTemporaryFile(pathname) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract data # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getLocusData(fit, ...); + data <- getLocusData(fit, ...) # Round of floating points if (!is.null(nbrOfDecimals)) { - cols <- colnames(data); + cols <- colnames(data) for (key in cols) { - values <- data[[key]]; + values <- data[[key]] if (is.double(values)) { - values <- round(values, digits=nbrOfDecimals); - data[[key]] <- values; + values <- round(values, digits=nbrOfDecimals) + data[[key]] <- values } } # for (key ...) } @@ -58,10 +58,10 @@ # Build header # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (addHeader) { - sigmaDelta <- estimateStandardDeviation(fit, method="diff"); -# sigmaResiduals <- estimateStandardDeviation(fit, method="res"); + sigmaDelta <- estimateStandardDeviation(fit, method="diff") +# sigmaResiduals <- estimateStandardDeviation(fit, method="res") - createdOn <- format(Sys.time(), format="%Y-%m-%d %H:%M:%S %Z"); + createdOn <- format(Sys.time(), format="%Y-%m-%d %H:%M:%S %Z") hdr <- c( name=name, tags=tags, @@ -79,18 +79,18 @@ nbrOfColumns=ncol(data), columnNames=paste(colnames(data), collapse=", "), columnClasses=paste(sapply(data, FUN=function(x) class(x)[1]), collapse=", ") - ); - bfr <- paste("# ", names(hdr), ": ", hdr, sep=""); + ) + bfr <- paste("# ", names(hdr), ": ", hdr, sep="") - cat(file=pathnameT, bfr, sep="\n"); + cat(file=pathnameT, bfr, sep="\n") } # if (addHeader) write.table(file=pathnameT, data, append=TRUE, quote=FALSE, sep=sep, - row.names=FALSE, col.names=TRUE); + row.names=FALSE, col.names=TRUE) - pathname <- popTemporaryFile(pathnameT); + pathname <- popTemporaryFile(pathnameT) - pathname; + pathname }, protected=TRUE) # writeLocusData() @@ -139,69 +139,69 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'name' and 'tags': - name <- Arguments$getCharacter(name); - tags <- Arguments$getCharacters(tags); + name <- Arguments$getCharacter(name) + tags <- Arguments$getCharacters(tags) # Argument 'ext': - ext <- Arguments$getCharacter(ext); + ext <- Arguments$getCharacter(ext) # Arguments 'path': - path <- Arguments$getWritablePath(path); + path <- Arguments$getWritablePath(path) # Argument 'nbrOfDecimals': - nbrOfDecimals <- Arguments$getInteger(nbrOfDecimals); + nbrOfDecimals <- Arguments$getInteger(nbrOfDecimals) - fullname <- paste(c(name, tags), collapse=","); - filename <- sprintf("%s.%s", fullname, ext); - pathname <- Arguments$getWritablePathname(filename, path=path, mustNotExist=(!overwrite && !skip)); + fullname <- paste(c(name, tags), collapse=",") + filename <- sprintf("%s.%s", fullname, ext) + pathname <- Arguments$getWritablePathname(filename, path=path, mustNotExist=(!overwrite && !skip)) # File already exists? if (isFile(pathname)) { # Skip? if (skip) { - return(pathname); + return(pathname) } # Overwrite! - file.remove(pathname); + file.remove(pathname) } # Write to temporary file - pathnameT <- pushTemporaryFile(pathname); + pathnameT <- pushTemporaryFile(pathname) - sampleName <- getSampleName(fit); + sampleName <- getSampleName(fit) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract data # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getSegments(fit, ..., splitters=splitters); + data <- getSegments(fit, ..., splitters=splitters) # Round of floating points if (!is.null(nbrOfDecimals)) { - cols <- tolower(colnames(data)); - isInt <- (regexpr("chromosome|start|end|nbrofloci", cols) != -1); - cols <- which(isInt); + cols <- tolower(colnames(data)) + isInt <- (regexpr("chromosome|start|end|nbrofloci", cols) != -1) + cols <- which(isInt) for (cc in cols) { - values <- data[[cc]]; + values <- data[[cc]] if (is.double(values)) { - values <- round(values, digits=0); - data[[cc]] <- values; + values <- round(values, digits=0) + data[[cc]] <- values } } # for (key ...) - cols <- tolower(colnames(data)); - isInt <- (regexpr("chromosome|start|end|nbrofloci", cols) != -1); - isLog <- (regexpr("call", cols) != -1); - isDbl <- (!isInt & !isLog); - cols <- which(isDbl); + cols <- tolower(colnames(data)) + isInt <- (regexpr("chromosome|start|end|nbrofloci", cols) != -1) + isLog <- (regexpr("call", cols) != -1) + isDbl <- (!isInt & !isLog) + cols <- which(isDbl) for (kk in cols) { - values <- data[[kk]]; + values <- data[[kk]] if (is.double(values)) { - values <- round(values, digits=nbrOfDecimals); - data[[kk]] <- values; + values <- round(values, digits=nbrOfDecimals) + data[[kk]] <- values } } # for (key ...) } @@ -211,10 +211,10 @@ # Build header # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (addHeader) { - sigmaDelta <- estimateStandardDeviation(fit, method="diff"); -# sigmaResiduals <- estimateStandardDeviation(fit, method="res"); + sigmaDelta <- estimateStandardDeviation(fit, method="diff") +# sigmaResiduals <- estimateStandardDeviation(fit, method="res") - createdOn <- format(Sys.time(), format="%Y-%m-%d %H:%M:%S %Z"); + createdOn <- format(Sys.time(), format="%Y-%m-%d %H:%M:%S %Z") hdr <- c( name=name, tags=tags, @@ -232,30 +232,16 @@ nbrOfColumns=ncol(data), columnNames=paste(colnames(data), collapse=", "), columnClasses=paste(sapply(data, FUN=function(x) class(x)[1]), collapse=", ") - ); - bfr <- paste("# ", names(hdr), ": ", hdr, sep=""); + ) + bfr <- paste("# ", names(hdr), ": ", hdr, sep="") - cat(file=pathnameT, bfr, sep="\n"); + cat(file=pathnameT, bfr, sep="\n") } # if (addHeader) write.table(file=pathnameT, data, append=TRUE, quote=FALSE, sep=sep, - row.names=FALSE, col.names=TRUE); + row.names=FALSE, col.names=TRUE) - pathname <- popTemporaryFile(pathnameT); + pathname <- popTemporaryFile(pathnameT) - pathname; + pathname }) # writeSegments() - - - - - -############################################################################ -# HISTORY: -# 2011-12-03 -# o Added arguments 'name', 'tags' and 'exts' to writeSegments() and -# writeLocusData() and dropped 'filename'. -# 2011-09-04 -# o Added writeSegments() for CBS. -# o Added writeLocusData() for CBS. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/CBS.joinSegments.R r-cran-pscbs-0.64.0/R/CBS.joinSegments.R --- r-cran-pscbs-0.63.0/R/CBS.joinSegments.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/CBS.joinSegments.R 2018-08-12 21:30:44.000000000 +0000 @@ -34,7 +34,7 @@ # @keyword internal #*/########################################################################### setMethodS3("joinSegments", "CBS", function(fit, range=NULL, verbose=FALSE, ...) { - R_SANITY_CHECK <- TRUE; + R_SANITY_CHECK <- TRUE # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments @@ -47,33 +47,33 @@ if (nbrOfChrs > 1L) { throw("Argument 'range' cannot be given when 'fit' contains multiple chromosomes.") } - range <- Arguments$getDoubles(range, length=c(2,2)); - stopifnot(range[2] >= range[1]); + range <- Arguments$getDoubles(range, length=c(2,2)) + .stop_if_not(range[2] >= range[1]) } # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Joining segments"); - segs <- getSegments(fit, splitters=TRUE); - verbose && cat(verbose, "Segments:"); - verbose && print(verbose, segs); + verbose && enter(verbose, "Joining segments") + segs <- getSegments(fit, splitters=TRUE) + verbose && cat(verbose, "Segments:") + verbose && print(verbose, segs) verbose && cat(verbose, "Chromosomes:") verbose && print(verbose, chromosomes) - verbose && cat(verbose, "Range:"); - verbose && print(verbose, range); + verbose && cat(verbose, "Range:") + verbose && print(verbose, range) - nbrOfSegs <- nrow(segs); + nbrOfSegs <- nrow(segs) if (nbrOfSegs > 1) { - verbose && enter(verbose, "Centering change points"); - prevSeg <- segs[1L,]; + verbose && enter(verbose, "Centering change points") + prevSeg <- segs[1L,] for (ss in 2:nbrOfSegs) { - currSeg <- segs[ss,]; + currSeg <- segs[ss,] ## New chromosome? if (!identical(currSeg$chromosome, prevSeg$chromosome)) { @@ -83,99 +83,83 @@ next } - currStart <- currSeg[,"start"]; - prevEnd <- prevSeg[,"end"]; + currStart <- currSeg[,"start"] + prevEnd <- prevSeg[,"end"] # Sanity check (will give an error if more than one chromosome) if (R_SANITY_CHECK) { - stopifnot(all(currStart >= prevEnd, na.rm=TRUE)); + .stop_if_not(all(currStart >= prevEnd, na.rm=TRUE)) } # Center CP - xMid <- (prevEnd + currStart) / 2; + xMid <- (prevEnd + currStart) / 2 # Move previous end and current start to this centered CP - segs[ss,"start"] <- xMid; - segs[ss-1L,"end"] <- xMid; + segs[ss,"start"] <- xMid + segs[ss-1L,"end"] <- xMid - prevSeg <- currSeg; + prevSeg <- currSeg } # for (ss ...) - verbose && exit(verbose); + verbose && exit(verbose) # Sanity checks if (R_SANITY_CHECK) { - stopifnot(all(segs$start[-1] >= segs$end[-nbrOfSegs], na.rm=TRUE)); - stopifnot(all(diff(segs$start) >= 0, na.rm=TRUE)); ## FIXME: > 0 - stopifnot(all(diff(segs$end) >= 0, na.rm=TRUE)); ## FIXME: > 0 + .stop_if_not(all(segs$start[-1] >= segs$end[-nbrOfSegs], na.rm=TRUE)) + .stop_if_not(all(diff(segs$start) >= 0, na.rm=TRUE)) ## FIXME: > 0 + .stop_if_not(all(diff(segs$end) >= 0, na.rm=TRUE)) ## FIXME: > 0 } # if (R_SANITY_CHECK) if (nbrOfSegs > 6) { - verbose && print(verbose, head(segs)); - verbose && print(verbose, tail(segs)); + verbose && print(verbose, head(segs)) + verbose && print(verbose, tail(segs)) } else { - verbose && print(verbose, segs); + verbose && print(verbose, segs) } } # if (nbrOfSegs > 1) if (!is.null(range)) { - verbose && enter(verbose, "Adjust for 'range'"); - verbose && cat(verbose, "Range:"); - verbose && print(verbose, range); - xMin <- min(range, na.rm=TRUE); - xMax <- max(range, na.rm=TRUE); + verbose && enter(verbose, "Adjust for 'range'") + verbose && cat(verbose, "Range:") + verbose && print(verbose, range) + xMin <- min(range, na.rm=TRUE) + xMax <- max(range, na.rm=TRUE) if (nbrOfSegs > 0) { # Sanity checks if (R_SANITY_CHECK) { - stopifnot(xMin <= segs[1L,"start"]); - stopifnot(segs[1L,"end"] <= xMax); + .stop_if_not(xMin <= segs[1L,"start"]) + .stop_if_not(segs[1L,"end"] <= xMax) } - segs[1L,"start"] <- xMin; - segs[nbrOfSegs,"end"] <- xMax; + segs[1L,"start"] <- xMin + segs[nbrOfSegs,"end"] <- xMax # Sanity checks if (R_SANITY_CHECK) { - stopifnot(all(segs$start[-1] >= segs$end[-nbrOfSegs], na.rm=TRUE)); - stopifnot(all(diff(segs$start) >= 0, na.rm=TRUE)); ## FIXME: > 0 - stopifnot(all(diff(segs$end) >= 0, na.rm=TRUE)); ## FIXME: > 0 + .stop_if_not(all(segs$start[-1] >= segs$end[-nbrOfSegs], na.rm=TRUE)) + .stop_if_not(all(diff(segs$start) >= 0, na.rm=TRUE)) ## FIXME: > 0 + .stop_if_not(all(diff(segs$end) >= 0, na.rm=TRUE)) ## FIXME: > 0 } if (nbrOfSegs > 6) { - verbose && print(verbose, head(segs)); - verbose && print(verbose, tail(segs)); + verbose && print(verbose, head(segs)) + verbose && print(verbose, tail(segs)) } else { - verbose && print(verbose, segs); + verbose && print(verbose, segs) } } # if (nbrOfSegs > 0) - verbose && exit(verbose); + verbose && exit(verbose) } # if (!is.null(range)) - fit <- setSegments(fit, segs, splitters=TRUE); + fit <- setSegments(fit, segs, splitters=TRUE) - segs <- getSegments(fit, splitters=FALSE); + segs <- getSegments(fit, splitters=FALSE) if (nbrOfSegs > 6) { - verbose && print(verbose, head(segs)); - verbose && print(verbose, tail(segs)); + verbose && print(verbose, head(segs)) + verbose && print(verbose, tail(segs)) } else { - verbose && print(verbose, segs); + verbose && print(verbose, segs) } - verbose && exit(verbose); + verbose && exit(verbose) - fit; + fit }, private=TRUE) # joinSegments() - - -############################################################################ -# HISTORY: -# 2013-11-14 -# o DOCUMENTATION: Added Rd help for joinSegments(). -# o CLEANUP: Removed stray variables. -# 2011-11-17 -# o Added more sanity checks to joinSegments(). -# 2011-09-04 -# o Updated joinSegments() to be aware of new column names in CBS. -# 2011-06-14 -# o Updated code to recognize new column names. -# 2010-11-21 -# o Extracted from segmentByPairedPSCBS.R -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/CBS.PLOT,many.R r-cran-pscbs-0.64.0/R/CBS.PLOT,many.R --- r-cran-pscbs-0.63.0/R/CBS.PLOT,many.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/CBS.PLOT,many.R 2018-08-12 21:30:44.000000000 +0000 @@ -3,126 +3,126 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } # Nothing to do, i.e. already tiled? if (isTRUE(attr(fit, "tiledChromosomes"))) { - return(fit); + return(fit) } - verbose && enter(verbose, "Tiling chromosomes"); + verbose && enter(verbose, "Tiling chromosomes") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract data and segments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getLocusData(fit); - segs <- getSegments(fit); - knownSegments <- fit$params$knownSegments; + data <- getLocusData(fit) + segs <- getSegments(fit) + knownSegments <- fit$params$knownSegments # Identify all chromosome - chromosomes <- getChromosomes(fit); + chromosomes <- getChromosomes(fit) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Additional chromosome annotations # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - chrStats <- getChromosomeRanges(fit); + chrStats <- getChromosomeRanges(fit) # Build an "empty" row with start == 1. - chrStatsKK <- chrStats[1,,drop=FALSE][NA,,drop=FALSE]; - chrStatsKK[,"start"] <- 1L; + chrStatsKK <- chrStats[1,,drop=FALSE][NA,,drop=FALSE] + chrStatsKK[,"start"] <- 1L # Append empty row - chrStats <- rbind(chrStats, chrStatsKK); + chrStats <- rbind(chrStats, chrStatsKK) # Offset (start,stop) - chrOffsets <- getChromosomeOffsets(fit, ...); - chrStats[,"start"] <- chrStats[,"start"] + chrOffsets; - chrStats[,"end"] <- chrStats[,"end"] + chrOffsets; + chrOffsets <- getChromosomeOffsets(fit, ...) + chrStats[,"start"] <- chrStats[,"start"] + chrOffsets + chrStats[,"end"] <- chrStats[,"end"] + chrOffsets # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Offset... # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - segFields <- grep("(start|end)$", colnames(segs), value=TRUE); + segFields <- grep("(start|end)$", colnames(segs), value=TRUE) for (kk in seq_along(chromosomes)) { - chromosome <- chromosomes[kk]; - chrTag <- sprintf("Chr%02d", chromosome); + chromosome <- chromosomes[kk] + chrTag <- sprintf("Chr%02d", chromosome) verbose && enter(verbose, sprintf("Chromosome #%d ('%s') of %d", - kk, chrTag, length(chromosomes))); + kk, chrTag, length(chromosomes))) # Get offset for this chromosome - offset <- chrOffsets[kk]; - verbose && cat(verbose, "Offset: ", offset); + offset <- chrOffsets[kk] + verbose && cat(verbose, "Offset: ", offset) # Offset data - idxs <- which(data$chromosome == chromosome); + idxs <- which(data$chromosome == chromosome) if (length(idxs) > 0L) { - data$x[idxs] <- offset + data$x[idxs]; + data$x[idxs] <- offset + data$x[idxs] } # Offset segmentation - idxs <- which(segs$chromosome == chromosome); + idxs <- which(segs$chromosome == chromosome) if (length(idxs) > 0L) { - segs[idxs,segFields] <- offset + segs[idxs,segFields]; + segs[idxs,segFields] <- offset + segs[idxs,segFields] } # Offset known segments - idxs <- which(knownSegments$chromosome == chromosome); + idxs <- which(knownSegments$chromosome == chromosome) if (length(idxs) > 0L) { - knownSegments[idxs,c("start", "end")] <- offset + knownSegments[idxs,c("start", "end")]; + knownSegments[idxs,c("start", "end")] <- offset + knownSegments[idxs,c("start", "end")] } - verbose && exit(verbose); + verbose && exit(verbose) } # for (kk ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Update results # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - fitT <- fit; - fitT$data <- data; - fitT$output <- segs; - fitT$chromosomeStats <- chrStats; - fitT$params$knownSegments <- knownSegments; - fitT$params$chrOffsets <- chrOffsets; + fitT <- fit + fitT$data <- data + fitT$output <- segs + fitT$chromosomeStats <- chrStats + fitT$params$knownSegments <- knownSegments + fitT$params$chrOffsets <- chrOffsets # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Sanity checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - segs <- getSegments(fit); - segsT <- getSegments(fitT); - segs <- segs[,!is.element(colnames(segs), c("start", "end"))]; - segsT <- segsT[,!is.element(colnames(segsT), c("start", "end"))]; - stopifnot(all.equal(segsT, segs)); - - data <- getLocusData(fit); - dataT <- getLocusData(fitT); - data <- data[,!is.element(colnames(data), c("x"))]; - dataT <- dataT[,!is.element(colnames(dataT), c("x"))]; - stopifnot(all.equal(dataT, data)); + segs <- getSegments(fit) + segsT <- getSegments(fitT) + segs <- segs[,!is.element(colnames(segs), c("start", "end"))] + segsT <- segsT[,!is.element(colnames(segsT), c("start", "end"))] + .stop_if_not(all.equal(segsT, segs)) + + data <- getLocusData(fit) + dataT <- getLocusData(fitT) + data <- data[,!is.element(colnames(data), c("x"))] + dataT <- dataT[,!is.element(colnames(dataT), c("x"))] + .stop_if_not(all.equal(dataT, data)) - stopifnot(nbrOfLoci(fitT) == nbrOfLoci(fit)); - stopifnot(nbrOfSegments(fitT) == nbrOfSegments(fit)); + .stop_if_not(nbrOfLoci(fitT) == nbrOfLoci(fit)) + .stop_if_not(nbrOfSegments(fitT) == nbrOfSegments(fit)) # Flag object - attr(fitT, "tiledChromosomes") <- TRUE; + attr(fitT, "tiledChromosomes") <- TRUE - verbose && exit(verbose); + verbose && exit(verbose) - fitT; + fitT }, protected=TRUE) # tileChromosomes() setMethodS3("plotTracksManyChromosomes", "CBS", function(x, scatter=TRUE, pch=20, col="gray", meanCol="purple", Clim=c(0,3*ploidy(x)), xScale=1e-6, xlab="Genomic position", Clab="TCN", ..., boundaries=TRUE, levels=TRUE, subset=NULL, byIndex=FALSE, add=FALSE, onBegin=NULL, onEnd=NULL, mar=NULL, verbose=FALSE) { # To please R CMD check - fit <- x; + fit <- x # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments @@ -130,139 +130,98 @@ # Argument 'fit': # Argument 'add': - add <- Arguments$getLogical(add); + add <- Arguments$getLogical(add) # Argument 'Clim': if (!add) { Clim <- Arguments$getNumerics(Clim, length=c(2L,2L), - disallow=c("Inf", "NA", "NaN")); + disallow=c("Inf", "NA", "NaN")) } # Argument 'xScale': - xScale <- Arguments$getNumeric(xScale, range=c(0,Inf)); + xScale <- Arguments$getNumeric(xScale, range=c(0,Inf)) # Argument 'subset': if (!is.null(subset)) { - subset <- Arguments$getDouble(subset, range=c(0,1)); + subset <- Arguments$getDouble(subset, range=c(0,1)) } # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Tile chromosomes # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - fitT <- tileChromosomes(fit); - verbose && str(verbose, fitT); + fitT <- tileChromosomes(fit) + verbose && str(verbose, fitT) # Sanity check - stopifnot(!is.null(fitT$chromosomeStats)); + .stop_if_not(!is.null(fitT$chromosomeStats)) # Extract the input data - data <- getLocusData(fitT); + data <- getLocusData(fitT) if (is.null(data)) { - throw("Cannot plot segmentation results. No input data available."); + throw("Cannot plot segmentation results. No input data available.") } # Subset of the loci? if (!is.null(subset) && subset < 1) { - n <- nrow(data); - keep <- sample(n, size=subset*n); - data <- data[keep,]; + n <- nrow(data) + keep <- sample(n, size=subset*n) + data <- data[keep,] } # To please R CMD check - CT <- y <- muN <- betaT <- betaN <- betaTN <- NULL; - rm(list=c("CT", "muN", "betaT", "betaN", "betaTN")); - attachLocally(data); - x <- xScale * x; - chrStats <- fitT$chromosomeStats; - chrStats <- chrStats[-nrow(chrStats),,drop=FALSE]; - chrRanges <- as.matrix(chrStats[,c("start","end")]); - vs <- xScale * chrRanges; - mids <- (vs[,1]+vs[,2])/2; - CT <- y; - - nbrOfLoci <- length(x); - chromosomes <- getChromosomes(fitT); - chrLabels <- sprintf("%02d", chromosomes); + CT <- y <- muN <- betaT <- betaN <- betaTN <- NULL + rm(list=c("CT", "muN", "betaT", "betaN", "betaTN")) + attachLocally(data) + x <- xScale * x + chrStats <- fitT$chromosomeStats + chrStats <- chrStats[-nrow(chrStats),,drop=FALSE] + chrRanges <- as.matrix(chrStats[,c("start","end")]) + vs <- xScale * chrRanges + mids <- (vs[,1]+vs[,2])/2 + CT <- y + + nbrOfLoci <- length(x) + chromosomes <- getChromosomes(fitT) + chrLabels <- sprintf("%02d", chromosomes) if (byIndex) { - xs <- seq_along(x); + xs <- seq_along(x) } else { - xs <- x; + xs <- x } if (!add && !is.null(mar)) { - par(mar=mar); + par(mar=mar) } - gh <- fitT; - gh$xScale <- xScale; + gh <- fitT + gh$xScale <- xScale - xlim <- xScale*range(chrRanges, na.rm=TRUE); + xlim <- xScale*range(chrRanges, na.rm=TRUE) - pchT <- if (scatter) { pch } else { NA }; + pchT <- if (scatter) { pch } else { NA } - plot(NA, xlim=xlim, ylim=Clim, xlab=xlab, ylab=Clab, axes=FALSE); - if (!is.null(onBegin)) onBegin(gh=gh); - points(xs, CT, pch=pchT, col=col, ...); - side <- rep(c(1,3), length.out=length(chrLabels)); - mtext(text=chrLabels, side=side, at=mids, line=0.1, cex=0.7*par("cex")); + plot(NA, xlim=xlim, ylim=Clim, xlab=xlab, ylab=Clab, axes=FALSE) + if (!is.null(onBegin)) onBegin(gh=gh) + points(xs, CT, pch=pchT, col=col, ...) + side <- rep(c(1,3), length.out=length(chrLabels)) + mtext(text=chrLabels, side=side, at=mids, line=0.1, cex=0.7*par("cex")) if (boundaries) { - abline(v=vs, lty=3); + abline(v=vs, lty=3) } - axis(side=2); - box(); + axis(side=2) + box() if (levels) { - drawLevels(fitT, col=meanCol, xScale=xScale, byIndex=byIndex); + drawLevels(fitT, col=meanCol, xScale=xScale, byIndex=byIndex) } - if (!is.null(onEnd)) onEnd(gh=gh); + if (!is.null(onEnd)) onEnd(gh=gh) - invisible(gh); + invisible(gh) }, private=TRUE) # plotTracksManyChromosomes() - - - -############################################################################ -# HISTORY: -# 2013-10-14 -# o Now plotTracksManyChromosomes() for CBS gives a more informative -# error if 'Clim' is invalid. -# 2013-10-09 -# o BUG FIX: tileChromosomes() for CBS did not set "tiledChromosomes" -# attribute due to a typo. -# 2013-05-07 -# o Now tileChromosomes() no longer gives warnings on "max(i): no -# non-missing arguments to max; returning -Inf". -# 2011-12-06 -# o Now plotTracks() for CBS always returns an invisible object. -# 2011-12-03 -# o Added drawChangePoints() for CBS. -# 2011-10-23 -# o BUG FIX: highlightArmCalls() for CBS did not handle empty chromosomes. -# 2011-10-08 -# o Added drawChromosomes() for CBS. -# 2011-10-07 -# o Added highlightArmCalls() for CBS. -# 2011-10-06 -# o Now drawCentromeres() for CBS can also plot start and stop. -# o ROBUSTNESS: Now plotTracksManyChromosomes() extract (start,end) -# information by names (no longer assuming certain indices). -# 2011-09-07 -# o Added highlightLocusCalls(). -# 2011-09-06 -# o Added highlightCalls(). -# o Added getChromosomeOffsets(). -# 2011-09-01 -# o BUG FIX: plotTracksManyChromosomes() for CBS gave an error because -# internal variable 'CT' was not defined. -# o BUG FIX: tileChromosomes() for CBS not identify the chromosomes of -# the loci, and hence generated corrupt/missing values while tiling. -# 2010-11-19 -# o Created from PairedPSCBS.R. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/CBS.PLOT.R r-cran-pscbs-0.64.0/R/CBS.PLOT.R --- r-cran-pscbs-0.63.0/R/CBS.PLOT.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/CBS.PLOT.R 2018-08-12 21:30:44.000000000 +0000 @@ -32,104 +32,108 @@ #*/########################################################################### setMethodS3("plotTracks", "CBS", function(x, scatter=TRUE, pch=20, col="gray", meanCol="purple", cex=1, grid=FALSE, Clim="auto", xScale=1e-6, Clab="auto", ..., byIndex=FALSE, mar=NULL, add=FALSE) { # To please R CMD check - fit <- x; + fit <- x # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'add': - add <- Arguments$getLogical(add); + add <- Arguments$getLogical(add) # Argument 'Clim': if (identical(Clim, "auto")) { - signalType <- getSignalType(fit); - ploidy <- ploidy(fit); + signalType <- getSignalType(fit) + ploidy <- ploidy(fit) Clim <- switch(signalType, "log2ratio" = c(-2,2) + c(-1,1)*ploidy/2, "ratio" = c(0,3*ploidy), - NULL - ); + { + x_name <- as.character(substitute(x)) + warning(sprintf("Setting default 'Clim' assuming the signal type is %s because signalType(%s) is unknown (%s). Use signalType(%s) <- %s to avoid this warning.", sQuote("ratio"), x_name, sQuote(signalType), x_name, sQuote("ratio"))) + c(0, 3 * ploidy) + } + ) ## NOTE: Don't understand why, but with this 'R CMD build' gives: ## "Error: processing vignette 'CBS.tex.rsp' failed with diagnostics: ## Failed to infer argument 'Clim' due to an unknown signalType(): NA" ## /HB 2013-10-14 ## if (!add && is.null(Clim)) { -## throw("Failed to infer argument 'Clim' due to an unknown signalType(): ", signalType); +## throw("Failed to infer argument 'Clim' due to an unknown signalType(): ", signalType) ## } } else if (!add) { Clim <- Arguments$getNumerics(Clim, length=c(2L,2L), - disallow=c("Inf", "NA", "NaN")); + disallow=c("Inf", "NA", "NaN")) } if (identical(Clab, "auto")) { - signalType <- getSignalType(fit); + signalType <- getSignalType(fit) Clab <- switch(signalType, "log2ratio" = "log2 CN ratio", "ratio" = "CN ratio", NULL - ); + ) } # Argument 'fit': if (nbrOfChromosomes(fit) > 1L) { - res <- plotTracksManyChromosomes(fit, scatter=scatter, pch=pch, col=col, cex=cex, meanCol=meanCol, Clim=Clim, xScale=xScale, Clab=Clab, ..., byIndex=byIndex, mar=mar, add=add); - return(invisible(res)); + res <- plotTracksManyChromosomes(fit, scatter=scatter, pch=pch, col=col, cex=cex, meanCol=meanCol, Clim=Clim, xScale=xScale, Clab=Clab, ..., byIndex=byIndex, mar=mar, add=add) + return(invisible(res)) } # Argument 'xScale': - xScale <- Arguments$getNumeric(xScale, range=c(0,Inf)); + xScale <- Arguments$getNumeric(xScale, range=c(0,Inf)) # Extract the input data - data <- getLocusData(fit); + data <- getLocusData(fit) if (is.null(data)) { - throw("Cannot plot segmentation results. No input data available."); + throw("Cannot plot segmentation results. No input data available.") } - chromosomes <- getChromosomes(fit); - chromosome <- chromosomes[1]; - x <- data$x; - CT <- data[,3]; - nbrOfLoci <- length(x); + chromosomes <- getChromosomes(fit) + chromosome <- chromosomes[1] + x <- data$x + CT <- data[,3] + nbrOfLoci <- length(x) # Extract the segmentation - segs <- getSegments(fit); + segs <- getSegments(fit) if (chromosome != 0) { - chrTag <- sprintf("Chr%02d", chromosome); + chrTag <- sprintf("Chr%02d", chromosome) } else { - chrTag <- ""; + chrTag <- "" } if (xScale != 1) { - x <- xScale * x; + x <- xScale * x } if (!add && !is.null(mar)) { - par(mar=mar); + par(mar=mar) } - pchT <- if (scatter) { pch } else { NA }; + pchT <- if (scatter) { pch } else { NA } - plot(x, CT, pch=pchT, cex=cex, col=col, ..., ylim=Clim, ylab=Clab); - stext(side=3, pos=1, chrTag); + plot(x, CT, pch=pchT, cex=cex, col=col, ..., ylim=Clim, ylab=Clab) + stext(side=3, pos=1, chrTag) if (grid) { - yrange <- par("usr")[3:4]; - yrange[1] <- floor(yrange[1]); - yrange[2] <- ceiling(yrange[2]); - abline(h=seq(from=yrange[1], to=yrange[2], by=2), lty=3, col="gray"); - abline(h=0, lty=1, col="black"); + yrange <- par("usr")[3:4] + yrange[1] <- floor(yrange[1]) + yrange[2] <- ceiling(yrange[2]) + abline(h=seq(from=yrange[1], to=yrange[2], by=2), lty=3, col="gray") + abline(h=0, lty=1, col="black") } - drawLevels(fit, col=meanCol, xScale=xScale); + drawLevels(fit, col=meanCol, xScale=xScale) - invisible(); + invisible() }) # plotTracks() setMethodS3("plot", "CBS", function(x, ...) { - plotTracks(x, ...); + plotTracks(x, ...) }, protected=TRUE) @@ -137,93 +141,93 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Tile chromosomes # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - fitT <- tileChromosomes(fit); + fitT <- tileChromosomes(fit) # Get segmentation results - segs <- as.data.frame(fitT); + segs <- as.data.frame(fitT) # Extract subset of segments - fields <- c("start", "end", "mean"); - segs <- segs[,fields, drop=FALSE]; - segs <- unique(segs); + fields <- c("start", "end", "mean") + segs <- segs[,fields, drop=FALSE] + segs <- unique(segs) # Reuse drawLevels() for the DNAcopy class - colnames(segs) <- c("loc.start", "loc.end", "seg.mean"); - dummy <- list(output=segs); - class(dummy) <- "DNAcopy"; - drawLevels(dummy, col=col, xScale=xScale, ...); + colnames(segs) <- c("loc.start", "loc.end", "seg.mean") + dummy <- list(output=segs) + class(dummy) <- "DNAcopy" + drawLevels(dummy, col=col, xScale=xScale, ...) }, protected=TRUE) setMethodS3("highlightCalls", "CBS", function(fit, pch=20, callCols=c(loss="red", gain="green", "amplification"="blue"), lwd=3, meanCol="purple", ..., xScale=1e-6, byIndex=FALSE, verbose=FALSE) { - segs <- getSegments(fit, splitter=FALSE); + segs <- getSegments(fit, splitter=FALSE) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Identify segment calls # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - callFields <- grep("Call$", colnames(segs)); - callTypes <- gsub("Call$", "", colnames(segs)[callFields]); - nbrOfCalls <- length(callFields); + callFields <- grep("Call$", colnames(segs)) + callTypes <- gsub("Call$", "", colnames(segs)[callFields]) + nbrOfCalls <- length(callFields) # Nothing todo? if (nbrOfCalls == 0L) { - return(); + return() } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Tile chromosomes # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - fitT <- tileChromosomes(fit); + fitT <- tileChromosomes(fit) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Highlight threshold levels # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - params <- fit$params$callGainsAndLosses; - abline(h=params$muR, col="gray", lty=3); - abline(h=params$tauLoss, col=callCols["loss"], lty=3); - abline(h=params$tauGain, col=callCols["gain"], lty=3); + params <- fit$params$callGainsAndLosses + abline(h=params$muR, col="gray", lty=3) + abline(h=params$tauLoss, col=callCols["loss"], lty=3) + abline(h=params$tauGain, col=callCols["gain"], lty=3) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Highlight gains and losses # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - dataT <- getLocusData(fitT); - segsT <- getSegments(fitT, splitter=FALSE); - chr <- dataT[,"chromosome"]; - x <- dataT[,"x"]; - y <- dataT[,3]; - nbrOfLoci <- nbrOfLoci(fitT); - nbrOfSegments <- nbrOfSegments(fitT); + dataT <- getLocusData(fitT) + segsT <- getSegments(fitT, splitter=FALSE) + chr <- dataT[,"chromosome"] + x <- dataT[,"x"] + y <- dataT[,3] + nbrOfLoci <- nbrOfLoci(fitT) + nbrOfSegments <- nbrOfSegments(fitT) # Not needed anymore - dataT <- NULL; + dataT <- NULL # For each non-neutral segment for (ss in seq_len(nbrOfSegments)) { - seg <- segsT[ss,]; + seg <- segsT[ss,] for (tt in seq_along(callTypes)) { - field <- callFields[tt]; - type <- callTypes[tt]; + field <- callFields[tt] + type <- callTypes[tt] # Called? - call <- seg[[field]]; + call <- seg[[field]] if (isTRUE(call)) { - col <- callCols[type]; - idxs <- which(chr == seg$chromosome & seg$start <= x & x <= seg$end); - idxs <- Arguments$getIndices(idxs, max=nbrOfLoci); + col <- callCols[type] + idxs <- which(chr == seg$chromosome & seg$start <= x & x <= seg$end) + idxs <- Arguments$getIndices(idxs, max=nbrOfLoci) if (byIndex) { - xs <- idxs; + xs <- idxs } else { - xs <- x[idxs] * xScale; + xs <- x[idxs] * xScale } - ys <- y[idxs]; - points(xs, ys, pch=pch, col=col, ...); - xx <- range(xs, na.rm=TRUE); - yy <- rep(seg$mean, times=2); - lines(xx, yy, lwd=lwd, col=meanCol); + ys <- y[idxs] + points(xs, ys, pch=pch, col=col, ...) + xx <- range(xs, na.rm=TRUE) + yy <- rep(seg$mean, times=2) + lines(xx, yy, lwd=lwd, col=meanCol) } } # for (tt ...) } # for (ss ...) @@ -232,57 +236,57 @@ setMethodS3("highlightLocusCalls", "CBS", function(fit, callPchs=c(negOutlier=25, posOutlier=24), callCols=c(negOutlier="blue", posOutlier="blue"), ..., xScale=1e-6, byIndex=FALSE, verbose=FALSE) { - data <- getLocusData(fit); + data <- getLocusData(fit) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Identify segment calls # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - callFields <- grep("Call$", colnames(data)); - callTypes <- gsub("Call$", "", colnames(data)[callFields]); - nbrOfCalls <- length(callFields); + callFields <- grep("Call$", colnames(data)) + callTypes <- gsub("Call$", "", colnames(data)[callFields]) + nbrOfCalls <- length(callFields) # Nothing todo? if (nbrOfCalls == 0) { - return(); + return() } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Tile chromosomes # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - fitT <- tileChromosomes(fit); + fitT <- tileChromosomes(fit) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Highlight gains and losses # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - dataT <- getLocusData(fitT); - chr <- dataT[,"chromosome"]; - x <- dataT[,"x"]; - y <- dataT[,3]; - nbrOfLoci <- nbrOfLoci(fitT); + dataT <- getLocusData(fitT) + chr <- dataT[,"chromosome"] + x <- dataT[,"x"] + y <- dataT[,3] + nbrOfLoci <- nbrOfLoci(fitT) # For each non-neutral segment for (tt in seq_along(callTypes)) { - field <- callFields[tt]; - type <- callTypes[tt]; + field <- callFields[tt] + type <- callTypes[tt] - isCalled <- dataT[[field]]; - idxs <- which(isCalled); + isCalled <- dataT[[field]] + idxs <- which(isCalled) if (length(idxs) == 0L) { - next; + next } if (byIndex) { - xs <- idxs; + xs <- idxs } else { - xs <- x[idxs] * xScale; + xs <- x[idxs] * xScale } - ys <- y[idxs]; - pch <- callPchs[type]; - col <- callCols[type]; - points(xs, ys, pch=pch, col=col, ...); + ys <- y[idxs] + pch <- callPchs[type] + col <- callCols[type] + points(xs, ys, pch=pch, col=col, ...) } # for (tt ...) }, protected=TRUE) # highlightLocusCalls() @@ -291,7 +295,7 @@ setMethodS3("drawChromosomes", "CBS", function(x, lty=3, xScale=1e-6, ..., byIndex=FALSE, verbose=FALSE) { # To please R CMD check - fit <- x; + fit <- x # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments @@ -299,32 +303,32 @@ # Argument 'fit': # Argument 'xScale': - xScale <- Arguments$getNumeric(xScale, range=c(0,Inf)); + xScale <- Arguments$getNumeric(xScale, range=c(0,Inf)) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Tile chromosomes # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - fitT <- tileChromosomes(fit); + fitT <- tileChromosomes(fit) # Sanity check - stopifnot(!is.null(fitT$chromosomeStats)); + .stop_if_not(!is.null(fitT$chromosomeStats)) - chrStats <- fitT$chromosomeStats; - chrStats <- chrStats[-nrow(chrStats),,drop=FALSE]; - chrRanges <- as.matrix(chrStats[,c("start","end")]); - vs <- xScale * chrRanges; - mids <- (vs[,1]+vs[,2])/2; - chromosomes <- getChromosomes(fitT); - chrLabels <- sprintf("%02d", chromosomes); - side <- rep(c(1,3), length.out=length(chrLabels)); - mtext(text=chrLabels, side=side, at=mids, line=0.1, cex=0.7*par("cex")); - abline(v=vs, lty=lty); + chrStats <- fitT$chromosomeStats + chrStats <- chrStats[-nrow(chrStats),,drop=FALSE] + chrRanges <- as.matrix(chrStats[,c("start","end")]) + vs <- xScale * chrRanges + mids <- (vs[,1]+vs[,2])/2 + chromosomes <- getChromosomes(fitT) + chrLabels <- sprintf("%02d", chromosomes) + side <- rep(c(1,3), length.out=length(chrLabels)) + mtext(text=chrLabels, side=side, at=mids, line=0.1, cex=0.7*par("cex")) + abline(v=vs, lty=lty) }, protected=TRUE) # drawChromosomes() @@ -334,132 +338,91 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'genomeData': - stopifnot(inherits(genomeData, "data.frame")); - stopifnot(is.element("chromosome", colnames(genomeData))); - stopifnot(is.element("centroStart", colnames(genomeData))); - stopifnot(is.element("centroEnd", colnames(genomeData))); + .stop_if_not(inherits(genomeData, "data.frame")) + .stop_if_not(is.element("chromosome", colnames(genomeData))) + .stop_if_not(is.element("centroStart", colnames(genomeData))) + .stop_if_not(is.element("centroEnd", colnames(genomeData))) # Calculate the midpoints of the centromeres - colnames(genomeData) <- tolower(gsub("centro", "", colnames(genomeData))); - genomeData$mid <- (genomeData$start + genomeData$end) / 2; + colnames(genomeData) <- tolower(gsub("centro", "", colnames(genomeData))) + genomeData$mid <- (genomeData$start + genomeData$end) / 2 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Tile chromosomes # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - fitT <- tileChromosomes(fit); + fitT <- tileChromosomes(fit) # Sanity check - stopifnot(!is.null(fitT$chromosomeStats)); + .stop_if_not(!is.null(fitT$chromosomeStats)) - chrStats <- fitT$chromosomeStats; - offsets <- chrStats[,"start"] - chrStats[1,"start"]; + chrStats <- fitT$chromosomeStats + offsets <- chrStats[,"start"] - chrStats[1,"start"] # Centroid locations in the tiled space - offsetsT <- offsets[seq_len(nrow(genomeData))]; + offsetsT <- offsets[seq_len(nrow(genomeData))] - xx <- genomeData[,what,drop=FALSE]; - xx <- as.matrix(xx); - xx <- offsetsT + xx; + xx <- genomeData[,what,drop=FALSE] + xx <- as.matrix(xx) + xx <- offsetsT + xx - ats <- xScale * xx; + ats <- xScale * xx for (cc in seq_len(ncol(xx))) { - abline(v=ats[,cc], col=col, lty=lty, ...); + abline(v=ats[,cc], col=col, lty=lty, ...) } - invisible(ats); + invisible(ats) }, protected=TRUE) # drawCentromeres() setMethodS3("highlightArmCalls", "CBS", function(fit, genomeData, minFraction=0.95, callCols=c("loss"="red", "gain"="green"), xScale=1e-6, ...) { # To please/trick R CMD check - chromosome <- x <- NULL; rm(list=c("chromosome", "x")); + chromosome <- x <- NULL; rm(list=c("chromosome", "x")) - callStats <- callArms(fit, genomeData=genomeData, minFraction=minFraction); + callStats <- callArms(fit, genomeData=genomeData, minFraction=minFraction) - callTypes <- grep("Fraction", colnames(callStats), value=TRUE); - callTypes <- gsub("Fraction", "", callTypes); + callTypes <- grep("Fraction", colnames(callStats), value=TRUE) + callTypes <- gsub("Fraction", "", callTypes) - callTypes <- intersect(callTypes, c("loss", "gain")); + callTypes <- intersect(callTypes, c("loss", "gain")) # Adjust (start, end) - offsets <- getChromosomeOffsets(fit); - offsets <- offsets[callStats[,"chromosome"]]; - callStats[,c("start","end")] <- offsets + callStats[,c("start","end")]; + offsets <- getChromosomeOffsets(fit) + offsets <- offsets[callStats[,"chromosome"]] + callStats[,c("start","end")] <- offsets + callStats[,c("start","end")] - nbrOfRegions <- nrow(callStats); + nbrOfRegions <- nrow(callStats) # Nothing todo? if (nbrOfRegions == 0) { - return(invisible(callStats)); + return(invisible(callStats)) } - usr <- par("usr"); - dy <- diff(usr[3:4]); - yy <- usr[3]+c(0,0.05*dy); - abline(h=usr[3]+0.95*0.05*dy, lty=1, col="gray"); + usr <- par("usr") + dy <- diff(usr[3:4]) + yy <- usr[3]+c(0,0.05*dy) + abline(h=usr[3]+0.95*0.05*dy, lty=1, col="gray") - xx <- callStats[,c("start", "end")]; - xx <- as.matrix(xx); - xx <- xx * xScale; + xx <- callStats[,c("start", "end")] + xx <- as.matrix(xx) + xx <- xx * xScale for (type in callTypes) { - col <- callCols[type]; - keyA <- sprintf("%sFraction", type); - keyB <- sprintf("%sCall", type); + col <- callCols[type] + keyA <- sprintf("%sFraction", type) + keyB <- sprintf("%sCall", type) for (kk in seq_len(nbrOfRegions)) { - xs <- xx[kk,]; - score <- callStats[kk, keyA]; + xs <- xx[kk,] + score <- callStats[kk, keyA] if (is.finite(score) && score > 0) { - ys <- rep(yy[1]+callStats[kk, keyA]*0.05*dy, times=2); - lines(x=xs, y=ys, col=col); - call <- callStats[kk, keyB]; + ys <- rep(yy[1]+callStats[kk, keyA]*0.05*dy, times=2) + lines(x=xs, y=ys, col=col) + call <- callStats[kk, keyB] if (call) { - rect(xs[1], yy[1], xs[2], yy[2], col=col, border=NA); + rect(xs[1], yy[1], xs[2], yy[2], col=col, border=NA) } } } } # for (type ...) - invisible(callStats); -}, protected=TRUE); # highlightArmCalls() - - - -############################################################################ -# HISTORY: -# 2013-11-23 -# o Added "dummy" 'byIndex' argument to drawLevels() for CBS to avoid -# that argument being passed down lines(). -# 2013-10-14 -# o Now plotTracks() for CBS gives a more informative error if 'Clim' -# is invalid or "auto" and could not be inferred due to an unknown -# or unset signal type. -# 2013-04-18 -# o Now drawLevels() also works for multiple chromosomes. -# 2011-12-06 -# o Now plotTracks() for CBS always returns an invisible object. -# 2011-12-03 -# o Added drawChangePoints() for CBS. -# 2011-10-23 -# o BUG FIX: highlightArmCalls() for CBS did not handle empty chromosomes. -# 2011-10-08 -# o Added drawChromosomes() for CBS. -# 2011-10-07 -# o Added highlightArmCalls() for CBS. -# 2011-10-06 -# o Now drawCentromeres() for CBS can also plot start and stop. -# o ROBUSTNESS: Now plotTracksManyChromosomes() extract (start,end) -# information by names (no longer assuming certain indices). -# 2011-09-07 -# o Added highlightLocusCalls(). -# 2011-09-06 -# o Added highlightCalls(). -# o Added getChromosomeOffsets(). -# 2011-09-01 -# o BUG FIX: plotTracksManyChromosomes() for CBS gave an error because -# internal variable 'CT' was not defined. -# o BUG FIX: tileChromosomes() for CBS not identify the chromosomes of -# the loci, and hence generated corrupt/missing values while tiling. -# 2010-11-19 -# o Created from PairedPSCBS.R. -############################################################################ + invisible(callStats) +}, protected=TRUE) # highlightArmCalls() diff -Nru r-cran-pscbs-0.63.0/R/CBS.PRUNE.R r-cran-pscbs-0.64.0/R/CBS.PRUNE.R --- r-cran-pscbs-0.63.0/R/CBS.PRUNE.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/CBS.PRUNE.R 2018-08-12 21:30:44.000000000 +0000 @@ -44,187 +44,174 @@ #*/########################################################################### setMethodS3("pruneBySdUndo", "CBS", function(fit, rho=3, sigma="DNAcopy", ..., verbose=FALSE) { # Local copies of DNAcopy functions - DNAcopy_changepoints.sdundo <- .use("changepoints.sdundo", package="DNAcopy"); + DNAcopy_changepoints.sdundo <- .use("changepoints.sdundo", package="DNAcopy") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'rho': - rho <- Arguments$getDouble(rho, range=c(0,Inf)); + rho <- Arguments$getDouble(rho, range=c(0,Inf)) # Argument 'sigma': if (is.character(sigma)) { - sigma <- estimateStandardDeviation(fit, method=sigma, ...); + sigma <- estimateStandardDeviation(fit, method=sigma, ...) } - sigma <- Arguments$getDouble(sigma, range=c(0,Inf), disallow=c("NA", "NaN", "Inf")); + sigma <- Arguments$getDouble(sigma, range=c(0,Inf), disallow=c("NA", "NaN", "Inf")) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Pruning segments by standard deviation"); + verbose && enter(verbose, "Pruning segments by standard deviation") # Check if locus weights are available - data <- getLocusData(fit); - hasWeights <- !is.null(data$w); + data <- getLocusData(fit) + hasWeights <- !is.null(data$w) # Not needed anymore - data <- NULL; + data <- NULL # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Prune chromosome by chromosome # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - chromosomes <- getChromosomes(fit); - nbrOfChromosomes <- length(chromosomes); + chromosomes <- getChromosomes(fit) + nbrOfChromosomes <- length(chromosomes) - fitList <- vector("list", length=nbrOfChromosomes); + fitList <- vector("list", length=nbrOfChromosomes) for (cc in seq_len(nbrOfChromosomes)) { - chr <- chromosomes[cc]; + chr <- chromosomes[cc] verbose && enter(verbose, sprintf("Chromosome #%d ('Chr%s') of %d", - cc, chr, length(chromosomes))); + cc, chr, length(chromosomes))) # Extract this chromosome - fitT <- extractChromosome(fit, chromosome=chr); + fitT <- extractChromosome(fit, chromosome=chr) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Get segmentation data # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getLocusData(fitT); - segs <- getSegments(fitT); - segRows <- fitT$segRows; - nbrOfSegs <- nrow(segRows); - verbose && cat(verbose, "Number of segments (before): ", nbrOfSegs); + data <- getLocusData(fitT) + segs <- getSegments(fitT) + segRows <- fitT$segRows + nbrOfSegs <- nrow(segRows) + verbose && cat(verbose, "Number of segments (before): ", nbrOfSegs) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Drop missing values # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - y <- data$y; + y <- data$y # Label data points by their segment index - segId <- rep(NA_integer_, times=max(segRows[,2], na.rm=TRUE)); + segId <- rep(NA_integer_, times=max(segRows[,2], na.rm=TRUE)) for (rr in 1:nbrOfSegs) { - segRow <- unlist(segRows[rr,], use.names=FALSE); - idxs <- segRow[1]:segRow[2]; - segId[idxs] <- rr; + segRow <- unlist(segRows[rr,], use.names=FALSE) + idxs <- segRow[1]:segRow[2] + segId[idxs] <- rr } # Drop missing value - keep <- !is.na(y); + keep <- !is.na(y) if (hasWeights) { - w <- data$w; - keep <- keep & !is.na(w); + w <- data$w + keep <- keep & !is.na(w) } - units <- which(keep); - y <- y[units]; - segId <- segId[units]; + units <- which(keep) + y <- y[units] + segId <- segId[units] if (hasWeights) { - w <- w[units]; + w <- w[units] } # Update 'segRows' accordingly for (rr in 1:nbrOfSegs) { - startStop <- range(which(segId == rr)); - segRows[rr,1] <- startStop[1]; - segRows[rr,2] <- startStop[2]; + startStop <- range(which(segId == rr)) + segRows[rr,1] <- startStop[1] + segRows[rr,2] <- startStop[2] } # Not needed anymore - segId <- startStop <- NULL; + segId <- startStop <- NULL # Sanity check - stopifnot(max(segRows[,2]) <= length(y)); + .stop_if_not(max(segRows[,2]) <= length(y)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Prune change points # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - segLengths <- segRows[,2] - segRows[,1] + 1L; + segLengths <- segRows[,2] - segRows[,1] + 1L segLengthsP <- DNAcopy_changepoints.sdundo(genomdat=y, - lseg=segLengths, trimmed.SD=sigma, change.SD=rho); - segLengthsP <- as.integer(segLengthsP); - nbrOfSegsP <- length(segLengthsP); - verbose && cat(verbose, "Number of segments (after): ", nbrOfSegsP); + lseg=segLengths, trimmed.SD=sigma, change.SD=rho) + segLengthsP <- as.integer(segLengthsP) + nbrOfSegsP <- length(segLengthsP) + verbose && cat(verbose, "Number of segments (after): ", nbrOfSegsP) - nbrOfPrunedSegs <- nbrOfSegs-nbrOfSegsP; - verbose && cat(verbose, "Number of segments dropped: ", nbrOfPrunedSegs); + nbrOfPrunedSegs <- nbrOfSegs-nbrOfSegsP + verbose && cat(verbose, "Number of segments dropped: ", nbrOfPrunedSegs) # No segments pruned? if (nbrOfPrunedSegs == 0) { # Sanity check - stopifnot(identical(segLengthsP, segLengths)); + .stop_if_not(identical(segLengthsP, segLengths)) - fitList[[cc]] <- fitT; - verbose && cat(verbose, "Nothing to changed. Skipping."); -# verbose && exit(verbose); -# next; + fitList[[cc]] <- fitT + verbose && cat(verbose, "Nothing to changed. Skipping.") +# verbose && exit(verbose) +# next } # Setup new 'segRows' - endRow <- cumsum(segLengthsP); - n <- length(endRow); - segRowsP <- data.frame(startRow=c(1L, endRow[-n]+1L), endRow=endRow); + endRow <- cumsum(segLengthsP) + n <- length(endRow) + segRowsP <- data.frame(startRow=c(1L, endRow[-n]+1L), endRow=endRow) # Expand to units with also missing values - segRowsP[,1] <- units[segRowsP[,1]]; - segRowsP[,2] <- units[segRowsP[,2]]; + segRowsP[,1] <- units[segRowsP[,1]] + segRowsP[,2] <- units[segRowsP[,2]] # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Create stub for a segment table # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - idxs <- seq_len(nbrOfSegsP); - segsP <- segs[idxs,]; + idxs <- seq_len(nbrOfSegsP) + segsP <- segs[idxs,] # Sanity checks if (nbrOfPrunedSegs == 0) { - segRows <- fitT$segRows; - stopifnot(all.equal(segRowsP, segRows, check.attributes=FALSE)); - stopifnot(all.equal(segsP, segs, check.attributes=FALSE)); + segRows <- fitT$segRows + .stop_if_not(all.equal(segRowsP, segRows, check.attributes=FALSE)) + .stop_if_not(all.equal(segsP, segs, check.attributes=FALSE)) } - fitT$output <- segsP; - fitT$segRows <- segRowsP; + fitT$output <- segsP + fitT$segRows <- segRowsP - fitList[[cc]] <- fitT; + fitList[[cc]] <- fitT - verbose && exit(verbose); + verbose && exit(verbose) } # for (cc ...) - fitP <- Reduce(append, fitList); + ## formerly Reduce() w/ append(..., addSplit = TRUE) + fitP <- do.call(c, args = c(fitList, addSplit = TRUE)) - verbose && enter(verbose, "Updating segment means and boundaries"); - fitP <- updateBoundaries(fitP, verbose=less(verbose, 50)); - fitP <- updateMeans(fitP, verbose=less(verbose, 50)); - verbose && exit(verbose); - - nbrOfSegs <- nbrOfSegments(fit); - nbrOfSegsP <- nbrOfSegments(fitP); - nbrOfPrunedSegs <- nbrOfSegs-nbrOfSegsP; - verbose && cat(verbose, "Number of segments (before): ", nbrOfSegs); - verbose && cat(verbose, "Number of segments (after): ", nbrOfSegsP); - verbose && cat(verbose, "Number of segments dropped: ", nbrOfPrunedSegs); + verbose && enter(verbose, "Updating segment means and boundaries") + fitP <- updateBoundaries(fitP, verbose=less(verbose, 50)) + fitP <- updateMeans(fitP, verbose=less(verbose, 50)) + verbose && exit(verbose) + + nbrOfSegs <- nbrOfSegments(fit) + nbrOfSegsP <- nbrOfSegments(fitP) + nbrOfPrunedSegs <- nbrOfSegs-nbrOfSegsP + verbose && cat(verbose, "Number of segments (before): ", nbrOfSegs) + verbose && cat(verbose, "Number of segments (after): ", nbrOfSegsP) + verbose && cat(verbose, "Number of segments dropped: ", nbrOfPrunedSegs) - verbose && exit(verbose); + verbose && exit(verbose) - fitP; + fitP }) # pruneBySdUndo() setMethodS3("seqOfSegmentsByDP", "CBS", function(fit, by=c("y"), ...) { - NextMethod("seqOfSegmentsByDP", by=by); + NextMethod("seqOfSegmentsByDP", by=by) }) - - -############################################################################ -# HISTORY: -# 2012-09-13 -# o Added seqOfSegmentsByDP() for CBS. -# 2011-12-06 -# o BUG FIX: pruneBySdUndo() for CBS did not work with more than one array. -# 2011-11-16 -# o Added Rdoc comments. -# 2011-11-15 -# o Added pruneBySdUndo() for CBS. -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/CBS.R r-cran-pscbs-0.64.0/R/CBS.R --- r-cran-pscbs-0.63.0/R/CBS.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/CBS.R 2018-08-12 21:30:44.000000000 +0000 @@ -34,32 +34,32 @@ # @author "HB" #*/########################################################################### setConstructorS3("CBS", function(...) { - extend(AbstractCBS(list(data=NULL, output=NULL), ...), "CBS"); + extend(AbstractCBS(list(data=NULL, output=NULL), ...), "CBS") }) setMethodS3("all.equal", "CBS", function(target, current, check.attributes=FALSE, ...) { # Compare class attributes - res <- all.equal(class(target), class(current)); + res <- all.equal(class(target), class(current)) if (!isTRUE(res)) { - return(res); + return(res) } # WORKAROUND: segmentByCBS() return getSegments(fit)$id without NA:s for # splitters, unless append() is used. # TO DO: Fix segmentByCBS() /HB 2011-10-08 - segs <- getSegments(target); + segs <- getSegments(target) if (nrow(segs) > 0) { - isSplitter <- isSegmentSplitter(target); - segs[isSplitter, "sampleName"] <- NA; - target$output <- segs; + isSplitter <- isSegmentSplitter(target) + segs[isSplitter, "sampleName"] <- NA + target$output <- segs } - segs <- getSegments(current); + segs <- getSegments(current) if (nrow(segs) > 0) { - isSplitter <- isSegmentSplitter(current); - segs[isSplitter, "sampleName"] <- NA; - current$output <- segs; + isSplitter <- isSegmentSplitter(current) + segs[isSplitter, "sampleName"] <- NA + current$output <- segs } # NOTE: Here arguments 'target' and 'current' are lists and does not @@ -67,7 +67,7 @@ # If passed explicity, note that they must be named *and* that the # first/dispatch argument have to be passed as 'object=target' # (and never as 'target=target'). /HB 2014-02-03 - NextMethod("all.equal", object=target, current=current, check.attributes=check.attributes); + NextMethod("all.equal", object=target, current=current, check.attributes=check.attributes) }, protected=TRUE) @@ -103,100 +103,100 @@ #*/########################################################################### setMethodS3("as.character", "CBS", function(x, ...) { # To please R CMD check - fit <- x; + fit <- x - s <- sprintf("%s:", class(fit)[1]); + s <- sprintf("%s:", class(fit)[1]) - s <- c(s, sprintf("Sample name: %s", getSampleName(fit))); + s <- c(s, sprintf("Sample name: %s", getSampleName(fit))) - s <- c(s, sprintf("Signal type: %s", getSignalType(fit))); + s <- c(s, sprintf("Signal type: %s", getSignalType(fit))) - s <- c(s, sprintf("Number of segments: %d", nbrOfSegments(fit))); + s <- c(s, sprintf("Number of segments: %d", nbrOfSegments(fit))) - s <- c(s, sprintf("Number of loci: %d", nbrOfLoci(fit))); + s <- c(s, sprintf("Number of loci: %d", nbrOfLoci(fit))) - n <- getSegments(fit)$nbrOfLoci; - q <- quantile(n, probs=c(0.00, 0.05, 0.25, 0.50, 0.75, 0.95, 1.00), na.rm=TRUE); - qs <- sprintf("%g [%s]", q, names(q)); - s <- c(s, sprintf("Number of loci per segment: %s", paste(qs, collapse=", "))); + n <- getSegments(fit)$nbrOfLoci + q <- quantile(n, probs=c(0.00, 0.05, 0.25, 0.50, 0.75, 0.95, 1.00), na.rm=TRUE) + qs <- sprintf("%g [%s]", q, names(q)) + s <- c(s, sprintf("Number of loci per segment: %s", paste(qs, collapse=", "))) - chrs <- getChromosomes(fit); - s <- c(s, sprintf("Chromosomes: [%d] %s", length(chrs), hpaste(chrs))); + chrs <- getChromosomes(fit) + s <- c(s, sprintf("Chromosomes: [%d] %s", length(chrs), hpaste(chrs))) - s <- c(s, sprintf("Standard deviation: %g", estimateStandardDeviation(fit))); + s <- c(s, sprintf("Standard deviation: %g", estimateStandardDeviation(fit))) - tt <- grep("Call$", colnames(getLocusData(fit)), value=TRUE); - s <- c(s, sprintf("Locus calls: [%d] %s", length(tt), hpaste(tt))); + tt <- grep("Call$", colnames(getLocusData(fit)), value=TRUE) + s <- c(s, sprintf("Locus calls: [%d] %s", length(tt), hpaste(tt))) - segs <- getSegments(fit); - callCols <- grep("Call$", colnames(segs), value=TRUE); - callTypes <- gsub("Call$", "", callCols); - s <- c(s, sprintf("Types of segment calls: [%d] %s", length(callTypes), hpaste(callTypes))); + segs <- getSegments(fit) + callCols <- grep("Call$", colnames(segs), value=TRUE) + callTypes <- gsub("Call$", "", callCols) + s <- c(s, sprintf("Types of segment calls: [%d] %s", length(callTypes), hpaste(callTypes))) for (kk in seq_along(callCols)) { - key <- callCols[kk]; - type <- callTypes[kk]; - n <- sum(segs[,key], na.rm=TRUE); + key <- callCols[kk] + type <- callTypes[kk] + n <- sum(segs[,key], na.rm=TRUE) if (type == "loss") { - nC <- sum(isWholeChromosomeLost(fit)); + nC <- sum(isWholeChromosomeLost(fit)) } else if (type == "gain") { - nC <- sum(isWholeChromosomeGained(fit)); + nC <- sum(isWholeChromosomeGained(fit)) } else { - nC <- NA; + nC <- NA } - s <- c(s, sprintf("Number of chromosomes (segments) called '%s': %d (%d)", type, nC, n)); + s <- c(s, sprintf("Number of chromosomes (segments) called '%s': %d (%d)", type, nC, n)) } - GenericSummary(s); + GenericSummary(s) }, protected=TRUE) setMethodS3("as.data.frame", "CBS", function(x, ...) { - getSegments(x, splitter=FALSE, ...); + getSegments(x, splitter=FALSE, ...) }, protected=TRUE) setMethodS3("getSignalType", "CBS", function(fit, ...) { - type <- fit$signalType; - if (is.null(type)) type <- as.character(NA); - type; + type <- fit$signalType + if (is.null(type)) type <- as.character(NA) + type }, protected=TRUE) setMethodS3("signalType", "CBS", function(fit, ...) { - getSignalType(fit); + getSignalType(fit) }, protected=TRUE) "signalType<-" <- function(x, value) { - UseMethod("signalType<-"); + UseMethod("signalType<-") } setMethodS3("signalType<-", "CBS", function(x, value) { - fit <- x; + fit <- x # Argument 'value': - value <- Arguments$getCharacter(value); + value <- Arguments$getCharacter(value) - fit$signalType <- value; - fit; + fit$signalType <- value + fit }, private=TRUE, addVarArgs=FALSE) setMethodS3("getLocusSignalNames", "CBS", function(fit, ...) { - data <- fit$data; - names <- colnames(data); + data <- fit$data + names <- colnames(data) if (is.element("y", names)) { - return("y"); + return("y") } else if (is.element("CT", names)) { - return("CT"); + return("CT") } - throw("INTERNAL ERROR: Unknown locus signal names: ", paste(names, collapse=", ")); + throw("INTERNAL ERROR: Unknown locus signal names: ", paste(names, collapse=", ")) }, protected=TRUE) setMethodS3("getSegmentTrackPrefixes", "CBS", function(fit, ...) { - c(""); + c("") }, protected=TRUE) @@ -206,138 +206,138 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'indices': if (!is.null(indices)) { - indices <- Arguments$getIndices(indices); + indices <- Arguments$getIndices(indices) } # Argument 'addCalls': if (is.logical(addCalls)) { - addCalls <- Arguments$getLogical(addCalls); + addCalls <- Arguments$getLogical(addCalls) if (!addCalls) { - addCalls <- NULL; + addCalls <- NULL } } else { - addCalls <- Arguments$getCharacters(addCalls); + addCalls <- Arguments$getCharacters(addCalls) } - data <- fit$data; + data <- fit$data # Append segment calls? if (length(addCalls) > 0) { - callsL <- extractCallsByLocus(fit); + callsL <- extractCallsByLocus(fit) if (is.character(addCalls)) { - callsL <- callsL[,addCalls]; + callsL <- callsL[,addCalls] } # Sanity check - stopifnot(nrow(callsL) == nrow(data)); + .stop_if_not(nrow(callsL) == nrow(data)) - data <- cbind(data, callsL); + data <- cbind(data, callsL) } # Return requested indices if (!is.null(indices)) { # Map of final indices to current indices - map <- match(indices, data$index); + map <- match(indices, data$index) # Extract/expand... - data <- data[map,]; - rownames(data) <- NULL; + data <- data[map,] + rownames(data) <- NULL # Sanity check - stopifnot(nrow(data) == length(indices)); + .stop_if_not(nrow(data) == length(indices)) } - data; + data }, private=TRUE) # getLocusData() setMethodS3("isSegmentSplitter", "CBS", function(fit, ...) { - segs <- fit$output; + segs <- fit$output - isSplitter <- lapply(segs[-1], FUN=is.na); - isSplitter <- Reduce("&", isSplitter); + isSplitter <- lapply(segs[-1], FUN=is.na) + isSplitter <- Reduce("&", isSplitter) - isSplitter; + isSplitter }, protected=TRUE) setMethodS3("getSegments", "CBS", function(fit, simplify=FALSE, splitters=TRUE, addGaps=FALSE, ...) { # Argument 'splitters': - splitters <- Arguments$getLogical(splitters); + splitters <- Arguments$getLogical(splitters) - segs <- fit$output; + segs <- fit$output - isSplitter <- isSegmentSplitter(fit); + isSplitter <- isSegmentSplitter(fit) # Add 'sampleName' column? if (nrow(segs) > 0) { - sampleName <- rep(getSampleName(fit), times=nrow(segs)); - sampleName[isSplitter] <- as.character(NA); + sampleName <- rep(getSampleName(fit), times=nrow(segs)) + sampleName[isSplitter] <- as.character(NA) if (!is.element("sampleName", colnames(segs))) { - segs <- cbind(sampleName=I(sampleName), segs); + segs <- cbind(sampleName=I(sampleName), segs) } else { - segs[,"sampleName"] <- sampleName; + segs[,"sampleName"] <- sampleName } } # Drop chromosome splitters? if (!splitters) { - segs <- segs[!isSplitter,]; + segs <- segs[!isSplitter,] } # Add splitters for "gaps"... if (splitters && addGaps) { # Chromosome gaps - n <- nrow(segs); - chrs <- segs$chromosome; - gapsAfter <- which(diff(chrs) != 0L); - gapsAfter <- gapsAfter[!is.na(chrs[gapsAfter])]; - nGaps <- length(gapsAfter); + n <- nrow(segs) + chrs <- segs$chromosome + gapsAfter <- which(diff(chrs) != 0L) + gapsAfter <- gapsAfter[!is.na(chrs[gapsAfter])] + nGaps <- length(gapsAfter) if (nGaps > 0L) { - idxs <- seq_len(n); - values <- rep(NA_integer_, times=nGaps); - idxs <- insert(idxs, ats=gapsAfter+1L, values=values); - segs <- segs[idxs,]; + idxs <- seq_len(n) + values <- rep(NA_integer_, times=nGaps) + idxs <- insert(idxs, ats=gapsAfter+1L, values=values) + segs <- segs[idxs,] } # Other gaps - n <- nrow(segs); - chrs <- segs$chromosome; - starts <- segs$tcnStart[-1L]; - ends <- segs$tcnEnd[-n]; - gapsAfter <- which(starts != ends); - onSameChr <- (chrs[gapsAfter+1L] == chrs[gapsAfter] ); - gapsAfter <- gapsAfter[onSameChr]; - nGaps <- length(gapsAfter); + n <- nrow(segs) + chrs <- segs$chromosome + starts <- segs$tcnStart[-1L] + ends <- segs$tcnEnd[-n] + gapsAfter <- which(starts != ends) + onSameChr <- (chrs[gapsAfter+1L] == chrs[gapsAfter] ) + gapsAfter <- gapsAfter[onSameChr] + nGaps <- length(gapsAfter) if (nGaps > 0L) { - idxs <- seq_len(n); - values <- rep(NA_integer_, times=nGaps); - idxs <- insert(idxs, ats=gapsAfter+1L, values=values); - segs <- segs[idxs,]; + idxs <- seq_len(n) + values <- rep(NA_integer_, times=nGaps) + idxs <- insert(idxs, ats=gapsAfter+1L, values=values) + segs <- segs[idxs,] } } - segs; + segs }, private=TRUE) setMethodS3("getChangePoints", "CBS", function(fit, ...) { # Already available? - cps <- fit$changepoints; - if (!is.null(cps)) return(cps); + cps <- fit$changepoints + if (!is.null(cps)) return(cps) - segs <- getSegments(fit, splitters=TRUE); - tcn <- segs[["mean"]]; - n <- length(tcn); + segs <- getSegments(fit, splitters=TRUE) + tcn <- segs[["mean"]] + n <- length(tcn) # Calculate observed (d) data - D <- tcn[-n] - tcn[-1L]; + D <- tcn[-n] - tcn[-1L] cps <- data.frame( d = D - ); + ) - cps; + cps }, private=TRUE) # getChangePoints() @@ -347,101 +347,101 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Updating boundaries"); + verbose && enter(verbose, "Updating boundaries") verbose && cat(verbose, "Number of segments: ", - nbrOfSegments(fit, splitters=FALSE)); + nbrOfSegments(fit, splitters=FALSE)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract the data and segmentation results # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getLocusData(fit); - segs <- getSegments(fit, splitters=TRUE); - segRows <- fit$segRows; - - nbrOfSegments <- nrow(segs); - chromosome <- data$chromosome; - x <- data$x; - y <- data$y; - w <- data$w; - hasWeights <- !is.null(w); + data <- getLocusData(fit) + segs <- getSegments(fit, splitters=TRUE) + segRows <- fit$segRows + + nbrOfSegments <- nrow(segs) + chromosome <- data$chromosome + x <- data$x + y <- data$y + w <- data$w + hasWeights <- !is.null(w) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Update segments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (ss in seq_len(nbrOfSegments)) { - verbose && enter(verbose, sprintf("Segment %d of %d", ss, nbrOfSegments)); - segRow <- segRows[ss,]; - seg <- segs[ss,]; + verbose && enter(verbose, sprintf("Segment %d of %d", ss, nbrOfSegments)) + segRow <- segRows[ss,] + seg <- segs[ss,] # A splitter - nothing todo? if (is.na(segRow[[1]]) && is.na(segRow[[2]])) { - next; + next } # (a) Identify units (loci) - units <- segRow[[1]]:segRow[[2]]; - verbose && cat(verbose, "Loci:"); - verbose && str(verbose, units); + units <- segRow[[1]]:segRow[[2]] + verbose && cat(verbose, "Loci:") + verbose && str(verbose, units) # (b) Extract signals - ySS <- y[units]; - xSS <- x[units]; - cSS <- chromosome[units]; + ySS <- y[units] + xSS <- x[units] + cSS <- chromosome[units] if (hasWeights) { - wSS <- w[units]; + wSS <- w[units] } # (c) Drop missing values - keep <- (!is.na(ySS) & !is.na(xSS) & !is.na(cSS)); + keep <- (!is.na(ySS) & !is.na(xSS) & !is.na(cSS)) if (hasWeights) { - keep <- keep & (!is.na(wSS) & wSS > 0); + keep <- keep & (!is.na(wSS) & wSS > 0) } - keep <- which(keep); - ySS <- ySS[keep]; - xSS <- xSS[keep]; - cSS <- cSS[keep]; + keep <- which(keep) + ySS <- ySS[keep] + xSS <- xSS[keep] + cSS <- cSS[keep] if (hasWeights) { - wSS <- wSS[keep]; + wSS <- wSS[keep] } - units <- units[keep]; - verbose && cat(verbose, "Loci (non-missing):"); - verbose && str(verbose, units); + units <- units[keep] + verbose && cat(verbose, "Loci (non-missing):") + verbose && str(verbose, units) # (d) Identify (chromosome, start, stop) - stopifnot(all(cSS == cSS[1])); - cSS <- cSS[1]; - xRange <- range(xSS, na.rm=TRUE); - verbose && cat(verbose, "Range:"); - verbose && print(verbose, xRange); + .stop_if_not(all(cSS == cSS[1])) + cSS <- cSS[1] + xRange <- range(xSS, na.rm=TRUE) + verbose && cat(verbose, "Range:") + verbose && print(verbose, xRange) # (e) Update segment information - seg$chromosome <- cSS; - seg$start <- xRange[1]; - seg$end <- xRange[2]; + seg$chromosome <- cSS + seg$start <- xRange[1] + seg$end <- xRange[2] - segs[ss,] <- seg; + segs[ss,] <- seg - verbose && exit(verbose); + verbose && exit(verbose) } # for (ss ...) # Update results - res <- fit; - res$output <- segs; + res <- fit + res$output <- segs # Rejoin segments? if (isTRUE(res$params$joinSegments)) { - res <- joinSegments(res, verbose=less(verbose,10)); + res <- joinSegments(res, verbose=less(verbose,10)) } - verbose && exit(verbose); + verbose && exit(verbose) - res; + res }, protected=TRUE) # updateBoundaries() @@ -451,54 +451,54 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'avg': - avg <- match.arg(avg); + avg <- match.arg(avg) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Updating mean level estimates"); + verbose && enter(verbose, "Updating mean level estimates") verbose && cat(verbose, "Number of segments: ", - nbrOfSegments(fit, splitters=FALSE)); + nbrOfSegments(fit, splitters=FALSE)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract the data and segmentation results # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getLocusData(fit); - segs <- getSegments(fit, splitters=TRUE); - segRows <- fit$segRows; - - nbrOfSegments <- nrow(segs); - chromosome <- data$chromosome; - x <- data$x; - y <- data$y; - w <- data$w; - hasWeights <- !is.null(w); + data <- getLocusData(fit) + segs <- getSegments(fit, splitters=TRUE) + segRows <- fit$segRows + + nbrOfSegments <- nrow(segs) + chromosome <- data$chromosome + x <- data$x + y <- data$y + w <- data$w + hasWeights <- !is.null(w) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setting up averaging functions # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (avg == "asis") { - est <- fit$params$meanEstimators; - avg <- est$y; - if (is.null(avg)) avg <- "mean"; - avg <- match.arg(avg); + est <- fit$params$meanEstimators + avg <- est$y + if (is.null(avg)) avg <- "mean" + avg <- match.arg(avg) } if (hasWeights) { if(avg == "mean") { - avgFUN <- weighted.mean; + avgFUN <- weighted.mean } else if(avg == "median") { - avgFUN <- weightedMedian; + avgFUN <- weightedMedian } else { - throw("Value of argument 'avg' is not supported with weights: ", avg); + throw("Value of argument 'avg' is not supported with weights: ", avg) } } else { - avgFUN <- get(avg, mode="function"); + avgFUN <- get(avg, mode="function") } @@ -506,65 +506,65 @@ # Update segments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (ss in seq_len(nbrOfSegments)) { - verbose && enter(verbose, sprintf("Segment %d of %d", ss, nbrOfSegments)); - segRow <- segRows[ss,]; - seg <- segs[ss,]; + verbose && enter(verbose, sprintf("Segment %d of %d", ss, nbrOfSegments)) + segRow <- segRows[ss,] + seg <- segs[ss,] # A splitter - nothing todo? if (!is.finite(segRow[[1]]) || !is.finite(segRow[[2]])) { - next; + next } # (a) Identify units (loci) - units <- segRow[[1]]:segRow[[2]]; + units <- segRow[[1]]:segRow[[2]] # (b) Extract signals - ySS <- y[units]; + ySS <- y[units] if (hasWeights) { - wSS <- w[units]; + wSS <- w[units] } # (c) Drop missing values - keep <- (!is.na(ySS)); + keep <- (!is.na(ySS)) if (hasWeights) { - keep <- keep & (!is.na(wSS) & wSS > 0); + keep <- keep & (!is.na(wSS) & wSS > 0) } - keep <- which(keep); - ySS <- ySS[keep]; + keep <- which(keep) + ySS <- ySS[keep] if (hasWeights) { - wSS <- wSS[keep]; + wSS <- wSS[keep] } - units <- units[keep]; - nbrOfLoci <- length(units); + units <- units[keep] + nbrOfLoci <- length(units) # (d) Update mean if (hasWeights) { - wSS <- wSS / sum(wSS); - gamma <- avgFUN(ySS, w=wSS); + wSS <- wSS / sum(wSS) + gamma <- avgFUN(ySS, w=wSS) } else { - gamma <- avgFUN(ySS); + gamma <- avgFUN(ySS) } # Sanity check - stopifnot(nbrOfLoci == 0 || !is.na(gamma)); + .stop_if_not(nbrOfLoci == 0 || !is.na(gamma)) # (d) Update the segment statistics - seg$mean <- gamma; - seg$nbrOfLoci <- nbrOfLoci; + seg$mean <- gamma + seg$nbrOfLoci <- nbrOfLoci - segs[ss,] <- seg; + segs[ss,] <- seg - verbose && exit(verbose); + verbose && exit(verbose) } # for (ss ...) # Return results - res <- fit; - res$output <- segs; - res <- setMeanEstimators(res, y=avg); + res <- fit + res$output <- segs + res <- setMeanEstimators(res, y=avg) - verbose && exit(verbose); + verbose && exit(verbose) - res; + res }, protected=TRUE) # updateMeans() @@ -573,138 +573,76 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Resegmenting a ", class(fit)[1], " object"); - segFcnName <- "segmentByCBS"; - segFcn <- getMethodS3(segFcnName, "default"); + verbose && enter(verbose, "Resegmenting a ", class(fit)[1], " object") + segFcnName <- "segmentByCBS" + segFcn <- getMethodS3(segFcnName, "default") # Use the locus-level data of the segmentation object - data <- getLocusData(fit); - class(data) <- "data.frame"; - drop <- c("index"); - keep <- !is.element(colnames(data), drop); - data <- data[,keep]; - verbose && str(verbose, data); + data <- getLocusData(fit) + class(data) <- "data.frame" + drop <- c("index") + keep <- !is.element(colnames(data), drop) + data <- data[,keep] + verbose && str(verbose, data) - verbose && cat(verbose, "Number of loci: ", nrow(data)); + verbose && cat(verbose, "Number of loci: ", nrow(data)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setup arguments to be passed # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Overriding default arguments"); + verbose && enter(verbose, "Overriding default arguments") # (a) The default arguments - formals <- formals(segFcn); + formals <- formals(segFcn) - formals <- formals[!sapply(formals, FUN=is.language)]; - formals <- formals[!sapply(formals, FUN=is.name)]; - drop <- c("chromosome", "x", "y", "w", "..."); - keep <- !is.element(names(formals), drop); - formals <- formals[keep]; + formals <- formals[!sapply(formals, FUN=is.language)] + formals <- formals[!sapply(formals, FUN=is.name)] + drop <- c("chromosome", "x", "y", "w", "...") + keep <- !is.element(names(formals), drop) + formals <- formals[keep] # (b) The arguments used in previous fit - params <- fit$params; - keep <- is.element(names(params), names(formals)); - params <- params[keep]; + params <- fit$params + keep <- is.element(names(params), names(formals)) + params <- params[keep] # (c) The arguments in '...' - userArgs <- list(..., verbose=verbose); + userArgs <- list(..., verbose=verbose) # (d) Merge - args <- formals; - args2 <- c(params, userArgs); + args <- formals + args2 <- c(params, userArgs) for (kk in seq_along(args2)) { - value <- args2[[kk]]; + value <- args2[[kk]] if (!is.null(value)) { - key <- names(args2)[kk]; + key <- names(args2)[kk] if (!is.null(key)) { - args[[key]] <- value; + args[[key]] <- value } else { - args <- c(args, list(value)); + args <- c(args, list(value)) } } } # for (key ...) - verbose && str(verbose, args[names(args) != "verbose"]); + verbose && str(verbose, args[names(args) != "verbose"]) - args <- c(list(data), args); - verbose && cat(verbose, "Arguments with data:"); - verbose && str(verbose, args[names(args) != "verbose"]); - verbose && exit(verbose); + args <- c(list(data), args) + verbose && cat(verbose, "Arguments with data:") + verbose && str(verbose, args[names(args) != "verbose"]) + verbose && exit(verbose) - verbose && enter(verbose, sprintf("Calling %s()", segFcnName)); - fit <- do.call(segFcnName, args); - verbose && exit(verbose); + verbose && enter(verbose, sprintf("Calling %s()", segFcnName)) + fit <- do.call(segFcnName, args) + verbose && exit(verbose) - verbose && exit(verbose); + verbose && exit(verbose) - fit; + fit }, protected=TRUE) # resegment() - - -############################################################################ -# HISTORY: -# 2014-02-03 -# o BUG FIX: all.equal() for CBS would pass the first/dispatch argument -# to NextMethod() as 'target=target' and not as 'object=target', which -# would result in it being passed it twice both named and non-named -# where the latter would become argument 'tolerance=target' in an -# internal call to all.equal() for numerics. In recent R-devel version -# this would generate "Error in all.equal.numeric(target[[i]], -# current[[i]], check.attributes = check.attributes, : 'tolerance' -# should be numeric Calls: stopifnot ... all.equal.default -> -# all.equal.list -> all.equal -> all.equal.numeric". -# 2013-12-17 -# o BUG FIX: getChangePoints() for CBS returned empty results. -# 2013-10-20 -# o Added getChangePoints() for CBS. -# 2012-09-21 -# o Now getSegments(..., splitters=TRUE) for CBS and PSCBS inserts NA -# rows whereever there is a "gap" between segments. A "gap" is when -# two segments are not connected (zero distance). -# 2012-06-03 -# o BUG FIX: all.equal(target, current) for CBS objects would give an -# error if either 'target' or 'current' had zero segments. -# 2011-12-12 -# o Added optional argument 'indices' to getLocusData() to be able -# to retrieve the locus-level data as indexed by input data. -# 2011-11-17 -# o Added resegment() for CBS for easy resegmentation. -# 2011-11-15 -# o Now updateMeans() uses locus-specific weights, iff available. -# o Added updateBoundaries() for CBS to update (start,stop) per segment. -# o CORRECTNESS: Now updateMeans() for CBS identify loci by the internal -# 'segRows' field and no longer by locations of segment boundaries, -# which gave slightly incorrect estimates for "tied" loci. -# 2011-10-16 -# o Added isSegmentSplitter(). -# 2011-10-08 -# o Relabelled column 'id' to 'sampleName' returned by getSegments(). -# o BUG FIX: getSegments() for CBS would not set 'id' for "splitter" rows. -# o Added mergeTwoSegments() for CBS. -# o Added updateMeans() for CBS. -# o Added all.equal() for CBS. -# 2011-10-02 -# o CLEANUP: Moved getChromosomes(), nbrOfChromosomes(), nbrOfSegments(), -# nbrOfLoci() and print() to AbstractCBS. -# o Now the CBS class extends the AbstractCBS class. -# 2011-09-04 -# o Added getSignalType() for CBS. -# o Added argument 'addCalls' to getLocusData(). -# o Added getSampleName() for CBS. -# 2011-09-03 -# o Added print() and as.character() for CBS. -# o Added CBS() constructor. Although it rairly will be used -# it we be a place holder for the documentation. -# 2011-09-02 -# o Added nbrOfLoci(), nbrOfSegments(), nbrOfChromosomes() and -# getChromosomes() for CBS. -# 2010-11-19 -# o Added append() for CBS objects. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/CBS.RESTRUCT.R r-cran-pscbs-0.64.0/R/CBS.RESTRUCT.R --- r-cran-pscbs-0.63.0/R/CBS.RESTRUCT.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/CBS.RESTRUCT.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,26 +1,27 @@ setMethodS3("shiftTCN", "CBS", function(fit, shift, update=TRUE, ...) { # Argument 'shift': - shift <- Arguments$getDouble(shift, disallow=c("NA", "NaN", "Inf")); + shift <- Arguments$getDouble(shift, disallow=c("NA", "NaN", "Inf")) - data <- getLocusData(fit); - data$y <- data$y + shift; - fit$data <- data; + data <- getLocusData(fit) + data$y <- data$y + shift + fit$data <- data # Not needed anymore - data <- NULL; + data <- NULL if (update) { - fit <- updateMeans(fit, ...); + fit <- updateMeans(fit, ...) } - fit; + fit }, protected=TRUE) ###########################################################################/** # @set "class=CBS" -# @RdocMethod append +# @RdocMethod c +# @alias c.PSCBS # -# @title "Appends one segmentation result to another" +# @title "Concatenates segmentation results" # # \description{ # @get "title". @@ -29,14 +30,12 @@ # @synopsis # # \arguments{ -# \item{x, other}{The two @see "CBS" objects to be combined.} -# \item{other}{A @see "PSCBS" object.} +# \item{\dots}{One or more @see "AbstractCBS" objects to be combined.} # \item{addSplit}{If @TRUE, a "divider" is added between chromosomes.} -# \item{...}{Not used.} # } # # \value{ -# Returns a @see "CBS" object of the same class as argument \code{x}. +# Returns an @see "AbstractCBS" object of the same class in \dots. # } # # @author "HB" @@ -45,210 +44,213 @@ # @seeclass # } #*/########################################################################### -setMethodS3("append", "CBS", function(x, other, addSplit=TRUE, ...) { - # To please R CMD check - this <- x; - +setMethodS3("c", "CBS", function(..., addSplit = TRUE) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'other': - other <- Arguments$getInstanceOf(other, class(this)[1]); - for (field in c("data", "output")) { - dataA <- this[[field]] - dataB <- other[[field]] - namesA <- colnames(dataA) - namesB <- colnames(dataB) - if (!all(namesA == namesB)) { - throw(sprintf("Cannot merge %s objects. Arguments 'other' and 'this' has different sets of columns in field '%s': {%s} [n=%d] != {%s} [n=%d]", class(this)[1], field, paste(namesA, collapse=", "), length(namesA), paste(namesB, collapse=", "), length(namesB))) - } - } - - # Argument 'addSplit': - addSplit <- Arguments$getLogical(addSplit); - - - # Allocate results - res <- this; - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Locus data - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getLocusData(this); - res$data <- rbind(data, getLocusData(other)); - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Segmentation data - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - indexOffset <- nrow(data); - fields <- c("output", "segRows"); - for (field in fields[-1]) { - other[[field]] <- other[[field]] + indexOffset; - } - - splitter <- if (addSplit) NA else NULL; - for (field in fields) { - res[[field]] <- rbind(this[[field]], splitter, other[[field]]); - rownames(res[[field]]) <- NULL; - } - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Parameters - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ksT <- this$params$knownSegments; - ksT$length <- NULL; # In case it's been added - ksO <- other$params$knownSegments; - ksO$length <- NULL; # In case it's been added - res$params$knownSegments <- rbind(ksT, ksO); + args <- list(...) + ## Nothing todo? + nargs <- length(args) + if (nargs == 1) return(args[[1]]) + + isNA <- function(x) is.logical(x) && length(x) == 1L && is.na(x) + + res <- args[[1]] + fields <- c("output", "segRows") + + for (ii in 2:nargs) { + arg <- args[[ii]] + + if (isNA(arg)) { + if (addSplit) { + warning(sprintf("Detected explicit NA in call to c(<%s>, ..., addSplit = TRUE). Ignoring", class(args[[1]])[1])) + next + } + ## Add "splitter" + for (field in fields) { + res[[field]] <- rbind(res[[field]], NA) + } + } else { + ## Locus-level data + data <- getLocusData(res) + data_arg <- getLocusData(arg) + if (!all(colnames(data_arg) == colnames(data))) { + throw(sprintf("Cannot concatenate %s and %s objects, because they have different sets of columns in field %s: {%s} [n=%d] != {%s} [n=%d]", sQuote(class(res)[1]), sQuote(class(arg)[1]), sQuote(field), paste(sQuote(colnames(data)), collapse=", "), ncol(data), paste(sQuote(colnames(data_arg)), collapse=", "), ncol(data_arg))) + } + + indexOffset <- nrow(data) + + data <- rbind(data, getLocusData(arg)) + res[["data"]] <- data + + # Segmentation data + for (field in fields[-1]) { + arg[[field]] <- arg[[field]] + indexOffset + } + splitter <- if (addSplit) NA else NULL + for (field in fields) { + res[[field]] <- rbind(res[[field]], splitter, arg[[field]]) + } + + # Known segments + ksT <- res$params$knownSegments + ksT$length <- NULL # In case it's been added + ksO <- arg$params$knownSegments + ksO$length <- NULL # In case it's been added + res$params$knownSegments <- rbind(ksT, ksO) + } + } ## for (ii ...) + ## Drop row names, iff they've been added + for (field in fields) rownames(res[[field]]) <- NULL + # Sanity check - ns <- sapply(res[fields], FUN=nrow); - stopifnot(all(ns == ns[1])); + ns <- sapply(res[fields], FUN = nrow) + .stop_if_not(all(ns == ns[1])) - res; -}) # append() + res +}) # c() setMethodS3("extractSegments", "CBS", function(this, idxs, ..., verbose=FALSE) { - fit <- this; + fit <- this # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Local functions # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - updateSegRows <- function(segRows, idxs=NULL) { - verbose && str(verbose, segRows); + verbose && str(verbose, segRows) if (!is.null(idxs)) { - segRows <- segRows[idxs,,drop=FALSE]; + segRows <- segRows[idxs,,drop=FALSE] } -# verbose && cat(verbose, "Number of segments: ", nrow(segRows)); -# verbose && str(verbose, segRows); +# verbose && cat(verbose, "Number of segments: ", nrow(segRows)) +# verbose && str(verbose, segRows) # Treat splitters separately - isSplitter <- (is.na(segRows[,1]) & is.na(segRows[,2])); + isSplitter <- (is.na(segRows[,1]) & is.na(segRows[,2])) - ns <- segRows[,2] - segRows[,1] + 1L; -# verbose && cat(verbose, "Number of loci per segment:"); -# verbose && str(verbose, ns); - - ns <- ns[!isSplitter]; - from <- c(1L, cumsum(ns)[-length(ns)]+1L); - to <- from + (ns - 1L); - segRows[!isSplitter,1] <- from; - segRows[!isSplitter,2] <- to; - verbose && str(verbose, segRows); + ns <- segRows[,2] - segRows[,1] + 1L +# verbose && cat(verbose, "Number of loci per segment:") +# verbose && str(verbose, ns) + + ns <- ns[!isSplitter] + from <- c(1L, cumsum(ns)[-length(ns)]+1L) + to <- from + (ns - 1L) + segRows[!isSplitter,1] <- from + segRows[!isSplitter,2] <- to + verbose && str(verbose, segRows) # Sanity check - ns2 <- segRows[,2] - segRows[,1] + 1L; - ns2 <- ns2[!isSplitter]; - stopifnot(all(ns2 == ns)); + ns2 <- segRows[,2] - segRows[,1] + 1L + ns2 <- ns2[!isSplitter] + .stop_if_not(all(ns2 == ns)) - segRows; + segRows } # updateSegRows() # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'idxs': - idxs <- Arguments$getIndices(idxs, max=nbrOfSegments(fit, splitters=TRUE)); + idxs <- Arguments$getIndices(idxs, max=nbrOfSegments(fit, splitters=TRUE)) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Extracting subset of segments"); + verbose && enter(verbose, "Extracting subset of segments") - verbose && cat(verbose, "Number of segments: ", length(idxs)); - verbose && str(verbose, idxs); + verbose && cat(verbose, "Number of segments: ", length(idxs)) + verbose && str(verbose, idxs) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract data and estimates # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getLocusData(fit); - segRows <- fit$segRows; - segs <- getSegments(fit); - params <- fit$params; + data <- getLocusData(fit) + segRows <- fit$segRows + segs <- getSegments(fit) + params <- fit$params # Sanity checks - stopifnot(all(!is.na(data$chromosome) & !is.na(data$x))); + .stop_if_not(all(!is.na(data$chromosome) & !is.na(data$x))) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subset segments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Update table of segments"); - segsT <- segs[idxs,,drop=FALSE]; - verbose && str(verbose, segsT); - verbose && exit(verbose); + verbose && enter(verbose, "Update table of segments") + segsT <- segs[idxs,,drop=FALSE] + verbose && str(verbose, segsT) + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subset data accordingly # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Update locus data"); + verbose && enter(verbose, "Update locus data") - segRowsT <- segRows[idxs,,drop=FALSE]; - from <- segRowsT[[1]]; - to <- segRowsT[[2]]; - ok <- (!is.na(from) & !is.na(to)); - from <- from[ok]; - to <- to[ok]; - keep <- logical(nrow(data)); + segRowsT <- segRows[idxs,,drop=FALSE] + from <- segRowsT[[1]] + to <- segRowsT[[2]] + ok <- (!is.na(from) & !is.na(to)) + from <- from[ok] + to <- to[ok] + keep <- logical(nrow(data)) for (rr in seq_along(from)) { - keep[from[rr]:to[rr]] <- TRUE; + keep[from[rr]:to[rr]] <- TRUE } - keep <- which(keep); - verbose && printf(verbose, "Identified %d (%.2f%%) of %d data rows:\n", length(keep), 100*length(keep)/nrow(data), nrow(data)); - verbose && str(verbose, keep); + keep <- which(keep) + verbose && printf(verbose, "Identified %d (%.2f%%) of %d data rows:\n", length(keep), 100*length(keep)/nrow(data), nrow(data)) + verbose && str(verbose, keep) - dataT <- data[keep,,drop=FALSE]; - verbose && str(verbose, dataT); + dataT <- data[keep,,drop=FALSE] + verbose && str(verbose, dataT) - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Update 'segRows' # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Update 'segRows'"); - segRowsT <- updateSegRows(segRowsT); - d <- segRows[idxs,] - segRowsT; + verbose && enter(verbose, "Update 'segRows'") + segRowsT <- updateSegRows(segRowsT) + d <- segRows[idxs,] - segRowsT # Sanity check - stopifnot(identical(d[,1], d[,2])); - d <- d[,1]; - verbose && cat(verbose, "Row deltas:"); - verbose && str(verbose, d); + .stop_if_not(identical(d[,1], d[,2])) + d <- d[,1] + verbose && cat(verbose, "Row deltas:") + verbose && str(verbose, d) - segRows <- segRows[idxs,,drop=FALSE] - d; - verbose && str(verbose, segRows); + segRows <- segRows[idxs,,drop=FALSE] - d + verbose && str(verbose, segRows) # Sanity checks - stopifnot(suppressWarnings(max(segRows, na.rm=TRUE)) <= nrow(dataT)); - drow <- segRows[-1,1] - segRows[-nrow(segRows),2]; - stopifnot(all(is.na(drow) | (drow > 0))); + .stop_if_not(suppressWarnings(max(segRows, na.rm=TRUE)) <= nrow(dataT)) + drow <- segRows[-1,1] - segRows[-nrow(segRows),2] + .stop_if_not(all(is.na(drow) | (drow > 0))) if (!all(is.na(drow) | (drow > 0))) { - print(segRows); - throw("INTERNAL ERROR: Generated 'segRows' is invalid, because it contains overlapping data chunks."); + print(segRows) + throw("INTERNAL ERROR: Generated 'segRows' is invalid, because it contains overlapping data chunks.") } - verbose && exit(verbose); + verbose && exit(verbose) # Create new object - res <- fit; - res$data <- dataT; - res$output <- segsT; - res$segRows <- segRows; + res <- fit + res$data <- dataT + res$output <- segsT + res$segRows <- segRows - verbose && exit(verbose); + verbose && exit(verbose) - res; + res }, protected=TRUE) # extractSegments() @@ -257,137 +259,88 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nbrOfSegments <- nbrOfSegments(this, splitters=TRUE); + nbrOfSegments <- nbrOfSegments(this, splitters=TRUE) # Argument 'left': - left <- Arguments$getIndex(left, max=nbrOfSegments-1L); + left <- Arguments$getIndex(left, max=nbrOfSegments-1L) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Merging two segments"); - verbose && printf(verbose, "Segments to be merged: %s & %s\n", left, left+1); - verbose && cat(verbose, "Number of segments before merging: ", nbrOfSegments); - verbose && cat(verbose, "Number of segments after merging: ", nbrOfSegments-1L); + verbose && enter(verbose, "Merging two segments") + verbose && printf(verbose, "Segments to be merged: %s & %s\n", left, left+1) + verbose && cat(verbose, "Number of segments before merging: ", nbrOfSegments) + verbose && cat(verbose, "Number of segments after merging: ", nbrOfSegments-1L) - segs <- getSegments(this); - segRows <- this$segRows; + segs <- getSegments(this) + segRows <- this$segRows - rows <- c(left,left+1); - segsT <- segs[rows,,drop=FALSE]; + rows <- c(left,left+1) + segsT <- segs[rows,,drop=FALSE] # Sanity check - chrs <- segsT[["chromosome"]]; + chrs <- segsT[["chromosome"]] if (chrs[1] != chrs[2]) { - throw("Cannot merge segments that are on different chromosomes: ", chrs[1], " != ", chrs[2]); + throw("Cannot merge segments that are on different chromosomes: ", chrs[1], " != ", chrs[2]) } # Merge segments - segT <- segsT[1,]; - fields <- colnames(segsT); - idxsUsed <- c(); + segT <- segsT[1,] + fields <- colnames(segsT) + idxsUsed <- c() # (id) [as in label] - idxs <- grep("(I|i)d$", fields); - idxsUsed <- c(idxsUsed, idxs); + idxs <- grep("(I|i)d$", fields) + idxsUsed <- c(idxsUsed, idxs) # (chromosome) - idxs <- grep("chromosome$", fields); - idxsUsed <- c(idxsUsed, idxs); + idxs <- grep("chromosome$", fields) + idxsUsed <- c(idxsUsed, idxs) # Starts - idxs <- grep("(S|s)tart$", fields); - T <- as.matrix(segsT[,idxs,drop=FALSE]); - segT[,idxs] <- colMins(T, na.rm=TRUE); - idxsUsed <- c(idxsUsed, idxs); + idxs <- grep("(S|s)tart$", fields) + T <- as.matrix(segsT[,idxs,drop=FALSE]) + segT[,idxs] <- colMins(T, na.rm=TRUE) + idxsUsed <- c(idxsUsed, idxs) # Ends - idxs <- grep("(E|e)nd$", fields); - T <- as.matrix(segsT[,idxs,drop=FALSE]); - segT[,idxs] <- colMaxs(T, na.rm=TRUE); - idxsUsed <- c(idxsUsed, idxs); + idxs <- grep("(E|e)nd$", fields) + T <- as.matrix(segsT[,idxs,drop=FALSE]) + segT[,idxs] <- colMaxs(T, na.rm=TRUE) + idxsUsed <- c(idxsUsed, idxs) # Counts - idxs <- grep("(N|n)brOf", fields); - segT[,idxs] <- colSums(segsT[,idxs,drop=FALSE]); - idxsUsed <- c(idxsUsed, idxs); + idxs <- grep("(N|n)brOf", fields) + segT[,idxs] <- colSums(segsT[,idxs,drop=FALSE]) + idxsUsed <- c(idxsUsed, idxs) # "Invalidate" remaining entries - idxsTodo <- setdiff(seq_along(fields), idxsUsed); - segT[,idxsTodo] <- NA; + idxsTodo <- setdiff(seq_along(fields), idxsUsed) + segT[,idxsTodo] <- NA # Update segment table - segs[rows[1],] <- segT; - segs <- segs[-rows[2],]; + segs[rows[1],] <- segT + segs <- segs[-rows[2],] # Update 'segRows' tables - segRows[rows[1],2] <- segRows[rows[2],2]; - segRows <- segRows[-rows[2],]; + segRows[rows[1],2] <- segRows[rows[2],2] + segRows <- segRows[-rows[2],] # Create results object - res <- this; - res$output <- segs; - res$segRows <- segRows; + res <- this + res$output <- segs + res$segRows <- segRows # Update the segment statistics? if (update) { - res <- updateMeans(res); + res <- updateMeans(res) } - verbose && exit(verbose); + verbose && exit(verbose) - res; + res }, protected=TRUE) # mergeTwoSegments() - - - -############################################################################ -# HISTORY: -# 2012-09-13 -# o Added shiftTCN() for CBS. -# 2012-02-24 -# o ROBUSTNESS: Added more sanity checks validating the correctness of -# what is returned by extractSegments() for CBS. -# 2011-11-17 -# o BUG FIX: extractSegments() for CBS would throw an error when -# there were multiple chromosomes. -# 2011-11-15 -# o BUG FIX: extractSegments() for CBS would throw an error, because in -# most cases it would created a corrupt internal 'segRows' field. -# 2011-10-20 -# o Now append() for CBS also appends '...$params$knownSegments'. -# 2011-10-16 -# o Added argument 'update' to mergeTwoSegments(). -# 2011-10-10 -# o Replaced extractRegions() with extractSegments() for CBS. -# o Added extractRegions() for CBS. -# 2011-10-08 -# o Relabelled column 'id' to 'sampleName' returned by getSegments(). -# o BUG FIX: getSegments() for CBS would not set 'id' for "splitter" rows. -# o Added mergeTwoSegments() for CBS. -# o Added updateMeans() for CBS. -# o Added all.equal() for CBS. -# 2011-10-02 -# o CLEANUP: Moved getChromosomes(), nbrOfChromosomes(), nbrOfSegments(), -# nbrOfLoci() and print() to AbstractCBS. -# o Now the CBS class extends the AbstractCBS class. -# 2011-09-04 -# o Added writeSegments() for CBS. -# o Added writeLocusData() for CBS. -# o Added getSignalType() for CBS. -# o Added argument 'addCalls' to getLocusData(). -# o Added getSampleName() for CBS. -# 2011-09-03 -# o Added print() and as.character() for CBS. -# o Added CBS() constructor. Although it rairly will be used -# it we be a place holder for the documentation. -# 2011-09-02 -# o Added nbrOfLoci(), nbrOfSegments(), nbrOfChromosomes() and -# getChromosomes() for CBS. -# 2010-11-19 -# o Added append() for CBS objects. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/CBS.SMOOTH.R r-cran-pscbs-0.64.0/R/CBS.SMOOTH.R --- r-cran-pscbs-0.63.0/R/CBS.SMOOTH.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/CBS.SMOOTH.R 2018-08-12 21:30:44.000000000 +0000 @@ -33,39 +33,29 @@ #*/########################################################################### setMethodS3("getSmoothLocusData", "CBS", function(fit, by, ...) { # Argument 'by': - by <- Arguments$getNumeric(by, range=c(0,Inf)); + by <- Arguments$getNumeric(by, range=c(0,Inf)) - chromosomes <- getChromosomes(fit); - data <- getLocusData(fit); + chromosomes <- getChromosomes(fit) + data <- getLocusData(fit) - chromosome <- NULL; rm(list="chromosome"); # To please R CMD check + chromosome <- NULL; rm(list="chromosome") # To please R CMD check - dataS <- NULL; + dataS <- NULL for (kk in seq_along(chromosomes)) { - chr <- chromosomes[kk]; - dataT <- subset(data, chromosome == chr); - x <- dataT$x; - y <- dataT$y; - rx <- range(x, na.rm=TRUE); - bx <- seq(from=rx[1], to=rx[2], by=by); - xS <- bx[-1] - by/2; - yS <- binMeans(y=y, x=x, bx=bx); - count <- attr(yS, "count"); - yS[count == 0L] <- NA_real_; - attr(yS, "count") <- NULL; - dataTS <- data.frame(chromosome=chr, x=xS, count=count, y=yS); - dataS <- rbind(dataS, dataTS); + chr <- chromosomes[kk] + dataT <- subset(data, chromosome == chr) + x <- dataT$x + y <- dataT$y + rx <- range(x, na.rm=TRUE) + bx <- seq(from=rx[1], to=rx[2], by=by) + xS <- bx[-1] - by/2 + yS <- binMeans(y=y, x=x, bx=bx) + count <- attr(yS, "count") + yS[count == 0L] <- NA_real_ + attr(yS, "count") <- NULL + dataTS <- data.frame(chromosome=chr, x=xS, count=count, y=yS) + dataS <- rbind(dataS, dataTS) } # for (kk ...) - dataS; + dataS }, protected=TRUE) # getSmoothLocusData() - - -############################################################################ -# HISTORY: -# 2013-10-09 -# o Now getSmoothLocusData() for CBS also returns column 'count'. -# 2013-04-18 -# o Added getSmoothLocusData() for CBS. -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/CBS.updateMeansTogether.R r-cran-pscbs-0.64.0/R/CBS.updateMeansTogether.R --- r-cran-pscbs-0.63.0/R/CBS.updateMeansTogether.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/CBS.updateMeansTogether.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,100 +1,91 @@ -setMethodS3("updateMeansTogether", "CBS", function(fit, idxList, ..., avg=c("mean", "median"), verbose=FALSE) { - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nbrOfSegments <- nbrOfSegments(fit, splitters=TRUE); - - # Argument 'idxList': - if (!is.list(idxList)) { - idxList <- list(idxList); - } - idxList <- lapply(idxList, FUN=function(idxs) { - idxs <- Arguments$getIndices(idxs, max=nbrOfSegments); - sort(unique(idxs)); - }); - - # Argument 'avg': - avg <- match.arg(avg); - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); - if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); - } - - verbose && enter(verbose, "Updating mean level estimates of multiple segments"); - - verbose && cat(verbose, "Segments:"); - verbose && str(verbose, idxList); - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setting up averaging functions - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - avgFUN <- get(avg, mode="function"); - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract the data and segmentation results - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getLocusData(fit); - - segs <- getSegments(fit, splitters=TRUE); - - nbrOfSegments <- nrow(segs); - verbose && cat(verbose, "Total number of segments: ", nbrOfSegments); - - for (ss in seq_along(idxList)) { - idxs <- idxList[[ss]]; - - fitT <- extractSegments(fit, idxs); - verbose && cat(verbose, "Number of segments: ", nbrOfSegments(fitT)); - - dataT <- getLocusData(fitT); - segsT <- getSegments(fitT); - - y <- dataT$y; - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Update the TCN segments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Recalculate TCN means"); - - # (c) Adjust for missing values - keep <- which(!is.na(y)); - - # (d) Update mean - gamma <- avgFUN(y[keep]); - - # Sanity check - stopifnot(length(gamma) == 0 || !is.na(gamma)); - - mus <- c(mean=gamma); - - verbose && print(verbose, mus); - verbose && exit(verbose); - - for (key in names(mus)) { - segs[idxs,key] <- mus[key]; - } - } # for (ss ...) - - # Return results - res <- fit; - res$output <- segs; - res <- setMeanEstimators(res, y=avg); - - verbose && exit(verbose); - - res; -}, private=TRUE) # updateMeansTogether() - - - -############################################################################ -# HISTORY: -# 2011-11-28 -# o Added updateMeansTogether() for CBS. -# o Created from PairedPSCBS.updateMeansTogether.R. -############################################################################ +setMethodS3("updateMeansTogether", "CBS", function(fit, idxList, ..., avg=c("mean", "median"), verbose=FALSE) { + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Validate arguments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + nbrOfSegments <- nbrOfSegments(fit, splitters=TRUE) + + # Argument 'idxList': + if (!is.list(idxList)) { + idxList <- list(idxList) + } + idxList <- lapply(idxList, FUN=function(idxs) { + idxs <- Arguments$getIndices(idxs, max=nbrOfSegments) + sort(unique(idxs)) + }) + + # Argument 'avg': + avg <- match.arg(avg) + + # Argument 'verbose': + verbose <- Arguments$getVerbose(verbose) + if (verbose) { + pushState(verbose) + on.exit(popState(verbose)) + } + + verbose && enter(verbose, "Updating mean level estimates of multiple segments") + + verbose && cat(verbose, "Segments:") + verbose && str(verbose, idxList) + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Setting up averaging functions + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + avgFUN <- get(avg, mode="function") + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Extract the data and segmentation results + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + data <- getLocusData(fit) + + segs <- getSegments(fit, splitters=TRUE) + + nbrOfSegments <- nrow(segs) + verbose && cat(verbose, "Total number of segments: ", nbrOfSegments) + + for (ss in seq_along(idxList)) { + idxs <- idxList[[ss]] + + fitT <- extractSegments(fit, idxs) + verbose && cat(verbose, "Number of segments: ", nbrOfSegments(fitT)) + + dataT <- getLocusData(fitT) + segsT <- getSegments(fitT) + + y <- dataT$y + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Update the TCN segments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + verbose && enter(verbose, "Recalculate TCN means") + + # (c) Adjust for missing values + keep <- which(!is.na(y)) + + # (d) Update mean + gamma <- avgFUN(y[keep]) + + # Sanity check + .stop_if_not(length(gamma) == 0 || !is.na(gamma)) + + mus <- c(mean=gamma) + + verbose && print(verbose, mus) + verbose && exit(verbose) + + for (key in names(mus)) { + segs[idxs,key] <- mus[key] + } + } # for (ss ...) + + # Return results + res <- fit + res$output <- segs + res <- setMeanEstimators(res, y=avg) + + verbose && exit(verbose) + + res +}, private=TRUE) # updateMeansTogether() diff -Nru r-cran-pscbs-0.63.0/R/CNA.EXTS.R r-cran-pscbs-0.64.0/R/CNA.EXTS.R --- r-cran-pscbs-0.63.0/R/CNA.EXTS.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/CNA.EXTS.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,26 +1,18 @@ -setMethodS3("segmentByCBS", "CNA", function(y, ...) { - # To please R CMD check - cnData <- y; - - # Extract signals of interest - chromosome <- cnData$chrom; - x <- cnData$maploc; - y <- cnData[,3]; - signalType <- attr(cnData, "data.type"); - sampleName <- colnames(cnData)[3]; - -# str(list(y=y, chromosome=chromosome, x=x)); - - fit <- segmentByCBS(y=y, chromosome=chromosome, x=x, ...); - sampleName(fit) <- sampleName; - - fit; -}) # segmentByCBS() - - -############################################################################# -# HISTORY: -# 2011-09-04 -# o Added segmentByCBS for CNA objects. -# o Created. -############################################################################# +setMethodS3("segmentByCBS", "CNA", function(y, ...) { + # To please R CMD check + cnData <- y + + # Extract signals of interest + chromosome <- cnData$chrom + x <- cnData$maploc + y <- cnData[,3] + signalType <- attr(cnData, "data.type") + sampleName <- colnames(cnData)[3] + +# str(list(y=y, chromosome=chromosome, x=x)) + + fit <- segmentByCBS(y=y, chromosome=chromosome, x=x, ...) + sampleName(fit) <- sampleName + + fit +}) # segmentByCBS() diff -Nru r-cran-pscbs-0.63.0/R/DNAcopy.EXTS.R r-cran-pscbs-0.64.0/R/DNAcopy.EXTS.R --- r-cran-pscbs-0.63.0/R/DNAcopy.EXTS.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/DNAcopy.EXTS.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,279 +1,245 @@ -###########################################################################/** -# @set class=CBS -# @RdocMethod as.DNAcopy -# -# @title "Coerces a CBS object to a DNAcopy object" -# -# \description{ -# @get "title". -# } -# -# @synopsis -# -# \arguments{ -# \item{fit}{A @see "CBS" object."} -# \item{...}{Not used.} -# } -# -# \value{ -# Returns a @see "DNAcopy" object (of the \pkg{DNAcopy} package). -# } -# -# \examples{ -# @include "../incl/segmentByCBS.Rex" -# @include "../incl/as.DNAcopy.Rex" -# } -# -# @author "HB" -# -# \seealso{ -# \code{\link[PSCBS:as.CBS.DNAcopy]{as.CBS()}}. -# @seeclass -# } -# -# @keyword internal -#*/########################################################################### -setMethodS3("as.DNAcopy", "CBS", function(fit, ...) { - sampleName <- getSampleName(fit); - if (is.na(sampleName)) sampleName <- ""; - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setup the 'data' field - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getLocusData(fit); - - # Keep only certain columns - keep <- match(c("chromosome", "x"), colnames(data)); - keep <- c(keep, 3L); - data <- data[,keep,drop=FALSE]; - - # Sanity check - stopifnot(ncol(data) == 3); - - # Rename column names - colnames(data) <- c("chrom", "maploc", sampleName); - - class(data) <- c("CNA", "data.frame"); - attr(data, "data.type") <- "logratio"; - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setup the 'output' field - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - output <- getSegments(fit, splitter=FALSE); - rownames <- rownames(output); - - output <- data.frame( - ID = sampleName, - chrom = output$chromosome, - loc.start = output$start, - loc.end = output$end, - num.mark = output$nbrOfLoci, - seg.mean = output$mean, - stringsAsFactors=FALSE - ); - rownames(output) <- rownames; - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setup up 'DNAcopy' object - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - res <- list(); - res$data <- data; - res$output <- output; - res$call <- NA; - class(res) <- "DNAcopy"; - - res; -}, protected=TRUE) # as.DNAcopy() - - - -setMethodS3("nbrOfSegments", "DNAcopy", function(fit, ...) { - segs <- fit$output; - nrow(segs); -}) - -setMethodS3("nbrOfLoci", "DNAcopy", function(fit, ...) { - nrow(fit$data); -}) - -setMethodS3("nbrOfSamples", "DNAcopy", function(fit, ...) { - length(getSampleNames(fit, ...)); -}) - -setMethodS3("getSampleNames", "DNAcopy", function(fit, ...) { - names <- colnames(fit$data); - names <- setdiff(names, c("chrom", "maploc")); - names; -}) - -setMethodS3("getChromosomes", "DNAcopy", function(fit, ...) { - chromosomes <- fit$data$chrom; - sort(unique(chromosomes)); -}) - - -setMethodS3("estimateStandardDeviation", "DNAcopy", function(fit, sample=1L, method=c("diff", "abs", "res"), estimator=c("mad", "sd"), na.rm=TRUE, weights=NULL, ...) { - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'sample': - sample <- Arguments$getIndex(sample, max=nbrOfSamples(fit)); - - # Argument 'method': - method <- match.arg(method); - - # Argument 'estimator': - estimator <- match.arg(estimator); - - - - nbrOfLoci <- nbrOfLoci(fit); - - # Argument 'weights': - if (!is.null(weights)) { - weights <- Arguments$getNumerics(weights, range=c(0,Inf), length=rep(nbrOfLoci, times=2)); - } - - - # Nothing to do? - if (nbrOfLoci <= 1) { - return(NA_real_); - } - - # Get the estimator function - if (!is.null(weights)) { - estimator <- sprintf("weighted %s", estimator); - estimator <- R.utils::toCamelCase(estimator); - } - estimatorFcn <- get(estimator, mode="function"); - - - # Extract sample of interest - fit <- subset(fit, samplelist=sample); - - y <- fit$data[,3L]; - - if (method == "diff") { - y <- diff(y); - - # Weighted estimator? - if (!is.null(weights)) { - # Calculate weights per pair - weights <- (weights[1:(nbrOfLoci-1)]+weights[2:nbrOfLoci])/2; - sigma <- estimatorFcn(y, w=weights, na.rm=na.rm)/sqrt(2); - } else { - sigma <- estimatorFcn(y, na.rm=na.rm)/sqrt(2); - } - } else if (method == "abs") { - if (!is.null(weights)) { - sigma <- estimatorFcn(y, w=weights, na.rm=na.rm); - } else { - sigma <- estimatorFcn(y, na.rm=na.rm); - } - } else if (method == "res") { - yS <- extractSegmentMeansByLocus(fit); - dy <- y - yS; - if (!is.null(weights)) { - sigma <- estimatorFcn(dy, w=weights, na.rm=na.rm); - } else { - sigma <- estimatorFcn(dy, na.rm=na.rm); - } - } else { - throw("Method no implemented: ", method); - } - - sigma; -}) # estimateStandardDeviation() - - -setMethodS3("extractSegmentMeansByLocus", "DNAcopy", function(fit, sample=1L, ...) { - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'FUN': - FUN <- match.arg(FUN); - FUN <- get(FUN, mode="function"); - - # Argument 'sample': - sample <- Arguments$getIndex(sample, max=nbrOfSamples(fit)); - - # Extract sample of interest - fit <- subset(fit, samplelist=sample); - - data <- fit$data; - chr <- data$chrom; - x <- data$maploc; - y <- data[,sample,drop=TRUE]; - - segs <- fit$output; - nbrOfSegments <- nrow(segs); - nbrOfLoci <- nbrOfLoci(fit); - - # Get mean estimators - estList <- getMeanEstimators(fit, "y"); - avgY <- estList$y; - - yS <- y; - for (ss in seq_len(nbrOfSegments)) { - seg <- segs[ss,]; - idxs <- which(seg$chrom == chr & seg$loc.start <= x & x <= seg$loc.end); - idxs <- Arguments$getIndices(idxs, max=nbrOfLoci); - - ySS <- y[idxs]; - ok <- is.finite(ySS); - # Sanity check - ## stopifnot(sum(ok) == seg$num.mark); # Not dealing with ties - mu <- avgY(ySS[ok]); - yS[idxs] <- mu; - } # for (ss ...) - - yS; -}, private=TRUE) # extractSegmentMeansByLocus() - - - -setMethodS3("writeSegments", "DNAcopy", function(fit, samples=seq_len(nbrOfSamples(fit)), ...) { - # Argument 'samples': - samples <- Arguments$getIndices(samples, max=nbrOfSamples(fit)); - - pathnames <- c(); - for (ii in samples) { - fitII <- as.CBS(fit, sample=ii); - pathnameII <- writeSegments(fitII, ...); - pathnames <- c(pathnames, pathnameII); - } # for (ii ...) - - pathnames; -}) # writeSegments() - - -############################################################################ -# HISTORY: -# 2012-05-30 -# o Added writeSegments() for DNAcopy objects. -# 2011-09-04 -# o as.DNAcopy() did not drop "splitters" for the segment table. -# 2011-09-03 -# o Added as.DNAcopy() for CBS to coerce a CBS object to a DNAcopy object. -# 2011-09-02 -# o Added internal extractSegmentMeansByLocus() for DNAcopy, which is -# used by estimateStandardDeviation(..., method="res"). -# o Added estimateStandardDeviation() for DNAcopy. -# o ROBUSTNESS: Now getSampleNames() drops columns 'chrom' and 'maploc', -# instead of assuming their positions. -# o ROBUSTNESS: Now nbrOfSamples() utilizes getSampleNames(). -# o Added nbrOfSegments(), nbrOfLoci(), nbrOfSamples(), getSampleNames() -# and getChromosomes() for DNAcopy. -# HISTORY FROM PRIVATE SCRIPTS: -# 2011-07-20 -# o Added support for estimateStandardDeviation(..., method="res"). -# o Added extractSegmentMeansByLocus(). -# 2011-07-18 -# o Added getSampleNames(). -# o Added plotTracks() for DNAcopy. -# o Added nbrOfSegments(), nbrOfLoci() and nbrOfSamples(). -# 2011-07-17 -# o Added estimateStandardDeviation() to DNAcopy objects. -############################################################################ +###########################################################################/** +# @set class=CBS +# @RdocMethod as.DNAcopy +# +# @title "Coerces a CBS object to a DNAcopy object" +# +# \description{ +# @get "title". +# } +# +# @synopsis +# +# \arguments{ +# \item{fit}{A @see "CBS" object."} +# \item{...}{Not used.} +# } +# +# \value{ +# Returns a @see "DNAcopy" object (of the \pkg{DNAcopy} package). +# } +# +# \examples{ +# @include "../incl/segmentByCBS.Rex" +# @include "../incl/as.DNAcopy.Rex" +# } +# +# @author "HB" +# +# \seealso{ +# \code{\link[PSCBS:as.CBS.DNAcopy]{as.CBS()}}. +# @seeclass +# } +# +# @keyword internal +#*/########################################################################### +setMethodS3("as.DNAcopy", "CBS", function(fit, ...) { + sampleName <- getSampleName(fit) + if (is.na(sampleName)) sampleName <- "" + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Setup the 'data' field + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + data <- getLocusData(fit) + + # Keep only certain columns + keep <- match(c("chromosome", "x"), colnames(data)) + keep <- c(keep, 3L) + data <- data[,keep,drop=FALSE] + + # Sanity check + .stop_if_not(ncol(data) == 3) + + # Rename column names + colnames(data) <- c("chrom", "maploc", sampleName) + + class(data) <- c("CNA", "data.frame") + attr(data, "data.type") <- "logratio" + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Setup the 'output' field + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + output <- getSegments(fit, splitter=FALSE) + rownames <- rownames(output) + + output <- data.frame( + ID = sampleName, + chrom = output$chromosome, + loc.start = output$start, + loc.end = output$end, + num.mark = output$nbrOfLoci, + seg.mean = output$mean, + stringsAsFactors=FALSE + ) + rownames(output) <- rownames + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Setup up 'DNAcopy' object + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + res <- list() + res$data <- data + res$output <- output + res$call <- NA + class(res) <- "DNAcopy" + + res +}, protected=TRUE) # as.DNAcopy() + + + +setMethodS3("nbrOfSegments", "DNAcopy", function(fit, ...) { + segs <- fit$output + nrow(segs) +}) + +setMethodS3("nbrOfLoci", "DNAcopy", function(fit, ...) { + nrow(fit$data) +}) + +setMethodS3("nbrOfSamples", "DNAcopy", function(fit, ...) { + length(getSampleNames(fit, ...)) +}) + +setMethodS3("getSampleNames", "DNAcopy", function(fit, ...) { + names <- colnames(fit$data) + names <- setdiff(names, c("chrom", "maploc")) + names +}) + +setMethodS3("getChromosomes", "DNAcopy", function(fit, ...) { + chromosomes <- fit$data$chrom + sort(unique(chromosomes)) +}) + + +setMethodS3("estimateStandardDeviation", "DNAcopy", function(fit, sample=1L, method=c("diff", "abs", "res"), estimator=c("mad", "sd"), na.rm=TRUE, weights=NULL, ...) { + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Validate arguments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Argument 'sample': + sample <- Arguments$getIndex(sample, max=nbrOfSamples(fit)) + + # Argument 'method': + method <- match.arg(method) + + # Argument 'estimator': + estimator <- match.arg(estimator) + + + + nbrOfLoci <- nbrOfLoci(fit) + + # Argument 'weights': + if (!is.null(weights)) { + weights <- Arguments$getNumerics(weights, range=c(0,Inf), length=rep(nbrOfLoci, times=2)) + } + + + # Nothing to do? + if (nbrOfLoci <= 1) { + return(NA_real_) + } + + # Get the estimator function + if (!is.null(weights)) { + estimator <- sprintf("weighted %s", estimator) + estimator <- R.utils::toCamelCase(estimator) + } + estimatorFcn <- get(estimator, mode="function") + + + # Extract sample of interest + fit <- subset(fit, samplelist=sample) + + y <- fit$data[,3L] + + if (method == "diff") { + y <- diff(y) + + # Weighted estimator? + if (!is.null(weights)) { + # Calculate weights per pair + weights <- (weights[1:(nbrOfLoci-1)]+weights[2:nbrOfLoci])/2 + sigma <- estimatorFcn(y, w=weights, na.rm=na.rm)/sqrt(2) + } else { + sigma <- estimatorFcn(y, na.rm=na.rm)/sqrt(2) + } + } else if (method == "abs") { + if (!is.null(weights)) { + sigma <- estimatorFcn(y, w=weights, na.rm=na.rm) + } else { + sigma <- estimatorFcn(y, na.rm=na.rm) + } + } else if (method == "res") { + yS <- extractSegmentMeansByLocus(fit) + dy <- y - yS + if (!is.null(weights)) { + sigma <- estimatorFcn(dy, w=weights, na.rm=na.rm) + } else { + sigma <- estimatorFcn(dy, na.rm=na.rm) + } + } else { + throw("Method no implemented: ", method) + } + + sigma +}) # estimateStandardDeviation() + + +setMethodS3("extractSegmentMeansByLocus", "DNAcopy", function(fit, sample=1L, ...) { + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Validate arguments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Argument 'sample': + sample <- Arguments$getIndex(sample, max=nbrOfSamples(fit)) + + # Extract sample of interest + fit <- subset(fit, samplelist=sample) + + data <- fit$data + chr <- data$chrom + x <- data$maploc + y <- data[,sample,drop=TRUE] + + segs <- fit$output + nbrOfSegments <- nrow(segs) + nbrOfLoci <- nbrOfLoci(fit) + + # Get mean estimators + estList <- getMeanEstimators(fit, "y") + avgY <- estList$y + + yS <- y + for (ss in seq_len(nbrOfSegments)) { + seg <- segs[ss,] + idxs <- which(seg$chrom == chr & seg$loc.start <= x & x <= seg$loc.end) + idxs <- Arguments$getIndices(idxs, max=nbrOfLoci) + + ySS <- y[idxs] + ok <- is.finite(ySS) + # Sanity check + ## .stop_if_not(sum(ok) == seg$num.mark) # Not dealing with ties + mu <- avgY(ySS[ok]) + yS[idxs] <- mu + } # for (ss ...) + + yS +}, private=TRUE) # extractSegmentMeansByLocus() + + + +setMethodS3("writeSegments", "DNAcopy", function(fit, samples=seq_len(nbrOfSamples(fit)), ...) { + # Argument 'samples': + samples <- Arguments$getIndices(samples, max=nbrOfSamples(fit)) + + pathnames <- c() + for (ii in samples) { + fitII <- as.CBS(fit, sample=ii) + pathnameII <- writeSegments(fitII, ...) + pathnames <- c(pathnames, pathnameII) + } # for (ii ...) + + pathnames +}) # writeSegments() diff -Nru r-cran-pscbs-0.63.0/R/drawLevels.DNAcopy.R r-cran-pscbs-0.64.0/R/drawLevels.DNAcopy.R --- r-cran-pscbs-0.63.0/R/drawLevels.DNAcopy.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/drawLevels.DNAcopy.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,17 +1,9 @@ setMethodS3("drawLevels", "DNAcopy", function(fit, field=c("seg.mean", "tcn.mean", "dh.mean"), xScale=1, col="red", lwd=3, ...) { - field <- match.arg(field); - segments <- fit$output[,c("loc.start", "loc.end", field)]; + field <- match.arg(field) + segments <- fit$output[,c("loc.start", "loc.end", field)] apply(segments, MARGIN=1, FUN=function(seg) { - x <- c(seg[["loc.start"]], seg[["loc.end"]]); - y <- rep(seg[[field]], times=2); - lines(x=xScale*x, y=y, col=col, lwd=lwd, ...); - }); + x <- c(seg[["loc.start"]], seg[["loc.end"]]) + y <- rep(seg[[field]], times=2) + lines(x=xScale*x, y=y, col=col, lwd=lwd, ...) + }) }) - - - -############################################################################ -# HISTORY: -# 2010-07-09 -# o Created from drawLevels() for CopyNumberRegions in aroma.core. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/exampleData.R r-cran-pscbs-0.64.0/R/exampleData.R --- r-cran-pscbs-0.63.0/R/exampleData.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/exampleData.R 2018-08-12 21:30:44.000000000 +0000 @@ -29,23 +29,16 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'name': - name <- match.arg(name); + name <- match.arg(name) - path <- system.file("data-ex", package="PSCBS", mustWork=TRUE); + path <- system.file("data-ex", package="PSCBS", mustWork=TRUE) if (name == "paired.chr01") { - filename <- "PairedPSCBS,exData,chr01.Rbin"; + filename <- "PairedPSCBS,exData,chr01.Rbin" } - pathname <- Arguments$getReadablePathname(filename, path=path); - data <- loadObject(pathname); + pathname <- Arguments$getReadablePathname(filename, path=path) + data <- loadObject(pathname) - data; + data }, protected=TRUE) - - -############################################################################ -# HISTORY: -# 2013-04-11 -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/findLargeGaps.R r-cran-pscbs-0.64.0/R/findLargeGaps.R --- r-cran-pscbs-0.63.0/R/findLargeGaps.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/findLargeGaps.R 2018-08-12 21:30:44.000000000 +0000 @@ -40,81 +40,66 @@ #*/########################################################################### setMethodS3("findLargeGaps", "default", function(chromosome=NULL, x, minLength, resolution=1L, ...) { # Argument 'x': - x <- Arguments$getNumerics(x); - nbrOfLoci <- length(x); + x <- Arguments$getNumerics(x) + nbrOfLoci <- length(x) # Argument 'chromosome': if (!is.null(chromosome)) { - disallow <- c("Inf"); - chromosome <- Arguments$getIntegers(chromosome, range=c(0,Inf), disallow=disallow, length=c(nbrOfLoci, nbrOfLoci)); + disallow <- c("Inf") + chromosome <- Arguments$getIntegers(chromosome, range=c(0,Inf), disallow=disallow, length=c(nbrOfLoci, nbrOfLoci)) } # Argument 'minLength': - minLength <- Arguments$getNumeric(minLength, range=c(0,Inf)); + minLength <- Arguments$getNumeric(minLength, range=c(0,Inf)) # Argument 'resolution': - resolution <- Arguments$getNumeric(resolution, range=c(0,Inf)); + resolution <- Arguments$getNumeric(resolution, range=c(0,Inf)) if (resolution >= minLength) { throw(sprintf("Cannot identify large gaps. Argument 'resolution' (=%g) is not strictly smaller than 'minLength' (=%g).", resolution, minLength)) } if (!is.null(chromosome)) { - allChromosomes <- sort(unique(chromosome)); - nbrOfChromosomes <- length(allChromosomes); + allChromosomes <- sort(unique(chromosome)) + nbrOfChromosomes <- length(allChromosomes) xEmpty <- vector(mode(x), length=0L) - gaps <- data.frame(chromosome=integer(0L), start=xEmpty, end=xEmpty); + gaps <- data.frame(chromosome=integer(0L), start=xEmpty, end=xEmpty) for (cc in seq_along(allChromosomes)) { - chr <- allChromosomes[cc]; - idxs <- which(chromosome == chr); - chromosomeCC <- chromosome[idxs]; - xCC <- x[idxs]; - gapsCC <- findLargeGaps(chromosome=NULL, x=xCC, minLength=minLength, ...); + chr <- allChromosomes[cc] + idxs <- which(chromosome == chr) + chromosomeCC <- chromosome[idxs] + xCC <- x[idxs] + gapsCC <- findLargeGaps(chromosome=NULL, x=xCC, minLength=minLength, ...) if (nrow(gapsCC) > 0) { - gapsCC <- cbind(chromosome=chr, gapsCC); - gaps <- rbind(gaps, gapsCC); + gapsCC <- cbind(chromosome=chr, gapsCC) + gaps <- rbind(gaps, gapsCC) } } # for (cc ...) } else { - x <- x[is.finite(x)]; - x <- sort(x); - dx <- diff(x); + x <- x[is.finite(x)] + x <- sort(x) + dx <- diff(x) - isGap <- (dx >= minLength); - idxsL <- which(isGap); + isGap <- (dx >= minLength) + idxsL <- which(isGap) ##str(list(x=x, dx=dx, isGap=isGap, idxsL=idxsL)) - xL <- x[idxsL]; - xR <- x[idxsL+1L]; + xL <- x[idxsL] + xR <- x[idxsL+1L] ##str(list(x=x, dx=dx, isGap=isGap, idxsL=idxsL, xL=xL, xR=xR)) - gaps <- data.frame(start=xL+resolution, end=xR-resolution); - gaps$length <- gaps$end - gaps$start; + gaps <- data.frame(start=xL+resolution, end=xR-resolution) + gaps$length <- gaps$end - gaps$start } ## Sanity checks - stopifnot(is.data.frame(gaps)) - stopifnot(all(gaps$start <= gaps$end)) - stopifnot(all(gaps$length >= 0)) + .stop_if_not(is.data.frame(gaps)) + .stop_if_not(all(gaps$start <= gaps$end)) + .stop_if_not(all(gaps$length >= 0)) - gaps; + gaps }) # findLargeGaps() setMethodS3("findLargeGaps", "data.frame", function(chromosome, ...) { - data <- chromosome; - findLargeGaps(chromosome=data$chromosome, x=data$x, ...); + data <- chromosome + findLargeGaps(chromosome=data$chromosome, x=data$x, ...) }) # findLargeGaps() - - - -############################################################################### -# HISTORY: -# 2015-04-25 -# o BUG FIX: findLargeGaps() could return NULL. Now it always returns -# a data.frame. -# 2012-02-22 -# o BUG FIX: findLargeGaps() did not handle missing values for -# argument 'chromosome'. -# 2011-11-22 -# o Added findLargeGaps(). -# o Created. -############################################################################### diff -Nru r-cran-pscbs-0.63.0/R/findNeutralCopyNumberState.R r-cran-pscbs-0.64.0/R/findNeutralCopyNumberState.R --- r-cran-pscbs-0.63.0/R/findNeutralCopyNumberState.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/findNeutralCopyNumberState.R 2018-08-12 21:30:44.000000000 +0000 @@ -35,65 +35,65 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'C': - C <- Arguments$getNumerics(C); - nbrOfLoci <- length(C); + C <- Arguments$getNumerics(C) + nbrOfLoci <- length(C) # Argument 'isAI': - length2 <- rep(nbrOfLoci, times=2); - isAI <- Arguments$getLogicals(isAI, length=length2, disallow=NULL); + length2 <- rep(nbrOfLoci, times=2) + isAI <- Arguments$getLogicals(isAI, length=length2, disallow=NULL) # Argument 'weights': if (!is.null(weights)) { - weights <- Arguments$getNumerics(weights, range=c(0, Inf), length=length2); + weights <- Arguments$getNumerics(weights, range=c(0, Inf), length=length2) } # Argument 'minDensity': - minDensity <- Arguments$getDouble(minDensity); + minDensity <- Arguments$getDouble(minDensity) # Argument 'flavor': - flavor <- match.arg(flavor); + flavor <- match.arg(flavor) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Identifying segments that are copy neutral states"); + verbose && enter(verbose, "Identifying segments that are copy neutral states") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setup # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Identify segments in allelic balance - isAB <- !isAI; + isAB <- !isAI # Identify segments that cannot be called - isNA <- (is.na(isAB) | is.na(C)); + isNA <- (is.na(isAB) | is.na(C)) # Only segments in allelic balance can be considered to be neutral - isNeutral <- isAB; + isNeutral <- isAB # Extracting segments in allelic balance - idxs <- which(isAB); - n <- length(idxs); - verbose && cat(verbose, "Number of segments in allelic balance: ", n); + idxs <- which(isAB) + n <- length(idxs) + verbose && cat(verbose, "Number of segments in allelic balance: ", n) # Special cases? if (n == 0) { # No segments are in allelic balance - verbose && exit(verbose); - return(isNeutral); + verbose && exit(verbose) + return(isNeutral) } else if (n == 1) { # Only one segment is in allelic balance. The best we can do # is to call that segment neutral. - verbose && exit(verbose); - return(isNeutral); + verbose && exit(verbose) + return(isNeutral) } else if (n < 5) { # What to do when the number of segments is really low? /HB 2010-09-09 - warning("The calling of regions in a copy-neutral state is uncertain, because there are less than five (5) regions in allelic balance: ", n); + warning("The calling of regions in a copy-neutral state is uncertain, because there are less than five (5) regions in allelic balance: ", n) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -101,75 +101,75 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subset and standardize weights if (!is.null(weights)) { - weights <- weights[idxs]; - weights <- weights / sum(weights); + weights <- weights[idxs] + weights <- weights / sum(weights) } - y <- C[idxs]; - idxs <- NULL; # Not needed anymore + y <- C[idxs] + idxs <- NULL # Not needed anymore if (verbose) { - cat(verbose, "Data points:"); - df <- data.frame(C=y, weights=weights); - print(verbose, head(df)); - str(verbose, df); - df <- NULL; # Not needed anymore + cat(verbose, "Data points:") + df <- data.frame(C=y, weights=weights) + print(verbose, head(df)) + str(verbose, df) + df <- NULL # Not needed anymore } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Estimate the empirical density # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - fit <- findPeaksAndValleys(y, weights=weights, ...); - verbose && cat(verbose, "Fit:"); + fit <- findPeaksAndValleys(y, weights=weights, ...) + verbose && cat(verbose, "Fit:") - verbose && cat(verbose, "Fit filtered by 'minDensity':"); - ok <- (fit[,"density"] > minDensity); - verbose && print(verbose, fit[ok,]); + verbose && cat(verbose, "Fit filtered by 'minDensity':") + ok <- (fit[,"density"] > minDensity) + verbose && print(verbose, fit[ok,]) # Look for peaks with enough density - isPeak <- (fit[,"type"] == "peak") & ok; - idxs <- which(isPeak); + isPeak <- (fit[,"type"] == "peak") & ok + idxs <- which(isPeak) # Sanity check - stopifnot(length(idxs) >= 1); + .stop_if_not(length(idxs) >= 1) # Extract the first peak if (flavor == "firstPeak") { - idx <- idxs[1]; + idx <- idxs[1] } else if (flavor == "maxPeak") { - idx <- idxs[which.max(fit[idxs,"density"])]; + idx <- idxs[which.max(fit[idxs,"density"])] } - neutralC <- fit[idx,"x"]; + neutralC <- fit[idx,"x"] - verbose && cat(verbose, "Neutral copy number:"); - verbose && cat(verbose, "Mode at: ", neutralC); - verbose && cat(verbose, "Mode ampliture: ", fit[idx,"density"]); + verbose && cat(verbose, "Neutral copy number:") + verbose && cat(verbose, "Mode at: ", neutralC) + verbose && cat(verbose, "Mode ampliture: ", fit[idx,"density"]) # If there is more than one peak, we should only call segments that # are not part of that other peak. if (idx+1 <= nrow(fit)) { - nextValleyC <- fit[idx+1, "x"]; + nextValleyC <- fit[idx+1, "x"] } else { - nextValleyC <- Inf; + nextValleyC <- Inf } - verbose && cat(verbose, "Upper range at: ", nextValleyC); + verbose && cat(verbose, "Upper range at: ", nextValleyC) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Call copy-neutral regions # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - isNeutral <- isNeutral & (C < nextValleyC); + isNeutral <- isNeutral & (C < nextValleyC) # Segments with missing values cannot be called - isNeutral[isNA] <- NA; + isNeutral[isNA] <- NA - verbose && cat(verbose, "Neutral region calls:"); - verbose && summary(verbose, isNeutral); + verbose && cat(verbose, "Neutral region calls:") + verbose && summary(verbose, isNeutral) - verbose && exit(verbose); + verbose && exit(verbose) - isNeutral; + isNeutral }) # findNeutralCopyNumberState() diff -Nru r-cran-pscbs-0.63.0/R/gapsToSegments.R r-cran-pscbs-0.64.0/R/gapsToSegments.R --- r-cran-pscbs-0.63.0/R/gapsToSegments.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/gapsToSegments.R 2018-08-12 21:30:44.000000000 +0000 @@ -38,112 +38,93 @@ #*/########################################################################### setMethodS3("gapsToSegments", "data.frame", function(gaps, resolution=1L, minLength=0L, dropGaps=FALSE, ...) { # To please R CMD check - chromosome <- NULL; rm(list="chromosome"); + chromosome <- NULL; rm(list="chromosome") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'gaps': - keys <- colnames(gaps); - stopifnot(all(is.element(c("start", "end"), keys))); - stopifnot(all(gaps$start <= gaps$end, na.rm=TRUE)); + keys <- colnames(gaps) + .stop_if_not(all(is.element(c("start", "end"), keys))) + .stop_if_not(all(gaps$start <= gaps$end, na.rm=TRUE)) hasChr <- is.element("chromosome", keys) ## Nothing more to do? if (nrow(gaps) == 0L) { - knownSegments <- data.frame(chromosome=integer(1L), start=-Inf, end=+Inf); + knownSegments <- data.frame(chromosome=integer(1L), start=-Inf, end=+Inf) if (!hasChr) knownSegments$hromosome <- NULL return(knownSegments) } # Order gaps by the genome - o <- order(gaps$chromosome, gaps$start, gaps$end); - gaps <- gaps[o,]; + o <- order(gaps$chromosome, gaps$start, gaps$end) + gaps <- gaps[o,] # For each chromosome... knownSegments <- NULL - chromosomes <- sort(unique(gaps$chromosome)); + chromosomes <- sort(unique(gaps$chromosome)) for (chr in chromosomes) { - gapsCC <- subset(gaps, chromosome == chr); - nCC <- nrow(gapsCC); + gapsCC <- subset(gaps, chromosome == chr) + nCC <- nrow(gapsCC) - starts <- gapsCC$start; - ends <- gapsCC$end; + starts <- gapsCC$start + ends <- gapsCC$end # Assert that no overlapping gaps where specified if (!all(starts[-1] >= ends[-nCC], na.rm=TRUE)) { - print(knownSegments); - throw("INTERNAL ERROR: Detected overlapping gaps on chromosome ", chr, " in argument 'gaps'."); + print(knownSegments) + throw("INTERNAL ERROR: Detected overlapping gaps on chromosome ", chr, " in argument 'gaps'.") } # All boundaries in order # (this is possible because gaps are non-overlapping) - naValue <- NA_real_; + naValue <- NA_real_ if (dropGaps) { - bps <- rep(naValue, times=2*nCC); - bps[seq(from=1, to=2*nCC, by=2)] <- starts - resolution; - bps[seq(from=2, to=2*nCC, by=2)] <- ends + resolution; - bps <- c(-Inf, bps, +Inf); - dim(bps) <- c(2L, nCC+1L); + bps <- rep(naValue, times=2*nCC) + bps[seq(from=1, to=2*nCC, by=2)] <- starts - resolution + bps[seq(from=2, to=2*nCC, by=2)] <- ends + resolution + bps <- c(-Inf, bps, +Inf) + dim(bps) <- c(2L, nCC+1L) } else { - bps <- rep(naValue, times=4*nCC); - bps[seq(from=1, to=4*nCC, by=4)] <- starts - resolution; - bps[seq(from=2, to=4*nCC, by=4)] <- starts; - bps[seq(from=3, to=4*nCC, by=4)] <- ends; - bps[seq(from=4, to=4*nCC, by=4)] <- ends + resolution; - bps <- c(-Inf, bps, +Inf); - dim(bps) <- c(2L, 2*nCC+1L); + bps <- rep(naValue, times=4*nCC) + bps[seq(from=1, to=4*nCC, by=4)] <- starts - resolution + bps[seq(from=2, to=4*nCC, by=4)] <- starts + bps[seq(from=3, to=4*nCC, by=4)] <- ends + bps[seq(from=4, to=4*nCC, by=4)] <- ends + resolution + bps <- c(-Inf, bps, +Inf) + dim(bps) <- c(2L, 2*nCC+1L) } - knownSegmentsCC <- data.frame(chromosome=chr, start=bps[1L,], end=bps[2L,]); + knownSegmentsCC <- data.frame(chromosome=chr, start=bps[1L,], end=bps[2L,]) - knownSegments <- rbind(knownSegments, knownSegmentsCC); + knownSegments <- rbind(knownSegments, knownSegmentsCC) } # for (chr ...) -# o <- with(knownSegments, order(chromosome, start, end)); -# knownSegments <- knownSegments[o,]; -# rownames(knownSegments) <- NULL; +# o <- with(knownSegments, order(chromosome, start, end)) +# knownSegments <- knownSegments[o,] +# rownames(knownSegments) <- NULL # Append segment lengths - knownSegments$length <- knownSegments$end - knownSegments$start; + knownSegments$length <- knownSegments$end - knownSegments$start # Drop too short segments - keep <- (knownSegments$length >= minLength); - knownSegments <- knownSegments[keep,]; + keep <- (knownSegments$length >= minLength) + knownSegments <- knownSegments[keep,] # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate generated 'knownSegments' # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - stopifnot(is.data.frame(knownSegments)) - stopifnot(nrow(knownSegments) >= 1L) + .stop_if_not(is.data.frame(knownSegments)) + .stop_if_not(nrow(knownSegments) >= 1L) for (chr in sort(unique(knownSegments$chromosome))) { - dd <- subset(knownSegments, chromosome == chr); + dd <- subset(knownSegments, chromosome == chr) # Known segments must not overlap if (!all(dd$start[-1] >= dd$end[-nrow(dd)], na.rm=TRUE)) { - throw("INTERNAL ERROR: Detected overlapping segments on chromosome ", chr, " in generated 'knownSegments'."); + throw("INTERNAL ERROR: Detected overlapping segments on chromosome ", chr, " in generated 'knownSegments'.") } } - knownSegments; + knownSegments }) # gapsToSegments() - - -############################################################################### -# HISTORY: -# 2012-09-13 -# o Added argument 'dropGaps' to gapsToSegments(). -# 2012-07-22 -# o Added argument 'minLength' to gapsToSegments(). -# 2011-12-12 -# o BUG FIX: Now gapsToSegments() gave invalid segments for chromosomes -# with more than one gap. -# o ROBUSTNESS: Now gapsToSegments() validates argument 'gaps' and -# asserts that it returns non-overlapping segments. -# 2011-11-22 -# o Made gapsToSegments() a method for 'data.frame' class. -# o Renamed gapsToKnownSegments() to gapsToSegments(). -# 2011-10-xx -# o Created. -############################################################################### diff -Nru r-cran-pscbs-0.63.0/R/installDNAcopy.R r-cran-pscbs-0.64.0/R/installDNAcopy.R --- r-cran-pscbs-0.63.0/R/installDNAcopy.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/installDNAcopy.R 2018-08-12 21:30:44.000000000 +0000 @@ -33,34 +33,25 @@ #*/############################################################################ setMethodS3("installDNAcopy", "default", function(..., force=FALSE) { # Argument 'force': - force <- Arguments$getLogical(force); + force <- Arguments$getLogical(force) # Package to be installed - pkgName <- "DNAcopy"; + pkgName <- "DNAcopy" # Is DNAcopy already available? if (!force && isPackageInstalled(pkgName)) { - library(pkgName, character.only=TRUE); - return(invisible()); + library(pkgName, character.only=TRUE) + return(invisible()) } # If not, install it... # To please R CMD check - biocLite <- NULL; rm(list="biocLite"); - source("http://www.bioconductor.org/biocLite.R"); - biocLite(pkgName, ...); + biocLite <- NULL; rm(list="biocLite") + source("http://www.bioconductor.org/biocLite.R") + biocLite(pkgName, ...) # ...and load it - library(pkgName, character.only=TRUE); + library(pkgName, character.only=TRUE) - return(invisible()); + return(invisible()) }) # installDNAcopy() - - -############################################################################ -# HISTORY: -# 2013-09-10 -# o Now 'R CMD check' no longer complaints about DNAcopy. -# 2011-05-31 -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/NonPairedPSCBS.R r-cran-pscbs-0.64.0/R/NonPairedPSCBS.R --- r-cran-pscbs-0.63.0/R/NonPairedPSCBS.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/NonPairedPSCBS.R 2018-08-12 21:30:44.000000000 +0000 @@ -30,52 +30,52 @@ setConstructorS3("NonPairedPSCBS", function(fit=list(), ...) { # Argument 'fit': if (!is.list(fit)) { - throw("Argument 'fit' is not a list: ", class(fit)[1]); + throw("Argument 'fit' is not a list: ", class(fit)[1]) } - extend(PSCBS(fit=fit, ...), "NonPairedPSCBS"); + extend(PSCBS(fit=fit, ...), "NonPairedPSCBS") }) setMethodS3("getLocusData", "NonPairedPSCBS", function(fit, ..., fields=c("asis", "full")) { # Argument 'fields': - fields <- match.arg(fields); + fields <- match.arg(fields) - data <- NextMethod("getLocusData", fields="asis"); + data <- NextMethod("getLocusData", fields="asis") if (fields == "full") { - names <- colnames(data); + names <- colnames(data) - data$isHet <- (data$muN == 1/2); + data$isHet <- (data$muN == 1/2) # BACKWARD COMPATIBILITY: If 'rho' does not exists, calculate # it on the fly from 'betaT'. # NOTE: This should give an error in the future. /HB 2013-10-25 if (is.null(data$rho)) { - data$rho <- 2*abs(data$betaT-1/2); - data$rho[!data$isHet] <- NA_real_; - warning("Locus-level DH signals ('rho') did not exist and were calculated from tumor BAFs ('betaT')"); + data$rho <- 2*abs(data$betaT-1/2) + data$rho[!data$isHet] <- NA_real_ + warning("Locus-level DH signals ('rho') did not exist and were calculated from tumor BAFs ('betaT')") } - data$c1 <- 1/2*(1-data$rho)*data$CT; - data$c2 <- data$CT - data$c1; + data$c1 <- 1/2*(1-data$rho)*data$CT + data$c2 <- data$CT - data$c1 - data$isSNP <- (!is.na(data$betaT) | !is.na(data$muN)); - data$type <- ifelse(data$isSNP, "SNP", "non-polymorphic locus"); + data$isSNP <- (!is.na(data$betaT) | !is.na(data$muN)) + data$type <- ifelse(data$isSNP, "SNP", "non-polymorphic locus") # Labels - data$muNx <- c("AA", "AB", "BB")[2*data$muN + 1L]; - data$isHetx <- c("AA|BB", "AB")[data$isHet + 1L]; + data$muNx <- c("AA", "AB", "BB")[2*data$muN + 1L] + data$isHetx <- c("AA|BB", "AB")[data$isHet + 1L] } - data; + data }, protected=TRUE) # getLocusData() setMethodS3("callROH", "NonPairedPSCBS", function(fit, ...) { - throw(sprintf("Cannot call ROH from '%s' data.", class(fit)[1L])); + throw(sprintf("Cannot call ROH from '%s' data.", class(fit)[1L])) }, private=TRUE) # callROH() @@ -84,63 +84,63 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'from': - from <- match.arg(from); + from <- match.arg(from) # Argument 'adjustFor': if (!is.null(adjustFor)) { - adjustFor <- Arguments$getCharacters(adjustFor); - adjustFor <- tolower(adjustFor); - knownValues <- c("ab", "loh", "roh"); - adjustFor <- match.arg(adjustFor, choices=knownValues, several.ok=TRUE); + adjustFor <- Arguments$getCharacters(adjustFor) + adjustFor <- tolower(adjustFor) + knownValues <- c("ab", "loh", "roh") + adjustFor <- match.arg(adjustFor, choices=knownValues, several.ok=TRUE) } # Argument 'avgTCN' & 'avgDH': - avgTCN <- match.arg(avgTCN); - avgDH <- match.arg(avgDH); + avgTCN <- match.arg(avgTCN) + avgDH <- match.arg(avgDH) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Updating mean level estimates"); - verbose && cat(verbose, "Adjusting for:"); - verbose && print(verbose, adjustFor); + verbose && enter(verbose, "Updating mean level estimates") + verbose && cat(verbose, "Adjusting for:") + verbose && print(verbose, adjustFor) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setting up averaging functions # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (avgTCN == "asis" || avgDH == "asis") { - est <- fit$params$meanEstimators; + est <- fit$params$meanEstimators if (avgTCN == "asis") { - avgTCN <- est$tcn; - if (is.null(avgTCN)) avgTCN <- "mean"; - avgTCN <- match.arg(avgTCN); + avgTCN <- est$tcn + if (is.null(avgTCN)) avgTCN <- "mean" + avgTCN <- match.arg(avgTCN) } if (avgDH == "asis") { - avgDH <- est$dh; - if (is.null(avgDH)) avgDH <- "mean"; - avgDH <- match.arg(avgDH); + avgDH <- est$dh + if (is.null(avgDH)) avgDH <- "mean" + avgDH <- match.arg(avgDH) } } avgList <- list( tcn = get(avgTCN, mode="function"), dh = get(avgDH, mode="function") - ); + ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract the segmentation results # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - segs <- getSegments(fit, splitters=TRUE); - segRows <- list(tcn=fit$tcnSegRows, dh=fit$dhSegRows); - nbrOfSegments <- nrow(segs); - verbose && cat(verbose, "Number of segments: ", nbrOfSegments); + segs <- getSegments(fit, splitters=TRUE) + segRows <- list(tcn=fit$tcnSegRows, dh=fit$dhSegRows) + nbrOfSegments <- nrow(segs) + verbose && cat(verbose, "Number of segments: ", nbrOfSegments) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -148,22 +148,22 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.element("ab", adjustFor)) { if (!is.element("abCall", names(segs))) { - adjustFor <- setdiff(adjustFor, "ab"); - throw("Cannot adjust for AB, because they haven't been called."); + adjustFor <- setdiff(adjustFor, "ab") + throw("Cannot adjust for AB, because they haven't been called.") } } if (is.element("loh", adjustFor)) { if (!is.element("lohCall", names(segs))) { - adjustFor <- setdiff(adjustFor, "loh"); - throw("Cannot adjust for LOH, because they haven't been called."); + adjustFor <- setdiff(adjustFor, "loh") + throw("Cannot adjust for LOH, because they haven't been called.") } } if (is.element("roh", adjustFor)) { if (!is.element("rohCall", names(segs))) { - adjustFor <- setdiff(adjustFor, "roh"); - throw("Cannot adjust for ROH, because they haven't been called."); + adjustFor <- setdiff(adjustFor, "roh") + throw("Cannot adjust for ROH, because they haven't been called.") } } @@ -172,58 +172,58 @@ # Update the (TCN,DH) mean levels from locus-level data? # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (from == "loci") { - data <- getLocusData(fit); - chromosome <- data$chromosome; - x <- data$x; - CT <- data$CT; - rho <- data$rho; + data <- getLocusData(fit) + chromosome <- data$chromosome + x <- data$x + CT <- data$CT + rho <- data$rho - isSplitter <- isSegmentSplitter(fit); + isSplitter <- isSegmentSplitter(fit) for (ss in seq_len(nbrOfSegments)[!isSplitter]) { - verbose && enter(verbose, sprintf("Segment %d of %d", ss, nbrOfSegments)); - seg <- segs[ss,]; - verbose && print(verbose, seg); + verbose && enter(verbose, sprintf("Segment %d of %d", ss, nbrOfSegments)) + seg <- segs[ss,] + verbose && print(verbose, seg) - chr <- seg[["chromosome"]]; - chrTag <- sprintf("chr%02d", chr); + chr <- seg[["chromosome"]] + chrTag <- sprintf("chr%02d", chr) for (what in c("tcn", "dh")) { - segRow <- segRows[[what]][ss,]; + segRow <- segRows[[what]][ss,] # (a) A splitter - nothing todo? if (!is.finite(segRow[[1]]) || !is.finite(segRow[[2]])) { - next; + next } # (b) Identify units (loci) - units <- segRow[[1]]:segRow[[2]]; + units <- segRow[[1]]:segRow[[2]] # (c) Adjust for missing values if (what == "tcn") { - value <- CT; + value <- CT } else if (what == "dh") { - value <- rho; + value <- rho } - keep <- which(!is.na(value[units])); - units <- units[keep]; + keep <- which(!is.na(value[units])) + units <- units[keep] # (d) Update mean - avgFUN <- avgList[[what]]; - gamma <- avgFUN(value[units]); + avgFUN <- avgList[[what]] + gamma <- avgFUN(value[units]) # Sanity check - stopifnot(length(units) == 0 || !is.na(gamma)); + .stop_if_not(length(units) == 0 || !is.na(gamma)) # Update the segment boundaries, estimates and counts - key <- paste(what, "Mean", sep=""); - seg[[key]] <- gamma; + key <- paste(what, "Mean", sep="") + seg[[key]] <- gamma } - verbose && print(verbose, seg); + verbose && print(verbose, seg) - segs[ss,] <- seg; + segs[ss,] <- seg - verbose && exit(verbose); + verbose && exit(verbose) } # for (ss ...) } # if (from ...) @@ -233,32 +233,32 @@ # Adjust segment means from various types of calls # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (length(adjustFor) > 0) { - verbose && enter(verbose, "Adjusting segment means"); - verbose && cat(verbose, "Adjusting for:"); - verbose && print(verbose, adjustFor); + verbose && enter(verbose, "Adjusting segment means") + verbose && cat(verbose, "Adjusting for:") + verbose && print(verbose, adjustFor) if (is.element("ab", adjustFor)) { - verbose && enter(verbose, "Adjusting for AB"); - calls <- segs$abCall; - segs$dhMean[calls] <- 1/2; - verbose && exit(verbose); + verbose && enter(verbose, "Adjusting for AB") + calls <- segs$abCall + segs$dhMean[calls] <- 1/2 + verbose && exit(verbose) } if (is.element("loh", adjustFor)) { - verbose && enter(verbose, "Adjusting for LOH"); - calls <- segs$lohCall; - segs$dhMean[calls] <- 0; - verbose && exit(verbose); + verbose && enter(verbose, "Adjusting for LOH") + calls <- segs$lohCall + segs$dhMean[calls] <- 0 + verbose && exit(verbose) } if (is.element("roh", adjustFor)) { - verbose && enter(verbose, "Adjusting for ROH"); - calls <- segs$rohCall; - segs$dhMean[calls] <- NA_real_; - verbose && exit(verbose); + verbose && enter(verbose, "Adjusting for ROH") + calls <- segs$rohCall + segs$dhMean[calls] <- NA_real_ + verbose && exit(verbose) } - verbose && exit(verbose); + verbose && exit(verbose) } # if (length(adjustFor) > 0) @@ -266,25 +266,25 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Update (C1,C2) mean levels # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Update (C1,C2) per segment"); + verbose && enter(verbose, "Update (C1,C2) per segment") # Append (C1,C2) estimates - tcn <- segs$tcnMean; - dh <- segs$dhMean; - C1 <- 1/2*(1-dh)*tcn; - C2 <- tcn - C1; - segs$c1Mean <- C1; - segs$c2Mean <- C2; - verbose && exit(verbose); + tcn <- segs$tcnMean + dh <- segs$dhMean + C1 <- 1/2*(1-dh)*tcn + C2 <- tcn - C1 + segs$c1Mean <- C1 + segs$c2Mean <- C2 + verbose && exit(verbose) # Return results - res <- fit; - res$output <- segs; - res <- setMeanEstimators(res, tcn=avgTCN, dh=avgDH); + res <- fit + res$output <- segs + res <- setMeanEstimators(res, tcn=avgTCN, dh=avgDH) - verbose && exit(verbose); + verbose && exit(verbose) - res; + res }, private=TRUE) # updateMeans() @@ -294,80 +294,80 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Resegmenting a ", class(fit)[1], " object"); + verbose && enter(verbose, "Resegmenting a ", class(fit)[1], " object") # Use the locus-level data of the PairedPSCBS object - data <- getLocusData(fit); - class(data) <- "data.frame"; - drop <- c("rho", "betaTN", "index"); - keep <- !is.element(colnames(data), drop); - data <- data[,keep]; - verbose && str(verbose, data); + data <- getLocusData(fit) + class(data) <- "data.frame" + drop <- c("rho", "betaTN", "index") + keep <- !is.element(colnames(data), drop) + data <- data[,keep] + verbose && str(verbose, data) - verbose && cat(verbose, "Number of loci: ", nrow(data)); + verbose && cat(verbose, "Number of loci: ", nrow(data)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setup arguments to be passed # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Overriding default arguments"); - segFcnName <- "segmentByPairedNonPSCBS"; - segFcn <- getMethodS3(segFcnName, "default"); + verbose && enter(verbose, "Overriding default arguments") + segFcnName <- "segmentByPairedNonPSCBS" + segFcn <- getMethodS3(segFcnName, "default") # (a) The default arguments - formals <- formals(segFcn); + formals <- formals(segFcn) - formals <- formals[!sapply(formals, FUN=is.language)]; - formals <- formals[!sapply(formals, FUN=is.name)]; - drop <- c("chromosome", "x", "w", "CT", "betaT", "betaN", "muN", "..."); - keep <- !is.element(names(formals), drop); - formals <- formals[keep]; + formals <- formals[!sapply(formals, FUN=is.language)] + formals <- formals[!sapply(formals, FUN=is.name)] + drop <- c("chromosome", "x", "w", "CT", "betaT", "betaN", "muN", "...") + keep <- !is.element(names(formals), drop) + formals <- formals[keep] # (b) The arguments used in previous fit - params <- fit$params; - keep <- is.element(names(params), names(formals)); - params <- params[keep]; + params <- fit$params + keep <- is.element(names(params), names(formals)) + params <- params[keep] # Don't trust 'tbn'! TODO. /HB 20111117 - params$tbn <- NULL; + params$tbn <- NULL # (c) The arguments in '...' - userArgs <- list(..., verbose=verbose); + userArgs <- list(..., verbose=verbose) # (d) Merge - args <- formals; - args2 <- c(params, userArgs); + args <- formals + args2 <- c(params, userArgs) for (kk in seq_along(args2)) { - value <- args2[[kk]]; + value <- args2[[kk]] if (!is.null(value)) { - key <- names(args2)[kk]; + key <- names(args2)[kk] if (!is.null(key)) { - args[[key]] <- value; + args[[key]] <- value } else { - args <- c(args, list(value)); + args <- c(args, list(value)) } } } # for (key ...) - verbose && str(verbose, args[names(args) != "verbose"]); + verbose && str(verbose, args[names(args) != "verbose"]) - verbose && enter(verbose, sprintf("Calling %s()", segFcnName)); - args <- c(list(data), args); - verbose && cat(verbose, "Arguments:"); - verbose && str(verbose, args[names(args) != "verbose"]); - verbose && exit(verbose); + verbose && enter(verbose, sprintf("Calling %s()", segFcnName)) + args <- c(list(data), args) + verbose && cat(verbose, "Arguments:") + verbose && str(verbose, args[names(args) != "verbose"]) + verbose && exit(verbose) - fit <- do.call(segFcnName, args); - verbose && exit(verbose); + fit <- do.call(segFcnName, args) + verbose && exit(verbose) - verbose && exit(verbose); + verbose && exit(verbose) - fit; + fit }, protected=TRUE) # resegment() diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.applyByRegion.R r-cran-pscbs-0.64.0/R/PairedPSCBS.applyByRegion.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.applyByRegion.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.applyByRegion.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,307 +1,292 @@ -setMethodS3("applyByRegion", "PairedPSCBS", function(fit, FUN, ..., subset=NULL, append=FALSE, verbose=FALSE) { - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'FUN': - stopifnot(is.function(FUN)); - - # Argument 'subset': - if (!is.null(subset)) { - subset <- Arguments$getIndices(subset, range=c(1, nbrOfSegments(fit))); - } - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); - if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); - } - - - verbose && enter(verbose, "Apply function region by region"); - verbose && cat(verbose, "Segments:"); - verbose && str(verbose, subset); - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract data and estimates - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getLocusData(fit); - segs <- getSegments(fit); - tcnSegRows <- fit$tcnSegRows; - dhSegRows <- fit$dhSegRows; - params <- fit$params; - - # Sanity checks - if (!params$joinSegments) { - throw("Cannot applyByRegion() unless PSCNs are segmented using joinSegments=TRUE."); - } - dataRows <- tcnSegRows; - - # Sanity checks - stopifnot(all(!is.na(data$chromosome) & !is.na(data$x))); - stopifnot(length(tcnSegRows) == length(dhSegRows)); - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # For each segment... - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nbrOfSegments <- nrow(segs); - verbose && cat(verbose, "Number of segments: ", nbrOfSegments); - - # Allocate result objects? - if (append) { - dataN <- outputN <- dataRowsN <- NULL; - } - - if (is.null(subset)) { - subset <- seq_len(nbrOfSegments); - } - - for (rr in subset) { - verbose && enter(verbose, sprintf("Segment #%d of %d", rr, nbrOfSegments)); - - # Extract segment - segRR <- segs[rr,,drop=FALSE]; - - # Nothing todo? - if (is.na(segRR[["tcnId"]]) && is.na(segRR[["dhId"]])) { - verbose && cat(verbose, "A divider. Nothing to do."); - outputN <- rbind(outputN, NA); - dataRowsN <- rbind(dataRowsN, NA); - verbose && exit(verbose); - next; - } - - verbose && str(verbose, segRR, level=-20); - - # Extract data - dataRowsRR <- dataRows[rr,,drop=FALSE]; - from <- dataRowsRR[[1]]; - to <- dataRowsRR[[2]]; - ok <- (!is.na(from) & !is.na(to)); - from <- from[ok]; - to <- to[ok]; - keep <- logical(nrow(data)); - for (kk in seq_along(from)) { - keep[from[kk]:to[kk]] <- TRUE; - } - dataRowsRR <- which(keep); - verbose && printf(verbose, "Identified %d (%.2f%%) of %d data rows:\n", length(dataRowsRR), 100*length(dataRowsRR)/nrow(data), nrow(data)); - verbose && str(verbose, dataRowsRR); - dataRR <- data[dataRowsRR,,drop=FALSE]; - verbose && str(verbose, dataRR, level=-20); - - verbose && enter(verbose, "Applying function 'FUN' to segment"); - resRR <- FUN(rr, segRR, dataRR, ...); - verbose && cat(verbose, "Returned result:"); - verbose && str(verbose, resRR, level=-20); - verbose && exit(verbose); - - # Nothing to update/store? - if (!is.list(resRR)) { - verbose && cat(verbose, "Nothing more to do for this segment since nothing was returned: ", rr); - verbose && exit(verbose); - next; - } - - # Extract return data - dataRRN <- resRR$data; - segRRN <- resRR$output; - # Sanity check - stopifnot(!is.null(dataRRN)); - stopifnot(is.data.frame(dataRRN)); - stopifnot(!is.null(segRRN)); - stopifnot(is.data.frame(segRRN)); - - if (append) { - # Modified locus-level data - dataRowsRRN <- c(1L, nrow(dataRRN)); - if (!is.null(dataN)) { - dataRowsRRN <- dataRowsRRN + nrow(dataN); - } - dataN <- rbind(dataN, dataRRN); - # Sanity checks - stopifnot(nrow(dataN) == max(dataRowsN, na.rm=TRUE)); - - # Update segment table? - outputN <- rbind(outputN, segRRN); - dataRowsN <- rbind(dataRowsN, dataRowsRRN); - # Sanity check - stopifnot(nrow(outputN) == nrow(dataRowsN)); - # Sanity checks - stopifnot(nrow(dataN) == max(dataRowsN, na.rm=TRUE)); - } else { - # Modified locus-level data - verbose && enter(verbose, "Updating locus-level data"); - # Sanity check - stopifnot(dim(dataRRN) == dim(dataRR)); - stopifnot(length(dataRowsRR) == nrow(dataRRN)); - data[dataRowsRR,] <- dataRRN; - str(data[dataRowsRR,]); - verbose && exit(verbose); - - # Modified segment data - verbose && enter(verbose, "Updating segment data"); - # Sanity check - stopifnot(dim(segRRN) == dim(segRR)); - segs[rr,] <- segRRN; - verbose && exit(verbose); - } - - # Not needed anymore - dataRRN <- segRRN <- NULL; - dataRR <- segRR <- NULL; - resRR <- NULL; - - verbose && exit(verbose); - } # for (rr ...) - - if (append) { - if (!is.null(dataRowsN)) { - rownames(dataRowsN) <- NULL; - colnames(dataRowsN) <- colnames(dataRows); - dataRowsN <- as.data.frame(dataRowsN); - # Sanity checks - stopifnot(!is.null(dataN)); - stopifnot(!is.null(outputN)); - stopifnot(!is.null(dataRowsN)); - - data <- dataN; - segs <- outputN; - - # Not needed anymore - dataN <- outputN <- NULL; - } - } - - # Return result - res <- fit; # "clone" - res$data <- data; - res$output <- segs; - - # Not needed anymore - data <- segs <- NULL; - - # Update segment-to-locus index tables - if (append && !is.null(dataRowsN)) { - res$tcnSegRows <- dataRowsN; - res$dhSegRows <- dataRowsN; # Is this really the case? /HB 2011-01-17 - } - - verbose && exit(verbose); - - res; -}, private=TRUE) - - -.addC1C2WithStatitics <- function(rr, output, data, robust=TRUE, ...) { - # Calculate locus-level (C1,C2) - C <- data$CT; - rho <- data$rho; - C1 <- 1/2 * (1 - rho) * C; - C2 <- C - C1; - CC <- data.frame(C1=C1, C2=C2); - - if (robust) { - meanFcn <- function(x, ...) median(x, na.rm=TRUE); - sdFcn <- function(x, ...) mad(x, na.rm=TRUE); - } else { - meanFcn <- function(x, ...) mean(x, na.rm=TRUE); - sdFcn <- function(x, ...) sd(x, na.rm=TRUE); - } - - # Calculate region-level (C1,C2) means and std devs. - muCC <- apply(CC, MARGIN=2, FUN=meanFcn); - sigmaCC <- apply(CC, MARGIN=2, FUN=sdFcn); - rhoCC <- cor(CC[,1], CC[,2], use="pairwise.complete.obs"); - - names(muCC) <- c("c1Avg", "c2Avg"); - names(sigmaCC) <- c("c1Sd", "c2Sd"); - - # Update data - data <- cbind(data, CC); - - - # Update segment table - outputT <- c(muCC, sigmaCC, c1c2.cor=rhoCC); - outputT <- as.list(outputT); - outputT <- as.data.frame(outputT); - - output <- cbind(output, outputT); - - list(data=data, output=output); -} # .addC1C2WithStatitics() - - -.addCACBWithStatitics <- function(rr, output, data, beta=c("betaTN", "betaT"), stratifyBy=c("all", "hets", "homs"), robust=TRUE, ...) { - # Argument 'beta': - beta <- match.arg(beta); - - # Argument 'stratifyBy': - stratifyBy <- match.arg(stratifyBy); - - # Calculate locus-level (CA,CB) - C <- data$CT; - beta <- data[[beta]]; - CB <- beta * C; - CA <- C - CB; - CC <- data.frame(CA=CA, CB=CB); - - # Update data - data <- cbind(data, CC); - - - if (robust) { - meanFcn <- function(x, ...) median(x, na.rm=TRUE); - sdFcn <- function(x, ...) mad(x, na.rm=TRUE); - } else { - meanFcn <- function(x, ...) mean(x, na.rm=TRUE); - sdFcn <- function(x, ...) sd(x, na.rm=TRUE); - } - - if (stratifyBy == "hets") { - muN <- data$muN; - keep <- (muN == 1/2); - CC <- CC[keep,,drop=FALSE]; - } else if (stratifyBy == "homs") { - muN <- data$muN; - keep <- (muN == 0 | muN == 1); - CC <- CC[keep,,drop=FALSE]; - } - - # Calculate region-level (CA,CB) means and std devs. - muCC <- apply(CC, MARGIN=2, FUN=meanFcn); - sigmaCC <- apply(CC, MARGIN=2, FUN=sdFcn); - if (nrow(CC) < 3) { - rhoCC <- NA_real_; - } else { - rhoCC <- cor(CC[,1], CC[,2], use="pairwise.complete.obs"); - } - names(muCC) <- c("caAvg", "cbAvg"); - names(sigmaCC) <- c("caSd", "cbSd"); - - # Update segment table - outputT <- c(muCC, sigmaCC, cacbCor=rhoCC); - outputT <- as.list(outputT); - outputT <- as.data.frame(outputT); - - output <- cbind(output, outputT); - - list(data=data, output=output); -} # .addCACBWithStatitics() - - - -############################################################################# -# HISTORY: -# 2013-10-21 -# o Added argument 'subset' to applyByRegion() for PairedPSCBS. -# 2011-06-14 -# o Updated code to recognize new column names. -# 2011-01-27 -# o Added .addCACBWithStatitics(). -# o Added .addC1C2WithStatitics(). -# o Added applyByRegion(). -# o Created. -############################################################################# +setMethodS3("applyByRegion", "PairedPSCBS", function(fit, FUN, ..., subset=NULL, append=FALSE, verbose=FALSE) { + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Validate arguments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Argument 'FUN': + .stop_if_not(is.function(FUN)) + + # Argument 'subset': + if (!is.null(subset)) { + subset <- Arguments$getIndices(subset, range=c(1, nbrOfSegments(fit))) + } + + # Argument 'verbose': + verbose <- Arguments$getVerbose(verbose) + if (verbose) { + pushState(verbose) + on.exit(popState(verbose)) + } + + + verbose && enter(verbose, "Apply function region by region") + verbose && cat(verbose, "Segments:") + verbose && str(verbose, subset) + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Extract data and estimates + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + data <- getLocusData(fit) + segs <- getSegments(fit) + tcnSegRows <- fit$tcnSegRows + dhSegRows <- fit$dhSegRows + params <- fit$params + + # Sanity checks + if (!params$joinSegments) { + throw("Cannot applyByRegion() unless PSCNs are segmented using joinSegments=TRUE.") + } + dataRows <- tcnSegRows + + # Sanity checks + .stop_if_not(all(!is.na(data$chromosome) & !is.na(data$x))) + .stop_if_not(length(tcnSegRows) == length(dhSegRows)) + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # For each segment... + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + nbrOfSegments <- nrow(segs) + verbose && cat(verbose, "Number of segments: ", nbrOfSegments) + + # Allocate result objects? + if (append) { + dataN <- outputN <- dataRowsN <- NULL + } + + if (is.null(subset)) { + subset <- seq_len(nbrOfSegments) + } + + for (rr in subset) { + verbose && enter(verbose, sprintf("Segment #%d of %d", rr, nbrOfSegments)) + + # Extract segment + segRR <- segs[rr,,drop=FALSE] + + # Nothing todo? + if (is.na(segRR[["tcnId"]]) && is.na(segRR[["dhId"]])) { + verbose && cat(verbose, "A divider. Nothing to do.") + outputN <- rbind(outputN, NA) + dataRowsN <- rbind(dataRowsN, NA) + verbose && exit(verbose) + next + } + + verbose && str(verbose, segRR, level=-20) + + # Extract data + dataRowsRR <- dataRows[rr,,drop=FALSE] + from <- dataRowsRR[[1]] + to <- dataRowsRR[[2]] + ok <- (!is.na(from) & !is.na(to)) + from <- from[ok] + to <- to[ok] + keep <- logical(nrow(data)) + for (kk in seq_along(from)) { + keep[from[kk]:to[kk]] <- TRUE + } + dataRowsRR <- which(keep) + verbose && printf(verbose, "Identified %d (%.2f%%) of %d data rows:\n", length(dataRowsRR), 100*length(dataRowsRR)/nrow(data), nrow(data)) + verbose && str(verbose, dataRowsRR) + dataRR <- data[dataRowsRR,,drop=FALSE] + verbose && str(verbose, dataRR, level=-20) + + verbose && enter(verbose, "Applying function 'FUN' to segment") + resRR <- FUN(rr, segRR, dataRR, ...) + verbose && cat(verbose, "Returned result:") + verbose && str(verbose, resRR, level=-20) + verbose && exit(verbose) + + # Nothing to update/store? + if (!is.list(resRR)) { + verbose && cat(verbose, "Nothing more to do for this segment since nothing was returned: ", rr) + verbose && exit(verbose) + next + } + + # Extract return data + dataRRN <- resRR$data + segRRN <- resRR$output + # Sanity check + .stop_if_not(!is.null(dataRRN)) + .stop_if_not(is.data.frame(dataRRN)) + .stop_if_not(!is.null(segRRN)) + .stop_if_not(is.data.frame(segRRN)) + + if (append) { + # Modified locus-level data + dataRowsRRN <- c(1L, nrow(dataRRN)) + if (!is.null(dataN)) { + dataRowsRRN <- dataRowsRRN + nrow(dataN) + } + dataN <- rbind(dataN, dataRRN) + # Sanity checks + .stop_if_not(nrow(dataN) == max(dataRowsN, na.rm=TRUE)) + + # Update segment table? + outputN <- rbind(outputN, segRRN) + dataRowsN <- rbind(dataRowsN, dataRowsRRN) + # Sanity check + .stop_if_not(nrow(outputN) == nrow(dataRowsN)) + # Sanity checks + .stop_if_not(nrow(dataN) == max(dataRowsN, na.rm=TRUE)) + } else { + # Modified locus-level data + verbose && enter(verbose, "Updating locus-level data") + # Sanity check + .stop_if_not(dim(dataRRN) == dim(dataRR)) + .stop_if_not(length(dataRowsRR) == nrow(dataRRN)) + data[dataRowsRR,] <- dataRRN + str(data[dataRowsRR,]) + verbose && exit(verbose) + + # Modified segment data + verbose && enter(verbose, "Updating segment data") + # Sanity check + .stop_if_not(dim(segRRN) == dim(segRR)) + segs[rr,] <- segRRN + verbose && exit(verbose) + } + + # Not needed anymore + dataRRN <- segRRN <- NULL + dataRR <- segRR <- NULL + resRR <- NULL + + verbose && exit(verbose) + } # for (rr ...) + + if (append) { + if (!is.null(dataRowsN)) { + rownames(dataRowsN) <- NULL + colnames(dataRowsN) <- colnames(dataRows) + dataRowsN <- as.data.frame(dataRowsN) + # Sanity checks + .stop_if_not(!is.null(dataN)) + .stop_if_not(!is.null(outputN)) + .stop_if_not(!is.null(dataRowsN)) + + data <- dataN + segs <- outputN + + # Not needed anymore + dataN <- outputN <- NULL + } + } + + # Return result + res <- fit # "clone" + res$data <- data + res$output <- segs + + # Not needed anymore + data <- segs <- NULL + + # Update segment-to-locus index tables + if (append && !is.null(dataRowsN)) { + res$tcnSegRows <- dataRowsN + res$dhSegRows <- dataRowsN # Is this really the case? /HB 2011-01-17 + } + + verbose && exit(verbose) + + res +}, private=TRUE) + + +.addC1C2WithStatitics <- function(rr, output, data, robust=TRUE, ...) { + # Calculate locus-level (C1,C2) + C <- data$CT + rho <- data$rho + C1 <- 1/2 * (1 - rho) * C + C2 <- C - C1 + CC <- data.frame(C1=C1, C2=C2) + + if (robust) { + meanFcn <- function(x, ...) median(x, na.rm=TRUE) + sdFcn <- function(x, ...) mad(x, na.rm=TRUE) + } else { + meanFcn <- function(x, ...) mean(x, na.rm=TRUE) + sdFcn <- function(x, ...) sd(x, na.rm=TRUE) + } + + # Calculate region-level (C1,C2) means and std devs. + muCC <- apply(CC, MARGIN=2, FUN=meanFcn) + sigmaCC <- apply(CC, MARGIN=2, FUN=sdFcn) + rhoCC <- cor(CC[,1], CC[,2], use="pairwise.complete.obs") + + names(muCC) <- c("c1Avg", "c2Avg") + names(sigmaCC) <- c("c1Sd", "c2Sd") + + # Update data + data <- cbind(data, CC) + + + # Update segment table + outputT <- c(muCC, sigmaCC, c1c2.cor=rhoCC) + outputT <- as.list(outputT) + outputT <- as.data.frame(outputT) + + output <- cbind(output, outputT) + + list(data=data, output=output) +} # .addC1C2WithStatitics() + + +.addCACBWithStatitics <- function(rr, output, data, beta=c("betaTN", "betaT"), stratifyBy=c("all", "hets", "homs"), robust=TRUE, ...) { + # Argument 'beta': + beta <- match.arg(beta) + + # Argument 'stratifyBy': + stratifyBy <- match.arg(stratifyBy) + + # Calculate locus-level (CA,CB) + C <- data$CT + beta <- data[[beta]] + CB <- beta * C + CA <- C - CB + CC <- data.frame(CA=CA, CB=CB) + + # Update data + data <- cbind(data, CC) + + + if (robust) { + meanFcn <- function(x, ...) median(x, na.rm=TRUE) + sdFcn <- function(x, ...) mad(x, na.rm=TRUE) + } else { + meanFcn <- function(x, ...) mean(x, na.rm=TRUE) + sdFcn <- function(x, ...) sd(x, na.rm=TRUE) + } + + if (stratifyBy == "hets") { + muN <- data$muN + keep <- (muN == 1/2) + CC <- CC[keep,,drop=FALSE] + } else if (stratifyBy == "homs") { + muN <- data$muN + keep <- (muN == 0 | muN == 1) + CC <- CC[keep,,drop=FALSE] + } + + # Calculate region-level (CA,CB) means and std devs. + muCC <- apply(CC, MARGIN=2, FUN=meanFcn) + sigmaCC <- apply(CC, MARGIN=2, FUN=sdFcn) + if (nrow(CC) < 3) { + rhoCC <- NA_real_ + } else { + rhoCC <- cor(CC[,1], CC[,2], use="pairwise.complete.obs") + } + names(muCC) <- c("caAvg", "cbAvg") + names(sigmaCC) <- c("caSd", "cbSd") + + # Update segment table + outputT <- c(muCC, sigmaCC, cacbCor=rhoCC) + outputT <- as.list(outputT) + outputT <- as.data.frame(outputT) + + output <- cbind(output, outputT) + + list(data=data, output=output) +} # .addCACBWithStatitics() diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.BOOT.R r-cran-pscbs-0.64.0/R/PairedPSCBS.BOOT.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.BOOT.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.BOOT.R 2018-08-12 21:30:44.000000000 +0000 @@ -36,7 +36,7 @@ #*/########################################################################### setMethodS3("bootstrapTCNandDHByRegion", "PairedPSCBS", function(fit, B=1000L, boot=NULL, ..., probs=c(0.025, 0.050, 0.95, 0.975), statsFcn=NULL, what=c("segment", "changepoint"), force=FALSE, verbose=FALSE, .debug=FALSE) { # Settings for sanity checks - tol <- getOption("PSCBS/sanityChecks/tolerance", 0.0005); + tol <- getOption("PSCBS/sanityChecks/tolerance", 0.0005) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -44,173 +44,173 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - summarizeSamples <- function(X, statsFcn, stats=NULL, what=c("segment", "changepoint"), ..., verbose=FALSE) { # Argument 'X': - stopifnot(is.array(X)); - dim <- dim(X); - stopifnot(length(dim) == 3L); + .stop_if_not(is.array(X)) + dim <- dim(X) + .stop_if_not(length(dim) == 3L) # Argument 'statsFcn': - stopifnot(is.function(statsFcn)); - statsT <- statsFcn(1); - stopifnot(!is.null(names(statsT))); - nbrOfStats <- length(statsT); - statsNames <- names(statsT); - statsT <- NULL; # Not needed anymore + .stop_if_not(is.function(statsFcn)) + statsT <- statsFcn(1) + .stop_if_not(!is.null(names(statsT))) + nbrOfStats <- length(statsT) + statsNames <- names(statsT) + statsT <- NULL # Not needed anymore # Argument 'stats': if (!is.null(stats)) { - stopifnot(is.data.frame(stats)); + .stop_if_not(is.data.frame(stats)) } # Argument 'what': - what <- match.arg(what); - whatC <- capitalize(what); + what <- match.arg(what) + whatC <- capitalize(what) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - dimnames <- dimnames(X); - fields <- dimnames[[3L]]; + dimnames <- dimnames(X) + fields <- dimnames[[3L]] - verbose && enter(verbose, sprintf("Summarizing bootstrapped %s (%s) data", what, paste(sQuote(fields), collapse=", "))); + verbose && enter(verbose, sprintf("Summarizing bootstrapped %s (%s) data", what, paste(sQuote(fields), collapse=", "))) # Allocate JxQxF matrix S - dim[2L] <- nbrOfStats; - dimnames[[2L]] <- statsNames; - S <- array(NA_real_, dim=dim, dimnames=dimnames); - verbose && str(verbose, S); + dim[2L] <- nbrOfStats + dimnames[[2L]] <- statsNames + S <- array(NA_real_, dim=dim, dimnames=dimnames) + verbose && str(verbose, S) for (kk in seq_along(fields)) { - field <- fields[kk]; - verbose && enter(verbose, sprintf("Field #%d ('%s') of %d", kk, field, length(fields))); + field <- fields[kk] + verbose && enter(verbose, sprintf("Field #%d ('%s') of %d", kk, field, length(fields))) - Xkk <- X[,,kk,drop=FALSE]; # An JxB matrix - dim(Xkk) <- dim(Xkk)[-3L]; + Xkk <- X[,,kk,drop=FALSE] # An JxB matrix + dim(Xkk) <- dim(Xkk)[-3L] # Sanity check - stopifnot(is.matrix(Xkk)); - stopifnot(nrow(Xkk) == dim(X)[1L]); - stopifnot(ncol(Xkk) == B); + .stop_if_not(is.matrix(Xkk)) + .stop_if_not(nrow(Xkk) == dim(X)[1L]) + .stop_if_not(ncol(Xkk) == B) for (jj in seq_len(dim(X)[1L])) { - verbose && enter(verbose, sprintf("%s #%d of %d", whatC, jj, dim(X)[1L])); - Xkkjj <- Xkk[jj,,drop=TRUE]; # A vector of length B - S[jj,,kk] <- statsFcn(Xkkjj); - verbose && exit(verbose); + verbose && enter(verbose, sprintf("%s #%d of %d", whatC, jj, dim(X)[1L])) + Xkkjj <- Xkk[jj,,drop=TRUE] # A vector of length B + S[jj,,kk] <- statsFcn(Xkkjj) + verbose && exit(verbose) } # for (jj ...) - Xkk <- NULL; # Not needed anymore + Xkk <- NULL # Not needed anymore - verbose && exit(verbose); + verbose && exit(verbose) } # for (jj ...) # Not needed anymore - X <- NULL; + X <- NULL - verbose && cat(verbose, "Bootstrap statistics"); - verbose && str(verbose, S); + verbose && cat(verbose, "Bootstrap statistics") + verbose && str(verbose, S) # Reshape JxQx4 array to Jx(4*Q) matrix - T <- wrap(S, map=list(1,NA), sep="_"); - colnames(T) <- gsub("(.*)_(.*)", "\\2_\\1", colnames(T)); + T <- wrap(S, map=list(1,NA), sep="_") + colnames(T) <- gsub("(.*)_(.*)", "\\2_\\1", colnames(T)) # Append as new columns to the summary table - stats <- cbind(stats, T); + stats <- cbind(stats, T) # Drop previously estimated values - dups <- duplicated(colnames(stats), fromLast=TRUE); + dups <- duplicated(colnames(stats), fromLast=TRUE) if (any(dups)) { - stats <- stats[,!dups, drop=FALSE]; + stats <- stats[,!dups, drop=FALSE] } # Not needed anymore - T <- dups <- NULL; + T <- dups <- NULL # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Statistical sanity checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (what == "segment" && B >= 100L) { - verbose && enter(verbose, "Statistical sanity checks (iff B >= 100)"); + verbose && enter(verbose, "Statistical sanity checks (iff B >= 100)") - stopifnot(is.array(S)); + .stop_if_not(is.array(S)) # Find extreme quantiles - probs <- dimnames(S)[[2L]]; - verbose && printf(verbose, "Available summaries: %s\n", paste(probs, collapse=", ")); - probs <- grep("%", probs, fixed=TRUE, value=TRUE); - S <- S[,probs,,drop=FALSE]; - probs <- gsub("%", "", probs, fixed=TRUE); - probs <- as.double(probs) / 100; - verbose && printf(verbose, "Available quantiles: %s\n", paste(probs, collapse=", ")); - verbose && str(verbose, S); + probs <- dimnames(S)[[2L]] + verbose && printf(verbose, "Available summaries: %s\n", paste(probs, collapse=", ")) + probs <- grep("%", probs, fixed=TRUE, value=TRUE) + S <- S[,probs,,drop=FALSE] + probs <- gsub("%", "", probs, fixed=TRUE) + probs <- as.double(probs) / 100 + verbose && printf(verbose, "Available quantiles: %s\n", paste(probs, collapse=", ")) + verbose && str(verbose, S) # Sanity check - stopifnot(all(is.finite(probs))); + .stop_if_not(all(is.finite(probs))) # Is it possible to check? if (any(probs < 0.10) && any(probs > 0.90)) { tryCatch({ - fields <- dimnames(S)[[3L]]; + fields <- dimnames(S)[[3L]] for (kk in seq_along(fields)) { - field <- fields[kk]; - verbose && enter(verbose, sprintf("Field #%d ('%s') of %d", kk, field, length(fields))); + field <- fields[kk] + verbose && enter(verbose, sprintf("Field #%d ('%s') of %d", kk, field, length(fields))) # Bootstrap statistics - Skk <- S[,,kk, drop=FALSE]; - dim(Skk) <- dim(Skk)[-3L]; + Skk <- S[,,kk, drop=FALSE] + dim(Skk) <- dim(Skk)[-3L] # Sanity checks - stopifnot(is.matrix(Skk)); + .stop_if_not(is.matrix(Skk)) - range <- Skk[,c(1L,ncol(Skk)),drop=FALSE]; + range <- Skk[,c(1L,ncol(Skk)),drop=FALSE] # Segmentation means - key <- sprintf("%sMean", field); - segMean <- segs[[key]]; + key <- sprintf("%sMean", field) + segMean <- segs[[key]] # Segmentation counts - cfield <- sprintf("%sNbrOfLoci", ifelse(field == "tcn", "tcn", "dh")); - counts <- segs[,cfield,drop=TRUE]; + cfield <- sprintf("%sNbrOfLoci", ifelse(field == "tcn", "tcn", "dh")) + counts <- segs[,cfield,drop=TRUE] if (verbose) { for (rr in seq_len(length(segMean))) { - printf(verbose, "Seg %3d. mean=%g, range=[%g,%g], n=%d\n", rr, segMean[rr], range[rr,1L], range[rr,2L], counts[rr]); + printf(verbose, "Seg %3d. mean=%g, range=[%g,%g], n=%d\n", rr, segMean[rr], range[rr,1L], range[rr,2L], counts[rr]) } # for (rr ...) } # Compare only segments with enough data points - keep <- (counts > 1L); - range <- range[keep,,drop=FALSE]; - segMean <- segMean[keep]; + keep <- (counts > 1L) + range <- range[keep,,drop=FALSE] + segMean <- segMean[keep] # Sanity checks - stopifnot(all(range[,2L] + tol >= range[,1L], na.rm=TRUE)); - stopifnot(all(segMean + tol >= range[,1L], na.rm=TRUE)); - stopifnot(all(segMean - tol <= range[,2L], na.rm=TRUE)); + .stop_if_not(all(range[,2L] + tol >= range[,1L], na.rm=TRUE)) + .stop_if_not(all(segMean + tol >= range[,1L], na.rm=TRUE)) + .stop_if_not(all(segMean - tol <= range[,2L], na.rm=TRUE)) - verbose && exit(verbose); + verbose && exit(verbose) } # for (kk ...) }, error = function(ex) { # If an error, display the data, then throw the exception - verbose && cat(verbose, "Tolerance (option 'PSCBS/sanityChecks/tolerance'): ", tol); - verbose && print(verbose, segs); - throw(ex); + verbose && cat(verbose, "Tolerance (option 'PSCBS/sanityChecks/tolerance'): ", tol) + verbose && print(verbose, segs) + throw(ex) }) } else { verbose && cat(verbose, "Skipping. Not enough quantiles: ", - paste(dimnames(S)[[2L]], collapse=", ")); + paste(dimnames(S)[[2L]], collapse=", ")) } - verbose && exit(verbose); + verbose && exit(verbose) } # if (B >= 100L) - verbose && exit(verbose); + verbose && exit(verbose) - stats; + stats } # summarizeSamples() @@ -218,120 +218,120 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'B': - B <- Arguments$getInteger(B, range=c(1,Inf)); + B <- Arguments$getInteger(B, range=c(1,Inf)) # Argument 'probs': - probs <- Arguments$getNumerics(probs, range=c(0,1)); + probs <- Arguments$getNumerics(probs, range=c(0,1)) # Always estimate the default quantiles - probs0 <- eval(formals(bootstrapTCNandDHByRegion.PairedPSCBS)$probs); - probs <- unique(sort(c(probs, probs0))); + probs0 <- eval(formals(bootstrapTCNandDHByRegion.PairedPSCBS)$probs, enclos = baseenv()) + probs <- unique(sort(c(probs, probs0))) # Argument 'statsFcn': if (is.null(statsFcn)) { - statsFcn <- function(x) quantile(x, probs=probs, na.rm=TRUE); + statsFcn <- function(x) quantile(x, probs=probs, na.rm=TRUE) } # Argument 'what': - what <- unique(match.arg(what, several.ok=TRUE)); + what <- unique(match.arg(what, several.ok=TRUE)) # Argument 'force': - force <- Arguments$getLogical(force); + force <- Arguments$getLogical(force) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } # Argument '.debug': - .debug <- Arguments$getLogical(.debug); + .debug <- Arguments$getLogical(.debug) - verbose && enter(verbose, "Resample (TCN,DH) signals and re-estimate summaries for ", paste(what, collapse=" & ")); + verbose && enter(verbose, "Resample (TCN,DH) signals and re-estimate summaries for ", paste(what, collapse=" & ")) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract existing estimates # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.element("segment", what)) { - segs <- getSegments(fit); + segs <- getSegments(fit) } if (is.element("changepoint", what)) { - cps <- getChangePoints(fit); + cps <- getChangePoints(fit) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Already done? # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - stats <- statsFcn(1); - stopifnot(!is.null(names(stats))); - nbrOfStats <- length(stats); - statsNames <- names(stats); + stats <- statsFcn(1) + .stop_if_not(!is.null(names(stats))) + nbrOfStats <- length(stats) + statsNames <- names(stats) if (is.element("segment", what)) { - tcnStatsNames <- sprintf("tcn_%s", names(stats)); - dhStatsNames <- sprintf("dh_%s", names(stats)); - c1StatsNames <- sprintf("c1_%s", names(stats)); - c2StatsNames <- sprintf("c2_%s", names(stats)); - allStatsNames <- c(tcnStatsNames, dhStatsNames, c1StatsNames, c2StatsNames); - isDone <- is.element(allStatsNames, names(segs)); - names(isDone) <- allStatsNames; - verbose && cat(verbose, "Already done?"); - verbose && print(verbose, isDone); + tcnStatsNames <- sprintf("tcn_%s", names(stats)) + dhStatsNames <- sprintf("dh_%s", names(stats)) + c1StatsNames <- sprintf("c1_%s", names(stats)) + c2StatsNames <- sprintf("c2_%s", names(stats)) + allStatsNames <- c(tcnStatsNames, dhStatsNames, c1StatsNames, c2StatsNames) + isDone <- is.element(allStatsNames, names(segs)) + names(isDone) <- allStatsNames + verbose && cat(verbose, "Already done?") + verbose && print(verbose, isDone) # Not needed anymore allStatsNames <- tcnStatsNames <- dhStatsNames <- - c1StatsNames <- c2StatsNames <- NULL; + c1StatsNames <- c2StatsNames <- NULL if (!force && all(isDone)) { - verbose && cat(verbose, "Already done. Skipping."); - verbose && exit(verbose); - return(fit); + verbose && cat(verbose, "Already done. Skipping.") + verbose && exit(verbose) + return(fit) } } # The object to be returned - fitB <- fit; + fitB <- fit # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Bootstrap (TCN,DH,C1,C2) segment mean levels # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.null(boot)) { boot <- bootstrapSegmentsAndChangepoints(fit, B=B, ..., - force=force, .debug=.debug, verbose=verbose); + force=force, .debug=.debug, verbose=verbose) } else { - B <- dim(boot$segments)[2L]; + B <- dim(boot$segments)[2L] } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Summarizing segment (TCN,DH,C1,C2) mean levels # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.element("segment", what)) { - segs <- summarizeSamples(boot$segments, statsFcn=statsFcn, stats=segs, what="segment", verbose=verbose); + segs <- summarizeSamples(boot$segments, statsFcn=statsFcn, stats=segs, what="segment", verbose=verbose) # Record statistics - fitB$output <- segs; - segs <- NULL; # Not needed anymore + fitB$output <- segs + segs <- NULL # Not needed anymore } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Summarizing change point (alpha, radius, manhattan, d1, d2) data # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.element("changepoint", what)) { - cps <- summarizeSamples(boot$changepoints, statsFcn=statsFcn, stats=cps, what="changepoint", verbose=verbose); + cps <- summarizeSamples(boot$changepoints, statsFcn=statsFcn, stats=cps, what="changepoint", verbose=verbose) # Record statistics - fitB$changepoints <- cps; - cps <- NULL; # Not needed anymore + fitB$changepoints <- cps + cps <- NULL # Not needed anymore } # Not needed anymore - fit <- boot <- NULL; + fit <- boot <- NULL - verbose && exit(verbose); + verbose && exit(verbose) - fitB; + fitB }, private=TRUE) # bootstrapTCNandDHByRegion() @@ -350,54 +350,54 @@ # } setMethodS3("bootstrapSegmentsAndChangepoints", "PairedPSCBS", function(fit, B=1000L, by=c("betaTN", "betaT"), seed=NULL, force=FALSE, cache=FALSE, verbose=FALSE, .debug=FALSE, ...) { # Settings for sanity checks - tol <- getOption("PSCBS/sanityChecks/tolerance", 0.0005); + tol <- getOption("PSCBS/sanityChecks/tolerance", 0.0005) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'B': - B <- Arguments$getInteger(B, range=c(1,Inf)); + B <- Arguments$getInteger(B, range=c(1,Inf)) # Argument 'by': - by <- match.arg(by); + by <- match.arg(by) # Argument 'seed': if (!is.null(seed)) { - seed <- Arguments$getInteger(seed); + seed <- Arguments$getInteger(seed) } # Argument '.cache': - cache <- Arguments$getLogical(cache); + cache <- Arguments$getLogical(cache) # Argument 'force': - force <- Arguments$getLogical(force); + force <- Arguments$getLogical(force) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } # Argument '.debug': - .debug <- Arguments$getLogical(.debug); + .debug <- Arguments$getLogical(.debug) - verbose && enter(verbose, "Bootstrapping (TCN,DH,C1,C2) segment mean levels"); + verbose && enter(verbose, "Bootstrapping (TCN,DH,C1,C2) segment mean levels") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Check for cached results # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - key <- list(method="bootstrapSegmentsAndChangepoints", class=class(fit)[1L], - fit=fit, B=B, by=by, seed=seed); - dirs <- c("PSCBS", "bootstrap"); - boot <- loadCache(key=key, dirs=dirs); + fit=fit, B=B, by=by, seed=seed) + dirs <- c("PSCBS", "bootstrap") + boot <- loadCache(key=key, dirs=dirs) if (!force && !is.null(boot)) { - verbose && cat(verbose, "Found cached results. Skipping."); - verbose && exit(verbose); - return(boot); + verbose && cat(verbose, "Found cached results. Skipping.") + verbose && exit(verbose) + return(boot) } @@ -414,25 +414,25 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract data and estimates # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getLocusData(fit); - tcnSegRows <- fit$tcnSegRows; - dhSegRows <- fit$dhSegRows; - segs <- getSegments(fit); - params <- fit$params; + data <- getLocusData(fit) + tcnSegRows <- fit$tcnSegRows + dhSegRows <- fit$dhSegRows + segs <- getSegments(fit) + params <- fit$params # Sanity checks - stopifnot(all(!is.na(data$chromosome) & !is.na(data$x))); + .stop_if_not(all(!is.na(data$chromosome) & !is.na(data$x))) # Sanity checks if (!params$joinSegments) { - throw("Cannot bootstrap TCN and DH by segments unless PSCNs are segmented using joinSegments=TRUE."); + throw("Cannot bootstrap TCN and DH by segments unless PSCNs are segmented using joinSegments=TRUE.") } if (regexpr(",", params$flavor, fixed=TRUE) != -1L) { - throw(sprintf("Cannot bootstrap TCN and DH by segments if PSCNs are segmented using flavor=\"%s\".", params$flavor)); + throw(sprintf("Cannot bootstrap TCN and DH by segments if PSCNs are segmented using flavor=\"%s\".", params$flavor)) } # Sanity check (same as above, but just in case) - stopifnot(all(segs$tcnStart == segs$dhStart, na.rm=TRUE)); - stopifnot(all(segs$tcnEnd == segs$dhEnd, na.rm=TRUE)); + .stop_if_not(all(segs$tcnStart == segs$dhStart, na.rm=TRUE)) + .stop_if_not(all(segs$tcnEnd == segs$dhEnd, na.rm=TRUE)) @@ -440,31 +440,31 @@ # Find estimators # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Get mean estimators used - estList <- getMeanEstimators(fit, c("tcn", "dh")); - avgTCN <- estList$tcn; - avgDH <- estList$dh; - estList <- NULL; # Not needed anymore + estList <- getMeanEstimators(fit, c("tcn", "dh")) + avgTCN <- estList$tcn + avgDH <- estList$dh + estList <- NULL # Not needed anymore # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Get signals # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Get (x,TCN,BAF) data - chromosome <- data$chromosome; - x <- data$x; - CT <- data$CT; - betaT <- data[[by]]; - muN <- data$muN; - rho <- data$rho; + chromosome <- data$chromosome + x <- data$x + CT <- data$CT + betaT <- data[[by]] + muN <- data$muN + rho <- data$rho # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Classify each locus as (i) heterozygous SNP, (ii) homozygous SNP, # or (iii) non-polymorphic loci # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Identifying heterozygous & homozygous SNPs and non-polymorphic loci"); - nbrOfLoci <- length(CT); - verbose && cat(verbose, "Number of loci: ", nbrOfLoci); + verbose && enter(verbose, "Identifying heterozygous & homozygous SNPs and non-polymorphic loci") + nbrOfLoci <- length(CT) + verbose && cat(verbose, "Number of loci: ", nbrOfLoci) # Identify SNPs hasDH <- !is.null(rho) @@ -476,40 +476,40 @@ isHet <- (isSnp & (muN == 1/2)) } - snps <- which(isSnp); - nonSNPs <- which(!isSnp); - nbrOfSNPs <- sum(isSnp); - nbrOfNonSNPs <- sum(!isSnp); - verbose && cat(verbose, "Number of SNPs: ", nbrOfSNPs); - verbose && cat(verbose, "Number of non-SNPs: ", nbrOfNonSNPs); + snps <- which(isSnp) + nonSNPs <- which(!isSnp) + nbrOfSNPs <- sum(isSnp) + nbrOfNonSNPs <- sum(!isSnp) + verbose && cat(verbose, "Number of SNPs: ", nbrOfSNPs) + verbose && cat(verbose, "Number of non-SNPs: ", nbrOfNonSNPs) # Sanity checks - stopifnot(length(intersect(snps, nonSNPs)) == 0L); + .stop_if_not(length(intersect(snps, nonSNPs)) == 0L) # Heterozygous SNPs hets <- which(isSnp & isHet) - homs <- which(isSnp & !isHet); - nbrOfHets <- length(hets); - nbrOfHoms <- length(homs); + homs <- which(isSnp & !isHet) + nbrOfHets <- length(hets) + nbrOfHoms <- length(homs) if (!hasDH) { verbose && printf(verbose, "Number of heterozygous SNPs: %d (%.2f%%)\n", - nbrOfHets, 100*nbrOfHets/nbrOfSNPs); + nbrOfHets, 100*nbrOfHets/nbrOfSNPs) verbose && printf(verbose, "Number of homozygous SNPs: %d (%.2f%%)\n", - nbrOfHoms, 100*nbrOfHoms/nbrOfSNPs); + nbrOfHoms, 100*nbrOfHoms/nbrOfSNPs) } # Sanity checks - stopifnot(length(intersect(hets, homs)) == 0L); - stopifnot(nbrOfHets + nbrOfHoms == nbrOfSNPs); + .stop_if_not(length(intersect(hets, homs)) == 0L) + .stop_if_not(nbrOfHets + nbrOfHoms == nbrOfSNPs) # Sanity checks - stopifnot(length(isSnp) == nbrOfLoci); - stopifnot(length(isHet) == nbrOfLoci); + .stop_if_not(length(isSnp) == nbrOfLoci) + .stop_if_not(length(isHet) == nbrOfLoci) # Not needed anymore muN <- isSnp <- NULL - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -517,147 +517,147 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.null(rho)) { # Calculate DHs for heterozygous SNPs - rho <- 2*abs(betaT - 1/2); + rho <- 2*abs(betaT - 1/2) # DH is by definition only defined for heterozygous SNPs. # For simplicity, we set it to be NA for non-heterozygous loci. - rho[!isHet] <- NA_real_; + rho[!isHet] <- NA_real_ - data$rho <- rho; + data$rho <- rho } # Not needed anymore - betaT <- isHet <- NULL; + betaT <- isHet <- NULL # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Resample (TCN,DH) within each segments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nbrOfSegments <- nrow(segs); + nbrOfSegments <- nrow(segs) # Allocate JxBx4 matrix M of bootstrap means - dim <- c(nbrOfSegments, B, 4L); - dimnames <- list(NULL, NULL, c("tcn", "dh", "c1", "c2")); - M <- array(NA_real_, dim=dim, dimnames=dimnames); - verbose && str(verbose, M); + dim <- c(nbrOfSegments, B, 4L) + dimnames <- list(NULL, NULL, c("tcn", "dh", "c1", "c2")) + M <- array(NA_real_, dim=dim, dimnames=dimnames) + verbose && str(verbose, M) # Identify all loci with non-missing signals - idxsCT <- which(!is.na(CT)); - idxsRho <- which(!is.na(rho)); + idxsCT <- which(!is.na(CT)) + idxsRho <- which(!is.na(rho)) # Vectorized pre-adjustments for (field in c("tcnNbrOfLoci", "dhNbrOfLoci")) { - counts <- segs[[field]]; - counts[is.na(counts)] <- 0L; - segs[[field]] <- counts; + counts <- segs[[field]] + counts[is.na(counts)] <- 0L + segs[[field]] <- counts } - hasTcnLoci <- (is.finite(tcnSegRows[,1L]) & is.finite(tcnSegRows[,2L])); - hasDhLoci <- (is.finite(dhSegRows[,1L]) & is.finite(dhSegRows[,2L])); + hasTcnLoci <- (is.finite(tcnSegRows[,1L]) & is.finite(tcnSegRows[,2L])) + hasDhLoci <- (is.finite(dhSegRows[,1L]) & is.finite(dhSegRows[,2L])) # Identify "splitter" segments which have no data - chrs <- segs[["chromosome"]]; - tcnIds <- segs[["tcnId"]]; - dhIds <- segs[["dhId"]]; - tcnMeans <- segs[["tcnMean"]]; - dhMeans <- segs[["dhMean"]]; - isSplitter <- (is.na(chrs) & is.na(tcnIds) & is.na(dhIds)); + chrs <- segs[["chromosome"]] + tcnIds <- segs[["tcnId"]] + dhIds <- segs[["dhId"]] + tcnMeans <- segs[["tcnMean"]] + dhMeans <- segs[["dhMean"]] + isSplitter <- (is.na(chrs) & is.na(tcnIds) & is.na(dhIds)) # Get all segment indices except for "splitters" - jjs <- seq_len(nbrOfSegments); - jjs <- jjs[!isSplitter]; + jjs <- seq_len(nbrOfSegments) + jjs <- jjs[!isSplitter] for (jj in jjs) { - chr <- chrs[jj]; - tcnId <- tcnIds[jj]; - dhId <- dhIds[jj]; + chr <- chrs[jj] + tcnId <- tcnIds[jj] + dhId <- dhIds[jj] - verbose && enter(verbose, sprintf("Segment #%d (chr %d, tcnId=%d, dhId=%d) of %d", jj, chr, tcnId, dhId, nbrOfSegments)); + verbose && enter(verbose, sprintf("Segment #%d (chr %d, tcnId=%d, dhId=%d) of %d", jj, chr, tcnId, dhId, nbrOfSegments)) # Sanity check - if (.debug) stopifnot(!is.na(chr) && !is.na(tcnId) && !is.na(dhId)); + if (.debug) .stop_if_not(!is.na(chr) && !is.na(tcnId) && !is.na(dhId)) # Get the segment data - segJJ <- segs[jj,,drop=FALSE]; - verbose && print(verbose, segJJ); + segJJ <- segs[jj,,drop=FALSE] + verbose && print(verbose, segJJ) - nbrOfTCNs <- segJJ[,"tcnNbrOfLoci"]; - nbrOfDHs <- segJJ[,"dhNbrOfLoci"]; - verbose && cat(verbose, "Number of TCNs: ", nbrOfTCNs); - verbose && cat(verbose, "Number of DHs: ", nbrOfDHs); + nbrOfTCNs <- segJJ[,"tcnNbrOfLoci"] + nbrOfDHs <- segJJ[,"dhNbrOfLoci"] + verbose && cat(verbose, "Number of TCNs: ", nbrOfTCNs) + verbose && cat(verbose, "Number of DHs: ", nbrOfDHs) if (.debug) { - stopifnot(!is.na(nbrOfTCNs)); - stopifnot(!is.na(nbrOfDHs)); + .stop_if_not(!is.na(nbrOfTCNs)) + .stop_if_not(!is.na(nbrOfDHs)) } - tcnSegRowJJ <- unlist(tcnSegRows[jj,], use.names=FALSE); - dhSegRowJJ <- unlist(dhSegRows[jj,], use.names=FALSE); + tcnSegRowJJ <- unlist(tcnSegRows[jj,], use.names=FALSE) + dhSegRowJJ <- unlist(dhSegRows[jj,], use.names=FALSE) # Indices of all loci if (hasTcnLoci[jj]) { - idxsAll <- tcnSegRowJJ[1L]:tcnSegRowJJ[2L]; + idxsAll <- tcnSegRowJJ[1L]:tcnSegRowJJ[2L] } else { - idxsAll <- 0L; + idxsAll <- 0L } - verbose && str(verbose, idxsAll); - verbose && print(verbose, hpaste(idxsAll), level=-120); - verbose && str(verbose, idxsCT); - verbose && print(verbose, hpaste(idxsCT), level=-120); + verbose && str(verbose, idxsAll) + verbose && print(verbose, hpaste(idxsAll), level=-120) + verbose && str(verbose, idxsCT) + verbose && print(verbose, hpaste(idxsCT), level=-120) # Keep only loci with finite TCNs - idxsAll <- intersect(idxsAll, idxsCT); - verbose && str(verbose, idxsAll); - verbose && print(verbose, hpaste(idxsAll), level=-120); + idxsAll <- intersect(idxsAll, idxsCT) + verbose && str(verbose, idxsAll) + verbose && print(verbose, hpaste(idxsAll), level=-120) # Sanity check if (length(idxsAll) != nbrOfTCNs) { - verbose && str(verbose, setdiff(idxsCT, idxsAll)); - throw("INTERNAL ERROR: length(idxsAll) != nbrOfTCNs: ", length(idxsAll), " != ", nbrOfTCNs); + verbose && str(verbose, setdiff(idxsCT, idxsAll)) + throw("INTERNAL ERROR: length(idxsAll) != nbrOfTCNs: ", length(idxsAll), " != ", nbrOfTCNs) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Identify loci used to calculate DH means # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Identify loci used to bootstrap DH means"); + verbose && enter(verbose, "Identify loci used to bootstrap DH means") if (hasDhLoci[jj]) { - idxsDH <- dhSegRowJJ[1L]:dhSegRowJJ[2L]; - idxsDH <- intersect(idxsDH, hets); + idxsDH <- dhSegRowJJ[1L]:dhSegRowJJ[2L] + idxsDH <- intersect(idxsDH, hets) # Drop missing values - idxsDH <- intersect(idxsDH, idxsRho); + idxsDH <- intersect(idxsDH, idxsRho) } else { - idxsDH <- 0L; + idxsDH <- 0L } - verbose && cat(verbose, "Heterozygous SNPs to resample for DH:"); - verbose && str(verbose, idxsDH); + verbose && cat(verbose, "Heterozygous SNPs to resample for DH:") + verbose && str(verbose, idxsDH) # Sanity check - if (.debug) stopifnot(length(idxsDH) == nbrOfDHs); + if (.debug) .stop_if_not(length(idxsDH) == nbrOfDHs) - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Identify loci used to calculate TCN means # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Identify loci used to bootstrap TCN means"); + verbose && enter(verbose, "Identify loci used to bootstrap TCN means") # Identify SNPs and non-SNPs - idxsSNP <- intersect(snps, idxsAll); - idxsNonSNP <- setdiff(idxsAll, idxsSNP); - verbose && cat(verbose, "SNPs:"); - verbose && str(verbose, idxsSNP); - verbose && cat(verbose, "Non-polymorphic loci:"); - verbose && str(verbose, idxsNonSNP); + idxsSNP <- intersect(snps, idxsAll) + idxsNonSNP <- setdiff(idxsAll, idxsSNP) + verbose && cat(verbose, "SNPs:") + verbose && str(verbose, idxsSNP) + verbose && cat(verbose, "Non-polymorphic loci:") + verbose && str(verbose, idxsNonSNP) # Sanity check - if (.debug) stopifnot(length(idxsSNP) + length(idxsNonSNP) == length(idxsAll)); + if (.debug) .stop_if_not(length(idxsSNP) + length(idxsNonSNP) == length(idxsAll)) # Identify heterozygous and homozygous SNPs - idxsHet <- intersect(idxsSNP, hets); + idxsHet <- intersect(idxsSNP, hets) if (nbrOfHoms > 0) { idxsHom <- intersect(idxsSNP, homs) } else { @@ -666,40 +666,40 @@ } # Drop missing values - idxsNonSNP <- intersect(idxsNonSNP, idxsCT); - idxsHet <- intersect(idxsHet, idxsCT); - idxsHom <- intersect(idxsHom, idxsCT); - idxsHetNonDH <- setdiff(idxsHet, idxsDH); - - verbose && cat(verbose, "Heterozygous SNPs to resample for TCN:"); - verbose && str(verbose, idxsHet); - verbose && cat(verbose, "Homozygous SNPs to resample for TCN:"); - verbose && str(verbose, idxsHom); - verbose && cat(verbose, "Non-polymorphic loci to resample for TCN:"); - verbose && str(verbose, idxsNonSNP); - verbose && cat(verbose, "Heterozygous SNPs with non-DH to resample for TCN:"); - verbose && str(verbose, idxsHetNonDH); + idxsNonSNP <- intersect(idxsNonSNP, idxsCT) + idxsHet <- intersect(idxsHet, idxsCT) + idxsHom <- intersect(idxsHom, idxsCT) + idxsHetNonDH <- setdiff(idxsHet, idxsDH) + + verbose && cat(verbose, "Heterozygous SNPs to resample for TCN:") + verbose && str(verbose, idxsHet) + verbose && cat(verbose, "Homozygous SNPs to resample for TCN:") + verbose && str(verbose, idxsHom) + verbose && cat(verbose, "Non-polymorphic loci to resample for TCN:") + verbose && str(verbose, idxsNonSNP) + verbose && cat(verbose, "Heterozygous SNPs with non-DH to resample for TCN:") + verbose && str(verbose, idxsHetNonDH) # Note that length(idxsHetNonDH) may differ from zero in case CT is non-missing # but rho is missing, e.g. CT = sum(c(thetaA,thetaB), na.rm=TRUE) and # thetaB is missing. /HB 2010-12-01 - idxsTCN <- sort(unique(c(idxsHet, idxsHom, idxsNonSNP))); - verbose && cat(verbose, "Loci to resample for TCN:"); - verbose && str(verbose, idxsTCN); + idxsTCN <- sort(unique(c(idxsHet, idxsHom, idxsNonSNP))) + verbose && cat(verbose, "Loci to resample for TCN:") + verbose && str(verbose, idxsTCN) # Sanity check if (.debug) { - stopifnot(length(idxsHet) + length(idxsHom) + length(idxsNonSNP) == nbrOfTCNs); - stopifnot(length(intersect(idxsDH, idxsHetNonDH)) == 0L); - stopifnot(length(idxsTCN) == nbrOfTCNs); + .stop_if_not(length(idxsHet) + length(idxsHom) + length(idxsNonSNP) == nbrOfTCNs) + .stop_if_not(length(intersect(idxsDH, idxsHetNonDH)) == 0L) + .stop_if_not(length(idxsTCN) == nbrOfTCNs) } - verbose && exit(verbose); + verbose && exit(verbose) # These numbers should be preserved when the resampling verbose && printf(verbose, "Number of (#hets, #homs, #nonSNPs): (%d,%d,%d)\n", - length(idxsHet), length(idxsHom), length(idxsNonSNP)); + length(idxsHet), length(idxsHom), length(idxsNonSNP)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -707,24 +707,24 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (nbrOfTCNs > 0L) { # Sanity check - ys <- CT[idxsTCN]; - mu <- avgTCN(ys, na.rm=TRUE); - dMu <- (mu - tcnMeans[jj]); + ys <- CT[idxsTCN] + mu <- avgTCN(ys, na.rm=TRUE) + dMu <- (mu - tcnMeans[jj]) if (abs(dMu) > tol) { - str(list(nbrOfTCNs=nbrOfTCNs, tcnNbrOfLoci=segJJ$tcnNbrOfLoci, mu=mu, tcnMean=tcnMeans[jj], dMu=dMu, "abs(dMu)"=abs(dMu), "range(x[units])"=range(x[idxsTCN]))); - throw(sprintf("INTERNAL ERROR: Incorrectly recalculated TCN mean for Segment #%d (chr %d, tcnId=%d, dhId=%d): %g != %g", jj, chr, tcnId, dhId, mu, tcnMeans[jj])); + str(list(nbrOfTCNs=nbrOfTCNs, tcnNbrOfLoci=segJJ$tcnNbrOfLoci, mu=mu, tcnMean=tcnMeans[jj], dMu=dMu, "abs(dMu)"=abs(dMu), "range(x[units])"=range(x[idxsTCN]))) + throw(sprintf("INTERNAL ERROR: Incorrectly recalculated TCN mean for Segment #%d (chr %d, tcnId=%d, dhId=%d): %g != %g", jj, chr, tcnId, dhId, mu, tcnMeans[jj])) } } - shouldHaveDHs <- (nbrOfDHs > 0L && !is.na(dhMeans[jj])); + shouldHaveDHs <- (nbrOfDHs > 0L && !is.na(dhMeans[jj])) if (shouldHaveDHs) { # Sanity check - ys <- rho[idxsDH]; - mu <- avgDH(ys, na.rm=TRUE); - dMu <- (mu - dhMeans[jj]); + ys <- rho[idxsDH] + mu <- avgDH(ys, na.rm=TRUE) + dMu <- (mu - dhMeans[jj]) if (abs(dMu) > tol) { - str(list(nbrOfDHs=nbrOfDHs, dhNbrOfLoci=segJJ$dhNbrOfLoci, mu=mu, dhMean=dhMeans[jj], dMu=dMu, "abs(dMu)"=abs(dMu), "range(x[units])"=range(x[idxsDH]))); - throw(sprintf("INTERNAL ERROR: Incorrectly recalculated DH mean for Segment #%d (chr %d, tcnId=%d, dhId=%d): %g != %g", jj, chr, tcnId, dhId, mu, dhMeans[jj])); + str(list(nbrOfDHs=nbrOfDHs, dhNbrOfLoci=segJJ$dhNbrOfLoci, mu=mu, dhMean=dhMeans[jj], dMu=dMu, "abs(dMu)"=abs(dMu), "range(x[units])"=range(x[idxsDH]))) + throw(sprintf("INTERNAL ERROR: Incorrectly recalculated DH mean for Segment #%d (chr %d, tcnId=%d, dhId=%d): %g != %g", jj, chr, tcnId, dhId, mu, dhMeans[jj])) } } @@ -732,39 +732,39 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Bootstrap while preserving (#hets, #homs, #nonSNPs) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Bootstrapping while preserving (#hets, #homs, #nonSNPs)"); - verbose && cat(verbose, "Number of bootstrap samples: ", B); + verbose && enter(verbose, "Bootstrapping while preserving (#hets, #homs, #nonSNPs)") + verbose && cat(verbose, "Number of bootstrap samples: ", B) if (!shouldHaveDHs) { - idxsHetNonDH <- idxsDH; + idxsHetNonDH <- idxsDH } - nHoms <- length(idxsHom); - nNonSNPs <- length(idxsNonSNP); - nHetNonDHs <- length(idxsHetNonDH); + nHoms <- length(idxsHom) + nNonSNPs <- length(idxsNonSNP) + nHetNonDHs <- length(idxsHetNonDH) # Defaults - idxsDHBB <- NULL; + idxsDHBB <- NULL # Bootstrap B times for (bb in seq_len(B)) { # (1) Bootstrap DHs if (shouldHaveDHs) { # (a) Resample heterozygous SNPs (=> resampled DH units) - idxsDHBB <- resample(idxsDH, size=nbrOfDHs, replace=TRUE); + idxsDHBB <- resample(idxsDH, size=nbrOfDHs, replace=TRUE) # Extract signals - rhoBB <- rho[idxsDHBB]; + rhoBB <- rho[idxsDHBB] # Calculate bootstrap mean - M[jj,bb,"dh"] <- avgDH(rhoBB, na.rm=TRUE); + M[jj,bb,"dh"] <- avgDH(rhoBB, na.rm=TRUE) } # if (shouldHaveDHs) # (2) Bootstrap TCNs if (nbrOfTCNs > 0L) { # (a) Resample non-DH hets SNPs - idxsHetNonDHBB <- resample(idxsHetNonDH, size=nHetNonDHs, replace=TRUE); - idxsHetBB <- c(idxsDHBB, idxsHetNonDHBB); + idxsHetNonDHBB <- resample(idxsHetNonDH, size=nHetNonDHs, replace=TRUE) + idxsHetBB <- c(idxsDHBB, idxsHetNonDHBB) # (a) Resample homozygous SNPs if (nbrOfHoms > 0) { @@ -774,105 +774,105 @@ } # (b) Resample non-SNPs - idxsNonSNPBB <- resample(idxsNonSNP, size=nNonSNPs, replace=TRUE); + idxsNonSNPBB <- resample(idxsNonSNP, size=nNonSNPs, replace=TRUE) # (c) Resampled TCN units - idxsTCNBB <- c(idxsHetBB, idxsHomBB, idxsNonSNPBB); + idxsTCNBB <- c(idxsHetBB, idxsHomBB, idxsNonSNPBB) # Sanity check if (.debug) { - stopifnot(length(intersect(idxsDHBB, idxsHetNonDHBB)) == 0L); - stopifnot(length(intersect(idxsHetBB, idxsHomBB)) == 0L); - stopifnot(length(intersect(idxsHetBB, idxsNonSNPBB)) == 0L); - stopifnot(length(intersect(idxsHomBB, idxsNonSNPBB)) == 0L); + .stop_if_not(length(intersect(idxsDHBB, idxsHetNonDHBB)) == 0L) + .stop_if_not(length(intersect(idxsHetBB, idxsHomBB)) == 0L) + .stop_if_not(length(intersect(idxsHetBB, idxsNonSNPBB)) == 0L) + .stop_if_not(length(intersect(idxsHomBB, idxsNonSNPBB)) == 0L) } # Extract signals - CTBB <- CT[idxsTCNBB]; + CTBB <- CT[idxsTCNBB] # Calculate bootstrap mean - M[jj,bb,"tcn"] <- avgTCN(CTBB, na.rm=TRUE); + M[jj,bb,"tcn"] <- avgTCN(CTBB, na.rm=TRUE) } # if (nbrOfTCNs > 0L) } # (for bb ...) - verbose && exit(verbose); + verbose && exit(verbose) - verbose && exit(verbose); + verbose && exit(verbose) } # for (jj ...) - verbose && cat(verbose, "Bootstrapped segment mean levels"); - verbose && str(verbose, M); + verbose && cat(verbose, "Bootstrapped segment mean levels") + verbose && str(verbose, M) # Sanity check - stopifnot(all(!is.nan(M))); + .stop_if_not(all(!is.nan(M))) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Add (C1,C2) bootstrap mean levels # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Calculating (C1,C2) mean levels from (TCN,DH) mean levels"); - C1 <- (1-M[,,"dh", drop=FALSE]) * M[,,"tcn", drop=FALSE] / 2; - C2 <- M[,,"tcn", drop=FALSE] - C1; - M[,,"c1"] <- C1; - M[,,"c2"] <- C2; - verbose && str(verbose, M); + verbose && enter(verbose, "Calculating (C1,C2) mean levels from (TCN,DH) mean levels") + C1 <- (1-M[,,"dh", drop=FALSE]) * M[,,"tcn", drop=FALSE] / 2 + C2 <- M[,,"tcn", drop=FALSE] - C1 + M[,,"c1"] <- C1 + M[,,"c2"] <- C2 + verbose && str(verbose, M) # Sanity check - stopifnot(dim(M)[1L] == nbrOfSegments); - stopifnot(all(!is.nan(M))); + .stop_if_not(dim(M)[1L] == nbrOfSegments) + .stop_if_not(all(!is.nan(M))) # Not needed anymore - C1 <- C2 <- NULL; - verbose && exit(verbose); + C1 <- C2 <- NULL + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Bootstrap polar (alpha,radius,manhattan) for change points # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Calculating polar (alpha,radius,manhattan) for change points"); - C <- M[,,c("c1","c2"), drop=FALSE]; + verbose && enter(verbose, "Calculating polar (alpha,radius,manhattan) for change points") + C <- M[,,c("c1","c2"), drop=FALSE] # Calculate difference # (will be empty if nbrOfSegments == 1, but that's ok/intended) - D <- C[-nbrOfSegments,,, drop=FALSE] - C[-1L,,, drop=FALSE]; + D <- C[-nbrOfSegments,,, drop=FALSE] - C[-1L,,, drop=FALSE] # Sanity check - stopifnot(dim(D)[1L] == nbrOfSegments-1L); - stopifnot(all(!is.nan(D))); - C <- NULL; # Not needed anymore + .stop_if_not(dim(D)[1L] == nbrOfSegments-1L) + .stop_if_not(all(!is.nan(D))) + C <- NULL # Not needed anymore # Allocate array - dimnames <- dimnames(D); - dimnames[[3L]] <- c("alpha", "radius", "manhattan", "d1", "d2"); - dim <- dim(D); - dim[3L] <- length(dimnames[[3L]]); - P <- array(NA_real_, dim=dim, dimnames=dimnames); - stopifnot(dim(P)[1L] == nbrOfSegments-1L); + dimnames <- dimnames(D) + dimnames[[3L]] <- c("alpha", "radius", "manhattan", "d1", "d2") + dim <- dim(D) + dim[3L] <- length(dimnames[[3L]]) + P <- array(NA_real_, dim=dim, dimnames=dimnames) + .stop_if_not(dim(P)[1L] == nbrOfSegments-1L) if (nbrOfSegments >= 2L) { - verbose && str(verbose, D); - P[,,"alpha"] <- atan2(D[,,2], D[,,1]); # Changepoint angles in (0,2*pi) - P[,,"radius"] <- sqrt(D[,,2]^2 + D[,,1]^2); - P[,,"manhattan"] <- abs(D[,,2]) + abs(D[,,1]); - P[,,"d1"] <- D[,,1]; - P[,,"d2"] <- D[,,2]; - } - alpha <- D <- NULL; # Not needed anymore - verbose && cat(verbose, "Bootstrapped change points"); - verbose && str(verbose, P); + verbose && str(verbose, D) + P[,,"alpha"] <- atan2(D[,,2], D[,,1]) # Changepoint angles in (0,2*pi) + P[,,"radius"] <- sqrt(D[,,2]^2 + D[,,1]^2) + P[,,"manhattan"] <- abs(D[,,2]) + abs(D[,,1]) + P[,,"d1"] <- D[,,1] + P[,,"d2"] <- D[,,2] + } + alpha <- D <- NULL # Not needed anymore + verbose && cat(verbose, "Bootstrapped change points") + verbose && str(verbose, P) # Sanity check - stopifnot(dim(P)[1L] == nbrOfSegments-1L); - stopifnot(all(!is.nan(P))); - verbose && exit(verbose); + .stop_if_not(dim(P)[1L] == nbrOfSegments-1L) + .stop_if_not(all(!is.nan(P))) + verbose && exit(verbose) - boot <- list(segments=M, changepoints=P); + boot <- list(segments=M, changepoints=P) # Cache? if (cache) { - saveCache(boot, key=key, dirs=dirs); - verbose && cat(verbose, "Saved results to cache."); + saveCache(boot, key=key, dirs=dirs) + verbose && cat(verbose, "Saved results to cache.") } - verbose && exit(verbose); + verbose && exit(verbose) - boot; + boot }, private=TRUE) # bootstrapSegmentsAndChangepoints() @@ -882,21 +882,21 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'what': - what <- match.arg(what); + what <- match.arg(what) if (what == "segment") { - data <- getSegments(fit); + data <- getSegments(fit) } else if (what == "changepoint") { - data <- getChangePoints(fit); + data <- getChangePoints(fit) } - grep("^[^_]+_[^_]+$", colnames(data), value=TRUE); + grep("^[^_]+_[^_]+$", colnames(data), value=TRUE) }, protected=TRUE) # findBootstrapSummaries() setMethodS3("hasBootstrapSummaries", "PairedPSCBS", function(fit, ...) { - fields <- findBootstrapSummaries(fit, ...); - (length(fields) > 0L); + fields <- findBootstrapSummaries(fit, ...) + (length(fields) > 0L) }) setMethodS3("clearBootstrapSummaries", "PairedPSCBS", function(fit, what=c("segment", "changepoint"), ..., verbose=FALSE) { @@ -904,43 +904,43 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'what': - what <- unique(match.arg(what, several.ok=TRUE)); + what <- unique(match.arg(what, several.ok=TRUE)) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Clearing bootstrap summaries"); + verbose && enter(verbose, "Clearing bootstrap summaries") - whats <- what; + whats <- what for (what in whats) { - verbose && enter(verbose, sprintf("Clearing %ss", what)); + verbose && enter(verbose, sprintf("Clearing %ss", what)) - fields <- findBootstrapSummaries(fit, what=what, ...); + fields <- findBootstrapSummaries(fit, what=what, ...) if (what == "segment") { - data <- getSegments(fit); - data <- data[,!is.element(colnames(data), fields)]; - fit$output <- data; + data <- getSegments(fit) + data <- data[,!is.element(colnames(data), fields)] + fit$output <- data } else if (what == "changepoint") { - data <- getChangePoints(fit); - data <- data[,!is.element(colnames(data), fields)]; - fit$changepoints <- data; + data <- getChangePoints(fit) + data <- data[,!is.element(colnames(data), fields)] + fit$changepoints <- data } # Sanity check - fields <- findBootstrapSummaries(fit, what=what, ...); - stopifnot(length(fields) == 0L); + fields <- findBootstrapSummaries(fit, what=what, ...) + .stop_if_not(length(fields) == 0L) - data <- fields <- NULL; # Not needed anymoew + data <- fields <- NULL # Not needed anymoew - verbose && exit(verbose); + verbose && exit(verbose) } # for (what ...) - verbose && exit(verbose); + verbose && exit(verbose) - fit; + fit }, protected=TRUE) # clearBootstrapSummaries() diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.BOOT.sets.R r-cran-pscbs-0.64.0/R/PairedPSCBS.BOOT.sets.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.BOOT.sets.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.BOOT.sets.R 2018-08-12 21:30:44.000000000 +0000 @@ -42,29 +42,29 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'B': - B <- Arguments$getInteger(B, range=c(0,Inf)); + B <- Arguments$getInteger(B, range=c(0,Inf)) # Argument 'by': - by <- match.arg(by); + by <- match.arg(by) # Argument 'seed': if (!is.null(seed)) { - seed <- Arguments$getInteger(seed); + seed <- Arguments$getInteger(seed) } # Argument '.validate': - .validate <- Arguments$getLogical(.validate); + .validate <- Arguments$getLogical(.validate) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Bootstrapping (TCN,DH,C1,C2) segment mean levels"); + verbose && enter(verbose, "Bootstrapping (TCN,DH,C1,C2) segment mean levels") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Set the random seed @@ -79,41 +79,41 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract data and estimates # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getLocusData(fit); - tcnSegRows <- fit$tcnSegRows; - dhSegRows <- fit$dhSegRows; - segs <- getSegments(fit); - params <- fit$params; + data <- getLocusData(fit) + tcnSegRows <- fit$tcnSegRows + dhSegRows <- fit$dhSegRows + segs <- getSegments(fit) + params <- fit$params # Sanity checks if (!params$joinSegments) { - throw("Cannot bootstrap TCN and DH by segments unless PSCNs are segmented using joinSegments=TRUE."); + throw("Cannot bootstrap TCN and DH by segments unless PSCNs are segmented using joinSegments=TRUE.") } if (regexpr(",", params$flavor, fixed=TRUE) != -1L) { - throw(sprintf("Cannot bootstrap TCN and DH by segments if PSCNs are segmented using flavor=\"%s\".", params$flavor)); + throw(sprintf("Cannot bootstrap TCN and DH by segments if PSCNs are segmented using flavor=\"%s\".", params$flavor)) } # Sanity check (same as above, but just in case) - stopifnot(all(segs$tcnStart == segs$dhStart, na.rm=TRUE)); - stopifnot(all(segs$tcnEnd == segs$dhEnd, na.rm=TRUE)); + .stop_if_not(all(segs$tcnStart == segs$dhStart, na.rm=TRUE)) + .stop_if_not(all(segs$tcnEnd == segs$dhEnd, na.rm=TRUE)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Get signals # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Get (x,TCN,BAF) data - chromosome <- data$chromosome; - x <- data$x; - CT <- data$CT; - betaT <- data[[by]]; - muN <- data$muN; + chromosome <- data$chromosome + x <- data$x + CT <- data$CT + betaT <- data[[by]] + muN <- data$muN rho <- data$rho hasDH <- !is.null(rho) # Not needed anymore - data <- NULL; + data <- NULL # Sanity checks - stopifnot(all(!is.na(chromosome) & !is.na(x))); + .stop_if_not(all(!is.na(chromosome) & !is.na(x))) @@ -121,9 +121,9 @@ # Classify each locus as (i) heterozygous SNP, (ii) homozygous SNP, # or (iii) non-polymorphic loci # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Identifying heterozygous & homozygous SNPs and non-polymorphic loci"); - nbrOfLoci <- length(x); - verbose && cat(verbose, "Number of loci: ", nbrOfLoci); + verbose && enter(verbose, "Identifying heterozygous & homozygous SNPs and non-polymorphic loci") + nbrOfLoci <- length(x) + verbose && cat(verbose, "Number of loci: ", nbrOfLoci) # SNPs are identifies as those loci that have non-missing 'muN' (& betaTN') if (hasDH) { @@ -133,39 +133,39 @@ isSnp <- (!is.na(muN) & !is.na(betaT)) isHet <- isSnp & (muN == 1/2) } - snps <- which(isSnp); - nonSNPs <- which(!isSnp); - nbrOfSNPs <- sum(isSnp); - nbrOfNonSNPs <- sum(!isSnp); - verbose && cat(verbose, "Number of SNPs: ", nbrOfSNPs); - verbose && cat(verbose, "Number of non-SNPs: ", nbrOfNonSNPs); + snps <- which(isSnp) + nonSNPs <- which(!isSnp) + nbrOfSNPs <- sum(isSnp) + nbrOfNonSNPs <- sum(!isSnp) + verbose && cat(verbose, "Number of SNPs: ", nbrOfSNPs) + verbose && cat(verbose, "Number of non-SNPs: ", nbrOfNonSNPs) # Sanity checks - stopifnot(length(intersect(snps, nonSNPs)) == 0L); + .stop_if_not(length(intersect(snps, nonSNPs)) == 0L) # Heterozygous SNPs - hets <- which(isSnp & isHet); - homs <- which(isSnp & !isHet); - nbrOfHets <- length(hets); - nbrOfHoms <- length(homs); + hets <- which(isSnp & isHet) + homs <- which(isSnp & !isHet) + nbrOfHets <- length(hets) + nbrOfHoms <- length(homs) if (!hasDH) { verbose && printf(verbose, "Number of heterozygous SNPs: %d (%.2f%%)\n", - nbrOfHets, 100*nbrOfHets/nbrOfSNPs); + nbrOfHets, 100*nbrOfHets/nbrOfSNPs) verbose && printf(verbose, "Number of homozygous SNPs: %d (%.2f%%)\n", - nbrOfHoms, 100*nbrOfHoms/nbrOfSNPs); + nbrOfHoms, 100*nbrOfHoms/nbrOfSNPs) } # Sanity checks - stopifnot(length(intersect(hets, homs)) == 0L); - stopifnot(nbrOfHets + nbrOfHoms == nbrOfSNPs); + .stop_if_not(length(intersect(hets, homs)) == 0L) + .stop_if_not(nbrOfHets + nbrOfHoms == nbrOfSNPs) # Sanity checks - stopifnot(length(isSnp) == nbrOfLoci); - stopifnot(length(isHet) == nbrOfLoci); + .stop_if_not(length(isSnp) == nbrOfLoci) + .stop_if_not(length(isHet) == nbrOfLoci) # Not needed anymore - muN <- isSnp <- NULL; - verbose && exit(verbose); + muN <- isSnp <- NULL + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -173,55 +173,55 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (!hasDH) { # Calculate DHs for heterozygous SNPs - rho <- 2*abs(betaT - 1/2); + rho <- 2*abs(betaT - 1/2) # DH is by definition only defined for heterozygous SNPs. # For simplicity, we set it to be NA for non-heterozygous loci. - rho[!isHet] <- NA; + rho[!isHet] <- NA } # Not needed anymore - betaT <- isHet <- NULL; + betaT <- isHet <- NULL # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Resample (TCN,DH) within each segments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nbrOfSegments <- nrow(segs); + nbrOfSegments <- nrow(segs) # Allocate JxBx4 matrix M of bootstrap means - dim <- c(nbrOfSegments, B, 4L); - dimnames <- list(NULL, NULL, c("tcn", "dh", "c1", "c2")); + dim <- c(nbrOfSegments, B, 4L) + dimnames <- list(NULL, NULL, c("tcn", "dh", "c1", "c2")) # Identify all loci with non-missing signals - idxsCT <- which(!is.na(CT)); - idxsRho <- which(!is.na(rho)); + idxsCT <- which(!is.na(CT)) + idxsRho <- which(!is.na(rho)) # Not needed anymore - CT <- rho <- NULL; + CT <- rho <- NULL # Vectorized pre-adjustments for (key in c("tcnNbrOfLoci", "dhNbrOfLoci")) { - counts <- segs[[key]]; - counts[is.na(counts)] <- 0L; - segs[[key]] <- counts; - counts <- NULL; # Not needed anymore + counts <- segs[[key]] + counts[is.na(counts)] <- 0L + segs[[key]] <- counts + counts <- NULL # Not needed anymore } - hasTcnLoci <- (is.finite(tcnSegRows[,1L]) & is.finite(tcnSegRows[,2L])); - hasDhLoci <- (is.finite(dhSegRows[,1L]) & is.finite(dhSegRows[,2L])); + hasTcnLoci <- (is.finite(tcnSegRows[,1L]) & is.finite(tcnSegRows[,2L])) + hasDhLoci <- (is.finite(dhSegRows[,1L]) & is.finite(dhSegRows[,2L])) # Identify "splitter" segments which have no data - chrs <- segs[["chromosome"]]; - tcnIds <- segs[["tcnId"]]; - dhIds <- segs[["dhId"]]; - dhMeans <- segs[["dhMean"]]; - isSplitter <- (is.na(chrs) & is.na(tcnIds) & is.na(dhIds)); + chrs <- segs[["chromosome"]] + tcnIds <- segs[["tcnId"]] + dhIds <- segs[["dhId"]] + dhMeans <- segs[["dhMean"]] + isSplitter <- (is.na(chrs) & is.na(tcnIds) & is.na(dhIds)) # Get all segment indices except for "splitters" - jjs <- seq_len(nbrOfSegments); - jjs <- jjs[!isSplitter]; + jjs <- seq_len(nbrOfSegments) + jjs <- jjs[!isSplitter] # Allocate list to hold the results @@ -236,171 +236,171 @@ ), by = by, seed = seed - ); - locusSet <- vector("list", length=nbrOfSegments); + ) + locusSet <- vector("list", length=nbrOfSegments) # For each segment jj = 1, 2, ..., S for (jj in jjs) { - chr <- chrs[jj]; - tcnId <- tcnIds[jj]; - dhId <- dhIds[jj]; + chr <- chrs[jj] + tcnId <- tcnIds[jj] + dhId <- dhIds[jj] - verbose && enter(verbose, sprintf("Segment #%d (chr %d, tcnId=%d, dhId=%d) of %d", jj, chr, tcnId, dhId, nbrOfSegments)); + verbose && enter(verbose, sprintf("Segment #%d (chr %d, tcnId=%d, dhId=%d) of %d", jj, chr, tcnId, dhId, nbrOfSegments)) # Sanity check if (.validate) { - stopifnot(!is.na(chr) && !is.na(tcnId) && !is.na(dhId)); + .stop_if_not(!is.na(chr) && !is.na(tcnId) && !is.na(dhId)) } # Get the segment data - segJJ <- segs[jj,,drop=FALSE]; - nbrOfTCNs <- segJJ[,"tcnNbrOfLoci"]; - nbrOfDHs <- segJJ[,"dhNbrOfLoci"]; + segJJ <- segs[jj,,drop=FALSE] + nbrOfTCNs <- segJJ[,"tcnNbrOfLoci"] + nbrOfDHs <- segJJ[,"dhNbrOfLoci"] if (verbose) { - print(verbose, segJJ); - cat(verbose, "Number of TCNs: ", nbrOfTCNs); - cat(verbose, "Number of DHs: ", nbrOfDHs); + print(verbose, segJJ) + cat(verbose, "Number of TCNs: ", nbrOfTCNs) + cat(verbose, "Number of DHs: ", nbrOfDHs) } if (.validate) { - stopifnot(!is.na(nbrOfTCNs)); - stopifnot(!is.na(nbrOfDHs)); + .stop_if_not(!is.na(nbrOfTCNs)) + .stop_if_not(!is.na(nbrOfDHs)) } - tcnSegRowJJ <- unlist(tcnSegRows[jj,], use.names=FALSE); - dhSegRowJJ <- unlist(dhSegRows[jj,], use.names=FALSE); + tcnSegRowJJ <- unlist(tcnSegRows[jj,], use.names=FALSE) + dhSegRowJJ <- unlist(dhSegRows[jj,], use.names=FALSE) # Indices of all loci if (hasTcnLoci[jj]) { - idxsAll <- tcnSegRowJJ[1L]:tcnSegRowJJ[2L]; + idxsAll <- tcnSegRowJJ[1L]:tcnSegRowJJ[2L] } else { - idxsAll <- integer(0L); + idxsAll <- integer(0L) } if (verbose) { - str(verbose, idxsAll); - print(verbose, hpaste(idxsAll), level=-120); - str(verbose, idxsCT); - print(verbose, hpaste(idxsCT), level=-120); + str(verbose, idxsAll) + print(verbose, hpaste(idxsAll), level=-120) + str(verbose, idxsCT) + print(verbose, hpaste(idxsCT), level=-120) } # Keep only loci with finite TCNs - idxsAll <- intersect(idxsAll, idxsCT); + idxsAll <- intersect(idxsAll, idxsCT) if (verbose) { - str(verbose, idxsAll); - print(verbose, hpaste(idxsAll), level=-120); + str(verbose, idxsAll) + print(verbose, hpaste(idxsAll), level=-120) } # Sanity check if (length(idxsAll) != nbrOfTCNs) { - verbose && str(verbose, setdiff(idxsCT, idxsAll)); - throw("INTERNAL ERROR: length(idxsAll) != nbrOfTCNs: ", length(idxsAll), " != ", nbrOfTCNs); + verbose && str(verbose, setdiff(idxsCT, idxsAll)) + throw("INTERNAL ERROR: length(idxsAll) != nbrOfTCNs: ", length(idxsAll), " != ", nbrOfTCNs) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Identify loci used to calculate DH means # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Identify loci used to bootstrap DH means"); + verbose && enter(verbose, "Identify loci used to bootstrap DH means") if (hasDhLoci[jj]) { - idxsDH <- dhSegRowJJ[1L]:dhSegRowJJ[2L]; - idxsDH <- intersect(idxsDH, hets); + idxsDH <- dhSegRowJJ[1L]:dhSegRowJJ[2L] + idxsDH <- intersect(idxsDH, hets) # Drop missing values - idxsDH <- intersect(idxsDH, idxsRho); + idxsDH <- intersect(idxsDH, idxsRho) } else { - idxsDH <- integer(0L); + idxsDH <- integer(0L) } - verbose && cat(verbose, "Heterozygous SNPs to resample for DH:"); - verbose && str(verbose, idxsDH); + verbose && cat(verbose, "Heterozygous SNPs to resample for DH:") + verbose && str(verbose, idxsDH) # Sanity check - if (.validate) stopifnot(length(idxsDH) == nbrOfDHs); + if (.validate) .stop_if_not(length(idxsDH) == nbrOfDHs) - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Identify loci used to calculate TCN means # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Identify loci used to bootstrap TCN means"); + verbose && enter(verbose, "Identify loci used to bootstrap TCN means") # Identify SNPs and non-SNPs - idxsSNP <- intersect(snps, idxsAll); - idxsNonSNP <- setdiff(idxsAll, idxsSNP); + idxsSNP <- intersect(snps, idxsAll) + idxsNonSNP <- setdiff(idxsAll, idxsSNP) if (verbose) { - cat(verbose, "SNPs:"); - str(verbose, idxsSNP); - cat(verbose, "Non-polymorphic loci:"); - str(verbose, idxsNonSNP); + cat(verbose, "SNPs:") + str(verbose, idxsSNP) + cat(verbose, "Non-polymorphic loci:") + str(verbose, idxsNonSNP) } # Sanity check - if (.validate) stopifnot(length(idxsSNP) + length(idxsNonSNP) == length(idxsAll)); + if (.validate) .stop_if_not(length(idxsSNP) + length(idxsNonSNP) == length(idxsAll)) # Identify heterozygous and homozygous SNPs - idxsHet <- intersect(idxsSNP, hets); + idxsHet <- intersect(idxsSNP, hets) if (nbrOfHoms > 0) { - idxsHom <- intersect(idxsSNP, homs); + idxsHom <- intersect(idxsSNP, homs) } else { idxsHom <- integer(0L) } # Drop missing values - idxsNonSNP <- intersect(idxsNonSNP, idxsCT); - idxsHet <- intersect(idxsHet, idxsCT); + idxsNonSNP <- intersect(idxsNonSNP, idxsCT) + idxsHet <- intersect(idxsHet, idxsCT) if (nbrOfHoms > 0) { idxsHom <- intersect(idxsHom, idxsCT) } - idxsHetNonDH <- setdiff(idxsHet, idxsDH); + idxsHetNonDH <- setdiff(idxsHet, idxsDH) if (verbose) { - cat(verbose, "Heterozygous SNPs to resample for TCN:"); - str(verbose, idxsHet); - cat(verbose, "Homozygous SNPs to resample for TCN:"); - str(verbose, idxsHom); - cat(verbose, "Non-polymorphic loci to resample for TCN:"); - str(verbose, idxsNonSNP); - cat(verbose, "Heterozygous SNPs with non-DH to resample for TCN:"); - str(verbose, idxsHetNonDH); + cat(verbose, "Heterozygous SNPs to resample for TCN:") + str(verbose, idxsHet) + cat(verbose, "Homozygous SNPs to resample for TCN:") + str(verbose, idxsHom) + cat(verbose, "Non-polymorphic loci to resample for TCN:") + str(verbose, idxsNonSNP) + cat(verbose, "Heterozygous SNPs with non-DH to resample for TCN:") + str(verbose, idxsHetNonDH) } # Note that length(idxsHetNonDH) may differ from zero in case CT is non-missing # but rho is missing, e.g. CT = sum(c(thetaA,thetaB), na.rm=TRUE) and # thetaB is missing. /HB 2010-12-01 - idxsTCN <- sort(unique(c(idxsHet, idxsHom, idxsNonSNP))); + idxsTCN <- sort(unique(c(idxsHet, idxsHom, idxsNonSNP))) if (verbose) { - cat(verbose, "Loci to resample for TCN:"); - str(verbose, idxsTCN); + cat(verbose, "Loci to resample for TCN:") + str(verbose, idxsTCN) } # Sanity check if (.validate) { - stopifnot(length(idxsHet) + length(idxsHom) + length(idxsNonSNP) == nbrOfTCNs); - stopifnot(length(intersect(idxsDH, idxsHetNonDH)) == 0L); - stopifnot(length(idxsTCN) == nbrOfTCNs); + .stop_if_not(length(idxsHet) + length(idxsHom) + length(idxsNonSNP) == nbrOfTCNs) + .stop_if_not(length(intersect(idxsDH, idxsHetNonDH)) == 0L) + .stop_if_not(length(idxsTCN) == nbrOfTCNs) } - verbose && exit(verbose); + verbose && exit(verbose) # These numbers should be preserved when the resampling verbose && printf(verbose, "Number of (#hets, #homs, #nonSNPs): (%d,%d,%d)\n", - length(idxsHet), length(idxsHom), length(idxsNonSNP)); + length(idxsHet), length(idxsHom), length(idxsNonSNP)) # Workaround: ... Why? /HB 2013-10-22 - shouldHaveDHs <- (nbrOfDHs > 0L && !is.na(dhMeans[jj])); + shouldHaveDHs <- (nbrOfDHs > 0L && !is.na(dhMeans[jj])) if (!shouldHaveDHs) { - idxsHetNonDH <- idxsDH; - stopifnot(all(idxsHetNonDH > 0L)); + idxsHetNonDH <- idxsDH + .stop_if_not(all(idxsHetNonDH > 0L)) } - shouldHaveDHs <- NULL; # Not needed anymore + shouldHaveDHs <- NULL # Not needed anymore - nHoms <- length(idxsHom); - nNonSNPs <- length(idxsNonSNP); - nHetNonDHs <- length(idxsHetNonDH); + nHoms <- length(idxsHom) + nNonSNPs <- length(idxsNonSNP) + nHetNonDHs <- length(idxsHetNonDH) locusSetJJ <- list( segment = segJJ, @@ -414,56 +414,56 @@ hom = idxsHom, hetNonDh = idxsHetNonDH ) - ); + ) # Sanity checks if (.validate) { - loci <- locusSetJJ$loci; + loci <- locusSetJJ$loci for (key in names(loci)) { - idxs <- loci[[key]]; + idxs <- loci[[key]] # Assert positive indices - stopifnot(all(idxs > 0L)); + .stop_if_not(all(idxs > 0L)) # Assert a unique set of indices - stopifnot(!any(duplicated(idxs))); + .stop_if_not(!any(duplicated(idxs))) } # Assert non-overlapping sets with(loci, { - stopifnot(length(intersect(dh, hetNonDh)) == 0L); - stopifnot(length(intersect(het, hom)) == 0L); - stopifnot(length(intersect(het, nonSnp)) == 0L); - stopifnot(length(intersect(hom, nonSnp)) == 0L); - }); - loci <- NULL; # Not needed anymore + .stop_if_not(length(intersect(dh, hetNonDh)) == 0L) + .stop_if_not(length(intersect(het, hom)) == 0L) + .stop_if_not(length(intersect(het, nonSnp)) == 0L) + .stop_if_not(length(intersect(hom, nonSnp)) == 0L) + }) + loci <- NULL # Not needed anymore } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Identify bootstrap locus sets preserving (#hets, #homs, #nonSNPs) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Identifying bootstrap locus sets that preservs (#hets, #homs, #nonSNPs)"); - verbose && cat(verbose, "Number of bootstrap samples: ", B); + verbose && enter(verbose, "Identifying bootstrap locus sets that preservs (#hets, #homs, #nonSNPs)") + verbose && cat(verbose, "Number of bootstrap samples: ", B) - nbrOfTCNs <- nbrOfDHs+nHoms+nNonSNPs; - nbrOfHets <- nbrOfDHs+nHetNonDHs; + nbrOfTCNs <- nbrOfDHs+nHoms+nNonSNPs + nbrOfHets <- nbrOfDHs+nHetNonDHs # Allocate index matrices - tcn <- matrix(NA_integer_, nrow=nbrOfTCNs, ncol=B); - dh <- matrix(NA_integer_, nrow=nbrOfDHs, ncol=B); - nonSnp <- matrix(NA_integer_, nrow=nNonSNPs, ncol=B); - het <- matrix(NA_integer_, nrow=nbrOfHets, ncol=B); - hom <- matrix(NA_integer_, nrow=nHoms, ncol=B); - hetNonDh <- matrix(NA_integer_, nrow=nHetNonDHs, ncol=B); + tcn <- matrix(NA_integer_, nrow=nbrOfTCNs, ncol=B) + dh <- matrix(NA_integer_, nrow=nbrOfDHs, ncol=B) + nonSnp <- matrix(NA_integer_, nrow=nNonSNPs, ncol=B) + het <- matrix(NA_integer_, nrow=nbrOfHets, ncol=B) + hom <- matrix(NA_integer_, nrow=nHoms, ncol=B) + hetNonDh <- matrix(NA_integer_, nrow=nHetNonDHs, ncol=B) ## resample <- function(x, size, ...) { -## stopifnot(size == length(x)); -## x; +## .stop_if_not(size == length(x)) +## x ## } # resample() # (1) Bootstrap DH loci if (nbrOfDHs > 0L) { # (a) Resample heterozygous SNPs (=> resampled DH units) for (bb in seq_len(B)) { - dh[,bb] <- resample(idxsDH, size=nbrOfDHs, replace=TRUE); + dh[,bb] <- resample(idxsDH, size=nbrOfDHs, replace=TRUE) } } @@ -471,47 +471,47 @@ # (a) Resample non-DH hets SNPs if (nHetNonDHs > 0L) { for (bb in seq_len(B)) { - idxsHetNonDHBB <- resample(idxsHetNonDH, size=nHetNonDHs, replace=TRUE); - hetNonDh[,bb] <- idxsHetNonDHBB; + idxsHetNonDHBB <- resample(idxsHetNonDH, size=nHetNonDHs, replace=TRUE) + hetNonDh[,bb] <- idxsHetNonDHBB } } # (b) Resample homozygous SNPs if (nHoms > 0L) { for (bb in seq_len(B)) { - idxsHomBB <- resample(idxsHom, size=nHoms, replace=TRUE); - hom[,bb] <- idxsHomBB; + idxsHomBB <- resample(idxsHom, size=nHoms, replace=TRUE) + hom[,bb] <- idxsHomBB } } # (c) Resample non-SNPs if (nNonSNPs > 0L) { for (bb in seq_len(B)) { - idxsNonSNPBB <- resample(idxsNonSNP, size=nNonSNPs, replace=TRUE); - nonSnp[,bb] <- idxsNonSNPBB; + idxsNonSNPBB <- resample(idxsNonSNP, size=nNonSNPs, replace=TRUE) + nonSnp[,bb] <- idxsNonSNPBB } } # (d) Resampled hets if (nbrOfHets > 0L) { for (bb in seq_len(B)) { - idxsDHBB <- dh[,bb]; - idxsHetNonDHBB <- hetNonDh[,bb]; - idxsHetBB <- c(idxsDHBB, idxsHetNonDHBB); -# idxsHetBB <- sort(idxsHetBB); - het[,bb] <- idxsHetBB; + idxsDHBB <- dh[,bb] + idxsHetNonDHBB <- hetNonDh[,bb] + idxsHetBB <- c(idxsDHBB, idxsHetNonDHBB) +# idxsHetBB <- sort(idxsHetBB) + het[,bb] <- idxsHetBB } } # (e) Update TCN loci if (nbrOfTCNs > 0L) { for (bb in seq_len(B)) { - idxsHetBB <- het[,bb]; - idxsHomBB <- hom[,bb]; - idxsNonSNPBB <- nonSnp[,bb]; - idxsTCNBB <- c(idxsHetBB, idxsHomBB, idxsNonSNPBB); -# idxsTCNBB <- sort(idxsTCNBB); - tcn[,bb] <- idxsTCNBB; + idxsHetBB <- het[,bb] + idxsHomBB <- hom[,bb] + idxsNonSNPBB <- nonSnp[,bb] + idxsTCNBB <- c(idxsHetBB, idxsHomBB, idxsNonSNPBB) +# idxsTCNBB <- sort(idxsTCNBB) + tcn[,bb] <- idxsTCNBB } } @@ -526,40 +526,40 @@ hom = hom, hetNonDh = hetNonDh ) - ); + ) # Sanity check if (.validate) { - loci <- locusSetJJ$loci; - lociB <- locusSetJJ$bootstrap$loci; + loci <- locusSetJJ$loci + lociB <- locusSetJJ$bootstrap$loci for (key in names(lociB)) { - idxs <- loci[[key]]; - idxsB <- lociB[[key]]; - idxsB <- unique(sort(idxsB)); - stopifnot(all(is.element(idxsB, idxs))); + idxs <- loci[[key]] + idxsB <- lociB[[key]] + idxsB <- unique(sort(idxsB)) + .stop_if_not(all(is.element(idxsB, idxs))) } - loci <- lociB <- NULL; # Not needed anymore + loci <- lociB <- NULL # Not needed anymore } - verbose && exit(verbose); + verbose && exit(verbose) # Record - locusSet[[jj]] <- locusSetJJ; + locusSet[[jj]] <- locusSetJJ # Not needed anymore - locusSetJJ <- NULL; - verbose && exit(verbose); + locusSetJJ <- NULL + verbose && exit(verbose) } # for (jj ...) # Sanity checks - stopifnot(is.list(locusSet)); - stopifnot(length(locusSet) == nbrOfSegments); + .stop_if_not(is.list(locusSet)) + .stop_if_not(length(locusSet) == nbrOfSegments) - verbose && exit(verbose); + verbose && exit(verbose) - res$locusSet <- locusSet; + res$locusSet <- locusSet - res; + res }, protected=TRUE) # getBootstrapLocusSets() diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.callAB.R r-cran-pscbs-0.64.0/R/PairedPSCBS.callAB.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.callAB.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.callAB.R 2018-08-12 21:30:44.000000000 +0000 @@ -54,59 +54,59 @@ #*/########################################################################### setMethodS3("callAB", "PairedPSCBS", function(fit, flavor=c("DeltaAB*"), ..., minSize=1, xorCalls=TRUE, force=FALSE) { # Argument 'flavor': - flavor <- match.arg(flavor); + flavor <- match.arg(flavor) # Argument 'minSize': - minSize <- Arguments$getDouble(minSize, range=c(1,Inf)); + minSize <- Arguments$getDouble(minSize, range=c(1,Inf)) # Argument 'xorCalls': - xorCalls <- Arguments$getLogical(xorCalls); + xorCalls <- Arguments$getLogical(xorCalls) # Already done? - segs <- as.data.frame(fit); - calls <- segs$abCall; + segs <- as.data.frame(fit) + calls <- segs$abCall if (!force && !is.null(calls)) { - return(invisible(fit)); + return(invisible(fit)) } if (flavor == "DeltaAB*") { - fit <- callAllelicBalanceByDH(fit, ...); + fit <- callAllelicBalanceByDH(fit, ...) } else { - throw("Cannot call allelic balance. Unsupported flavor: ", flavor); + throw("Cannot call allelic balance. Unsupported flavor: ", flavor) } # Don't call segments with too few data points? if (minSize > 1) { - segs <- as.data.frame(fit); - ns <- segs$dhNbrOfLoci; - calls <- segs$abCall; - calls[ns < minSize] <- NA; - segs$abCall <- calls; - fit$output <- segs; + segs <- as.data.frame(fit) + ns <- segs$dhNbrOfLoci + calls <- segs$abCall + calls[ns < minSize] <- NA + segs$abCall <- calls + fit$output <- segs # Not needed anymore - segs <- calls <- NULL; + segs <- calls <- NULL } # Don't call a segment AB if it already called LOH? if (xorCalls) { - segs <- as.data.frame(fit); + segs <- as.data.frame(fit) if (is.element("lohCall", names(segs))) { - calls <- segs$abCall; - otherCalls <- segs$lohCall; + calls <- segs$abCall + otherCalls <- segs$lohCall # If called (TRUE) and already called (TRUE) # by the other caller, call it as NA. - calls[calls & otherCalls] <- NA; - segs$abCall <- calls; - fit$output <- segs; + calls[calls & otherCalls] <- NA + segs$abCall <- calls + fit$output <- segs } } - return(invisible(fit)); + return(invisible(fit)) }) setMethodS3("callAllelicBalance", "default", function(...) { - callAB(...); + callAB(...) }) @@ -160,54 +160,54 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'delta': - delta <- Arguments$getDouble(delta, range=c(0,Inf)); + delta <- Arguments$getDouble(delta, range=c(0,Inf)) # Argument 'alpha': - alpha <- Arguments$getDouble(alpha, range=c(0,1)); + alpha <- Arguments$getDouble(alpha, range=c(0,1)) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Calling segments of allelic balance from one-sided DH bootstrap confidence intervals"); - verbose && cat(verbose, "delta (offset adjusting for bias in DH): ", delta); - verbose && cat(verbose, "alpha (CI quantile; significance level): ", alpha); + verbose && enter(verbose, "Calling segments of allelic balance from one-sided DH bootstrap confidence intervals") + verbose && cat(verbose, "delta (offset adjusting for bias in DH): ", delta) + verbose && cat(verbose, "alpha (CI quantile; significance level): ", alpha) # Calculate DH confidence intervals, if not already done - probs <- c(alpha, 1-alpha); - fit <- bootstrapTCNandDHByRegion(fit, probs=probs, ..., verbose=less(verbose, 50)); + probs <- c(alpha, 1-alpha) + fit <- bootstrapTCNandDHByRegion(fit, probs=probs, ..., verbose=less(verbose, 50)) - segs <- as.data.frame(fit); + segs <- as.data.frame(fit) # Extract confidence interval - alphaTag <- sprintf("%g%%", 100*alpha); - column <- sprintf("dh_%s", alphaTag); + alphaTag <- sprintf("%g%%", 100*alpha) + column <- sprintf("dh_%s", alphaTag) # Sanity checks - stopifnot(is.element(column, colnames(segs))); + .stop_if_not(is.element(column, colnames(segs))) # One-sided test - verbose && enter(verbose, "Calling segments"); - value <- segs[,column, drop=TRUE]; - call <- (value < delta); - nbrOfCalls <- sum(call, na.rm=TRUE); - verbose && printf(verbose, "Number of segments called allelic balance (AB): %d (%.2f%%) of %d\n", nbrOfCalls, 100*nbrOfCalls/nrow(segs), nrow(segs)); - verbose && exit(verbose); + verbose && enter(verbose, "Calling segments") + value <- segs[,column, drop=TRUE] + call <- (value < delta) + nbrOfCalls <- sum(call, na.rm=TRUE) + verbose && printf(verbose, "Number of segments called allelic balance (AB): %d (%.2f%%) of %d\n", nbrOfCalls, 100*nbrOfCalls/nrow(segs), nrow(segs)) + verbose && exit(verbose) - segs$abCall <- call; - fit$output <- segs; + segs$abCall <- call + fit$output <- segs # Append 'delta' and 'alpha' to parameters - params <- fit$params; - params$deltaAB <- delta; - params$alphaAB <- alpha; - fit$params <- params; + params <- fit$params + params$deltaAB <- delta + params$alphaAB <- alpha + fit$params <- params - verbose && exit(verbose); + verbose && exit(verbose) - fit; + fit }, protected=TRUE) # callAllelicBalanceByDH() diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.callCopyNeutral.R r-cran-pscbs-0.64.0/R/PairedPSCBS.callCopyNeutral.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.callCopyNeutral.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.callCopyNeutral.R 2018-08-12 21:30:44.000000000 +0000 @@ -37,42 +37,42 @@ #*/########################################################################### setMethodS3("callCopyNeutral", "PairedPSCBS", function(fit, flavor=c("TCN|AB"), ..., minSize=1, force=FALSE) { # Argument 'flavor': - flavor <- match.arg(flavor); + flavor <- match.arg(flavor) # Argument 'minSize': - minSize <- Arguments$getDouble(minSize, range=c(1,Inf)); + minSize <- Arguments$getDouble(minSize, range=c(1,Inf)) # Already done? - segs <- as.data.frame(fit); - calls <- segs$ntcnCall; + segs <- as.data.frame(fit) + calls <- segs$ntcnCall if (!force && !is.null(calls)) { - return(invisible(fit)); + return(invisible(fit)) } if (flavor == "TCN|AB") { - fit <- callCopyNeutralByTCNofAB(fit, ..., force=force); + fit <- callCopyNeutralByTCNofAB(fit, ..., force=force) } else { - throw("Cannot call copy-neutral states. Unsupported flavor: ", flavor); + throw("Cannot call copy-neutral states. Unsupported flavor: ", flavor) } # Don't call segments with too few data points? if (minSize > 1) { - segs <- as.data.frame(fit); - ns <- segs$dhNbrOfLoci; - calls <- segs$ntcnCall; - calls[ns < minSize] <- NA; - segs$ntcnCall <- calls; - fit$output <- segs; + segs <- as.data.frame(fit) + ns <- segs$dhNbrOfLoci + calls <- segs$ntcnCall + calls[ns < minSize] <- NA + segs$ntcnCall <- calls + fit$output <- segs # Not needed anymore - segs <- calls <- NULL; + segs <- calls <- NULL } - return(invisible(fit)); + return(invisible(fit)) }) setMethodS3("callNTCN", "PairedPSCBS", function(...) { - callCopyNeutral(...); + callCopyNeutral(...) }) @@ -83,99 +83,99 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'force': - force <- Arguments$getLogical(force); + force <- Arguments$getLogical(force) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "calcStatsForCopyNeutralABs"); + verbose && enter(verbose, "calcStatsForCopyNeutralABs") - segsNTCN <- fit$params$copyNeutralStats; + segsNTCN <- fit$params$copyNeutralStats if (!force && !is.null(segsNTCN)) { - verbose && exit(verbose); - return(fit); + verbose && exit(verbose) + return(fit) } - verbose && enter(verbose, "Identifying copy neutral AB segments"); + verbose && enter(verbose, "Identifying copy neutral AB segments") # Getting AB calls - segs <- getSegments(fit, splitters=TRUE); - isAB <- segs$abCall; + segs <- getSegments(fit, splitters=TRUE) + isAB <- segs$abCall if (is.null(isAB)) { - throw("Cannot call copy-neutral states, because allelic-balance calls have not been made yet."); + throw("Cannot call copy-neutral states, because allelic-balance calls have not been made yet.") } - nABs <- sum(isAB, na.rm=TRUE); - verbose && cat(verbose, "Number of AB segments: ", nABs); + nABs <- sum(isAB, na.rm=TRUE) + verbose && cat(verbose, "Number of AB segments: ", nABs) if (nABs == 0L) { - throw("Cannot call copy-neutral states, because none of the segments are in allelic balance."); + throw("Cannot call copy-neutral states, because none of the segments are in allelic balance.") } - C <- segs[,"tcnMean", drop=TRUE]; - isAB <- segs[,"abCall", drop=TRUE]; - n <- segs[,"tcnNbrOfSNPs", drop=TRUE]; # "tcnNbrOfLoci"? /HB 2010-09-09 + C <- segs[,"tcnMean", drop=TRUE] + isAB <- segs[,"abCall", drop=TRUE] + n <- segs[,"tcnNbrOfSNPs", drop=TRUE] # "tcnNbrOfLoci"? /HB 2010-09-09 # Give more weight to longer regions - weights <- n; + weights <- n # Identify copy neutral AB segments isNeutralAB <- findNeutralCopyNumberState(C=C, isAI=!isAB, weights=weights, - ..., flavor="maxPeak", verbose=verbose); - nAB <- sum(isNeutralAB, na.rm=TRUE); - verbose && cat(verbose, "Number of copy-neutral AB segments: ", nAB); + ..., flavor="maxPeak", verbose=verbose) + nAB <- sum(isNeutralAB, na.rm=TRUE) + verbose && cat(verbose, "Number of copy-neutral AB segments: ", nAB) if (nAB == 0L) { - throw("Cannot call copy-neutral states, because none of the segments in allelic-balance are copy neutral."); + throw("Cannot call copy-neutral states, because none of the segments in allelic-balance are copy neutral.") } - verbose && enter(verbose, "Extracting all copy neutral AB segments across all chromosomes into one big segment"); + verbose && enter(verbose, "Extracting all copy neutral AB segments across all chromosomes into one big segment") # (a) Extract those - fitNTCN <- extractSegments(fit, isNeutralAB); - verbose && print(verbose, fitNTCN); - verbose && exit(verbose); + fitNTCN <- extractSegments(fit, isNeutralAB) + verbose && print(verbose, fitNTCN) + verbose && exit(verbose) # (b) Turn into a single-chromosome data set - fitNTCN <- extractSegments(fitNTCN, !isSegmentSplitter(fitNTCN)); - isSplitter <- is.na(fitNTCN$output$chromosome); - fitNTCN$data$chromosome <- 0L; - fitNTCN$output$chromosome <- 0L; - fitNTCN$output$chromosome[isSplitter] <- NA; + fitNTCN <- extractSegments(fitNTCN, !isSegmentSplitter(fitNTCN)) + isSplitter <- is.na(fitNTCN$output$chromosome) + fitNTCN$data$chromosome <- 0L + fitNTCN$output$chromosome <- 0L + fitNTCN$output$chromosome[isSplitter] <- NA # (c) Turn into one big segment by dropping all change points -## nCPs <- nbrOfChangePoints(fitNTCN, ignoreGaps=TRUE); - nCPs <- nbrOfSegments(fitNTCN, splitters=TRUE) - 1L; +## nCPs <- nbrOfChangePoints(fitNTCN, ignoreGaps=TRUE) + nCPs <- nbrOfSegments(fitNTCN, splitters=TRUE) - 1L if (nCPs >= 1L) { - verbose && enter(verbose, "Dropping all change points"); - fitNTCN <- dropChangePoints(fitNTCN, idxs=nCPs:1, ignoreGaps=TRUE, update=TRUE, verbose=less(verbose, 5)); - verbose && exit(verbose); + verbose && enter(verbose, "Dropping all change points") + fitNTCN <- dropChangePoints(fitNTCN, idxs=nCPs:1, ignoreGaps=TRUE, update=TRUE, verbose=less(verbose, 5)) + verbose && exit(verbose) } # Sanity check - stopifnot(nbrOfSegments(fitNTCN) == 1L); - verbose && exit(verbose); + .stop_if_not(nbrOfSegments(fitNTCN) == 1L) + verbose && exit(verbose) - verbose && enter(verbose, "Bootstrap the identified copy-neutral states"); + verbose && enter(verbose, "Bootstrap the identified copy-neutral states") fitNTCN <- bootstrapTCNandDHByRegion(fitNTCN, what="segment", force=TRUE, - ..., verbose=less(verbose, 50)); - segsNTCN <- getSegments(fitNTCN, simplified=FALSE); - names <- colnames(segsNTCN); - excl <- grep("(^chromosome|Id|Start|End|Call)$", names); - segsNTCN <- segsNTCN[,-excl,drop=FALSE]; + ..., verbose=less(verbose, 50)) + segsNTCN <- getSegments(fitNTCN, simplified=FALSE) + names <- colnames(segsNTCN) + excl <- grep("(^chromosome|Id|Start|End|Call)$", names) + segsNTCN <- segsNTCN[,-excl,drop=FALSE] # Sanity check - stopifnot(ncol(segsNTCN) > 0L); - verbose && exit(verbose); + .stop_if_not(ncol(segsNTCN) > 0L) + verbose && exit(verbose) - verbose && print(verbose, segsNTCN); - verbose && exit(verbose); + verbose && print(verbose, segsNTCN) + verbose && exit(verbose) - fit$params$copyNeutralStats <- segsNTCN; + fit$params$copyNeutralStats <- segsNTCN - invisible(fit); + invisible(fit) }, protected=TRUE) # calcStatsForCopyNeutralABs() @@ -224,74 +224,74 @@ #*/########################################################################### setMethodS3("estimateDeltaCN", "PairedPSCBS", function(fit, scale=1, flavor=c("1-kappa", "delta(mode)"), kappa=estimateKappa(fit), adjust=0.2, quantile=0.95, ...) { # Argument 'scale': - disallow <- c("NA", "NaN", "Inf"); - scale <- Arguments$getDouble(scale, range=c(0,Inf), disallow=disallow); + disallow <- c("NA", "NaN", "Inf") + scale <- Arguments$getDouble(scale, range=c(0,Inf), disallow=disallow) # Argument 'flavor': - flavor <- match.arg(flavor); + flavor <- match.arg(flavor) if (flavor == "1-kappa") { # Argument 'kappa': - disallow <- c("NA", "NaN", "Inf"); - kappa <- Arguments$getDouble(kappa, range=c(0,1), disallow=disallow); + disallow <- c("NA", "NaN", "Inf") + kappa <- Arguments$getDouble(kappa, range=c(0,1), disallow=disallow) # Half a TCN unit length - delta <- (1-kappa)/2; + delta <- (1-kappa)/2 } else if (flavor == "delta(mode)") { # To please R CMD check - rohCall <- abCall <- type <- NULL; - rm(list=c("rohCall", "abCall", "type")); + rohCall <- abCall <- type <- NULL + rm(list=c("rohCall", "abCall", "type")) - segs <- getSegments(fit); - segs <- subset(segs, !rohCall); - segs <- subset(segs, !abCall); - sigmas <- list(); + segs <- getSegments(fit) + segs <- subset(segs, !rohCall) + segs <- subset(segs, !abCall) + sigmas <- list() for (name in c("c1", "c2", "tcn")) { # FIXME/AD HOC: Here we assume certain fields - fields <- sprintf("%s_%s%%", name, c(5,95)); + fields <- sprintf("%s_%s%%", name, c(5,95)) if (all(is.element(fields, names(segs)))) { - sigma <- segs[,fields[2]] - segs[,fields[1]]; + sigma <- segs[,fields[2]] - segs[,fields[1]] } else { if (name == "tcn") { - n <- segs[,"tcnNbrOfLoci"]; + n <- segs[,"tcnNbrOfLoci"] } else { - n <- segs[,"dhNbrOfLoci"]; - n <- segs[,"tcnNbrOfLoci"]; + n <- segs[,"dhNbrOfLoci"] + n <- segs[,"tcnNbrOfLoci"] } - n <- segs[,"tcnEnd"] - segs[,"tcnStart"]; - sigma <- 1/sqrt(n); + n <- segs[,"tcnEnd"] - segs[,"tcnStart"] + sigma <- 1/sqrt(n) } - sigmas[[name]] <- sigma; + sigmas[[name]] <- sigma } - segs <- segs[,c("c1Mean", "c2Mean", "tcnMean")]; - x <- unlist(segs, use.names=FALSE); - w <- 1/unlist(sigmas, use.names=FALSE); - keep <- is.finite(x) & is.finite(w); - x <- x[keep]; w <- w[keep]; - w <- w / sum(w); - d <- density(x, weights=w, adjust=adjust); -## plotDensity(d); - tolMax <- max(d$y, na.rm=TRUE); - tols <- seq(from=tolMax, to=0, length.out=100); - stats <- data.frame(tolerance=tols, delta=NA_real_, count=NA_integer_); + segs <- segs[,c("c1Mean", "c2Mean", "tcnMean")] + x <- unlist(segs, use.names=FALSE) + w <- 1/unlist(sigmas, use.names=FALSE) + keep <- is.finite(x) & is.finite(w) + x <- x[keep]; w <- w[keep] + w <- w / sum(w) + d <- density(x, weights=w, adjust=adjust) +## plotDensity(d) + tolMax <- max(d$y, na.rm=TRUE) + tols <- seq(from=tolMax, to=0, length.out=100) + stats <- data.frame(tolerance=tols, delta=NA_real_, count=NA_integer_) for (kk in seq_along(tols)) { - tol <- tols[kk]; - px <- subset(findPeaksAndValleys(d, tol=tol), type == "peak")$x; - nx <- length(px); - dx <- mean(diff(px)); - stats[kk,"delta"] <- dx; - stats[kk,"count"] <- nx; + tol <- tols[kk] + px <- subset(findPeaksAndValleys(d, tol=tol), type == "peak")$x + nx <- length(px) + dx <- mean(diff(px)) + stats[kk,"delta"] <- dx + stats[kk,"count"] <- nx } - stats <- subset(stats, is.finite(delta)); -## print(stats); - delta <- median(stats[,"delta"]); + stats <- subset(stats, is.finite(delta)) +## print(stats) + delta <- median(stats[,"delta"]) } # Rescale - delta <- scale * delta; + delta <- scale * delta - delta; + delta }, protected=TRUE) # estimateDeltaCN() @@ -345,142 +345,142 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'delta': - disallow <- c("NA", "NaN", "Inf"); - delta <- Arguments$getDouble(delta, range=c(0,Inf), disallow=disallow); + disallow <- c("NA", "NaN", "Inf") + delta <- Arguments$getDouble(delta, range=c(0,Inf), disallow=disallow) # Argument 'alpha': - disallow <- c("NA", "NaN", "Inf"); - alpha <- Arguments$getDouble(alpha, range=c(0,0.5), disallow=disallow); + disallow <- c("NA", "NaN", "Inf") + alpha <- Arguments$getDouble(alpha, range=c(0,0.5), disallow=disallow) # Argument 'force': - force <- Arguments$getLogical(force); + force <- Arguments$getLogical(force) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "callCopyNeutralByTCNofAB"); - verbose && cat(verbose, "Alpha: ", alpha); - verbose && cat(verbose, "Delta CN: ", delta); + verbose && enter(verbose, "callCopyNeutralByTCNofAB") + verbose && cat(verbose, "Alpha: ", alpha) + verbose && cat(verbose, "Delta CN: ", delta) - segs <- getSegments(fit, splitters=TRUE, simplify=FALSE); + segs <- getSegments(fit, splitters=TRUE, simplify=FALSE) # Nothing to do? if (!force && !is.null(segs$ntcnCall)) { # Copy neutral segments are already called - verbose && cat(verbose, "Already called. Skipping."); - verbose && exit(verbose); - return(fit); + verbose && cat(verbose, "Already called. Skipping.") + verbose && exit(verbose) + return(fit) } - verbose && enter(verbose, "Calling copy-neutral segments"); + verbose && enter(verbose, "Calling copy-neutral segments") - verbose && enter(verbose, "Retrieve TCN confidence intervals for all segments"); + verbose && enter(verbose, "Retrieve TCN confidence intervals for all segments") # Calculate TCN bootstrap estimates, if missing - probs <- c(alpha/2, 1-alpha/2); + probs <- c(alpha/2, 1-alpha/2) - verbose && printf(verbose, "Interval: [%g,%g]\n", probs[1], probs[2]); + verbose && printf(verbose, "Interval: [%g,%g]\n", probs[1], probs[2]) - keys <- sprintf("tcn_%g%%", 100*c(probs[1], probs[2])); - missing <- keys[!is.element(keys, colnames(segs))]; + keys <- sprintf("tcn_%g%%", 100*c(probs[1], probs[2])) + missing <- keys[!is.element(keys, colnames(segs))] if (length(missing) > 0) { - fit <- bootstrapTCNandDHByRegion(fit, probs=probs, ..., verbose=less(verbose, 50)); - segs <- getSegments(fit, splitters=TRUE, simplify=FALSE); + fit <- bootstrapTCNandDHByRegion(fit, probs=probs, ..., verbose=less(verbose, 50)) + segs <- getSegments(fit, splitters=TRUE, simplify=FALSE) # Assert that they exists - missing <- keys[!is.element(keys, colnames(segs))]; + missing <- keys[!is.element(keys, colnames(segs))] if (length(missing) > 0) { - throw("INTERNAL ERROR: No such statistics: ", hpaste(missing)); + throw("INTERNAL ERROR: No such statistics: ", hpaste(missing)) } } - verbose && exit(verbose); + verbose && exit(verbose) - verbose && enter(verbose, "Estimating TCN confidence interval of copy-neutral AB segments"); + verbose && enter(verbose, "Estimating TCN confidence interval of copy-neutral AB segments") - fit <- calcStatsForCopyNeutralABs(fit, ..., verbose=less(verbose, 5)); - stats <- fit$params$copyNeutralStats; - verbose && cat(verbose, "Bootstrap statistics for copy-neutral AB segments:"); - verbose && print(verbose, stats); + fit <- calcStatsForCopyNeutralABs(fit, ..., verbose=less(verbose, 5)) + stats <- fit$params$copyNeutralStats + verbose && cat(verbose, "Bootstrap statistics for copy-neutral AB segments:") + verbose && print(verbose, stats) # Extract TCN stats - cols <- grep("^(tcn_|tcnMean)", colnames(stats)); - tcnStats <- stats[,cols,drop=FALSE]; - tcnStats <- unlist(tcnStats, use.names=TRUE); - verbose && print(verbose, "TCN statistics:"); - verbose && print(verbose, tcnStats); + cols <- grep("^(tcn_|tcnMean)", colnames(stats)) + tcnStats <- stats[,cols,drop=FALSE] + tcnStats <- unlist(tcnStats, use.names=TRUE) + verbose && print(verbose, "TCN statistics:") + verbose && print(verbose, tcnStats) # Assert confidence interval of interest - missing <- keys[!is.element(keys, names(tcnStats))]; + missing <- keys[!is.element(keys, names(tcnStats))] if (length(missing) > 0) { - throw("INTERNAL ERROR: No such statistics: ", hpaste(missing)); + throw("INTERNAL ERROR: No such statistics: ", hpaste(missing)) } - mean <- tcnStats["tcnMean"]; - ci <- tcnStats[keys]; - verbose && printf(verbose, "%g%%-confidence interval of TCN mean for the copy-neutral state: [%g,%g] (mean=%g)\n", 100*(1-alpha), ci[1], ci[2], mean); + mean <- tcnStats["tcnMean"] + ci <- tcnStats[keys] + verbose && printf(verbose, "%g%%-confidence interval of TCN mean for the copy-neutral state: [%g,%g] (mean=%g)\n", 100*(1-alpha), ci[1], ci[2], mean) - verbose && exit(verbose); + verbose && exit(verbose) - verbose && enter(verbose, "Identify all copy-neutral segments");; - verbose && printf(verbose, "DeltaCN: +/-%g\n", delta); - range <- ci + delta*c(-1,+1); - verbose && printf(verbose, "Call (\"acceptance\") region: [%g,%g]\n", range[1], range[2]); + verbose && enter(verbose, "Identify all copy-neutral segments") + verbose && printf(verbose, "DeltaCN: +/-%g\n", delta) + range <- ci + delta*c(-1,+1) + verbose && printf(verbose, "Call (\"acceptance\") region: [%g,%g]\n", range[1], range[2]) # Get TCN confidence intervals for all segments - ci <- segs[,keys]; - ci <- as.matrix(ci); + ci <- segs[,keys] + ci <- as.matrix(ci) ## WAS: If a confidence interval is completely within the ## calling region, call it - ## isNTCN <- (range[1] <= ci[,1] & ci[,2] <= range[2]); + ## isNTCN <- (range[1] <= ci[,1] & ci[,2] <= range[2]) # If a segments confidence interval is completely outside the # copy-neutral region ("H_0"), that is, it is completely within # the rejection region ("H_1"), then the H_0 hypothesis that the # segment is copy-neutral in TCN is rejected. - isLoss <- (ci[,2] < range[1]); # (a) completely below, or - isGain <- (ci[,1] > range[2]); # (b) completely above. - isNTCN <- (!isLoss & !isGain); # => completely inside => not rejected. - - nbrOfSegs <- nrow(segs); - nbrOfABs <- sum(segs$abCall, na.rm=TRUE); - nbrOfCNs <- sum(isNTCN, na.rm=TRUE); - verbose && cat(verbose, "Total number of segments: ", nbrOfSegs); - verbose && cat(verbose, "Number of segments called allelic balance: ", nbrOfABs); - verbose && cat(verbose, "Number of segments called copy neutral: ", nbrOfCNs); - - nbrOfCNABs <- sum(isNTCN & segs$abCall, na.rm=TRUE); - verbose && cat(verbose, "Number of AB segments called copy neutral: ", nbrOfCNABs); - nbrOfCNNonABs <- sum(isNTCN & !segs$abCall, na.rm=TRUE); - verbose && cat(verbose, "Number of non-AB segments called copy neutral: ", nbrOfCNNonABs); + isLoss <- (ci[,2] < range[1]) # (a) completely below, or + isGain <- (ci[,1] > range[2]) # (b) completely above. + isNTCN <- (!isLoss & !isGain) # => completely inside => not rejected. + + nbrOfSegs <- nrow(segs) + nbrOfABs <- sum(segs$abCall, na.rm=TRUE) + nbrOfCNs <- sum(isNTCN, na.rm=TRUE) + verbose && cat(verbose, "Total number of segments: ", nbrOfSegs) + verbose && cat(verbose, "Number of segments called allelic balance: ", nbrOfABs) + verbose && cat(verbose, "Number of segments called copy neutral: ", nbrOfCNs) + + nbrOfCNABs <- sum(isNTCN & segs$abCall, na.rm=TRUE) + verbose && cat(verbose, "Number of AB segments called copy neutral: ", nbrOfCNABs) + nbrOfCNNonABs <- sum(isNTCN & !segs$abCall, na.rm=TRUE) + verbose && cat(verbose, "Number of non-AB segments called copy neutral: ", nbrOfCNNonABs) - verbose && exit(verbose); + verbose && exit(verbose) # Sanity check # # All previously called AB regions should remain called here as well -# stopifnot(all(isNTCN[isNeutralAB], na.rm=TRUE)); +# .stop_if_not(all(isNTCN[isNeutralAB], na.rm=TRUE)) - segs$ntcnCall <- isNTCN; + segs$ntcnCall <- isNTCN - params <- fit$params; - params$deltaCN <- delta; - params$ntcnRange <- range; + params <- fit$params + params$deltaCN <- delta + params$ntcnRange <- range - fitC <- fit; - fitC$output <- segs; - fitC$params <- params; + fitC <- fit + fitC$output <- segs + fitC$params <- params - verbose && exit(verbose); + verbose && exit(verbose) - fitC; + fitC }, protected=TRUE) # callCopyNeutralByTCNofAB() diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.callGNL.R r-cran-pscbs-0.64.0/R/PairedPSCBS.callGNL.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.callGNL.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.callGNL.R 2018-08-12 21:30:44.000000000 +0000 @@ -44,112 +44,112 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'flavor': - flavor <- match.arg(flavor); + flavor <- match.arg(flavor) # Argument 'minSize': - minSize <- Arguments$getDouble(minSize, range=c(1,Inf)); + minSize <- Arguments$getDouble(minSize, range=c(1,Inf)) # Already done? - segs <- as.data.frame(fit); + segs <- as.data.frame(fit) if (!force && all(is.element(c("gainCall", "ntcnCall", "lossCall"), names(segs)))) { # Segments are already called - return(invisible(fit)); + return(invisible(fit)) } if (flavor == "TCN|AB") { - fit <- callGNLByTCNofAB(fit, ..., force=force); + fit <- callGNLByTCNofAB(fit, ..., force=force) } else { - throw("Cannot call allelic balance. Unsupported flavor: ", flavor); + throw("Cannot call allelic balance. Unsupported flavor: ", flavor) } # Don't call segments with too few data points? if (minSize > 1) { - segs <- as.data.frame(fit); - ns <- segs$dhNbrOfLoci; - calls <- segs$ntcnCall; - calls[ns < minSize] <- NA; - segs$ntcnCall <- calls; - fit$output <- segs; + segs <- as.data.frame(fit) + ns <- segs$dhNbrOfLoci + calls <- segs$ntcnCall + calls[ns < minSize] <- NA + segs$ntcnCall <- calls + fit$output <- segs # Not needed anymore - segs <- calls <- NULL; + segs <- calls <- NULL } - return(invisible(fit)); + return(invisible(fit)) }) # callGNL() setMethodS3("callGainNeutralLoss", "PairedPSCBS", function(...) { - callGNL(...); + callGNL(...) }) setMethodS3("callGNLByTCNofAB", "PairedPSCBS", function(fit, ..., minSize=1L, force=FALSE, verbose=FALSE) { # Argument 'minSize': - minSize <- Arguments$getDouble(minSize, range=c(1,Inf)); + minSize <- Arguments$getDouble(minSize, range=c(1,Inf)) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Calling gain, neutral, and loss based TCNs of AB segments"); + verbose && enter(verbose, "Calling gain, neutral, and loss based TCNs of AB segments") # Already done? - segs <- as.data.frame(fit); - called <- all(is.element(c("gainCall", "ntcnCall", "lossCall"), names(segs))); + segs <- as.data.frame(fit) + called <- all(is.element(c("gainCall", "ntcnCall", "lossCall"), names(segs))) if (!force && called) { - return(invisible(fit)); + return(invisible(fit)) } - verbose && enter(verbose, "Calling neutral TCNs"); - fit <- callCopyNeutralByTCNofAB(fit, ..., verbose=verbose); - verbose && exit(verbose); + verbose && enter(verbose, "Calling neutral TCNs") + fit <- callCopyNeutralByTCNofAB(fit, ..., verbose=verbose) + verbose && exit(verbose) # The segment data - segs <- as.data.frame(fit); - tcnMean <- segs$tcnMean; - nbrOfSegs <- nrow(segs); + segs <- as.data.frame(fit) + tcnMean <- segs$tcnMean + nbrOfSegs <- nrow(segs) # The call thresholds and the NTCN calls - ntcnCall <- call <- segs$ntcnCall; - verbose && printf(verbose, "Number of NTCN calls: %d (%.2f%%) of %d\n", sum(call, na.rm=TRUE), 100*sum(call, na.rm=TRUE)/nbrOfSegs, nbrOfSegs); + ntcnCall <- call <- segs$ntcnCall + verbose && printf(verbose, "Number of NTCN calls: %d (%.2f%%) of %d\n", sum(call, na.rm=TRUE), 100*sum(call, na.rm=TRUE)/nbrOfSegs, nbrOfSegs) - params <- fit$params; + params <- fit$params - deltaCN <- params$deltaCN; - stopifnot(!is.null(deltaCN)); - ntcnRange <- params$ntcnRange; - stopifnot(!is.null(ntcnRange)); + deltaCN <- params$deltaCN + .stop_if_not(!is.null(deltaCN)) + ntcnRange <- params$ntcnRange + .stop_if_not(!is.null(ntcnRange)) # Confidence interval of the TCN|AB segments - range <- ntcnRange + c(+1,-1)*deltaCN; + range <- ntcnRange + c(+1,-1)*deltaCN # Mean of the TCN|AB segments - mu <- mean(range, na.rm=TRUE); + mu <- mean(range, na.rm=TRUE) - verbose && printf(verbose, "Mean TCN of AB segments: %g\n", mu); + verbose && printf(verbose, "Mean TCN of AB segments: %g\n", mu) - verbose && enter(verbose, "Calling loss"); - call <- !ntcnCall & (tcnMean < mu); - segs$lossCall <- call; - verbose && printf(verbose, "Number of loss calls: %d (%.2f%%) of %d\n", sum(call, na.rm=TRUE), 100*sum(call, na.rm=TRUE)/nbrOfSegs, nbrOfSegs); - verbose && exit(verbose); - - verbose && enter(verbose, "Calling gain"); - call <- !ntcnCall & (tcnMean > mu); - segs$gainCall <- call; - verbose && printf(verbose, "Number of loss calls: %d (%.2f%%) of %d\n", sum(call, na.rm=TRUE), 100*sum(call, na.rm=TRUE)/nbrOfSegs, nbrOfSegs); - verbose && exit(verbose); + verbose && enter(verbose, "Calling loss") + call <- !ntcnCall & (tcnMean < mu) + segs$lossCall <- call + verbose && printf(verbose, "Number of loss calls: %d (%.2f%%) of %d\n", sum(call, na.rm=TRUE), 100*sum(call, na.rm=TRUE)/nbrOfSegs, nbrOfSegs) + verbose && exit(verbose) + + verbose && enter(verbose, "Calling gain") + call <- !ntcnCall & (tcnMean > mu) + segs$gainCall <- call + verbose && printf(verbose, "Number of loss calls: %d (%.2f%%) of %d\n", sum(call, na.rm=TRUE), 100*sum(call, na.rm=TRUE)/nbrOfSegs, nbrOfSegs) + verbose && exit(verbose) # Recording - fit$output <- segs; + fit$output <- segs - verbose && exit(verbose); + verbose && exit(verbose) - fit; + fit }) # callGNLByTCNofAB() @@ -159,138 +159,138 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'alpha': - disallow <- c("NA", "NaN", "Inf"); - alpha <- Arguments$getDouble(alpha, range=c(0,0.5), disallow=disallow); + disallow <- c("NA", "NaN", "Inf") + alpha <- Arguments$getDouble(alpha, range=c(0,0.5), disallow=disallow) # Argument 'deltaLoss' & 'deltaGain': - disallow <- c("NA", "NaN", "Inf"); - deltaLoss <- Arguments$getDouble(deltaLoss, range=c(-Inf,0), disallow=disallow); - deltaGain <- Arguments$getDouble(deltaGain, range=c(0,+Inf), disallow=disallow); + disallow <- c("NA", "NaN", "Inf") + deltaLoss <- Arguments$getDouble(deltaLoss, range=c(-Inf,0), disallow=disallow) + deltaGain <- Arguments$getDouble(deltaGain, range=c(0,+Inf), disallow=disallow) # Argument 'force': - force <- Arguments$getLogical(force); + force <- Arguments$getLogical(force) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "callGNLByTCNofAB"); - verbose && cat(verbose, "Alpha: ", alpha); - verbose && cat(verbose, "Delta loss: ", deltaLoss); - verbose && cat(verbose, "Delta gain: ", deltaGain); + verbose && enter(verbose, "callGNLByTCNofAB") + verbose && cat(verbose, "Alpha: ", alpha) + verbose && cat(verbose, "Delta loss: ", deltaLoss) + verbose && cat(verbose, "Delta gain: ", deltaGain) - segs <- getSegments(fit, splitters=TRUE, simplify=FALSE); + segs <- getSegments(fit, splitters=TRUE, simplify=FALSE) # Already done? if (!force && all(is.element(c("gainCall", "ntcnCall", "lossCall"), names(segs)))) { # Segments are already called - verbose && cat(verbose, "Already called. Skipping."); - verbose && exit(verbose); - return(invisible(fit)); + verbose && cat(verbose, "Already called. Skipping.") + verbose && exit(verbose) + return(invisible(fit)) } # Check that bootstrap estimates exists - keys <- sprintf("tcn_%g%%", 100*c(alpha/2, 1-alpha/2)); - missing <- keys[!is.element(keys, colnames(segs))]; + keys <- sprintf("tcn_%g%%", 100*c(alpha/2, 1-alpha/2)) + missing <- keys[!is.element(keys, colnames(segs))] if (length(missing) > 0) { - throw("No such statistics: ", hpaste(missing)); + throw("No such statistics: ", hpaste(missing)) } - verbose && enter(verbose, "Calling segments"); + verbose && enter(verbose, "Calling segments") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Confidence interval of copy-neutral AB segments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Estimating TCN confidence interval of copy-neutral AB segments"); + verbose && enter(verbose, "Estimating TCN confidence interval of copy-neutral AB segments") - fit <- calcStatsForCopyNeutralABs(fit, ..., verbose=less(verbose, 5)); - stats <- fit$params$copyNeutralStats; - verbose && cat(verbose, "Bootstrap statistics for copy-neutral AB segments:"); - verbose && print(verbose, stats); + fit <- calcStatsForCopyNeutralABs(fit, ..., verbose=less(verbose, 5)) + stats <- fit$params$copyNeutralStats + verbose && cat(verbose, "Bootstrap statistics for copy-neutral AB segments:") + verbose && print(verbose, stats) # Extract TCN stats - cols <- grep("^(tcn_|tcnMean)", colnames(stats)); - tcnStats <- stats[,cols,drop=FALSE]; - tcnStats <- unlist(tcnStats, use.names=TRUE); - verbose && print(verbose, "TCN statistics:"); - verbose && print(verbose, tcnStats); + cols <- grep("^(tcn_|tcnMean)", colnames(stats)) + tcnStats <- stats[,cols,drop=FALSE] + tcnStats <- unlist(tcnStats, use.names=TRUE) + verbose && print(verbose, "TCN statistics:") + verbose && print(verbose, tcnStats) # Extract confidence interval of interest - keys <- sprintf("tcn_%g%%", 100*c(alpha/2, 1-alpha/2)); - missing <- keys[!is.element(keys, names(tcnStats))]; + keys <- sprintf("tcn_%g%%", 100*c(alpha/2, 1-alpha/2)) + missing <- keys[!is.element(keys, names(tcnStats))] if (length(missing) > 0) { - throw("No such statistics: ", hpaste(missing)); + throw("No such statistics: ", hpaste(missing)) } - mean <- tcnStats["tcnMean"]; - ci <- tcnStats[keys]; - verbose && printf(verbose, "%g%%-confidence interval of TCN mean for the copy-neutral state: [%g,%g] (mean=%g)\n", 100*(1-alpha), ci[1], ci[2], mean); + mean <- tcnStats["tcnMean"] + ci <- tcnStats[keys] + verbose && printf(verbose, "%g%%-confidence interval of TCN mean for the copy-neutral state: [%g,%g] (mean=%g)\n", 100*(1-alpha), ci[1], ci[2], mean) - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Get call regions # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - naValue <- NA_real_; + naValue <- NA_real_ callRegions <- matrix(c( Inf, 1, 1, 1, 1, Inf - ), nrow=3, ncol=2, byrow=TRUE); - rownames(callRegions) <- c("loss", "ntcn", "gain"); - colnames(callRegions) <- c("lower", "upper"); - callRegions["loss",] <- ci[1]+callRegions["loss",]*deltaLoss; - callRegions["ntcn",] <- ci +callRegions["ntcn",]*c(deltaLoss, deltaGain); - callRegions["gain",] <- ci[2]+callRegions["gain",]*deltaGain; + ), nrow=3, ncol=2, byrow=TRUE) + rownames(callRegions) <- c("loss", "ntcn", "gain") + colnames(callRegions) <- c("lower", "upper") + callRegions["loss",] <- ci[1]+callRegions["loss",]*deltaLoss + callRegions["ntcn",] <- ci +callRegions["ntcn",]*c(deltaLoss, deltaGain) + callRegions["gain",] <- ci[2]+callRegions["gain",]*deltaGain - verbose && cat(verbose, "Call (\"acceptance\") regions:"); - verbose && print(verbose, callRegions); + verbose && cat(verbose, "Call (\"acceptance\") regions:") + verbose && print(verbose, callRegions) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Get statistics for all segments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nbrOfSegs <- nrow(segs); - verbose && cat(verbose, "Number of segments: ", nbrOfSegs); - nbrOfABs <- sum(segs$abCall, na.rm=TRUE); - verbose && cat(verbose, "Number of AB segments: ", nbrOfABs); - verbose && cat(verbose, "Number of non-AB segments: ", nbrOfSegs-nbrOfABs); + nbrOfSegs <- nrow(segs) + verbose && cat(verbose, "Number of segments: ", nbrOfSegs) + nbrOfABs <- sum(segs$abCall, na.rm=TRUE) + verbose && cat(verbose, "Number of AB segments: ", nbrOfABs) + verbose && cat(verbose, "Number of non-AB segments: ", nbrOfSegs-nbrOfABs) # Get TCN confidence intervals for all segments - keys <- sprintf("tcn_%g%%", 100*c(alpha/2, 1-alpha/2)); - ci <- segs[,keys]; + keys <- sprintf("tcn_%g%%", 100*c(alpha/2, 1-alpha/2)) + ci <- segs[,keys] # Call states for (rr in seq_len(nrow(callRegions))) { - state <- rownames(callRegions)[rr]; - verbose && enter(verbose, "Identify all '", state, "' segments");; - range <- callRegions[rr,]; - verbose && printf(verbose, "Call (\"acceptance\") region: [%g,%g]\n", range[1], range[2]); + state <- rownames(callRegions)[rr] + verbose && enter(verbose, "Identify all '", state, "' segments") + range <- callRegions[rr,] + verbose && printf(verbose, "Call (\"acceptance\") region: [%g,%g]\n", range[1], range[2]) # If a confidence interval is completely within the # calling region, call it - isCalled <- (range[1] <= ci[,1] & ci[,2] < range[2]); + isCalled <- (range[1] <= ci[,1] & ci[,2] < range[2]) - nbrOfCalled <- sum(isCalled, na.rm=TRUE); - verbose && cat(verbose, "Number of segments called '", state, "': ", nbrOfCalled); -## verbose && cat(verbose, "Number of non-AB segments called '", state, "': ", (nbrOfSegs-nbrOfABs)-nbrOfCalled); - - key <- sprintf("%sCall", state); - segs[[key]] <- isCalled; - verbose && exit(verbose); + nbrOfCalled <- sum(isCalled, na.rm=TRUE) + verbose && cat(verbose, "Number of segments called '", state, "': ", nbrOfCalled) +## verbose && cat(verbose, "Number of non-AB segments called '", state, "': ", (nbrOfSegs-nbrOfABs)-nbrOfCalled) + + key <- sprintf("%sCall", state) + segs[[key]] <- isCalled + verbose && exit(verbose) } # for (rr ...) - fitC <- fit; - fitC$output <- segs; + fitC <- fit + fitC$output <- segs - verbose && exit(verbose); + verbose && exit(verbose) - fitC; + fitC }, protected=TRUE) # callGNLByTCNofABv1() diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.callLOH.R r-cran-pscbs-0.64.0/R/PairedPSCBS.callLOH.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.callLOH.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.callLOH.R 2018-08-12 21:30:44.000000000 +0000 @@ -54,59 +54,59 @@ #*/########################################################################### setMethodS3("callLOH", "PairedPSCBS", function(fit, flavor=c("SmallC1", "LargeDH"), ..., minSize=1, xorCalls=TRUE, force=FALSE) { # Argument 'flavor': - flavor <- match.arg(flavor); + flavor <- match.arg(flavor) # Argument 'minSize': - minSize <- Arguments$getDouble(minSize, range=c(1,Inf)); + minSize <- Arguments$getDouble(minSize, range=c(1,Inf)) # Argument 'xorCalls': - xorCalls <- Arguments$getLogical(xorCalls); + xorCalls <- Arguments$getLogical(xorCalls) # Already done? - segs <- as.data.frame(fit); - calls <- segs$lohCall; + segs <- as.data.frame(fit) + calls <- segs$lohCall if (!force && !is.null(calls)) { - return(invisible(fit)); + return(invisible(fit)) } if (flavor == "SmallC1") { - fit <- callLowC1ByC1(fit, ..., callName="loh"); + fit <- callLowC1ByC1(fit, ..., callName="loh") } else if (flavor == "LargeDH") { - fit <- callExtremeAllelicImbalanceByDH(fit, ..., callName="loh"); + fit <- callExtremeAllelicImbalanceByDH(fit, ..., callName="loh") } else { - throw("Cannot call LOH. Unsupported flavor: ", flavor); + throw("Cannot call LOH. Unsupported flavor: ", flavor) } # Don't call segments with too few data points? if (minSize > 1) { - segs <- as.data.frame(fit); - ns <- segs$dhNbrOfLoci; - calls <- segs$lohCall; - calls[ns < minSize] <- NA; - segs$lohCall <- calls; - fit$output <- segs; + segs <- as.data.frame(fit) + ns <- segs$dhNbrOfLoci + calls <- segs$lohCall + calls[ns < minSize] <- NA + segs$lohCall <- calls + fit$output <- segs # Not needed anymore - segs <- calls <- NULL; + segs <- calls <- NULL } # Don't call a segment LOH if it already called AB? if (xorCalls) { - segs <- as.data.frame(fit); + segs <- as.data.frame(fit) if (is.element("abCall", names(segs))) { - calls <- segs$lohCall; - otherCalls <- segs$abCall; + calls <- segs$lohCall + otherCalls <- segs$abCall # If called (TRUE) and already called (TRUE) # by the other caller, call it as NA. - calls[calls & otherCalls] <- NA; - segs$lohCall <- calls; - fit$output <- segs; + calls[calls & otherCalls] <- NA + segs$lohCall <- calls + fit$output <- segs } } - return(invisible(fit)); + return(invisible(fit)) }) @@ -117,60 +117,60 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'delta': if (delta != -Inf) { - delta <- Arguments$getDouble(delta, range=c(0,Inf)); + delta <- Arguments$getDouble(delta, range=c(0,Inf)) } # Argument 'alpha': - alpha <- Arguments$getDouble(alpha, range=c(0,1)); + alpha <- Arguments$getDouble(alpha, range=c(0,1)) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Calling segments of allelic balance from one-sided DH bootstrap confidence intervals"); + verbose && enter(verbose, "Calling segments of allelic balance from one-sided DH bootstrap confidence intervals") - verbose && cat(verbose, "delta (offset adjusting for bias in C1): ", delta); - verbose && cat(verbose, "alpha (CI quantile; significance level): ", alpha); + verbose && cat(verbose, "delta (offset adjusting for bias in C1): ", delta) + verbose && cat(verbose, "alpha (CI quantile; significance level): ", alpha) # Calculate C1 confidence intervals, if not already done - probs <- c(alpha, 1-alpha); - fit <- bootstrapTCNandDHByRegion(fit, probs=probs, ..., verbose=less(verbose, 50)); + probs <- c(alpha, 1-alpha) + fit <- bootstrapTCNandDHByRegion(fit, probs=probs, ..., verbose=less(verbose, 50)) - segs <- as.data.frame(fit); + segs <- as.data.frame(fit) # Extract confidence interval - alphaTag <- sprintf("%g%%", 100*alpha); - column <- sprintf("c1_%s", alphaTag); + alphaTag <- sprintf("%g%%", 100*alpha) + column <- sprintf("c1_%s", alphaTag) # Sanity checks - stopifnot(is.element(column, colnames(segs))); + .stop_if_not(is.element(column, colnames(segs))) # One-sided test - verbose && enter(verbose, "Calling segments"); - value <- segs[,column, drop=TRUE]; - call <- (value < delta); - nbrOfCalls <- sum(call, na.rm=TRUE); - verbose && printf(verbose, "Number of segments called low C1 (LowC1, \"LOH_C1\"): %d (%.2f%%) of %d\n", nbrOfCalls, 100*nbrOfCalls/nrow(segs), nrow(segs)); - verbose && exit(verbose); - - key <- sprintf("%sCall", callName); -# calls <- data.frame(lowc1Call=call); -# colnames(calls) <- key; -# segs <- cbind(segs, calls); - segs[[key]] <- call; - fit$output <- segs; + verbose && enter(verbose, "Calling segments") + value <- segs[,column, drop=TRUE] + call <- (value < delta) + nbrOfCalls <- sum(call, na.rm=TRUE) + verbose && printf(verbose, "Number of segments called low C1 (LowC1, \"LOH_C1\"): %d (%.2f%%) of %d\n", nbrOfCalls, 100*nbrOfCalls/nrow(segs), nrow(segs)) + verbose && exit(verbose) + + key <- sprintf("%sCall", callName) +# calls <- data.frame(lowc1Call=call) +# colnames(calls) <- key +# segs <- cbind(segs, calls) + segs[[key]] <- call + fit$output <- segs # Append 'delta' and 'alpha' to parameters - params <- fit$params; - params$deltaLowC1 <- delta; - params$alphaLowC1 <- alpha; - fit$params <- params; + params <- fit$params + params$deltaLowC1 <- delta + params$alphaLowC1 <- alpha + fit$params <- params - verbose && exit(verbose); + verbose && exit(verbose) - fit; + fit }, private=TRUE) # callLowC1ByC1() @@ -181,60 +181,60 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'delta': - delta <- Arguments$getDouble(delta, range=c(0,Inf)); + delta <- Arguments$getDouble(delta, range=c(0,Inf)) # Argument 'alpha': - alpha <- Arguments$getDouble(alpha, range=c(0,1)); + alpha <- Arguments$getDouble(alpha, range=c(0,1)) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Calling segments of extreme allelic imbalance (AI) from one-sided DH bootstrap confidence intervals"); + verbose && enter(verbose, "Calling segments of extreme allelic imbalance (AI) from one-sided DH bootstrap confidence intervals") - verbose && cat(verbose, "delta (offset adjusting for normal contamination and other biases): ", delta); - verbose && cat(verbose, "alpha (CI quantile; significance level): ", alpha); + verbose && cat(verbose, "delta (offset adjusting for normal contamination and other biases): ", delta) + verbose && cat(verbose, "alpha (CI quantile; significance level): ", alpha) # Calculate DH confidence intervalls, if not already done - probs <- c(alpha, 1-alpha); - fit <- bootstrapTCNandDHByRegion(fit, probs=probs, ..., verbose=less(verbose, 50)); + probs <- c(alpha, 1-alpha) + fit <- bootstrapTCNandDHByRegion(fit, probs=probs, ..., verbose=less(verbose, 50)) - segs <- as.data.frame(fit); + segs <- as.data.frame(fit) # Extract confidence interval - alphaTag <- sprintf("%g%%", 100*alpha); - column <- sprintf("dh_%s", alphaTag); + alphaTag <- sprintf("%g%%", 100*alpha) + column <- sprintf("dh_%s", alphaTag) # Sanity checks - stopifnot(is.element(column, colnames(segs))); + .stop_if_not(is.element(column, colnames(segs))) # One-sided test - verbose && enter(verbose, "Calling segments"); - value <- segs[,column, drop=TRUE]; - call <- (value >= delta); - nbrOfCalls <- sum(call, na.rm=TRUE); - verbose && printf(verbose, "Number of segments called high allelic imbalance (AI/\"LOH_AI\"): %d (%.2f%%) of %d\n", nbrOfCalls, 100*nbrOfCalls/nrow(segs), nrow(segs)); - verbose && exit(verbose); - - key <- sprintf("%sCall", callName); -# calls <- data.frame(aiHighCall=call); -# colnames(calls) <- key; -# segs <- cbind(segs, calls); - segs[[key]] <- call; - fit$output <- segs; + verbose && enter(verbose, "Calling segments") + value <- segs[,column, drop=TRUE] + call <- (value >= delta) + nbrOfCalls <- sum(call, na.rm=TRUE) + verbose && printf(verbose, "Number of segments called high allelic imbalance (AI/\"LOH_AI\"): %d (%.2f%%) of %d\n", nbrOfCalls, 100*nbrOfCalls/nrow(segs), nrow(segs)) + verbose && exit(verbose) + + key <- sprintf("%sCall", callName) +# calls <- data.frame(aiHighCall=call) +# colnames(calls) <- key +# segs <- cbind(segs, calls) + segs[[key]] <- call + fit$output <- segs # Append 'delta' and 'alpha' to parameters - params <- fit$params; - params$deltaExtremeDH <- delta; - params$alphaExtremeDH <- alpha; - fit$params <- params; + params <- fit$params + params$deltaExtremeDH <- delta + params$alphaExtremeDH <- alpha + fit$params <- params - verbose && exit(verbose); + verbose && exit(verbose) - fit; + fit }, private=TRUE) # callExtremeAllelicImbalanceByDH() diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.CALL.R r-cran-pscbs-0.64.0/R/PairedPSCBS.CALL.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.CALL.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.CALL.R 2018-08-12 21:30:44.000000000 +0000 @@ -3,28 +3,28 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Calling segments to be in allelic balance (AB) or extreme allelic imbalance (AI)"); + verbose && enter(verbose, "Calling segments to be in allelic balance (AB) or extreme allelic imbalance (AI)") # Calculate DH confidence intervals, if not already done - probs <- sort(unique(c(alphaAB, alphaHighAI))); - probs <- sort(unique(c(probs, 1-probs))); - fit <- bootstrapTCNandDHByRegion(fit, probs=probs, ..., verbose=less(verbose, 50)); + probs <- sort(unique(c(alphaAB, alphaHighAI))) + probs <- sort(unique(c(probs, 1-probs))) + fit <- bootstrapTCNandDHByRegion(fit, probs=probs, ..., verbose=less(verbose, 50)) # Call allelic balance - fit <- callAllelicBalanceByDH(fit, delta=deltaAB, alpha=alphaAB, ..., verbose=less(verbose, 1)); + fit <- callAllelicBalanceByDH(fit, delta=deltaAB, alpha=alphaAB, ..., verbose=less(verbose, 1)) # Call high allelic imbalance - fit <- callExtremeAllelicImbalanceByDH(fit, delta=deltaHighAI, alpha=alphaHighAI, ..., verbose=less(verbose, 1)); + fit <- callExtremeAllelicImbalanceByDH(fit, delta=deltaHighAI, alpha=alphaHighAI, ..., verbose=less(verbose, 1)) - verbose && exit(verbose); + verbose && exit(verbose) - fit; + fit }, private=TRUE) # callABandHighAI() @@ -33,81 +33,81 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Calling segments to be in allelic balance (AB) or low minor copy number (low C1)"); + verbose && enter(verbose, "Calling segments to be in allelic balance (AB) or low minor copy number (low C1)") # Calculate DH confidence intervals, if not already done - probs <- sort(unique(c(alphaAB, alphaLowC1))); - probs <- sort(unique(c(probs, 1-probs))); - fit <- bootstrapTCNandDHByRegion(fit, probs=probs, ..., verbose=less(verbose, 50)); + probs <- sort(unique(c(alphaAB, alphaLowC1))) + probs <- sort(unique(c(probs, 1-probs))) + fit <- bootstrapTCNandDHByRegion(fit, probs=probs, ..., verbose=less(verbose, 50)) # Call allelic balance - fit <- callAllelicBalanceByDH(fit, delta=deltaAB, alpha=alphaAB, ..., verbose=less(verbose, 1)); + fit <- callAllelicBalanceByDH(fit, delta=deltaAB, alpha=alphaAB, ..., verbose=less(verbose, 1)) # Call high allelic imbalance - fit <- callLowC1ByC1(fit, delta=deltaLowC1, alpha=alphaLowC1, ..., verbose=less(verbose, 1)); + fit <- callLowC1ByC1(fit, delta=deltaLowC1, alpha=alphaLowC1, ..., verbose=less(verbose, 1)) - verbose && exit(verbose); + verbose && exit(verbose) - fit; + fit }, private=TRUE) # callABandLowC1() setMethodS3("extractCallsByLocus", "PairedPSCBS", function(fit, ...) { # Extract locus data - data <- getLocusData(fit, ...); + data <- getLocusData(fit, ...) - nbrOfLoci <- nrow(data); + nbrOfLoci <- nrow(data) # Extract segment data - segs <- getSegments(fit, splitters=TRUE); + segs <- getSegments(fit, splitters=TRUE) # Identify segment calls - callCols <- grep("Call$", colnames(segs)); - nbrOfCalls <- length(callCols); + callCols <- grep("Call$", colnames(segs)) + nbrOfCalls <- length(callCols) - chromosome <- data$chromosome; - x <- data$x; - y <- data[,3]; + chromosome <- data$chromosome + x <- data$x + y <- data[,3] # Allocate locus calls - naValue <- as.logical(NA); - callsL <- matrix(naValue, nrow=nbrOfLoci, ncol=nbrOfCalls); - colnames(callsL) <- colnames(segs)[callCols]; - callsL <- as.data.frame(callsL); + naValue <- as.logical(NA) + callsL <- matrix(naValue, nrow=nbrOfLoci, ncol=nbrOfCalls) + colnames(callsL) <- colnames(segs)[callCols] + callsL <- as.data.frame(callsL) # For each segment... for (ss in seq_len(nrow(segs))) { - seg <- segs[ss,]; + seg <- segs[ss,] idxs <- which(chromosome == seg$chromosome & - seg$tcnStart <= x & x <= seg$tcnEnd); - idxs <- Arguments$getIndices(idxs, max=nbrOfLoci); + seg$tcnStart <= x & x <= seg$tcnEnd) + idxs <- Arguments$getIndices(idxs, max=nbrOfLoci) # Sanity check -## stopifnot(length(idxs) == seg$tcnNbrOfLoci); +## .stop_if_not(length(idxs) == seg$tcnNbrOfLoci) - callsSS <- seg[callCols]; + callsSS <- seg[callCols] for (cc in seq_len(nbrOfCalls)) { - callsL[idxs,cc] <- callsSS[,cc]; + callsL[idxs,cc] <- callsSS[,cc] } } # for (ss ...) # The calls for loci that have missing annotations or observations, # should also be missing, i.e. NA. - nok <- (is.na(chromosome) | is.na(x) | is.na(y)); - callsL[nok,] <- as.logical(NA); + nok <- (is.na(chromosome) | is.na(x) | is.na(y)) + callsL[nok,] <- as.logical(NA) # Sanity check - stopifnot(nrow(callsL) == nbrOfLoci); - stopifnot(ncol(callsL) == nbrOfCalls); + .stop_if_not(nrow(callsL) == nbrOfLoci) + .stop_if_not(ncol(callsL) == nbrOfCalls) - callsL; + callsL }, private=TRUE) # extractCallsByLocus() diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.callROH.R r-cran-pscbs-0.64.0/R/PairedPSCBS.callROH.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.callROH.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.callROH.R 2018-08-12 21:30:44.000000000 +0000 @@ -38,68 +38,68 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Calling ROH"); + verbose && enter(verbose, "Calling ROH") # Already done? - segs <- getSegments(fit); - calls <- segs$rohCall; + segs <- getSegments(fit) + calls <- segs$rohCall if (!force && !is.null(calls)) { - return(invisible(fit)); + return(invisible(fit)) } - nbrOfSegments <- nrow(segs); - calls <- rep(NA, times=nbrOfSegments); + nbrOfSegments <- nrow(segs) + calls <- rep(NA, times=nbrOfSegments) if (is.null(calls)) { - segs <- cbind(segs, rohCall=calls); + segs <- cbind(segs, rohCall=calls) } - delta <- NA_real_; + delta <- NA_real_ # For each segment... for (ss in seq_len(nbrOfSegments)) { - verbose && enter(verbose, sprintf("Segment #%d of %d", ss, nbrOfSegments)); + verbose && enter(verbose, sprintf("Segment #%d of %d", ss, nbrOfSegments)) - fitT <- extractSegment(fit, ss); + fitT <- extractSegment(fit, ss) # Call only "non-splitter" segments if (nbrOfSegments(fitT) > 0L) { - callSS <- callROHOneSegment(fitT, ..., verbose=less(verbose, 1)); - calls[ss] <- callSS; + callSS <- callROHOneSegment(fitT, ..., verbose=less(verbose, 1)) + calls[ss] <- callSS if (is.na(delta) && !is.na(callSS)) { - delta <- attr(callSS, "delta"); + delta <- attr(callSS, "delta") } } - verbose && exit(verbose); + verbose && exit(verbose) } # for (ss ...) - verbose && cat(verbose, "ROH calls:"); - verbose && str(verbose, calls); - verbose && print(verbose, summary(calls)); + verbose && cat(verbose, "ROH calls:") + verbose && str(verbose, calls) + verbose && print(verbose, summary(calls)) - segs$rohCall <- calls; + segs$rohCall <- calls - fit$output <- segs; + fit$output <- segs # Append parameters - params <- fit$params; - params$deltaROH <- delta; - fit$params <- params; + params <- fit$params + params$deltaROH <- delta + fit$params <- params # Set DH and (C1,C2) mean levels to NA? if (updateMeans) { fit <- updateMeans(fit, from="segments", adjustFor="roh", - verbose=less(verbose, 20)); + verbose=less(verbose, 20)) } - verbose && exit(verbose); + verbose && exit(verbose) - invisible(fit); + invisible(fit) }) # callROH() @@ -109,45 +109,45 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Calling ROH for a single segment"); + verbose && enter(verbose, "Calling ROH for a single segment") # Make sure that there is only a single segment in this object - stopifnot(nbrOfSegments(fit, splitters=TRUE) == 1L); + .stop_if_not(nbrOfSegments(fit, splitters=TRUE) == 1L) # Extract the locus-level data for the segment tested - data <- getLocusData(fit); + data <- getLocusData(fit) # Keep only SNPs: # SNPs are identifies as those loci that have non-missing # 'betaTN' & 'muN', cf. segmentByPairedPSCBS(). - isSnp <- (!is.na(data$betaTN) & !is.na(data$muN)); - nbrOfSnps <- sum(isSnp); - verbose && cat(verbose, "Number of SNPs: ", nbrOfSnps); - data <- data[isSnp,]; + isSnp <- (!is.na(data$betaTN) & !is.na(data$muN)) + nbrOfSnps <- sum(isSnp) + verbose && cat(verbose, "Number of SNPs: ", nbrOfSnps) + data <- data[isSnp,] # Extract that SNP signals used for calling ROH - betaN <- data$betaN; - muN <- data$muN; - csN <- data$csN; # Genotyping confidence scores, if available + betaN <- data$betaN + muN <- data$muN + csN <- data$csN # Genotyping confidence scores, if available # Test for ROH - fit <- testROH(muN=muN, csN=csN, betaN=betaN, ..., verbose=less(verbose, 10)); + fit <- testROH(muN=muN, csN=csN, betaN=betaN, ..., verbose=less(verbose, 10)) # Get the ROH call (TRUE, FALSE, or NA) - call <- fit; + call <- fit - verbose && exit(verbose); + verbose && exit(verbose) - call; + call }, private=TRUE) diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.estimateDeltaAB.R r-cran-pscbs-0.64.0/R/PairedPSCBS.estimateDeltaAB.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.estimateDeltaAB.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.estimateDeltaAB.R 2018-08-12 21:30:44.000000000 +0000 @@ -40,95 +40,95 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'flavor': - flavor <- match.arg(flavor); + flavor <- match.arg(flavor) # Argument 'max': - max <- Arguments$getDouble(max, range=c(0,Inf)); + max <- Arguments$getDouble(max, range=c(0,Inf)) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Estimating DH threshold for calling allelic imbalances"); - verbose && cat(verbose, "flavor: ", flavor); + verbose && enter(verbose, "Estimating DH threshold for calling allelic imbalances") + verbose && cat(verbose, "flavor: ", flavor) if (flavor == "mad(hBAF)") { - if (is.null(scale)) scale <- 3; - verbose && cat(verbose, "scale: ", scale); + if (is.null(scale)) scale <- 3 + verbose && cat(verbose, "scale: ", scale) # sigma = mad(hBAF) = 1.4826*median(|hBAF-m|), # where m = median(hBAF) ~= 1/2 - sd <- estimateStdDevForHeterozygousBAF(this, ..., verbose=verbose); - verbose && printf(verbose, "sd: %.3g\n", sd); - delta <- scale * sd; + sd <- estimateStdDevForHeterozygousBAF(this, ..., verbose=verbose) + verbose && printf(verbose, "sd: %.3g\n", sd) + delta <- scale * sd } else if (flavor == "median(DH)") { - if (is.null(scale)) scale <- 3; - verbose && cat(verbose, "scale: ", scale); + if (is.null(scale)) scale <- 3 + verbose && cat(verbose, "scale: ", scale) # sigma = 1/2*1.4826*median(|hBAF-1/2|), # because DH = 2*|hBAF-1/2| - mu <- estimateMeanForDH(this, ..., verbose=verbose); - verbose && printf(verbose, "mu: %.3g\n", mu); - sd <- 1/2 * 1.4826 * mu; - verbose && printf(verbose, "sd: %.3g\n", sd); - delta <- scale * sd; + mu <- estimateMeanForDH(this, ..., verbose=verbose) + verbose && printf(verbose, "mu: %.3g\n", mu) + sd <- 1/2 * 1.4826 * mu + verbose && printf(verbose, "sd: %.3g\n", sd) + delta <- scale * sd } else if (flavor == "q(DH)") { - if (is.null(scale)) scale <- 1; - verbose && cat(verbose, "scale: ", scale); - delta <- estimateHighDHQuantileAtAB(this, scale=scale, ..., verbose=verbose); + if (is.null(scale)) scale <- 1 + verbose && cat(verbose, "scale: ", scale) + delta <- estimateHighDHQuantileAtAB(this, scale=scale, ..., verbose=verbose) } else if (flavor == "qq(DH)") { - if (is.null(scale)) scale <- 1; - verbose && cat(verbose, "scale: ", scale); - delta <- estimateDeltaABBySmallDH(this, ..., verbose=verbose); - delta <- scale * delta; + if (is.null(scale)) scale <- 1 + verbose && cat(verbose, "scale: ", scale) + delta <- estimateDeltaABBySmallDH(this, ..., verbose=verbose) + delta <- scale * delta } else { - throw("Unkown flavor: ", flavor); + throw("Unkown flavor: ", flavor) } ## } else if (flavor == "DHskew") { -## fit <- this; +## fit <- this ## if (is.null(fit$output$dhSkew)) { -## verbose && enter(verbose, "Estimating DH skewness for each segment"); -## fit <- applyByRegion(fit, FUN=.addTcnDhStatitics, verbose=less(verbose, 5)); -## verbose && exit(verbose); +## verbose && enter(verbose, "Estimating DH skewness for each segment") +## fit <- applyByRegion(fit, FUN=.addTcnDhStatitics, verbose=less(verbose, 5)) +## verbose && exit(verbose) ## } -## mu <- fit$output$dhMean; -## skew <- fit$output$dhSkew; +## mu <- fit$output$dhMean +## skew <- fit$output$dhSkew ## -## deltaSkew <- -0.55; -## keep <- which(skew < deltaSkew); -## verbose && printf(verbose, "Number of segments heavily skewed (< %.3f): %d\n", deltaSkew, length(keep)); +## deltaSkew <- -0.55 +## keep <- which(skew < deltaSkew) +## verbose && printf(verbose, "Number of segments heavily skewed (< %.3f): %d\n", deltaSkew, length(keep)) ## # Sanity check ## if (length(keep) == 0) { -## throw("Cannot estimate DH threshold for AB. No segments with strong skewness exists."); +## throw("Cannot estimate DH threshold for AB. No segments with strong skewness exists.") ## } -## deltaDH <- median(mu[keep], na.rm=TRUE); -## verbose && printf(verbose, "deltaDH: %.3g\n", deltaDH); -## deltaDH <- 1.10*deltaDH; -## verbose && printf(verbose, "Adjusted +10%% deltaDH: %.3g\n", deltaDH); +## deltaDH <- median(mu[keep], na.rm=TRUE) +## verbose && printf(verbose, "deltaDH: %.3g\n", deltaDH) +## deltaDH <- 1.10*deltaDH +## verbose && printf(verbose, "Adjusted +10%% deltaDH: %.3g\n", deltaDH) ## ## # sigma = 1/2*1.4826*median(|hBAF-1/2|), ## # because DH = 2*|hBAF-1/2| -## mu <- estimateMeanForDH(this, delta=deltaDH, ...); -## verbose && printf(verbose, "mu: %.3g\n", mu); -## sd <- 1/2 * 1.4826 * mu; -## verbose && printf(verbose, "sd: %.3g\n", sd); +## mu <- estimateMeanForDH(this, delta=deltaDH, ...) +## verbose && printf(verbose, "mu: %.3g\n", mu) +## sd <- 1/2 * 1.4826 * mu +## verbose && printf(verbose, "sd: %.3g\n", sd) ## } - verbose && printf(verbose, "Estimated delta: %.3g\n", delta); + verbose && printf(verbose, "Estimated delta: %.3g\n", delta) # Truncate estimate? if (delta > max) { - warning("Estimated delta (%.3g) was greater than the maximum allowed value (%.3g). The latter will be used instead.", delta, max); - delta <- max; - verbose && printf(verbose, "Max delta: %.3g\n", max); - verbose && printf(verbose, "Truncated delta: %.3g\n", delta); + warning("Estimated delta (%.3g) was greater than the maximum allowed value (%.3g). The latter will be used instead.", delta, max) + delta <- max + verbose && printf(verbose, "Max delta: %.3g\n", max) + verbose && printf(verbose, "Truncated delta: %.3g\n", delta) } - verbose && exit(verbose); + verbose && exit(verbose) - delta; + delta }) # estimateDeltaAB() @@ -138,59 +138,59 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'deltaDH': - deltaDH <- Arguments$getDouble(deltaDH, range=c(0,1)); + deltaDH <- Arguments$getDouble(deltaDH, range=c(0,1)) # Argument 'deltaTCN': - deltaTCN <- Arguments$getDouble(deltaTCN, range=c(0,Inf)); + deltaTCN <- Arguments$getDouble(deltaTCN, range=c(0,Inf)) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Estimating standard deviation of tumor BAFs for heterozygous SNPs"); - verbose && cat(verbose, "DH threshold: ", deltaDH); - verbose && cat(verbose, "TCN threshold: ", deltaTCN); + verbose && enter(verbose, "Estimating standard deviation of tumor BAFs for heterozygous SNPs") + verbose && cat(verbose, "DH threshold: ", deltaDH) + verbose && cat(verbose, "TCN threshold: ", deltaTCN) - segs <- as.data.frame(this); + segs <- as.data.frame(this) - verbose && cat(verbose, "Number of segments: ", nrow(segs)); + verbose && cat(verbose, "Number of segments: ", nrow(segs)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Find segments to be used for the estimation # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Find segments that have low DHs - idxsDH <- which(segs$dhMean <= deltaDH); - verbose && cat(verbose, "Identified segments with small DH levels: ", length(idxsDH)); - verbose && str(verbose, idxsDH); + idxsDH <- which(segs$dhMean <= deltaDH) + verbose && cat(verbose, "Identified segments with small DH levels: ", length(idxsDH)) + verbose && str(verbose, idxsDH) # Sanity check if (length(idxsDH) == 0) { - throw("Cannot estimate standard deviation. There exist no segments with DH less or equal to the given threshold: ", deltaDH); + throw("Cannot estimate standard deviation. There exist no segments with DH less or equal to the given threshold: ", deltaDH) } # Find segments that have low TCNs - idxsTCN <- which(segs$tcnMean <= deltaTCN); - verbose && cat(verbose, "Identified segments with small TCN levels: ", length(idxsTCN)); - verbose && str(verbose, idxsTCN); + idxsTCN <- which(segs$tcnMean <= deltaTCN) + verbose && cat(verbose, "Identified segments with small TCN levels: ", length(idxsTCN)) + verbose && str(verbose, idxsTCN) # Sanity check if (length(idxsTCN) == 0) { - throw("Cannot estimate standard deviation. There exist no segments with TCN less or equal to the given threshold: ", deltaTCN); + throw("Cannot estimate standard deviation. There exist no segments with TCN less or equal to the given threshold: ", deltaTCN) } # Segments with small DH and small TCN - idxs <- intersect(idxsDH, idxsTCN); - verbose && cat(verbose, "Identified segments with small DH and small TCN levels: ", length(idxs)); - verbose && str(verbose, idxs); + idxs <- intersect(idxsDH, idxsTCN) + verbose && cat(verbose, "Identified segments with small DH and small TCN levels: ", length(idxs)) + verbose && str(verbose, idxs) # Sanity check if (length(idxs) == 0) { - throw("Cannot estimate standard deviation. There exist no segments with small DH and small TCN."); + throw("Cannot estimate standard deviation. There exist no segments with small DH and small TCN.") } @@ -198,24 +198,24 @@ # Extract data and estimate parameters # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract those segments - verbose && enter(verbose, "Extracting identified segments"); - fitT <- extractRegions(this, idxs); - verbose && exit(verbose); + verbose && enter(verbose, "Extracting identified segments") + fitT <- extractRegions(this, idxs) + verbose && exit(verbose) # Get the tumor BAFs for the heterozygous SNPs - verbose && enter(verbose, "Extracting BAFs for the heterozygous SNPs"); - beta <- with(fitT$data, betaTN[muN == 1/2]); - verbose && str(verbose, beta); - verbose && exit(verbose); + verbose && enter(verbose, "Extracting BAFs for the heterozygous SNPs") + beta <- with(fitT$data, betaTN[muN == 1/2]) + verbose && str(verbose, beta) + verbose && exit(verbose) # Estimate the standard deviation for those - sd <- mad(beta, na.rm=TRUE); - verbose && cat(verbose, "Estimated standard deviation: ", sd); + sd <- mad(beta, na.rm=TRUE) + verbose && cat(verbose, "Estimated standard deviation: ", sd) - verbose && exit(verbose); + verbose && exit(verbose) - sd; + sd }, private=TRUE) # estimateStdDevForHeterozygousBAF() @@ -226,63 +226,63 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'deltaDH': - deltaDH <- Arguments$getDouble(deltaDH, range=c(0,1)); + deltaDH <- Arguments$getDouble(deltaDH, range=c(0,1)) # Argument 'deltaTCN': - deltaTCN <- Arguments$getDouble(deltaTCN, range=c(0,Inf)); + deltaTCN <- Arguments$getDouble(deltaTCN, range=c(0,Inf)) # Argument 'robust': - robust <- Arguments$getLogical(robust); + robust <- Arguments$getLogical(robust) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Estimating mean of tumor DHs for heterozygous SNPs"); - verbose && cat(verbose, "DH threshold: ", deltaDH); - verbose && cat(verbose, "TCN threshold: ", deltaTCN); - verbose && cat(verbose, "Robust estimator: ", robust); - verbose && cat(verbose, "Trim: ", trim); + verbose && enter(verbose, "Estimating mean of tumor DHs for heterozygous SNPs") + verbose && cat(verbose, "DH threshold: ", deltaDH) + verbose && cat(verbose, "TCN threshold: ", deltaTCN) + verbose && cat(verbose, "Robust estimator: ", robust) + verbose && cat(verbose, "Trim: ", trim) - segs <- as.data.frame(this); + segs <- as.data.frame(this) - verbose && cat(verbose, "Number of segments: ", nrow(segs)); + verbose && cat(verbose, "Number of segments: ", nrow(segs)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Find segments to be used for the estimation # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Find segments that have low DHs - idxsDH <- which(segs$dhMean <= deltaDH); - verbose && cat(verbose, "Identified segments with small DH levels: ", length(idxsDH)); - verbose && str(verbose, idxsDH); + idxsDH <- which(segs$dhMean <= deltaDH) + verbose && cat(verbose, "Identified segments with small DH levels: ", length(idxsDH)) + verbose && str(verbose, idxsDH) # Sanity check if (length(idxsDH) == 0) { - throw("Cannot estimate standard deviation. There exist no segments with DH less or equal to the given threshold: ", deltaDH); + throw("Cannot estimate standard deviation. There exist no segments with DH less or equal to the given threshold: ", deltaDH) } # Find segments that have low TCNs - idxsTCN <- which(segs$tcnMean <= deltaTCN); - verbose && cat(verbose, "Identified segments with small TCN levels: ", length(idxsTCN)); - verbose && str(verbose, idxsTCN); + idxsTCN <- which(segs$tcnMean <= deltaTCN) + verbose && cat(verbose, "Identified segments with small TCN levels: ", length(idxsTCN)) + verbose && str(verbose, idxsTCN) # Sanity check if (length(idxsTCN) == 0) { - throw("Cannot estimate standard deviation. There exist no segments with TCN less or equal to the given threshold: ", deltaTCN); + throw("Cannot estimate standard deviation. There exist no segments with TCN less or equal to the given threshold: ", deltaTCN) } # Segments with small DH and small TCN - idxs <- intersect(idxsDH, idxsTCN); - verbose && cat(verbose, "Identified segments with small DH and small TCN levels: ", length(idxs)); - verbose && str(verbose, idxs); + idxs <- intersect(idxsDH, idxsTCN) + verbose && cat(verbose, "Identified segments with small DH and small TCN levels: ", length(idxs)) + verbose && str(verbose, idxs) # Sanity check if (length(idxs) == 0) { - throw("Cannot estimate standard deviation. There exist no segments with small DH and small TCN."); + throw("Cannot estimate standard deviation. There exist no segments with small DH and small TCN.") } @@ -290,32 +290,32 @@ # Extract data and estimate parameters # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract those segments - verbose && enter(verbose, "Extracting identified segments"); - fitT <- extractRegions(this, idxs); - verbose && exit(verbose); + verbose && enter(verbose, "Extracting identified segments") + fitT <- extractRegions(this, idxs) + verbose && exit(verbose) # Get the tumor DHs for the heterozygous SNPs - verbose && enter(verbose, "Extracting DHs for the heterozygous SNPs"); - rho <- with(fitT$data, rho[muN == 1/2]); - verbose && str(verbose, rho); - verbose && exit(verbose); + verbose && enter(verbose, "Extracting DHs for the heterozygous SNPs") + rho <- with(fitT$data, rho[muN == 1/2]) + verbose && str(verbose, rho) + verbose && exit(verbose) # Estimate the average for those - rho <- rho[is.finite(rho)]; + rho <- rho[is.finite(rho)] if (robust) { - mu <- median(rho, na.rm=FALSE); - qlow <- quantile(rho, probs=0.05, na.rm=FALSE); - delta <- mu-qlow; - print(list(qlow=qlow, mu=mu, delta=delta, "mu+delta"=mu+delta)); + mu <- median(rho, na.rm=FALSE) + qlow <- quantile(rho, probs=0.05, na.rm=FALSE) + delta <- mu-qlow + print(list(qlow=qlow, mu=mu, delta=delta, "mu+delta"=mu+delta)) } else { - mu <- mean(rho, trim=trim, na.rm=FALSE); + mu <- mean(rho, trim=trim, na.rm=FALSE) } - verbose && cat(verbose, "Estimated mean: ", mu); + verbose && cat(verbose, "Estimated mean: ", mu) - verbose && exit(verbose); + verbose && exit(verbose) - mu; + mu }, private=TRUE) # estimateMeanForDH() @@ -325,80 +325,80 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'quantile': - quantile <- Arguments$getDouble(quantile, range=c(0.5,1)); + quantile <- Arguments$getDouble(quantile, range=c(0.5,1)) # Argument 'scale': - scale <- Arguments$getDouble(scale, range=c(0,Inf)); + scale <- Arguments$getDouble(scale, range=c(0,Inf)) # Argument 'deltaDH': - deltaDH <- Arguments$getDouble(deltaDH, range=c(0,1)); + deltaDH <- Arguments$getDouble(deltaDH, range=c(0,1)) # Argument 'deltaTCN': - deltaTCN <- Arguments$getDouble(deltaTCN, range=c(0,Inf)); + deltaTCN <- Arguments$getDouble(deltaTCN, range=c(0,Inf)) # Argument 'robust': - robust <- Arguments$getLogical(robust); + robust <- Arguments$getLogical(robust) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Estimating DH quantile of tumor DHs for heterozygous SNPs"); - verbose && cat(verbose, "DH threshold: ", deltaDH); - verbose && cat(verbose, "TCN threshold: ", deltaTCN); - verbose && cat(verbose, "Robust estimator: ", robust); - verbose && cat(verbose, "Scale factor: ", scale); + verbose && enter(verbose, "Estimating DH quantile of tumor DHs for heterozygous SNPs") + verbose && cat(verbose, "DH threshold: ", deltaDH) + verbose && cat(verbose, "TCN threshold: ", deltaTCN) + verbose && cat(verbose, "Robust estimator: ", robust) + verbose && cat(verbose, "Scale factor: ", scale) - segs <- as.data.frame(this); + segs <- as.data.frame(this) - verbose && cat(verbose, "Number of segments: ", nrow(segs)); + verbose && cat(verbose, "Number of segments: ", nrow(segs)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Find segments to be used for the estimation # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Finding some segments that are likely to in allelic balance (AB)"); + verbose && enter(verbose, "Finding some segments that are likely to in allelic balance (AB)") # Find some segments that have low DHs - idxsDH <- which(segs$dhMean <= deltaDH); - verbose && cat(verbose, "Identified segments with small DH levels: ", length(idxsDH)); - verbose && str(verbose, idxsDH); + idxsDH <- which(segs$dhMean <= deltaDH) + verbose && cat(verbose, "Identified segments with small DH levels: ", length(idxsDH)) + verbose && str(verbose, idxsDH) # Sanity check if (length(idxsDH) == 0) { - throw("Cannot estimate standard deviation. There exist no segments with DH less or equal to the given threshold: ", deltaDH); + throw("Cannot estimate standard deviation. There exist no segments with DH less or equal to the given threshold: ", deltaDH) } # Find segments that have low TCNs - idxsTCN <- which(segs$tcnMean <= deltaTCN); - verbose && cat(verbose, "Identified segments with small TCN levels: ", length(idxsTCN)); - verbose && str(verbose, idxsTCN); + idxsTCN <- which(segs$tcnMean <= deltaTCN) + verbose && cat(verbose, "Identified segments with small TCN levels: ", length(idxsTCN)) + verbose && str(verbose, idxsTCN) # Sanity check if (length(idxsTCN) == 0) { - throw("Cannot estimate standard deviation. There exist no segments with TCN less or equal to the given threshold: ", deltaTCN); + throw("Cannot estimate standard deviation. There exist no segments with TCN less or equal to the given threshold: ", deltaTCN) } # Segments with small DH and small TCN - idxs <- intersect(idxsDH, idxsTCN); - verbose && cat(verbose, "Identified segments with small DH and small TCN levels: ", length(idxs)); - verbose && str(verbose, idxs); + idxs <- intersect(idxsDH, idxsTCN) + verbose && cat(verbose, "Identified segments with small DH and small TCN levels: ", length(idxs)) + verbose && str(verbose, idxs) # Sanity check if (length(idxs) == 0) { - throw("Cannot estimate standard deviation. There exist no segments with small DH and small TCN."); + throw("Cannot estimate standard deviation. There exist no segments with small DH and small TCN.") } # Extract those segments - verbose && enter(verbose, "Extracting identified segments"); - fitT <- extractRegions(this, idxs); - verbose && exit(verbose); + verbose && enter(verbose, "Extracting identified segments") + fitT <- extractRegions(this, idxs) + verbose && exit(verbose) - verbose && exit(verbose); + verbose && exit(verbose) @@ -406,46 +406,46 @@ # Extract data and estimate parameters # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Get the tumor DHs for the heterozygous SNPs - verbose && enter(verbose, "Extracting DHs for the heterozygous SNPs"); - rho <- with(fitT$data, rho[muN == 1/2]); - verbose && str(verbose, rho); - verbose && exit(verbose); + verbose && enter(verbose, "Extracting DHs for the heterozygous SNPs") + rho <- with(fitT$data, rho[muN == 1/2]) + verbose && str(verbose, rho) + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Estimating the DH quantile # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Estimating the quantile of interest"); - verbose && cat(verbose, "Quantile: ", quantile); + verbose && enter(verbose, "Estimating the quantile of interest") + verbose && cat(verbose, "Quantile: ", quantile) # Drop missing values - rho <- rho[is.finite(rho)]; + rho <- rho[is.finite(rho)] if (robust) { - lq <- quantile(rho, probs=1-quantile, na.rm=FALSE); - verbose && printf(verbose, "Estimated lower quantile (%.3f): %f\n", 1-quantile, lq); - mu <- median(rho, na.rm=FALSE); - verbose && cat(verbose, "Estimated median: ", mu); - delta <- mu-lq; - verbose && printf(verbose, "Estimated \"spread\": %f\n", delta); - uq <- mu + scale*delta; - verbose && printf(verbose, "Scale parameter: %f\n", scale); - qs <- c(lq, mu, mu+delta, uq); - names(qs) <- sprintf("%.1f%%", 100*c(1-quantile, 0.5, quantile, 0.5+scale*(quantile-0.5))); - names(qs)[3:4] <- sprintf("%s*", names(qs)[3:4]); - attr(uq, "quantiles") <- qs; + lq <- quantile(rho, probs=1-quantile, na.rm=FALSE) + verbose && printf(verbose, "Estimated lower quantile (%.3f): %f\n", 1-quantile, lq) + mu <- median(rho, na.rm=FALSE) + verbose && cat(verbose, "Estimated median: ", mu) + delta <- mu-lq + verbose && printf(verbose, "Estimated \"spread\": %f\n", delta) + uq <- mu + scale*delta + verbose && printf(verbose, "Scale parameter: %f\n", scale) + qs <- c(lq, mu, mu+delta, uq) + names(qs) <- sprintf("%.1f%%", 100*c(1-quantile, 0.5, quantile, 0.5+scale*(quantile-0.5))) + names(qs)[3:4] <- sprintf("%s*", names(qs)[3:4]) + attr(uq, "quantiles") <- qs } else { - uq <- quantile(rho, probs=quantile, na.rm=FALSE); + uq <- quantile(rho, probs=quantile, na.rm=FALSE) } - names(uq) <- uq; - verbose && printf(verbose, "Estimated upper quantile (%.3f): %f\n", quantile, uq); + names(uq) <- uq + verbose && printf(verbose, "Estimated upper quantile (%.3f): %f\n", quantile, uq) - verbose && exit(verbose); + verbose && exit(verbose) - verbose && exit(verbose); + verbose && exit(verbose) - uq; + uq }, private=TRUE) # estimateHighDHQuantileAtAB() @@ -508,165 +508,75 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'q1' & 'q2': - q1 <- Arguments$getDouble(q1, range=c(0,1)); - q2 <- Arguments$getDouble(q2, range=c(0,1)); + q1 <- Arguments$getDouble(q1, range=c(0,1)) + q2 <- Arguments$getDouble(q2, range=c(0,1)) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Estimating DH threshold for AB caller"); - verbose && cat(verbose, "quantile #1: ", q1); - verbose && cat(verbose, "Symmetric quantile #2: ", q2); + verbose && enter(verbose, "Estimating DH threshold for AB caller") + verbose && cat(verbose, "quantile #1: ", q1) + verbose && cat(verbose, "Symmetric quantile #2: ", q2) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract the region-level estimates # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - segs <- getSegments(fit); - dh <- segs$dhMean; - stopifnot(!is.null(dh)); - n <- segs$dhNbrOfLoci; + segs <- getSegments(fit) + dh <- segs$dhMean + .stop_if_not(!is.null(dh)) + n <- segs$dhNbrOfLoci # Drop missing values - keep <- (!is.na(dh) & !is.na(n)); - idxs <- which(keep); - dh <- dh[idxs]; - n <- n[idxs]; - verbose && cat(verbose, "Number of segments: ", length(idxs)); + keep <- (!is.na(dh) & !is.na(n)) + idxs <- which(keep) + dh <- dh[idxs] + n <- n[idxs] + verbose && cat(verbose, "Number of segments: ", length(idxs)) # Sanity check - stopifnot(length(idxs) > 0); + .stop_if_not(length(idxs) > 0) # Calculated weighted quantile - weights <- n / sum(n); - deltaDH <- weightedQuantile(dh, w=weights, probs=q1); - verbose && printf(verbose, "Weighted %g%% quantile of DH: %f\n", 100*q1, deltaDH); + weights <- n / sum(n) + deltaDH <- weightedQuantile(dh, w=weights, probs=q1) + verbose && printf(verbose, "Weighted %g%% quantile of DH: %f\n", 100*q1, deltaDH) # Identify segments with DH this small - keep <- (dh <= deltaDH); - idxs <- idxs[keep]; - verbose && cat(verbose, "Number of segments with small DH: ", length(idxs)); + keep <- (dh <= deltaDH) + idxs <- idxs[keep] + verbose && cat(verbose, "Number of segments with small DH: ", length(idxs)) # Sanity check - stopifnot(length(idxs) > 0); + .stop_if_not(length(idxs) > 0) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract the locus-level estimates # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract the regions of interest - fitT <- extractRegions(fit, idxs); + fitT <- extractRegions(fit, idxs) # Extract the data - data <- fitT$data; - rho <- data$rho; - stopifnot(!is.null(rho)); + data <- fitT$data + rho <- data$rho + .stop_if_not(!is.null(rho)) - verbose && cat(verbose, "Number of data points: ", length(rho)); + verbose && cat(verbose, "Number of data points: ", length(rho)) # Drop missing values - rho <- rho[is.finite(rho)]; - verbose && cat(verbose, "Number of finite data points: ", length(rho)); + rho <- rho[is.finite(rho)] + verbose && cat(verbose, "Number of finite data points: ", length(rho)) - qs <- quantile(rho, probs=c(1-q2, 1/2), na.rm=FALSE, names=FALSE); - verbose && printf(verbose, "Estimate of (1-%.3g):th and 50%% quantiles: (%g,%g)\n", q2, qs[1], qs[2]); - deltaAB <- qs[2] + (qs[2]-qs[1]); - verbose && printf(verbose, "Estimate of %.3g:th \"symmetric\" quantile: %g\n", q2, deltaAB); + qs <- quantile(rho, probs=c(1-q2, 1/2), na.rm=FALSE, names=FALSE) + verbose && printf(verbose, "Estimate of (1-%.3g):th and 50%% quantiles: (%g,%g)\n", q2, qs[1], qs[2]) + deltaAB <- qs[2] + (qs[2]-qs[1]) + verbose && printf(verbose, "Estimate of %.3g:th \"symmetric\" quantile: %g\n", q2, deltaAB) # Sanity check - deltaAB <- Arguments$getDouble(deltaAB); + deltaAB <- Arguments$getDouble(deltaAB) - deltaAB; + deltaAB }, protected=TRUE) # estimateDeltaABBySmallDH() - - -############################################################################ -# HISTORY: -# 2011-06-14 -# o Updated code to recognize new column names. -# 2011-05-29 -# o Renamed all arguments, variables, function named 'tau' to 'delta'. -# 2011-04-11 -# o Updated estimateTauABBySmallDH() for PairedPSCBS to use a "symmetric" -# quantile estimator. -# 2011-04-08 -# o Added estimateTauABBySmallDH() for PairedPSCBS. -# o Added Rdoc help to estimateTauAB() for PairedPSCBS. -# o Extracted from PairedPSCBS.EXTS.R. -# o BUG FIX: postsegmentTCN() for PairedPSCBS could generate an invalid -# 'tcnSegRows' matrix, where the indices for two consecutive segments -# would overlap, which is invalid. -# 2011-04-05 -# o BUG FIX: estimateHighDHQuantileAtAB() for PairedPSCBS would throw -# an error on an undefined 'trim' if verbose output was used. -# 2011-02-17 -# o Added arguments 'robust' and 'trim' to estimateMeanForDH(). -# 2011-02-03 -# o Added argument 'tauTCN' to estimateMeanForDH(). -# 2011-01-27 -# o Added flavor="DHskew" to estimateTauAB(). -# o Added flavor="DH" to estimateTauAB() to estimate from DH instead -# of hBAF. As argued by the equations in the comments, these two -# approaches gives virtually the same results. The advantage with the -# DH approach is that it requires one less degree of freedom. -# o Added estimateMeanForDH(). -# 2011-01-18 -# o BUG FIX: 'tcnSegRows' and 'dhSegRows' where not updated by -# extractByRegions() for PairedPSCBS. -# 2011-01-14 -# o Added estimateTauAB() for estimating the DeltaAB parameter. -# o Added estimateStdDevForHeterozygousBAF() for PairedPSCBS. -# o BUG FIX: extractByRegions() did not handle the case where multiple loci -# at the same position are split up in two different segments. -# 2011-01-12 -# o Added extractByRegions() and extractByRegion() for PairedPSCBS. -# o Now postsegmentTCN(..., force=TRUE) for PairedPSCBS also updates -# the TCN estimates even for segments where the DH segmentation did -# not find any additional change points. -# 2010-12-02 -# o Now postsegmentTCN() assert that total number of TCN loci before -# and after is the same. -# o Now postsegmentTCN() assert that joinSegment is TRUE. -# 2010-12-01 -# o Now postsegmentTCN() checks if it is already postsegmented. -# 2010-11-30 -# o TODO: postsegmentTCN() does not make sure of 'dhLociToExclude'. Why? -# o Now postsegmentTCN() recognizes the new 'tcnLociToExclude'. -# 2010-11-28 -# o BUG FIX: postsegmentTCN() did not handle loci with the same positions -# and that are split in two different segments. It also did not exclude -# loci with missing values. -# 2010-11-21 -# o Adjusted postsegmentTCN() such that the updated TCN segment boundaries -# are the maximum of the DH segment and the support by the loci. This -# means that postsegmentTCN() will work as expected both when signals -# where segmented with 'joinSegments' being TRUE or FALSE. -# 2010-10-25 -# o Now subsetByDhSegments() for PairedPSCBS handles the rare case when -# markers with the same positions are split in two different segments. -# o Renamed subsetBySegments() for PairedPSCBS to subsetByDhSegments(). -# 2010-09-26 -# o Now subsetBySegments() for PairedPSCBS handles multiple chromosomes. -# o Now postsegmentTCN() PairedPSCBS handles multiple chromosomes. -# 2010-09-21 -# o Added postsegmentTCN() for PairedPSCBS. -# 2010-09-19 -# o BUG FIX: plot() used non-defined nbrOfLoci; now length(x). -# 2010-09-15 -# o Added subsetBySegments(). -# o Added linesC1C2() and arrowsC1C2(). -# o Now the default 'cex' for pointsC1C2() corresponds to 'dh.num.mark'. -# o Now extractTotalAndDH() also returns 'dh.num.mark'. -# 2010-09-08 -# o Added argument 'add=FALSE' to plot(). -# o Added plotC1C2(). -# o Added extractTotalAndDH() and extractMinorMajorCNs(). -# 2010-09-04 -# o Added drawLevels() for PairedPSCBS. -# o Added as.data.frame() and print() for PairedPSCBS. -# 2010-09-03 -# o Added plot() for PairedPSCBS. -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.estimateDeltaLOH.R r-cran-pscbs-0.64.0/R/PairedPSCBS.estimateDeltaLOH.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.estimateDeltaLOH.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.estimateDeltaLOH.R 2018-08-12 21:30:44.000000000 +0000 @@ -39,40 +39,40 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'flavor': - flavor <- match.arg(flavor); + flavor <- match.arg(flavor) # Argument 'max': - max <- Arguments$getDouble(max, range=c(0,Inf)); + max <- Arguments$getDouble(max, range=c(0,Inf)) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Estimating DH threshold for calling LOH"); - verbose && cat(verbose, "flavor: ", flavor); + verbose && enter(verbose, "Estimating DH threshold for calling LOH") + verbose && cat(verbose, "flavor: ", flavor) if (flavor == "minC1|nonAB") { - delta <- estimateDeltaLOHByMinC1ForNonAB(this, ..., verbose=verbose); + delta <- estimateDeltaLOHByMinC1ForNonAB(this, ..., verbose=verbose) } else { - throw("Unkown flavor: ", flavor); + throw("Unkown flavor: ", flavor) } - verbose && printf(verbose, "delta: %.3g\n", delta); + verbose && printf(verbose, "delta: %.3g\n", delta) # Truncate estimate? if (delta > max) { - warning("Estimated delta (%.3g) was greater than the maximum allowed value (%.3g). The latter will be used instead.", delta, max); - delta <- max; - verbose && printf(verbose, "Max delta: %.3g\n", max); - verbose && printf(verbose, "Truncated delta: %.3g\n", delta); + warning("Estimated delta (%.3g) was greater than the maximum allowed value (%.3g). The latter will be used instead.", delta, max) + delta <- max + verbose && printf(verbose, "Max delta: %.3g\n", max) + verbose && printf(verbose, "Truncated delta: %.3g\n", delta) } - verbose && exit(verbose); + verbose && exit(verbose) - delta; + delta }) # estimateDeltaLOH() @@ -134,125 +134,91 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'midpoint': - midpoint <- Arguments$getDouble(midpoint, range=c(0,1)); + midpoint <- Arguments$getDouble(midpoint, range=c(0,1)) # Argument 'maxC': - maxC <- Arguments$getDouble(maxC, range=c(0,Inf)); + maxC <- Arguments$getDouble(maxC, range=c(0,Inf)) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Estimating DH threshold for calling LOH as the midpoint between guessed C1=0 and C1=1"); - segs <- getSegments(this, splitters=FALSE); - nbrOfSegments <- nrow(segs); + verbose && enter(verbose, "Estimating DH threshold for calling LOH as the midpoint between guessed C1=0 and C1=1") + segs <- getSegments(this, splitters=FALSE) + nbrOfSegments <- nrow(segs) - verbose && printf(verbose, "Argument 'midpoint': %.3g\n", midpoint); - verbose && cat(verbose, "Number of segments: ", nbrOfSegments); + verbose && printf(verbose, "Argument 'midpoint': %.3g\n", midpoint) + verbose && cat(verbose, "Number of segments: ", nbrOfSegments) # Getting AB calls - isAB <- segs$abCall; + isAB <- segs$abCall if (is.null(isAB)) { - throw("Cannot estimate delta_LOH because allelic-balance calls have not been made yet."); + throw("Cannot estimate delta_LOH because allelic-balance calls have not been made yet.") } - nbrOfAB <- sum(isAB, na.rm=TRUE); - verbose && printf(verbose, "Number of segments in allelic balance: %d (%.1f%%) of %d\n", nbrOfAB, 100*nbrOfAB/nbrOfSegments, nbrOfSegments); + nbrOfAB <- sum(isAB, na.rm=TRUE) + verbose && printf(verbose, "Number of segments in allelic balance: %d (%.1f%%) of %d\n", nbrOfAB, 100*nbrOfAB/nbrOfSegments, nbrOfSegments) # Sanity check if (nbrOfAB == 0) { - throw("There are no segments in allelic balance."); + throw("There are no segments in allelic balance.") } - nbrOfNonAB <- sum(!isAB, na.rm=TRUE); - verbose && printf(verbose, "Number of segments not in allelic balance: %d (%.1f%%) of %d\n", nbrOfNonAB, 100*nbrOfNonAB/nbrOfSegments, nbrOfSegments); - segsNonAB <- segs[which(!isAB),,drop=FALSE]; + nbrOfNonAB <- sum(!isAB, na.rm=TRUE) + verbose && printf(verbose, "Number of segments not in allelic balance: %d (%.1f%%) of %d\n", nbrOfNonAB, 100*nbrOfNonAB/nbrOfSegments, nbrOfSegments) + segsNonAB <- segs[which(!isAB),,drop=FALSE] # Sanity check if (nbrOfNonAB == 0) { - msg <- sprintf("All %d segments are in allelic balance. Cannot estimate DeltaLOH, which requires that at least one segment must be in allelic imbalance. Returning -Inf instead.", nbrOfSegments); - warning(msg); - return(-Inf); + msg <- sprintf("All %d segments are in allelic balance. Cannot estimate DeltaLOH, which requires that at least one segment must be in allelic imbalance. Returning -Inf instead.", nbrOfSegments) + warning(msg) + return(-Inf) } # Identify segments in AB and with small enough TCNs - C <- segs$tcnMean; - keep <- which(isAB & C <= maxC); - verbose && printf(verbose, "Number of segments in allelic balance and TCN <= %.2f: %d (%.1f%%) of %d\n", maxC, length(keep), 100*length(keep)/nbrOfSegments, nbrOfSegments); + C <- segs$tcnMean + keep <- which(isAB & C <= maxC) + verbose && printf(verbose, "Number of segments in allelic balance and TCN <= %.2f: %d (%.1f%%) of %d\n", maxC, length(keep), 100*length(keep)/nbrOfSegments, nbrOfSegments) # Sanity check if (length(keep) == 0) { - throw("There are no segments in allelic balance with small enough total CN."); + throw("There are no segments in allelic balance with small enough total CN.") } # (a) Estimate mean C1 level of AB segments - segsT <- segs[keep,,drop=FALSE]; - C <- segsT$tcnMean; - n <- segsT$dhNbrOfLoci; - w <- n/sum(n); - C1 <- C/2; # Called AB! - verbose && printf(verbose, "C: %s\n", hpaste(sprintf("%.3g", C))); - verbose && printf(verbose, "Corrected C1 (=C/2): %s\n", hpaste(sprintf("%.3g", C1))); - verbose && printf(verbose, "Number of DHs: %s\n", hpaste(n)); - verbose && printf(verbose, "Weights: %s\n", hpaste(sprintf("%.3g", w))); - muC1atAB <- weightedMedian(C1, w=w, na.rm=TRUE); - verbose && printf(verbose, "Weighted median of (corrected) C1 in allelic balance: %.3f\n", muC1atAB); + segsT <- segs[keep,,drop=FALSE] + C <- segsT$tcnMean + n <- segsT$dhNbrOfLoci + w <- n/sum(n) + C1 <- C/2 # Called AB! + verbose && printf(verbose, "C: %s\n", hpaste(sprintf("%.3g", C))) + verbose && printf(verbose, "Corrected C1 (=C/2): %s\n", hpaste(sprintf("%.3g", C1))) + verbose && printf(verbose, "Number of DHs: %s\n", hpaste(n)) + verbose && printf(verbose, "Weights: %s\n", hpaste(sprintf("%.3g", w))) + muC1atAB <- weightedMedian(C1, w=w, na.rm=TRUE) + verbose && printf(verbose, "Weighted median of (corrected) C1 in allelic balance: %.3f\n", muC1atAB) # (b) Estimate mean C1 level of non-AB segments - C1 <- segsNonAB$c1Mean; - muC1atNonAB <- min(C1, na.rm=TRUE); - idxs <- which(C1 <= muC1atNonAB); - n <- segsNonAB$dhNbrOfLoci[idxs]; - verbose && printf(verbose, "Smallest C1 among segments not in allelic balance: %.3g\n", muC1atNonAB); - verbose && printf(verbose, "There are %d segments with in total %d heterozygous SNPs with this level.\n", length(idxs), n); + C1 <- segsNonAB$c1Mean + muC1atNonAB <- min(C1, na.rm=TRUE) + idxs <- which(C1 <= muC1atNonAB) + n <- segsNonAB$dhNbrOfLoci[idxs] + verbose && printf(verbose, "Smallest C1 among segments not in allelic balance: %.3g\n", muC1atNonAB) + verbose && printf(verbose, "There are %d segments with in total %d heterozygous SNPs with this level.\n", length(idxs), n) # Sanity check - stopifnot(muC1atNonAB < muC1atAB); + .stop_if_not(muC1atNonAB < muC1atAB) - delta <- midpoint * (muC1atAB + muC1atNonAB); - verbose && printf(verbose, "Midpoint between the two: %.3g\n", delta); + delta <- midpoint * (muC1atAB + muC1atNonAB) + verbose && printf(verbose, "Midpoint between the two: %.3g\n", delta) - verbose && exit(verbose); + verbose && exit(verbose) - delta; + delta }, private=TRUE) # estimateDeltaLOHByMinC1AtNonAB() - - - -############################################################################ -# HISTORY: -# 2012-08-30 -# o Now estimateKappaByC1Density() relies on matrixStats (and no longer -# aroma.light) to implement weightedMedian(). -# 2012-01-13 -# o Corrected some of verbose messages of estimateDeltaLOHByMinC1ForNonAB() -# for PairedPSCBS objects. -# 2011-07-07 -# o GENERALIZATION: Now estimateDeltaLOHByMinC1ForNonAB() returns -Inf -# if all segments are called AB. -# 2011-07-06 -# o ROBUSTNESS: Added a sanity check to estimateDeltaLOHByMinC1AtNonAB() -# asserting that there exist segments that are not in allelic balance, -# which are needed for estimating $\mu_0$. -# 2011-06-14 -# o Updated code to recognize new column names. -# 2011-05-29 -# o Renamed all arguments, variables, function named 'tau' to 'delta'. -# 2011-04-27 -# o Added argument 'maxC' to estimateTauLOHByMinC1ForNonAB(). -# 2011-04-14 -# o Added argument 'max' to estimateTauAB() and estimateTauLOH(). -# 2011-04-11 -# o Added argument 'midpoint' to estimateTauLOHByMinC1AtNonAB(). -# o Dropped argument 'tauMax'; it was a misunderstanding. -# 2011-04-09 -# o Added estimateTauLOHByMinC1AtNonAB(). -# o Added estimateTauLOH(). -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.estimateKappa.R r-cran-pscbs-0.64.0/R/PairedPSCBS.estimateKappa.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.estimateKappa.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.estimateKappa.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,265 +1,234 @@ -###########################################################################/** -# @set class=PairedPSCBS -# @RdocMethod estimateKappa -# -# @title "Estimate global background in segmented copy numbers" -# -# \description{ -# @get "title". -# The global background, here called \eqn{\kappa}, -# may have multiple origins where normal contamination is one, -# but not necessarily the only one. -# } -# -# @synopsis -# -# \arguments{ -# \item{flavor}{A @character string specifying which type of -# estimator to use.} -# \item{...}{Additional arguments passed to the estimator.} -# } -# -# \value{ -# Returns the background estimate as a @numeric scalar. -# } -# -# @author "HB" -# -# \seealso{ -# Internally, one of the following methods are used: -# @seemethod "estimateKappaByC1Density". -# } -# -#*/########################################################################### -setMethodS3("estimateKappa", "PairedPSCBS", function(this, flavor=c("density(C1)"), ...) { - # Argument 'flavor': - flavor <- match.arg(flavor); - - if (flavor == "density(C1)") { - estimateKappaByC1Density(this, ...); - } else { - throw("Cannot estimate background. Unsupported flavor: ", flavor); - } -}) - - - -###########################################################################/** -# @set class=PairedPSCBS -# @RdocMethod estimateKappaByC1Density -# -# @title "Estimate global background in segmented copy numbers" -# -# \description{ -# @get "title" based on the location of peaks in a weighted -# density estimator of the minor copy number mean levels. -# -# The global background, here called \eqn{\kappa}, -# may have multiple origins where normal contamination is one, -# but not necessarily the only one. -# -# \emph{Assumptions:} This estimator assumes that there are segments -# with C1=0 and C1=1, i.e. some deletions and, typically, some normal -# segements. -# } -# -# @synopsis -# -# \arguments{ -# \item{typeOfWeights}{A @character string specifying how weights -# are calculated.} -# \item{adjust}{A @numeric scale factor specifying the size of -# the bandwidth parameter used by the density estimator.} -# \item{from}{A @numeric scalar specifying the lower bound for the -# support of the estimated density.} -# \item{minDensity}{A non-negative @numeric threshold specifying -# the minimum density a peak should have in order to consider -# it a peak.} -# \item{...}{Not used.} -# \item{verbose}{See @see "R.utils::Verbose".} -# } -# -# \value{ -# Returns the background estimate as a @numeric scalar. -# } -# -# \section{Algorithm}{ -# \itemize{ -# \item Retrieve segment-level minor copy numbers and corresponding weights: -# \enumerate{ -# \item Grabs the segment-level C1 estimates. -# \item Calculate segment weights. -# The default (\code{typeOfWeights="dhNbrOfLoci"}) is to use -# weights proportional to the number of heterozygous SNPs. -# An alternative (\code{typeOfWeights="sqrt(dhNbrOfLoci)"}) is -# to use the square root of those counts. -# } -# -# \item Identify subset of regions with C1=0: -# \enumerate{ -# \item Estimates the weighted empirical density function -# (truncated at zero below). Tuning parameter 'adjust'. -# \item Find the first two peaks -# (with a density greater than tuning parameter 'minDensity'). -# \item Assumes that the two peaks corresponds to C1=0 and C1=1. -# \item Defines threshold Delta0.5 as the center location between -# these two peaks. -# } -# -# \item Estimate the global background signal: -# \enumerate{ -# \item For all segments with C1 < Delta0.5, calculate the weighted -# median of their C1:s. -# \item Let kappa be the above weighted median. -# This is the estimated background. -# } -# } -# } -# -# @author "HB" -# -# \seealso{ -# Instead of calling this method explicitly, it is recommended -# to use the @seemethod "estimateKappa" method. -# } -# -# @keyword internal -#*/########################################################################### -setMethodS3("estimateKappaByC1Density", "PairedPSCBS", function(this, typeOfWeights=c("dhNbrOfLoci", "sqrt(dhNbrOfLoci)"), adjust=1, from=0, minDensity=0.2, ..., verbose=FALSE) { - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'typeOfWeights': - typeOfWeights <- match.arg(typeOfWeights); - - # Argument 'adjust': - adjust <- Arguments$getDouble(adjust, range=c(0,Inf)); - - # Argument 'minDensity': - minDensity <- Arguments$getDouble(minDensity, range=c(0,Inf)); - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); - if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); - } - - - verbose && enter(verbose, "Estimate global background (including normal contamination and more)"); - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract the region-level estimates - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - segs <- this$output; - c1 <- segs$c1Mean; - stopifnot(!is.null(c1)); - n <- segs$dhNbrOfLoci; - - # Drop missing values - keep <- (!is.na(c1) & !is.na(n)); - c1 <- c1[keep]; - n <- n[keep]; - - verbose && cat(verbose, "Number of segments: ", length(c1)); - - # Calculate region weights - if (typeOfWeights == "dhNbrOfLoci") { - w <- n; - } else if (typeOfWeights == "sqrt(dhNbrOfLoci)") { - w <- sqrt(n); - } - - # Standardize weights to sum to one - weights <- w / sum(w); - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Identify subset of regions with C1=0 and C1=1 - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Estimating threshold Delta0.5 from the empirical density of C1:s"); - verbose && cat(verbose, "adjust: ", adjust); - verbose && cat(verbose, "minDensity: ", minDensity); - ploidy <- ploidy(this); - verbose && cat(verbose, "ploidy: ", ploidy); - if (ploidy != 2) { - minDensity <- (2/ploidy)*minDensity; - verbose && cat(verbose, "minDensity (adjusted for ploidy): ", minDensity); - } - - d <- density(c1, weights=weights, adjust=adjust, from=from, na.rm=FALSE); - fit <- findPeaksAndValleys(d); - - type <- NULL; rm(list="type"); # To please R CMD check - fit <- subset(fit, type == "peak"); - if (nrow(fit) < 2L) { - throw(sprintf("Less than two modes were found in the empirical density of C1: %d", nrow(fit))); - } - nModes <- nrow(fit); - - fit <- subset(fit, density >= minDensity); - if (nrow(fit) < 2L) { - throw(sprintf("Less than two modes were found in the empirical density of C1 after removing %d modes that are too weak (density < %g): %d", nModes - nrow(fit), minDensity, nrow(fit))); - } - nModes <- nrow(fit); - verbose && cat(verbose, "All peaks:"); - verbose && print(verbose, fit); - - # Keep the first two peaks - fit <- fit[1:2,,drop=FALSE]; - verbose && cat(verbose, "C1=0 and C1=1 peaks:"); - verbose && print(verbose, fit); - - peaks <- fit$x; - Delta0.5 <- mean(peaks); - verbose && cat(verbose, "Estimate of Delta0.5: ", Delta0.5); - - verbose && exit(verbose); - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Estimate kappa - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - keep <- which(c1 < Delta0.5); - verbose && cat(verbose, "Number of segments with C1 < Delta0.5: ", length(keep)); - kappa <- weightedMedian(c1[keep], w=weights[keep]); - - # Adjust for ploidy - kappa <- (2/ploidy)*kappa; - verbose && cat(verbose, "Estimate of kappa: ", kappa); - - verbose && exit(verbose); - - kappa; -}, protected=TRUE) # estimateKappaByC1Density() - - - - -############################################################################# -# HISTORY: -# 2014-03-26 -# o Now estimateKappaByC1Density() give more informative error messages -# if it failed to identify modes for estimating the parameter. -# o Added argument 'from' to estimateKappaByC1Density(). -# 2013-09-26 -# o CLEANUP: Now estimateKappaByC1Density() no longer attached -# 'aroma.light', but only loads its namespace. -# 2013-05-07 -# o Now estimateKappaByC1Density() adjusts for ploidy, iff set. -# 2013-03-05 -# o Added argument 'typeOfWeights' to estimateKappaByC1Density() for -# PairedPSCBS, making it possible to specify what type of weights the -# density estimate should use. -# 2012-08-30 -# o ROBUSTNESS: estimateKappaByC1Density() did not make sure that -# weightedMedian() was actually available. Now it requires matrixStats. -# 2011-06-14 -# o Updated code to recognize new column names. -# 2011-04-08 -# o Added Rdoc for estimateKappaByC1Density(). -# 2011-02-03 -# o Added estimateKappa(). -# o Added estimateKappaByC1Density(). -# o Created. -############################################################################# +###########################################################################/** +# @set class=PairedPSCBS +# @RdocMethod estimateKappa +# +# @title "Estimate global background in segmented copy numbers" +# +# \description{ +# @get "title". +# The global background, here called \eqn{\kappa}, +# may have multiple origins where normal contamination is one, +# but not necessarily the only one. +# } +# +# @synopsis +# +# \arguments{ +# \item{flavor}{A @character string specifying which type of +# estimator to use.} +# \item{...}{Additional arguments passed to the estimator.} +# } +# +# \value{ +# Returns the background estimate as a @numeric scalar. +# } +# +# @author "HB" +# +# \seealso{ +# Internally, one of the following methods are used: +# @seemethod "estimateKappaByC1Density". +# } +# +#*/########################################################################### +setMethodS3("estimateKappa", "PairedPSCBS", function(this, flavor=c("density(C1)"), ...) { + # Argument 'flavor': + flavor <- match.arg(flavor) + + if (flavor == "density(C1)") { + estimateKappaByC1Density(this, ...) + } else { + throw("Cannot estimate background. Unsupported flavor: ", flavor) + } +}) + + + +###########################################################################/** +# @set class=PairedPSCBS +# @RdocMethod estimateKappaByC1Density +# +# @title "Estimate global background in segmented copy numbers" +# +# \description{ +# @get "title" based on the location of peaks in a weighted +# density estimator of the minor copy number mean levels. +# +# The global background, here called \eqn{\kappa}, +# may have multiple origins where normal contamination is one, +# but not necessarily the only one. +# +# \emph{Assumptions:} This estimator assumes that there are segments +# with C1=0 and C1=1, i.e. some deletions and, typically, some normal +# segements. +# } +# +# @synopsis +# +# \arguments{ +# \item{typeOfWeights}{A @character string specifying how weights +# are calculated.} +# \item{adjust}{A @numeric scale factor specifying the size of +# the bandwidth parameter used by the density estimator.} +# \item{from}{A @numeric scalar specifying the lower bound for the +# support of the estimated density.} +# \item{minDensity}{A non-negative @numeric threshold specifying +# the minimum density a peak should have in order to consider +# it a peak.} +# \item{...}{Not used.} +# \item{verbose}{See @see "R.utils::Verbose".} +# } +# +# \value{ +# Returns the background estimate as a @numeric scalar. +# } +# +# \section{Algorithm}{ +# \itemize{ +# \item Retrieve segment-level minor copy numbers and corresponding weights: +# \enumerate{ +# \item Grabs the segment-level C1 estimates. +# \item Calculate segment weights. +# The default (\code{typeOfWeights="dhNbrOfLoci"}) is to use +# weights proportional to the number of heterozygous SNPs. +# An alternative (\code{typeOfWeights="sqrt(dhNbrOfLoci)"}) is +# to use the square root of those counts. +# } +# +# \item Identify subset of regions with C1=0: +# \enumerate{ +# \item Estimates the weighted empirical density function +# (truncated at zero below). Tuning parameter 'adjust'. +# \item Find the first two peaks +# (with a density greater than tuning parameter 'minDensity'). +# \item Assumes that the two peaks corresponds to C1=0 and C1=1. +# \item Defines threshold Delta0.5 as the center location between +# these two peaks. +# } +# +# \item Estimate the global background signal: +# \enumerate{ +# \item For all segments with C1 < Delta0.5, calculate the weighted +# median of their C1:s. +# \item Let kappa be the above weighted median. +# This is the estimated background. +# } +# } +# } +# +# @author "HB" +# +# \seealso{ +# Instead of calling this method explicitly, it is recommended +# to use the @seemethod "estimateKappa" method. +# } +# +# @keyword internal +#*/########################################################################### +setMethodS3("estimateKappaByC1Density", "PairedPSCBS", function(this, typeOfWeights=c("dhNbrOfLoci", "sqrt(dhNbrOfLoci)"), adjust=1, from=0, minDensity=0.2, ..., verbose=FALSE) { + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Validate arguments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Argument 'typeOfWeights': + typeOfWeights <- match.arg(typeOfWeights) + + # Argument 'adjust': + adjust <- Arguments$getDouble(adjust, range=c(0,Inf)) + + # Argument 'minDensity': + minDensity <- Arguments$getDouble(minDensity, range=c(0,Inf)) + + # Argument 'verbose': + verbose <- Arguments$getVerbose(verbose) + if (verbose) { + pushState(verbose) + on.exit(popState(verbose)) + } + + + verbose && enter(verbose, "Estimate global background (including normal contamination and more)") + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Extract the region-level estimates + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + segs <- this$output + c1 <- segs$c1Mean + .stop_if_not(!is.null(c1)) + n <- segs$dhNbrOfLoci + + # Drop missing values + keep <- (!is.na(c1) & !is.na(n)) + c1 <- c1[keep] + n <- n[keep] + + verbose && cat(verbose, "Number of segments: ", length(c1)) + + # Calculate region weights + if (typeOfWeights == "dhNbrOfLoci") { + w <- n + } else if (typeOfWeights == "sqrt(dhNbrOfLoci)") { + w <- sqrt(n) + } + + # Standardize weights to sum to one + weights <- w / sum(w) + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Identify subset of regions with C1=0 and C1=1 + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + verbose && enter(verbose, "Estimating threshold Delta0.5 from the empirical density of C1:s") + verbose && cat(verbose, "adjust: ", adjust) + verbose && cat(verbose, "minDensity: ", minDensity) + ploidy <- ploidy(this) + verbose && cat(verbose, "ploidy: ", ploidy) + if (ploidy != 2) { + minDensity <- (2/ploidy)*minDensity + verbose && cat(verbose, "minDensity (adjusted for ploidy): ", minDensity) + } + + d <- density(c1, weights=weights, adjust=adjust, from=from, na.rm=FALSE) + fit <- findPeaksAndValleys(d) + + type <- NULL; rm(list="type") # To please R CMD check + fit <- subset(fit, type == "peak") + if (nrow(fit) < 2L) { + throw(sprintf("Less than two modes were found in the empirical density of C1: %d", nrow(fit))) + } + nModes <- nrow(fit) + + fit <- subset(fit, density >= minDensity) + if (nrow(fit) < 2L) { + throw(sprintf("Less than two modes were found in the empirical density of C1 after removing %d modes that are too weak (density < %g): %d", nModes - nrow(fit), minDensity, nrow(fit))) + } + nModes <- nrow(fit) + verbose && cat(verbose, "All peaks:") + verbose && print(verbose, fit) + + # Keep the first two peaks + fit <- fit[1:2,,drop=FALSE] + verbose && cat(verbose, "C1=0 and C1=1 peaks:") + verbose && print(verbose, fit) + + peaks <- fit$x + Delta0.5 <- mean(peaks) + verbose && cat(verbose, "Estimate of Delta0.5: ", Delta0.5) + + verbose && exit(verbose) + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Estimate kappa + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + keep <- which(c1 < Delta0.5) + verbose && cat(verbose, "Number of segments with C1 < Delta0.5: ", length(keep)) + kappa <- weightedMedian(c1[keep], w=weights[keep]) + + # Adjust for ploidy + kappa <- (2/ploidy)*kappa + verbose && cat(verbose, "Estimate of kappa: ", kappa) + + verbose && exit(verbose) + + kappa +}, protected=TRUE) # estimateKappaByC1Density() diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.extractSegmentDataByLocus.R r-cran-pscbs-0.64.0/R/PairedPSCBS.extractSegmentDataByLocus.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.extractSegmentDataByLocus.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.extractSegmentDataByLocus.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,86 +1,86 @@ setMethodS3("extractSegmentDataByLocus", "PairedPSCBS", function(fit, fields=NULL, ..., verbose=FALSE) { # Extract data - segs <- getSegments(fit, splitters=TRUE); - data <- getLocusData(fit, ...); + segs <- getSegments(fit, splitters=TRUE) + data <- getLocusData(fit, ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'fields': if (!is.null(fields)) { - fields <- Arguments$getCharacters(fields); - unknown <- fields[!is.element(fields, colnames(segs))]; + fields <- Arguments$getCharacters(fields) + unknown <- fields[!is.element(fields, colnames(segs))] if (length(unknown) > 0L) { - throw("Unknown segment fields: ", paste(sQuote(unknown), collapse=", ")); + throw("Unknown segment fields: ", paste(sQuote(unknown), collapse=", ")) } } else { - fields <- colnames(segs); + fields <- colnames(segs) } # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Extracting segment data by locus"); + verbose && enter(verbose, "Extracting segment data by locus") # Extract segment fields - chromosome <- data$chromosome; - x <- data$x; - y <- data[,3L]; - segs <- segs[,fields]; - nbrOfLoci <- nrow(data); + chromosome <- data$chromosome + x <- data$x + y <- data[,3L] + segs <- segs[,fields] + nbrOfLoci <- nrow(data) - verbose && printf(verbose, "Segment fields: [%d] %s\n", length(fields), paste(sQuote(fields), collapse=", ")); - verbose && cat(verbose, "Number of loci: ", nbrOfLoci); + verbose && printf(verbose, "Segment fields: [%d] %s\n", length(fields), paste(sQuote(fields), collapse=", ")) + verbose && cat(verbose, "Number of loci: ", nbrOfLoci) # Allocate segment fields at the locus level - dataL <- matrix(NA, nrow=nbrOfLoci, ncol=length(fields)); - colnames(dataL) <- fields; - dataL <- as.data.frame(dataL); + dataL <- matrix(NA, nrow=nbrOfLoci, ncol=length(fields)) + colnames(dataL) <- fields + dataL <- as.data.frame(dataL) - verbose && cat(verbose, "Allocated results:"); - verbose && str(verbose, dataL); + verbose && cat(verbose, "Allocated results:") + verbose && str(verbose, dataL) - verbose && enter(verbose, "Extracting segment by segment"); + verbose && enter(verbose, "Extracting segment by segment") # For each segment... for (ss in seq_len(nrow(segs))) { - verbose && enter(verbose, sprintf("Segment %d of %d", ss, nrow(segs))); - seg <- segs[ss,]; + verbose && enter(verbose, sprintf("Segment %d of %d", ss, nrow(segs))) + seg <- segs[ss,] idxs <- which(chromosome == seg$chromosome & - seg$tcnStart <= x & x <= seg$tcnEnd); - idxs <- Arguments$getIndices(idxs, max=nbrOfLoci); - verbose && cat(verbose, "Number of loci in segment: ", length(idxs)); + seg$tcnStart <= x & x <= seg$tcnEnd) + idxs <- Arguments$getIndices(idxs, max=nbrOfLoci) + verbose && cat(verbose, "Number of loci in segment: ", length(idxs)) # Sanity check -## stopifnot(length(idxs) == seg$tcnNbrOfLoci); +## .stop_if_not(length(idxs) == seg$tcnNbrOfLoci) - segsSS <- seg[fields]; - verbose && cat(verbose, "Segment data extracted:"); - verbose && print(verbose, segsSS); + segsSS <- seg[fields] + verbose && cat(verbose, "Segment data extracted:") + verbose && print(verbose, segsSS) for (field in fields) { - dataL[idxs,field] <- segsSS[[field]]; + dataL[idxs,field] <- segsSS[[field]] } - verbose && exit(verbose); + verbose && exit(verbose) } # for (ss ...) - verbose && exit(verbose); + verbose && exit(verbose) # The calls for loci that have missing annotations or observations, # should also be missing, i.e. NA. - nok <- (is.na(chromosome) | is.na(x) | is.na(y)); - dataL[nok,] <- NA; + nok <- (is.na(chromosome) | is.na(x) | is.na(y)) + dataL[nok,] <- NA # Sanity check - stopifnot(nrow(dataL) == nbrOfLoci); - stopifnot(ncol(dataL) == length(fields)); + .stop_if_not(nrow(dataL) == nbrOfLoci) + .stop_if_not(ncol(dataL) == length(fields)) - verbose && exit(verbose); + verbose && exit(verbose) - dataL; + dataL }, protected=TRUE) # extractSegmentDataByLocus() diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.EXTS3.R r-cran-pscbs-0.64.0/R/PairedPSCBS.EXTS3.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.EXTS3.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.EXTS3.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,34 +1,34 @@ setMethodS3("extractLocusLevelC1C2", "PairedPSCBS", function(fit, ...) { # Extract locus-level data - data <- getLocusData(fit); - C <- data$CT; - rho <- data$rho; + data <- getLocusData(fit) + C <- data$CT + rho <- data$rho # Swapped (C1,C2) <-> (C2,C1) for some segments? - fields <- colnames(getSegments(fit)); + fields <- colnames(getSegments(fit)) if (is.element("c1c2Swap", fields)) { # FIXME: When PSCBS is updated. # WORKAROUND: extractSegmentDataByLocus() in PSCBS v0.40.4 requires: # that fields "chromosome", "tcnStart", "tcnEnd" are always requested # /2014-03-21 c1c2Swap <- extractSegmentDataByLocus(fit, fields=c("c1c2Swap", - "chromosome", "tcnStart", "tcnEnd")); - c1c2Swap <- c1c2Swap[["c1c2Swap"]]; + "chromosome", "tcnStart", "tcnEnd")) + c1c2Swap <- c1c2Swap[["c1c2Swap"]] if (any(c1c2Swap)) { - rho[c1c2Swap] <- -rho[c1c2Swap]; + rho[c1c2Swap] <- -rho[c1c2Swap] } } - C1 <- 1/2*(1-rho)*C; - C2 <- C - C1; + C1 <- 1/2*(1-rho)*C + C2 <- C - C1 - data.frame(C1=C1, C2=C2); + data.frame(C1=C1, C2=C2) }, private=TRUE) # extractLocusLevelC1C2() setMethodS3("extractLocusLevelTCN", "PairedPSCBS", function(fit, ...) { - data <- getLocusData(fit); - C <- data$CT; + data <- getLocusData(fit) + C <- data$CT }, private=TRUE) # extractLocusLevelTCN() @@ -38,150 +38,117 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'what': - what <- match.arg(what); + what <- match.arg(what) - segs <- getSegments(fit, splitters=TRUE); - stopifnot(!is.null(segs)); - nbrOfSegments <- nrow(segs); + segs <- getSegments(fit, splitters=TRUE) + .stop_if_not(!is.null(segs)) + nbrOfSegments <- nrow(segs) # Argument 'idx': - idx <- Arguments$getIndex(idx, max=nbrOfSegments); + idx <- Arguments$getIndex(idx, max=nbrOfSegments) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Extracting a specific DH segment"); + verbose && enter(verbose, "Extracting a specific DH segment") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract the data and segmentation results # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getLocusData(fit); - stopifnot(!is.null(data)); + data <- getLocusData(fit) + .stop_if_not(!is.null(data)) - segs <- getSegments(fit, splitters=TRUE); - stopifnot(!is.null(segs)); + segs <- getSegments(fit, splitters=TRUE) + .stop_if_not(!is.null(segs)) - verbose && enter(verbose, "Subsetting segment"); + verbose && enter(verbose, "Subsetting segment") # Subset the region-level data - seg <- segs[idx,,drop=FALSE]; + seg <- segs[idx,,drop=FALSE] - isDivider <- all(is.na(seg)); + isDivider <- all(is.na(seg)) if (isDivider) { - verbose && cat("Cannot extract DH segment. Not a valid segment: ", idx); - verbose && exit(verbose); - return(NULL); + verbose && cat("Cannot extract DH segment. Not a valid segment: ", idx) + verbose && exit(verbose) + return(NULL) } - verbose && print(verbose, seg); - verbose && cat(verbose, "Number of TCN markers: ", sum(seg[["tcnNbrOfLoci"]], na.rm=TRUE)); - verbose && exit(verbose); + verbose && print(verbose, seg) + verbose && cat(verbose, "Number of TCN markers: ", sum(seg[["tcnNbrOfLoci"]], na.rm=TRUE)) + verbose && exit(verbose) - verbose && enter(verbose, "Subsetting data"); - units <- seq_len(nrow(data)); + verbose && enter(verbose, "Subsetting data") + units <- seq_len(nrow(data)) # Keep only chromosome of interest - chr <- as.numeric(seg[,"chromosome"]); + chr <- as.numeric(seg[,"chromosome"]) if (!is.na(chr)) { - keep <- which(data$chromosome == chr); - units <- units[keep]; - data <- data[keep,]; + keep <- which(data$chromosome == chr) + units <- units[keep] + data <- data[keep,] } # Keep only loci within the segment - xRange <- as.numeric(seg[,c("dhStart", "dhEnd")]); - keep <- which(xRange[1] <= data$x & data$x <= xRange[2]); - units <- units[keep]; - data <- data[keep,]; + xRange <- as.numeric(seg[,c("dhStart", "dhEnd")]) + keep <- which(xRange[1] <= data$x & data$x <= xRange[2]) + units <- units[keep] + data <- data[keep,] - muN <- data$muN; - isSnp <- is.finite(muN); + muN <- data$muN + isSnp <- is.finite(muN) # Keep only SNPs? if (is.element(what, c("SNPs", "hets"))) { - keep <- which(isSnp); - units <- units[keep]; - data <- data[keep,]; + keep <- which(isSnp) + units <- units[keep] + data <- data[keep,] } # Keep only heterozygous SNPs? if (what == "hets") { - isHet <- (muN == 1/2); - keep <- which(isHet); - units <- units[keep]; - data <- data[keep,]; + isHet <- (muN == 1/2) + keep <- which(isHet) + units <- units[keep] + data <- data[keep,] } - verbose && exit(verbose); + verbose && exit(verbose) - n <- nrow(data); - verbose && cat(verbose, "Number of loci in DH segment: ", n); + n <- nrow(data) + verbose && cat(verbose, "Number of loci in DH segment: ", n) # Special case? - listOfDhLociNotPartOfSegment <- fit$listOfDhLociNotPartOfSegment; + listOfDhLociNotPartOfSegment <- fit$listOfDhLociNotPartOfSegment if (!is.null(listOfDhLociNotPartOfSegment)) { - tcnId <- seg[,"tcnId"]; - dhId <- seg[,"dhId"]; - dhLociNotPartOfSegment <- listOfDhLociNotPartOfSegment[[tcnId]]; + tcnId <- seg[,"tcnId"] + dhId <- seg[,"dhId"] + dhLociNotPartOfSegment <- listOfDhLociNotPartOfSegment[[tcnId]] if (!is.null(dhLociNotPartOfSegment)) { - lociToExclude <- dhLociNotPartOfSegment[[dhId]]; - verbose && cat(verbose, "Excluding loci that belongs to a flanking segment: ", length(lociToExclude)); - drop <- match(lociToExclude, units); - units <- units[-drop]; - data <- data[-drop,]; - n <- nrow(data); + lociToExclude <- dhLociNotPartOfSegment[[dhId]] + verbose && cat(verbose, "Excluding loci that belongs to a flanking segment: ", length(lociToExclude)) + drop <- match(lociToExclude, units) + units <- units[-drop] + data <- data[-drop,] + n <- nrow(data) } } - verbose && cat(verbose, "Number of units: ", n); - verbose && cat(verbose, "Number of TCN markers: ", seg[,"tcnNbrOfLoci"]); + verbose && cat(verbose, "Number of units: ", n) + verbose && cat(verbose, "Number of TCN markers: ", seg[,"tcnNbrOfLoci"]) # Sanity check - if (what == "hets" && n > 0) stopifnot(n == seg[,"dhNbrOfLoci"]); + if (what == "hets" && n > 0) .stop_if_not(n == seg[,"dhNbrOfLoci"]) - fitS <- fit; - fitS$data <- data; - fitS$output <- seg; + fitS <- fit + fitS$data <- data + fitS$output <- seg - verbose && exit(verbose); + verbose && exit(verbose) - fitS; + fitS }, protected=TRUE) # extractDhSegment() - - -############################################################################ -# HISTORY: -# 2012-02-24 -# o Added extractDhSegment() for PairedPSCBS, which was copied "as is" -# from the aroma.cn package. The below history has been updated to -# document changes in this method too. -# 2012-02-23 -# o Made extractDhSegment() protected. -# 2011-10-08 -# o ROBUSTIFICATION: Uses drop=FALSE in mergeTwoSegments() for PairedPSCBS. -# 2010-10-26 [HB] -# o Added extractDhSegment() for PairedPSCBS. -# 2011-10-02 -# o DOCUMENTATION: Added Rdoc help to mergeTwoSegments() & dropByRegions(). -# o Added verbose statements to the above to functions. -# 2011-06-14 -# o Updated code to recognize new column names. -# 2011-01-18 -# o BUG FIX: Fields 'tcnSegRows' and 'dhSegRows' were not updated by -# mergeTwoSegments() for PairedPSCBS. -# 2011-01-14 -# o Moved extractByRegions() and estimateStdDevForHeterozygousBAF() to -# psCBS v0.9.36. -# o Now extractByRegions() utilizes the 'segRows' field. -# o Added estimateStdDevForHeterozygousBAF(). -# 2011-01-12 -# o Added updateMeans() for PairedPSCBS. -# o Added dropByRegions(). -# o Added extractByRegions() and extractByRegion(). -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.EXTS.R r-cran-pscbs-0.64.0/R/PairedPSCBS.EXTS.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.EXTS.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.EXTS.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,18 +1,18 @@ setMethodS3("shiftTCN", "PairedPSCBS", function(fit, shift, update=TRUE, ...) { # Argument 'shift': - shift <- Arguments$getDouble(shift, disallow=c("NA", "NaN", "Inf")); + shift <- Arguments$getDouble(shift, disallow=c("NA", "NaN", "Inf")) - data <- getLocusData(fit); - data$CT <- data$CT + shift; - fit$data <- data; + data <- getLocusData(fit) + data$CT <- data$CT + shift + fit$data <- data # Not needed anymore - data <- NULL; + data <- NULL if (update) { - fit <- updateMeans(fit, ...); + fit <- updateMeans(fit, ...) } - fit; + fit }, protected=TRUE) @@ -49,11 +49,11 @@ # } #*/########################################################################### setMethodS3("extractTCNAndDHs", "PairedPSCBS", function(fit, ...) { - segs <- getSegments(fit, ...); - stopifnot(!is.null(segs)); + segs <- getSegments(fit, ...) + .stop_if_not(!is.null(segs)) - data <- segs[,c("tcnMean", "dhMean", "tcnNbrOfLoci", "dhNbrOfLoci"), drop=FALSE]; - data; + data <- segs[,c("tcnMean", "dhMean", "tcnNbrOfLoci", "dhNbrOfLoci"), drop=FALSE] + data }, protected=TRUE) @@ -87,39 +87,39 @@ # } #*/########################################################################### setMethodS3("extractMinorMajorCNs", "PairedPSCBS", function(fit, ...) { - data <- extractTCNAndDHs(fit, ...); + data <- extractTCNAndDHs(fit, ...) - gamma <- data[,1L]; - rho <- data[,2L]; - C1 <- 1/2*(1-rho)*gamma; - C2 <- gamma - C1; - - data[,1L] <- C1; - data[,2L] <- C2; - colnames(data)[1:2] <- c("C1", "C2"); + gamma <- data[,1L] + rho <- data[,2L] + C1 <- 1/2*(1-rho)*gamma + C2 <- gamma - C1 + + data[,1L] <- C1 + data[,2L] <- C2 + colnames(data)[1:2] <- c("C1", "C2") # Swap (C1,C2)? - segs <- getSegments(fit, ...); - flipped <- segs$c1c2Swap; + segs <- getSegments(fit, ...) + flipped <- segs$c1c2Swap if (!is.null(flipped)) { - idxs <- which(flipped); + idxs <- which(flipped) if (length(idxs) > 0L) { - data[idxs,1:2] <- data[idxs,2:1]; + data[idxs,1:2] <- data[idxs,2:1] } } - data; + data }, protected=TRUE) setMethodS3("extractC1C2", "PairedPSCBS", function(...) { - extractMinorMajorCNs(...); + extractMinorMajorCNs(...) }, protected=TRUE) setMethodS3("extractCNs", "PairedPSCBS", function(fit, splitters=TRUE, ...) { - data <- extractC1C2(fit, splitters=splitters, ...); - data[,c("C1", "C2"), drop=FALSE]; + data <- extractC1C2(fit, splitters=splitters, ...) + data[,c("C1", "C2"), drop=FALSE] }) @@ -127,43 +127,43 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract data # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - X <- extractC1C2(..., splitters=TRUE, addGaps=TRUE); + X <- extractC1C2(..., splitters=TRUE, addGaps=TRUE) # (C1,C2) - C1C2 <- X[,1:2,drop=FALSE]; + C1C2 <- X[,1:2,drop=FALSE] # Number of TCN and DH data points - counts <- X[,3:4, drop=FALSE]; + counts <- X[,3:4, drop=FALSE] # Not needed anymore - X <- NULL; + X <- NULL # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Calculate (dC1,dC2) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # (dC1, dC2) - dC1C2 <- matrixStats::colDiffs(C1C2); + dC1C2 <- matrixStats::colDiffs(C1C2) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Change-point weights # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Region weights from DH counts - w <- counts[,2,drop=TRUE]; - w <- sqrt(w); - w <- w / sum(w, na.rm=TRUE); + w <- counts[,2,drop=TRUE] + w <- sqrt(w) + w <- w / sum(w, na.rm=TRUE) # (a) Smallest of the two flanking (DH) counts - cpw <- cbind(w[1:(length(w)-1)], w[2:length(w)]); - cpw <- rowMins(cpw, na.rm=TRUE); - cpw[is.infinite(cpw)] <- NA; - cpw <- sqrt(cpw); - cpwMin <- cpw / sum(cpw, na.rm=TRUE); + cpw <- cbind(w[1:(length(w)-1)], w[2:length(w)]) + cpw <- rowMins(cpw, na.rm=TRUE) + cpw[is.infinite(cpw)] <- NA + cpw <- sqrt(cpw) + cpwMin <- cpw / sum(cpw, na.rm=TRUE) # (b) Sum of region weights - cpw <- w[1:(length(w)-1)] + w[2:length(w)]; - cpwAvg <- cpw / sum(cpw, na.rm=TRUE); + cpw <- w[1:(length(w)-1)] + w[2:length(w)] + cpwAvg <- cpw / sum(cpw, na.rm=TRUE) - cbind(dC1=dC1C2[,1], dC2=dC1C2[,2], wMin=cpwMin, wAvg=cpwAvg); + cbind(dC1=dC1C2[,1], dC2=dC1C2[,2], wMin=cpwMin, wAvg=cpwAvg) }, protected=TRUE) @@ -173,59 +173,59 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Post-segmenting TCNs"); + verbose && enter(verbose, "Post-segmenting TCNs") - flavor <- fit$params$flavor; + flavor <- fit$params$flavor if (!force && regexpr("&", flavor, fixed=TRUE) != -1) { - verbose && cat(verbose, "Nothing to do. Already postsegmentTCN:ed: ", flavor); - verbose && exit(verbose); - return(fit); + verbose && cat(verbose, "Nothing to do. Already postsegmentTCN:ed: ", flavor) + verbose && exit(verbose) + return(fit) } - joinSegments <- fit$params$joinSegments; + joinSegments <- fit$params$joinSegments if (!joinSegments) { - throw("Postsegmentation of TCNs is only implemented for the case when joinSegments=TRUE: ", joinSegments); + throw("Postsegmentation of TCNs is only implemented for the case when joinSegments=TRUE: ", joinSegments) } # Get mean estimators - estList <- getMeanEstimators(fit, "tcn"); - avgTCN <- estList$tcn; + estList <- getMeanEstimators(fit, "tcn") + avgTCN <- estList$tcn # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract the data and segmentation results # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getLocusData(fit); + data <- getLocusData(fit) - segs <- getSegments(fit); - keep <- is.finite(segs$chromosome); - segs <- segs[keep,,drop=FALSE]; - tcnSegRows <- fit$tcnSegRows[keep,,drop=FALSE]; - dhSegRows <- fit$dhSegRows[keep,,drop=FALSE]; + segs <- getSegments(fit) + keep <- is.finite(segs$chromosome) + segs <- segs[keep,,drop=FALSE] + tcnSegRows <- fit$tcnSegRows[keep,,drop=FALSE] + dhSegRows <- fit$dhSegRows[keep,,drop=FALSE] # Sanity check - stopifnot(nrow(dhSegRows) == nrow(tcnSegRows)); - stopifnot(all(tcnSegRows[,1] <= tcnSegRows[,2], na.rm=TRUE)); -# stopifnot(all(tcnSegRows[-nrow(tcnSegRows),2] < tcnSegRows[-1,1], na.rm=TRUE)); - stopifnot(all(dhSegRows[,1] <= dhSegRows[,2], na.rm=TRUE)); - stopifnot(all(dhSegRows[-nrow(dhSegRows),2] < dhSegRows[-1,1], na.rm=TRUE)); + .stop_if_not(nrow(dhSegRows) == nrow(tcnSegRows)) + .stop_if_not(all(tcnSegRows[,1] <= tcnSegRows[,2], na.rm=TRUE)) +# .stop_if_not(all(tcnSegRows[-nrow(tcnSegRows),2] < tcnSegRows[-1,1], na.rm=TRUE)) + .stop_if_not(all(dhSegRows[,1] <= dhSegRows[,2], na.rm=TRUE)) + .stop_if_not(all(dhSegRows[-nrow(dhSegRows),2] < dhSegRows[-1,1], na.rm=TRUE)) - nbrOfSegments <- nrow(segs); - verbose && cat(verbose, "Number of segments: ", nbrOfSegments); - - chromosome <- data$chromosome; - x <- data$x; - CT <- data$CT; - muN <- data$muN; - rho <- data$rho; + nbrOfSegments <- nrow(segs) + verbose && cat(verbose, "Number of segments: ", nbrOfSegments) + + chromosome <- data$chromosome + x <- data$x + CT <- data$CT + muN <- data$muN + rho <- data$rho hasDH <- !is.null(rho) if (hasDH) { isHet <- !is.na(rho) @@ -238,346 +238,242 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Update the TCN segments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - chromosomes <- getChromosomes(fit); - nbrOfChromosomes <- length(chromosomes); - verbose && cat(verbose, "Number of chromosomes: ", nbrOfChromosomes); - verbose && print(verbose, chromosomes); + chromosomes <- getChromosomes(fit) + nbrOfChromosomes <- length(chromosomes) + verbose && cat(verbose, "Number of chromosomes: ", nbrOfChromosomes) + verbose && print(verbose, chromosomes) for (cc in seq_len(nbrOfChromosomes)) { - chr <- chromosomes[cc]; - chrTag <- sprintf("chr%02d", chr); - verbose && enter(verbose, sprintf("Chromosome %d ('%s') of %d", cc, chrTag, nbrOfChromosomes)); - rows <- which(is.element(segs[["chromosome"]], chr)); - verbose && cat(verbose, "Rows:"); - verbose && print(verbose, rows); - - segsCC <- segs[rows,,drop=FALSE]; - tcnSegRowsCC <- tcnSegRows[rows,,drop=FALSE]; - dhSegRowsCC <- dhSegRows[rows,,drop=FALSE]; - nbrOfSegmentsCC <- nrow(segsCC); - verbose && cat(verbose, "Number of segments: ", nbrOfSegmentsCC); + chr <- chromosomes[cc] + chrTag <- sprintf("chr%02d", chr) + verbose && enter(verbose, sprintf("Chromosome %d ('%s') of %d", cc, chrTag, nbrOfChromosomes)) + rows <- which(is.element(segs[["chromosome"]], chr)) + verbose && cat(verbose, "Rows:") + verbose && print(verbose, rows) + + segsCC <- segs[rows,,drop=FALSE] + tcnSegRowsCC <- tcnSegRows[rows,,drop=FALSE] + dhSegRowsCC <- dhSegRows[rows,,drop=FALSE] + nbrOfSegmentsCC <- nrow(segsCC) + verbose && cat(verbose, "Number of segments: ", nbrOfSegmentsCC) - tcnIds <- sort(unique(segsCC[["tcnId"]])); - I <- length(tcnIds); + tcnIds <- sort(unique(segsCC[["tcnId"]])) + I <- length(tcnIds) for (ii in seq_len(I)) { - tcnId <- tcnIds[ii]; - verbose && enter(verbose, sprintf("TCN segment #%d ('%s') of %d", ii, tcnId, I)); + tcnId <- tcnIds[ii] + verbose && enter(verbose, sprintf("TCN segment #%d ('%s') of %d", ii, tcnId, I)) - rowsII <- which(segsCC[["tcnId"]] == tcnId); - J <- length(rowsII); + rowsII <- which(segsCC[["tcnId"]] == tcnId) + J <- length(rowsII) # Nothing todo? if (!force && J == 1) { - verbose && cat(verbose, "Nothing todo. Only one DH segmentation. Skipping."); - verbose && exit(verbose); - next; + verbose && cat(verbose, "Nothing todo. Only one DH segmentation. Skipping.") + verbose && exit(verbose) + next } - verbose && cat(verbose, "Rows:"); - verbose && print(verbose, rowsII); - segsII <- segsCC[rowsII,,drop=FALSE]; + verbose && cat(verbose, "Rows:") + verbose && print(verbose, rowsII) + segsII <- segsCC[rowsII,,drop=FALSE] - tcnSegRowsII <- tcnSegRowsCC[rowsII,,drop=FALSE]; - dhSegRowsII <- dhSegRowsCC[rowsII,,drop=FALSE]; + tcnSegRowsII <- tcnSegRowsCC[rowsII,,drop=FALSE] + dhSegRowsII <- dhSegRowsCC[rowsII,,drop=FALSE] - verbose && cat(verbose, "TCN & DH segRows before:"); - verbose && print(verbose, cbind(tcn=tcnSegRowsII, dh=dhSegRowsII)); + verbose && cat(verbose, "TCN & DH segRows before:") + verbose && print(verbose, cbind(tcn=tcnSegRowsII, dh=dhSegRowsII)) - segRowsRange <- range(c(tcnSegRowsII, dhSegRowsII), na.rm=TRUE); + segRowsRange <- range(c(tcnSegRowsII, dhSegRowsII), na.rm=TRUE) verbose && printf(verbose, "Range [%d,%d]\n", - segRowsRange[1], segRowsRange[2]); + segRowsRange[1], segRowsRange[2]) - tcnSegRowsIIBefore <- tcnSegRowsII; - nbrOfTCNsBefore <- segsII[1,"tcnNbrOfLoci"]; + tcnSegRowsIIBefore <- tcnSegRowsII + nbrOfTCNsBefore <- segsII[1,"tcnNbrOfLoci"] # Sanity check - stopifnot(diff(segRowsRange)+1L == nbrOfTCNsBefore); + .stop_if_not(diff(segRowsRange)+1L == nbrOfTCNsBefore) for (jj in seq_len(J)) { - verbose && enter(verbose, sprintf("DH segment #%d of %d", jj, J)); - seg <- segsII[jj,,drop=FALSE]; - tcnSegRow <- unlist(tcnSegRowsII[jj,,drop=FALSE], use.names=FALSE); - dhSegRow <- unlist(dhSegRowsII[jj,,drop=FALSE], use.names=FALSE); + verbose && enter(verbose, sprintf("DH segment #%d of %d", jj, J)) + seg <- segsII[jj,,drop=FALSE] + tcnSegRow <- unlist(tcnSegRowsII[jj,,drop=FALSE], use.names=FALSE) + dhSegRow <- unlist(dhSegRowsII[jj,,drop=FALSE], use.names=FALSE) # Sanity check - stopifnot(all(is.na(tcnSegRow)) || (tcnSegRow[1] <= tcnSegRow[2])); - stopifnot(all(is.na(dhSegRow)) || (dhSegRow[1] <= dhSegRow[2])); + .stop_if_not(all(is.na(tcnSegRow)) || (tcnSegRow[1] <= tcnSegRow[2])) + .stop_if_not(all(is.na(dhSegRow)) || (dhSegRow[1] <= dhSegRow[2])) # Sanity check - idxsTCN <- tcnSegRow[1]:tcnSegRow[2]; - nbrOfTCNs <- sum(!is.na(CT[idxsTCN])); - stopifnot(nbrOfTCNs == nbrOfTCNsBefore); + idxsTCN <- tcnSegRow[1]:tcnSegRow[2] + nbrOfTCNs <- sum(!is.na(CT[idxsTCN])) + .stop_if_not(nbrOfTCNs == nbrOfTCNsBefore) if (joinSegments) { # (a) The TCN segment should have identical (start,end) boundaries as the DH region - xStart <- seg[["dhStart"]]; - xEnd <- seg[["dhEnd"]]; - verbose && printf(verbose, "[xStart,xEnd] = [%.0f,%.0f]\n", xStart, xEnd); - stopifnot(xStart <= xEnd); + xStart <- seg[["dhStart"]] + xEnd <- seg[["dhEnd"]] + verbose && printf(verbose, "[xStart,xEnd] = [%.0f,%.0f]\n", xStart, xEnd) + .stop_if_not(xStart <= xEnd) # (b) Identify units - units <- which(chromosome == chr & xStart <= x & x <= xEnd); + units <- which(chromosome == chr & xStart <= x & x <= xEnd) # (c) Drop units that are outside both the TCN and DH segments - keep <- (segRowsRange[1] <= units & units <= segRowsRange[2]); - units <- units[keep]; + keep <- (segRowsRange[1] <= units & units <= segRowsRange[2]) + units <- units[keep] - tcnSegRow <- range(units); - verbose && printf(verbose, "[idxStart,idxEnd] = [%d,%d]\n", tcnSegRow[1], tcnSegRow[2]); - verbose && cat(verbose, "Number of TCN loci: ", length(units)); + tcnSegRow <- range(units) + verbose && printf(verbose, "[idxStart,idxEnd] = [%d,%d]\n", tcnSegRow[1], tcnSegRow[2]) + verbose && cat(verbose, "Number of TCN loci: ", length(units)) # (c) Adjust for missing values - keep <- which(!is.na(CT[units])); - units <- units[keep]; + keep <- which(!is.na(CT[units])) + units <- units[keep] # (d) Adjust for DH boundaries if (jj > 1L) { - minIdx <- tcnSegRowsII[jj-1L,2L, drop=TRUE]; - units <- units[units > minIdx]; + minIdx <- tcnSegRowsII[jj-1L,2L, drop=TRUE] + units <- units[units > minIdx] } if (jj < J) { - maxIdx <- dhSegRowsII[jj+1L,1L, drop=TRUE]; - units <- units[units < maxIdx]; + maxIdx <- dhSegRowsII[jj+1L,1L, drop=TRUE] + units <- units[units < maxIdx] } if (jj == J) { -# maxIdx <- dhSegRowsII[jj+1L,1L, drop=TRUE]; -# units <- units[units < maxIdx]; +# maxIdx <- dhSegRowsII[jj+1L,1L, drop=TRUE] +# units <- units[units < maxIdx] } - tcnSegRow <- range(units); - verbose && printf(verbose, "[idxStart,idxEnd] = [%d,%d]\n", tcnSegRow[1], tcnSegRow[2]); - verbose && cat(verbose, "Number of non-missing TCN loci: ", length(units)); + tcnSegRow <- range(units) + verbose && printf(verbose, "[idxStart,idxEnd] = [%d,%d]\n", tcnSegRow[1], tcnSegRow[2]) + verbose && cat(verbose, "Number of non-missing TCN loci: ", length(units)) } else { throw("Not implemented yet.") # /HB 2010-12-02 } # if (joinSegments) - gamma <- avgTCN(CT[units]); + gamma <- avgTCN(CT[units]) # Sanity check - stopifnot(length(units) == 0 || !is.na(gamma)); + .stop_if_not(length(units) == 0 || !is.na(gamma)) # Update the segment boundaries, estimates and counts - seg[["tcnStart"]] <- xStart; - seg[["tcnEnd"]] <- xEnd; - seg[["tcnMean"]] <- gamma; - seg[["tcnNbrOfLoci"]] <- length(units); - seg[["tcnNbrOfSNPs"]] <- sum(isSnp[units]); - seg[["tcnNbrOfHets"]] <- sum(isHet[units]); + seg[["tcnStart"]] <- xStart + seg[["tcnEnd"]] <- xEnd + seg[["tcnMean"]] <- gamma + seg[["tcnNbrOfLoci"]] <- length(units) + seg[["tcnNbrOfSNPs"]] <- sum(isSnp[units]) + seg[["tcnNbrOfHets"]] <- sum(isHet[units]) # Sanity check - stopifnot(nrow(seg) == length(jj)); + .stop_if_not(nrow(seg) == length(jj)) - segsII[jj,] <- seg; - tcnSegRowsII[jj,] <- tcnSegRow; + segsII[jj,] <- seg + tcnSegRowsII[jj,] <- tcnSegRow - verbose && exit(verbose); + verbose && exit(verbose) } # for (jj ...) # Sanity check - stopifnot(nrow(segsII) == length(rowsII)); + .stop_if_not(nrow(segsII) == length(rowsII)) - verbose && cat(verbose, "TCN & DH segRows afterward:"); - verbose && print(verbose, cbind(tcn=tcnSegRowsII, dh=dhSegRowsII)); + verbose && cat(verbose, "TCN & DH segRows afterward:") + verbose && print(verbose, cbind(tcn=tcnSegRowsII, dh=dhSegRowsII)) -##print(segsII); +##print(segsII) # Sanity check - nbrOfTCNsAfter <- sum(segsII[,"tcnNbrOfLoci"], na.rm=TRUE); - verbose && cat(verbose, "Number of TCNs before: ", nbrOfTCNsBefore); - verbose && cat(verbose, "Number of TCNs after: ", nbrOfTCNsAfter); - stopifnot(nbrOfTCNsAfter >= nbrOfTCNsBefore); + nbrOfTCNsAfter <- sum(segsII[,"tcnNbrOfLoci"], na.rm=TRUE) + verbose && cat(verbose, "Number of TCNs before: ", nbrOfTCNsBefore) + verbose && cat(verbose, "Number of TCNs after: ", nbrOfTCNsAfter) + .stop_if_not(nbrOfTCNsAfter >= nbrOfTCNsBefore) # Sanity check - stopifnot(nrow(dhSegRowsII) == nrow(tcnSegRowsII)); - stopifnot(all(tcnSegRowsII[,1] <= tcnSegRowsII[,2], na.rm=TRUE)); - stopifnot(all(tcnSegRowsII[-nrow(tcnSegRowsII),2] < tcnSegRowsII[-1,1], na.rm=TRUE)); - stopifnot(all(dhSegRowsII[,1] <= dhSegRowsII[,2], na.rm=TRUE)); - stopifnot(all(dhSegRowsII[-nrow(dhSegRowsII),2] < dhSegRowsII[-1,1], na.rm=TRUE)); + .stop_if_not(nrow(dhSegRowsII) == nrow(tcnSegRowsII)) + .stop_if_not(all(tcnSegRowsII[,1] <= tcnSegRowsII[,2], na.rm=TRUE)) + .stop_if_not(all(tcnSegRowsII[-nrow(tcnSegRowsII),2] < tcnSegRowsII[-1,1], na.rm=TRUE)) + .stop_if_not(all(dhSegRowsII[,1] <= dhSegRowsII[,2], na.rm=TRUE)) + .stop_if_not(all(dhSegRowsII[-nrow(dhSegRowsII),2] < dhSegRowsII[-1,1], na.rm=TRUE)) - segsCC[rowsII,] <- segsII; - tcnSegRowsCC[rowsII,] <- tcnSegRowsII; + segsCC[rowsII,] <- segsII + tcnSegRowsCC[rowsII,] <- tcnSegRowsII # Not needed anymore - rowsII <- segsII <- NULL; - verbose && exit(verbose); + rowsII <- segsII <- NULL + verbose && exit(verbose) } # for (ii ...) # Sanity check - stopifnot(nrow(segsCC) == length(rows)); + .stop_if_not(nrow(segsCC) == length(rows)) # Sanity check - stopifnot(nrow(dhSegRowsCC) == nrow(tcnSegRowsCC)); - stopifnot(all(tcnSegRowsCC[,1] <= tcnSegRowsCC[,2], na.rm=TRUE)); + .stop_if_not(nrow(dhSegRowsCC) == nrow(tcnSegRowsCC)) + .stop_if_not(all(tcnSegRowsCC[,1] <= tcnSegRowsCC[,2], na.rm=TRUE)) #################### if (!all(tcnSegRowsCC[-nrow(tcnSegRowsCC),2] < tcnSegRowsCC[-1,1], na.rm=TRUE)) { - aa <- tcnSegRowsCC[-nrow(tcnSegRowsCC),2]; - bb <- tcnSegRowsCC[-1,1]; - delta <- bb - aa; - dd <- cbind(aa, bb, delta=delta); - print(dd); - dd <- subset(dd, delta == 0); - print(dd); - row <- dd[,1L,drop=TRUE]; - print(row); - rr <- row + -10:10; - dd <- data[rr,]; - rownames(dd) <- rr; - print(dd); -print(tcnSegRowsII); + aa <- tcnSegRowsCC[-nrow(tcnSegRowsCC),2] + bb <- tcnSegRowsCC[-1,1] + delta <- bb - aa + dd <- cbind(aa, bb, delta=delta) + print(dd) + dd <- subset(dd, delta == 0) + print(dd) + row <- dd[,1L,drop=TRUE] + print(row) + rr <- row + -10:10 + dd <- data[rr,] + rownames(dd) <- rr + print(dd) +print(tcnSegRowsII) } #################### - stopifnot(all(tcnSegRowsCC[-nrow(tcnSegRowsCC),2] < tcnSegRowsCC[-1,1], na.rm=TRUE)); - stopifnot(all(dhSegRowsCC[,1] <= dhSegRowsCC[,2], na.rm=TRUE)); - stopifnot(all(dhSegRowsCC[-nrow(dhSegRowsCC),2] < dhSegRowsCC[-1,1], na.rm=TRUE)); + .stop_if_not(all(tcnSegRowsCC[-nrow(tcnSegRowsCC),2] < tcnSegRowsCC[-1,1], na.rm=TRUE)) + .stop_if_not(all(dhSegRowsCC[,1] <= dhSegRowsCC[,2], na.rm=TRUE)) + .stop_if_not(all(dhSegRowsCC[-nrow(dhSegRowsCC),2] < dhSegRowsCC[-1,1], na.rm=TRUE)) - segs[rows,] <- segsCC; - tcnSegRows[rows,] <- tcnSegRowsCC; + segs[rows,] <- segsCC + tcnSegRows[rows,] <- tcnSegRowsCC # Not needed anymore - rows <- segsCC <- NULL; - verbose && exit(verbose); + rows <- segsCC <- NULL + verbose && exit(verbose) } # for (cc ...) # Sanity check - stopifnot(nrow(dhSegRows) == nrow(tcnSegRows)); - stopifnot(all(tcnSegRows[,1] <= tcnSegRows[,2], na.rm=TRUE)); - stopifnot(all(tcnSegRows[-nrow(tcnSegRows),2] < tcnSegRows[-1,1], na.rm=TRUE)); - stopifnot(all(dhSegRows[,1] <= dhSegRows[,2], na.rm=TRUE)); - stopifnot(all(dhSegRows[-nrow(dhSegRows),2] < dhSegRows[-1,1], na.rm=TRUE)); + .stop_if_not(nrow(dhSegRows) == nrow(tcnSegRows)) + .stop_if_not(all(tcnSegRows[,1] <= tcnSegRows[,2], na.rm=TRUE)) + .stop_if_not(all(tcnSegRows[-nrow(tcnSegRows),2] < tcnSegRows[-1,1], na.rm=TRUE)) + .stop_if_not(all(dhSegRows[,1] <= dhSegRows[,2], na.rm=TRUE)) + .stop_if_not(all(dhSegRows[-nrow(dhSegRows),2] < dhSegRows[-1,1], na.rm=TRUE)) - verbose && enter(verbose, "Update (C1,C2) per segment"); + verbose && enter(verbose, "Update (C1,C2) per segment") # Append (C1,C2) estimates - tcn <- segs$tcnMean; - dh <- segs$dhMean; - C1 <- 1/2*(1-dh)*tcn; - C2 <- tcn - C1; - segs$c1Mean <- C1; - segs$c2Mean <- C2; - verbose && exit(verbose); + tcn <- segs$tcnMean + dh <- segs$dhMean + C1 <- 1/2*(1-dh)*tcn + C2 <- tcn - C1 + segs$c1Mean <- C1 + segs$c2Mean <- C2 + verbose && exit(verbose) # Return results - keep <- which(is.finite(fit$output$chromosome)); - fitS <- fit; - fitS$data <- data; - fitS$output[keep,] <- segs; - fitS$tcnSegRows[keep,] <- tcnSegRows; + keep <- which(is.finite(fit$output$chromosome)) + fitS <- fit + fitS$data <- data + fitS$output[keep,] <- segs + fitS$tcnSegRows[keep,] <- tcnSegRows # Sanity check - tcnSegRows <- fitS$tcnSegRows; - dhSegRows <- fitS$dhSegRows; - stopifnot(nrow(dhSegRows) == nrow(tcnSegRows)); - stopifnot(all(tcnSegRows[,1] <= tcnSegRows[,2], na.rm=TRUE)); - stopifnot(all(tcnSegRows[-nrow(tcnSegRows),2] < tcnSegRows[-1,1], na.rm=TRUE)); - stopifnot(all(dhSegRows[,1] <= dhSegRows[,2], na.rm=TRUE)); - stopifnot(all(dhSegRows[-nrow(dhSegRows),2] < dhSegRows[-1,1], na.rm=TRUE)); + tcnSegRows <- fitS$tcnSegRows + dhSegRows <- fitS$dhSegRows + .stop_if_not(nrow(dhSegRows) == nrow(tcnSegRows)) + .stop_if_not(all(tcnSegRows[,1] <= tcnSegRows[,2], na.rm=TRUE)) + .stop_if_not(all(tcnSegRows[-nrow(tcnSegRows),2] < tcnSegRows[-1,1], na.rm=TRUE)) + .stop_if_not(all(dhSegRows[,1] <= dhSegRows[,2], na.rm=TRUE)) + .stop_if_not(all(dhSegRows[-nrow(dhSegRows),2] < dhSegRows[-1,1], na.rm=TRUE)) # Update 'flavor' - fitS$params$flavor <- gsub(",", "&", flavor, fixed=TRUE); + fitS$params$flavor <- gsub(",", "&", flavor, fixed=TRUE) - verbose && exit(verbose); + verbose && exit(verbose) - fitS; + fitS }, protected=TRUE) # postsegmentTCN() - - - - -############################################################################ -# HISTORY: -# 2013-08-15 -# o Made extractMinorMajorCNs() for PairedPSCBS acknowledge the -# 'c1c2Swap' field. -# 2013-01-15 -# o Now postsegmentTCN() uses the params$avgTCN estimator, iff given. -# 2012-09-21 -# o ROBUSTNESS: Now extractDeltaC1C2() for PairedPSCBS makes sure to -# retrieve segments with NA splitters between chromosomes and gaps. -# 2012-09-13 -# o Added shiftTCN() for PairedPSCBS. -# 2012-01-21 -# o CLEANUP: Removed left-over debug output in postsegmentTCN(). -# 2012-01-09 -# o Minor correction of a verbose message in postsegmentTCN(). -# 2011-10-16 -# o Added extractCNs(). -# 2011-10-14 -# o Now extractTCNAndDHs() passes '...' to getSegments(). -# 2011-10-02 -# o CLEANUP: Dropped empty callSegments() for PairedPSCBS. -# 2011-06-14 -# o Updated code to recognize new column names. -# 2011-04-08 -# o BUG FIX: postsegmentTCN() for PairedPSCBS could generate an invalid -# 'tcnSegRows' matrix, where the indices for two consecutive segments -# would overlap, which is invalid. -# 2011-04-05 -# o BUG FIX: estimateHighDHQuantileAtAB() for PairedPSCBS would throw -# an error on an undefined 'trim' if verbose output was used. -# 2011-02-17 -# o Added arguments 'robust' and 'trim' to estimateMeanForDH(). -# 2011-02-03 -# o Added argument 'tauTCN' to estimateMeanForDH(). -# 2011-01-27 -# o Added flavor="DHskew" to estimateTauAB(). -# o Added flavor="DH" to estimateTauAB() to estimate from DH instead -# of hBAF. As argued by the equations in the comments, these two -# approaches gives virtually the same results. The advantage with the -# DH approach is that it requires one less degree of freedom. -# o Added estimateMeanForDH(). -# 2011-01-18 -# o BUG FIX: 'tcnSegRows' and 'dhSegRows' where not updated by -# extractByRegions() for PairedPSCBS. -# 2011-01-14 -# o Added estimateTauAB() for estimating the DeltaAB parameter. -# o Added estimateStdDevForHeterozygousBAF() for PairedPSCBS. -# o BUG FIX: extractByRegions() did not handle the case where multiple loci -# at the same position are split up in two different segments. -# 2011-01-12 -# o Added extractByRegions() and extractByRegion() for PairedPSCBS. -# o Now postsegmentTCN(..., force=TRUE) for PairedPSCBS also updates -# the TCN estimates even for segments where the DH segmentation did -# not find any additional change points. -# 2010-12-02 -# o Now postsegmentTCN() assert that total number of TCN loci before -# and after is the same. -# o Now postsegmentTCN() assert that joinSegment is TRUE. -# 2010-12-01 -# o Now postsegmentTCN() checks if it is already postsegmented. -# 2010-11-30 -# o TODO: postsegmentTCN() does not make sure of 'dhLociToExclude'. Why? -# o Now postsegmentTCN() recognizes the new 'tcnLociToExclude'. -# 2010-11-28 -# o BUG FIX: postsegmentTCN() did not handle loci with the same positions -# and that are split in two different segments. It also did not exclude -# loci with missing values. -# 2010-11-21 -# o Adjusted postsegmentTCN() such that the updated TCN segment boundaries -# are the maximum of the DH segment and the support by the loci. This -# means that postsegmentTCN() will work as expected both when signals -# where segmented with 'joinSegments' being TRUE or FALSE. -# 2010-10-25 -# o Now subsetByDhSegments() for PairedPSCBS handles the rare case when -# markers with the same positions are split in two different segments. -# o Renamed subsetBySegments() for PairedPSCBS to subsetByDhSegments(). -# 2010-09-26 -# o Now subsetBySegments() for PairedPSCBS handles multiple chromosomes. -# o Now postsegmentTCN() PairedPSCBS handles multiple chromosomes. -# 2010-09-21 -# o Added postsegmentTCN() for PairedPSCBS. -# 2010-09-19 -# o BUG FIX: plot() used non-defined nbrOfLoci; now length(x). -# 2010-09-15 -# o Added subsetBySegments(). -# o Added linesC1C2() and arrowsC1C2(). -# o Now the default 'cex' for pointsC1C2() corresponds to 'dh.num.mark'. -# o Now extractTotalAndDH() also returns 'dh.num.mark'. -# 2010-09-08 -# o Added argument 'add=FALSE' to plot(). -# o Added plotC1C2(). -# o Added extractTotalAndDH() and extractMinorMajorCNs(). -# 2010-09-04 -# o Added drawLevels() for PairedPSCBS. -# o Added as.data.frame() and print() for PairedPSCBS. -# 2010-09-03 -# o Added plot() for PairedPSCBS. -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.PLOT2.R r-cran-pscbs-0.64.0/R/PairedPSCBS.PLOT2.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.PLOT2.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.PLOT2.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,80 +1,80 @@ setMethodS3("plotTracks2", "PairedPSCBS", function(x, panels=NULL, calls=".*", pch=".", col=NULL, cex=1, lwd=2, changepoints=FALSE, grid=FALSE, quantiles=c(0.05,0.95), xlim=NULL, Clim=c(0,3*ploidy(x)), Blim=c(0,1), xScale=1e-6, ..., add=FALSE, subplots=!add && (length(panels) > 1), verbose=FALSE) { # To please R CMD check - fit <- x; + fit <- x # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'fit': if (nbrOfChromosomes(fit) > 1) { - throw("Multiple chromosomes detected. Not yet implemented."); + throw("Multiple chromosomes detected. Not yet implemented.") } # Argument 'panels': - panels <- unique(panels); - panelsOrg <- panels; - panels <- gsub("[-*]", "", panelsOrg); - knownPanels <- c("tcn", "tcn,c1,c2", "tcn,c1", "tcn,c2", "c1,c2", "c1", "c2", "dh", "betaN", "betaT", "betaTN"); - panels <- match.arg(panels, choices=knownPanels, several.ok=TRUE); - panels <- panelsOrg; + panels <- unique(panels) + panelsOrg <- panels + panels <- gsub("[-*]", "", panelsOrg) + knownPanels <- c("tcn", "tcn,c1,c2", "tcn,c1", "tcn,c2", "c1,c2", "c1", "c2", "dh", "betaN", "betaT", "betaTN") + panels <- match.arg(panels, choices=knownPanels, several.ok=TRUE) + panels <- panelsOrg # Argument 'calls': if (!is.null(calls)) { - calls <- sapply(calls, FUN=Arguments$getRegularExpression); + calls <- sapply(calls, FUN=Arguments$getRegularExpression) } # Argument 'changepoints': - changepoints <- Arguments$getLogical(changepoints); + changepoints <- Arguments$getLogical(changepoints) # Argument 'grid': - grid <- Arguments$getLogical(grid); + grid <- Arguments$getLogical(grid) # Argument 'xlim': if (!is.null(xlim)) { - xlim <- Arguments$getNumerics(xlim, length=c(2,2)); + xlim <- Arguments$getNumerics(xlim, length=c(2,2)) } # Argument 'xScale': - xScale <- Arguments$getNumeric(xScale, range=c(0,Inf)); + xScale <- Arguments$getNumeric(xScale, range=c(0,Inf)) # Argument 'add': - add <- Arguments$getLogical(add); + add <- Arguments$getLogical(add) # Argument 'Clim': if (!add) { Clim <- Arguments$getNumerics(Clim, length=c(2L,2L), - disallow=c("Inf", "NA", "NaN")); + disallow=c("Inf", "NA", "NaN")) } # Argument 'subplots': - subplots <- Arguments$getLogical(subplots); + subplots <- Arguments$getLogical(subplots) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Plotting PSCN panels"); + verbose && enter(verbose, "Plotting PSCN panels") # Extract the input data - data <- getLocusData(fit); + data <- getLocusData(fit) if (is.null(data)) { - throw("Cannot plot segmentation results. No input data available."); + throw("Cannot plot segmentation results. No input data available.") } - chromosomes <- getChromosomes(fit); - chromosome <- chromosomes[1]; - x <- data$x; - CT <- data$CT; - rho <- data$rho; - betaT <- data$betaT; - betaN <- data$betaN; - betaTN <- data$betaTN; - muN <- data$muN; + chromosomes <- getChromosomes(fit) + chromosome <- chromosomes[1] + x <- data$x + CT <- data$CT + rho <- data$rho + betaT <- data$betaT + betaN <- data$betaN + betaTN <- data$betaTN + muN <- data$muN rho <- data$rho hasDH <- !is.null(rho) if (hasDH) { @@ -90,51 +90,51 @@ # If 'rho' is not available, recalculate it from tumor BAFs. # NOTE: This should throw an error in the future. /HB 2013-10-25 if (!hasDH) { - rho <- rep(NA_real_, times=nbrOfLoci); - rho[isHet] <- 2*abs(betaTN[isHet]-1/2); - warning(sprintf("Locus-level DH signals ('rho') were not available in the %s object and therefore recalculated from the TumorBoost-normalized tumor BAFs ('betaTN').", class(fit)[1L])); + rho <- rep(NA_real_, times=nbrOfLoci) + rho[isHet] <- 2*abs(betaTN[isHet]-1/2) + warning(sprintf("Locus-level DH signals ('rho') were not available in the %s object and therefore recalculated from the TumorBoost-normalized tumor BAFs ('betaTN').", class(fit)[1L])) } # Extract the segmentation - segs <- as.data.frame(fit); + segs <- as.data.frame(fit) # Identify available calls if (!is.null(calls)) { - verbose && enter(verbose, "Identifying calls"); + verbose && enter(verbose, "Identifying calls") - pattern <- "Call$"; - callColumns <- grep(pattern, colnames(segs), value=TRUE); + pattern <- "Call$" + callColumns <- grep(pattern, colnames(segs), value=TRUE) if (length(callColumns) > 0) { keep <- sapply(calls, FUN=function(pattern) { - (regexpr(pattern, callColumns) != -1); - }); + (regexpr(pattern, callColumns) != -1) + }) if (is.matrix(keep)) { - keep <- rowAnys(keep); + keep <- rowAnys(keep) } - callColumns <- callColumns[keep]; - callLabels <- gsub(pattern, "", callColumns); - callLabels <- toupper(callLabels); + callColumns <- callColumns[keep] + callLabels <- gsub(pattern, "", callColumns) + callLabels <- toupper(callLabels) } - verbose && cat(verbose, "Call columns:"); - verbose && print(verbose, callColumns); + verbose && cat(verbose, "Call columns:") + verbose && print(verbose, callColumns) - verbose && exit(verbose); + verbose && exit(verbose) } else { - callColumns <- NULL; + callColumns <- NULL } if (chromosome != 0) { - chrTag <- sprintf("Chr%02d", chromosome); + chrTag <- sprintf("Chr%02d", chromosome) } else { - chrTag <- ""; + chrTag <- "" } if (xScale != 1) { - x <- xScale * x; + x <- xScale * x if (is.null(xlim)) { - xlim <- range(x, na.rm=TRUE); + xlim <- range(x, na.rm=TRUE) } else { - xlim <- xScale * xlim; + xlim <- xScale * xlim } } @@ -142,10 +142,10 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Number of panels? # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nbrOfPanels <- length(panels); + nbrOfPanels <- length(panels) if (subplots) { - subplots(nbrOfPanels, ncol=1); - par(mar=c(1,4,1,2)+1); + subplots(nbrOfPanels, ncol=1) + par(mar=c(1,4,1,2)+1) } # Color loci by genotype @@ -160,149 +160,149 @@ # For each panel... # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - for (pp in seq_len(nbrOfPanels)) { - panel <- panels[pp]; + panel <- panels[pp] verbose && enter(verbose, sprintf("Panel #%d ('%s') of %d", - pp, panel, length(panels))); + pp, panel, length(panels))) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setup empty plot # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (!add) { - verbose && enter(verbose, "Creating empty plot"); + verbose && enter(verbose, "Creating empty plot") - tracks <- strsplit(panel, split=",", fixed=TRUE)[[1]]; - tracks <- gsub("[-*]", "", tracks); + tracks <- strsplit(panel, split=",", fixed=TRUE)[[1]] + tracks <- gsub("[-*]", "", tracks) # Defaults - ylim <- Clim; - ylab <- paste(toupper(tracks), collapse=", "); + ylim <- Clim + ylab <- paste(toupper(tracks), collapse=", ") if (any(is.element(c("betaN", "betaT", "betaTN", "dh"), tracks))) { - ylim <- Blim; + ylim <- Blim } - verbose && cat(verbose, "ylim:"); - verbose && print(verbose, ylim); + verbose && cat(verbose, "ylim:") + verbose && print(verbose, ylim) - plot(NA, xlim=xlim, ylim=ylim, ylab=ylab); + plot(NA, xlim=xlim, ylim=ylim, ylab=ylab) # Geometrical annotations - stext(side=3, pos=1, chrTag); + stext(side=3, pos=1, chrTag) if (grid) { - abline(h=seq(from=0, to=ylim[2], by=2), lty=3, col="gray"); - abline(h=0, lty=1, col="black"); + abline(h=seq(from=0, to=ylim[2], by=2), lty=3, col="gray") + abline(h=0, lty=1, col="black") } - verbose && exit(verbose); + verbose && exit(verbose) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Scatter tracks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - tracks <- strsplit(panel, split=",", fixed=TRUE)[[1]]; - keep <- grep("-", tracks, fixed=TRUE, invert=TRUE); - tracks <- tracks[keep]; - - verbose && cat(verbose, "Number tracks with scatter: ", length(tracks)); - verbose && cat(verbose, "Tracks with scatter:"); - verbose && print(verbose, tracks); - nbrOfTracks <- length(tracks); + tracks <- strsplit(panel, split=",", fixed=TRUE)[[1]] + keep <- grep("-", tracks, fixed=TRUE, invert=TRUE) + tracks <- tracks[keep] + + verbose && cat(verbose, "Number tracks with scatter: ", length(tracks)) + verbose && cat(verbose, "Tracks with scatter:") + verbose && print(verbose, tracks) + nbrOfTracks <- length(tracks) for (tt in seq_len(nbrOfTracks)) { - track <- tracks[tt]; + track <- tracks[tt] verbose && enter(verbose, sprintf("Scatter track #%d ('%s') of %d", - tt, track, nbrOfTracks)); - track <- gsub("[-*]", "", track); + tt, track, nbrOfTracks)) + track <- gsub("[-*]", "", track) # Defaults - colT <- if (is.null(col)) "black" else col; + colT <- if (is.null(col)) "black" else col if (track == "tcn") { - y <- CT; + y <- CT } else if (track == "c1") { - y <- 1/2*(1-rho)*CT; + y <- 1/2*(1-rho)*CT } else if (track == "c2") { - y <- 1/2*(1+rho)*CT; + y <- 1/2*(1+rho)*CT } else if (track == "betaN") { - y <- betaN; - colT <- if (is.null(col)) colMu else col; - ylab <- expression(BAF[N]); - ylim <- Blim; + y <- betaN + colT <- if (is.null(col)) colMu else col + ylab <- expression(BAF[N]) + ylim <- Blim } else if (track == "betaT") { - y <- betaT; - colT <- if (is.null(col)) colMu else col; - ylab <- expression(BAF[T]); - ylim <- Blim; + y <- betaT + colT <- if (is.null(col)) colMu else col + ylab <- expression(BAF[T]) + ylim <- Blim } else if (track == "betaTN") { - y <- betaTN; - colT <- if (is.null(col)) colMu else col; - ylab <- expression(BAF[T]^"*"); - ylim <- Blim; + y <- betaTN + colT <- if (is.null(col)) colMu else col + ylab <- expression(BAF[T]^"*") + ylim <- Blim } else if (track == "dh") { - y <- rho; - colT <- if (is.null(col)) colMu[isHet] else col; - ylab <- "DH"; - ylim <- Blim; + y <- rho + colT <- if (is.null(col)) colMu[isHet] else col + ylab <- "DH" + ylim <- Blim } else { - y <- NULL; + y <- NULL } # Nothing to do? if (!is.null(y)) { - points(x, y, pch=pch, cex=cex, col=colT); + points(x, y, pch=pch, cex=cex, col=colT) } - verbose && exit(verbose); + verbose && exit(verbose) } # for (tt ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Segment tracks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - tracks <- strsplit(panel, split=",", fixed=TRUE)[[1]]; - keep <- grep("*", tracks, fixed=TRUE); - tracks <- gsub("[-*]", "", tracks[keep]); + tracks <- strsplit(panel, split=",", fixed=TRUE)[[1]] + keep <- grep("*", tracks, fixed=TRUE) + tracks <- gsub("[-*]", "", tracks[keep]) # Keep only supported tracks - tracksWithLevels <- c("tcn", "betaTN", "c1", "c2", "dh"); - stopifnot(all(is.element(tracks, tracksWithLevels))); - tracks <- intersect(tracks, tracksWithLevels); - - verbose && cat(verbose, "Number tracks with levels: ", length(tracks)); - verbose && cat(verbose, "Tracks with levels:"); - verbose && print(verbose, tracks); - nbrOfTracks <- length(tracks); + tracksWithLevels <- c("tcn", "betaTN", "c1", "c2", "dh") + .stop_if_not(all(is.element(tracks, tracksWithLevels))) + tracks <- intersect(tracks, tracksWithLevels) + + verbose && cat(verbose, "Number tracks with levels: ", length(tracks)) + verbose && cat(verbose, "Tracks with levels:") + verbose && print(verbose, tracks) + nbrOfTracks <- length(tracks) for (tt in seq_len(nbrOfTracks)) { - track <- tracks[tt]; + track <- tracks[tt] verbose && enter(verbose, sprintf("Level track #%d ('%s') of %d", - tt, track, nbrOfTracks)); + tt, track, nbrOfTracks)) if (track == "tcn") { - colT <- "purple"; + colT <- "purple" } else if (track == "c1") { - colT <- "blue"; + colT <- "blue" } else if (track == "c2") { - colT <- "red"; + colT <- "red" } else if (track == "betaTN") { - colT <- "orange"; + colT <- "orange" } else if (track == "dh") { - colT <- "orange"; + colT <- "orange" } else { - colT <- if (is.null(col)) "black" else col; + colT <- if (is.null(col)) "black" else col } # Nothing to do? if (track != "betaTN") { drawConfidenceBands(fit, what=track, quantiles=quantiles, - col=colT, xScale=xScale); + col=colT, xScale=xScale) } - drawLevels(fit, what=track, col=colT, lwd=lwd, xScale=xScale); + drawLevels(fit, what=track, col=colT, lwd=lwd, xScale=xScale) - verbose && exit(verbose); + verbose && exit(verbose) } # for (tt ...) @@ -310,11 +310,11 @@ # Draw change points? # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (changepoints) { - segs <- as.data.frame(fit); - xStarts <- segs[,"tcnStart"]; - xEnds <- segs[,"tcnEnd"]; - xs <- sort(unique(c(xStarts, xEnds))); - abline(v=xScale*xs, lty=1, col="gray"); + segs <- as.data.frame(fit) + xStarts <- segs[,"tcnStart"] + xEnds <- segs[,"tcnEnd"] + xs <- sort(unique(c(xStarts, xEnds))) + abline(v=xScale*xs, lty=1, col="gray") } @@ -323,53 +323,39 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (length(callColumns) > 0) { for (cc in seq_along(callColumns)) { - callColumn <- callColumns[cc]; - callLabel <- callLabels[cc]; + callColumn <- callColumns[cc] + callLabel <- callLabels[cc] verbose && enter(verbose, sprintf("Call #%d ('%s') of %d", - cc, callLabel, length(callColumns))); + cc, callLabel, length(callColumns))) - verbose && cat(verbose, "Column: ", callColumn); + verbose && cat(verbose, "Column: ", callColumn) - segsT <- segs[,c("dhStart", "dhEnd", callColumn)]; - isCalled <- which(segsT[[callColumn]]); - segsT <- segsT[isCalled,1:2,drop=FALSE]; + segsT <- segs[,c("dhStart", "dhEnd", callColumn)] + isCalled <- which(segsT[[callColumn]]) + segsT <- segsT[isCalled,1:2,drop=FALSE] verbose && printf(verbose, "Number of segments called %s: %d\n", - callLabel, nrow(segsT)); - segsT <- xScale * segsT; + callLabel, nrow(segsT)) + segsT <- xScale * segsT - verbose && str(verbose, segsT); + verbose && str(verbose, segsT) - side <- 2*((cc+1) %% 2) + 1; + side <- 2*((cc+1) %% 2) + 1 # For each segment called... for (ss in seq_len(nrow(segsT))) { - x0 <- segsT[ss,1,drop=TRUE]; - x1 <- segsT[ss,2,drop=TRUE]; - abline(v=c(x0,x1), lty=3, col="gray"); - xMid <- (x0+x1)/2; - mtext(side=side, at=xMid, line=-1, cex=0.7, col="#666666", callLabel); + x0 <- segsT[ss,1,drop=TRUE] + x1 <- segsT[ss,2,drop=TRUE] + abline(v=c(x0,x1), lty=3, col="gray") + xMid <- (x0+x1)/2 + mtext(side=side, at=xMid, line=-1, cex=0.7, col="#666666", callLabel) } # for (ss in ...) - verbose && exit(verbose); + verbose && exit(verbose) } # for (cc in ...) } # if (length(callColumns) > 0) - verbose && exit(verbose); + verbose && exit(verbose) } # for (pp ...) - verbose && exit(verbose); + verbose && exit(verbose) - invisible(); + invisible() }, protected=TRUE) # plotTracks2() - - - -############################################################################ -# HISTORY: -# 2011-09-30 -# o BUG FIX: plotTracks2(..., panels="dh") gave an error due to a -# forgotten assigment. -# 2011-06-14 -# o Updated code to recognize new column names. -# 2011-01-19 -# o Added plotTracks2(). Completely rewritten plotTracks(). -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.PLOT,many.R r-cran-pscbs-0.64.0/R/PairedPSCBS.PLOT,many.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.PLOT,many.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.PLOT,many.R 2018-08-12 21:30:44.000000000 +0000 @@ -12,14 +12,14 @@ # Local functions # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - attachGH <- function(gh, envir=parent.frame()) { - if (!is.list(gh)) return(); - if (is.null(gh$track)) return(); - if (!is.null(value <- gh$track)) assign("track", value, envir=envir); - if (!is.null(value <- gh$subtracks)) assign("trackT", value, envir=envir); - if (!is.null(value <- gh$scatter$col)) assign("colS", value, envir=envir); - if (!is.null(value <- gh$scatter$pch)) assign("pchT", value, envir=envir); - if (!is.null(value <- gh$level$col)) assign("colL", value, envir=envir); - if (!is.null(value <- gh$cis$col)) assign("colC", value, envir=envir); + if (!is.list(gh)) return() + if (is.null(gh$track)) return() + if (!is.null(value <- gh$track)) assign("track", value, envir=envir) + if (!is.null(value <- gh$subtracks)) assign("trackT", value, envir=envir) + if (!is.null(value <- gh$scatter$col)) assign("colS", value, envir=envir) + if (!is.null(value <- gh$scatter$pch)) assign("pchT", value, envir=envir) + if (!is.null(value <- gh$level$col)) assign("colL", value, envir=envir) + if (!is.null(value <- gh$cis$col)) assign("colC", value, envir=envir) } # attachGH() @@ -33,52 +33,52 @@ level = list(lty=1L, col=c("black", tcn="purple", c1="blue", c2="red", dh="orange")), callLevel = list(lty=1L, col=c("#666666")), knownSegment = list(lty=1L, col=c("#aaaaaa")) - ); + ) getOptionValue <- function(option, what, track, ...) { - values <- opts[[option]][[what]]; - value <- values[track]; - if (is.na(value)) value <- values[1L]; - unname(value); + values <- opts[[option]][[what]] + value <- values[track] + if (is.na(value)) value <- values[1L] + unname(value) } # getOptionValue() getScatterColor <- function(track, ...) { - getOptionValue("scatter", "col", track, ...); + getOptionValue("scatter", "col", track, ...) } # getScatterColor() getLevelColor <- function(track, ...) { - getOptionValue("level", "col", track, ...); + getOptionValue("level", "col", track, ...) } # getLevelColor() getCallScatterColor <- function(track, ...) { - getOptionValue("callScatter", "col", track, ...); + getOptionValue("callScatter", "col", track, ...) } # getCallScatterColor() getCallLevelColor <- function(track, ...) { - getOptionValue("callLevel", "col", track, ...); + getOptionValue("callLevel", "col", track, ...) } # getCallLevelColor() getCallLevelLty <- function(track, ...) { - getOptionValue("callLevel", "lty", track, ...); + getOptionValue("callLevel", "lty", track, ...) } # getCallLevelColor() getCIColor <- function(track, ...) { - getLevelColor(track, ...); + getLevelColor(track, ...) } # getLevelColor() getKnownSegmentColor <- function(track, ...) { - getOptionValue("knownSegment", "col", track, ...); + getOptionValue("knownSegment", "col", track, ...) } # getKnownSegmentColor() getKnownSegmentLty <- function(track, ...) { - getOptionValue("knownSegment", "lty", track, ...); + getOptionValue("knownSegment", "lty", track, ...) } # getKnownSegmentColor() drawXLabelTicks <- function() { if (identical(xlabTicks, "[chr]")) { - mtext(text=chrTags, side=rep(c(1,3), length.out=length(chrTags)), at=mids, line=0.1, cex=0.7); + mtext(text=chrTags, side=rep(c(1,3), length.out=length(chrTags)), at=mids, line=0.1, cex=0.7) } else if (identical(xlabTicks, "[pos]")) { - axis(side=1L); + axis(side=1L) } } @@ -89,144 +89,144 @@ # Argument 'chromosomes': if (!is.null(chromosomes)) { - disallow <- c("NaN", "Inf"); - chromosomes <- Arguments$getIntegers(chromosomes, range=c(0,Inf), disallow=disallow); - stopifnot(is.element(chromosomes, getChromosomes(fit))); + disallow <- c("NaN", "Inf") + chromosomes <- Arguments$getIntegers(chromosomes, range=c(0,Inf), disallow=disallow) + .stop_if_not(all(is.element(chromosomes, getChromosomes(fit)))) } # Argument 'tracks': knownTracks <- c("tcn", "dh", "tcn,c1,c2", "c1,c2", "c1", "c2", - "betaN", "betaT", "betaTN"); - defaultTracks <- knownTracks[1:3]; + "betaN", "betaT", "betaTN") + defaultTracks <- knownTracks[1:3] if (is.null(tracks)) { - tracks <- defaultTracks; + tracks <- defaultTracks } else { - tracks <- match.arg(tracks, choices=knownTracks, several.ok=TRUE); - tracks <- unique(tracks); + tracks <- match.arg(tracks, choices=knownTracks, several.ok=TRUE) + tracks <- unique(tracks) } # Argument 'scatter': if (!is.null(scatter)) { - scatter <- Arguments$getCharacter(scatter); + scatter <- Arguments$getCharacter(scatter) if (scatter == "*") { - scatter <- tracks; + scatter <- tracks } else { - scatterT <- strsplit(scatter, split=",", fixed=TRUE); - tracksT <- strsplit(tracks, split=",", fixed=TRUE); - stopifnot(all(is.element(scatterT, tracksT))); + scatterT <- strsplit(scatter, split=",", fixed=TRUE) + tracksT <- strsplit(tracks, split=",", fixed=TRUE) + .stop_if_not(all(is.element(scatterT, tracksT))) # Not needed anymore - scatterT <- tracksT <- NULL; + scatterT <- tracksT <- NULL } } # Argument 'calls': if (!is.null(calls)) { - calls <- sapply(calls, FUN=Arguments$getRegularExpression); + calls <- sapply(calls, FUN=Arguments$getRegularExpression) } # Argument 'callLoci': - callLoci <- Arguments$getLogical(callLoci); + callLoci <- Arguments$getLogical(callLoci) # Argument 'callThresholds': - callThresholds <- Arguments$getLogical(callThresholds); + callThresholds <- Arguments$getLogical(callThresholds) # Argument 'boundaries': - boundaries <- Arguments$getLogical(boundaries); + boundaries <- Arguments$getLogical(boundaries) # Argument 'knownSegments': - knownSegments <- Arguments$getLogical(knownSegments); + knownSegments <- Arguments$getLogical(knownSegments) # Argument 'add': - add <- Arguments$getLogical(add); + add <- Arguments$getLogical(add) # Argument 'Clim' & 'Blim': if (!add) { Clim <- Arguments$getNumerics(Clim, length=c(2L,2L), - disallow=c("Inf", "NA", "NaN")); + disallow=c("Inf", "NA", "NaN")) Blim <- Arguments$getNumerics(Blim, length=c(2L,2L), - disallow=c("Inf", "NA", "NaN")); + disallow=c("Inf", "NA", "NaN")) } # Argument 'xScale': - xScale <- Arguments$getNumeric(xScale, range=c(0,Inf)); + xScale <- Arguments$getNumeric(xScale, range=c(0,Inf)) # Argument 'xlabTicks': if (!is.null(xlabTicks)) { - xlabTicks <- Arguments$getCharacter(xlabTicks); + xlabTicks <- Arguments$getCharacter(xlabTicks) } # Argument 'subset': if (!is.null(subset)) { - subset <- Arguments$getDouble(subset); + subset <- Arguments$getDouble(subset) } # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Plotting PSCN tracks"); + verbose && enter(verbose, "Plotting PSCN tracks") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subset by chromosomes # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (!is.null(chromosomes)) { - verbose && enter(verbose, "Plotting a subset of the chromosomes"); - fit <- extractChromosomes(fit, chromosomes=chromosomes, verbose=verbose); - verbose && exit(verbose); + verbose && enter(verbose, "Plotting a subset of the chromosomes") + fit <- extractChromosomes(fit, chromosomes=chromosomes, verbose=verbose) + verbose && exit(verbose) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Tile chromosomes # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - fit <- tileChromosomes(fit, verbose=verbose); - verbose && str(verbose, fit); + fit <- tileChromosomes(fit, verbose=verbose) + verbose && str(verbose, fit) # Extract the input data - data <- getLocusData(fit); + data <- getLocusData(fit) if (is.null(data)) { - throw("Cannot plot segmentation results. No input data available."); + throw("Cannot plot segmentation results. No input data available.") } # Extract the segmentation - segs <- as.data.frame(fit); + segs <- as.data.frame(fit) # Identify available calls - callData <- NULL; + callData <- NULL if (!is.null(calls) || callThresholds) { - verbose && enter(verbose, "Identifying calls"); + verbose && enter(verbose, "Identifying calls") - pattern <- "Call$"; - allCallColumns <- grep(pattern, colnames(segs), value=TRUE); - allCallLabels <- toupper(gsub(pattern, "", allCallColumns)); - verbose && cat(verbose, "Call columns:"); - verbose && print(verbose, allCallColumns); + pattern <- "Call$" + allCallColumns <- grep(pattern, colnames(segs), value=TRUE) + allCallLabels <- toupper(gsub(pattern, "", allCallColumns)) + verbose && cat(verbose, "Call columns:") + verbose && print(verbose, allCallColumns) if (!is.null(calls)) { - callColumns <- allCallColumns; + callColumns <- allCallColumns if (length(callColumns) > 0L) { keep <- sapply(calls, FUN=function(pattern) { - (regexpr(pattern, callColumns) != -1L); - }); + (regexpr(pattern, callColumns) != -1L) + }) if (is.matrix(keep)) { - keep <- rowAnys(keep); + keep <- rowAnys(keep) } - callColumns <- callColumns[keep]; - callLabels <- allCallLabels[keep]; + callColumns <- callColumns[keep] + callLabels <- allCallLabels[keep] # Annotate individual loci by calls? if (callLoci) { - callData <- extractCallsByLocus(fit, verbose=less(verbose,5)); + callData <- extractCallsByLocus(fit, verbose=less(verbose,5)) } } - verbose && cat(verbose, "Call to be annotated:"); - verbose && print(verbose, callColumns); + verbose && cat(verbose, "Call to be annotated:") + verbose && print(verbose, callColumns) } - verbose && exit(verbose); + verbose && exit(verbose) } @@ -242,211 +242,211 @@ } # (b) Subset - n <- nrow(data); - keep <- sample(n, size=subset*n); - data <- data[keep,]; + n <- nrow(data) + keep <- sample(n, size=subset*n) + data <- data[keep,] if (!is.null(callData)) { - callData <- callData[keep,]; + callData <- callData[keep,] } } # To please R CMD check - CT <- rho <- muN <- betaT <- betaN <- betaTN <- rho <- NULL; - rm(list=c("CT", "rho", "muN", "betaT", "betaN", "betaTN")); - attachLocally(data); + CT <- rho <- muN <- betaT <- betaN <- betaTN <- rho <- NULL + rm(list=c("CT", "rho", "muN", "betaT", "betaN", "betaTN")) + attachLocally(data) # Calculate (C1,C2) - C1 <- 1/2*(1-rho)*CT; - C2 <- CT - C1; + C1 <- 1/2*(1-rho)*CT + C2 <- CT - C1 # BACKWARD COMPATIBILITY: # If 'rho' is not available, recalculate it from tumor BAFs. # NOTE: This should throw an error in the future. /HB 2013-10-25 if (is.null(data$rho)) { - isSnp <- (!is.na(betaTN) & !is.na(muN)); - isHet <- isSnp & (muN == 1/2); - rho <- rep(NA_real_, times=nbrOfLoci); - rho[isHet] <- 2*abs(betaTN[isHet]-1/2); - warning(sprintf("Locus-level DH signals ('rho') were not available in the %s object and therefore recalculated from the TumorBoost-normalized tumor BAFs ('betaTN').", class(fit)[1L])); + isSnp <- (!is.na(betaTN) & !is.na(muN)) + isHet <- isSnp & (muN == 1/2) + rho <- rep(NA_real_, times=nbrOfLoci) + rho[isHet] <- 2*abs(betaTN[isHet]-1/2) + warning(sprintf("Locus-level DH signals ('rho') were not available in the %s object and therefore recalculated from the TumorBoost-normalized tumor BAFs ('betaTN').", class(fit)[1L])) } - x <- xScale * x; - vs <- xScale * fit$chromosomeStats[,1:2,drop=FALSE]; - mids <- (vs[,1]+vs[,2])/2; + x <- xScale * x + vs <- xScale * fit$chromosomeStats[,1:2,drop=FALSE] + mids <- (vs[,1]+vs[,2])/2 - nbrOfLoci <- length(x); - chrTags <- sprintf("Chr%02d", chromosomes); + nbrOfLoci <- length(x) + chrTags <- sprintf("Chr%02d", chromosomes) if (subplots) { - subplots(length(tracks), ncol=1L); - par(oma=oma, mar=mar); + subplots(length(tracks), ncol=1L) + par(oma=oma, mar=mar) } - pchT <- if (!is.null(scatter)) { pch } else { NA }; - xlim <- range(x, na.rm=TRUE); - xlab <- "Genomic position"; + pchT <- if (!is.null(scatter)) { pch } else { NA } + xlim <- range(x, na.rm=TRUE) + xlab <- "Genomic position" # Graphical handle - gh <- list(fit=fit); - gh$xScale <- xScale; - gh$xlim <- xlim; - gh$xlab <- xlab; + gh <- list(fit=fit) + gh$xScale <- xScale + gh$xlim <- xlim + gh$xlab <- xlab if (!is.null(callData)) { - gh$callsByLocus <- callData; + gh$callsByLocus <- callData } for (tt in seq_along(tracks)) { - track <- tracks[tt]; + track <- tracks[tt] verbose && enter(verbose, sprintf("Track #%d ('%s') of %d", - tt, track, length(tracks))); + tt, track, length(tracks))) # Get graphical style parameters. - tracksT <- unlist(strsplit(track, split=",", fixed=TRUE)); - colS <- sapply(tracksT, FUN=getScatterColor); - colL <- sapply(tracksT, FUN=getLevelColor); - colC <- sapply(tracksT, FUN=getCIColor); + tracksT <- unlist(strsplit(track, split=",", fixed=TRUE)) + colS <- sapply(tracksT, FUN=getScatterColor) + colL <- sapply(tracksT, FUN=getLevelColor) + colC <- sapply(tracksT, FUN=getCIColor) # Color scatter plot according to calls? if (!is.null(calls) && callLoci && length(callColumns) > 0L) { - colsT <- rep(colS[1L], times=nrow(callData)); + colsT <- rep(colS[1L], times=nrow(callData)) for (cc in seq_along(callColumns)) { - callColumn <- callColumns[cc]; - callLabel <- callLabels[cc]; + callColumn <- callColumns[cc] + callLabel <- callLabels[cc] verbose && enter(verbose, sprintf("Call #%d ('%s') of %d", - cc, callLabel, length(callColumns))); + cc, callLabel, length(callColumns))) - verbose && cat(verbose, "Column: ", callColumn); + verbose && cat(verbose, "Column: ", callColumn) - skip <- TRUE; + skip <- TRUE if (regexpr("tcn", track) != -1L) { - skip <- !is.element(callLabel, c("LOSS", "NTCN", "GAIN", "LOH")); + skip <- !is.element(callLabel, c("LOSS", "NTCN", "GAIN", "LOH")) } else if (track == "dh") { - skip <- !is.element(callLabel, c("AB", "LOH")); + skip <- !is.element(callLabel, c("AB", "LOH")) } if (skip) { - verbose && exit(verbose); - next; + verbose && exit(verbose) + next } - callsCC <- callData[[callColumn]]; - idxs <- which(callsCC); + callsCC <- callData[[callColumn]] + idxs <- which(callsCC) # Nothing to do? if (length(idxs) == 0L) { - verbose && exit(verbose); - next; + verbose && exit(verbose) + next } - callCol <- getCallScatterColor(callLabel); + callCol <- getCallScatterColor(callLabel) - colsT[idxs] <- callCol; + colsT[idxs] <- callCol } # for (cc in ...) - colS <- colsT; + colS <- colsT } # if (!is.null(calls)) # Assign graphical-handle parameters - gh$track <- track; - gh$subtracks <- tracksT; - gh$scatter <- list(col=colS, pch=pchT); - gh$level <- list(col=colL); - gh$cis <- list(col=colC); + gh$track <- track + gh$subtracks <- tracksT + gh$scatter <- list(col=colS, pch=pchT) + gh$level <- list(col=colL) + gh$cis <- list(col=colC) if (track == "tcn") { - plot(NA, xlim=xlim, ylim=Clim, xlab=xlab, ylab="TCN", axes=FALSE); - if (!is.null(onBegin)) attachGH(onBegin(gh=gh)); + plot(NA, xlim=xlim, ylim=Clim, xlab=xlab, ylab="TCN", axes=FALSE) + if (!is.null(onBegin)) attachGH(onBegin(gh=gh)) if (!is.na(pchT)) { - points(x, CT, pch=pchT, col=colS); + points(x, CT, pch=pchT, col=colS) } - drawConfidenceBands(fit, what="tcn", quantiles=quantiles, col=colC["tcn"], xScale=xScale); - drawLevels(fit, what="tcn", col=colL, xScale=xScale); + drawConfidenceBands(fit, what="tcn", quantiles=quantiles, col=colC["tcn"], xScale=xScale) + drawLevels(fit, what="tcn", col=colL, xScale=xScale) } if (is.element(track, c("tcn,c1,c2", "c1,c2", "c1", "c2"))) { - tracksT <- unlist(strsplit(track, split=",", fixed=TRUE)); - plot(NA, xlim=xlim, ylim=Clim, xlab=xlab, ylab="C1, C2, TCN", axes=FALSE); - if (!is.null(onBegin)) attachGH(onBegin(gh=gh)); + tracksT <- unlist(strsplit(track, split=",", fixed=TRUE)) + plot(NA, xlim=xlim, ylim=Clim, xlab=xlab, ylab="C1, C2, TCN", axes=FALSE) + if (!is.null(onBegin)) attachGH(onBegin(gh=gh)) # Draw scatter for TCN or C1 and C2. if (!is.na(pchT)) { if (is.element("tcn", tracksT)) { - points(x, CT, pch=pchT, col=colS); + points(x, CT, pch=pchT, col=colS) } else { if (is.element("c1", tracksT)) { - points(x, C1, pch=pchT, col=colS); + points(x, C1, pch=pchT, col=colS) } if (is.element("c2", tracksT)) { - points(x, C2, pch=pchT, col=colS); + points(x, C2, pch=pchT, col=colS) } } } # Draw confidence bands for TCN, C1, C2. if (is.element("tcn", tracksT)) { - drawConfidenceBands(fit, what="tcn", quantiles=quantiles, col=colC["tcn"], xScale=xScale); + drawConfidenceBands(fit, what="tcn", quantiles=quantiles, col=colC["tcn"], xScale=xScale) } if (is.element("c2", tracksT)) { - drawConfidenceBands(fit, what="c2", quantiles=quantiles, col=colC["c2"], xScale=xScale); + drawConfidenceBands(fit, what="c2", quantiles=quantiles, col=colC["c2"], xScale=xScale) } if (is.element("c1", tracksT)) { - drawConfidenceBands(fit, what="c1", quantiles=quantiles, col=colC["c1"], xScale=xScale); + drawConfidenceBands(fit, what="c1", quantiles=quantiles, col=colC["c1"], xScale=xScale) } # Draw segment means for TCN, C1, C2. if (is.element("tcn", tracksT)) { - drawLevels(fit, what="tcn", col=colL["tcn"], xScale=xScale); + drawLevels(fit, what="tcn", col=colL["tcn"], xScale=xScale) } if (is.element("c2", tracksT)) { - drawLevels(fit, what="c2", col=colL["c2"], xScale=xScale); + drawLevels(fit, what="c2", col=colL["c2"], xScale=xScale) } if (is.element("tcn", tracksT)) { # In case C2 overlaps with TCN - drawLevels(fit, what="tcn", col=colL["tcn"], lty="22", xScale=xScale); + drawLevels(fit, what="tcn", col=colL["tcn"], lty="22", xScale=xScale) } # In case C1 overlaps with C2 if (is.element("c1", tracksT)) { - drawLevels(fit, what="c1", col=colL["c1"], xScale=xScale); + drawLevels(fit, what="c1", col=colL["c1"], xScale=xScale) if (is.element("c2", tracksT)) { - drawLevels(fit, what="c2", col=colL["c2"], lty="22", xScale=xScale); + drawLevels(fit, what="c2", col=colL["c2"], lty="22", xScale=xScale) } if (is.element("tcn", tracksT)) { - drawLevels(fit, what="tcn", col=colL["tcn"], lty="22", xScale=xScale); + drawLevels(fit, what="tcn", col=colL["tcn"], lty="22", xScale=xScale) } } } if (track == "betaN") { - plot(NA, xlim=xlim, ylim=Blim, xlab=xlab, ylab="BAF_N", axes=FALSE); - if (!is.null(onBegin)) attachGH(onBegin(gh=gh)); + plot(NA, xlim=xlim, ylim=Blim, xlab=xlab, ylab="BAF_N", axes=FALSE) + if (!is.null(onBegin)) attachGH(onBegin(gh=gh)) if (!is.na(pchT)) { - points(x, betaN, pch=pchT, col=colS); + points(x, betaN, pch=pchT, col=colS) } } if (track == "betaT") { - plot(NA, xlim=xlim, ylim=Blim, xlab=xlab, ylab="BAF_T", axes=FALSE); - if (!is.null(onBegin)) attachGH(onBegin(gh=gh)); + plot(NA, xlim=xlim, ylim=Blim, xlab=xlab, ylab="BAF_T", axes=FALSE) + if (!is.null(onBegin)) attachGH(onBegin(gh=gh)) if (!is.na(pchT)) { - points(x, betaT, pch=pchT, col=colS); + points(x, betaT, pch=pchT, col=colS) } } if (track == "betaTN") { - plot(NA, xlim=xlim, ylim=Blim, xlab=xlab, ylab="BAF_TN", axes=FALSE); - if (!is.null(onBegin)) attachGH(onBegin(gh=gh)); + plot(NA, xlim=xlim, ylim=Blim, xlab=xlab, ylab="BAF_TN", axes=FALSE) + if (!is.null(onBegin)) attachGH(onBegin(gh=gh)) if (!is.na(pchT)) { - points(x, betaTN, pch=pchT, col=colS); + points(x, betaTN, pch=pchT, col=colS) } } if (track == "dh") { - plot(NA, xlim=xlim, ylim=Blim, xlab=xlab, ylab="DH", axes=FALSE); - if (!is.null(onBegin)) attachGH(onBegin(gh=gh)); + plot(NA, xlim=xlim, ylim=Blim, xlab=xlab, ylab="DH", axes=FALSE) + if (!is.null(onBegin)) attachGH(onBegin(gh=gh)) if (!is.na(pchT)) { - points(x, rho, pch=pchT, col=colS); + points(x, rho, pch=pchT, col=colS) } - drawConfidenceBands(fit, what="dh", quantiles=quantiles, col=colC["dh"], xScale=xScale); - drawLevels(fit, what="dh", col=colL["dh"], xScale=xScale); + drawConfidenceBands(fit, what="dh", quantiles=quantiles, col=colC["dh"], xScale=xScale) + drawLevels(fit, what="dh", col=colL["dh"], xScale=xScale) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -454,32 +454,32 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (!is.null(calls) && !callLoci && length(callColumns) > 0L) { for (cc in seq_along(callColumns)) { - callColumn <- callColumns[cc]; - callLabel <- callLabels[cc]; + callColumn <- callColumns[cc] + callLabel <- callLabels[cc] verbose && enter(verbose, sprintf("Call #%d ('%s') of %d", - cc, callLabel, length(callColumns))); + cc, callLabel, length(callColumns))) - verbose && cat(verbose, "Column: ", callColumn); + verbose && cat(verbose, "Column: ", callColumn) - segsT <- segs[,c("dhStart", "dhEnd", callColumn)]; - isCalled <- which(segsT[[callColumn]]); - segsT <- segsT[isCalled,1:2,drop=FALSE]; + segsT <- segs[,c("dhStart", "dhEnd", callColumn)] + isCalled <- which(segsT[[callColumn]]) + segsT <- segsT[isCalled,1:2,drop=FALSE] verbose && printf(verbose, "Number of segments called %s: %d\n", - callLabel, nrow(segsT)); - segsT <- xScale * segsT; + callLabel, nrow(segsT)) + segsT <- xScale * segsT - verbose && str(verbose, segsT); + verbose && str(verbose, segsT) - side <- 2*((cc+1) %% 2) + 1; + side <- 2*((cc+1) %% 2) + 1 # For each segment called... for (ss in seq_len(nrow(segsT))) { - x0 <- segsT[ss,1,drop=TRUE]; - x1 <- segsT[ss,2,drop=TRUE]; - abline(v=c(x0,x1), lty=3, col="gray"); - xMid <- (x0+x1)/2; - mtext(side=side, at=xMid, line=-1, cex=0.7, col="#666666", callLabel); + x0 <- segsT[ss,1,drop=TRUE] + x1 <- segsT[ss,2,drop=TRUE] + abline(v=c(x0,x1), lty=3, col="gray") + xMid <- (x0+x1)/2 + mtext(side=side, at=xMid, line=-1, cex=0.7, col="#666666", callLabel) } # for (ss in ...) - verbose && exit(verbose); + verbose && exit(verbose) } # for (cc in ...) } # if (!is.null(calls)) @@ -489,200 +489,60 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (callThresholds) { # Add call parameter estimates, e.g. deltaAB - colCL <- sapply(tracksT, FUN=getCallLevelColor); - ltyCL <- sapply(tracksT, FUN=getCallLevelLty); + colCL <- sapply(tracksT, FUN=getCallLevelColor) + ltyCL <- sapply(tracksT, FUN=getCallLevelLty) - trackT <- track; + trackT <- track for (cc in seq_along(allCallColumns)) { - callColumn <- allCallColumns[cc]; - callLabel <- allCallLabels[cc]; + callColumn <- allCallColumns[cc] + callLabel <- allCallLabels[cc] - h <- NULL; + h <- NULL if (callLabel == "AB") { if (track == "dh") { - h <- fit$params$deltaAB; - label <- expression(Delta[AB]); + h <- fit$params$deltaAB + label <- expression(Delta[AB]) } } else if (callLabel == "LOH") { if (regexpr("c1", track) != -1L) { - h <- fit$params$deltaLowC1; - label <- expression(Delta[LOH]); - trackT <- "c1"; + h <- fit$params$deltaLowC1 + label <- expression(Delta[LOH]) + trackT <- "c1" } } else if (callLabel == "NTCN") { if (track == "tcn") { - h <- fit$params$ntcnRange; - label <- c(expression(Delta[-NTCN]), expression(Delta[+NTCN])); + h <- fit$params$ntcnRange + label <- c(expression(Delta[-NTCN]), expression(Delta[+NTCN])) } } if (!is.null(h)) { - abline(h=h, lty=ltyCL[trackT], lwd=2, col=colCL[trackT]); + abline(h=h, lty=ltyCL[trackT], lwd=2, col=colCL[trackT]) for (ss in 1:2) { - side <- c(2,4)[ss]; - adj <- c(1.2,-0.2)[ss]; - mtext(side=side, at=h, label, adj=adj, las=2, xpd=TRUE); + side <- c(2,4)[ss] + adj <- c(1.2,-0.2)[ss] + mtext(side=side, at=h, label, adj=adj, las=2, xpd=TRUE) } } } # for (cc in ...) } # if (callThresholds) - drawXLabelTicks(); + drawXLabelTicks() if (boundaries) { - abline(v=vs, lty=1, lwd=2); + abline(v=vs, lty=1, lwd=2) } if (knownSegments) { - colT <- getKnownSegmentColor(); - ltyT <- getKnownSegmentLty(); - drawKnownSegments(fit, col=colT, lty=ltyT); + colT <- getKnownSegmentColor() + ltyT <- getKnownSegmentLty() + drawKnownSegments(fit, col=colT, lty=ltyT) } - axis(side=2); box(); - if (!is.null(onEnd)) onEnd(gh=gh); + axis(side=2); box() + if (!is.null(onEnd)) onEnd(gh=gh) - verbose && exit(verbose); + verbose && exit(verbose) } # for (tt ...) - verbose && exit(verbose); + verbose && exit(verbose) - invisible(gh); + invisible(gh) }, private=TRUE) # plotTracksManyChromosomes() - - - -############################################################################ -# HISTORY: -# 2013-10-28 -# o Now plotTracksManyChromosomes() also supports -# tracks=c("c1,c2", "c1", "c2"). -# 2013-10-25 -# o Now plotTracksManyChromosomes() uses the locus data field 'rho' -# when plotting DH locus-level data. It only recalculates it from -# the tumor BAFs if the DH signals are not available - if so a -# warning is generated. -# 2013-10-20 -# o BUG FIX: plotTracksManyChromosomes() for PairedPSCBS would use -# Blim=Clim, regardless of what argument 'Blim' was. -# 2013-04-13 -# o Added argument 'boundaries' to plotTracksManyChromosomes(). -# 2013-04-11 -# o BUG FIX: plotTracksManyChromosomes(..., callLoci=TRUE) would color -# loci incorrectly if more than one chromosome are plotted. -# 2013-04-05 -# o Now plotTracks() passes more information to onBegin(gh)/onEnd(gh) -# hooks via the graphical handle object, cf. str(gh). -# 2013-03-21 -# o Added argument 'knownSegments' to plotTracksManyChromosomes(). -# o Generalized plotTracksManyChromosomes() a bit such that it can be -# used for a single chromosome as well. All col and lty annotations -# are now specified at the very top of the function. -# 2013-03-18 -# o Now plotTracksManyChromosomes() draws AB and LOH call thresholds. -# 2012-09-23 -# o Now plotTracks() [and plotTracksManyChromosomes()] draws segment levels -# in TCN-C2-C1 order, and then goes back and draws C2 and TCN with dashed -# lines, just in case C1 is overlapping C2 and C2 is overlapping TCN. -# 2012-09-21 -# o ROBUSTNESS: Now drawChangePointsC1C2() and arrowsC1C2() for PairedPSCBS -# makes sure to retrieve segments with NA splitters between chromosomes -# and gaps. -# 2012-02-29 -# o BUG FIX: plotTracks(..., add=TRUE) for PairedPSCBS would add TCNs -# when BAFs and DHs where intended. -# 2012-02-22 -# o BUG FIX: Argument 'calls' of plotTracks() for PairedPSCBS was ignored -# if more than one chromosome was plotted. -# 2011-12-03 -# o Added drawChangePointsC1C2() for PairedPSCBS. -# o Added drawChangePoints() for PairedPSCBS. -# 2011-11-12 -# o Added argument col="#00000033" to plotC1C2() and linesC1C2(). -# o Added argument 'oma' and 'mar' to plotTracksManyChromosomes() for -# PairedPSCBS for setting graphical parameters when 'add' == FALSE. -# 2011-09-30 -# o GENERALIZATION: Now drawLevels() for PairedPSCBS allows for drawing -# segmentation results in 'betaT' space. -# 2011-07-10 -# o BUG FIX: tileChromosomes() for PairedPSCBS was still assuming the -# old naming convention of column names. -# o ROBUSTNESS: Fixed partial argument matchings in arrowsC1C2() and -# arrowsDeltaC1C2() for PairedPSCBS. -# 2011-06-14 -# o Updated code to recognize new column names. -# 2011-01-19 -# o Added argument 'subplots'. -# 2011-01-18 -# o DOCUMENTATION: Documented more plotTracks() arguments for PairedPSCBS. -# o Now plotTracks(..., add=TRUE) for PairedPSCBS plots to the current -# figure/panel. -# o Now plotTracks(..., add=FALSE) for PairedPSCBS only sets up subplots -# if argument 'tracks' specifies more than one panel. -# o Added argument 'col' to plotTracks() for PairedPSCBS. -# 2011-01-12 -# o Added argument 'changepoints' to plotTracks() for PairedPSCBS for -# highlightning change-point locations as vertical lines. -# 2010-12-01 -# o Now using a default 'seed' for plotTracksManyChromosomes() such -# that the scatter data in the plots are reproducible by default. -# 2010-11-27 -# o BUG FIX: plotTracksManyChromosomes() would annotate called regions -# incorrectly. -# o Added more verbouse output to plotTracksManyChromosomes(). -# o Added missing argument 'verbose' to plotTracksManyChromosomes(). -# o plotTracksManyChromosomes() gained argument 'scatter'. -# o REPRODUCIBILITY: plotTracksManyChromosomes() for PairedPSCBS gained -# argument 'seed', because if 'subset' is specified then a random -# subset of the data points are displayed. -# 2010-11-26 -# o Added optional argument 'chromosomes' to plotTracks() to plot a -# subset of all chromosomes. -# o Now the default confidence intervals for plotTracks() is (0.05,0.95), -# if existing. -# 2010-11-22 -# o ROBUSTNESS: Now drawConfidenceBands() of PairedPSCBS silently does -# nothing if the requested bootstrap quantiles are available. -# o Added argument 'calls' to plotTracks() and plotTracksManyChromosomes() -# for highlighing called regions. -# 2010-11-21 -# o Now plotTracks() supports tracks "tcn,c1", "tcn,c2" and "c1,c2" too. -# o Added argument 'xlim' to plotTracks() making it possible to zoom in. -# o Now plotTracks() and plotTracksManyChromosomes() draws confidence -# bands, iff argument quantiles is given. -# o Added drawConfidenceBands() for PairedPSCBS. -# 2010-11-09 -# o Added argument 'cex=1' to plotTracks(). -# o BUG FIX: It was not possible to plot BAF tracks with plotTracks(). -# 2010-10-20 -# o Added arguments 'onBegin' and 'onEnd' to plotTracksManyChromosomes(). -# 2010-10-18 -# o Now plotTracks() can plot whole-genome data. -# o Now plotTracks() utilized plotTracksManyChromosomes() if there is -# more than one chromosome. -# o Added internal plotTracksManyChromosomes(). -# o Added internal tileChromosomes(). -# 2010-10-03 -# o Now the default is that plotTracks() for PairedPSCBS generated three -# panels: (1) TCN, (2) DH, and (3) C1+C2+TCN. -# o Added plotTracks() to be more explicit than just plot(). -# o Added argument 'xScale' to plot() for PairedPSCBS. -# o Now plot() for PairedPSCBS adds a chromosome tag. -# 2010-09-21 -# o Added argument 'what' to plot() for PairedPSCBS. -# o Added postsegmentTCN() for PairedPSCBS. -# 2010-09-19 -# o BUG FIX: plot() used non-defined nbrOfLoci; now length(x). -# 2010-09-15 -# o Added subsetBySegments(). -# o Added linesC1C2() and arrowsC1C2(). -# o Now the default 'cex' for pointsC1C2() corresponds to 'dh.num.mark'. -# o Now extractTotalAndDH() also returns 'dh.num.mark'. -# 2010-09-08 -# o Added argument 'add=FALSE' to plot(). -# o Added plotC1C2(). -# o Added extractTotalAndDH() and extractMinorMajorCNs(). -# 2010-09-04 -# o Added drawLevels() for PairedPSCBS. -# o Added as.data.frame() and print() for PairedPSCBS. -# 2010-09-03 -# o Added plot() for PairedPSCBS. -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.PLOT.R r-cran-pscbs-0.64.0/R/PairedPSCBS.PLOT.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.PLOT.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.PLOT.R 2018-08-12 21:30:44.000000000 +0000 @@ -54,93 +54,93 @@ setMethodS3("plotTracks1", "PairedPSCBS", function(x, tracks=c("tcn", "dh", "tcn,c1,c2", "tcn,c1", "tcn,c2", "c1,c2", "betaN", "betaT", "betaTN")[1:3], scatter="*", calls=".*", pch=".", col=NULL, cex=1, changepoints=FALSE, grid=FALSE, quantiles=c(0.05,0.95), xlim=NULL, Clim=c(0,3*ploidy(x)), Blim=c(0,1), xScale=1e-6, ..., add=FALSE, subplots=!add && (length(tracks) > 1), verbose=FALSE) { # To please R CMD check - fit <- x; + fit <- x # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'add': - add <- Arguments$getLogical(add); + add <- Arguments$getLogical(add) # Argument 'Clim' & 'Blim': if (!add) { Clim <- Arguments$getNumerics(Clim, length=c(2L,2L), - disallow=c("Inf", "NA", "NaN")); + disallow=c("Inf", "NA", "NaN")) Blim <- Arguments$getNumerics(Blim, length=c(2L,2L), - disallow=c("Inf", "NA", "NaN")); + disallow=c("Inf", "NA", "NaN")) } # Argument 'fit': if (nbrOfChromosomes(fit) >= 1L) { - return(plotTracksManyChromosomes(fit, tracks=tracks, scatter=scatter, calls=calls, pch=pch, quantiles=quantiles, Clim=Clim, Blim=Blim, xScale=xScale, ..., add=add, subplots=subplots, verbose=verbose)); + return(plotTracksManyChromosomes(fit, tracks=tracks, scatter=scatter, calls=calls, pch=pch, quantiles=quantiles, Clim=Clim, Blim=Blim, xScale=xScale, ..., add=add, subplots=subplots, verbose=verbose)) } # Argument 'tracks': - knownTracks <- c("tcn", "dh", "tcn,c1,c2", "tcn,c1", "tcn,c2", "c1,c2", "betaN", "betaT", "betaTN"); - tracks <- match.arg(tracks, choices=knownTracks, several.ok=TRUE); - tracks <- unique(tracks); + knownTracks <- c("tcn", "dh", "tcn,c1,c2", "tcn,c1", "tcn,c2", "c1,c2", "betaN", "betaT", "betaTN") + tracks <- match.arg(tracks, choices=knownTracks, several.ok=TRUE) + tracks <- unique(tracks) # Argument 'scatter': if (!is.null(scatter)) { - scatter <- Arguments$getCharacter(scatter); + scatter <- Arguments$getCharacter(scatter) if (scatter == "*") { - scatter <- tracks; + scatter <- tracks } else { - scatterT <- strsplit(scatter, split=",", fixed=TRUE); - tracksT <- strsplit(tracks, split=",", fixed=TRUE); - stopifnot(all(is.element(scatterT, tracksT))); + scatterT <- strsplit(scatter, split=",", fixed=TRUE) + tracksT <- strsplit(tracks, split=",", fixed=TRUE) + .stop_if_not(all(is.element(scatterT, tracksT))) # Not needed anymore - scatterT <- tracksT <- NULL; + scatterT <- tracksT <- NULL } } # Argument 'calls': if (!is.null(calls)) { - calls <- sapply(calls, FUN=Arguments$getRegularExpression); + calls <- sapply(calls, FUN=Arguments$getRegularExpression) } # Argument 'changepoints': - changepoints <- Arguments$getLogical(changepoints); + changepoints <- Arguments$getLogical(changepoints) # Argument 'grid': - grid <- Arguments$getLogical(grid); + grid <- Arguments$getLogical(grid) # Argument 'xlim': if (!is.null(xlim)) { - xlim <- Arguments$getNumerics(xlim, length=c(2,2)); + xlim <- Arguments$getNumerics(xlim, length=c(2,2)) } # Argument 'xScale': - xScale <- Arguments$getNumeric(xScale, range=c(0,Inf)); + xScale <- Arguments$getNumeric(xScale, range=c(0,Inf)) # Argument 'subplots': - subplots <- Arguments$getLogical(subplots); + subplots <- Arguments$getLogical(subplots) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Plotting PSCN tracks"); + verbose && enter(verbose, "Plotting PSCN tracks") # Extract the input data - data <- getLocusData(fit); + data <- getLocusData(fit) if (is.null(data)) { - throw("Cannot plot segmentation results. No input data available."); + throw("Cannot plot segmentation results. No input data available.") } - chromosomes <- getChromosomes(fit); - chromosome <- chromosomes[1]; - x <- data$x; - CT <- data$CT; - rho <- data$rho; - betaT <- data$betaT; - betaN <- data$betaN; - betaTN <- data$betaTN; - muN <- data$muN; + chromosomes <- getChromosomes(fit) + chromosome <- chromosomes[1] + x <- data$x + CT <- data$CT + rho <- data$rho + betaT <- data$betaT + betaN <- data$betaN + betaTN <- data$betaTN + muN <- data$muN rho <- data$rho hasDH <- !is.null(rho) if (hasDH) { @@ -156,55 +156,55 @@ # If 'rho' is not available, recalculate it from tumor BAFs. # NOTE: This should throw an error in the future. /HB 2013-10-25 if (!hasDH) { - rho <- rep(NA_real_, times=nbrOfLoci); - rho[isHet] <- 2*abs(betaTN[isHet]-1/2); - warning(sprintf("Locus-level DH signals ('rho') were not available in the %s object and therefore recalculated from the TumorBoost-normalized tumor BAFs ('betaTN').", class(fit)[1L])); + rho <- rep(NA_real_, times=nbrOfLoci) + rho[isHet] <- 2*abs(betaTN[isHet]-1/2) + warning(sprintf("Locus-level DH signals ('rho') were not available in the %s object and therefore recalculated from the TumorBoost-normalized tumor BAFs ('betaTN').", class(fit)[1L])) } # Extract the segmentation - segs <- as.data.frame(fit); + segs <- as.data.frame(fit) # Identify available calls if (!is.null(calls)) { - verbose && enter(verbose, "Identifying calls"); + verbose && enter(verbose, "Identifying calls") - pattern <- "Call$"; - callColumns <- grep(pattern, colnames(segs), value=TRUE); + pattern <- "Call$" + callColumns <- grep(pattern, colnames(segs), value=TRUE) if (length(callColumns) > 0) { keep <- sapply(calls, FUN=function(pattern) { - (regexpr(pattern, callColumns) != -1); - }); + (regexpr(pattern, callColumns) != -1) + }) if (is.matrix(keep)) { - keep <- rowAnys(keep); + keep <- rowAnys(keep) } - callColumns <- callColumns[keep]; - callLabels <- gsub(pattern, "", callColumns); - callLabels <- toupper(callLabels); + callColumns <- callColumns[keep] + callLabels <- gsub(pattern, "", callColumns) + callLabels <- toupper(callLabels) } - verbose && cat(verbose, "Call columns:"); - verbose && print(verbose, callColumns); + verbose && cat(verbose, "Call columns:") + verbose && print(verbose, callColumns) - verbose && exit(verbose); + verbose && exit(verbose) } else { - callColumns <- NULL; + callColumns <- NULL } if (chromosome != 0) { - chrTag <- sprintf("Chr%02d", chromosome); + chrTag <- sprintf("Chr%02d", chromosome) } else { - chrTag <- ""; + chrTag <- "" } if (xScale != 1) { - x <- xScale * x; + x <- xScale * x if (!is.null(xlim)) { - xlim <- xScale * xlim; + xlim <- xScale * xlim } } if (subplots) { - subplots(length(tracks), ncol=1); - par(mar=c(1,4,1,2)+1); + subplots(length(tracks), ncol=1) + par(mar=c(1,4,1,2)+1) } # Color loci by heterozygous vs homozygous @@ -215,73 +215,73 @@ } for (tt in seq_along(tracks)) { - track <- tracks[tt]; + track <- tracks[tt] verbose && enter(verbose, sprintf("Track #%d ('%s') of %d", - tt, track, length(tracks))); + tt, track, length(tracks))) if (!is.null(scatter)) { - pchT <- pch; - colT <- col; + pchT <- pch + colT <- col } else { - pchT <- NA; - colT <- NA; + pchT <- NA + colT <- NA } if (track == "tcn") { - colT <- ifelse(is.null(colT), "black", colT); + colT <- ifelse(is.null(colT), "black", colT) if (add) { - points(x, CT, pch=pchT, col=colT, cex=cex); + points(x, CT, pch=pchT, col=colT, cex=cex) } else { - plot(x, CT, pch=pchT, col=colT, cex=cex, xlim=xlim, ylim=Clim, ylab="TCN"); - stext(side=3, pos=1, chrTag); + plot(x, CT, pch=pchT, col=colT, cex=cex, xlim=xlim, ylim=Clim, ylab="TCN") + stext(side=3, pos=1, chrTag) if (grid) { - abline(h=seq(from=0, to=Clim[2], by=2), lty=3, col="gray"); - abline(h=0, lty=1, col="black"); + abline(h=seq(from=0, to=Clim[2], by=2), lty=3, col="gray") + abline(h=0, lty=1, col="black") } - drawConfidenceBands(fit, what="tcn", quantiles=quantiles, col="purple", xScale=xScale); - drawLevels(fit, what="tcn", col="purple", xScale=xScale); + drawConfidenceBands(fit, what="tcn", quantiles=quantiles, col="purple", xScale=xScale) + drawLevels(fit, what="tcn", col="purple", xScale=xScale) } } if (is.element(track, c("tcn,c1,c2", "tcn,c1", "tcn,c2", "c1,c2"))) { - colT <- ifelse(is.null(colT), "black", colT); - subtracks <- strsplit(track, split=",", fixed=TRUE)[[1]]; - ylab <- paste(toupper(subtracks), collapse=", "); + colT <- ifelse(is.null(colT), "black", colT) + subtracks <- strsplit(track, split=",", fixed=TRUE)[[1]] + ylab <- paste(toupper(subtracks), collapse=", ") if (add) { - points(x, CT, pch=pchT, cex=cex, col=colT); + points(x, CT, pch=pchT, cex=cex, col=colT) } else { - plot(x, CT, pch=pchT, cex=cex, col=colT, xlim=xlim, ylim=Clim, ylab=ylab); - stext(side=3, pos=1, chrTag); + plot(x, CT, pch=pchT, cex=cex, col=colT, xlim=xlim, ylim=Clim, ylab=ylab) + stext(side=3, pos=1, chrTag) if (grid) { - abline(h=seq(from=0, to=Clim[2], by=2), lty=3, col="gray"); - abline(h=0, lty=1, col="black"); + abline(h=seq(from=0, to=Clim[2], by=2), lty=3, col="gray") + abline(h=0, lty=1, col="black") } if (is.element("tcn", subtracks)) { - drawConfidenceBands(fit, what="tcn", quantiles=quantiles, col="purple", xScale=xScale); + drawConfidenceBands(fit, what="tcn", quantiles=quantiles, col="purple", xScale=xScale) } if (is.element("c2", subtracks)) { - drawConfidenceBands(fit, what="c2", quantiles=quantiles, col="red", xScale=xScale); + drawConfidenceBands(fit, what="c2", quantiles=quantiles, col="red", xScale=xScale) } if (is.element("c1", subtracks)) { - drawConfidenceBands(fit, what="c1", quantiles=quantiles, col="blue", xScale=xScale); + drawConfidenceBands(fit, what="c1", quantiles=quantiles, col="blue", xScale=xScale) } if (is.element("tcn", subtracks)) { - drawLevels(fit, what="tcn", col="purple", xScale=xScale); + drawLevels(fit, what="tcn", col="purple", xScale=xScale) } if (is.element("c2", subtracks)) { - drawLevels(fit, what="c2", col="red", xScale=xScale); + drawLevels(fit, what="c2", col="red", xScale=xScale) # In case C2 overlaps with TCN if (is.element("tcn", subtracks)) { - drawLevels(fit, what="tcn", col="purple", lty="22", xScale=xScale); + drawLevels(fit, what="tcn", col="purple", lty="22", xScale=xScale) } } if (is.element("c1", subtracks)) { - drawLevels(fit, what="c1", col="blue", xScale=xScale); + drawLevels(fit, what="c1", col="blue", xScale=xScale) # In case C1 overlaps with C1 if (is.element("c2", subtracks)) { - drawLevels(fit, what="c2", col="red", lty="22", xScale=xScale); + drawLevels(fit, what="c2", col="red", lty="22", xScale=xScale) if (is.element("tcn", subtracks)) { - drawLevels(fit, what="tcn", col="purple", lty="22", xScale=xScale); + drawLevels(fit, what="tcn", col="purple", lty="22", xScale=xScale) } } } @@ -289,44 +289,44 @@ } if (track == "betaN") { - colT <- ifelse(is.null(colT), colMu, colT); + colT <- ifelse(is.null(colT), colMu, colT) if (add) { - points(x, betaN, pch=pchT, cex=cex, col="black"); + points(x, betaN, pch=pchT, cex=cex, col="black") } else { - plot(x, betaN, pch=pchT, cex=cex, col=colT, xlim=xlim, ylim=Blim, ylab=expression(BAF[N])); - stext(side=3, pos=1, chrTag); + plot(x, betaN, pch=pchT, cex=cex, col=colT, xlim=xlim, ylim=Blim, ylab=expression(BAF[N])) + stext(side=3, pos=1, chrTag) } } if (track == "betaT") { - colT <- ifelse(is.null(colT), colMu, colT); + colT <- ifelse(is.null(colT), colMu, colT) if (add) { - points(x, betaT, pch=pchT, cex=cex, col="black"); + points(x, betaT, pch=pchT, cex=cex, col="black") } else { - plot(x, betaT, pch=pchT, cex=cex, col=colT, xlim=xlim, ylim=Blim, ylab=expression(BAF[T])); - stext(side=3, pos=1, chrTag); + plot(x, betaT, pch=pchT, cex=cex, col=colT, xlim=xlim, ylim=Blim, ylab=expression(BAF[T])) + stext(side=3, pos=1, chrTag) } } if (track == "betaTN") { - colT <- ifelse(is.null(colT), colMu, colT); + colT <- ifelse(is.null(colT), colMu, colT) if (add) { - points(x, betaTN, pch=pchT, cex=cex, col="black"); + points(x, betaTN, pch=pchT, cex=cex, col="black") } else { - plot(x, betaTN, pch=pchT, cex=cex, col=colT, xlim=xlim, ylim=Blim, ylab=expression(BAF[T]^"*")); - stext(side=3, pos=1, chrTag); + plot(x, betaTN, pch=pchT, cex=cex, col=colT, xlim=xlim, ylim=Blim, ylab=expression(BAF[T]^"*")) + stext(side=3, pos=1, chrTag) } } if (track == "dh") { - colT <- ifelse(is.null(colT), colMu[isHet], colT); + colT <- ifelse(is.null(colT), colMu[isHet], colT) if (add) { - points(x, rho, pch=pchT, cex=cex, col="black"); + points(x, rho, pch=pchT, cex=cex, col="black") } else { - plot(x, rho, pch=pchT, cex=cex, col=colT, xlim=xlim, ylim=Blim, ylab="DH"); - stext(side=3, pos=1, chrTag); - drawConfidenceBands(fit, what="dh", quantiles=quantiles, col="orange", xScale=xScale); - drawLevels(fit, what="dh", col="orange", xScale=xScale); + plot(x, rho, pch=pchT, cex=cex, col=colT, xlim=xlim, ylim=Blim, ylab="DH") + stext(side=3, pos=1, chrTag) + drawConfidenceBands(fit, what="dh", quantiles=quantiles, col="orange", xScale=xScale) + drawLevels(fit, what="dh", col="orange", xScale=xScale) } } @@ -334,7 +334,7 @@ # Draw change points? # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (changepoints) { - drawChangePoints(fit, col="#666666", xScale=xScale); + drawChangePoints(fit, col="#666666", xScale=xScale) } @@ -343,85 +343,85 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (length(callColumns) > 0) { for (cc in seq_along(callColumns)) { - callColumn <- callColumns[cc]; - callLabel <- callLabels[cc]; + callColumn <- callColumns[cc] + callLabel <- callLabels[cc] verbose && enter(verbose, sprintf("Call #%d ('%s') of %d", - cc, callLabel, length(callColumns))); + cc, callLabel, length(callColumns))) - verbose && cat(verbose, "Column: ", callColumn); + verbose && cat(verbose, "Column: ", callColumn) - segsT <- segs[,c("dhStart", "dhEnd", callColumn)]; - isCalled <- which(segsT[[callColumn]]); - segsT <- segsT[isCalled,1:2,drop=FALSE]; + segsT <- segs[,c("dhStart", "dhEnd", callColumn)] + isCalled <- which(segsT[[callColumn]]) + segsT <- segsT[isCalled,1:2,drop=FALSE] verbose && printf(verbose, "Number of segments called %s: %d\n", - callLabel, nrow(segsT)); - segsT <- xScale * segsT; + callLabel, nrow(segsT)) + segsT <- xScale * segsT - verbose && str(verbose, segsT); + verbose && str(verbose, segsT) - side <- 2*((cc+1) %% 2) + 1; + side <- 2*((cc+1) %% 2) + 1 # For each segment called... for (ss in seq_len(nrow(segsT))) { - x0 <- segsT[ss,1,drop=TRUE]; - x1 <- segsT[ss,2,drop=TRUE]; - abline(v=c(x0,x1), lty=3, col="gray"); - xMid <- (x0+x1)/2; - mtext(side=side, at=xMid, line=-1, cex=0.7, col="#666666", callLabel); + x0 <- segsT[ss,1,drop=TRUE] + x1 <- segsT[ss,2,drop=TRUE] + abline(v=c(x0,x1), lty=3, col="gray") + xMid <- (x0+x1)/2 + mtext(side=side, at=xMid, line=-1, cex=0.7, col="#666666", callLabel) } # for (ss in ...) - verbose && exit(verbose); + verbose && exit(verbose) } # for (cc in ...) # Add call parameter estimates, e.g. deltaAB for (cc in seq_along(callColumns)) { - callColumn <- callColumns[cc]; - callLabel <- callLabels[cc]; - h <- NULL; + callColumn <- callColumns[cc] + callLabel <- callLabels[cc] + h <- NULL if (callLabel == "AB") { if (track == "dh") { - h <- fit$params$deltaAB; - label <- expression(Delta[AB]); - colT <- "orange"; + h <- fit$params$deltaAB + label <- expression(Delta[AB]) + colT <- "orange" } } else if (callLabel == "LOH") { if (regexpr("c1", track) != -1L) { - h <- fit$params$deltaLowC1; - label <- expression(Delta[LOH]); - colT <- "blue"; + h <- fit$params$deltaLowC1 + label <- expression(Delta[LOH]) + colT <- "blue" } } else if (callLabel == "NTCN") { if (track == "tcn") { - h <- fit$params$ntcnRange; - label <- c(expression(Delta[-NTCN]), expression(Delta[+NTCN])); - colT <- "purple"; + h <- fit$params$ntcnRange + label <- c(expression(Delta[-NTCN]), expression(Delta[+NTCN])) + colT <- "purple" } } if (!is.null(h)) { - abline(h=h, lty=4, lwd=2, col=colT); + abline(h=h, lty=4, lwd=2, col=colT) for (ss in 1:2) { - side <- c(2,4)[ss]; - adj <- c(1.2,-0.2)[ss]; - mtext(side=side, at=h, label, adj=adj, las=2, xpd=TRUE); + side <- c(2,4)[ss] + adj <- c(1.2,-0.2)[ss] + mtext(side=side, at=h, label, adj=adj, las=2, xpd=TRUE) } } } # for (cc in ...) } # if (length(callColumns) > 0) - verbose && exit(verbose); + verbose && exit(verbose) } # for (tt ...) - verbose && exit(verbose); + verbose && exit(verbose) - invisible(); + invisible() }, private=TRUE) # plotTracks1() setMethodS3("plotTracks", "PairedPSCBS", function(fit, ...) { - plotTracksManyChromosomes(fit, ...); + plotTracksManyChromosomes(fit, ...) }) setMethodS3("plot", "PairedPSCBS", function(x, ...) { - plotTracks(x, ...); + plotTracks(x, ...) }, private=TRUE) @@ -429,54 +429,54 @@ # WORKAROUND: If Hmisc is loaded after R.utils, it provides a buggy # capitalize() that overrides the one we want to use. Until PSCBS # gets a namespace, we do the following workaround. /HB 2011-07-14 - capitalize <- R.utils::capitalize; + capitalize <- R.utils::capitalize # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'what': - what <- match.arg(what); + what <- match.arg(what) # Argument 'xScale': - xScale <- Arguments$getNumeric(xScale, range=c(0,Inf)); + xScale <- Arguments$getNumeric(xScale, range=c(0,Inf)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Tile chromosomes # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - fitT <- tileChromosomes(fit); + fitT <- tileChromosomes(fit) # Get segmentation results - segs <- as.data.frame(fitT); + segs <- as.data.frame(fitT) if (what == "betaTN") { - whatT <- "dh"; + whatT <- "dh" } else { - whatT <- what; + whatT <- what } # Extract subset of segments - fields <- c("start", "end"); - fields <- sprintf("%s%s", ifelse(what == "tcn", what, "dh"), capitalize(fields)); - fields <- c(fields, sprintf("%sMean", whatT)); - segsT <- segs[,fields, drop=FALSE]; - segsT <- unique(segsT); + fields <- c("start", "end") + fields <- sprintf("%s%s", ifelse(what == "tcn", what, "dh"), capitalize(fields)) + fields <- c(fields, sprintf("%sMean", whatT)) + segsT <- segs[,fields, drop=FALSE] + segsT <- unique(segsT) if (what == "betaTN") { - dh <- segsT[,"dhMean"]; - bafU <- (1 + dh)/2; - bafL <- (1 - dh)/2; - segsT[,3] <- bafU; - segsT[,4] <- bafL; + dh <- segsT[,"dhMean"] + bafU <- (1 + dh)/2 + bafL <- (1 - dh)/2 + segsT[,3] <- bafU + segsT[,4] <- bafL } # Reuse drawLevels() for the DNAcopy class for (cc in seq(from=3, to=ncol(segsT))) { - segsTT <- segsT[,c(1:2,cc)]; - colnames(segsTT) <- c("loc.start", "loc.end", "seg.mean"); - dummy <- list(output=segsTT); - class(dummy) <- "DNAcopy"; - drawLevels(dummy, lend=lend, xScale=xScale, ...); + segsTT <- segsT[,c(1:2,cc)] + colnames(segsTT) <- c("loc.start", "loc.end", "seg.mean") + dummy <- list(output=segsTT) + class(dummy) <- "DNAcopy" + drawLevels(dummy, lend=lend, xScale=xScale, ...) } # for (cc ...) }, private=TRUE) @@ -485,63 +485,63 @@ # WORKAROUND: If Hmisc is loaded after R.utils, it provides a buggy # capitalize() that overrides the one we want to use. Until PSCBS # gets a namespace, we do the following workaround. /HB 2011-07-14 - capitalize <- R.utils::capitalize; + capitalize <- R.utils::capitalize # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'what': - what <- match.arg(what); + what <- match.arg(what) # Argument 'quantiles': if (!is.null(quantiles)) { - quantiles <- Arguments$getNumerics(quantiles, range=c(0,1), length=c(2,2)); + quantiles <- Arguments$getNumerics(quantiles, range=c(0,1), length=c(2,2)) } # Argument 'xScale': - xScale <- Arguments$getNumeric(xScale, range=c(0,Inf)); + xScale <- Arguments$getNumeric(xScale, range=c(0,Inf)) # Nothing todo? if (is.null(quantiles)) { - return(); + return() } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Tile chromosomes # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - fitT <- tileChromosomes(fit); + fitT <- tileChromosomes(fit) # Get segmentation results - segs <- as.data.frame(fitT); + segs <- as.data.frame(fitT) # Extract subset of segments - fields <- c("start", "end"); - fields <- sprintf("%s%s", ifelse(what == "tcn", what, "dh"), capitalize(fields)); + fields <- c("start", "end") + fields <- sprintf("%s%s", ifelse(what == "tcn", what, "dh"), capitalize(fields)) - tags <- sprintf("%g%%", 100*quantiles); - qFields <- sprintf("%s_%s", what, tags); + tags <- sprintf("%g%%", 100*quantiles) + qFields <- sprintf("%s_%s", what, tags) # Nothing todo? if (!all(is.element(qFields, colnames(segs)))) { - return(); + return() } - fields <- c(fields, qFields); + fields <- c(fields, qFields) - segsT <- segs[,fields, drop=FALSE]; - segsT <- unique(segsT); + segsT <- segs[,fields, drop=FALSE] + segsT <- unique(segsT) # Rescale x-axis - segsT[,1:2] <- xScale * segsT[,1:2]; + segsT[,1:2] <- xScale * segsT[,1:2] - colQ <- col2rgb(col, alpha=TRUE); - colQ["alpha",] <- alpha*colQ["alpha",]; - colQ <- rgb(red=colQ["red",], green=colQ["green",], blue=colQ["blue",], alpha=colQ["alpha",], maxColorValue=255); + colQ <- col2rgb(col, alpha=TRUE) + colQ["alpha",] <- alpha*colQ["alpha",] + colQ <- rgb(red=colQ["red",], green=colQ["green",], blue=colQ["blue",], alpha=colQ["alpha",], maxColorValue=255) for (kk in seq_len(nrow(segsT))) { - rect(xleft=segsT[kk,1], xright=segsT[kk,2], ybottom=segsT[kk,3], ytop=segsT[kk,4], col=colQ, border=FALSE); + rect(xleft=segsT[kk,1], xright=segsT[kk,2], ybottom=segsT[kk,3], ytop=segsT[kk,4], col=colQ, border=FALSE) } }, private=TRUE) @@ -550,51 +550,51 @@ setMethodS3("plotC1C2", "PairedPSCBS", function(fit, ..., xlab=expression(C[1]), ylab=expression(C[2]), Clim=c(0,2*ploidy(fit))) { # Argument 'Clim': Clim <- Arguments$getNumerics(Clim, length=c(2L,2L), - disallow=c("Inf", "NA", "NaN")); + disallow=c("Inf", "NA", "NaN")) - plot(NA, xlim=Clim, ylim=Clim, xlab=xlab, ylab=ylab); - abline(a=0, b=1, lty=3); - pointsC1C2(fit, ...); + plot(NA, xlim=Clim, ylim=Clim, xlab=xlab, ylab=ylab) + abline(a=0, b=1, lty=3) + pointsC1C2(fit, ...) }, private=TRUE) setMethodS3("pointsC1C2", "PairedPSCBS", function(fit, cex=NULL, col="#00000033", ...) { - data <- extractC1C2(fit); - X <- data[,1:2,drop=FALSE]; - n <- data[,4,drop=TRUE]; - n <- sqrt(n); - w <- n / sum(n, na.rm=TRUE); + data <- extractC1C2(fit) + X <- data[,1:2,drop=FALSE] + n <- data[,4,drop=TRUE] + n <- sqrt(n) + w <- n / sum(n, na.rm=TRUE) if (is.null(cex)) { - cex <- w; - cex <- cex / mean(cex, na.rm=TRUE); - cex <- cex + 1/2; + cex <- w + cex <- cex / mean(cex, na.rm=TRUE) + cex <- cex + 1/2 } - points(X, cex=cex, col=col, ...); + points(X, cex=cex, col=col, ...) }, private=TRUE) setMethodS3("linesC1C2", "PairedPSCBS", function(fit, ...) { - drawChangePointsC1C2(fit, ...); + drawChangePointsC1C2(fit, ...) }, private=TRUE) setMethodS3("drawChangePointsC1C2", "PairedPSCBS", function(fit, col="#00000033", labels=FALSE, lcol="#333333", cex=0.7, adj=c(+1.5,+0.5), ...) { - xy <- extractMinorMajorCNs(fit, splitters=TRUE, addGaps=TRUE); - xy <- xy[,1:2,drop=FALSE]; - res <- lines(xy, col=col, ...); + xy <- extractMinorMajorCNs(fit, splitters=TRUE, addGaps=TRUE) + xy <- xy[,1:2,drop=FALSE] + res <- lines(xy, col=col, ...) if (labels) { - n <- nrow(xy); - dxy <- (xy[-1,] - xy[-n,]) / 2; - xyMids <- xy[-n,] + dxy; - labels <- rownames(xy); - labels <- sprintf("%s-%s", labels[-n], labels[-1]); - text(xyMids, labels, cex=cex, col=lcol, adj=adj, ...); + n <- nrow(xy) + dxy <- (xy[-1,] - xy[-n,]) / 2 + xyMids <- xy[-n,] + dxy + labels <- rownames(xy) + labels <- sprintf("%s-%s", labels[-n], labels[-1]) + text(xyMids, labels, cex=cex, col=lcol, adj=adj, ...) } - invisible(res); + invisible(res) }, private=TRUE) @@ -603,47 +603,47 @@ setMethodS3("plotDeltaC1C2", "PairedPSCBS", function(fit, ..., xlab=expression(Delta*C[1]), ylab=expression(Delta*C[2]), Clim=c(-1,1)*ploidy(fit)) { # Argument 'Clim': Clim <- Arguments$getNumerics(Clim, length=c(2L,2L), - disallow=c("Inf", "NA", "NaN")); + disallow=c("Inf", "NA", "NaN")) - plot(NA, xlim=Clim, ylim=Clim, xlab=xlab, ylab=ylab); - abline(h=0, lty=3); - abline(v=0, lty=3); - pointsDeltaC1C2(fit, ...); + plot(NA, xlim=Clim, ylim=Clim, xlab=xlab, ylab=ylab) + abline(h=0, lty=3) + abline(v=0, lty=3) + pointsDeltaC1C2(fit, ...) }, private=TRUE) setMethodS3("pointsDeltaC1C2", "PairedPSCBS", function(fit, ...) { - data <- extractDeltaC1C2(fit); - X <- data[,1:2,drop=FALSE]; - points(X, ...); + data <- extractDeltaC1C2(fit) + X <- data[,1:2,drop=FALSE] + points(X, ...) }, private=TRUE) setMethodS3("linesDeltaC1C2", "PairedPSCBS", function(fit, ...) { - xy <- extractDeltaC1C2(fit); - xy <- xy[,1:2,drop=FALSE]; - lines(xy, ...); + xy <- extractDeltaC1C2(fit) + xy <- xy[,1:2,drop=FALSE] + lines(xy, ...) }, private=TRUE) setMethodS3("arrowsC1C2", "PairedPSCBS", function(fit, length=0.05, ...) { - xy <- extractMinorMajorCNs(fit, splitters=TRUE, addGaps=TRUE); - xy <- xy[,1:2,drop=FALSE]; - x <- xy[,1,drop=TRUE]; - y <- xy[,2,drop=TRUE]; - s <- seq_len(length(x)-1); - arrows(x0=x[s],y0=y[s], x1=x[s+1],y1=y[s+1], code=2, length=length, ...); + xy <- extractMinorMajorCNs(fit, splitters=TRUE, addGaps=TRUE) + xy <- xy[,1:2,drop=FALSE] + x <- xy[,1,drop=TRUE] + y <- xy[,2,drop=TRUE] + s <- seq_len(length(x)-1) + arrows(x0=x[s],y0=y[s], x1=x[s+1],y1=y[s+1], code=2, length=length, ...) }, private=TRUE) setMethodS3("arrowsDeltaC1C2", "PairedPSCBS", function(fit, length=0.05, ...) { - xy <- extractDeltaC1C2(fit); - xy <- xy[,1:2,drop=FALSE]; - x <- xy[,1,drop=TRUE]; - y <- xy[,2,drop=TRUE]; - s <- seq_len(length(x)-1); - arrows(x0=x[s],y0=y[s], x1=x[s+1],y1=y[s+1], code=2, length=length, ...); + xy <- extractDeltaC1C2(fit) + xy <- xy[,1:2,drop=FALSE] + x <- xy[,1,drop=TRUE] + y <- xy[,2,drop=TRUE] + s <- seq_len(length(x)-1) + arrows(x0=x[s],y0=y[s], x1=x[s+1],y1=y[s+1], code=2, length=length, ...) }, private=TRUE) @@ -655,119 +655,119 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'chrStarts': if (!is.null(chrStarts)) { - chrStarts <- Arguments$getDoubles(chrStarts); + chrStarts <- Arguments$getDoubles(chrStarts) } # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } # Nothing to do, i.e. already tiled? if (isTRUE(attr(fit, "tiledChromosomes"))) { - return(fit); + return(fit) } - verbose && enter(verbose, "Tile chromosomes"); + verbose && enter(verbose, "Tile chromosomes") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract data and segments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getLocusData(fit); - segs <- getSegments(fit); - knownSegments <- fit$params$knownSegments; + data <- getLocusData(fit) + segs <- getSegments(fit) + knownSegments <- fit$params$knownSegments # Identify all chromosome - chromosomes <- getChromosomes(fit); + chromosomes <- getChromosomes(fit) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Additional chromosome annotations # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.null(chrStarts)) { - xRange <- matrix(0, nrow=length(chromosomes), ncol=2); + xRange <- matrix(0, nrow=length(chromosomes), ncol=2) for (kk in seq_along(chromosomes)) { - chromosome <- chromosomes[kk]; - idxs <- which(data$chromosome == chromosome); - x <- data$x[idxs]; - r <- range(x, na.rm=TRUE); - r <- r / 1e6; - r[1] <- floor(r[1]); - r[2] <- ceiling(r[2]); - r <- 1e6 * r; - xRange[kk,] <- r; + chromosome <- chromosomes[kk] + idxs <- which(data$chromosome == chromosome) + x <- data$x[idxs] + r <- range(x, na.rm=TRUE) + r <- r / 1e6 + r[1] <- floor(r[1]) + r[2] <- ceiling(r[2]) + r <- 1e6 * r + xRange[kk,] <- r } # for (kk ...) - chrLength <- xRange[,2]; - chrStarts <- c(0, cumsum(chrLength)[-length(chrLength)]); - chrEnds <- chrStarts + chrLength; + chrLength <- xRange[,2] + chrStarts <- c(0, cumsum(chrLength)[-length(chrLength)]) + chrEnds <- chrStarts + chrLength # Not needed anymore - x <- idxs <- NULL; + x <- idxs <- NULL } # if (is.null(chrStarts)) - verbose && cat(verbose, "Chromosome starts:"); - chromosomeStats <- cbind(start=chrStarts, end=chrEnds, length=chrEnds-chrStarts); - verbose && print(chromosomeStats); + verbose && cat(verbose, "Chromosome starts:") + chromosomeStats <- cbind(start=chrStarts, end=chrEnds, length=chrEnds-chrStarts) + verbose && print(chromosomeStats) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Offset... # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - segFields <- grep("(Start|End)$", colnames(segs), value=TRUE); + segFields <- grep("(Start|End)$", colnames(segs), value=TRUE) # Sanity check - stopifnot(length(segFields) > 0); + .stop_if_not(length(segFields) > 0) for (kk in seq_along(chromosomes)) { - chromosome <- chromosomes[kk]; - chrTag <- sprintf("Chr%02d", chromosome); + chromosome <- chromosomes[kk] + chrTag <- sprintf("Chr%02d", chromosome) verbose && enter(verbose, sprintf("Chromosome #%d ('%s') of %d", - kk, chrTag, length(chromosomes))); + kk, chrTag, length(chromosomes))) # Get offset for this chromosome - offset <- chrStarts[kk]; - verbose && cat(verbose, "Offset: ", offset); + offset <- chrStarts[kk] + verbose && cat(verbose, "Offset: ", offset) # Offset data - idxs <- which(data$chromosome == chromosome); + idxs <- which(data$chromosome == chromosome) if (length(idxs) > 0L) { - data$x[idxs] <- offset + data$x[idxs]; + data$x[idxs] <- offset + data$x[idxs] } # Offset segmentation - idxs <- which(segs$chromosome == chromosome); + idxs <- which(segs$chromosome == chromosome) if (length(idxs) > 0L) { - segs[idxs,segFields] <- offset + segs[idxs,segFields]; + segs[idxs,segFields] <- offset + segs[idxs,segFields] } # Offset known segments - idxs <- which(knownSegments$chromosome == chromosome); + idxs <- which(knownSegments$chromosome == chromosome) if (length(idxs) > 0L) { - knownSegments[idxs,c("start", "end")] <- offset + knownSegments[idxs,c("start", "end")]; + knownSegments[idxs,c("start", "end")] <- offset + knownSegments[idxs,c("start", "end")] } - verbose && exit(verbose); + verbose && exit(verbose) } # for (kk ...) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Update results # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - fit$data <- data; - fit$output <- segs; - fit$chromosomeStats <- chromosomeStats; - fit$params$knownSegments <- knownSegments; -# fitT$params$chrOffsets <- chrOffsets; + fit$data <- data + fit$output <- segs + fit$chromosomeStats <- chromosomeStats + fit$params$knownSegments <- knownSegments +# fitT$params$chrOffsets <- chrOffsets # Flag object - attr(fit, "tiledChromosomes") <- TRUE; + attr(fit, "tiledChromosomes") <- TRUE - verbose && exit(verbose); + verbose && exit(verbose) - fit; + fit }, private=TRUE) # tileChromosomes() @@ -776,20 +776,20 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Tile chromosomes # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - fitT <- tileChromosomes(fit); - verbose && str(verbose, fitT); + fitT <- tileChromosomes(fit) + verbose && str(verbose, fitT) - segs <- getSegments(fitT, splitters=FALSE); - xStarts <- segs[,"tcnStart"]; - xEnds <- segs[,"tcnEnd"]; + segs <- getSegments(fitT, splitters=FALSE) + xStarts <- segs[,"tcnStart"] + xEnds <- segs[,"tcnEnd"] - xs <- sort(unique(c(xStarts, xEnds))); - abline(v=xScale*xs, lty=1, col=col); + xs <- sort(unique(c(xStarts, xEnds))) + abline(v=xScale*xs, lty=1, col=col) if (labels) { - xMids <- xScale * (xEnds + xStarts) / 2; - labels <- rownames(segs); - mtext(side=side, at=xMids, labels, line=line, cex=cex, col=col, xpd=xpd, ...); + xMids <- xScale * (xEnds + xStarts) / 2 + labels <- rownames(segs) + mtext(side=side, at=xMids, labels, line=line, cex=cex, col=col, xpd=xpd, ...) } }, protected=TRUE) @@ -797,175 +797,53 @@ setMethodS3("getChromosomeRanges", "PairedPSCBS", function(fit, ...) { # To please R CMD check, cf. subset() - chromosome <- NULL; rm(list="chromosome"); + chromosome <- NULL; rm(list="chromosome") - segs <- getSegments(fit, splitter=FALSE); - chromosomes <- sort(unique(segs$chromosome)); + segs <- getSegments(fit, splitter=FALSE) + chromosomes <- sort(unique(segs$chromosome)) # Allocate - naValue <- NA_real_; - res <- matrix(naValue, nrow=length(chromosomes), ncol=3); - rownames(res) <- chromosomes; - colnames(res) <- c("start", "end", "length"); + naValue <- NA_real_ + res <- matrix(naValue, nrow=length(chromosomes), ncol=3) + rownames(res) <- chromosomes + colnames(res) <- c("start", "end", "length") # Get start and end of each chromosome. for (ii in seq_len(nrow(res))) { - chr <- chromosomes[ii]; - segsII <- subset(segs, chromosome == chr); - res[ii,"start"] <- min(segsII$tcnStart, na.rm=TRUE); - res[ii,"end"] <- max(segsII$tcnEnd, na.rm=TRUE); + chr <- chromosomes[ii] + segsII <- subset(segs, chromosome == chr) + res[ii,"start"] <- min(segsII$tcnStart, na.rm=TRUE) + res[ii,"end"] <- max(segsII$tcnEnd, na.rm=TRUE) } # for (ii ...) - res[,"length"] <- res[,"end"] - res[,"start"] + 1L; + res[,"length"] <- res[,"end"] - res[,"start"] + 1L # Sanity check - stopifnot(nrow(res) == length(chromosomes)); + .stop_if_not(nrow(res) == length(chromosomes)) - res <- as.data.frame(res); - res <- cbind(chromosome=chromosomes, res); + res <- as.data.frame(res) + res <- cbind(chromosome=chromosomes, res) - res; + res }, protected=TRUE) # getChromosomeRanges() setMethodS3("getChromosomeOffsets", "PairedPSCBS", function(fit, resolution=1e6, ...) { # Argument 'resolution': if (!is.null(resolution)) { - resolution <- Arguments$getDouble(resolution, range=c(1,Inf)); + resolution <- Arguments$getDouble(resolution, range=c(1,Inf)) } - data <- getChromosomeRanges(fit, ...); - splits <- data[,"start"] + data[,"length"]; + data <- getChromosomeRanges(fit, ...) + splits <- data[,"start"] + data[,"length"] if (!is.null(resolution)) { - splits <- ceiling(splits / resolution); - splits <- resolution * splits; + splits <- ceiling(splits / resolution) + splits <- resolution * splits } - offsets <- c(0L, cumsum(splits)); - names(offsets) <- c(rownames(data), NA); + offsets <- c(0L, cumsum(splits)) + names(offsets) <- c(rownames(data), NA) - offsets; + offsets }, protected=TRUE) # getChromosomeOffsets() - - - -############################################################################ -# HISTORY: -# 2013-05-07 -# o Now tileChromosomes() no longer gives warnings on "max(i): no -# non-missing arguments to max; returning -Inf". -# 2013-04-18 -# o Now drawLevels() and drawConfidenceBands() also works for -# multiple chromosomes. -# 2013-03-18 -# o Now plotTracksManyChromosomes() draws AB and LOH call thresholds. -# 2012-09-23 -# o Now plotTracks() [and plotTracksManyChromosomes()] draws segment levels -# in TCN-C2-C1 order, and then goes back and draws C2 and TCN with dashed -# lines, just in case C1 is overlapping C2 and C2 is overlapping TCN. -# 2012-09-21 -# o ROBUSTNESS: Now drawChangePointsC1C2() and arrowsC1C2() for PairedPSCBS -# makes sure to retrieve segments with NA splitters between chromosomes -# and gaps. -# 2012-02-29 -# o BUG FIX: plotTracks(..., add=TRUE) for PairedPSCBS would add TCNs -# when BAFs and DHs where intended. -# 2012-02-22 -# o BUG FIX: Argument 'calls' of plotTracks() for PairedPSCBS was ignored -# if more than one chromosome was plotted. -# 2011-12-03 -# o Added drawChangePointsC1C2() for PairedPSCBS. -# o Added drawChangePoints() for PairedPSCBS. -# 2011-11-12 -# o Added argument col="#00000033" to plotC1C2() and linesC1C2(). -# o Added argument 'oma' and 'mar' to plotTracksManyChromosomes() for -# PairedPSCBS for setting graphical parameters when 'add' == FALSE. -# 2011-09-30 -# o GENERALIZATION: Now drawLevels() for PairedPSCBS allows for drawing -# segmentation results in 'betaT' space. -# 2011-07-10 -# o BUG FIX: tileChromosomes() for PairedPSCBS was still assuming the -# old naming convention of column names. -# o ROBUSTNESS: Fixed partial argument matchings in arrowsC1C2() and -# arrowsDeltaC1C2() for PairedPSCBS. -# 2011-06-14 -# o Updated code to recognize new column names. -# 2011-01-19 -# o Added argument 'subplots'. -# 2011-01-18 -# o DOCUMENTATION: Documented more plotTracks() arguments for PairedPSCBS. -# o Now plotTracks(..., add=TRUE) for PairedPSCBS plots to the current -# figure/panel. -# o Now plotTracks(..., add=FALSE) for PairedPSCBS only sets up subplots -# if argument 'tracks' specifies more than one panel. -# o Added argument 'col' to plotTracks() for PairedPSCBS. -# 2011-01-12 -# o Added argument 'changepoints' to plotTracks() for PairedPSCBS for -# highlightning change-point locations as vertical lines. -# 2010-12-01 -# o Now using a default 'seed' for plotTracksManyChromosomes() such -# that the scatter data in the plots are reproducible by default. -# 2010-11-27 -# o BUG FIX: plotTracksManyChromosomes() would annotate called regions -# incorrectly. -# o Added more verbouse output to plotTracksManyChromosomes(). -# o Added missing argument 'verbose' to plotTracksManyChromosomes(). -# o plotTracksManyChromosomes() gained argument 'scatter'. -# o REPRODUCIBILITY: plotTracksManyChromosomes() for PairedPSCBS gained -# argument 'seed', because if 'subset' is specified then a random -# subset of the data points are displayed. -# 2010-11-26 -# o Added optional argument 'chromosomes' to plotTracks() to plot a -# subset of all chromosomes. -# o Now the default confidence intervals for plotTracks() is (0.05,0.95), -# if existing. -# 2010-11-22 -# o ROBUSTNESS: Now drawConfidenceBands() of PairedPSCBS silently does -# nothing if the requested bootstrap quantiles are available. -# o Added argument 'calls' to plotTracks() and plotTracksManyChromosomes() -# for highlighing called regions. -# 2010-11-21 -# o Now plotTracks() supports tracks "tcn,c1", "tcn,c2" and "c1,c2" too. -# o Added argument 'xlim' to plotTracks() making it possible to zoom in. -# o Now plotTracks() and plotTracksManyChromosomes() draws confidence -# bands, iff argument quantiles is given. -# o Added drawConfidenceBands() for PairedPSCBS. -# 2010-11-09 -# o Added argument 'cex=1' to plotTracks(). -# o BUG FIX: It was not possible to plot BAF tracks with plotTracks(). -# 2010-10-20 -# o Added arguments 'onBegin' and 'onEnd' to plotTracksManyChromosomes(). -# 2010-10-18 -# o Now plotTracks() can plot whole-genome data. -# o Now plotTracks() utilized plotTracksManyChromosomes() if there is -# more than one chromosome. -# o Added internal plotTracksManyChromosomes(). -# o Added internal tileChromosomes(). -# 2010-10-03 -# o Now the default is that plotTracks() for PairedPSCBS generated three -# panels: (1) TCN, (2) DH, and (3) C1+C2+TCN. -# o Added plotTracks() to be more explicit than just plot(). -# o Added argument 'xScale' to plot() for PairedPSCBS. -# o Now plot() for PairedPSCBS adds a chromosome tag. -# 2010-09-21 -# o Added argument 'what' to plot() for PairedPSCBS. -# o Added postsegmentTCN() for PairedPSCBS. -# 2010-09-19 -# o BUG FIX: plot() used non-defined nbrOfLoci; now length(x). -# 2010-09-15 -# o Added subsetBySegments(). -# o Added linesC1C2() and arrowsC1C2(). -# o Now the default 'cex' for pointsC1C2() corresponds to 'dh.num.mark'. -# o Now extractTotalAndDH() also returns 'dh.num.mark'. -# 2010-09-08 -# o Added argument 'add=FALSE' to plot(). -# o Added plotC1C2(). -# o Added extractTotalAndDH() and extractMinorMajorCNs(). -# 2010-09-04 -# o Added drawLevels() for PairedPSCBS. -# o Added as.data.frame() and print() for PairedPSCBS. -# 2010-09-03 -# o Added plot() for PairedPSCBS. -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.PRUNE.R r-cran-pscbs-0.64.0/R/PairedPSCBS.PRUNE.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.PRUNE.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.PRUNE.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,11 +1,3 @@ setMethodS3("seqOfSegmentsByDP", "PairedPSCBS", function(fit, by=c("CT", "rho"), ...) { - NextMethod("seqOfSegmentsByDP", by=by); + NextMethod("seqOfSegmentsByDP", by=by) }) - - -############################################################################ -# HISTORY: -# 2012-09-13 -# o Added seqOfSegmentsByDP() for PairedPSCBS. -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.R r-cran-pscbs-0.64.0/R/PairedPSCBS.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.R 2018-08-12 21:30:44.000000000 +0000 @@ -30,22 +30,22 @@ setConstructorS3("PairedPSCBS", function(fit=list(), ...) { # Argument 'fit': if (!is.list(fit)) { - throw("Argument 'fit' is not a list: ", class(fit)[1]); + throw("Argument 'fit' is not a list: ", class(fit)[1]) } - extend(PSCBS(fit=fit, ...), "PairedPSCBS"); + extend(PSCBS(fit=fit, ...), "PairedPSCBS") }) setMethodS3("getLocusData", "PairedPSCBS", function(fit, ..., fields=c("asis", "full")) { # Argument 'fields': - fields <- match.arg(fields); + fields <- match.arg(fields) - data <- NextMethod("getLocusData", fields="asis"); + data <- NextMethod("getLocusData", fields="asis") if (fields == "full") { - names <- colnames(data); + names <- colnames(data) # Genotype calls if (!is.element("muN", names)) { @@ -53,7 +53,7 @@ data$muN <- rep(NA_real_, times=length(data$rho)) data$muN[is.finite(data$rho)] <- 1/2 } else if (is.element("betaN", names)) { - data$muN <- callNaiveGenotypes(data$betaN); + data$muN <- callNaiveGenotypes(data$betaN) } else { throw("Cannot identify heterozygous SNPs or genotypes") } @@ -65,38 +65,38 @@ # it on the fly from 'betaT'. # NOTE: This should give an error in the future. /HB 2013-10-25 if (is.null(data$rho)) { - data$rho <- 2*abs(data$betaT-1/2); - data$rho[!data$isHet] <- NA_real_; - warning("Locus-level DH signals ('rho') did not exist and were calculated from tumor BAFs ('betaT')"); + data$rho <- 2*abs(data$betaT-1/2) + data$rho[!data$isHet] <- NA_real_ + warning("Locus-level DH signals ('rho') did not exist and were calculated from tumor BAFs ('betaT')") } - data$c1 <- 1/2*(1-data$rho)*data$CT; - data$c2 <- data$CT - data$c1; + data$c1 <- 1/2*(1-data$rho)*data$CT + data$c2 <- data$CT - data$c1 # TumorBoost BAFs if (!is.element("rhoN", names)) { if (!is.element("betaTN", names) && is.element("betaN", names)) { - data$betaTN <- normalizeTumorBoost(betaN=data$betaN, betaT=data$betaT, muN=data$muN); + data$betaTN <- normalizeTumorBoost(betaN=data$betaN, betaT=data$betaT, muN=data$muN) } if (is.element("betaTN", names)) { - data$rhoN <- 2*abs(data$betaTN-1/2); - data$rhoN[!data$isHet] <- NA_real_; - data$c1N <- 1/2*(1-data$rhoN)*data$CT; - data$c2N <- data$CT - data$c1N; + data$rhoN <- 2*abs(data$betaTN-1/2) + data$rhoN[!data$isHet] <- NA_real_ + data$c1N <- 1/2*(1-data$rhoN)*data$CT + data$c2N <- data$CT - data$c1N } if (all(is.element(c("betaN", "betaT"), names))) { - data$isSNP <- (!is.na(data$betaT) | !is.na(data$betaN)); - data$type <- ifelse(data$isSNP, "SNP", "non-polymorphic locus"); + data$isSNP <- (!is.na(data$betaT) | !is.na(data$betaN)) + data$type <- ifelse(data$isSNP, "SNP", "non-polymorphic locus") } } # Labels - data$muNx <- c("AA", "AB", "BB")[2*data$muN + 1L]; - data$isHetx <- c("AA|BB", "AB")[data$isHet + 1L]; + data$muNx <- c("AA", "AB", "BB")[2*data$muN + 1L] + data$isHetx <- c("AA|BB", "AB")[data$isHet + 1L] } - data; + data }, protected=TRUE) # getLocusData() @@ -106,80 +106,80 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Resegmenting a ", class(fit)[1], " object"); + verbose && enter(verbose, "Resegmenting a ", class(fit)[1], " object") # Use the locus-level data of the PairedPSCBS object - data <- getLocusData(fit); - class(data) <- "data.frame"; - drop <- c("rho", "betaTN", "index"); - keep <- !is.element(colnames(data), drop); - data <- data[,keep]; - verbose && str(verbose, data); + data <- getLocusData(fit) + class(data) <- "data.frame" + drop <- c("rho", "betaTN", "index") + keep <- !is.element(colnames(data), drop) + data <- data[,keep] + verbose && str(verbose, data) - verbose && cat(verbose, "Number of loci: ", nrow(data)); + verbose && cat(verbose, "Number of loci: ", nrow(data)) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setup arguments to be passed # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Overriding default arguments"); - segFcnName <- "segmentByPairedPSCBS"; - segFcn <- getMethodS3(segFcnName, "default"); + verbose && enter(verbose, "Overriding default arguments") + segFcnName <- "segmentByPairedPSCBS" + segFcn <- getMethodS3(segFcnName, "default") # (a) The default arguments - formals <- formals(segFcn); + formals <- formals(segFcn) - formals <- formals[!sapply(formals, FUN=is.language)]; - formals <- formals[!sapply(formals, FUN=is.name)]; - drop <- c("chromosome", "x", "w", "CT", "thetaT", "thetaN", "betaT", "betaN", "muN", "rho", "..."); - keep <- !is.element(names(formals), drop); - formals <- formals[keep]; + formals <- formals[!sapply(formals, FUN=is.language)] + formals <- formals[!sapply(formals, FUN=is.name)] + drop <- c("chromosome", "x", "w", "CT", "thetaT", "thetaN", "betaT", "betaN", "muN", "rho", "...") + keep <- !is.element(names(formals), drop) + formals <- formals[keep] # (b) The arguments used in previous fit - params <- fit$params; - keep <- is.element(names(params), names(formals)); - params <- params[keep]; + params <- fit$params + keep <- is.element(names(params), names(formals)) + params <- params[keep] # Don't trust 'tbn'! TODO. /HB 20111117 - params$tbn <- NULL; + params$tbn <- NULL # (c) The arguments in '...' - userArgs <- list(..., verbose=verbose); + userArgs <- list(..., verbose=verbose) # (d) Merge - args <- formals; - args2 <- c(params, userArgs); + args <- formals + args2 <- c(params, userArgs) for (kk in seq_along(args2)) { - value <- args2[[kk]]; + value <- args2[[kk]] if (!is.null(value)) { - key <- names(args2)[kk]; + key <- names(args2)[kk] if (!is.null(key)) { - args[[key]] <- value; + args[[key]] <- value } else { - args <- c(args, list(value)); + args <- c(args, list(value)) } } } # for (key ...) - verbose && str(verbose, args[names(args) != "verbose"]); + verbose && str(verbose, args[names(args) != "verbose"]) - verbose && enter(verbose, sprintf("Calling %s()", segFcnName)); - args <- c(list(data), args); - verbose && cat(verbose, "Arguments:"); - verbose && str(verbose, args[names(args) != "verbose"]); - verbose && exit(verbose); + verbose && enter(verbose, sprintf("Calling %s()", segFcnName)) + args <- c(list(data), args) + verbose && cat(verbose, "Arguments:") + verbose && str(verbose, args[names(args) != "verbose"]) + verbose && exit(verbose) - fit <- do.call(segFcnName, args); - verbose && exit(verbose); + fit <- do.call(segFcnName, args) + verbose && exit(verbose) - verbose && exit(verbose); + verbose && exit(verbose) - fit; + fit }, protected=TRUE) # resegment() @@ -187,42 +187,42 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # (a) Update locus-level data # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getLocusData(fit); - names <- c("CT"); + data <- getLocusData(fit) + names <- c("CT") for (ff in names) { - data[[ff]] <- scale * data[[ff]]; + data[[ff]] <- scale * data[[ff]] } - fit$data <- data; ## fit <- setLocusData(fit, data); + fit$data <- data ## fit <- setLocusData(fit, data) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # (b) Update segment-level data # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - segs <- getSegments(fit); + segs <- getSegments(fit) # Adjust segment levels - names <- grep("^(tcn|c1|c2)(Mean|_.*%)$", names(segs), value=TRUE); + names <- grep("^(tcn|c1|c2)(Mean|_.*%)$", names(segs), value=TRUE) for (ff in names) { - segs[[ff]] <- scale * segs[[ff]]; + segs[[ff]] <- scale * segs[[ff]] } # Clear segment calls - names <- c("lohCall", "ntcnCall"); + names <- c("lohCall", "ntcnCall") for (ff in names) { - segs[[ff]] <- NULL; + segs[[ff]] <- NULL } - fit$output <- segs; ## fit <- setSegments(fit, sets); + fit$output <- segs ## fit <- setSegments(fit, sets) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # (c) Update parameter estimates # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - params <- fit$params; - fields <- c("copyNeutralStats", "deltaCN", "ntcnRange", "deltaLowC1"); - params[fields] <- NULL; - fit$params <- params; + params <- fit$params + fields <- c("copyNeutralStats", "deltaCN", "ntcnRange", "deltaLowC1") + params[fields] <- NULL + fit$params <- params - fit; + fit }, protected=TRUE) # adjustPloidyScale() diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.RESTRUCT.R r-cran-pscbs-0.64.0/R/PairedPSCBS.RESTRUCT.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.RESTRUCT.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.RESTRUCT.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,37 +1,37 @@ setMethodS3("extractSegments", "PairedPSCBS", function(this, idxs, ..., verbose=FALSE) { - fit <- this; + fit <- this # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Local functions # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - updateSegRows <- function(segRows, idxs=NULL) { - verbose && str(verbose, segRows); + verbose && str(verbose, segRows) if (!is.null(idxs)) { - segRows <- segRows[idxs,,drop=FALSE]; + segRows <- segRows[idxs,,drop=FALSE] } -# verbose && cat(verbose, "Number of segments: ", nrow(segRows)); -# verbose && str(verbose, segRows); +# verbose && cat(verbose, "Number of segments: ", nrow(segRows)) +# verbose && str(verbose, segRows) # Treat splitters separately - isSplitter <- (is.na(segRows[,1]) & is.na(segRows[,2])); + isSplitter <- (is.na(segRows[,1]) & is.na(segRows[,2])) - ns <- segRows[,2] - segRows[,1] + 1L; -# verbose && cat(verbose, "Number of loci per segment:"); -# verbose && str(verbose, ns); - - ns <- ns[!isSplitter]; - from <- c(1L, cumsum(ns)[-length(ns)]+1L); - to <- from + (ns - 1L); - segRows[!isSplitter,1] <- from; - segRows[!isSplitter,2] <- to; - verbose && str(verbose, segRows); + ns <- segRows[,2] - segRows[,1] + 1L +# verbose && cat(verbose, "Number of loci per segment:") +# verbose && str(verbose, ns) + + ns <- ns[!isSplitter] + from <- c(1L, cumsum(ns)[-length(ns)]+1L) + to <- from + (ns - 1L) + segRows[!isSplitter,1] <- from + segRows[!isSplitter,2] <- to + verbose && str(verbose, segRows) # Sanity check - ns2 <- segRows[,2] - segRows[,1] + 1L; - ns2 <- ns2[!isSplitter]; - stopifnot(all(ns2 == ns)); + ns2 <- segRows[,2] - segRows[,1] + 1L + ns2 <- ns2[!isSplitter] + .stop_if_not(all(ns2 == ns)) - segRows; + segRows } # updateSegRows() @@ -39,129 +39,129 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'idxs': - idxs <- Arguments$getIndices(idxs, max=nbrOfSegments(fit, splitters=TRUE)); + idxs <- Arguments$getIndices(idxs, max=nbrOfSegments(fit, splitters=TRUE)) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Extracting subset of segments"); + verbose && enter(verbose, "Extracting subset of segments") - verbose && cat(verbose, "Number of segments: ", length(idxs)); - verbose && str(verbose, idxs); + verbose && cat(verbose, "Number of segments: ", length(idxs)) + verbose && str(verbose, idxs) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract data and estimates # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getLocusData(fit); - tcnSegRows <- fit$tcnSegRows; - dhSegRows <- fit$dhSegRows; - segs <- getSegments(fit); - params <- fit$params; + data <- getLocusData(fit) + tcnSegRows <- fit$tcnSegRows + dhSegRows <- fit$dhSegRows + segs <- getSegments(fit) + params <- fit$params # Sanity checks - stopifnot(all(!is.na(data$chromosome) & !is.na(data$x))); - stopifnot(length(tcnSegRows) == length(dhSegRows)); + .stop_if_not(all(!is.na(data$chromosome) & !is.na(data$x))) + .stop_if_not(length(tcnSegRows) == length(dhSegRows)) # Sanity checks if (!params$joinSegments) { - throw("Cannot extract subset of segments unless CNs are segmented using joinSegments=TRUE."); + throw("Cannot extract subset of segments unless CNs are segmented using joinSegments=TRUE.") } if (params$flavor == "tcn,dh") { - throw("NOT IMPLEMENTED: Extracting a subset of segments is not supported for flavor '", params$flavor, "'."); + throw("NOT IMPLEMENTED: Extracting a subset of segments is not supported for flavor '", params$flavor, "'.") } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subset segments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Update table of segments"); - segsT <- segs[idxs,,drop=FALSE]; - verbose && str(verbose, segsT); - verbose && exit(verbose); + verbose && enter(verbose, "Update table of segments") + segsT <- segs[idxs,,drop=FALSE] + verbose && str(verbose, segsT) + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subset data accordingly # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Update locus data"); + verbose && enter(verbose, "Update locus data") - segRows <- tcnSegRows; - segRows <- segRows[idxs,,drop=FALSE]; - from <- segRows[[1]]; - to <- segRows[[2]]; - ok <- (!is.na(from) & !is.na(to)); - from <- from[ok]; - to <- to[ok]; - keep <- logical(nrow(data)); + segRows <- tcnSegRows + segRows <- segRows[idxs,,drop=FALSE] + from <- segRows[[1]] + to <- segRows[[2]] + ok <- (!is.na(from) & !is.na(to)) + from <- from[ok] + to <- to[ok] + keep <- logical(nrow(data)) for (rr in seq_along(from)) { - keep[from[rr]:to[rr]] <- TRUE; + keep[from[rr]:to[rr]] <- TRUE } - keep <- which(keep); - verbose && printf(verbose, "Identified %d (%.2f%%) of %d data rows:\n", length(keep), 100*length(keep)/nrow(data), nrow(data)); - verbose && str(verbose, keep); + keep <- which(keep) + verbose && printf(verbose, "Identified %d (%.2f%%) of %d data rows:\n", length(keep), 100*length(keep)/nrow(data), nrow(data)) + verbose && str(verbose, keep) - dataT <- data[keep,,drop=FALSE]; - verbose && str(verbose, dataT); + dataT <- data[keep,,drop=FALSE] + verbose && str(verbose, dataT) - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Update 'segRows' # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Update 'segRows'"); + verbose && enter(verbose, "Update 'segRows'") - segRows <- updateSegRows(tcnSegRows, idxs=idxs); - d <- tcnSegRows[idxs,] - segRows; + segRows <- updateSegRows(tcnSegRows, idxs=idxs) + d <- tcnSegRows[idxs,] - segRows # Sanity check - stopifnot(identical(d[,1], d[,2])); - d <- d[,1]; - verbose && cat(verbose, "Row deltas:"); - verbose && str(verbose, d); + .stop_if_not(identical(d[,1], d[,2])) + d <- d[,1] + verbose && cat(verbose, "Row deltas:") + verbose && str(verbose, d) - tcnSegRows <- tcnSegRows[idxs,,drop=FALSE] - d; - verbose && str(verbose, tcnSegRows); + tcnSegRows <- tcnSegRows[idxs,,drop=FALSE] - d + verbose && str(verbose, tcnSegRows) # Sanity checks - segRows <- tcnSegRows; - stopifnot(suppressWarnings(max(segRows, na.rm=TRUE)) <= nrow(dataT)); - drow <- segRows[-1,1] - segRows[-nrow(segRows),2]; + segRows <- tcnSegRows + .stop_if_not(suppressWarnings(max(segRows, na.rm=TRUE)) <= nrow(dataT)) + drow <- segRows[-1,1] - segRows[-nrow(segRows),2] if (!all(is.na(drow) | (drow > 0))) { - print(segRows); - throw("INTERNAL ERROR: Generated 'tcnSegRows' is invalid, because it contains overlapping data chunks."); + print(segRows) + throw("INTERNAL ERROR: Generated 'tcnSegRows' is invalid, because it contains overlapping data chunks.") } - dhSegRows <- dhSegRows[idxs,,drop=FALSE] - d; - verbose && str(verbose, dhSegRows); + dhSegRows <- dhSegRows[idxs,,drop=FALSE] - d + verbose && str(verbose, dhSegRows) # Sanity checks - segRows <- dhSegRows; - stopifnot(suppressWarnings(max(segRows, na.rm=TRUE)) <= nrow(dataT)); - drow <- segRows[-1,1] - segRows[-nrow(segRows),2]; - stopifnot(all(is.na(drow) | (drow > 0))); + segRows <- dhSegRows + .stop_if_not(suppressWarnings(max(segRows, na.rm=TRUE)) <= nrow(dataT)) + drow <- segRows[-1,1] - segRows[-nrow(segRows),2] + .stop_if_not(all(is.na(drow) | (drow > 0))) if (!all(is.na(drow) | (drow > 0))) { - print(segRows); - throw("INTERNAL ERROR: Generated 'dhSegRows' is invalid, because it contains overlapping data chunks."); + print(segRows) + throw("INTERNAL ERROR: Generated 'dhSegRows' is invalid, because it contains overlapping data chunks.") } - verbose && exit(verbose); + verbose && exit(verbose) # Create new object - res <- fit; - res$data <- dataT; - res$output <- segsT; - res$tcnSegRows <- tcnSegRows; - res$dhSegRows <- dhSegRows; + res <- fit + res$data <- dataT + res$output <- segsT + res$tcnSegRows <- tcnSegRows + res$dhSegRows <- dhSegRows - verbose && exit(verbose); + verbose && exit(verbose) - res; + res }, protected=TRUE) # extractSegments() @@ -201,190 +201,93 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nbrOfSegments <- nbrOfSegments(this, splitters=TRUE); + nbrOfSegments <- nbrOfSegments(this, splitters=TRUE) # Argument 'left': - left <- Arguments$getIndex(left, max=nbrOfSegments-1L); + left <- Arguments$getIndex(left, max=nbrOfSegments-1L) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Merging two segments"); - verbose && printf(verbose, "Segments to be merged: %s & %s\n", left, left+1); - verbose && cat(verbose, "Number of segments before merging: ", nbrOfSegments); - verbose && cat(verbose, "Number of segments after merging: ", nbrOfSegments-1L); - - segs <- getSegments(this, splitters=TRUE); - tcnSegRows <- this$tcnSegRows; - dhSegRows <- this$dhSegRows; + verbose && enter(verbose, "Merging two segments") + verbose && printf(verbose, "Segments to be merged: %s & %s\n", left, left+1) + verbose && cat(verbose, "Number of segments before merging: ", nbrOfSegments) + verbose && cat(verbose, "Number of segments after merging: ", nbrOfSegments-1L) + + segs <- getSegments(this, splitters=TRUE) + tcnSegRows <- this$tcnSegRows + dhSegRows <- this$dhSegRows - rows <- c(left,left+1); - segsT <- segs[rows,,drop=FALSE]; + rows <- c(left,left+1) + segsT <- segs[rows,,drop=FALSE] # Sanity check - chrs <- segsT[["chromosome"]]; + chrs <- segsT[["chromosome"]] if (chrs[1] != chrs[2]) { - throw("Cannot merge segments that are on different chromosomes: ", chrs[1], " != ", chrs[2]); + throw("Cannot merge segments that are on different chromosomes: ", chrs[1], " != ", chrs[2]) } # Merge segments - segT <- segsT[1,]; - fields <- colnames(segsT); + segT <- segsT[1,] + fields <- colnames(segsT) # (chromosome, tcnId, dhId) - idxsUsed <- 1:3; + idxsUsed <- 1:3 # Starts - idxs <- grep("Start$", fields); - T <- as.matrix(segsT[,idxs,drop=FALSE]); - segT[,idxs] <- colMins(T, na.rm=TRUE); - idxsUsed <- c(idxsUsed, idxs); + idxs <- grep("Start$", fields) + T <- as.matrix(segsT[,idxs,drop=FALSE]) + segT[,idxs] <- colMins(T, na.rm=TRUE) + idxsUsed <- c(idxsUsed, idxs) # Ends - idxs <- grep("End$", fields); - T <- as.matrix(segsT[,idxs,drop=FALSE]); - segT[,idxs] <- colMaxs(T, na.rm=TRUE); - idxsUsed <- c(idxsUsed, idxs); + idxs <- grep("End$", fields) + T <- as.matrix(segsT[,idxs,drop=FALSE]) + segT[,idxs] <- colMaxs(T, na.rm=TRUE) + idxsUsed <- c(idxsUsed, idxs) # Counts - idxs <- grep("NbrOf", fields); - segT[,idxs] <- colSums(segsT[,idxs,drop=FALSE]); - idxsUsed <- c(idxsUsed, idxs); + idxs <- grep("NbrOf", fields) + segT[,idxs] <- colSums(segsT[,idxs,drop=FALSE]) + idxsUsed <- c(idxsUsed, idxs) # "Invalidate" remaining entries if (update) { - idxsTodo <- setdiff(seq_along(fields), idxsUsed); - segT[,idxsTodo] <- NA; + idxsTodo <- setdiff(seq_along(fields), idxsUsed) + segT[,idxsTodo] <- NA } # Update segment table - segs[rows[1],] <- segT; - segs <- segs[-rows[2],]; + segs[rows[1],] <- segT + segs <- segs[-rows[2],] # Update 'segRows' tables - segRows <- tcnSegRows; - segRows[rows[1],2] <- segRows[rows[2],2]; - segRows <- segRows[-rows[2],]; - tcnSegRows <- segRows; - - segRows <- dhSegRows; - segRows[rows[1],2] <- segRows[rows[2],2]; - segRows <- segRows[-rows[2],]; - dhSegRows <- segRows; + segRows <- tcnSegRows + segRows[rows[1],2] <- segRows[rows[2],2] + segRows <- segRows[-rows[2],] + tcnSegRows <- segRows + + segRows <- dhSegRows + segRows[rows[1],2] <- segRows[rows[2],2] + segRows <- segRows[-rows[2],] + dhSegRows <- segRows # Create results object - res <- this; - res$output <- segs; - res$tcnSegRows <- tcnSegRows; - res$dhSegRows <- dhSegRows; + res <- this + res$output <- segs + res$tcnSegRows <- tcnSegRows + res$dhSegRows <- dhSegRows # Update the segment statistics? if (update) { - res <- updateMeans(res); + res <- updateMeans(res) } - verbose && exit(verbose); + verbose && exit(verbose) - res; + res }, private=TRUE) - - - -############################################################################ -# HISTORY: -# 2012-02-24 -# o BUG FIX: The local updateSegRows() function inside extractSegments() -# for PairedPSCBS would return incorrect and invalid row indices. -# Copied ditto for CBS, which seems to work. -# o ROBUSTNESS: Added more sanity checks validating the correctness of -# what is returned by extractSegments() for PairedPSCBS. -# 2012-01-09 -# o ROBUSTNESS: Now extractSegments() for PairedPSCBS gives an informative -# error message that it is not supported if CNs were segmented using -# flavor "tcn,dh". -# 2011-10-16 -# o Added argument 'update' to mergeTwoSegments(). -# 2011-10-02 -# o CLEANUP: Dropped empty callSegments() for PairedPSCBS. -# 2011-06-14 -# o Updated code to recognize new column names. -# 2011-04-08 -# o BUG FIX: postsegmentTCN() for PairedPSCBS could generate an invalid -# 'tcnSegRows' matrix, where the indices for two consecutive segments -# would overlap, which is invalid. -# 2011-04-05 -# o BUG FIX: estimateHighDHQuantileAtAB() for PairedPSCBS would throw -# an error on an undefined 'trim' if verbose output was used. -# 2011-02-17 -# o Added arguments 'robust' and 'trim' to estimateMeanForDH(). -# 2011-02-03 -# o Added argument 'tauTCN' to estimateMeanForDH(). -# 2011-01-27 -# o Added flavor="DHskew" to estimateTauAB(). -# o Added flavor="DH" to estimateTauAB() to estimate from DH instead -# of hBAF. As argued by the equations in the comments, these two -# approaches gives virtually the same results. The advantage with the -# DH approach is that it requires one less degree of freedom. -# o Added estimateMeanForDH(). -# 2011-01-18 -# o BUG FIX: 'tcnSegRows' and 'dhSegRows' where not updated by -# extractByRegions() for PairedPSCBS. -# 2011-01-14 -# o Added estimateTauAB() for estimating the DeltaAB parameter. -# o Added estimateStdDevForHeterozygousBAF() for PairedPSCBS. -# o BUG FIX: extractByRegions() did not handle the case where multiple loci -# at the same position are split up in two different segments. -# 2011-01-12 -# o Added extractByRegions() and extractByRegion() for PairedPSCBS. -# o Now postsegmentTCN(..., force=TRUE) for PairedPSCBS also updates -# the TCN estimates even for segments where the DH segmentation did -# not find any additional change points. -# 2010-12-02 -# o Now postsegmentTCN() assert that total number of TCN loci before -# and after is the same. -# o Now postsegmentTCN() assert that joinSegment is TRUE. -# 2010-12-01 -# o Now postsegmentTCN() checks if it is already postsegmented. -# 2010-11-30 -# o TODO: postsegmentTCN() does not make sure of 'dhLociToExclude'. Why? -# o Now postsegmentTCN() recognizes the new 'tcnLociToExclude'. -# 2010-11-28 -# o BUG FIX: postsegmentTCN() did not handle loci with the same positions -# and that are split in two different segments. It also did not exclude -# loci with missing values. -# 2010-11-21 -# o Adjusted postsegmentTCN() such that the updated TCN segment boundaries -# are the maximum of the DH segment and the support by the loci. This -# means that postsegmentTCN() will work as expected both when signals -# where segmented with 'joinSegments' being TRUE or FALSE. -# 2010-10-25 -# o Now subsetByDhSegments() for PairedPSCBS handles the rare case when -# markers with the same positions are split in two different segments. -# o Renamed subsetBySegments() for PairedPSCBS to subsetByDhSegments(). -# 2010-09-26 -# o Now subsetBySegments() for PairedPSCBS handles multiple chromosomes. -# o Now postsegmentTCN() PairedPSCBS handles multiple chromosomes. -# 2010-09-21 -# o Added postsegmentTCN() for PairedPSCBS. -# 2010-09-19 -# o BUG FIX: plot() used non-defined nbrOfLoci; now length(x). -# 2010-09-15 -# o Added subsetBySegments(). -# o Added linesC1C2() and arrowsC1C2(). -# o Now the default 'cex' for pointsC1C2() corresponds to 'dh.num.mark'. -# o Now extractTotalAndDH() also returns 'dh.num.mark'. -# 2010-09-08 -# o Added argument 'add=FALSE' to plot(). -# o Added plotC1C2(). -# o Added extractTotalAndDH() and extractMinorMajorCNs(). -# 2010-09-04 -# o Added drawLevels() for PairedPSCBS. -# o Added as.data.frame() and print() for PairedPSCBS. -# 2010-09-03 -# o Added plot() for PairedPSCBS. -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.unTumorBoost.R r-cran-pscbs-0.64.0/R/PairedPSCBS.unTumorBoost.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.unTumorBoost.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.unTumorBoost.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,66 +1,66 @@ -# @title "Re-calculates the segmented profile using non-TumorBoost BAFs" -setMethodS3("unTumorBoost", "PairedPSCBS", function(fit, ..., verbose=FALSE) { - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); - if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); - } - - - verbose && enterf(verbose, "Relcalculating %s profile without TumorBoost", class(fit)[1L]); - - # Nothing to do? - if (!fit$params$tbn) { - verbose && cat(verbose, "Profile is not from TumorBoost signals. Skipping."); - verbose && exit(verbose); - return(fit); - } - - verbose && enter(verbose, "Updating locus-level data"); - data <- getLocusData(fit); - data$betaTN <- data$betaT; - isHet <- !is.na(data$rho); - data$rho[isHet] <- 2*abs(data$betaTN[isHet]-1/2); - fit$data <- data; - verbose && exit(verbose); - - verbose && enter(verbose, "Updating segments"); - segs <- getSegments(fit); - segs$abCall <- NULL; - segs$lohCall <- NULL; - fit$output <- segs; - verbose && exit(verbose); - - verbose && enter(verbose, "Updating parameters"); - params <- fit$params; - params$tbn <- FALSE; - params$deltaAB <- params$alphaAB <- NULL; - params$deltaLowC1 <- params$alphaLowC1 <- NULL; - fit$params <- params; - verbose && exit(verbose); - - verbose && enter(verbose, "Resetting miscellaneous parameters and estimates"); - fit$changepoints <- NULL; - fit$deshearC1C2 <- NULL; - fit$cScaled <- NULL; - fit$kappa <- NULL; - fit$scale <- NULL; - fit <- clearBootstrapSummaries(fit, verbose=less(verbose, 50)); - verbose && exit(verbose); - - verbose && enter(verbose, "Update segment levels"); - fit <- updateMeans(fit, verbose=less(verbose, 50)); - verbose && exit(verbose); - - verbose && exit(verbose); - - fit; -}, protected=TRUE) # unTumorBoost() - - -############################################################################## -# HISTORY -# 2014-03-28 -# o Added unTumorBoost() for PairedPSCBS. -############################################################################## +# @title "Re-calculates the segmented profile using non-TumorBoost BAFs" +setMethodS3("unTumorBoost", "PairedPSCBS", function(fit, ..., verbose=FALSE) { + # Argument 'verbose': + verbose <- Arguments$getVerbose(verbose) + if (verbose) { + pushState(verbose) + on.exit(popState(verbose)) + } + + + verbose && enterf(verbose, "Relcalculating %s profile without TumorBoost", class(fit)[1L]) + + # Nothing to do? + if (!fit$params$tbn) { + verbose && cat(verbose, "Profile is not from TumorBoost signals. Skipping.") + verbose && exit(verbose) + return(fit) + } + + verbose && enter(verbose, "Updating locus-level data") + data <- getLocusData(fit) + data$betaTN <- data$betaT + isHet <- !is.na(data$rho) + data$rho[isHet] <- 2*abs(data$betaTN[isHet]-1/2) + fit$data <- data + verbose && exit(verbose) + + verbose && enter(verbose, "Updating segments") + segs <- getSegments(fit) + segs$abCall <- NULL + segs$lohCall <- NULL + fit$output <- segs + verbose && exit(verbose) + + verbose && enter(verbose, "Updating parameters") + params <- fit$params + params$tbn <- FALSE + params$deltaAB <- params$alphaAB <- NULL + params$deltaLowC1 <- params$alphaLowC1 <- NULL + fit$params <- params + verbose && exit(verbose) + + verbose && enter(verbose, "Resetting miscellaneous parameters and estimates") + fit$changepoints <- NULL + fit$deshearC1C2 <- NULL + fit$cScaled <- NULL + fit$kappa <- NULL + fit$scale <- NULL + fit <- clearBootstrapSummaries(fit, verbose=less(verbose, 50)) + verbose && exit(verbose) + + verbose && enter(verbose, "Update segment levels") + fit <- updateMeans(fit, verbose=less(verbose, 50)) + verbose && exit(verbose) + + verbose && exit(verbose) + + fit +}, protected=TRUE) # unTumorBoost() + + +############################################################################## +# HISTORY +# 2014-03-28 +# o Added unTumorBoost() for PairedPSCBS. +############################################################################## diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.updateMeans.R r-cran-pscbs-0.64.0/R/PairedPSCBS.updateMeans.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.updateMeans.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.updateMeans.R 2018-08-12 21:30:44.000000000 +0000 @@ -3,62 +3,62 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'from': - from <- match.arg(from); + from <- match.arg(from) # Argument 'adjustFor': if (!is.null(adjustFor)) { - adjustFor <- Arguments$getCharacters(adjustFor); - adjustFor <- tolower(adjustFor); - knownValues <- c("ab", "loh", "roh"); - adjustFor <- match.arg(adjustFor, choices=knownValues, several.ok=TRUE); + adjustFor <- Arguments$getCharacters(adjustFor) + adjustFor <- tolower(adjustFor) + knownValues <- c("ab", "loh", "roh") + adjustFor <- match.arg(adjustFor, choices=knownValues, several.ok=TRUE) } # Argument 'avgTCN' & 'avgDH': - avgTCN <- match.arg(avgTCN); - avgDH <- match.arg(avgDH); + avgTCN <- match.arg(avgTCN) + avgDH <- match.arg(avgDH) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Updating mean level estimates"); - verbose && cat(verbose, "Adjusting for:"); - verbose && print(verbose, adjustFor); + verbose && enter(verbose, "Updating mean level estimates") + verbose && cat(verbose, "Adjusting for:") + verbose && print(verbose, adjustFor) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setting up averaging functions # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (avgTCN == "asis" || avgDH == "asis") { - est <- fit$params$meanEstimators; + est <- fit$params$meanEstimators if (avgTCN == "asis") { - avgTCN <- est$tcn; - if (is.null(avgTCN)) avgTCN <- "mean"; - avgTCN <- match.arg(avgTCN); + avgTCN <- est$tcn + if (is.null(avgTCN)) avgTCN <- "mean" + avgTCN <- match.arg(avgTCN) } if (avgDH == "asis") { - avgDH <- est$dh; - if (is.null(avgDH)) avgDH <- "mean"; - avgDH <- match.arg(avgDH); + avgDH <- est$dh + if (is.null(avgDH)) avgDH <- "mean" + avgDH <- match.arg(avgDH) } } avgList <- list( tcn = get(avgTCN, mode="function"), dh = get(avgDH, mode="function") - ); + ) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract the segmentation results # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - segs <- getSegments(fit, splitters=TRUE); - segRows <- list(tcn=fit$tcnSegRows, dh=fit$dhSegRows); - nbrOfSegments <- nrow(segs); - verbose && cat(verbose, "Number of segments: ", nbrOfSegments); + segs <- getSegments(fit, splitters=TRUE) + segRows <- list(tcn=fit$tcnSegRows, dh=fit$dhSegRows) + nbrOfSegments <- nrow(segs) + verbose && cat(verbose, "Number of segments: ", nbrOfSegments) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -66,22 +66,22 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.element("ab", adjustFor)) { if (!is.element("abCall", names(segs))) { - adjustFor <- setdiff(adjustFor, "ab"); - throw("Cannot adjust for AB, because they haven't been called."); + adjustFor <- setdiff(adjustFor, "ab") + throw("Cannot adjust for AB, because they haven't been called.") } } if (is.element("loh", adjustFor)) { if (!is.element("lohCall", names(segs))) { - adjustFor <- setdiff(adjustFor, "loh"); - throw("Cannot adjust for LOH, because they haven't been called."); + adjustFor <- setdiff(adjustFor, "loh") + throw("Cannot adjust for LOH, because they haven't been called.") } } if (is.element("roh", adjustFor)) { if (!is.element("rohCall", names(segs))) { - adjustFor <- setdiff(adjustFor, "roh"); - throw("Cannot adjust for ROH, because they haven't been called."); + adjustFor <- setdiff(adjustFor, "roh") + throw("Cannot adjust for ROH, because they haven't been called.") } } @@ -90,58 +90,58 @@ # Update the (TCN,DH) mean levels from locus-level data? # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (from == "loci") { - data <- getLocusData(fit); - chromosome <- data$chromosome; - x <- data$x; - CT <- data$CT; - rho <- data$rho; + data <- getLocusData(fit) + chromosome <- data$chromosome + x <- data$x + CT <- data$CT + rho <- data$rho - isSplitter <- isSegmentSplitter(fit); + isSplitter <- isSegmentSplitter(fit) for (ss in seq_len(nbrOfSegments)[!isSplitter]) { - verbose && enter(verbose, sprintf("Segment %d of %d", ss, nbrOfSegments)); - seg <- segs[ss,]; - verbose && print(verbose, seg); + verbose && enter(verbose, sprintf("Segment %d of %d", ss, nbrOfSegments)) + seg <- segs[ss,] + verbose && print(verbose, seg) - chr <- seg[["chromosome"]]; - chrTag <- sprintf("chr%02d", chr); + chr <- seg[["chromosome"]] + chrTag <- sprintf("chr%02d", chr) for (what in c("tcn", "dh")) { - segRow <- segRows[[what]][ss,]; + segRow <- segRows[[what]][ss,] # (a) A splitter - nothing todo? if (!is.finite(segRow[[1]]) || !is.finite(segRow[[2]])) { - next; + next } # (b) Identify units (loci) - units <- segRow[[1]]:segRow[[2]]; + units <- segRow[[1]]:segRow[[2]] # (c) Adjust for missing values if (what == "tcn") { - value <- CT; + value <- CT } else if (what == "dh") { - value <- rho; + value <- rho } - keep <- which(!is.na(value[units])); - units <- units[keep]; + keep <- which(!is.na(value[units])) + units <- units[keep] # (d) Update mean - avgFUN <- avgList[[what]]; - gamma <- avgFUN(value[units]); + avgFUN <- avgList[[what]] + gamma <- avgFUN(value[units]) # Sanity check - stopifnot(length(units) == 0 || !is.na(gamma)); + .stop_if_not(length(units) == 0 || !is.na(gamma)) # Update the segment boundaries, estimates and counts - key <- paste(what, "Mean", sep=""); - seg[[key]] <- gamma; + key <- paste(what, "Mean", sep="") + seg[[key]] <- gamma } - verbose && print(verbose, seg); + verbose && print(verbose, seg) - segs[ss,] <- seg; + segs[ss,] <- seg - verbose && exit(verbose); + verbose && exit(verbose) } # for (ss ...) } # if (from ...) @@ -151,48 +151,48 @@ # Adjust segment means from various types of calls # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (length(adjustFor) > 0) { - verbose && enter(verbose, "Adjusting segment means"); - verbose && cat(verbose, "Adjusting for:"); - verbose && print(verbose, adjustFor); + verbose && enter(verbose, "Adjusting segment means") + verbose && cat(verbose, "Adjusting for:") + verbose && print(verbose, adjustFor) if (is.element("ab", adjustFor)) { - verbose && enter(verbose, "Adjusting for AB"); - calls <- segs$abCall; - segs$dhMean[calls] <- 0; - verbose && exit(verbose); + verbose && enter(verbose, "Adjusting for AB") + calls <- segs$abCall + segs$dhMean[calls] <- 0 + verbose && exit(verbose) } if (is.element("loh", adjustFor)) { - verbose && enter(verbose, "Adjusting for LOH"); - calls <- segs$lohCall; - segs$dhMean[calls] <- 1; - verbose && exit(verbose); + verbose && enter(verbose, "Adjusting for LOH") + calls <- segs$lohCall + segs$dhMean[calls] <- 1 + verbose && exit(verbose) } if (is.element("roh", adjustFor)) { - verbose && enter(verbose, "Adjusting for ROH"); - calls <- segs$rohCall; - segs$dhMean[calls] <- NA_real_; - verbose && exit(verbose); + verbose && enter(verbose, "Adjusting for ROH") + calls <- segs$rohCall + segs$dhMean[calls] <- NA_real_ + verbose && exit(verbose) } - verbose && exit(verbose); + verbose && exit(verbose) } # if (length(adjustFor) > 0) # Update - fit$output <- segs; - fit <- setMeanEstimators(fit, tcn=avgTCN, dh=avgDH); + fit$output <- segs + fit <- setMeanEstimators(fit, tcn=avgTCN, dh=avgDH) if (clear) { - fit <- clearBootstrapSummaries(fit); + fit <- clearBootstrapSummaries(fit) } # Update (C1,C2) mean levels - fit <- updateMeansC1C2(fit, verbose=verbose); + fit <- updateMeansC1C2(fit, verbose=verbose) - verbose && exit(verbose); + verbose && exit(verbose) - fit; + fit }, private=TRUE) # updateMeans() @@ -201,40 +201,40 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Updating (C1,C2) segment mean levels"); - segs <- getSegments(fit); + verbose && enter(verbose, "Updating (C1,C2) segment mean levels") + segs <- getSegments(fit) if (nrow(segs) > 0L) { - tcn <- segs$tcnMean; - dh <- segs$dhMean; + tcn <- segs$tcnMean + dh <- segs$dhMean - C1 <- 1/2*(1-dh)*tcn; - C2 <- tcn - C1; + C1 <- 1/2*(1-dh)*tcn + C2 <- tcn - C1 - segs$c1Mean <- C1; - segs$c2Mean <- C2; + segs$c1Mean <- C1 + segs$c2Mean <- C2 # Preserve (C1,C2) swaps / change-point flips? - swap <- segs$c1c2Swap; + swap <- segs$c1c2Swap if (!is.null(swap)) { - swap <- which(swap); + swap <- which(swap) if (length(swap) > 0L) { - segs[swap, c("c1Mean","c2Mean")] <- segs[swap, c("c2Mean","c1Mean")]; + segs[swap, c("c1Mean","c2Mean")] <- segs[swap, c("c2Mean","c1Mean")] } } - fit$output <- segs; + fit$output <- segs } - verbose && exit(verbose); + verbose && exit(verbose) - fit; + fit }, protected=TRUE) # updateMeansC1C2() diff -Nru r-cran-pscbs-0.63.0/R/PairedPSCBS.updateMeansTogether.R r-cran-pscbs-0.64.0/R/PairedPSCBS.updateMeansTogether.R --- r-cran-pscbs-0.63.0/R/PairedPSCBS.updateMeansTogether.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PairedPSCBS.updateMeansTogether.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,124 +1,109 @@ -setMethodS3("updateMeansTogether", "PairedPSCBS", function(fit, idxList, ..., avgTCN=c("mean", "median"), avgDH=c("mean", "median"), verbose=FALSE) { - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - nbrOfSegments <- nbrOfSegments(fit, splitters=TRUE); - - # Argument 'idxList': - if (!is.list(idxList)) { - idxList <- list(idxList); - } - idxList <- lapply(idxList, FUN=function(idxs) { - idxs <- Arguments$getIndices(idxs, max=nbrOfSegments); - sort(unique(idxs)); - }); - - # Argument 'avgTCN' & 'avgDH': - avgTCN <- match.arg(avgTCN); - avgDH <- match.arg(avgDH); - - # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); - if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); - } - - verbose && enter(verbose, "Updating mean level estimates of multiple segments"); - - verbose && cat(verbose, "Segments:"); - verbose && str(verbose, idxList); - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setting up averaging functions - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - avgList <- list( - tcn = get(avgTCN, mode="function"), - dh = get(avgDH, mode="function") - ); - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract the data and segmentation results - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getLocusData(fit); - - segs <- getSegments(fit, splitters=TRUE); - - nbrOfSegments <- nrow(segs); - verbose && cat(verbose, "Total number of segments: ", nbrOfSegments); - - for (ss in seq_along(idxList)) { - idxs <- idxList[[ss]]; - - fitT <- extractSegments(fit, idxs); - verbose && cat(verbose, "Number of segments: ", nbrOfSegments(fitT)); - - dataT <- getLocusData(fitT); - segsT <- getSegments(fitT); - - CT <- dataT$CT; - rho <- dataT$rho; - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Update the TCN segments - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Recalculate (TCN,DH,C1,C2) means"); - naValue <- NA_real_; - mus <- c(tcn=naValue, dh=naValue, c1=naValue, c2=naValue); - for (key in c("tcn", "dh")) { - avgFUN <- avgList[[key]]; - - # (c) Adjust for missing values - if (key == "tcn") { - value <- CT; - } else if (key == "dh") { - value <- rho; - } - keep <- which(!is.na(value)); - - # (d) Update mean - gamma <- avgFUN(value[keep]); - - # Sanity check - stopifnot(length(gamma) == 0 || !is.na(gamma)); - - mus[key] <- gamma; - } # for (what ...) - - mus["c1"] <- 1/2*(1-mus["dh"])*mus["tcn"]; - mus["c2"] <- mus["tcn"] - mus["c1"]; - names(mus) <- sprintf("%sMean", names(mus)); - verbose && print(verbose, mus); - verbose && exit(verbose); - - for (key in names(mus)) { - segs[idxs,key] <- mus[key]; - } - } # for (ss ...) - - # Return results - res <- fit; - res$output <- segs; - res <- setMeanEstimators(res, tcn=avgTCN, dh=avgDH); - - verbose && exit(verbose); - - res; -}, private=TRUE) # updateMeansTogether() - - - -############################################################################ -# HISTORY: -# 2011-11-28 -# o Dropped kmeansCNs() stub. -# o Added Rdoc comments. -# o Now hclustCNs() also handles segments with missing (C1,C2) levels, -# which for instance can happen after calling ROH. -# 2011-10-14 -# o Implemented hclustCNs() and pruneByHClust() for AbstractCBS. -# o Implemented extractCNs() for PairedPSCBS. -# o Created. -############################################################################ +setMethodS3("updateMeansTogether", "PairedPSCBS", function(fit, idxList, ..., avgTCN=c("mean", "median"), avgDH=c("mean", "median"), verbose=FALSE) { + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Validate arguments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + nbrOfSegments <- nbrOfSegments(fit, splitters=TRUE) + + # Argument 'idxList': + if (!is.list(idxList)) { + idxList <- list(idxList) + } + idxList <- lapply(idxList, FUN=function(idxs) { + idxs <- Arguments$getIndices(idxs, max=nbrOfSegments) + sort(unique(idxs)) + }) + + # Argument 'avgTCN' & 'avgDH': + avgTCN <- match.arg(avgTCN) + avgDH <- match.arg(avgDH) + + # Argument 'verbose': + verbose <- Arguments$getVerbose(verbose) + if (verbose) { + pushState(verbose) + on.exit(popState(verbose)) + } + + verbose && enter(verbose, "Updating mean level estimates of multiple segments") + + verbose && cat(verbose, "Segments:") + verbose && str(verbose, idxList) + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Setting up averaging functions + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + avgList <- list( + tcn = get(avgTCN, mode="function"), + dh = get(avgDH, mode="function") + ) + + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Extract the data and segmentation results + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + data <- getLocusData(fit) + + segs <- getSegments(fit, splitters=TRUE) + + nbrOfSegments <- nrow(segs) + verbose && cat(verbose, "Total number of segments: ", nbrOfSegments) + + for (ss in seq_along(idxList)) { + idxs <- idxList[[ss]] + + fitT <- extractSegments(fit, idxs) + verbose && cat(verbose, "Number of segments: ", nbrOfSegments(fitT)) + + dataT <- getLocusData(fitT) + segsT <- getSegments(fitT) + + CT <- dataT$CT + rho <- dataT$rho + + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + # Update the TCN segments + # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + verbose && enter(verbose, "Recalculate (TCN,DH,C1,C2) means") + naValue <- NA_real_ + mus <- c(tcn=naValue, dh=naValue, c1=naValue, c2=naValue) + for (key in c("tcn", "dh")) { + avgFUN <- avgList[[key]] + + # (c) Adjust for missing values + if (key == "tcn") { + value <- CT + } else if (key == "dh") { + value <- rho + } + keep <- which(!is.na(value)) + + # (d) Update mean + gamma <- avgFUN(value[keep]) + + # Sanity check + .stop_if_not(length(gamma) == 0 || !is.na(gamma)) + + mus[key] <- gamma + } # for (what ...) + + mus["c1"] <- 1/2*(1-mus["dh"])*mus["tcn"] + mus["c2"] <- mus["tcn"] - mus["c1"] + names(mus) <- sprintf("%sMean", names(mus)) + verbose && print(verbose, mus) + verbose && exit(verbose) + + for (key in names(mus)) { + segs[idxs,key] <- mus[key] + } + } # for (ss ...) + + # Return results + res <- fit + res$output <- segs + res <- setMeanEstimators(res, tcn=avgTCN, dh=avgDH) + + verbose && exit(verbose) + + res +}, private=TRUE) # updateMeansTogether() diff -Nru r-cran-pscbs-0.63.0/R/prememoize.R r-cran-pscbs-0.64.0/R/prememoize.R --- r-cran-pscbs-0.63.0/R/prememoize.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/prememoize.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,45 +1,30 @@ -.setupCacheRootPath <- function(...) { - # Setup the cache root path, possibly by prompting the user. - ns <- getNamespace("R.cache"); - setupCacheRootPath <- get("setupCacheRootPath", mode="function", envir=ns); - setupCacheRootPath(); -} # .setupCacheRootPath() - -# CRAN POLICY: Add precalculated memoization files to the R.cache -# directory, unless running interactively. The reason for doing this -# is solely to make segmentBy[Non]PairedPSCBS examples to run faster -# on R CMD check but not having to create these memoized files. -# /HB 2012-11-05 -# UPDATE: Now it will also gain first-time users. /HB 2013-09-27 -.prememoize <- function(verbose=FALSE) { - # Explictly setup cache root here, since it's only done by 'R.cache' - # if that package is attached. Here we only load it. /HB 2013-09-27 - .setupCacheRootPath(); - - # This will make sure that the pre-generated calculations available - # in the 'PSCBS' package are copied to the R.cache cache directory. - # This regardless of whether a 'PSCBS' cache subdirectory exists - # or not. /HB 2013-09-27 - path <- "PSCBS/segmentByCBS/sbdry" - pathS <- system.file("misc/_Rcache", path, package="PSCBS"); - pathD <- getCachePath(path); - copyDirectory(pathS, pathD, copy.mode=FALSE, recursive=FALSE, overwrite=TRUE); - if (verbose) { - message("Added pre-memoized calculations: ", getAbsolutePath(pathD)); - } -} # .prememoize() - -############################################################################ -# HISTORY: -# 2013-09-27 -# o Now .prememorize() also copies pre-generated calculations in -# interactive session. It is also called every time the package -# is attached, which means it will also gain first-time users. -# o Added .setupCacheRootPath() until R.cache exports it. -# o Added argument 'verbose' to .prememoize(). -# 2013-09-26 -# o CLEANUP: Now .prememoize() no longer attaches 'R.cache', but only -# loads its namespace. -# 2012-11-05 -# o Created. -############################################################################ +.setupCacheRootPath <- function(...) { + # Setup the cache root path, possibly by prompting the user. + ns <- getNamespace("R.cache") + setupCacheRootPath <- get("setupCacheRootPath", mode="function", envir=ns) + setupCacheRootPath() +} # .setupCacheRootPath() + +# CRAN POLICY: Add precalculated memoization files to the R.cache +# directory, unless running interactively. The reason for doing this +# is solely to make segmentBy[Non]PairedPSCBS examples to run faster +# on R CMD check but not having to create these memoized files. +# /HB 2012-11-05 +# UPDATE: Now it will also gain first-time users. /HB 2013-09-27 +.prememoize <- function(verbose=FALSE) { + # Explictly setup cache root here, since it's only done by 'R.cache' + # if that package is attached. Here we only load it. /HB 2013-09-27 + .setupCacheRootPath() + + # This will make sure that the pre-generated calculations available + # in the 'PSCBS' package are copied to the R.cache cache directory. + # This regardless of whether a 'PSCBS' cache subdirectory exists + # or not. /HB 2013-09-27 + path <- "PSCBS/segmentByCBS/sbdry" + pathS <- system.file("misc/_Rcache", path, package="PSCBS") + pathD <- getCachePath(path) + copyDirectory(pathS, pathD, copy.mode=FALSE, recursive=FALSE, overwrite=TRUE) + if (verbose) { + message("Added pre-memoized calculations: ", getAbsolutePath(pathD)) + } +} # .prememoize() diff -Nru r-cran-pscbs-0.63.0/R/PSCBS.IO.R r-cran-pscbs-0.64.0/R/PSCBS.IO.R --- r-cran-pscbs-0.63.0/R/PSCBS.IO.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PSCBS.IO.R 2018-08-12 21:30:44.000000000 +0000 @@ -40,68 +40,68 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'name' and 'tags': - name <- Arguments$getCharacter(name); - tags <- Arguments$getCharacters(tags); + name <- Arguments$getCharacter(name) + tags <- Arguments$getCharacters(tags) # Argument 'ext': - ext <- Arguments$getCharacter(ext); + ext <- Arguments$getCharacter(ext) # Arguments 'path': - path <- Arguments$getWritablePath(path); + path <- Arguments$getWritablePath(path) # Argument 'nbrOfDecimals': - nbrOfDecimals <- Arguments$getInteger(nbrOfDecimals); + nbrOfDecimals <- Arguments$getInteger(nbrOfDecimals) - fullname <- paste(c(name, tags), collapse=","); - filename <- sprintf("%s.%s", fullname, ext); - pathname <- Arguments$getWritablePathname(filename, path=path, mustNotExist=(!overwrite && !skip)); + fullname <- paste(c(name, tags), collapse=",") + filename <- sprintf("%s.%s", fullname, ext) + pathname <- Arguments$getWritablePathname(filename, path=path, mustNotExist=(!overwrite && !skip)) # File already exists? if (isFile(pathname)) { # Skip? if (skip) { - return(pathname); + return(pathname) } # Overwrite! - file.remove(pathname); + file.remove(pathname) } # Write to temporary file - pathnameT <- pushTemporaryFile(pathname); + pathnameT <- pushTemporaryFile(pathname) - sampleName <- getSampleName(fit); + sampleName <- getSampleName(fit) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Extract data # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getSegments(fit, ..., splitters=splitters); + data <- getSegments(fit, ..., splitters=splitters) # Round of floating points if (!is.null(nbrOfDecimals)) { - cols <- tolower(colnames(data)); - isInt <- (regexpr("chromosome|start|end|nbrofloci", cols) != -1); - cols <- which(isInt); + cols <- tolower(colnames(data)) + isInt <- (regexpr("chromosome|start|end|nbrofloci", cols) != -1) + cols <- which(isInt) for (cc in cols) { - values <- data[[cc]]; + values <- data[[cc]] if (is.double(values)) { - values <- round(values, digits=0); - data[[cc]] <- values; + values <- round(values, digits=0) + data[[cc]] <- values } } # for (key ...) - cols <- tolower(colnames(data)); - isInt <- (regexpr("chromosome|start|end|nbrofloci", cols) != -1); - isLog <- (regexpr("call", cols) != -1); - isDbl <- (!isInt & !isLog); - cols <- which(isDbl); + cols <- tolower(colnames(data)) + isInt <- (regexpr("chromosome|start|end|nbrofloci", cols) != -1) + isLog <- (regexpr("call", cols) != -1) + isDbl <- (!isInt & !isLog) + cols <- which(isDbl) for (kk in cols) { - values <- data[[kk]]; + values <- data[[kk]] if (is.double(values)) { - values <- round(values, digits=nbrOfDecimals); - data[[kk]] <- values; + values <- round(values, digits=nbrOfDecimals) + data[[kk]] <- values } } # for (key ...) } @@ -111,11 +111,11 @@ # Build header # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (addHeader) { -# sigmaDelta <- estimateStandardDeviation(fit, method="diff"); - sigmaDelta <- NA; -# sigmaResiduals <- estimateStandardDeviation(fit, method="res"); +# sigmaDelta <- estimateStandardDeviation(fit, method="diff") + sigmaDelta <- NA +# sigmaResiduals <- estimateStandardDeviation(fit, method="res") - createdOn <- format(Sys.time(), format="%Y-%m-%d %H:%M:%S %Z"); + createdOn <- format(Sys.time(), format="%Y-%m-%d %H:%M:%S %Z") hdr <- c( name=name, tags=tags, @@ -133,24 +133,16 @@ nbrOfColumns=ncol(data), columnNames=paste(colnames(data), collapse=", "), columnClasses=paste(sapply(data, FUN=function(x) class(x)[1]), collapse=", ") - ); - bfr <- paste("# ", names(hdr), ": ", hdr, sep=""); + ) + bfr <- paste("# ", names(hdr), ": ", hdr, sep="") - cat(file=pathnameT, bfr, sep="\n"); + cat(file=pathnameT, bfr, sep="\n") } # if (addHeader) write.table(file=pathnameT, data, append=TRUE, quote=FALSE, sep=sep, - row.names=FALSE, col.names=TRUE); + row.names=FALSE, col.names=TRUE) - pathname <- popTemporaryFile(pathnameT); + pathname <- popTemporaryFile(pathnameT) - pathname; + pathname }) # writeSegments() - - - -############################################################################ -# HISTORY: -# 2011-12-03 -# o Added writeSegments() for PSCBS. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/PSCBS.isLocallyPhased.R r-cran-pscbs-0.64.0/R/PSCBS.isLocallyPhased.R --- r-cran-pscbs-0.63.0/R/PSCBS.isLocallyPhased.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PSCBS.isLocallyPhased.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,6 +1,6 @@ setMethodS3("isLocallyPhased", "PSCBS", function(fit, ...) { - segs <- getSegments(fit); - is.element("c1c2Swap", names(segs)); + segs <- getSegments(fit) + is.element("c1c2Swap", names(segs)) }) diff -Nru r-cran-pscbs-0.63.0/R/PSCBS.R r-cran-pscbs-0.64.0/R/PSCBS.R --- r-cran-pscbs-0.63.0/R/PSCBS.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PSCBS.R 2018-08-12 21:30:44.000000000 +0000 @@ -30,62 +30,62 @@ setConstructorS3("PSCBS", function(fit=list(), ...) { # Argument 'fit': if (!is.list(fit)) { - throw("Argument 'fit' is not a list: ", class(fit)[1]); + throw("Argument 'fit' is not a list: ", class(fit)[1]) } - extend(AbstractCBS(fit, ...), "PSCBS"); + extend(AbstractCBS(fit, ...), "PSCBS") }) setMethodS3("as.data.frame", "PSCBS", function(x, ...) { - getSegments(x, splitters=TRUE, ...); + getSegments(x, splitters=TRUE, ...) }, protected=TRUE) setMethodS3("getLocusSignalNames", "PSCBS", function(fit, ...) { - c("CT", "rho"); + c("CT", "rho") }, protected=TRUE) setMethodS3("getSegmentTrackPrefixes", "PSCBS", function(fit, ...) { - c("tcn", "dh"); + c("tcn", "dh") }, protected=TRUE) setMethodS3("getLocusData", "PSCBS", function(fit, indices=NULL, fields=c("asis"), ...) { # Argument 'indices': if (!is.null(indices)) { - indices <- Arguments$getIndices(indices); + indices <- Arguments$getIndices(indices) } # Argument 'fields': - fields <- match.arg(fields); + fields <- match.arg(fields) - data <- fit$data; + data <- fit$data # Return requested indices if (!is.null(indices)) { # Map of final indices to current indices - map <- match(indices, data$index); + map <- match(indices, data$index) # Extract/expand... - data <- data[map,]; + data <- data[map,] # Sanity check - stopifnot(nrow(data) == length(indices)); + .stop_if_not(nrow(data) == length(indices)) } - data; + data }, protected=TRUE) # getLocusData() setMethodS3("isSegmentSplitter", "PSCBS", function(fit, ...) { - segs <- fit$output; + segs <- fit$output - isSplitter <- lapply(segs[-1], FUN=is.na); - isSplitter <- Reduce("&", isSplitter); + isSplitter <- lapply(segs[-1], FUN=is.na) + isSplitter <- Reduce("&", isSplitter) - isSplitter; + isSplitter }, protected=TRUE) @@ -119,103 +119,103 @@ #*/########################################################################### setMethodS3("getSegments", "PSCBS", function(fit, simplify=FALSE, splitters=TRUE, addGaps=FALSE, ...) { # Argument 'splitters': - splitters <- Arguments$getLogical(splitters); + splitters <- Arguments$getLogical(splitters) - segs <- fit$output; + segs <- fit$output # Drop chromosome splitters? if (!splitters) { - isSplitter <- isSegmentSplitter(fit); - segs <- segs[!isSplitter,]; + isSplitter <- isSegmentSplitter(fit) + segs <- segs[!isSplitter,] } # Add splitters for "gaps"... if (splitters && addGaps) { # Chromosome gaps - n <- nrow(segs); - chrs <- segs$chromosome; - gapsAfter <- which(diff(chrs) != 0L); - gapsAfter <- gapsAfter[!is.na(chrs[gapsAfter])]; - nGaps <- length(gapsAfter); + n <- nrow(segs) + chrs <- segs$chromosome + gapsAfter <- which(diff(chrs) != 0L) + gapsAfter <- gapsAfter[!is.na(chrs[gapsAfter])] + nGaps <- length(gapsAfter) if (nGaps > 0L) { - idxs <- seq_len(n); - values <- rep(NA_integer_, times=nGaps); - idxs <- insert(idxs, ats=gapsAfter+1L, values=values); - segs <- segs[idxs,]; + idxs <- seq_len(n) + values <- rep(NA_integer_, times=nGaps) + idxs <- insert(idxs, ats=gapsAfter+1L, values=values) + segs <- segs[idxs,] } # Other gaps - n <- nrow(segs); - chrs <- segs$chromosome; - starts <- segs$tcnStart[-1L]; - ends <- segs$tcnEnd[-n]; - gapsAfter <- which(starts != ends); - onSameChr <- (chrs[gapsAfter+1L] == chrs[gapsAfter] ); - gapsAfter <- gapsAfter[onSameChr]; - nGaps <- length(gapsAfter); + n <- nrow(segs) + chrs <- segs$chromosome + starts <- segs$tcnStart[-1L] + ends <- segs$tcnEnd[-n] + gapsAfter <- which(starts != ends) + onSameChr <- (chrs[gapsAfter+1L] == chrs[gapsAfter] ) + gapsAfter <- gapsAfter[onSameChr] + nGaps <- length(gapsAfter) if (nGaps > 0L) { - idxs <- seq_len(n); - values <- rep(NA_integer_, times=nGaps); - idxs <- insert(idxs, ats=gapsAfter+1L, values=values); - segs <- segs[idxs,]; + idxs <- seq_len(n) + values <- rep(NA_integer_, times=nGaps) + idxs <- insert(idxs, ats=gapsAfter+1L, values=values) + segs <- segs[idxs,] } } ## if (nrow(segs) > 0) { -## segs$id <- getSampleName(fit); +## segs$id <- getSampleName(fit) ## } if (simplify) { # If joinSegments was used (i.e. (start,end) are equal for TCN and DH)... if (fit$params$joinSegments) { # Sanity check - stopifnot(all(segs$tcnStart == segs$dhStart, na.rm=TRUE)); - stopifnot(all(segs$tcnEnd == segs$dhEnd, na.rm=TRUE)); + .stop_if_not(all(segs$tcnStart == segs$dhStart, na.rm=TRUE)) + .stop_if_not(all(segs$tcnEnd == segs$dhEnd, na.rm=TRUE)) - names <- colnames(segs); - keep <- !is.element(names, c("dhStart", "dhEnd")); - segs <- segs[,keep]; - names <- colnames(segs); - names[names == "tcnStart"] <- "start"; - names[names == "tcnEnd"] <- "end"; - colnames(segs) <- names; + names <- colnames(segs) + keep <- !is.element(names, c("dhStart", "dhEnd")) + segs <- segs[,keep] + names <- colnames(segs) + names[names == "tcnStart"] <- "start" + names[names == "tcnEnd"] <- "end" + colnames(segs) <- names } # Drop bootstrap columns, if any - names <- colnames(segs); - keep <- (regexpr("_[0-9]+(|[.][0-9]+)%$", names) == -1); - segs <- segs[,keep]; + names <- colnames(segs) + keep <- (regexpr("_[0-9]+(|[.][0-9]+)%$", names) == -1) + segs <- segs[,keep] } - segs; + segs }, private=TRUE) setMethodS3("getChangePoints", "PSCBS", function(fit, ...) { # Already available? - cps <- fit$changepoints; - if (!is.null(cps)) return(cps); + cps <- fit$changepoints + if (!is.null(cps)) return(cps) - segs <- getSegments(fit, splitters=TRUE); - tcn <- segs[["tcnMean"]]; - dh <- segs[["dhMean"]]; - C1 <- (1-dh) * tcn / 2; - C2 <- tcn - C1; - n <- length(tcn); + segs <- getSegments(fit, splitters=TRUE) + tcn <- segs[["tcnMean"]] + dh <- segs[["dhMean"]] + C1 <- (1-dh) * tcn / 2 + C2 <- tcn - C1 + n <- length(tcn) # Calculate observed (alpha, radius, manhattan, dc1, dc2) data - D1 <- C1[-n] - C1[-1L]; - D2 <- C2[-n] - C2[-1L]; + D1 <- C1[-n] - C1[-1L] + D2 <- C2[-n] - C2[-1L] cps <- data.frame( alpha = atan2(D2, D1), # Changepoint angles in (0,2*pi) radius = sqrt(D2^2 + D1^2), manhattan = abs(D2) + abs(D1), d1 = D1, d2 = D2 - ); + ) - cps; + cps }, private=TRUE) # getChangePoints() @@ -223,7 +223,7 @@ ## Fit using locus-level data data <- getLocusData(fit, ...) C <- data$CT - stopifnot(!is.null(C)) + .stop_if_not(!is.null(C)) mu <- median(C, na.rm=TRUE) scale <- targetTCN / mu @@ -245,44 +245,3 @@ invisible(fitN) }) - - -############################################################################ -# HISTORY: -# 2013-10-20 -# o Added getChangePoints() for PSCBS. -# 2012-09-21 -# o Now getSegments(..., splitters=TRUE) for CBS and PSCBS inserts NA -# rows whereever there is a "gap" between segments. A "gap" is when -# two segments are not connected (zero distance). -# 2012-04-21 -# o CLEANUP: Moved getSegmentSizes() from PSCBS to AbstractCBS. -# 2012-04-21 -# o CLEANUP: Moved getSegmentSizes() from PairedPSCBS to PSCBS. -# 2012-02-27 -# o Added argument 'fields' to getLocusData() for PairedPSCBS. -# 2011-12-12 -# o Added optional argument 'indices' to getLocusData() to be able -# to retrieve the locus-level data as indexed by input data. -# 2011-12-03 -# o Added argument 'simplify' to getSegments(). -# 2011-10-16 -# o Added isSegmentSplitter(). -# 2011-10-02 -# o Now the CBS class extends the AbstractCBS class. -# o Added print() and as.data.frame() to PSCBS. -# o Added getSegments() to PSCBS. -# o DOCUMENTATION: Added Rdoc for several PSCBS methods. -# o Added a PSCBS constructor with documentation. -# 2010-12-01 -# o Now also extractByChromosomes() and append() for PSCBS recognizes -# fields 'tcnLociToExclude' and 'dhLociToExclude'. -# o BUG FIX: extractByChromosome() for PSCBS would call it self instead -# of extractByChromosomes(). -# 2010-11-26 -# o Added extractByChromosomes() for PSCBS. -# 2010-09-26 -# o getChromosomes() no longer returns NA divers. -# 2010-09-24 -# o Added append() and more for PSCBS objects. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/PSCBS.RESTRUCT.R r-cran-pscbs-0.64.0/R/PSCBS.RESTRUCT.R --- r-cran-pscbs-0.63.0/R/PSCBS.RESTRUCT.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/PSCBS.RESTRUCT.R 2018-08-12 21:30:44.000000000 +0000 @@ -1,186 +1,134 @@ -###########################################################################/** -# @set "class=PSCBS" -# @RdocMethod append -# -# @title "Appends one segmentation result to another" -# -# \description{ -# @get "title". -# } -# -# @synopsis -# -# \arguments{ -# \item{x, other}{The two @see "PSCBS" objects to be combined.} -# \item{other}{A @see "PSCBS" object.} -# \item{addSplit}{If @TRUE, a "divider" is added between chromosomes.} -# \item{...}{Not used.} -# } -# -# \value{ -# Returns a @see "PSCBS" object of the same class as argument \code{x}. -# } -# -# @author "HB" -# -# \seealso{ -# @seeclass -# } -#*/########################################################################### -setMethodS3("append", "PSCBS", function(x, other, addSplit=TRUE, ...) { - # To please R CMD check - this <- x; - +setMethodS3("c", "PSCBS", function(..., addSplit = TRUE) { # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'other': - other <- Arguments$getInstanceOf(other, "PSCBS"); - for (field in c("data", "output")) { - dataA <- this[[field]] - dataB <- other[[field]] - namesA <- colnames(dataA) - namesB <- colnames(dataB) - if (!all(namesA == namesB)) { - throw(sprintf("Cannot merge %s objects. Arguments 'other' and 'this' has different sets of columns in field '%s': {%s} [n=%d] != {%s} [n=%d]", class(this)[1], field, paste(namesA, collapse=", "), length(namesA), paste(namesB, collapse=", "), length(namesB))) - } - } - - # Argument 'addSplit': - addSplit <- Arguments$getLogical(addSplit); - - - # Allocate results - res <- this; - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Locus data - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - res$data <- rbind(this$data, other$data); - - - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Segmentation data - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - indexOffset <- nrow(this$data); - fields <- c("output", "tcnSegRows", "dhSegRows"); - for (field in fields[-1]) { - other[[field]] <- other[[field]] + indexOffset; - } - - splitter <- if (addSplit) NA else NULL; - for (field in fields) { - res[[field]] <- rbind(this[[field]], splitter, other[[field]]); - rownames(res[[field]]) <- NULL; - } + args <- list(...) - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Parameters - # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ksT <- this$params$knownSegments; - ksT$length <- NULL; # In case it's been added - ksO <- other$params$knownSegments; - ksO$length <- NULL; # In case it's been added - res$params$knownSegments <- rbind(ksT, ksO); + ## Nothing todo? + nargs <- length(args) + if (nargs == 1) return(args[[1]]) + + isNA <- function(x) is.logical(x) && length(x) == 1L && is.na(x) + + res <- args[[1]] + fields <- c("output", "tcnSegRows", "dhSegRows") + + for (ii in 2:nargs) { + arg <- args[[ii]] + + if (isNA(arg)) { + if (addSplit) { + warning(sprintf("Detected explicit NA in call to c(<%s>, ..., addSplit = TRUE). Ignoring", class(args[[1]])[1])) + next + } + ## Add "splitter" + for (field in fields) { + res[[field]] <- rbind(res[[field]], NA) + } + } else { + ## Locus-level data + data <- getLocusData(res) + data_arg <- getLocusData(arg) + if (!all(colnames(data_arg) == colnames(data))) { + throw(sprintf("Cannot concatenate %s and %s objects, because they have different sets of columns in field %s: {%s} [n=%d] != {%s} [n=%d]", sQuote(class(res)[1]), sQuote(class(arg)[1]), sQuote(field), paste(sQuote(colnames(data)), collapse=", "), ncol(data), paste(sQuote(colnames(data_arg)), collapse=", "), ncol(data_arg))) + } + + indexOffset <- nrow(data) + + data <- rbind(data, getLocusData(arg)) + res[["data"]] <- data + + # Segmentation data + for (field in fields[-1]) { + arg[[field]] <- arg[[field]] + indexOffset + } + splitter <- if (addSplit) NA else NULL + for (field in fields) { + res[[field]] <- rbind(res[[field]], splitter, arg[[field]]) + } + + # Known segments + ksT <- res$params$knownSegments + ksT$length <- NULL # In case it's been added + ksO <- arg$params$knownSegments + ksO$length <- NULL # In case it's been added + res$params$knownSegments <- rbind(ksT, ksO) + } + } ## for (ii ...) + ## Drop row names, iff they've been added + for (field in fields) rownames(res[[field]]) <- NULL + # Sanity check - ns <- sapply(res[fields], FUN=nrow); - stopifnot(all(ns == ns[1])); + ns <- sapply(res[fields], FUN = nrow) + .stop_if_not(all(ns == ns[1])) - res; -}) # append() + res +}) # c() setMethodS3("extractChromosomes", "PSCBS", function(x, chromosomes, ...) { # To please R CMD check - this <- x; + this <- x # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'chromosomes': - disallow <- c("NaN", "Inf"); - chromosomes <- Arguments$getIntegers(chromosomes, range=c(0,Inf), disallow=disallow); - stopifnot(all(is.element(chromosomes, getChromosomes(this)))); + disallow <- c("NaN", "Inf") + chromosomes <- Arguments$getIntegers(chromosomes, range=c(0,Inf), disallow=disallow) + .stop_if_not(all(is.element(chromosomes, getChromosomes(this)))) # Always extract in order - chromosomes <- unique(chromosomes); - chromosomes <- sort(chromosomes); + chromosomes <- unique(chromosomes) + chromosomes <- sort(chromosomes) # Allocate results - res <- this; + res <- this # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Locus data # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - chromosome <- NULL; rm(list="chromosome"); # To please R CMD check - res$data <- subset(res$data, chromosome %in% chromosomes); + chromosome <- NULL; rm(list="chromosome") # To please R CMD check + res$data <- subset(res$data, chromosome %in% chromosomes) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Segmentation data # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Identify rows to subset - rows <- which(is.element(res$output$chromosome, chromosomes)); + rows <- which(is.element(res$output$chromosome, chromosomes)) for (field in c("output", "tcnSegRows", "dhSegRows")) { - res[[field]] <- res[[field]][rows,,drop=FALSE]; + res[[field]] <- res[[field]][rows,,drop=FALSE] } # Identify chromosome offsets - chrStarts <- match(getChromosomes(this), this$data$chromosome); - chrEnds <- c(chrStarts[-1]-1L, nrow(this$data)); - chrLengths <- chrEnds - chrStarts + 1L; - - chrLengthsExcl <- chrLengths; - - keep <- match(chromosomes, getChromosomes(this)); - chrLengthsExcl[keep] <- 0L; - cumChrLengthsExcl <- cumsum(chrLengthsExcl); + chrStarts <- match(getChromosomes(this), this$data$chromosome) + chrEnds <- c(chrStarts[-1]-1L, nrow(this$data)) + chrLengths <- chrEnds - chrStarts + 1L + + chrLengthsExcl <- chrLengths + + keep <- match(chromosomes, getChromosomes(this)) + chrLengthsExcl[keep] <- 0L + cumChrLengthsExcl <- cumsum(chrLengthsExcl) - shifts <- cumChrLengthsExcl[keep]; - stopifnot(all(is.finite(shifts))); + shifts <- cumChrLengthsExcl[keep] + .stop_if_not(all(is.finite(shifts))) # Adjust indices for (cc in seq_along(chromosomes)) { - chromosome <- chromosomes[cc]; - shift <- shifts[cc]; + chromosome <- chromosomes[cc] + shift <- shifts[cc] # Nothing to do? - if (shift == 0) next; + if (shift == 0) next for (field in c("tcnSegRows", "dhSegRows")) { - segRows <- res[[field]]; - rows <- which(res$output$chromosome == chromosome); - segRows[rows,] <- segRows[rows,] - shift; - res[[field]] <- segRows; + segRows <- res[[field]] + rows <- which(res$output$chromosome == chromosome) + segRows[rows,] <- segRows[rows,] - shift + res[[field]] <- segRows } } - res; + res }, protected=TRUE) - - - -############################################################################ -# HISTORY: -# 2012-09-21 -# o ROBUSTNESS: Now append() for CBS and PSCBS drops column 'length' -# from 'knownSegments', iff it exists. -# 2011-10-20 -# o Now append() for PSCBS also appends '...$params$knownSegments'. -# 2011-10-02 -# o Now the CBS class extends the AbstractCBS class. -# o Added print() and as.data.frame() to PSCBS. -# o Added getSegments() to PSCBS. -# o DOCUMENTATION: Added Rdoc for several PSCBS methods. -# o Added a PSCBS constructor with documentation. -# 2010-12-01 -# o Now also extractByChromosomes() and append() for PSCBS recognizes -# fields 'tcnLociToExclude' and 'dhLociToExclude'. -# o BUG FIX: extractByChromosome() for PSCBS would call it self instead -# of extractByChromosomes(). -# 2010-11-26 -# o Added extractByChromosomes() for PSCBS. -# 2010-09-26 -# o getChromosomes() no longer returns NA divers. -# 2010-09-24 -# o Added append() and more for PSCBS objects. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/randomSeed.R r-cran-pscbs-0.64.0/R/randomSeed.R --- r-cran-pscbs-0.63.0/R/randomSeed.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/randomSeed.R 2018-08-12 21:30:44.000000000 +0000 @@ -96,7 +96,7 @@ function(action=c("set", "advance", "reset", "get"), seed=NULL, kind=NULL, n=1L, backup=TRUE) { action <- match.arg(action) n <- as.integer(n) - stopifnot(n >= 1) + .stop_if_not(n >= 1) ## Record existing RNG kind (only once) if (is.null(oldKind)) oldKind <<- RNGkind()[1L] diff -Nru r-cran-pscbs-0.63.0/R/segmentByCBS.R r-cran-pscbs-0.64.0/R/segmentByCBS.R --- r-cran-pscbs-0.63.0/R/segmentByCBS.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/segmentByCBS.R 2018-08-12 21:30:44.000000000 +0000 @@ -115,7 +115,7 @@ # @keyword IO #*/########################################################################### setMethodS3("segmentByCBS", "default", function(y, chromosome=0L, x=NULL, index=seq_along(y), w=NULL, undo=0, avg=c("mean", "median"), ..., joinSegments=TRUE, knownSegments=NULL, seed=NULL, verbose=FALSE) { - R_SANITY_CHECK <- TRUE; + R_SANITY_CHECK <- TRUE # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Local functions @@ -125,21 +125,21 @@ getbdry2 <- function(eta, nperm, alpha, tol=0.01, verbose=FALSE) { # Explictly setup cache root here, since it's only done by 'R.cache' # if that package is attached. Here we only load it. /HB 2013-09-27 - .setupCacheRootPath(); + .setupCacheRootPath() key <- list(method="segmentByCBS", eta=eta, nperm=as.integer(nperm), alpha=alpha, tol=tol, - version="0.16.1"); - dirs <- c("PSCBS", "segmentByCBS", "sbdry"); - bdry <- loadCache(key=key, dirs=dirs); - if (!is.null(bdry)) return(bdry); + version="0.16.1") + dirs <- c("PSCBS", "segmentByCBS", "sbdry") + bdry <- loadCache(key=key, dirs=dirs) + if (!is.null(bdry)) return(bdry) - max.ones <- floor(nperm * alpha) + 1L; - bdry <- getbdry(eta=eta, nperm=nperm, max.ones=max.ones, tol=tol); + max.ones <- floor(nperm * alpha) + 1L + bdry <- getbdry(eta=eta, nperm=nperm, max.ones=max.ones, tol=tol) - saveCache(bdry, key=key, dirs=dirs); + saveCache(bdry, key=key, dirs=dirs) - bdry; + bdry } # getbdry2() @@ -147,148 +147,148 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'y': - disallow <- c("Inf"); - y <- Arguments$getDoubles(y, disallow=disallow); - nbrOfLoci <- length(y); + disallow <- c("Inf") + y <- Arguments$getDoubles(y, disallow=disallow) + nbrOfLoci <- length(y) - length2 <- rep(nbrOfLoci, times=2); + length2 <- rep(nbrOfLoci, times=2) # Argument 'chromosome': if (is.null(chromosome)) { - chromosome <- 0L; + chromosome <- 0L } else { - disallow <- c("Inf"); - chromosome <- Arguments$getIntegers(chromosome, range=c(0,Inf), disallow=disallow); + disallow <- c("Inf") + chromosome <- Arguments$getIntegers(chromosome, range=c(0,Inf), disallow=disallow) if (length(chromosome) > 1) { - chromosome <- Arguments$getIntegers(chromosome, length=length2, disallow=disallow); + chromosome <- Arguments$getIntegers(chromosome, length=length2, disallow=disallow) ## # If 'chromosome' is a vector of length J, then it must contain ## # a unique chromosome. - ## chromosomes <- sort(unique(chromosome)); + ## chromosomes <- sort(unique(chromosome)) ## if (length(chromosomes) > 1) { - ## throw("Argument 'chromosome' specifies more than one unique chromosome: ", paste(seqToHumanReadable(chromosomes), collapse=", ")); + ## throw("Argument 'chromosome' specifies more than one unique chromosome: ", paste(seqToHumanReadable(chromosomes), collapse=", ")) ## } - ## chromosome <- chromosomes; + ## chromosome <- chromosomes } } # For future usage - chrom <- rep(chromosome, length.out=nbrOfLoci); + chrom <- rep(chromosome, length.out=nbrOfLoci) # Argument 'x': if (is.null(x)) { - x <- seq_len(nbrOfLoci); + x <- seq_len(nbrOfLoci) } else { - disallow <- c("Inf"); - x <- Arguments$getDoubles(x, length=length2, disallow=disallow); + disallow <- c("Inf") + x <- Arguments$getDoubles(x, length=length2, disallow=disallow) } # Argument 'index': if (is.null(index)) { - index <- seq_along(y); + index <- seq_along(y) } else { - index <- Arguments$getIndices(index); + index <- Arguments$getIndices(index) } # Argument 'w': - hasWeights <- !is.null(w); + hasWeights <- !is.null(w) if (hasWeights) { - disallow <- c("NA", "NaN", "Inf"); - w <- Arguments$getDoubles(w, range=c(0,1), length=length2, disallow=disallow); + disallow <- c("NA", "NaN", "Inf") + w <- Arguments$getDoubles(w, range=c(0,1), length=length2, disallow=disallow) } # Argument 'undo': - undo <- Arguments$getDouble(undo, range=c(0,Inf)); + undo <- Arguments$getDouble(undo, range=c(0,Inf)) # Argument 'avg': avg <- match.arg(avg) # Argument 'cpFlavor': - joinSegments <- Arguments$getLogical(joinSegments); + joinSegments <- Arguments$getLogical(joinSegments) # Argument 'knownSegments': if (is.null(knownSegments)) { - knownSegments <- data.frame(chromosome=integer(0), start=integer(0), end=integer(0)); + knownSegments <- data.frame(chromosome=integer(0), start=integer(0), end=integer(0)) } else { # if (!joinSegments) { -# throw("Argument 'knownSegments' should only be specified if argument 'joinSegments' is TRUE."); +# throw("Argument 'knownSegments' should only be specified if argument 'joinSegments' is TRUE.") # } } if (!is.data.frame(knownSegments)) { - throw("Argument 'knownSegments' is not a data.frame: ", class(knownSegments)[1]); + throw("Argument 'knownSegments' is not a data.frame: ", class(knownSegments)[1]) } if (!all(is.element(c("chromosome", "start", "end"), colnames(knownSegments)))) { - throw("Argument 'knownSegments' does not have the required column names: ", hpaste(colnames(knownSegments))); + throw("Argument 'knownSegments' does not have the required column names: ", hpaste(colnames(knownSegments))) } # Detailed validation of 'knownSegments'. for (chr in sort(unique(knownSegments$chromosome))) { - dd <- subset(knownSegments, chromosome == chr); + dd <- subset(knownSegments, chromosome == chr) # Order segments by 'start'. - o <- order(dd$start); - dd <- dd[o,]; + o <- order(dd$start) + dd <- dd[o,] # Known segments must not share 'start' or 'end' loci for (field in c("start", "end")) { - xs <- dd[[field]]; - xs <- xs[!is.na(xs)]; + xs <- dd[[field]] + xs <- xs[!is.na(xs)] if (anyDuplicated(xs) > 0) { - print(knownSegments); - throw(sprintf("Detected segments on chromosome %s with non-unique '%s' positions in argument 'knownSegments'", chr, field)); + print(knownSegments) + throw(sprintf("Detected segments on chromosome %s with non-unique '%s' positions in argument 'knownSegments'", chr, field)) } } # for (field ...) # Known segments must not overlap if (!all(dd$start[-1] >= dd$end[-nrow(dd)], na.rm=TRUE)) { - print(knownSegments); - throw("Detected overlapping segments on chromosome ", chr, " in argument 'knownSegments'."); + print(knownSegments) + throw("Detected overlapping segments on chromosome ", chr, " in argument 'knownSegments'.") } } # Argument 'seed': if (!is.null(seed)) { - seed <- Arguments$getIntegers(seed); + seed <- Arguments$getIntegers(seed) } # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Segmenting by CBS"); + verbose && enter(verbose, "Segmenting by CBS") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setup data # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Setup up data", level=-10); - data <- data.frame(chrom=chrom, x=x, y=y, index=index); + verbose && enter(verbose, "Setup up data", level=-10) + data <- data.frame(chrom=chrom, x=x, y=y, index=index) if (hasWeights) { - verbose && cat(verbose, "Adding locus-specific weights", level=-10); - data$w <- w; + verbose && cat(verbose, "Adding locus-specific weights", level=-10) + data$w <- w } - verbose && str(verbose, data, level=-10); + verbose && str(verbose, data, level=-10) # Not needed anymore - chrom <- x <- index <- y <- w <- NULL; - verbose && exit(verbose); + chrom <- x <- index <- y <- w <- NULL + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Drop data points without known genomic positions, because that # is what DNAcopy::CNA() will do otherwise. # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ok <- (!is.na(data$chrom) & !is.na(data$x)); + ok <- (!is.na(data$chrom) & !is.na(data$x)) if (any(!ok)) { - verbose && enter(verbose, "Dropping loci with unknown locations", level=-10); - verbose && cat(verbose, "Number of loci dropped: ", sum(!ok), level=-10); - data <- data[ok,,drop=FALSE]; - verbose && exit(verbose); + verbose && enter(verbose, "Dropping loci with unknown locations", level=-10) + verbose && cat(verbose, "Number of loci dropped: ", sum(!ok), level=-10) + data <- data[ok,,drop=FALSE] + verbose && exit(verbose) } - ok <- NULL; # Not needed anymore + ok <- NULL # Not needed anymore # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -297,25 +297,25 @@ # the sort such that the returned 'data' object is always in # the same order and number of loci as the input data. # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Ordering data along genome", level=-50); - o <- order(data$chrom, data$x, decreasing=FALSE, na.last=TRUE); + verbose && enter(verbose, "Ordering data along genome", level=-50) + o <- order(data$chrom, data$x, decreasing=FALSE, na.last=TRUE) # Any change? if (any(o != seq_along(o))) { - data <- data[o,,drop=FALSE]; + data <- data[o,,drop=FALSE] } - o <- NULL; # Not needed anymore - verbose && str(verbose, data, level=-50); - verbose && exit(verbose); + o <- NULL # Not needed anymore + verbose && str(verbose, data, level=-50) + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Multiple chromosomes? # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Identify all chromosomes, excluding missing values - chromosomes <- sort(unique(data$chrom), na.last=NA); - nbrOfChromosomes <- length(chromosomes); + chromosomes <- sort(unique(data$chrom), na.last=NA) + nbrOfChromosomes <- length(chromosomes) if (nbrOfChromosomes > 1) { - verbose && enter(verbose, "Segmenting multiple chromosomes"); - verbose && cat(verbose, "Number of chromosomes: ", nbrOfChromosomes); + verbose && enter(verbose, "Segmenting multiple chromosomes") + verbose && cat(verbose, "Number of chromosomes: ", nbrOfChromosomes) # Generate random seeds? seeds <- NULL @@ -329,27 +329,27 @@ fitList <- listenv() for (kk in seq_len(nbrOfChromosomes)) { - chromosomeKK <- chromosomes[kk]; - chrTag <- sprintf("Chr%02d", chromosomeKK); - verbose && enter(verbose, sprintf("Chromosome #%d ('%s') of %d", kk, chrTag, nbrOfChromosomes)); + chromosomeKK <- chromosomes[kk] + chrTag <- sprintf("Chr%02d", chromosomeKK) + verbose && enter(verbose, sprintf("Chromosome #%d ('%s') of %d", kk, chrTag, nbrOfChromosomes)) seedKK <- seeds[[kk]] # Extract subset of data and parameters for this chromosome - dataKK <- subset(data, chrom == chromosomeKK); - verbose && str(verbose, dataKK, level=-10); + dataKK <- subset(data, chrom == chromosomeKK) + verbose && str(verbose, dataKK, level=-10) chrom <- x <- index <- y <- w <- NULL - fields <- attachLocally(dataKK, fields=c("chrom", "x", "index", "y", "w")); - dataKK <- NULL; # Not needed anymore + fields <- attachLocally(dataKK, fields=c("chrom", "x", "index", "y", "w")) + dataKK <- NULL # Not needed anymore - knownSegmentsKK <- NULL; + knownSegmentsKK <- NULL if (!is.null(knownSegments)) { - knownSegmentsKK <- subset(knownSegments, chromosome == chromosomeKK); + knownSegmentsKK <- subset(knownSegments, chromosome == chromosomeKK) if (nrow(knownSegmentsKK) == 0L) { - knownSegmentsKK <- data.frame(chromosome=chromosomeKK, start=-Inf, end=+Inf); + knownSegmentsKK <- data.frame(chromosome=chromosomeKK, start=-Inf, end=+Inf) } - verbose && cat(verbose, "Known segments:", level=-5); - verbose && print(verbose, knownSegmentsKK, level=-5); + verbose && cat(verbose, "Known segments:", level=-5) + verbose && print(verbose, knownSegmentsKK, level=-5) } fitList[[chrTag]] %<-% { @@ -363,88 +363,89 @@ knownSegments=knownSegmentsKK, ..., seed=seedKK, - verbose=verbose); + verbose=verbose) # Sanity checks if (R_SANITY_CHECK) { if (nrow(knownSegmentsKK) == 0) { # Since all missing data have been dropped... - stopifnot(nrow(fit$data) == length(y)); + .stop_if_not(nrow(fit$data) == length(y)) # ...and ordered along the genome already. - stopifnot(all.equal(fit$data$y, y)); + .stop_if_not(all.equal(fit$data$y, y)) } # Assert weights were used - stopifnot(!hasWeights || !is.null(fit$data$w)) + .stop_if_not(!hasWeights || !is.null(fit$data$w)) } # if (R_SANITY_CHECK) - verbose && print(verbose, head(as.data.frame(fit)), level=-10); - verbose && print(verbose, tail(as.data.frame(fit)), level=-10); + verbose && print(verbose, head(as.data.frame(fit)), level=-10) + verbose && print(verbose, tail(as.data.frame(fit)), level=-10) fit } ## fitList[[chrTag]] <- ... rm(list=fields) # Not needed anymore - verbose && exit(verbose); + verbose && exit(verbose) } # for (kk ...) - verbose && enter(verbose, "Merging (independently) segmented chromosome", level=-50); + verbose && enter(verbose, "Merging (independently) segmented chromosome", level=-50) fitList <- as.list(fitList) - fit <- Reduce(append, fitList); + ## former Reduce() w/ append(..., addSplit = TRUE) + fit <- do.call(c, args = c(fitList, addSplit = TRUE)) # Not needed anymore - fitList <- NULL; + fitList <- NULL # Update parameters that otherwise may be incorrect - fit$params$seed <- seed; + fit$params$seed <- seed - verbose && str(verbose, fit, level=-10); - verbose && exit(verbose); + verbose && str(verbose, fit, level=-10) + verbose && exit(verbose) - segs <- as.data.frame(fit); + segs <- as.data.frame(fit) if (nrow(segs) < 6) { - verbose && print(verbose, segs, level=-10); + verbose && print(verbose, segs, level=-10) } else { - verbose && print(verbose, head(segs), level=-10); - verbose && print(verbose, tail(segs), level=-10); + verbose && print(verbose, head(segs), level=-10) + verbose && print(verbose, tail(segs), level=-10) } - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Return results # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - return(fit); + return(fit) } # if (nbrOfChromosomes > 1) - verbose && cat(verbose, "Chromosome: ", data$chrom[1L]); + verbose && cat(verbose, "Chromosome: ", data$chrom[1L]) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subset 'knownSegments' # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Keeping only current chromosome for 'knownSegments'", level=-10); + verbose && enter(verbose, "Keeping only current chromosome for 'knownSegments'", level=-10) # Assume no missing values - currChromosome <- data$chrom[1]; - verbose && cat(verbose, "Chromosome: ", currChromosome, level=-10); + currChromosome <- data$chrom[1] + verbose && cat(verbose, "Chromosome: ", currChromosome, level=-10) - knownSegments <- subset(knownSegments, chromosome == currChromosome); + knownSegments <- subset(knownSegments, chromosome == currChromosome) if (nrow(knownSegments) == 0L) { - knownSegments <- data.frame(chromosome=currChromosome, start=-Inf, end=+Inf); + knownSegments <- data.frame(chromosome=currChromosome, start=-Inf, end=+Inf) } - nbrOfSegments <- nrow(knownSegments); + nbrOfSegments <- nrow(knownSegments) - verbose && cat(verbose, "Known segments for this chromosome:", level=-10); - verbose && print(verbose, knownSegments, level=-10); + verbose && cat(verbose, "Known segments for this chromosome:", level=-10) + verbose && print(verbose, knownSegments, level=-10) - verbose && exit(verbose); + verbose && exit(verbose) # Sanity checks if (R_SANITY_CHECK) { # Here 'knownSegments' should specify at most a single chromosome - uChromosomes <- sort(unique(knownSegments$chromosome)); + uChromosomes <- sort(unique(knownSegments$chromosome)) if (length(uChromosomes) > 1) { - throw("INTERNAL ERROR: Argument 'knownSegments' specifies more than one chromosome: ", hpaste(uChromosomes)); + throw("INTERNAL ERROR: Argument 'knownSegments' specifies more than one chromosome: ", hpaste(uChromosomes)) } } # if (R_SANITY_CHECK) @@ -454,8 +455,8 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Sanity check of limitation /HB 2011-10-19 if (nbrOfSegments > 1) { - verbose && enter(verbose, "Segmenting multiple segments on current chromosome", level=-5); - verbose && cat(verbose, "Number of segments: ", nbrOfSegments, level=-5); + verbose && enter(verbose, "Segmenting multiple segments on current chromosome", level=-5) + verbose && cat(verbose, "Number of segments: ", nbrOfSegments, level=-5) # Create a splitter-only CBS object dataS <- data.frame(y=c(0,0), chromosome=c(1,2), x=c(0,0)) @@ -463,12 +464,12 @@ splitter <- segmentByCBS(dataS) dataS <- NULL suppressWarnings({ - splitter <- extractSegment(splitter, 2); + splitter <- extractSegment(splitter, 2) # Sanity check if (R_SANITY_CHECK) { - stopifnot(nbrOfSegments(splitter, splitters=TRUE) == 1); + .stop_if_not(nbrOfSegments(splitter, splitters=TRUE) == 1) } # if (R_SANITY_CHECK) - }); + }) # Generate random seeds? @@ -483,15 +484,15 @@ fitList <- listenv() for (jj in seq_len(nbrOfSegments)) { - seg <- knownSegments[jj,]; - chromosomeJJ <- seg$chromosome; - xStart <- seg$start; - xEnd <- seg$end; - segTag <- sprintf("chr%s:(%s,%s)", chromosomeJJ, xStart, xEnd); - verbose && enter(verbose, sprintf("Segment #%d ('%s') of %d", jj, segTag, nbrOfSegments), level=-10); + seg <- knownSegments[jj,] + chromosomeJJ <- seg$chromosome + xStart <- seg$start + xEnd <- seg$end + segTag <- sprintf("chr%s:(%s,%s)", chromosomeJJ, xStart, xEnd) + verbose && enter(verbose, sprintf("Segment #%d ('%s') of %d", jj, segTag, nbrOfSegments), level=-10) ## Nothing to do? - isSplitter <- (is.na(xStart) && is.na(xEnd)); + isSplitter <- (is.na(xStart) && is.na(xEnd)) if (isSplitter) { fit <- splitter verbose && cat(verbose, "Nothing to segment. Inserting an explicit splitter.", level=-10) @@ -540,11 +541,11 @@ # Sanity checks if (R_SANITY_CHECK) { - stopifnot(nrow(fit$data) == nbrOfLoci) - stopifnot(all.equal(fit$data$y, y)) + .stop_if_not(nrow(fit$data) == nbrOfLoci) + .stop_if_not(all.equal(fit$data$y, y)) # Assert weights were used - stopifnot(!hasWeights || !is.null(fit$data$w)) + .stop_if_not(!hasWeights || !is.null(fit$data$w)) } # if (R_SANITY_CHECK) segs <- as.data.frame(fit) @@ -557,7 +558,7 @@ # Sanity check if (R_SANITY_CHECK) { - stopifnot(TRUE && nbrOfSegments(fit, splitters=TRUE) > 0) + .stop_if_not(TRUE && nbrOfSegments(fit, splitters=TRUE) > 0) } # if (R_SANITY_CHECK) fit @@ -565,123 +566,123 @@ rm(list=fields) # Not needed anymore - verbose && exit(verbose); + verbose && exit(verbose) } # for (jj ...) - verbose && enter(verbose, "Merging (independently) segmented known segments", level=-10); - verbose && cat(verbose, "Number of segments: ", length(fitList), level=-10); + verbose && enter(verbose, "Merging (independently) segmented known segments", level=-10) + verbose && cat(verbose, "Number of segments: ", length(fitList), level=-10) fitList <- as.list(fitList) verbose && str(verbose, fitList, level=-50) - appendT <- function(...) append(..., addSplit=FALSE); - fit <- Reduce(appendT, fitList); + ## former Reduce() w/ append(..., addSplit = FALSE) + fit <- do.call(c, args = c(fitList, addSplit = FALSE)) # Not needed anymore - fitList <- NULL; + fitList <- NULL # Update parameters that otherwise may be incorrect - fit$params$seed <- seed; + fit$params$seed <- seed - verbose && str(verbose, fit, level=-10); - verbose && exit(verbose); + verbose && str(verbose, fit, level=-10) + verbose && exit(verbose) - segs <- getSegments(fit); + segs <- getSegments(fit) if (nrow(segs) > 6) { - verbose && print(verbose, head(segs), level=-10); - verbose && print(verbose, tail(segs), level=-10); + verbose && print(verbose, head(segs), level=-10) + verbose && print(verbose, tail(segs), level=-10) } else { - verbose && print(verbose, segs, level=-10); + verbose && print(verbose, segs, level=-10) } # Sanity checks if (R_SANITY_CHECK) { - segs <- getSegments(fit); - stopifnot(all(segs$start[-1] >= segs$end[-nrow(segs)], na.rm=TRUE)); - stopifnot(all(diff(segs$start) >= 0, na.rm=TRUE)); ## FIXME: > 0 - stopifnot(all(diff(segs$end) >= 0, na.rm=TRUE)); ## FIXME: > 0 + segs <- getSegments(fit) + .stop_if_not(all(segs$start[-1] >= segs$end[-nrow(segs)], na.rm=TRUE)) + .stop_if_not(all(diff(segs$start) >= 0, na.rm=TRUE)) ## FIXME: > 0 + .stop_if_not(all(diff(segs$end) >= 0, na.rm=TRUE)) ## FIXME: > 0 # if (nrow(fit$data) != length(y)) { - # print(c(nrow(fit$data), nrow(data))); + # print(c(nrow(fit$data), nrow(data))) # } - # stopifnot(nrow(fit$data) == nrow(data)); - # stopifnot(all(fit$data$chromosome == data$chromosome)); - # stopifnot(all(fit$data$x == data$x)); - # stopifnot(all(fit$data$index == data$index)); - # stopifnot(all.equal(fit$data$y, data$y)); + # .stop_if_not(nrow(fit$data) == nrow(data)) + # .stop_if_not(all(fit$data$chromosome == data$chromosome)) + # .stop_if_not(all(fit$data$x == data$x)) + # .stop_if_not(all(fit$data$index == data$index)) + # .stop_if_not(all.equal(fit$data$y, data$y)) } # if (R_SANITY_CHECK) - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Return results # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - return(fit); + return(fit) } # if (nbrOfSegments > 1) - nbrOfSegments <- nrow(knownSegments); + nbrOfSegments <- nrow(knownSegments) # Sanity check if (R_SANITY_CHECK) { - stopifnot(nbrOfSegments <= 1); + .stop_if_not(nbrOfSegments <= 1) } # if (R_SANITY_CHECK) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Specific segment? # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (nbrOfSegments > 0) { - knownSegments <- subset(knownSegments, chromosome == chromosome); - nbrOfSegments <- nrow(knownSegments); + knownSegments <- subset(knownSegments, chromosome == chromosome) + nbrOfSegments <- nrow(knownSegments) # Sanity check if (R_SANITY_CHECK) { - stopifnot(nbrOfSegments <= 1); + .stop_if_not(nbrOfSegments <= 1) } # if (R_SANITY_CHECK) } if (nbrOfSegments == 1) { - seg <- knownSegments[1,]; - chromosomeJJ <- seg$chromosome; - xStart <- seg$start; - xEnd <- seg$end; - segTag <- sprintf("chr%s:(%s,%s)", chromosomeJJ, xStart, xEnd); - verbose && printf(verbose, "Extracting segment '%s'", segTag, level=-50); + seg <- knownSegments[1,] + chromosomeJJ <- seg$chromosome + xStart <- seg$start + xEnd <- seg$end + segTag <- sprintf("chr%s:(%s,%s)", chromosomeJJ, xStart, xEnd) + verbose && printf(verbose, "Extracting segment '%s'", segTag, level=-50) # Extract subset of data and parameters for this segment - data <- subset(data, chrom == chromosomeJJ & xStart <= x & x <= xEnd); - verbose && str(verbose, data, level=-50); + data <- subset(data, chrom == chromosomeJJ & xStart <= x & x <= xEnd) + verbose && str(verbose, data, level=-50) } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Retrieving segmentation function # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Retrieving the fit function", level=-50); + verbose && enter(verbose, "Retrieving the fit function", level=-50) # We need to attach the 'DNAcopy' package pkgName <- "DNAcopy" - use(pkgName); - pkg <- packageDescription(pkgName); - pkgVer <- pkg$Version; - pkgDetails <- sprintf("%s v%s", pkgName, pkgVer); - - methodName <- "segment"; - verbose && cat(verbose, "Method: ", methodName, level=-50); - verbose && cat(verbose, "Package: ", pkgDetails, level=-50); + use(pkgName) + pkg <- packageDescription(pkgName) + pkgVer <- pkg$Version + pkgDetails <- sprintf("%s v%s", pkgName, pkgVer) + + methodName <- "segment" + verbose && cat(verbose, "Method: ", methodName, level=-50) + verbose && cat(verbose, "Package: ", pkgDetails, level=-50) # Get the fit function for the segmentation method -# fitFcn <- getExportedValue(pkgName, methodName); - fitFcn <- getFromNamespace(methodName, pkgName); - verbose && str(verbose, "Function: ", fitFcn, level=-50); - formals <- formals(fitFcn); - verbose && cat(verbose, "Formals:", level=-50); - verbose && str(verbose, formals, level=-50); - verbose && exit(verbose); +# fitFcn <- getExportedValue(pkgName, methodName) + fitFcn <- getFromNamespace(methodName, pkgName) + verbose && str(verbose, "Function: ", fitFcn, level=-50) + formals <- formals(fitFcn) + verbose && cat(verbose, "Formals:", level=-50) + verbose && str(verbose, formals, level=-50) + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setting up arguments to pass to segmentation function # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Setting up method arguments", level=-50); + verbose && enter(verbose, "Setting up method arguments", level=-50) - verbose && enter(verbose, "Setting up ", pkgName, " data structure", level=-50); + verbose && enter(verbose, "Setting up ", pkgName, " data structure", level=-50) - sampleName <- "y"; # This is going to be the name of the data field + sampleName <- "y" # This is going to be the name of the data field # Supress all warnings, in order to avoid warnings by DNAcopy::CNA() # on "array has repeated maploc positions". Ideally we should filter @@ -694,97 +695,97 @@ maploc = data$x, sampleid = sampleName, presorted = TRUE - ); - }); - verbose && str(verbose, cnData, level=-50); - names(cnData)[3] <- sampleName; - verbose && str(verbose, cnData, level=-50); - verbose && exit(verbose); + ) + }) + verbose && str(verbose, cnData, level=-50) + names(cnData)[3] <- sampleName + verbose && str(verbose, cnData, level=-50) + verbose && exit(verbose) # Sanity check if (R_SANITY_CHECK) { # (because all loci with unknown locations have already been dropped) - stopifnot(nrow(cnData) == nrow(data)); + .stop_if_not(nrow(cnData) == nrow(data)) } # if (R_SANITY_CHECK) - userArgs <- list(...); + userArgs <- list(...) if (length(userArgs) > 0) { - verbose && cat(verbose, "User arguments:", level=-50); - verbose && str(verbose, userArgs, level=-50); + verbose && cat(verbose, "User arguments:", level=-50) + verbose && str(verbose, userArgs, level=-50) } # Check if 'sbdry' can/should be precalculated. This uses memoization # so that next time you segment with same 'nperm', 'alpha' and 'eta' # parameters, there will be much less startup overhead. if (length(userArgs) > 0 && !is.element("sbdry", names(userArgs))) { - keys <- c("nperm", "alpha", "eta"); - keep <- is.element(keys, names(userArgs)); + keys <- c("nperm", "alpha", "eta") + keep <- is.element(keys, names(userArgs)) if (any(keep)) { - verbose && enter(verbose, "Precalculating argument 'sbdry' (with memoization)", level=-50); + verbose && enter(verbose, "Precalculating argument 'sbdry' (with memoization)", level=-50) # Precalculate boundaries - argsT <- formals[keys]; - keys <- keys[keep]; - argsT[keys] <- userArgs[keys]; - argsT$verbose <- less(verbose, 5); - sbdry <- do.call(getbdry2, args=argsT); - userArgs$sbdry <- sbdry; - verbose && exit(verbose); + argsT <- formals[keys] + keys <- keys[keep] + argsT[keys] <- userArgs[keys] + argsT$verbose <- less(verbose, 5) + sbdry <- do.call(getbdry2, args=argsT) + userArgs$sbdry <- sbdry + verbose && exit(verbose) } } - params <- list(); + params <- list() if (hasWeights) { - params$weights <- data$w; + params$weights <- data$w } if (undo > 0) { - params$undo.splits <- "sdundo"; - params$undo.SD <- undo; + params$undo.splits <- "sdundo" + params$undo.SD <- undo } - verbose && cat(verbose, "Segmentation parameters:", level=-50); - verbose && str(verbose, params, level=-50); + verbose && cat(verbose, "Segmentation parameters:", level=-50) + verbose && str(verbose, params, level=-50) # Assign/overwrite by user arguments if (length(userArgs) > 0) { for (ff in names(userArgs)) { - params[[ff]] <- userArgs[[ff]]; + params[[ff]] <- userArgs[[ff]] } } - verbose && cat(verbose, "Segmentation and user parameters:", level=-50); - verbose && str(verbose, params, level=-50); + verbose && cat(verbose, "Segmentation and user parameters:", level=-50) + verbose && str(verbose, params, level=-50) # Cleaning out unknown parameters - keep <- (names(params) %in% names(formals)); - params <- params[keep]; + keep <- (names(params) %in% names(formals)) + params <- params[keep] - args <- c(list(cnData), params, verbose=as.logical(verbose)); - verbose && cat(verbose, "Final arguments:", level=-50); - verbose && str(verbose, args, level=-50); + args <- c(list(cnData), params, verbose=as.logical(verbose)) + verbose && cat(verbose, "Final arguments:", level=-50) + verbose && str(verbose, args, level=-50) - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Calling segmentation function # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, sprintf("Calling %s() of %s", methodName, pkgName), level=-50); + verbose && enter(verbose, sprintf("Calling %s() of %s", methodName, pkgName), level=-50) # There are a few cases where we can/need to do a dummy segmentation # based on a single data points: # (a) WORKAROUND for the case when there are no data points. # (b) SPEEDUP: When undo=+Inf we don't really have to segment. - nbrOfNonMissingLoci <- sum(!is.na(cnData$y)); + nbrOfNonMissingLoci <- sum(!is.na(cnData$y)) if (nbrOfNonMissingLoci == 0) { - args[[1]] <- CNA(genomdat=0, chrom=0, maploc=0); + args[[1]] <- CNA(genomdat=0, chrom=0, maploc=0) if (hasWeights) args$weights <- 1.0 } else if (undo == +Inf) { - args[[1]] <- CNA(genomdat=0, chrom=0, maploc=0); + args[[1]] <- CNA(genomdat=0, chrom=0, maploc=0) if (hasWeights) args$weights <- 1.0 - verbose && cat(verbose, "Skipping identification of new change points (undo=+Inf)", level=-50); + verbose && cat(verbose, "Skipping identification of new change points (undo=+Inf)", level=-50) } @@ -801,170 +802,170 @@ # Does not work, because some internal function of the fit function # may only be accessible from within the namespace # How to do this for DNAcopy::segment()? /HB -## fit <- do.call(fitFcn, args); +## fit <- do.call(fitFcn, args) # This works, but requires that one loads the package and that the # function is not masked in the search() path. t <- system.time({ - fit <- do.call(methodName, args); - }); + fit <- do.call(methodName, args) + }, gcFirst = FALSE) # Drop the 'call' (because it will be huge due to the do.call() call) - fit$call <- NULL; - }); - attr(fit, "processingTime") <- t; - attr(fit, "pkgDetails") <- pkgDetails; - attr(fit, "randomSeed") <- seed; + fit$call <- NULL + }) + attr(fit, "processingTime") <- t + attr(fit, "pkgDetails") <- pkgDetails + attr(fit, "randomSeed") <- seed # WORKAROUND for the case when there are no data points. if (nbrOfNonMissingLoci == 0) { # Drop dummy data point... - fit$data <- cnData; ## fit$data[-1,,drop=FALSE]; + fit$data <- cnData ## fit$data[-1,,drop=FALSE] # ...dummy region found - output <- fit$output; - segRows <- fit$segRows; + output <- fit$output + segRows <- fit$segRows # Sanity check if (R_SANITY_CHECK) { - stopifnot(nrow(output) == 1); + .stop_if_not(nrow(output) == 1) } # if (R_SANITY_CHECK) # Was a region specified? if (nbrOfSegments == 1) { - seg <- knownSegments[1,]; - output$ID <- sampleName; - output$chrom <- seg$chromosome; + seg <- knownSegments[1,] + output$ID <- sampleName + output$chrom <- seg$chromosome if (is.finite(seg$start)) { - output$loc.start <- seg$start; + output$loc.start <- seg$start } if (is.finite(seg$end)) { - output$loc.end <- seg$end; + output$loc.end <- seg$end } - output$num.mark <- 0L; - output$seg.mean <- NA_real_; - segRows[1,] <- NA_integer_; + output$num.mark <- 0L + output$seg.mean <- NA_real_ + segRows[1,] <- NA_integer_ } else { - output <- output[-1,,drop=FALSE]; - segRows <- segRows[-1,,drop=FALSE]; + output <- output[-1,,drop=FALSE] + segRows <- segRows[-1,,drop=FALSE] } - fit$output <- output; - fit$segRows <- segRows; + fit$output <- output + fit$segRows <- segRows } else if (undo == +Inf) { # Drop dummy data point... - fit$data <- cnData; ## fit$data[-1,,drop=FALSE]; + fit$data <- cnData ## fit$data[-1,,drop=FALSE] # ...dummy region found - output <- fit$output; - segRows <- fit$segRows; + output <- fit$output + segRows <- fit$segRows # Sanity check if (R_SANITY_CHECK) { - stopifnot(nrow(output) == 1); + .stop_if_not(nrow(output) == 1) } # if (R_SANITY_CHECK) # Was a region specified? if (nbrOfSegments == 1) { - seg <- knownSegments[1,]; - output$ID <- sampleName; - output$chrom <- seg$chromosome; + seg <- knownSegments[1,] + output$ID <- sampleName + output$chrom <- seg$chromosome if (is.finite(seg$start)) { - output$loc.start <- seg$start; + output$loc.start <- seg$start } else { - output$loc.start <- min(cnData$maploc, na.rm=TRUE); + output$loc.start <- min(cnData$maploc, na.rm=TRUE) } if (is.finite(seg$end)) { - output$loc.end <- seg$end; + output$loc.end <- seg$end } else { - output$loc.end <- max(cnData$maploc, na.rm=TRUE); + output$loc.end <- max(cnData$maploc, na.rm=TRUE) } } - output$num.mark <- nrow(fit$data); - output$seg.mean <- mean(fit$data$y, na.rm=TRUE); - segRows$endRow <- nrow(fit$data); + output$num.mark <- nrow(fit$data) + output$seg.mean <- mean(fit$data$y, na.rm=TRUE) + segRows$endRow <- nrow(fit$data) - fit$output <- output; - fit$segRows <- segRows; + fit$output <- output + fit$segRows <- segRows } # if (undo == +Inf) - verbose && cat(verbose, "Captured output that was sent to stdout:", level=-50); - stdout <- paste(stdout, collapse="\n"); - verbose && cat(verbose, stdout, level=-50); + verbose && cat(verbose, "Captured output that was sent to stdout:", level=-50) + stdout <- paste(stdout, collapse="\n") + verbose && cat(verbose, stdout, level=-50) - verbose && cat(verbose, "Fitting time (in seconds):", level=-50); - verbose && print(verbose, t, level=-50); + verbose && cat(verbose, "Fitting time (in seconds):", level=-50) + verbose && print(verbose, t, level=-50) - verbose && cat(verbose, "Fitting time per 1000 loci (in seconds):", level=-50); - verbose && print(verbose, 1000*t/nbrOfLoci, level=-50); + verbose && cat(verbose, "Fitting time per 1000 loci (in seconds):", level=-50) + verbose && print(verbose, 1000*t/nbrOfLoci, level=-50) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Restructure # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Restructuring results", level=-50); + verbose && enter(verbose, "Restructuring results", level=-50) # Coerce - fit$output$num.mark <- as.integer(fit$output$num.mark); + fit$output$num.mark <- as.integer(fit$output$num.mark) # Coerce 'chrom' to a plain integer - fit$data$chrom <- unclass(fit$data$chrom); + fit$data$chrom <- unclass(fit$data$chrom) # Store genomewide index - fit$data$index <- data$index; + fit$data$index <- data$index # Store weights - fit$data$w <- data$w; + fit$data$w <- data$w # Not needed anymore - data <- NULL; + data <- NULL - verbose && exit(verbose); + verbose && exit(verbose) # Store also interesting parameters to DNAcopy::segment() - keys <- setdiff(names(formals), c("x", "weights", "sbdry", "verbose")); - keys <- c(keys, "undo", "seed"); - keep <- is.element(names(params), keys); - keep <- names(params)[keep]; - params <- params[keep]; - params$undo <- undo; - params$joinSegments <- joinSegments; - params$knownSegments <- knownSegments; - params$seed <- seed; - fit$params <- params; + keys <- setdiff(names(formals), c("x", "weights", "sbdry", "verbose")) + keys <- c(keys, "undo", "seed") + keep <- is.element(names(params), keys) + keep <- names(params)[keep] + params <- params[keep] + params$undo <- undo + params$joinSegments <- joinSegments + params$knownSegments <- knownSegments + params$seed <- seed + fit$params <- params -# class(fit) <- c("CBS", class(fit)); - class(fit) <- c("CBS", "AbstractCBS"); +# class(fit) <- c("CBS", class(fit)) + class(fit) <- c("CBS", "AbstractCBS") # Sanity checks if (R_SANITY_CHECK) { - segRows <- fit$segRows; - stopifnot(all(segRows[,1] <= segRows[,2], na.rm=TRUE)); - stopifnot(all(segRows[-nrow(segRows),2] < segRows[-1,1], na.rm=TRUE)); + segRows <- fit$segRows + .stop_if_not(all(segRows[,1] <= segRows[,2], na.rm=TRUE)) + .stop_if_not(all(segRows[-nrow(segRows),2] < segRows[-1,1], na.rm=TRUE)) } # if (R_SANITY_CHECK) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Renaming column names # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - data <- getLocusData(fit); - names <- colnames(data); - names <- gsub("chrom", "chromosome", names, fixed=TRUE); - names <- gsub("maploc", "x", names, fixed=TRUE); - colnames(data) <- names; + data <- getLocusData(fit) + names <- colnames(data) + names <- gsub("chrom", "chromosome", names, fixed=TRUE) + names <- gsub("maploc", "x", names, fixed=TRUE) + colnames(data) <- names # Drop 'CNA' class and DNAcopy attributes - class(data) <- c("data.frame"); - attr(data, "data.type") <- NULL; + class(data) <- c("data.frame") + attr(data, "data.type") <- NULL - fit$data <- data; + fit$data <- data - segs <- fit$output; + segs <- fit$output - names <- colnames(segs); - names <- gsub("ID", "sampleName", names, fixed=TRUE); - names <- gsub("seg.mean", "mean", names, fixed=TRUE); - names <- gsub("chrom", "chromosome", names, fixed=TRUE); - names <- gsub("num.mark", "nbrOfLoci", names, fixed=TRUE); - names <- gsub("loc.", "", names, fixed=TRUE); # loc.start, loc.end - colnames(segs) <- names; - fit$output <- segs; + names <- colnames(segs) + names <- gsub("ID", "sampleName", names, fixed=TRUE) + names <- gsub("seg.mean", "mean", names, fixed=TRUE) + names <- gsub("chrom", "chromosome", names, fixed=TRUE) + names <- gsub("num.mark", "nbrOfLoci", names, fixed=TRUE) + names <- gsub("loc.", "", names, fixed=TRUE) # loc.start, loc.end + colnames(segs) <- names + fit$output <- segs # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -972,22 +973,22 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (joinSegments) { if (nbrOfSegments == 1) { - starts <- knownSegments$start; - ends <- knownSegments$end; - if (is.infinite(starts)) starts <- segs$start; - if (is.infinite(ends)) ends <- segs$end; - range <- range(c(starts, ends), na.rm=TRUE); + starts <- knownSegments$start + ends <- knownSegments$end + if (is.infinite(starts)) starts <- segs$start + if (is.infinite(ends)) ends <- segs$end + range <- range(c(starts, ends), na.rm=TRUE) } else { - range <- NULL; + range <- NULL } - fit <- joinSegments(fit, range=range, verbose=less(verbose, 10)); + fit <- joinSegments(fit, range=range, verbose=less(verbose, 10)) # Sanity checks if (R_SANITY_CHECK) { - segRows <- fit$segRows; - stopifnot(all(segRows[,1] <= segRows[,2], na.rm=TRUE)); - stopifnot(all(segRows[-nrow(segRows),2] < segRows[-1,1], na.rm=TRUE)); + segRows <- fit$segRows + .stop_if_not(all(segRows[,1] <= segRows[,2], na.rm=TRUE)) + .stop_if_not(all(segRows[-nrow(segRows),2] < segRows[-1,1], na.rm=TRUE)) } # if (R_SANITY_CHECK) } @@ -1003,158 +1004,34 @@ } - verbose && cat(verbose, "Results object:", level=-10); - verbose && str(verbose, fit, level=-10); + verbose && cat(verbose, "Results object:", level=-10) + verbose && str(verbose, fit, level=-10) - verbose && exit(verbose); + verbose && exit(verbose) - verbose && exit(verbose); + verbose && exit(verbose) - fit; + fit }) # segmentByCBS() setMethodS3("segmentByCBS", "data.frame", function(y, ...) { # To please R CMD check - data <- y; + data <- y - y <- data$y; + y <- data$y if (is.null(y)) { - y <- data$cn; + y <- data$cn if (is.null(y)) { - y <- data$CT; + y <- data$CT } } - segmentByCBS(y=y, chromosome=data$chromosome, x=data$x, index=data$index, w=data$w, ...); + segmentByCBS(y=y, chromosome=data$chromosome, x=data$x, index=data$index, w=data$w, ...) }) setMethodS3("segmentByCBS", "CBS", function(...) { - resegment(...); + resegment(...) }) # segmentByCBS() - - - -############################################################################ -# HISTORY: -# 2013-09-26 -# o CLEANUP: Now segmentByCBS() no longer attaches 'R.cache' in its -# internal getbdry2() function, but only loads its namespace. -# 2012-09-20 -# o BUG FIX: segmentByCBS(... knownSegments) could return segments for -# chromosome 0 even though it did not exist in the input data. -# 2012-09-13 -# o SPEEDUP: Now segmentByCBS(..., undo=+Inf) returns much faster, which -# is possible because there is no need to identify new change points. -# o CONSISTENCY FIX: Changed the behavior of extreme values of argument -# 'undo' to segmentByCBS() such that 'undo=0' (was 'undo=+Inf') now -# means that it will not ask DNAcopy::segment() to undo the segmentation, -# and such that 'undo=+Inf' means that no changepoints will be identified. -# The latter case allows you to effectively skip the segmentation but -# still calculate all the CBS statistics across a set of known segments -# via segmentByCBS(..., undo=+Inf, knownSegments=knownSegments). -# 2012-06-05 -# o Now segmentByCBS() for data frame:s does a better job identifying -# the CN signals. -# 2012-02-22 -# o BUG FIX: segmentByCBS(..., knownSegments=knownSegments) would -# incorrectly throw a sanity-check exception if 'knownSegments' -# contains a segment with 'start' and 'stop' positions being equal. -# 2011-11-17 -# o BUG FIX: Now parameter 'seed' is preserved by segmentByCBS(). -# o Added segmentByCBS() for CBS, which is just a wrapper for resegment(). -# o ROBUSTNESS: Now segmentByCBS() does more validation of 'knownSegments'. -# o ROBUSTNESS: Added more sanity checks for (start,end) of segments -# after merging segments that have been segmented separately due -# to 'knownSegments'. -# o Adjusted segmentByCBS() such that it can handle 'knownSegments' with -# chromosome boundaries given as -Inf and +Inf. -# 2011-11-15 -# o Now more segmentation parameters are stored in the CBS object. -# o SPEEDUP: Now segmentByCBS() will use memoization to retrieve -# so called "sequential boundaries for early stopping", iff any of -# the DNAcopy::segment() arguments 'alpha', 'nperm' and 'eta' are -# specified. See also DNAcopy::getbdry(). -# 2011-10-20 -# o Now the result of segmentByCBS() is guaranteed to include the -# segments given by argument 'knownSegments'. Before empty segments -# would be dropped. -# 2011-10-19 -# o Replaced argument 'knownCPs' with 'knownSegments' for segmentByCBS(). -# o Added support for specifying known change points in segmentByCBS(). -# 2011-10-02 -# o Added segmentByCBS() for data.frame such that the locus-level data -# arguments can also be passed via a data.frame. -# 2011-09-04 -# o ROBUSTNESS: Added drop=FALSE to matrix subsettings. -# 2011-09-03 -# o Now segmentByCBS() always returns a CBS object. To coerce to a -# DNAcopy object (as defined in the DNAcopy class) use as.DNAcopy(). -# o Removed argument 'columnNamesFlavor'. -# 2011-09-02 -# o Forgot to pass on argument 'index' in multi-chromosome processing. -# 2011-09-01 -# o GENERALIZATION: Now segmentByCBS() can process multiple chromosomes. -# o Now the random seed is set at the very beginning of the code, which -# should not make a difference, i.e. it should give identical results. -# 2011-06-14 -# o GENERALIZATION: Added argument 'columnNamesFlavor' to segmentByCBS(). -# o CONVENTION: Changed the column names of returned data frames. -# They now follow the camelCase naming convention and are shorter. -# 2011-05-31 -# o Now explicitly using DNAcopy::nnn() to call DNAcopy functions. -# 2011-04-07 -# o ROBUSTNESS: Added 'segRows' field validation in segmentByCBS(). -# 2010-12-01 -# o Now segmentByCBS() is utilizing 'segRows' from DNAcopy::segment(), -# which makes it possible to drop all code of trying to infer which -# loci belong to which segments. -# o Now the 'data' object returned also contains column 'index'. -# 2010-12-01 -# o Now the genomewide index is always stored in the 'data' field. -# o Added argument 'index' to segmentByCBS(). -# 2010-11-30 -# o Now segmentByCBS() returns a field 'lociToExclude'. -# 2010-11-28 -# o BUG FIX: The algorithm in segmentByCBS() that infers which loci( of -# the ones share the same genomic positions) that should be exclude -# from each segment did not take missing signals into account. -# 2010-11-21 -# o Now segmentByCBS(..., joinSegments=TRUE) utilizes joinSegments(). -# 2010-11-20 -# o Now it is possible to specify the boundaries of the regions to be -# segmented as known change points via argument 'knownCPs'. -# 2010-11-19 -# o Added argument 'joinSegments' to segmentByCBS() in order to specify -# if neighboring segments should be joined or not. -# o Now segmentByCBS() returns an object of class CBS. -# o Now segmentByCBS() allows for unknown genomic positions. -# o Now segmentByCBS() allows for missing signals. -# o Added argument 'preservOrder' to segmentByCBS(). If TRUE, then -# the loci in the returned 'data' object are ordered as the input -# data, otherwise it is ordered along the genome. -# 2010-11-16 -# o Now the 'data' object returned by segmentByCBS() contains field -# 'index' if and only if the loci had to be reorder along the genome. -# 2010-11-02 -# o Added argument 'undo' to segmentByCBS(), which corresponds to -# undo.splits="sdundo" and undo.SD=undo, if undo < Inf. -# 2010-10-25 -# o Now segmentByCBS() also returns element 'lociNotPartOfSegment', -# if there are segments that share end points, which can happen if -# a change point is called in middle of a set of loci that have the -# same genomic positions. In such cases, 'lociNotPartOfSegment' -# specifies which loci are *not* part of which segment. Then by -# identifying the loci that are within a segment by their positions -# and excluding any of the above, one knows exactly which loci -# CBS included in each segment. -# 2010-10-02 -# o Added argument optional 'chromosome'. -# 2010-09-02 -# o ROBUSTNESS: Now segmentByCBS() also works if there are no data points. -# 2010-07-09 -# o Created from segmentByCBS() for RawGenomicSignals in aroma.core. -# The latter will eventually call this method. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/segmentByNonPairedPSCBS.R r-cran-pscbs-0.64.0/R/segmentByNonPairedPSCBS.R --- r-cran-pscbs-0.63.0/R/segmentByNonPairedPSCBS.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/segmentByNonPairedPSCBS.R 2018-08-12 21:30:44.000000000 +0000 @@ -99,189 +99,172 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'CT': - disallow <- c("Inf"); - CT <- Arguments$getDoubles(CT, disallow=disallow); - nbrOfLoci <- length(CT); - length2 <- rep(nbrOfLoci, times=2); + disallow <- c("Inf") + CT <- Arguments$getDoubles(CT, disallow=disallow) + nbrOfLoci <- length(CT) + length2 <- rep(nbrOfLoci, times=2) # Argument 'betaT': - betaT <- Arguments$getDoubles(betaT, length=length2, disallow="Inf"); + betaT <- Arguments$getDoubles(betaT, length=length2, disallow="Inf") # Argument 'flavor': - flavor <- match.arg(flavor); - knownFlavors <- eval(formals(segmentByPairedPSCBS.default)$flavor); + flavor <- match.arg(flavor) + knownFlavors <- eval(formals(segmentByPairedPSCBS.default)$flavor, enclos = baseenv()) if (!is.element(flavor, knownFlavors)) { - throw("Segmentation flavor is not among the supported ones (", paste(sprintf("\"%s\"", knownFlavors), collapse=", "), "): ", flavor); + throw("Segmentation flavor is not among the supported ones (", paste(sprintf("\"%s\"", knownFlavors), collapse=", "), "): ", flavor) } # Argument 'tauA' & 'tauB': if (!is.na(tauA) && !is.na(tauB)) { - tauA <- Arguments$getDouble(tauA); - tauB <- Arguments$getDouble(tauB); + tauA <- Arguments$getDouble(tauA) + tauB <- Arguments$getDouble(tauB) if (tauB < tauA) { - throw("Argument 'tauA' must be smaller than 'tauB': ", tauA, " > ", tauB); + throw("Argument 'tauA' must be smaller than 'tauB': ", tauA, " > ", tauB) } - tauA <- Arguments$getDouble(tauA, range=c(-0.5, +0.5)); - tauB <- Arguments$getDouble(tauB, range=c(+0.5, +1.5)); + tauA <- Arguments$getDouble(tauA, range=c(-0.5, +0.5)) + tauB <- Arguments$getDouble(tauB, range=c(+0.5, +1.5)) } # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Segmenting non-paired tumor signals using Non-paired PSCBS"); + verbose && enter(verbose, "Segmenting non-paired tumor signals using Non-paired PSCBS") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setup input data # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && cat(verbose, "Number of loci: ", nbrOfLoci); + verbose && cat(verbose, "Number of loci: ", nbrOfLoci) # SNPs are identifies as those loci that have non-missing 'betaT' - isSnp <- !is.na(betaT); - nbrOfSnps <- sum(isSnp); - verbose && cat(verbose, "Number of SNPs: ", nbrOfSnps); + isSnp <- !is.na(betaT) + nbrOfSnps <- sum(isSnp) + verbose && cat(verbose, "Number of SNPs: ", nbrOfSnps) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Call tumor "genotypes" # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Calling \"genotypes\" from tumor allele B fractions"); - verbose && str(verbose, betaT); + verbose && enter(verbose, "Calling \"genotypes\" from tumor allele B fractions") + verbose && str(verbose, betaT) if (is.na(tauA) && is.na(tauB)) { - mBAF <- abs(betaT - 1/2); - fitT <- findPeaksAndValleys(mBAF); - type <- NULL; rm(list="type"); # To please 'R CMD check'. - fitT <- subset(fitT, type == "peak"); - o <- order(fitT$density, decreasing=TRUE); - fitT <- fitT[o,]; - fitT <- fitT[1,]; - z <- mBAF[mBAF >= fitT$x] - fitT$x; - q <- quantile(z, probs=0.95, na.rm=TRUE, names=FALSE); - qU <- fitT$x+q; - verbose && cat(verbose, "Upper quantile: ", qU); - qL <- fitT$x - q; - verbose && cat(verbose, "Symmetric lower quantile: ", qL); - tauA <- 1/2-qL; - tauB <- 1/2+qL; - verbose && cat(verbose, "(tauA, tauB) estimates: (%g,%g)", tauA, tauB); + mBAF <- abs(betaT - 1/2) + fitT <- findPeaksAndValleys(mBAF) + type <- NULL; rm(list="type") # To please 'R CMD check'. + fitT <- subset(fitT, type == "peak") + o <- order(fitT$density, decreasing=TRUE) + fitT <- fitT[o,] + fitT <- fitT[1,] + z <- mBAF[mBAF >= fitT$x] - fitT$x + q <- quantile(z, probs=0.95, na.rm=TRUE, names=FALSE) + qU <- fitT$x+q + verbose && cat(verbose, "Upper quantile: ", qU) + qL <- fitT$x - q + verbose && cat(verbose, "Symmetric lower quantile: ", qL) + tauA <- 1/2-qL + tauB <- 1/2+qL + verbose && cat(verbose, "(tauA, tauB) estimates: (%g,%g)", tauA, tauB) # Sanity check on (tauA, tauB) estimates if (tauB < tauA) { - throw("Failed to estimate (tauA, tauB). The estimate 'tauA' is greater than 'tauB', which it should not: ", tauA, " > ", tauB); + throw("Failed to estimate (tauA, tauB). The estimate 'tauA' is greater than 'tauB', which it should not: ", tauA, " > ", tauB) } - tauA <- Arguments$getDouble(tauA, range=c(-0.5, +0.5)); - tauB <- Arguments$getDouble(tauB, range=c(+0.5, +1.5)); + tauA <- Arguments$getDouble(tauA, range=c(-0.5, +0.5)) + tauB <- Arguments$getDouble(tauB, range=c(+0.5, +1.5)) } - verbose && cat(verbose, "Homozygous treshholds:"); - verbose && print(verbose, c(tauA, tauB)); + verbose && cat(verbose, "Homozygous treshholds:") + verbose && print(verbose, c(tauA, tauB)) - isHomA <- isSnp & (betaT <= tauA); - isHomB <- isSnp & (betaT >= tauB); - isHom <- (isHomA | isHomB); - isHet <- isSnp & !isHom; + isHomA <- isSnp & (betaT <= tauA) + isHomB <- isSnp & (betaT >= tauB) + isHom <- (isHomA | isHomB) + isHet <- isSnp & !isHom # Tumor proxy for germline genotypes - naValue <- NA_real_; - muNx <- rep(naValue, times=length(betaT)); - muNx[isHomA] <- 0; - muNx[isHet] <- 1/2; - muNx[isHomB] <- 1; + naValue <- NA_real_ + muNx <- rep(naValue, times=length(betaT)) + muNx[isHomA] <- 0 + muNx[isHet] <- 1/2 + muNx[isHomB] <- 1 # Not needed anymore - isHomA <- isHomB <- isHom <- isHet <- NULL; + isHomA <- isHomB <- isHom <- isHet <- NULL - verbose && cat(verbose, "Inferred germline genotypes (via tumor):"); - verbose && str(verbose, muNx); - verbose && print(verbose, table(muNx)); - verbose && exit(verbose); + verbose && cat(verbose, "Inferred germline genotypes (via tumor):") + verbose && str(verbose, muNx) + verbose && print(verbose, table(muNx)) + verbose && exit(verbose) - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Segment using Paired PSCBS segmentation # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Segment using Paired PSCBS"); - fit <- segmentByPairedPSCBS(CT=CT, betaT=betaT, muN=muNx, tbn=FALSE, flavor=flavor, ..., verbose=verbose); - verbose && exit(verbose); + verbose && enter(verbose, "Segment using Paired PSCBS") + fit <- segmentByPairedPSCBS(CT=CT, betaT=betaT, muN=muNx, tbn=FALSE, flavor=flavor, ..., verbose=verbose) + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Coerce fit object to Non-Paired PSCBS results # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Coercing to Non-Paired PSCBS results"); + verbose && enter(verbose, "Coercing to Non-Paired PSCBS results") - data <- fit$data; - class(data) <- gsub("PairedPSCNData", "NonPairedPSCNData", class(data), fixed=TRUE); -# class(data) <- c("NonPairedPSCNData", class(data)); - fit$data <- data; + data <- fit$data + class(data) <- gsub("PairedPSCNData", "NonPairedPSCNData", class(data), fixed=TRUE) +# class(data) <- c("NonPairedPSCNData", class(data)) + fit$data <- data # Not needed anymore - data <- NULL; + data <- NULL - segs <- fit$output; - class(segs) <- gsub("PairedPSCNSegments", "NonPairedPSCNSegments", class(segs), fixed=TRUE); -# class(segs) <- c("NonPairedPSCNSegments", class(segs)); - fit$output <- segs; + segs <- fit$output + class(segs) <- gsub("PairedPSCNSegments", "NonPairedPSCNSegments", class(segs), fixed=TRUE) +# class(segs) <- c("NonPairedPSCNSegments", class(segs)) + fit$output <- segs # Not needed anymore - segs <- NULL; + segs <- NULL - params <- fit$params; - params$tauA <- tauA; - params$tauB <- tauB; - fit$params <- params; + params <- fit$params + params$tauA <- tauA + params$tauB <- tauB + fit$params <- params # Not needed anymore - params <- NULL; + params <- NULL -# class(fit) <- gsub("PairedPSCBS", "NonPairedPSCBS", class(fit), fixed=TRUE); - class(fit) <- c("NonPairedPSCBS", class(fit)); +# class(fit) <- gsub("PairedPSCBS", "NonPairedPSCBS", class(fit), fixed=TRUE) + class(fit) <- c("NonPairedPSCBS", class(fit)) - verbose && exit(verbose); + verbose && exit(verbose) - verbose && print(verbose, head(as.data.frame(fit))); - verbose && print(verbose, tail(as.data.frame(fit))); + verbose && print(verbose, head(as.data.frame(fit))) + verbose && print(verbose, tail(as.data.frame(fit))) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Return results # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - fit; + fit }) # segmentByNonPairedPSCBS() setMethodS3("segmentByNonPairedPSCBS", "data.frame", function(CT, ...) { # To please R CMD check - data <- CT; + data <- CT segmentByNonPairedPSCBS(CT=data$CT, betaT=data$betaT, - chromosome=data$chromosome, x=data$x, ...); + chromosome=data$chromosome, x=data$x, ...) }) setMethodS3("segmentByNonPairedPSCBS", "PairedPSCBS", function(...) { - resegment(...); + resegment(...) }) - - - -############################################################################ -# HISTORY: -# 2013-07-19 -# o ROBUSTNESS: Added a sanity check on the estimates of (tauA, tauB) -# when they are estimated from data in segmentByNonPairedPSCBS(). -# 2012-11-05 -# o DOCUMENTATION FIX: example(segmentByNonPairedPSCBS) was for the -# paired case. -# 2012-08-20 -# o BUG FIX: segmentByNonPairedPSCBS() forgot to specify namespace -# aroma.light when trying to call findPeaksAndValleys(). -# 2012-04-20 -# o Created from segmentByPairedPSCBS.R. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/segmentByPairedPSCBS.R r-cran-pscbs-0.64.0/R/segmentByPairedPSCBS.R --- r-cran-pscbs-0.63.0/R/segmentByPairedPSCBS.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/segmentByPairedPSCBS.R 2018-08-12 21:30:44.000000000 +0000 @@ -62,8 +62,6 @@ # calling algorithm to be used.} # \item{tbn}{If @TRUE, \code{betaT} is normalized before segmentation # using the TumorBoost method [2], otherwise not.} -# \item{preserveScale}{Passed to @see "aroma.light::normalizeTumorBoost", -# which is only called if \code{tbn} is @TRUE.} # \item{joinSegments}{If @TRUE, there are no gaps between neighboring # segments. # If @FALSE, the boundaries of a segment are defined by the support @@ -82,6 +80,8 @@ # set before calling the segmentation method. The random seed is # set to its original state when exiting. If @NULL, it is not set.} # \item{verbose}{See @see "R.utils::Verbose".} +# \item{preserveScale}{\emph{Deprecated and ignored +# will give a warning if specified.}} # } # # \value{ @@ -152,37 +152,37 @@ # # @keyword IO #*/########################################################################### -setMethodS3("segmentByPairedPSCBS", "default", function(CT, thetaT=NULL, thetaN=NULL, betaT=NULL, betaN=NULL, muN=NULL, rho=NULL, chromosome=0, x=NULL, alphaTCN=0.009, alphaDH=0.001, undoTCN=0, undoDH=0, ..., avgTCN=c("mean", "median"), avgDH=c("mean", "median"), flavor=c("tcn&dh", "tcn,dh", "sqrt(tcn),dh", "sqrt(tcn)&dh", "tcn"), tbn=is.null(rho), preserveScale=getOption("PSCBS/preserveScale", FALSE), joinSegments=TRUE, knownSegments=NULL, dropMissingCT=TRUE, seed=NULL, verbose=FALSE) { +setMethodS3("segmentByPairedPSCBS", "default", function(CT, thetaT=NULL, thetaN=NULL, betaT=NULL, betaN=NULL, muN=NULL, rho=NULL, chromosome=0, x=NULL, alphaTCN=0.009, alphaDH=0.001, undoTCN=0, undoDH=0, ..., avgTCN=c("mean", "median"), avgDH=c("mean", "median"), flavor=c("tcn&dh", "tcn,dh", "sqrt(tcn),dh", "sqrt(tcn)&dh", "tcn"), tbn=is.null(rho), joinSegments=TRUE, knownSegments=NULL, dropMissingCT=TRUE, seed=NULL, verbose=FALSE, preserveScale=FALSE) { # WORKAROUND: If Hmisc is loaded after R.utils, it provides a buggy # capitalize() that overrides the one we want to use. Until PSCBS # gets a namespace, we do the following workaround. /HB 2011-07-14 - capitalize <- R.utils::capitalize; + capitalize <- R.utils::capitalize # To please R CMD check - index <- NULL; rm(list="index"); + index <- NULL; rm(list="index") # Settings for sanity checks - tol <- getOption("PSCBS/sanityChecks/tolerance", 0.0005); + tol <- getOption("PSCBS/sanityChecks/tolerance", 0.0005) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'thetaT' & 'thetaN': if (!is.null(thetaT) && !is.null(thetaN)) { - thetaT <- Arguments$getDoubles(thetaT, disallow=disallow); - nbrOfLoci <- length(thetaT); - length2 <- rep(nbrOfLoci, times=2L); - thetaN <- Arguments$getDoubles(thetaN, length=length2, disallow=disallow); - CT <- 2 * thetaT / thetaN; + thetaT <- Arguments$getDoubles(thetaT, disallow=disallow) + nbrOfLoci <- length(thetaT) + length2 <- rep(nbrOfLoci, times=2L) + thetaN <- Arguments$getDoubles(thetaN, length=length2, disallow=disallow) + CT <- 2 * thetaT / thetaN } else if (!is.null(thetaT) || !is.null(thetaN)) { - throw("Either argument 'CT' needs to be specified or *both* of arguments 'thetaT' and 'thetaN'"); + throw("Either argument 'CT' needs to be specified or *both* of arguments 'thetaT' and 'thetaN'") } # Argument 'CT': - disallow <- c("Inf"); - CT <- Arguments$getDoubles(CT, disallow=disallow); - nbrOfLoci <- length(CT); - length2 <- rep(nbrOfLoci, times=2L); + disallow <- c("Inf") + CT <- Arguments$getDoubles(CT, disallow=disallow) + nbrOfLoci <- length(CT) + length2 <- rep(nbrOfLoci, times=2L) # Argument 'betaT': @@ -192,14 +192,14 @@ # Argument 'betaN': if (!is.null(betaN)) { - betaN <- Arguments$getDoubles(betaN, length=length2, disallow="Inf"); + betaN <- Arguments$getDoubles(betaN, length=length2, disallow="Inf") } # Argument 'muN': if (!is.null(muN)) { - muN <- Arguments$getDoubles(muN, length=length2, range=c(0,1), disallow="Inf"); + muN <- Arguments$getDoubles(muN, length=length2, range=c(0,1), disallow="Inf") if (all(is.na(muN)) == nbrOfLoci) { - throw(sprintf("All genotypes ('muN') are NAs: %d (100%%) out of %d", nbrOfLoci, nbrOfLoci)); + throw(sprintf("All genotypes ('muN') are NAs: %d (100%%) out of %d", nbrOfLoci, nbrOfLoci)) } } @@ -215,7 +215,7 @@ } # Argument 'tbn': - tbn <- Arguments$getLogical(tbn); + tbn <- Arguments$getLogical(tbn) if (!is.null(tbn)) { if (tbn) { if (is.null(betaT)) { @@ -227,118 +227,114 @@ } } - # Argument 'preserveScale': - if (tbn && missing(preserveScale)) { - if (!is.element("PSCBS/preserveScale", names(options()))) { - warning("Argument 'preserveScale' for segmentByPairedPSCBS() now defaults to FALSE. Prior to PSCBS v0.50.0 (October 2015) the default was TRUE. To avoid this warning, explicitly specify this argument when calling segmentByPairedPSCBS() or make sure to set option 'PSCBS/preserveScale' to either TRUE or FALSE. This warning will be removed in a future version."); - } - } - preserveScale <- Arguments$getLogical(preserveScale); - # Argument 'chromosome': if (is.null(chromosome)) { - chromosome <- 0L; + chromosome <- 0L } else { - disallow <- c("Inf"); - chromosome <- Arguments$getIntegers(chromosome, range=c(0,Inf), disallow=disallow); + disallow <- c("Inf") + chromosome <- Arguments$getIntegers(chromosome, range=c(0,Inf), disallow=disallow) if (length(chromosome) > 1) { - chromosome <- Arguments$getIntegers(chromosome, length=length2, disallow=disallow); + chromosome <- Arguments$getIntegers(chromosome, length=length2, disallow=disallow) } } # Argument 'x': if (is.null(x)) { - x <- seq_len(nbrOfLoci); + x <- seq_len(nbrOfLoci) } else { - disallow <- c("Inf"); - x <- Arguments$getDoubles(x, length=length2, disallow=disallow); + disallow <- c("Inf") + x <- Arguments$getDoubles(x, length=length2, disallow=disallow) } # Argument 'alphaTCN': - alphaTCN <- Arguments$getDouble(alphaTCN, range=c(0,1)); + alphaTCN <- Arguments$getDouble(alphaTCN, range=c(0,1)) # Argument 'alphaDH': - alphaDH <- Arguments$getDouble(alphaDH, range=c(0,1)); + alphaDH <- Arguments$getDouble(alphaDH, range=c(0,1)) # Argument 'undoTCN': - undoTCN <- Arguments$getDouble(undoTCN, range=c(0,Inf)); + undoTCN <- Arguments$getDouble(undoTCN, range=c(0,Inf)) # Argument 'undoDH': - undoDH <- Arguments$getDouble(undoDH, range=c(0,Inf)); + undoDH <- Arguments$getDouble(undoDH, range=c(0,Inf)) # Argument 'avgTCN' & 'avgDH': - avgTCN <- match.arg(avgTCN); - avgDH <- match.arg(avgDH); + avgTCN <- match.arg(avgTCN) + avgDH <- match.arg(avgDH) # Argument 'flavor': - flavor <- match.arg(flavor); - knownFlavors <- eval(formals(segmentByPairedPSCBS.default)$flavor); + flavor <- match.arg(flavor) + knownFlavors <- eval(formals(segmentByPairedPSCBS.default)$flavor, enclos = baseenv()) if (!is.element(flavor, knownFlavors)) { - throw("Segmentation flavor is not among the supported ones (", paste(sprintf("\"%s\"", knownFlavors), collapse=", "), "): ", flavor); + throw("Segmentation flavor is not among the supported ones (", paste(sprintf("\"%s\"", knownFlavors), collapse=", "), "): ", flavor) } # Argument 'joinSegments': - joinSegments <- Arguments$getLogical(joinSegments); + joinSegments <- Arguments$getLogical(joinSegments) # Argument 'knownSegments': if (is.null(knownSegments)) { - knownSegments <- data.frame(chromosome=integer(0), start=integer(0), end=integer(0)); + knownSegments <- data.frame(chromosome=integer(0), start=integer(0), end=integer(0)) } else { if (!joinSegments) { -## warning("Argument 'knownSegments' should only be specified if argument 'joinSegments' is TRUE."); +## warning("Argument 'knownSegments' should only be specified if argument 'joinSegments' is TRUE.") } } if (!is.data.frame(knownSegments)) { - throw("Argument 'knownSegments' is not a data.frame: ", class(knownSegments)[1]); + throw("Argument 'knownSegments' is not a data.frame: ", class(knownSegments)[1]) } if (!all(is.element(c("chromosome", "start", "end"), colnames(knownSegments)))) { - throw("Argument 'knownSegments' does not have the required column names: ", hpaste(colnames(knownSegments))); + throw("Argument 'knownSegments' does not have the required column names: ", hpaste(colnames(knownSegments))) } # Argument 'dropMissingCT': - dropMissingCT <- Arguments$getLogical(dropMissingCT); + dropMissingCT <- Arguments$getLogical(dropMissingCT) if (!dropMissingCT) { if (is.element(flavor, c("tcn&dh", "sqrt(tcn)&dh"))) { - throw("Missing values in 'CT' are (currently) not supported by the chosen 'flavor': ", flavor); + throw("Missing values in 'CT' are (currently) not supported by the chosen 'flavor': ", flavor) } } # Argument 'seed': if (!is.null(seed)) { - seed <- Arguments$getIntegers(seed); + seed <- Arguments$getIntegers(seed) } # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - - verbose && enter(verbose, "Segmenting paired tumor-normal signals using Paired PSCBS"); + # Argument 'preserveScale' is deprecated + if (!missing(preserveScale)) { + .Deprecated(msg = "Argument 'preserveScale' for segmentByPairedPSCBS() is deprecated and ignored; as of PSCBS 0.64.0 (Mar 2018) it is effectively fixed to FALSE, which has been the default since PSCBS 0.50.0 (Oct 2015). To avoid this warning, do not specify 'preserveScale' when calling segmentByPairedPSCBS().") + } + + verbose && enter(verbose, "Segmenting paired tumor-normal signals using Paired PSCBS") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Call genotypes? # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Are genotype calls muN missing and can they be called? if (is.null(muN) && !is.null(betaN)) { - verbose && enter(verbose, "Calling genotypes from normal allele B fractions"); - verbose && str(verbose, betaN); - muN <- callNaiveGenotypes(betaN, censorAt=c(0,1)); - verbose && cat(verbose, "Called genotypes:"); - verbose && str(verbose, muN); - verbose && print(verbose, table(muN)); + verbose && enter(verbose, "Calling genotypes from normal allele B fractions") + verbose && str(verbose, betaN) + muN <- callNaiveGenotypes(betaN, censorAt=c(0,1)) + verbose && cat(verbose, "Called genotypes:") + verbose && str(verbose, muN) + verbose && print(verbose, table(muN)) # Assert proper calls - muN <- Arguments$getDoubles(muN, length=length2, range=c(0,1), disallow="Inf"); + muN <- Arguments$getDoubles(muN, length=length2, range=c(0,1), disallow="Inf") # Sanity check if (all(is.na(muN))) { - throw(sprintf("All genotypes ('muN') called from the normal allele B fractions ('betaN') are NAs: %d (100%%) out of %d", nbrOfLoci, nbrOfLoci)); + throw(sprintf("All genotypes ('muN') called from the normal allele B fractions ('betaN') are NAs: %d (100%%) out of %d", nbrOfLoci, nbrOfLoci)) } - verbose && exit(verbose); + verbose && exit(verbose) } @@ -346,28 +342,28 @@ # Normalize betaT using betaN (TumorBoost normalization) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (tbn) { - verbose && enter(verbose, "Normalizing betaT using betaN (TumorBoost)"); - betaTN <- normalizeTumorBoost(betaT=betaT, betaN=betaN, muN=muN, preserveScale=preserveScale); - verbose && cat(verbose, "Normalized BAFs:"); - verbose && str(verbose, betaTN); + verbose && enter(verbose, "Normalizing betaT using betaN (TumorBoost)") + betaTN <- normalizeTumorBoost(betaT=betaT, betaN=betaN, muN=muN, preserveScale=FALSE) + verbose && cat(verbose, "Normalized BAFs:") + verbose && str(verbose, betaTN) # Assert that no missing values where introduced - keep <- (is.finite(betaT) & is.finite(betaN) & is.finite(muN)); + keep <- (is.finite(betaT) & is.finite(betaN) & is.finite(muN)) if (anyNA(betaTN[keep])) { - throw("Internal error: normalizeTumorBoost() introduced missing values."); + throw("Internal error: normalizeTumorBoost() introduced missing values.") } # Not needed anymore - keep <- NULL; - verbose && exit(verbose); + keep <- NULL + verbose && exit(verbose) } else { - betaTN <- betaT; + betaTN <- betaT } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setup data # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Setup up data"); + verbose && enter(verbose, "Setup up data") data <- data.frame(chromosome=chromosome, x=x, CT=CT) if (!is.null(thetaT)) { data$thetaT <- thetaT @@ -383,8 +379,8 @@ chromosome <- x <- CT <- thetaT <- thetaN <- betaT <- betaTN <- betaN <- muN <- rho <- NULL # Sanity check - stopifnot(nrow(data) == nbrOfLoci); - verbose && exit(verbose); + .stop_if_not(nrow(data) == nbrOfLoci) + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -392,18 +388,18 @@ # is what DNAcopy::CNA() will do otherwise. At the end, we will # undo this such that the returned 'data' object is complete. # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ok <- (!is.na(data$chromosome) & !is.na(data$x)); + ok <- (!is.na(data$chromosome) & !is.na(data$x)) if (any(!ok)) { - verbose && enter(verbose, "Dropping loci with unknown locations"); - verbose && cat(verbose, "Number of loci dropped: ", sum(!ok)); - data <- data[ok,,drop=FALSE]; - nbrOfLoci <- nrow(data); - verbose && exit(verbose); + verbose && enter(verbose, "Dropping loci with unknown locations") + verbose && cat(verbose, "Number of loci dropped: ", sum(!ok)) + data <- data[ok,,drop=FALSE] + nbrOfLoci <- nrow(data) + verbose && exit(verbose) } - ok <- NULL; # Not needed anymore + ok <- NULL # Not needed anymore # Sanity check - stopifnot(nrow(data) == nbrOfLoci); + .stop_if_not(nrow(data) == nbrOfLoci) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -412,16 +408,16 @@ if (dropMissingCT) { ok <- (!is.na(data$CT)) if (any(!ok)) { - verbose && enter(verbose, "Dropping loci for which TCNs are missing"); - verbose && cat(verbose, "Number of loci dropped: ", sum(!ok)); - data <- data[ok,,drop=FALSE]; - nbrOfLoci <- nrow(data); - verbose && exit(verbose); + verbose && enter(verbose, "Dropping loci for which TCNs are missing") + verbose && cat(verbose, "Number of loci dropped: ", sum(!ok)) + data <- data[ok,,drop=FALSE] + nbrOfLoci <- nrow(data) + verbose && exit(verbose) } - ok <- NULL; # Not needed anymore + ok <- NULL # Not needed anymore # Sanity check - stopifnot(nrow(data) == nbrOfLoci); + .stop_if_not(nrow(data) == nbrOfLoci) } @@ -431,37 +427,37 @@ # the sort such that the returned 'data' object is always in # the same order and number of loci as the input data. # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Ordering data along genome"); - o <- order(data$chromosome, data$x, decreasing=FALSE, na.last=TRUE); + verbose && enter(verbose, "Ordering data along genome") + o <- order(data$chromosome, data$x, decreasing=FALSE, na.last=TRUE) # Any change? if (any(o != seq_along(o))) { - data <- data[o,,drop=FALSE]; + data <- data[o,,drop=FALSE] } - o <- NULL; # Not needed anymore - verbose && str(verbose, data); - verbose && exit(verbose); + o <- NULL # Not needed anymore + verbose && str(verbose, data) + verbose && exit(verbose) # Attach 'index' (guaranteed to be ordered) - data$index <- seq_len(nrow(data)); + data$index <- seq_len(nrow(data)) # Sanity check - stopifnot(nrow(data) == nbrOfLoci); + .stop_if_not(nrow(data) == nbrOfLoci) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Assert no missing values in (chromosome, x, CT) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Sanity check - ok <- (!is.na(data$chromosome) & !is.na(data$x)); + ok <- (!is.na(data$chromosome) & !is.na(data$x)) if (!all(ok)) { - throw("INTERNAL ERROR: Detected (chromosome, x) with missing values also after filtering."); + throw("INTERNAL ERROR: Detected (chromosome, x) with missing values also after filtering.") } # Sanity check if (dropMissingCT) { - ok <- (!is.na(data$CT)); + ok <- (!is.na(data$CT)) if (!all(ok)) { - throw("INTERNAL ERROR: Detected TCN with missing values also after filtering."); + throw("INTERNAL ERROR: Detected TCN with missing values also after filtering.") } } @@ -470,11 +466,11 @@ # Multiple chromosomes? # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Identify all chromosomes, excluding missing values - chromosomes <- sort(unique(data$chromosome), na.last=NA); - nbrOfChromosomes <- length(chromosomes); + chromosomes <- sort(unique(data$chromosome), na.last=NA) + nbrOfChromosomes <- length(chromosomes) if (nbrOfChromosomes > 1) { - verbose && enter(verbose, "Segmenting multiple chromosomes"); - verbose && cat(verbose, "Number of chromosomes: ", nbrOfChromosomes); + verbose && enter(verbose, "Segmenting multiple chromosomes") + verbose && cat(verbose, "Number of chromosomes: ", nbrOfChromosomes) # Generate random seeds? seeds <- NULL @@ -488,23 +484,23 @@ fitList <- listenv() for (kk in seq_len(nbrOfChromosomes)) { - chromosomeKK <- chromosomes[kk]; - chrTag <- sprintf("Chr%02d", chromosomeKK); - verbose && enter(verbose, sprintf("Chromosome #%d ('%s') of %d", kk, chrTag, nbrOfChromosomes)); + chromosomeKK <- chromosomes[kk] + chrTag <- sprintf("Chr%02d", chromosomeKK) + verbose && enter(verbose, sprintf("Chromosome #%d ('%s') of %d", kk, chrTag, nbrOfChromosomes)) seedKK <- seeds[[kk]] # Extract subset of data and parameters for this chromosome - dataKK <- subset(data, chromosome == chromosomeKK); - verbose && str(verbose, dataKK); - fields <- attachLocally(dataKK, fields=c("CT", "thetaT", "thetaN", "betaT", "betaTN", "betaN", "muN", "rho", "chromosome", "x")); - dataKK <- NULL; # Not needed anymore + dataKK <- subset(data, chromosome == chromosomeKK) + verbose && str(verbose, dataKK) + fields <- attachLocally(dataKK, fields=c("CT", "thetaT", "thetaN", "betaT", "betaTN", "betaN", "muN", "rho", "chromosome", "x")) + dataKK <- NULL # Not needed anymore - knownSegmentsKK <- NULL; + knownSegmentsKK <- NULL if (!is.null(knownSegments)) { - knownSegmentsKK <- subset(knownSegments, chromosome == chromosomeKK); - verbose && cat(verbose, "Known segments:"); - verbose && print(verbose, knownSegmentsKK); + knownSegmentsKK <- subset(knownSegments, chromosome == chromosomeKK) + verbose && cat(verbose, "Known segments:") + verbose && print(verbose, knownSegmentsKK) } fitList[[chrTag]] %<-% { @@ -523,9 +519,9 @@ # Sanity checks if (nrow(knownSegmentsKK) == 0) { - stopifnot(nrow(fit$data) == length(CT)) - stopifnot(all.equal(fit$data$CT, CT)) - stopifnot(all.equal(fit$data$muN, muN)) + .stop_if_not(nrow(fit$data) == length(CT)) + .stop_if_not(all.equal(fit$data$CT, CT)) + .stop_if_not(all.equal(fit$data$muN, muN)) } # Update betaT (which is otherwise equals betaTN) @@ -538,61 +534,62 @@ } ## fitList[[chrTag]] <- ... rm(list=fields) # Not needed anymore - verbose && exit(verbose); + verbose && exit(verbose) } # for (kk ...) - verbose && enter(verbose, "Merging (independently) segmented chromosome"); + verbose && enter(verbose, "Merging (independently) segmented chromosome") fitList <- as.list(fitList) - fit <- Reduce(append, fitList); - fitList <- NULL; # Not needed anymore - verbose && str(verbose, fit); - verbose && exit(verbose); + ## former Reduce() w/ append(..., addSplit = TRUE) + fit <- do.call(c, args = c(fitList, addSplit = TRUE)) + fitList <- NULL # Not needed anymore + verbose && str(verbose, fit) + verbose && exit(verbose) # Update parameters that otherwise may be incorrect - fit$params$tbn <- tbn; - fit$params$seed <- seed; + fit$params$tbn <- tbn + fit$params$seed <- seed - segs <- as.data.frame(fit); + segs <- as.data.frame(fit) if (nrow(segs) < 6) { - verbose && print(verbose, segs); + verbose && print(verbose, segs) } else { - verbose && print(verbose, head(segs)); - verbose && print(verbose, tail(segs)); + verbose && print(verbose, head(segs)) + verbose && print(verbose, tail(segs)) } - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Return results # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - return(fit); + return(fit) } # if (nbrOfChromosomes > 1) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Subset 'knownSegments' # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Keeping only current chromosome for 'knownSegments'"); + verbose && enter(verbose, "Keeping only current chromosome for 'knownSegments'") - currChromosome <- data$chromosome[1]; - verbose && cat(verbose, "Chromosome: ", currChromosome); + currChromosome <- data$chromosome[1] + verbose && cat(verbose, "Chromosome: ", currChromosome) - knownSegments <- subset(knownSegments, chromosome == currChromosome); - nbrOfSegments <- nrow(knownSegments); + knownSegments <- subset(knownSegments, chromosome == currChromosome) + nbrOfSegments <- nrow(knownSegments) - verbose && cat(verbose, "Known segments for this chromosome:"); - verbose && print(verbose, knownSegments); + verbose && cat(verbose, "Known segments for this chromosome:") + verbose && print(verbose, knownSegments) - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Sanity checks # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Here 'knownSegments' should specify at most a single chromosome - uChromosomes <- sort(unique(knownSegments$chromosome)); + uChromosomes <- sort(unique(knownSegments$chromosome)) if (length(uChromosomes) > 1) { - throw("INTERNAL ERROR: Argument 'knownSegments' specifies more than one chromosome: ", hpaste(uChromosomes)); + throw("INTERNAL ERROR: Argument 'knownSegments' specifies more than one chromosome: ", hpaste(uChromosomes)) } @@ -600,16 +597,16 @@ # Assert no missing values in (chromosome, x, CT) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Sanity check - ok <- (!is.na(data$chromosome) & !is.na(data$x)); + ok <- (!is.na(data$chromosome) & !is.na(data$x)) if (!all(ok)) { - throw("INTERNAL ERROR: Detected (chromosome, x) with missing values also after filtering."); + throw("INTERNAL ERROR: Detected (chromosome, x) with missing values also after filtering.") } # Sanity check if (dropMissingCT) { - ok <- (!is.na(data$CT)); + ok <- (!is.na(data$CT)) if (!all(ok)) { - throw("INTERNAL ERROR: Detected TCN with missing values also after filtering."); + throw("INTERNAL ERROR: Detected TCN with missing values also after filtering.") } } @@ -617,9 +614,9 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Setup input data # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && cat(verbose, "alphaTCN: ", alphaTCN); - verbose && cat(verbose, "alphaDH: ", alphaDH); - verbose && cat(verbose, "Number of loci: ", nbrOfLoci); + verbose && cat(verbose, "alphaTCN: ", alphaTCN) + verbose && cat(verbose, "alphaDH: ", alphaDH) + verbose && cat(verbose, "Number of loci: ", nbrOfLoci) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -646,7 +643,7 @@ verbose && exit(verbose) } ## Sanity check - stopifnot(!is.null(data$rho)) + .stop_if_not(!is.null(data$rho)) @@ -667,24 +664,24 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # 1a. Identification of change points in total copy numbers # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Identification of change points by total copy numbers"); + verbose && enter(verbose, "Identification of change points by total copy numbers") - fields <- attachLocally(data, fields=c("CT", "thetaT", "thetaN", "chromosome", "x", "index")); + fields <- attachLocally(data, fields=c("CT", "thetaT", "thetaN", "chromosome", "x", "index")) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Assert no missing values in (chromosome, x, CT) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Sanity check - ok <- (!is.na(data$chromosome) & !is.na(data$x)); + ok <- (!is.na(data$chromosome) & !is.na(data$x)) if (!all(ok)) { - throw("INTERNAL ERROR: Detected (chromosome, x) with missing values also after filtering."); + throw("INTERNAL ERROR: Detected (chromosome, x) with missing values also after filtering.") } # Sanity check if (dropMissingCT) { - ok <- (!is.na(data$CT)); + ok <- (!is.na(data$CT)) if (!all(ok)) { - throw("INTERNAL ERROR: Detected CT with missing values also after filtering."); + throw("INTERNAL ERROR: Detected CT with missing values also after filtering.") } } @@ -697,51 +694,51 @@ knownSegments=knownSegments, alpha=alphaTCN, undo=undoTCN, ..., seed=seeds[["TCN"]], - verbose=verbose); - verbose && str(verbose, fit); + verbose=verbose) + verbose && str(verbose, fit) - rm(list=fields); # Not needed anymore + rm(list=fields) # Not needed anymore # Sanity check if (nrow(knownSegments) == 0) { - stopifnot(nrow(fit$data) == nrow(data)); - stopifnot(all(fit$data$chromosome == data$chromosome)); - stopifnot(all(fit$data$x == data$x)); - stopifnot(all(fit$data$index == data$index)); - stopifnot(all.equal(fit$data$y, data$CT)); + .stop_if_not(nrow(fit$data) == nrow(data)) + .stop_if_not(all(fit$data$chromosome == data$chromosome)) + .stop_if_not(all(fit$data$x == data$x)) + .stop_if_not(all(fit$data$index == data$index)) + .stop_if_not(all.equal(fit$data$y, data$CT)) } - tcnSegments <- fit$output; - tcnSegRows <- fit$segRows; - fit <- NULL; # Not needed anymore + tcnSegments <- fit$output + tcnSegRows <- fit$segRows + fit <- NULL # Not needed anymore # Sanity checks - stopifnot(all(tcnSegRows[,1] <= tcnSegRows[,2], na.rm=TRUE)); - stopifnot(all(tcnSegRows[-nrow(tcnSegRows),2] < tcnSegRows[-1,1], na.rm=TRUE)); + .stop_if_not(all(tcnSegRows[,1] <= tcnSegRows[,2], na.rm=TRUE)) + .stop_if_not(all(tcnSegRows[-nrow(tcnSegRows),2] < tcnSegRows[-1,1], na.rm=TRUE)) - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # 1b. Restructure TCN segmentation results # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - verbose && enter(verbose, "Restructure TCN segmentation results"); + verbose && enter(verbose, "Restructure TCN segmentation results") # Drop dummy columns - keep <- setdiff(colnames(tcnSegments), c("sampleName")); - tcnSegments <- tcnSegments[,keep,drop=FALSE]; + keep <- setdiff(colnames(tcnSegments), c("sampleName")) + tcnSegments <- tcnSegments[,keep,drop=FALSE] # Tag fields by TCN - names <- names(tcnSegments); + names <- names(tcnSegments) # Adding 'tcn' prefix to column names - names <- sprintf("tcn%s", capitalize(names)); - names <- gsub("tcnChromosome", "chromosome", names, fixed=TRUE); - names(tcnSegments) <- names; - verbose && print(verbose, tcnSegments); + names <- sprintf("tcn%s", capitalize(names)) + names <- gsub("tcnChromosome", "chromosome", names, fixed=TRUE) + names(tcnSegments) <- names + verbose && print(verbose, tcnSegments) - nbrOfSegs <- nrow(tcnSegments); - verbose && cat(verbose, "Number of TCN segments: ", nbrOfSegs); + nbrOfSegs <- nrow(tcnSegments) + verbose && cat(verbose, "Number of TCN segments: ", nbrOfSegs) - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -751,51 +748,51 @@ # using CBS. By definition, only heterozygous SNPs are used. if (flavor == "tcn") { - verbose && enter(verbose, "TCN-only segmentation"); + verbose && enter(verbose, "TCN-only segmentation") - tcnSegsExpanded <- tcnSegRows; - dhSegRows <- tcnSegRows; + tcnSegsExpanded <- tcnSegRows + dhSegRows <- tcnSegRows # Segments - segs <- tcnSegments; - segs[,"tcnId"] <- seq_len(nbrOfSegs); - segs[,"dhId"] <- rep(1L, times=nbrOfSegs); - segs[,c("tcnNbrOfSNPs", "tcnNbrOfHets", "dhNbrOfLoci")] <- 0L; - segs[,"dhStart"] <- segs[,"tcnStart"]; - segs[,"dhEnd"] <- segs[,"tcnEnd"]; + segs <- tcnSegments + segs[,"tcnId"] <- seq_len(nbrOfSegs) + segs[,"dhId"] <- rep(1L, times=nbrOfSegs) + segs[,c("tcnNbrOfSNPs", "tcnNbrOfHets", "dhNbrOfLoci")] <- 0L + segs[,"dhStart"] <- segs[,"tcnStart"] + segs[,"dhEnd"] <- segs[,"tcnEnd"] # For each TCN segment... for (kk in seq_len(nbrOfSegs)) { - tcnId <- kk; + tcnId <- kk - xStart <- tcnSegments[kk,"tcnStart"]; - xEnd <- tcnSegments[kk,"tcnEnd"]; - regionTag <- sprintf("[%10g,%10g]", xStart, xEnd); - verbose && enter(verbose, sprintf("Total CN segment #%d (%s) of %d", kk, regionTag, nbrOfSegs)); + xStart <- tcnSegments[kk,"tcnStart"] + xEnd <- tcnSegments[kk,"tcnEnd"] + regionTag <- sprintf("[%10g,%10g]", xStart, xEnd) + verbose && enter(verbose, sprintf("Total CN segment #%d (%s) of %d", kk, regionTag, nbrOfSegs)) # Empty segment? - rowStart <- tcnSegRows[kk,1]; - rowEnd <- tcnSegRows[kk,2]; + rowStart <- tcnSegRows[kk,1] + rowEnd <- tcnSegRows[kk,2] # Empty segment or a segment separator? - isEmptySegment <- (is.na(rowStart) && is.na(rowEnd)); + isEmptySegment <- (is.na(rowStart) && is.na(rowEnd)) # Nothing to do? if (isEmptySegment) { - verbose && exit(verbose); - next; + verbose && exit(verbose) + next } - nbrOfTCNLociKK <- tcnSegments[kk,"tcnNbrOfLoci"]; - verbose && cat(verbose, "Number of TCN loci in segment: ", nbrOfTCNLociKK); - rows <- seq(from=rowStart, length.out=nbrOfTCNLociKK); - dataKK <- data[rows,,drop=FALSE]; - nbrOfLociKK <- nrow(dataKK); + nbrOfTCNLociKK <- tcnSegments[kk,"tcnNbrOfLoci"] + verbose && cat(verbose, "Number of TCN loci in segment: ", nbrOfTCNLociKK) + rows <- seq(from=rowStart, length.out=nbrOfTCNLociKK) + dataKK <- data[rows,,drop=FALSE] + nbrOfLociKK <- nrow(dataKK) - verbose && cat(verbose, "Locus data for TCN segment:"); - verbose && str(verbose, dataKK); + verbose && cat(verbose, "Locus data for TCN segment:") + verbose && str(verbose, dataKK) - verbose && cat(verbose, "Number of loci: ", nbrOfLociKK); + verbose && cat(verbose, "Number of loci: ", nbrOfLociKK) hasDH <- !is.null(dataKK$rho) if (hasDH) { isSnpKK <- !is.na(dataKK$rho) @@ -807,121 +804,121 @@ nbrOfSnpsKK <- sum(isSnpKK) nbrOfHetsKK <- sum(isHetsKK) verbose && printf(verbose, "Number of SNPs: %d (%.2f%%)\n", - nbrOfSnpsKK, 100*nbrOfSnpsKK/nbrOfLociKK); + nbrOfSnpsKK, 100*nbrOfSnpsKK/nbrOfLociKK) verbose && printf(verbose, "Number of heterozygous SNPs: %d (%.2f%%)\n", - nbrOfHetsKK, 100*nbrOfHetsKK/nbrOfSnpsKK); + nbrOfHetsKK, 100*nbrOfHetsKK/nbrOfSnpsKK) - segs[kk,"tcnNbrOfSNPs"] <- nbrOfSnpsKK; - segs[kk,"tcnNbrOfHets"] <- nbrOfHetsKK; - segs[kk,"dhNbrOfLoci"] <- nbrOfHetsKK; + segs[kk,"tcnNbrOfSNPs"] <- nbrOfSnpsKK + segs[kk,"tcnNbrOfHets"] <- nbrOfHetsKK + segs[kk,"dhNbrOfLoci"] <- nbrOfHetsKK # Adjust 'dhRows[kk,]' - rows <- rows[isHetsKK]; - rows <- range(rows, na.rm=TRUE); - dhSegRows[kk,] <- rows; + rows <- rows[isHetsKK] + rows <- range(rows, na.rm=TRUE) + dhSegRows[kk,] <- rows # Sanity check if (nbrOfHetsKK > 0) { - stopifnot(all(dhSegRows[kk,1] <= dhSegRows[kk,2], na.rm=TRUE)); + .stop_if_not(all(dhSegRows[kk,1] <= dhSegRows[kk,2], na.rm=TRUE)) } # Calculate dhMean - rhoKK <- dataKK[["rho"]][isHetsKK]; - segs[kk,"dhMean"] <- mean(rhoKK, na.rm=TRUE); + rhoKK <- dataKK[["rho"]][isHetsKK] + segs[kk,"dhMean"] <- mean(rhoKK, na.rm=TRUE) - verbose && exit(verbose); + verbose && exit(verbose) } # for (kk ...) # Reorder segmentation columns - keys <- c("tcnId", "dhId", colnames(tcnSegments)); - keys <- c(keys, setdiff(colnames(segs), keys)); - segs <- segs[,keys]; + keys <- c("tcnId", "dhId", colnames(tcnSegments)) + keys <- c(keys, setdiff(colnames(segs), keys)) + segs <- segs[,keys] - verbose && exit(verbose); + verbose && exit(verbose) } else { - dhSegRows <- NULL; - tcnSegsExpanded <- NULL; + dhSegRows <- NULL + tcnSegsExpanded <- NULL # For each TCN segment... - segs <- vector("list", length=nbrOfSegs); + segs <- vector("list", length=nbrOfSegs) for (kk in seq_len(nbrOfSegs)) { - tcnId <- kk; + tcnId <- kk - xStart <- tcnSegments[kk,"tcnStart"]; - xEnd <- tcnSegments[kk,"tcnEnd"]; - regionTag <- sprintf("[%10g,%10g]", xStart, xEnd); - verbose && enter(verbose, sprintf("Total CN segment #%d (%s) of %d", kk, regionTag, nbrOfSegs)); + xStart <- tcnSegments[kk,"tcnStart"] + xEnd <- tcnSegments[kk,"tcnEnd"] + regionTag <- sprintf("[%10g,%10g]", xStart, xEnd) + verbose && enter(verbose, sprintf("Total CN segment #%d (%s) of %d", kk, regionTag, nbrOfSegs)) # Empty segment? - rowStart <- tcnSegRows[kk,1]; - rowEnd <- tcnSegRows[kk,2]; + rowStart <- tcnSegRows[kk,1] + rowEnd <- tcnSegRows[kk,2] # Empty segment or a segment separator? - isEmptySegment <- (is.na(rowStart) && is.na(rowEnd)); - isSplitter <- (isEmptySegment && is.na(xStart) && is.na(xEnd)); - isEmptySegment <- (isEmptySegment & !isSplitter); + isEmptySegment <- (is.na(rowStart) && is.na(rowEnd)) + isSplitter <- (isEmptySegment && is.na(xStart) && is.na(xEnd)) + isEmptySegment <- (isEmptySegment & !isSplitter) if (isSplitter) { - verbose && cat(verbose, "No signals to segment. Just a \"splitter\" segment. Skipping."); + verbose && cat(verbose, "No signals to segment. Just a \"splitter\" segment. Skipping.") # Sanity check - stopifnot(kk >= 1); + .stop_if_not(kk >= 1) # Add a splitter segment - segT <- segs[[kk-1]]; - segT <- segT[NA_integer_,]; - keys <- colnames(tcnSegments); - segT[,keys] <- tcnSegments[kk,keys]; - segT[,"tcnId"] <- tcnId; - segT[,"dhId"] <- 1L; - segT[,c("tcnNbrOfSNPs", "tcnNbrOfHets", "dhNbrOfLoci")] <- 0L; - segT[,"dhStart"] <- xStart; - segT[,"dhEnd"] <- xEnd; - segs[[kk]] <- segT; - verbose && print(verbose, segT); + segT <- segs[[kk-1]] + segT <- segT[NA_integer_,] + keys <- colnames(tcnSegments) + segT[,keys] <- tcnSegments[kk,keys] + segT[,"tcnId"] <- tcnId + segT[,"dhId"] <- 1L + segT[,c("tcnNbrOfSNPs", "tcnNbrOfHets", "dhNbrOfLoci")] <- 0L + segT[,"dhStart"] <- xStart + segT[,"dhEnd"] <- xEnd + segs[[kk]] <- segT + verbose && print(verbose, segT) # Add a splitter to TCN and DH segment row matrix - segRowsT <- dhSegRows[NA_integer_,]; - dhSegRows <- rbind(dhSegRows, segRowsT); + segRowsT <- dhSegRows[NA_integer_,] + dhSegRows <- rbind(dhSegRows, segRowsT) - segRowsT <- tcnSegsExpanded[NA_integer_,]; - tcnSegsExpanded <- rbind(tcnSegsExpanded, segRowsT); + segRowsT <- tcnSegsExpanded[NA_integer_,] + tcnSegsExpanded <- rbind(tcnSegsExpanded, segRowsT) - verbose && exit(verbose); - next; + verbose && exit(verbose) + next } # if (isSplitter) - nbrOfTCNLociKK <- tcnSegments[kk,"tcnNbrOfLoci"]; - verbose && cat(verbose, "Number of TCN loci in segment: ", nbrOfTCNLociKK); + nbrOfTCNLociKK <- tcnSegments[kk,"tcnNbrOfLoci"] + verbose && cat(verbose, "Number of TCN loci in segment: ", nbrOfTCNLociKK) # Sanity check - stopifnot(!isEmptySegment || (isEmptySegment && (nbrOfTCNLociKK == 0))); + .stop_if_not(!isEmptySegment || (isEmptySegment && (nbrOfTCNLociKK == 0))) if (nbrOfTCNLociKK > 0) { # Extract locus data for TCN segment - rows <- rowStart:rowEnd; + rows <- rowStart:rowEnd ## if (nrow(knownSegments) == 0) { - ## gammaT <- tcnSegments[kk,"tcnMean"]; - ## verbose && print(verbose, all.equal(mean(dataKK$CT, na.rm=TRUE), gammaT, tolerance=tol)); - ## stopifnot(all.equal(mean(dataKK$CT, na.rm=TRUE), gammaT, tolerance=tol)); + ## gammaT <- tcnSegments[kk,"tcnMean"] + ## verbose && print(verbose, all.equal(mean(dataKK$CT, na.rm=TRUE), gammaT, tolerance=tol)) + ## .stop_if_not(all.equal(mean(dataKK$CT, na.rm=TRUE), gammaT, tolerance=tol)) ## } } else { - rows <- integer(0); + rows <- integer(0) } # if (nbrOfTCNLociKK > 0) - dataKK <- data[rows,,drop=FALSE]; - nbrOfLociKK <- nrow(dataKK); + dataKK <- data[rows,,drop=FALSE] + nbrOfLociKK <- nrow(dataKK) # Sanity check - stopifnot(length(dataKK$CT) == nbrOfTCNLociKK); - ## stopifnot(sum(!is.na(dataKK$CT)) == nbrOfTCNLociKK); + .stop_if_not(length(dataKK$CT) == nbrOfTCNLociKK) + ## .stop_if_not(sum(!is.na(dataKK$CT)) == nbrOfTCNLociKK) - verbose && cat(verbose, "Locus data for TCN segment:"); - verbose && str(verbose, dataKK); + verbose && cat(verbose, "Locus data for TCN segment:") + verbose && str(verbose, dataKK) - verbose && cat(verbose, "Number of loci: ", nbrOfLociKK); + verbose && cat(verbose, "Number of loci: ", nbrOfLociKK) hasDH <- !is.null(dataKK$rho) if (hasDH) { @@ -934,18 +931,18 @@ nbrOfSnpsKK <- sum(isSnpKK) nbrOfHetsKK <- sum(isHetsKK) verbose && printf(verbose, "Number of SNPs: %d (%.2f%%)\n", - nbrOfSnpsKK, 100*nbrOfSnpsKK/nbrOfLociKK); + nbrOfSnpsKK, 100*nbrOfSnpsKK/nbrOfLociKK) verbose && printf(verbose, "Number of heterozygous SNPs: %d (%.2f%%)\n", - nbrOfHetsKK, 100*nbrOfHetsKK/nbrOfSnpsKK); + nbrOfHetsKK, 100*nbrOfHetsKK/nbrOfSnpsKK) # Since segments in 'knownSegments' has already been used in the TCN # segmentation, they are not needed in the DH segmentation. - currChromosome <- data$chromosome[1]; - verbose && cat(verbose, "Chromosome: ", currChromosome); - knownSegmentsT <- data.frame(chromosome=currChromosome, start=xStart, end=xEnd); + currChromosome <- data$chromosome[1] + verbose && cat(verbose, "Chromosome: ", currChromosome) + knownSegmentsT <- data.frame(chromosome=currChromosome, start=xStart, end=xEnd) - verbose && enter(verbose, "Segmenting DH signals"); - fields <- attachLocally(dataKK, fields=c("chromosome", "x", "rho", "index")); + verbose && enter(verbose, "Segmenting DH signals") + fields <- attachLocally(dataKK, fields=c("chromosome", "x", "rho", "index")) fit <- segmentByCBS(rho, chromosome=chromosome, x=x, @@ -953,110 +950,110 @@ knownSegments=knownSegmentsT, alpha=alphaDH, undo=undoDH, ..., seed=seeds[["DH"]], - verbose=verbose); - verbose && str(verbose, fit); - dhSegments <- fit$output; - dhSegRowsKK <- fit$segRows; - - verbose && cat(verbose, "DH segmentation (locally-indexed) rows:"); - verbose && print(verbose, dhSegRowsKK); - verbose && str(verbose, index); + verbose=verbose) + verbose && str(verbose, fit) + dhSegments <- fit$output + dhSegRowsKK <- fit$segRows + + verbose && cat(verbose, "DH segmentation (locally-indexed) rows:") + verbose && print(verbose, dhSegRowsKK) + verbose && str(verbose, index) # Remap to genome-wide indices for (cc in 1:2) { - dhSegRowsKK[,cc] <- index[dhSegRowsKK[,cc]]; + dhSegRowsKK[,cc] <- index[dhSegRowsKK[,cc]] } - verbose && cat(verbose, "DH segmentation rows:"); - verbose && print(verbose, dhSegRowsKK); + verbose && cat(verbose, "DH segmentation rows:") + verbose && print(verbose, dhSegRowsKK) # Not needed anymore - rm(list=fields); - fit <- NULL; - verbose && exit(verbose); + rm(list=fields) + fit <- NULL + verbose && exit(verbose) # Drop dummy columns - keep <- setdiff(colnames(dhSegments), c("sampleName", "chromosome")); - dhSegments <- dhSegments[,keep,drop=FALSE]; + keep <- setdiff(colnames(dhSegments), c("sampleName", "chromosome")) + dhSegments <- dhSegments[,keep,drop=FALSE] # Tag fields by DH - names <- names(dhSegments); + names <- names(dhSegments) # Adding 'dh' prefix to column names - names <- sprintf("dh%s", capitalize(names)); - names(dhSegments) <- names; + names <- sprintf("dh%s", capitalize(names)) + names(dhSegments) <- names # Special case: If there where not enough data to segment DH... if (nrow(dhSegments) == 0) { - dhSegments <- dhSegments[NA_integer_,,drop=FALSE]; - dhSegRowsKK <- dhSegRowsKK[NA_integer_,,drop=FALSE]; + dhSegments <- dhSegments[NA_integer_,,drop=FALSE] + dhSegRowsKK <- dhSegRowsKK[NA_integer_,,drop=FALSE] } - verbose && cat(verbose, "DH segmentation table:"); - verbose && print(verbose, dhSegments); - verbose && print(verbose, dhSegRowsKK); + verbose && cat(verbose, "DH segmentation table:") + verbose && print(verbose, dhSegments) + verbose && print(verbose, dhSegRowsKK) # Expand the TCN segmentation result data frame - rows <- rep(kk, times=nrow(dhSegments)); - verbose && cat(verbose, "Rows:"); - verbose && print(verbose, rows); - tcnSegmentsKK <- tcnSegments[rows,,drop=FALSE]; - tcnSegRowsKK <- tcnSegRows[rows,,drop=FALSE]; + rows <- rep(kk, times=nrow(dhSegments)) + verbose && cat(verbose, "Rows:") + verbose && print(verbose, rows) + tcnSegmentsKK <- tcnSegments[rows,,drop=FALSE] + tcnSegRowsKK <- tcnSegRows[rows,,drop=FALSE] # Sanity check - stopifnot(nrow(tcnSegmentsKK) == nrow(dhSegments)); - stopifnot(nrow(tcnSegRowsKK) == nrow(dhSegments)); - stopifnot(is.na(tcnSegRowsKK[,1]) || is.na(dhSegRowsKK[,1]) || (tcnSegRowsKK[,1] <= dhSegRowsKK[,1])); - stopifnot(is.na(tcnSegRowsKK[,2]) || is.na(dhSegRowsKK[,2]) || (dhSegRowsKK[,2] <= tcnSegRowsKK[,2])); - verbose && cat(verbose, "TCN segmentation rows:"); - verbose && print(verbose, tcnSegRowsKK); - stopifnot(all(tcnSegRowsKK[,1] == tcnSegRowsKK[1,1], na.rm=TRUE)); - stopifnot(all(tcnSegRowsKK[,2] == tcnSegRowsKK[1,2], na.rm=TRUE)); - - verbose && cat(verbose, "TCN and DH segmentation rows:"); - verbose && print(verbose, tcnSegRowsKK); - verbose && print(verbose, dhSegRowsKK); - verbose && print(verbose, tcnSegsExpanded); + .stop_if_not(nrow(tcnSegmentsKK) == nrow(dhSegments)) + .stop_if_not(nrow(tcnSegRowsKK) == nrow(dhSegments)) + .stop_if_not(is.na(tcnSegRowsKK[,1]) || is.na(dhSegRowsKK[,1]) || (tcnSegRowsKK[,1] <= dhSegRowsKK[,1])) + .stop_if_not(is.na(tcnSegRowsKK[,2]) || is.na(dhSegRowsKK[,2]) || (dhSegRowsKK[,2] <= tcnSegRowsKK[,2])) + verbose && cat(verbose, "TCN segmentation rows:") + verbose && print(verbose, tcnSegRowsKK) + .stop_if_not(all(tcnSegRowsKK[,1] == tcnSegRowsKK[1,1], na.rm=TRUE)) + .stop_if_not(all(tcnSegRowsKK[,2] == tcnSegRowsKK[1,2], na.rm=TRUE)) + + verbose && cat(verbose, "TCN and DH segmentation rows:") + verbose && print(verbose, tcnSegRowsKK) + verbose && print(verbose, dhSegRowsKK) + verbose && print(verbose, tcnSegsExpanded) # Append - tcnSegsExpanded <- rbind(tcnSegsExpanded, tcnSegRowsKK); - verbose && cat(verbose, "TCN segmentation (expanded) rows:"); - verbose && print(verbose, tcnSegsExpanded); - rownames(tcnSegsExpanded) <- NULL; - - dhSegRows <- rbind(dhSegRows, dhSegRowsKK); - rownames(dhSegRows) <- NULL; - - verbose && cat(verbose, "TCN and DH segmentation rows:"); - verbose && print(verbose, tcnSegRows); - verbose && print(verbose, dhSegRows); - verbose && print(verbose, tcnSegsExpanded); + tcnSegsExpanded <- rbind(tcnSegsExpanded, tcnSegRowsKK) + verbose && cat(verbose, "TCN segmentation (expanded) rows:") + verbose && print(verbose, tcnSegsExpanded) + rownames(tcnSegsExpanded) <- NULL + + dhSegRows <- rbind(dhSegRows, dhSegRowsKK) + rownames(dhSegRows) <- NULL + + verbose && cat(verbose, "TCN and DH segmentation rows:") + verbose && print(verbose, tcnSegRows) + verbose && print(verbose, dhSegRows) + verbose && print(verbose, tcnSegsExpanded) # Sanity checks - stopifnot(all(tcnSegRows[,1] <= tcnSegRows[,2], na.rm=TRUE)); - stopifnot(all(tcnSegRows[-nrow(tcnSegRows),2] < tcnSegRows[-1,1], na.rm=TRUE)); - stopifnot(all(dhSegRows[,1] <= dhSegRows[,2], na.rm=TRUE)); - stopifnot(all(dhSegRows[-nrow(dhSegRows),2] < dhSegRows[-1,1], na.rm=TRUE)); - stopifnot(all(tcnSegsExpanded[,1] <= tcnSegsExpanded[,2], na.rm=TRUE)); - stopifnot(all(tcnSegsExpanded[,1] <= dhSegRows[,1], na.rm=TRUE)); - stopifnot(all(tcnSegsExpanded[,2] >= dhSegRows[,2], na.rm=TRUE)); + .stop_if_not(all(tcnSegRows[,1] <= tcnSegRows[,2], na.rm=TRUE)) + .stop_if_not(all(tcnSegRows[-nrow(tcnSegRows),2] < tcnSegRows[-1,1], na.rm=TRUE)) + .stop_if_not(all(dhSegRows[,1] <= dhSegRows[,2], na.rm=TRUE)) + .stop_if_not(all(dhSegRows[-nrow(dhSegRows),2] < dhSegRows[-1,1], na.rm=TRUE)) + .stop_if_not(all(tcnSegsExpanded[,1] <= tcnSegsExpanded[,2], na.rm=TRUE)) + .stop_if_not(all(tcnSegsExpanded[,1] <= dhSegRows[,1], na.rm=TRUE)) + .stop_if_not(all(tcnSegsExpanded[,2] >= dhSegRows[,2], na.rm=TRUE)) ## if (!all(tcnSegsExpanded[-nrow(tcnSegsExpanded),2] < tcnSegsExpanded[-1,1], na.rm=TRUE)) { - ## stopifnot(all(tcnSegsExpanded[-nrow(tcnSegsExpanded),2] < tcnSegsExpanded[-1,1], na.rm=TRUE)); + ## .stop_if_not(all(tcnSegsExpanded[-nrow(tcnSegsExpanded),2] < tcnSegsExpanded[-1,1], na.rm=TRUE)) ## } # Sanity check - stopifnot(nrow(dhSegRows) == nrow(tcnSegsExpanded)); + .stop_if_not(nrow(dhSegRows) == nrow(tcnSegsExpanded)) # Append information on number of SNPs and hets in CN region tcnSegmentsKK <- cbind( tcnSegmentsKK, tcnNbrOfSNPs=nbrOfSnpsKK, tcnNbrOfHets=nbrOfHetsKK - ); - verbose && cat(verbose, "Total CN segmentation table (expanded):"); - verbose && print(verbose, tcnSegmentsKK); + ) + verbose && cat(verbose, "Total CN segmentation table (expanded):") + verbose && print(verbose, tcnSegmentsKK) # Sanity check - stopifnot(nrow(tcnSegmentsKK) == nrow(dhSegments)); + .stop_if_not(nrow(tcnSegmentsKK) == nrow(dhSegments)) # Combine TCN and DH segmentation results tcndhSegments <- cbind( @@ -1064,52 +1061,52 @@ dhId=seq_len(nrow(dhSegments)), tcnSegmentsKK, dhSegments - ); + ) - segs[[kk]] <- tcndhSegments; + segs[[kk]] <- tcndhSegments - verbose && cat(verbose, "(TCN,DH) segmentation for one total CN segment:"); - verbose && print(verbose, segs[[kk]]); + verbose && cat(verbose, "(TCN,DH) segmentation for one total CN segment:") + verbose && print(verbose, segs[[kk]]) - verbose && exit(verbose); + verbose && exit(verbose) } # for (kk ...) - segs <- Reduce(rbind, segs); - rownames(segs) <- NULL; + segs <- Reduce(rbind, segs) + rownames(segs) <- NULL } # if (flavor == "tcn") # Sanity check - stopifnot(nrow(dhSegRows) == nrow(tcnSegsExpanded)); - rownames(tcnSegRows) <- rownames(dhSegRows) <- NULL; + .stop_if_not(nrow(dhSegRows) == nrow(tcnSegsExpanded)) + rownames(tcnSegRows) <- rownames(dhSegRows) <- NULL - stopifnot(all(tcnSegRows[,1] <= tcnSegRows[,2], na.rm=TRUE)); - stopifnot(all(tcnSegRows[-nrow(tcnSegRows),2] < tcnSegRows[-1,1], na.rm=TRUE)); + .stop_if_not(all(tcnSegRows[,1] <= tcnSegRows[,2], na.rm=TRUE)) + .stop_if_not(all(tcnSegRows[-nrow(tcnSegRows),2] < tcnSegRows[-1,1], na.rm=TRUE)) if (flavor != "tcn") { - stopifnot(all(dhSegRows[,1] <= dhSegRows[,2], na.rm=TRUE)); + .stop_if_not(all(dhSegRows[,1] <= dhSegRows[,2], na.rm=TRUE)) } - stopifnot(all(dhSegRows[-nrow(dhSegRows),2] < dhSegRows[-1,1], na.rm=TRUE)); - stopifnot(all(tcnSegsExpanded[,1] <= tcnSegsExpanded[,2], na.rm=TRUE)); -## stopifnot(all(tcnSegsExpanded[-nrow(tcnSegsExpanded),2] < tcnSegsExpanded[-1,1], na.rm=TRUE)); + .stop_if_not(all(dhSegRows[-nrow(dhSegRows),2] < dhSegRows[-1,1], na.rm=TRUE)) + .stop_if_not(all(tcnSegsExpanded[,1] <= tcnSegsExpanded[,2], na.rm=TRUE)) +## .stop_if_not(all(tcnSegsExpanded[-nrow(tcnSegsExpanded),2] < tcnSegsExpanded[-1,1], na.rm=TRUE)) # Move 'chromosome' column to the first column - idx <- match("chromosome", names(segs)); - idxs <- c(idx, seq_len(ncol(segs))[-idx]); - segs <- segs[,idxs,drop=FALSE]; - verbose && print(verbose, segs); + idx <- match("chromosome", names(segs)) + idxs <- c(idx, seq_len(ncol(segs))[-idx]) + segs <- segs[,idxs,drop=FALSE] + verbose && print(verbose, segs) - verbose && enter(verbose, "Calculating (C1,C2) per segment"); + verbose && enter(verbose, "Calculating (C1,C2) per segment") # Append (C1,C2) estimates - tcn <- segs$tcnMean; - dh <- segs$dhMean; - C1 <- 1/2*(1-dh)*tcn; - C2 <- tcn - C1; - segs <- cbind(segs, c1Mean=C1, c2Mean=C2); - verbose && exit(verbose); + tcn <- segs$tcnMean + dh <- segs$dhMean + C1 <- 1/2*(1-dh)*tcn + C2 <- tcn - C1 + segs <- cbind(segs, c1Mean=C1, c2Mean=C2) + verbose && exit(verbose) - nbrOfSegs <- nrow(segs); - verbose && cat(verbose, "Number of segments: ", nbrOfSegs); + nbrOfSegs <- nrow(segs) + verbose && cat(verbose, "Number of segments: ", nbrOfSegs) - verbose && exit(verbose); + verbose && exit(verbose) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Create result object @@ -1122,14 +1119,14 @@ joinSegments = joinSegments, knownSegments = knownSegments, seed = seed - ); + ) # Should we drop attributes? /HB 2010-09-24 - stopifnot(all(data$index == seq_len(nrow(data)))); - data$index <- NULL; # Drop, because it is guaranteed to be ordered - class(data) <- c("PairedPSCNData", class(data)); + .stop_if_not(all(data$index == seq_len(nrow(data)))) + data$index <- NULL # Drop, because it is guaranteed to be ordered + class(data) <- c("PairedPSCNData", class(data)) - class(segs) <- c("PairedPSCNSegments", class(segs)); + class(segs) <- c("PairedPSCNSegments", class(segs)) fit <- list( data = data, @@ -1137,282 +1134,75 @@ tcnSegRows = tcnSegsExpanded, dhSegRows = dhSegRows, params = params - ); + ) - class(fit) <- c("PairedPSCBS", "PSCBS", "AbstractCBS"); + class(fit) <- c("PairedPSCBS", "PSCBS", "AbstractCBS") # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Update? # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (avgTCN != "mean" || avgDH != "mean") { - verbose && enter(verbose, "Updating mean level using different estimator"); - verbose && cat(verbose, "TCN estimator: ", avgTCN); - verbose && cat(verbose, "DH estimator: ", avgDH); - fit <- updateMeans(fit, avgTCN=avgTCN, avgDH=avgDH, verbose=less(verbose, 20)); - verbose && exit(verbose); + verbose && enter(verbose, "Updating mean level using different estimator") + verbose && cat(verbose, "TCN estimator: ", avgTCN) + verbose && cat(verbose, "DH estimator: ", avgDH) + fit <- updateMeans(fit, avgTCN=avgTCN, avgDH=avgDH, verbose=less(verbose, 20)) + verbose && exit(verbose) } if (is.element(flavor, c("tcn&dh", "sqrt(tcn)&dh"))) { - fit$params$flavor <- gsub("&", ",", flavor, fixed=TRUE); # AD HOC. - fit <- postsegmentTCN(fit, verbose=verbose); + fit$params$flavor <- gsub("&", ",", flavor, fixed=TRUE) # AD HOC. + fit <- postsegmentTCN(fit, verbose=verbose) # Sanity check - CT <- fit$data$CT; - tcnSegRows <- fit$tcnSegRows; - dhSegRows <- fit$dhSegRows; + CT <- fit$data$CT + tcnSegRows <- fit$tcnSegRows + dhSegRows <- fit$dhSegRows for (jj in 1:nrow(tcnSegRows)) { - tcnSegRowJJ <- unlist(tcnSegRows[jj,,drop=TRUE], use.names=FALSE); - dhSegRowJJ <- unlist(dhSegRows[jj,,drop=TRUE], use.names=FALSE); - stopifnot( + tcnSegRowJJ <- unlist(tcnSegRows[jj,,drop=TRUE], use.names=FALSE) + dhSegRowJJ <- unlist(dhSegRows[jj,,drop=TRUE], use.names=FALSE) + .stop_if_not( is.na(tcnSegRowJJ[1]) || is.na(dhSegRowJJ[1]) || # A TCN segment must start at or before a DH segment... (tcnSegRowJJ[1] <= dhSegRowJJ[1]) || # ...unless there was an outlier at the left edge. (is.na(CT[dhSegRowJJ[1]]) && (tcnSegRowJJ[1] - 1L <= dhSegRowJJ[1])) - ); - stopifnot( + ) + .stop_if_not( is.na(tcnSegRowJJ[2]) || is.na(dhSegRowJJ[2]) || # A TCN segment must end at or after a DH segment... (dhSegRowJJ[2] <= tcnSegRowJJ[2]) || # ...unless there was an outlier at the right edge. (is.na(CT[dhSegRowJJ[2]]) && (dhSegRowJJ[2] <= tcnSegRowJJ[2] + 1L)) - ); + ) } # for (jj ...) # Not needed anymore - CT <- tcnSegRows <- dhSegRows <- NULL; + CT <- tcnSegRows <- dhSegRows <- NULL } - verbose && print(verbose, head(as.data.frame(fit))); - verbose && print(verbose, tail(as.data.frame(fit))); + verbose && print(verbose, head(as.data.frame(fit))) + verbose && print(verbose, tail(as.data.frame(fit))) # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Return results # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - fit; + fit }) # segmentByPairedPSCBS() setMethodS3("segmentByPairedPSCBS", "data.frame", function(CT, ...) { # To please R CMD check - data <- CT; + data <- CT segmentByPairedPSCBS(CT=data$CT, thetaT=data$thetaT, thetaN=data$thetaN, betaT=data$betaT, betaN=data$betaN, muN=data$muN, rho=data$rho, - chromosome=data$chromosome, x=data$x, ...); + chromosome=data$chromosome, x=data$x, ...) }) setMethodS3("segmentByPairedPSCBS", "PairedPSCBS", function(...) { - resegment(...); + resegment(...) }) # segmentByPairedPSCBS() - - - -############################################################################ -# HISTORY: -# 2014-06-08 -# o Now segmentByPairedPSCBS() gives a warning about future change of the -# default value of argument 'preserveScale' (from current TRUE to FALSE). -# The warning only appears if the argument is not specified explicitly. -# 2014-03-30 -# o As an alternative to argument 'CT', segmentByPairedPSCBS() now accepts -# arguments 'thetaT' and 'thetaN', in case 'CT' is calculated as -# CT=2*thetaT/thetaN (and all of 'CT', 'thetaT' and 'thetaN' are stored -# as part the locus-level data signals. -# 2014-01-29 -# o ROBUSTNESS: Now segmentByPairedPSCBS() asserts that argument 'muN' -# is not all NAs. Similarily, if 'muN' is called from 'betaN' the -# same assertion is done after calling. -# 2013-02-01 -# o BUG FIX: segmentByPairedPSCBS(..., avgDH="median") only worked for -# single-chromosome data. Same for avgTCN="median". -# 2013-01-16 -# o Added arguments 'avgTCN' and 'avgDH' to segmentByPairedPSCBS(). -# 2012-09-15 -# o Added argument 'dropMissingCT' to segmentByPairedPSCBS(). -# 2012-09-13 -# o CONSISTENCY FIX: Changed the behavior of extreme values of argument -# 'undoTCN' and 'undoDH' to segmentByPairedPSCBS() such that it is -# consistent with the new rules for 'undo' of segmentByCBS(). -# 2012-07-22 -# o GENERALIZATION/BUG FIX: Now segmentByPairedPSCBS() drops loci for -# which CT is missing (regardless of betaT). For instance, in rare cases -# when the reference (e.g. the normal) is missing, then it may be that -# CT is missing while betaT is not. -# o Now the verbose output in segmentByPairedPSCBS() specifies the range -# of segments with greater precision. -# 2012-04-20 -# o Now it is possible to skip the DH segmentation in Paired PSCBS, i.e. -# segmentByPairedPSCBS(..., flavor="tcn"). -# o BUG FIX: segmentByPairedPSCBS() would throw "error in `$<-.data.frame -# `(`*tmp*`, "rho" ..." if some loci points has unknown genomic positions. -# 2011-11-19 -# o GENERALIZATION: Now it is possible to run Paired PSCBS (without -# TumorBoost) when only genotypes but not BAFs are available for the -# matched normal. -# 2011-11-17 -# o ROBUSTNESS: Now all internal iterative calls to segmentByPairedPSCBS() -# and segmentByCBS() have an explicit seed=NULL. -# o BUG FIX: Now 'tbn' argument is correctly preserved in the stored -# parameter settings of segmentByPairedPSCBS(). -# o BUG FIX: segmentByPairedPSCBS() would give an error when trying to -# segment DH if the TCN segment contains no data points, which could -# happen if 'knownSegments' specifies an empty segment, centromere. -# o Added segmentByPairedPSCBS() for PairedPSCBS, which is just a -# wrapper for resegment(). -# 2011-10-21 -# o Now segmentByPairedCBS() handles forced separators in 'knownSegments'. -# 2011-10-20 -# o CLEANUP: Dropped a stray debug output message in segmentByPairedPSCBS(). -# o Replaced argument 'knownCPs' with 'knownSegments' for segmentByCBS(). -# 2011-10-02 -# o Added segmentByPairedPSCBS() for data.frame such that the locus-level -# data arguments can also be passed via a data.frame. -# 2011-09-04 -# o ROBUSTNESS: Added drop=FALSE to matrix subsettings. -# o CLEANUP: Removed all references to/usage of DNAcopy fields, which -# are no longer part of the CBS class. -# 2011-09-03 -# o Updated code to not use deprecated argument 'columnNamesFlavor' -# of segmentByCBS(). -# 2011-08-08 -# o BUG FIX: If dropSegmentationOutliers() would drop an outlier next to -# a change point, such that total copy-number signal would become NA, -# then the sanity checks that TCN segments always overlaps DH segments -# would fail. Now the sanity checks are aware of this special case. -# o Moved the sanity checks that tests the TCN and DH "segRows" from the -# bootstrapTCNandDHByRegion() to segmentByPairedPSCBS(). This is the -# first step to fix a failure in the sanity checks that could happend -# iff one first run dropSegmentationOutliers(). -# 2011-07-15 -# o DOCUMENTATION: Added a section to help("segmentByPairedPSCBS") on -# the importance of doing a whole-genome PSCBS segmentations if -# calling AB and LOH states afterward. -# 2011-07-14 -# o DOCUMENTATION: Added to the help that arguments betaT, betaN and muN -# may contain NAs for non-polymorphic loci. -# o BUG FIX/ROBUSTNESS: In some cases, the segmentation table would -# contain column names with incorrect capitalization, e.g. "tcnnbrOfLoci" -# instead of "tcnNbrOfLoci". This would cause several downstream -# methods to give an error. The reason for this is that the Hmisc -# package, if loaded after R.utils, overrides capitalize() in R.utils -# with another (buggy?) capitalize() function. To avoid this, we -# now everywhere specify explicitly that we want to the one in R.utils. -# 2011-07-06 -# o DOCUMENTATION: The description of argument 'chromosome' for -# segmentByPairedPSCBS() did not describe how to segment multiple -# chromosomes in one call. -# 2011-07-05 -# o BUG FIX: Output fields 'tcnNbrOfSNPs'and 'tcnNbrOfHets' were mistakenly -# labelled as 'tcnNbrOr...'. Thanks Christine Ho at UC Berkeley for -# reporting on this. -# 2011-06-28 -# o DOCUMENTATION: Clarified that argument 'CT' should be tumor copy -# number ratios relative to the normal. -# 2011-06-14 -# o CONVENTION: Changed the column names of returned data frames. -# They now follow the camelCase naming convention and are shorter. -# 2011-05-29 -# o Renamed options to reflect new package name. -# 2011-04-07 -# o ROBUSTNESS: Added validation of the different 'tcnSegRows' and -# 'dhSegRows' calculations in segmentByPairedPSCBS(). This helps -# us narrow down a bug in postsegmentTCN(). -# 2010-12-09 -# o BUG FIX: When there were multiple chromsomes processed by -# segmentByPairedPSCBS(), then the returned data object would -# contain 'betaT' identical to 'betaTN'. -# 2010-12-02 -# o Now segmentByPairedPSCBS() uses option "psCBS/sanityChecks/tolerance". -# 2010-11-30 -# o Now segmentByPairedPSCBS() returns data frames 'tcnLociToExclude' -# and 'dhLociToExclude'. -# o BUG FIX: Argument 'flavor' of segmentByPairedPSCBS() would be ignored -# if multiple chromsomomes were segmented. -# 2010-11-28 -# o BUG FIX: Iff argument 'chromosome' to segmentByPairedPSCBS() was of -# length greater than one and specified exactly one unique chromosome, -# then exception "Number of elements in argument 'chromosome' should -# be exactly 8712 not 86209 value(s)" would be thrown. -# 2010-11-27 -# o BUG FIX: segmentByPairedPSCBS() would not accept missing values in -# argument 'chromosome'. -# o Now arguments '...' of segmentByPairedPSCBS() are passed to -# the two segmentByCBS() calls. -# 2010-11-22 -# o BUG FIX: segmentByPairedPSCBS() would not subset the correct set of -# DH signals if there were some missing values in TCN. -# 2010-11-21 -# o Changed the default to flavor="tch&dh". -# o Added support for flavors "tcn&dh", which, contrary to "tcn,dh", -# enforces TCN and DH to have the same change points. -# o Now segmentByPairedPSCBS() also returns minor and major copy numbers -# for each segment. -# o Forgot to return arguments 'joinSegments' & 'knownCPs' in 'params'. -# 2010-11-20 -# o Now it is possible to specify the boundaries of the regions to be -# segmented as known change points via argument 'knownCPs'. -# o Added argument 'joinSegments' to segmentByPairedPSCBS() in order to -# specify if neighboring segments should be joined or not. -# o Now segmentByCBS() allows for unknown genomic positions. -# o Now segmentByCBS() allows also for missing total CN signals. -# 2010-11-16 -# o BUG FIX: In the rare cases where two loci at the same positions are -# split up into two neighboring segments, then segmentByPairedPSCBS() -# would fail to infer which they were if and only if the loci were not -# ordered along the genome. This could happen with for instance -# Affymetrix GenomeWideSNP_6 data. -# o DOCUMENTATION: Clarified the form of argument 'muN', and added -# references to papers and cross links to more internal methods. -# 2010-11-04 -# o BUG FIX: There was a stray/debug stop() statement left in -# segmentByPairedPSCBS() causing an "error" in the rare case -# when loci that have the same physical locations are split -# into two different segments. -# 2010-11-02 -# o Added arguments 'undoTCN' and 'undoDH' to segmentByPairedPSCBS(). -# o BUG FIX: Arguments 'alphaTCN' and 'alphaDH' of segmentByPairedPSCBS() -# were not used when more than one chromosome were segmented. -# 2010-10-25 -# o BUG FIX: Now the correct set of loci are extracted from each TCN -# segment, in the rare case that two neighboring TCN segments have -# the same end points. -# 2010-10-18 -# o Added arguments 'alphaTCN' and 'alphaDH' to segmentByPairedPSCBS(). -# o Now segmentByPairedPSCBS() can segment multiple chromosomes. -# 2010-10-17 -# o Added argument 'tbn' to segmentByPairedPSCBS() specifying whether -# TumorBoostNormalization should be applied or not. -# 2010-10-10 -# o The default is now to segment TCN on the original scale, not the sqrt(). -# o Added flavor "sqrt(tcn),dh", which is segments sqrt(TCN) and then DH, -# as original proposed by ABO. -# 2010-10-03 -# o CLEAN UP: Now segmentByPairedPSCBS() is making use of argument -# 'chromosome' of segmentByCBS(). -# 2010-10-02 -# o Argument 'chromosome' default to 0 and have to be a finite integer. -# 2010-09-24 -# o Now the 'data' field returned is a data.frame (no longer a list). -# o Now the 'chromosome' field of the data field is expanded to have the -# same number of elements as the other locus fields. -# 2010-09-18 -# o Added argument 'chromosome' to segmentByPairedPSCBS(), which, if given, -# adds a chromosome column to the data and segmentation results. -# 2010-09-08 -# o Now segmentByPairedPSCBS() also returns the TumorBoost normalized data. -# This also means that plot() for PairedPSCBS no longer has to -# recalculate them. -# 2010-09-04 -# o Added drawLevels() for PairedPSCBS. -# o Added as.data.frame() and print() for PairedPSCBS. -# 2010-09-03 -# o Added plot(). -# 2010-07-09 -# o The segmentByPairedPSCBS() method was written completely from scratch. -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/testROH.R r-cran-pscbs-0.64.0/R/testROH.R --- r-cran-pscbs-0.63.0/R/testROH.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/testROH.R 2018-08-12 21:30:44.000000000 +0000 @@ -40,41 +40,41 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'muN': - muN <- Arguments$getDoubles(muN, range=c(0,1)); - nbrOfSnps <- length(muN); - length2 <- rep(nbrOfSnps, times=2); + muN <- Arguments$getDoubles(muN, range=c(0,1)) + nbrOfSnps <- length(muN) + length2 <- rep(nbrOfSnps, times=2) # Argument 'csN' & 'betaN': if (!is.null(csN)) { - csN <- Arguments$getDoubles(csN, range=c(0,1), length=length2); + csN <- Arguments$getDoubles(csN, range=c(0,1), length=length2) } else { if (!is.null(betaN)) { - betaN <- Arguments$getDoubles(betaN, length=length2); + betaN <- Arguments$getDoubles(betaN, length=length2) } } # Argument 'minNbrOfSnps': - minNbrOfSnps <- Arguments$getInteger(minNbrOfSnps, range=c(1,Inf)); + minNbrOfSnps <- Arguments$getInteger(minNbrOfSnps, range=c(1,Inf)) # Argument 'verbose': - verbose <- Arguments$getVerbose(verbose); + verbose <- Arguments$getVerbose(verbose) if (verbose) { - pushState(verbose); - on.exit(popState(verbose)); + pushState(verbose) + on.exit(popState(verbose)) } - verbose && enter(verbose, "Testing for ROH"); + verbose && enter(verbose, "Testing for ROH") # Default ROH call - call <- NA; + call <- NA - verbose && cat(verbose, "Number of SNPs: ", nbrOfSnps); + verbose && cat(verbose, "Number of SNPs: ", nbrOfSnps) # Nothing todo? if (nbrOfSnps < minNbrOfSnps) { - verbose && exit(verbose); - return(call); + verbose && exit(verbose) + return(call) } @@ -83,22 +83,22 @@ # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if (is.null(csN)) { if (!is.null(betaN)) { - verbose && enter(verbose, "Calculating confidence scores"); + verbose && enter(verbose, "Calculating confidence scores") # Assuming naive genotyping a'la aroma.light::callNaiveGenotypes() # was used to call genotypes 'muN' from 'betaN'. # AD HOC: We also have to assume that the thresholds were 1/3 and 2/3. - a <- 1/3; # was fit$x[1]; - b <- 2/3; # was fit$x[2]; + a <- 1/3 # was fit$x[1] + b <- 2/3 # was fit$x[2] # AD HOC: We have to make some assumption about which SNPs are diploid. # Assume all for now - isDiploid <- rep(TRUE, times=nbrOfSnps); + isDiploid <- rep(TRUE, times=nbrOfSnps) # KNOWN ISSUE: Scores for homozygotes are in [0,1/3], whereas # heterzygotes are in [0,1/6]. /PN 2011-11-11 - csN[isDiploid] <- rowMins(abs(cbind(betaN[isDiploid]-a, betaN[isDiploid]-b))); - verbose && exit(verbose); + csN[isDiploid] <- rowMins(abs(cbind(betaN[isDiploid]-a, betaN[isDiploid]-b))) + verbose && exit(verbose) } } @@ -107,38 +107,38 @@ # Call ROH # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Identify heterozygous SNPs - isHet <- (muN == 1/2); - verbose && print(verbose, summary(isHet)); + isHet <- (muN == 1/2) + verbose && print(verbose, summary(isHet)) # With or without genotype confidence scores? if (!is.null(csN)) { # 0-1 weights (just to make sure) # Weights summing to one - w <- csN / sum(csN, na.rm=TRUE); + w <- csN / sum(csN, na.rm=TRUE) - wnHets <- sum(isHet*w, na.rm=TRUE); - wnSnps <- sum(w, na.rm=TRUE); # == 1 /HB + wnHets <- sum(isHet*w, na.rm=TRUE) + wnSnps <- sum(w, na.rm=TRUE) # == 1 /HB # Sanity check - stopifnot(isZero(wnSnps - 1.0, eps=sqrt(.Machine$double.eps))); + .stop_if_not(isZero(wnSnps - 1.0, eps=sqrt(.Machine$double.eps))) } else { - wnHets <- sum(isHet, na.rm=TRUE); - wnSnps <- 1; + wnHets <- sum(isHet, na.rm=TRUE) + wnSnps <- 1 } - propHets <- wnHets/wnSnps; - verbose && print(verbose, propHets); + propHets <- wnHets/wnSnps + verbose && print(verbose, propHets) - call <- (propHets < delta); - verbose && print(verbose, call); + call <- (propHets < delta) + verbose && print(verbose, call) # Record parameter settings - attr(call, "minNbrOfSnps") <- minNbrOfSnps; - attr(call, "delta") <- delta; + attr(call, "minNbrOfSnps") <- minNbrOfSnps + attr(call, "delta") <- delta - verbose && exit(verbose); + verbose && exit(verbose) - call; + call }) # testROH() diff -Nru r-cran-pscbs-0.63.0/R/utils.R r-cran-pscbs-0.64.0/R/utils.R --- r-cran-pscbs-0.63.0/R/utils.R 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/utils.R 2018-08-12 21:30:44.000000000 +0000 @@ -0,0 +1,15 @@ +.stop_if_not <- function(...) { + res <- list(...) + n <- length(res) + if (n == 0L) return() + + for (ii in 1L:n) { + res_ii <- .subset2(res, ii) + if (length(res_ii) != 1L || is.na(res_ii) || !res_ii) { + mc <- match.call() + call <- deparse(mc[[ii + 1]], width.cutoff = 60L) + if (length(call) > 1L) call <- paste(call[1L], "...") + stop(sQuote(call), " is not TRUE but ", call. = FALSE, domain = NA) + } + } +} diff -Nru r-cran-pscbs-0.63.0/R/weightedQuantile.R r-cran-pscbs-0.64.0/R/weightedQuantile.R --- r-cran-pscbs-0.63.0/R/weightedQuantile.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/weightedQuantile.R 2018-08-12 21:30:44.000000000 +0000 @@ -46,36 +46,36 @@ #*/############################################################################ setMethodS3("weightedQuantile", "default", function(x, w, probs=c(0, 0.25, 0.5, 0.75, 1), na.rm=TRUE, method=c("wtd.quantile"), ...) { # Argument 'x': - x <- Arguments$getNumerics(x); + x <- Arguments$getNumerics(x) # Argument 'w': if (missing(w)) { # By default use weights that are one. - w <- rep(1, times=length(x)); + w <- rep(1, times=length(x)) } else { - w <- Arguments$getNumerics(w, range=c(0,Inf), length=rep(length(x), times=2L)); + w <- Arguments$getNumerics(w, range=c(0,Inf), length=rep(length(x), times=2L)) } - naValue <- NA; - storage.mode(naValue) <- storage.mode(x); + naValue <- NA + storage.mode(naValue) <- storage.mode(x) # Argument 'na.rm': if (is.na(na.rm)) { # There are no NAs } else if (isTRUE(na.rm)) { # Remove values that are NA's - tmp <- !(is.na(x) | is.na(w)); - x <- .subset(x, tmp); - w <- .subset(w, tmp); + tmp <- !(is.na(x) | is.na(w)) + x <- .subset(x, tmp) + w <- .subset(w, tmp) } else if (anyNA(x)) { - return(naValue); + return(naValue) } # Argument 'method': - method <- match.arg(method); + method <- match.arg(method) if (method == "wtd.quantile") { # This will load 'Hmisc', if not already done - wtd.quantile <- Hmisc::wtd.quantile; + wtd.quantile <- Hmisc::wtd.quantile } @@ -83,47 +83,35 @@ # Remove values with zero (and negative) weight. This will: # (1) take care of the case when all weights are zero, # (2) it will most likely speed up the sorting. - n <- length(w); - tmp <- (w > 0); + n <- length(w) + tmp <- (w > 0) if (!all(tmp)) { - x <- .subset(x, tmp); - w <- .subset(w, tmp); - n <- sum(tmp); + x <- .subset(x, tmp) + w <- .subset(w, tmp) + n <- sum(tmp) } # Are there any values left to calculate the weighted median of? if (n == 0) { - return(naValue); + return(naValue) } else if (n == 1) { - return(x); + return(x) } # Are any weights Inf? Then treat them with equal weight and all others # with weight zero. If they have equal weight, regular quantile # can be used instead, which is assumed to be faster. - tmp <- is.infinite(w); + tmp <- is.infinite(w) if (any(tmp)) { - x <- .subset(x, tmp); + x <- .subset(x, tmp) # Here we know there are no NAs. - return(quantile(x, probs=probs, na.rm=FALSE, ...)); + return(quantile(x, probs=probs, na.rm=FALSE, ...)) } # Here we know that there are no missing values in the data if (method == "wtd.quantile") { - wtd.quantile(x, weights=w, probs=probs, normwt=TRUE, na.rm=FALSE, ...); + wtd.quantile(x, weights=w, probs=probs, normwt=TRUE, na.rm=FALSE, ...) } else { - throw("Cannot estimate weighted quantiles: Argument 'method' is unknown: ", method); + throw("Cannot estimate weighted quantiles: Argument 'method' is unknown: ", method) } }) # weightedQuantile() - - -############################################################################ -# HISTORY: -# 2013-09-26 [HB] -# o CLEANUP: Now weightedQuantile(..., method=="wtd.quantile") no longer -# attaches 'Hmisc', but only loads its namespace. -# 2012-08-30 -# o Updated Rdoc cross reference for matrixStats to point to matrixStats. -# 2011-04-08 -# o Created. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/R/writeWIG.R r-cran-pscbs-0.64.0/R/writeWIG.R --- r-cran-pscbs-0.63.0/R/writeWIG.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/R/writeWIG.R 2018-08-12 21:30:44.000000000 +0000 @@ -3,7 +3,7 @@ graphType <- match.arg(graphType) # Argument 'nbrOfDecimals': - nbrOfDecimals <- Arguments$getInteger(nbrOfDecimals); + nbrOfDecimals <- Arguments$getInteger(nbrOfDecimals) data <- getSegments(fit, splitter=FALSE) fields <- c("chromosome", "start", "end", "mean") @@ -26,7 +26,7 @@ # Round mean levels if (!is.null(nbrOfDecimals)) { - data[["mean"]] <- round(data[["mean"]], digits=nbrOfDecimals); + data[["mean"]] <- round(data[["mean"]], digits=nbrOfDecimals) } # Drop segments with missing values @@ -83,29 +83,29 @@ # Validate arguments # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Argument 'name' and 'tags': - name <- Arguments$getCharacter(name); - tags <- Arguments$getCharacters(tags); + name <- Arguments$getCharacter(name) + tags <- Arguments$getCharacters(tags) # Argument 'ext': - ext <- Arguments$getCharacter(ext); + ext <- Arguments$getCharacter(ext) # Arguments 'path': - path <- Arguments$getWritablePath(path); + path <- Arguments$getWritablePath(path) - fullname <- paste(c(name, tags), collapse=","); - filename <- sprintf("%s.%s", fullname, ext); - pathname <- Arguments$getWritablePathname(filename, path=path, mustNotExist=(!overwrite && !skip)); + fullname <- paste(c(name, tags), collapse=",") + filename <- sprintf("%s.%s", fullname, ext) + pathname <- Arguments$getWritablePathname(filename, path=path, mustNotExist=(!overwrite && !skip)) # File already exists? if (isFile(pathname)) { # Skip? if (skip) { - return(pathname); + return(pathname) } # Overwrite! - file.remove(pathname); + file.remove(pathname) } ## Write file (atomically) @@ -134,10 +134,3 @@ pathname }) - - -############################################################################ -# HISTORY: -# 2015-09-08 -# o Added extractWIG() and writeWIG() for CBS objects. -############################################################################ diff -Nru r-cran-pscbs-0.63.0/.Rbuildignore r-cran-pscbs-0.64.0/.Rbuildignore --- r-cran-pscbs-0.63.0/.Rbuildignore 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/.Rbuildignore 2018-08-12 21:30:44.000000000 +0000 @@ -56,3 +56,5 @@ ^.ghi ^.issues +^.*\.Rproj$ +^\.Rproj\.user$ diff -Nru r-cran-pscbs-0.63.0/README.md r-cran-pscbs-0.64.0/README.md --- r-cran-pscbs-0.63.0/README.md 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/README.md 2018-08-12 21:30:44.000000000 +0000 @@ -12,11 +12,11 @@ same. To reset to non-parallel processing, use `future::plan("sequential")`. To configure this automatically whenever the package is loaded, see -future vignette '[A Future for R: Controlling Default Future Strategy](https://cran.r-project.org/web/packages/future/vignettes/future-4-startup.html)'. +future vignette '[A Future for R: Controlling Default Future Strategy](https://cran.r-project.org/web/packages/future/vignettes/future-5-startup.html)'. ## Installation -R package PSCBS is available on [CRAN](http://cran.r-project.org/package=PSCBS) and can be installed in R as: +R package PSCBS is available on [CRAN](https://cran.r-project.org/package=PSCBS) and can be installed in R as: ```r install.packages('PSCBS') ``` diff -Nru r-cran-pscbs-0.63.0/revdep/check.R r-cran-pscbs-0.64.0/revdep/check.R --- r-cran-pscbs-0.63.0/revdep/check.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/revdep/check.R 1970-01-01 00:00:00.000000000 +0000 @@ -1,20 +0,0 @@ -library("devtools") - -availableCores <- function() { - getenv <- function(name) { - as.integer(Sys.getenv(name, NA_character_)) - } - getopt <- function(name) { - as.integer(getOption(name, NA_integer_)) - } - if (is.finite(n <- getopt("mc.cores") + 1L)) return(n) - if (is.finite(n <- getopt("Ncpus") + 1L)) return(n) - if (is.finite(n <- getenv("PBS_NUM_PPN"))) return(n) - if (is.finite(n <- getenv("SLURM_CPUS_PER_TASK"))) return(n) - if (is.finite(n <- getenv("NSLOTS"))) return(n) - 1L -} - -revdep_check(bioconductor = TRUE, recursive = TRUE, threads = availableCores()) -revdep_check_save_summary() -revdep_check_print_problems() diff -Nru r-cran-pscbs-0.63.0/revdep/problems.md r-cran-pscbs-0.64.0/revdep/problems.md --- r-cran-pscbs-0.63.0/revdep/problems.md 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/revdep/problems.md 2018-08-12 21:30:44.000000000 +0000 @@ -1,40 +1,43 @@ -# Setup +# aroma.cn -## Platform +Version: 1.6.1 -|setting |value | -|:--------|:----------------------------| -|version |R version 3.4.0 (2017-04-21) | -|system |x86_64, linux-gnu | -|ui |X11 | -|language |en | -|collate |en_US.UTF-8 | -|tz |America/Los_Angeles | -|date |2017-06-27 | - -## Packages - -|package |* |version |date |source | -|:-----------|:--|:-------|:----------|:--------------| -|aroma.light | |3.6.0 |2017-05-18 |cran (@3.6.0) | -|DNAcopy | |1.50.1 |2017-05-18 |cran (@1.50.1) | -|future | |1.5.0 |2017-05-26 |cran (@1.5.0) | -|ggplot2 | |2.2.1 |2016-12-30 |cran (@2.2.1) | -|Hmisc | |4.0-3 |2017-05-02 |cran (@4.0-3) | -|listenv | |0.6.0 |2015-12-28 |cran (@0.6.0) | -|matrixStats | |0.52.2 |2017-04-14 |cran (@0.52.2) | -|PSCBS | |0.62.0 |2016-11-11 |cran (@0.62.0) | -|R.cache | |0.12.0 |2015-11-12 |cran (@0.12.0) | -|R.devices | |2.15.1 |2016-11-10 |cran (@2.15.1) | -|R.methodsS3 | |1.7.1 |2016-02-16 |cran (@1.7.1) | -|R.oo | |1.21.0 |2016-11-01 |cran (@1.21.0) | -|R.rsp | |0.41.0 |2017-04-16 |cran (@0.41.0) | -|R.utils | |2.5.0 |2016-11-07 |cran (@2.5.0) | +## In both -# Check results +* checking package dependencies ... NOTE + ``` + Package suggested but not available for checking: ‘GLAD’ + ``` -0 packages with problems +# aroma.core +Version: 3.1.3 +## In both +* checking package dependencies ... NOTE + ``` + Packages suggested but not available for checking: + ‘EBImage’ ‘GLAD’ ‘expectile’ ‘HaarSeg’ ‘mpcbs’ + ``` + +* checking Rd cross-references ... NOTE + ``` + Package unavailable to check Rd xrefs: ‘GLAD’ + ``` + +# PureCN + +Version: 1.11.11 + +## In both + +* checking installed package size ... NOTE + ``` + installed size is 7.3Mb + sub-directories of 1Mb or more: + data 1.1Mb + doc 2.6Mb + extdata 3.0Mb + ``` diff -Nru r-cran-pscbs-0.63.0/revdep/README.md r-cran-pscbs-0.64.0/revdep/README.md --- r-cran-pscbs-0.63.0/revdep/README.md 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/revdep/README.md 2018-08-12 21:30:44.000000000 +0000 @@ -1,327 +1,41 @@ -# Setup +# Platform -## Platform - -|setting |value | +|field |value | |:--------|:----------------------------| -|version |R version 3.4.0 (2017-04-21) | +|version |R version 3.5.1 (2018-07-02) | +|os |Ubuntu 18.04.1 LTS | |system |x86_64, linux-gnu | |ui |X11 | |language |en | |collate |en_US.UTF-8 | -|tz |America/Los_Angeles | -|date |2017-06-27 | - -## Packages - -|package |* |version |date |source | -|:-----------|:--|:-------|:----------|:--------------| -|aroma.light | |3.6.0 |2017-05-18 |cran (@3.6.0) | -|DNAcopy | |1.50.1 |2017-05-18 |cran (@1.50.1) | -|future | |1.5.0 |2017-05-26 |cran (@1.5.0) | -|ggplot2 | |2.2.1 |2016-12-30 |cran (@2.2.1) | -|Hmisc | |4.0-3 |2017-05-02 |cran (@4.0-3) | -|listenv | |0.6.0 |2015-12-28 |cran (@0.6.0) | -|matrixStats | |0.52.2 |2017-04-14 |cran (@0.52.2) | -|PSCBS | |0.62.0 |2016-11-11 |cran (@0.62.0) | -|R.cache | |0.12.0 |2015-11-12 |cran (@0.12.0) | -|R.devices | |2.15.1 |2016-11-10 |cran (@2.15.1) | -|R.methodsS3 | |1.7.1 |2016-02-16 |cran (@1.7.1) | -|R.oo | |1.21.0 |2016-11-01 |cran (@1.21.0) | -|R.rsp | |0.41.0 |2017-04-16 |cran (@0.41.0) | -|R.utils | |2.5.0 |2016-11-07 |cran (@2.5.0) | - -# Check results - -11 packages - -|package |version | errors| warnings| notes| -|:----------------|:-------|------:|--------:|-----:| -|ACNE |0.8.1 | 0| 0| 0| -|aroma.affymetrix |3.1.0 | 0| 0| 0| -|aroma.cn |1.6.1 | 0| 0| 0| -|aroma.core |3.1.0 | 0| 0| 1| -|calmate |0.12.1 | 0| 0| 0| -|MPAgenomics |1.1.2 | 0| 0| 2| -|NSA |0.0.32 | 0| 0| 6| -|PECA |1.12.0 | 0| 0| 1| -|PureCN |1.6.3 | 0| 0| 1| -|Repitools |1.22.0 | 0| 0| 3| -|TIN |1.8.0 | 0| 0| 2| - -## ACNE (0.8.1) -Maintainer: Henrik Bengtsson -Bug reports: https://github.com/HenrikBengtsson/ACNE/issues - -0 errors | 0 warnings | 0 notes - -## aroma.affymetrix (3.1.0) -Maintainer: Henrik Bengtsson -Bug reports: https://github.com/HenrikBengtsson/aroma.affymetrix/issues - -0 errors | 0 warnings | 0 notes - -## aroma.cn (1.6.1) -Maintainer: Henrik Bengtsson -Bug reports: https://github.com/HenrikBengtsson/aroma.cn/issues - -0 errors | 0 warnings | 0 notes - -## aroma.core (3.1.0) -Maintainer: Henrik Bengtsson -Bug reports: https://github.com/HenrikBengtsson/aroma.core/issues - -0 errors | 0 warnings | 1 note - -``` -checking package dependencies ... NOTE -Packages suggested but not available for checking: - ‘expectile’ ‘HaarSeg’ ‘mpcbs’ -``` - -## calmate (0.12.1) -Maintainer: Henrik Bengtsson -Bug reports: https://github.com/HenrikBengtsson/calmate/issues - -0 errors | 0 warnings | 0 notes - -## MPAgenomics (1.1.2) -Maintainer: Samuel Blanck - -0 errors | 0 warnings | 2 notes - -``` -checking dependencies in R code ... NOTE -'library' or 'require' calls in package code: - ‘R.devices’ ‘R.filesets’ ‘R.methodsS3’ ‘R.oo’ ‘aroma.affymetrix’ - ‘aroma.cn’ ‘aroma.core’ ‘aroma.light’ ‘matrixStats’ ‘snowfall’ - Please use :: or requireNamespace() instead. - See section 'Suggested packages' in the 'Writing R Extensions' manual. -Unexported object imported by a ':::' call: ‘cghseg:::segmeanCO’ - See the note in ?`:::` about the use of this operator. - -checking R code for possible problems ... NOTE -.varregtimescount: no visible global function definition for ‘var’ -CGHSEGaroma: no visible global function definition for ‘read.csv’ -CGHSEGaroma : : no visible global function definition for - ‘points’ -CGHSEGaroma : : no visible global function definition for - ‘lines’ -CGHSEGaroma : : no visible global function definition for - ‘write.table’ -CGHcall: no visible global function definition for ‘mad’ -... 43 lines ... -tumorboostPlot: no visible global function definition for ‘par’ -tumorboostPlot: no visible global function definition for ‘axis’ -tumorboostPlot: no visible global function definition for ‘points’ -Undefined global functions or variables: - axis head lines lm mad median optim par points read.csv sd var - write.table -Consider adding - importFrom("graphics", "axis", "lines", "par", "points") - importFrom("stats", "lm", "mad", "median", "optim", "sd", "var") - importFrom("utils", "head", "read.csv", "write.table") -to your NAMESPACE file. -``` - -## NSA (0.0.32) -Maintainer: Maria Ortiz-Estevez - -0 errors | 0 warnings | 6 notes - -``` -checking package dependencies ... NOTE -Depends: includes the non-default packages: - ‘R.methodsS3’ ‘MASS’ ‘matrixStats’ ‘R.oo’ ‘R.utils’ ‘aroma.core’ - ‘aroma.affymetrix’ ‘DNAcopy’ -Adding so many packages to the search path is excessive and importing -selectively is preferable. - -checking top-level files ... NOTE -Non-standard file/directory found at top level: - ‘incl’ - -checking dependencies in R code ... NOTE -Packages in Depends field not imported from: - ‘DNAcopy’ ‘MASS’ ‘R.methodsS3’ ‘R.oo’ ‘aroma.affymetrix’ ‘aroma.core’ - ‘matrixStats’ - These packages need to be imported from (in the NAMESPACE file) - for when this namespace is loaded but not attached. - -checking S3 generic/method consistency ... NOTE -Found the following apparent S3 methods exported but not registered: - NSAByTotalAndFracB.matrix allocateOutputDataSets.NSANormalization - allocateOutputDataSets.SNPsNormalization - allocateOutputDataSets.SampleNormalization - as.character.NSANormalization as.character.SNPsNormalization - as.character.SampleNormalization findArraysTodo.NSANormalization - findArraysTodo.SampleNormalization findUnitsTodo.SNPsNormalization - fitNSA.matrix fitNSAcnPs.matrix getDataSets.NSANormalization - getDataSets.SNPsNormalization getDataSets.SampleNormalization - getName.NSANormalization getName.SNPsNormalization - getName.SampleNormalization getOutputDataSets.NSANormalization - getOutputDataSets.SNPsNormalization - getOutputDataSets.SampleNormalization getPath.NSANormalization - getPath.SNPsNormalization getPath.SampleNormalization - getRootPath.NSANormalization getRootPath.SNPsNormalization - getRootPath.SampleNormalization process.NSANormalization - process.SNPsNormalization process.SampleNormalization - sampleNByTotalAndFracB.numeric snpsNByTotalAndFracB.matrix -See section ‘Registering S3 methods’ in the ‘Writing R Extensions’ -manual. - -checking R code for possible problems ... NOTE -NB: .First.lib is obsolete and will not be used in R >= 3.0.0 - -.First.lib: no visible global function definition for - ‘packageDescription’ -NSAByTotalAndFracB.matrix: no visible global function definition for - ‘throw’ -NSAByTotalAndFracB.matrix: no visible global function definition for - ‘str’ -NSANormalization: no visible global function definition for ‘throw’ -... 279 lines ... - extractMatrix findUnitsTodo getAsteriskTags getChipType getFile - getFullName getFullNames getGenomeInformation getName getNames - getPath getPathname getPathnames getPositions getRam getRootPath - getTags getUnitsOnChromosome hist median nbrOfFiles newInstance - packageDescription rowAlls rowMedians segment setTags str throw trim - verbose -Consider adding - importFrom("graphics", "hist") - importFrom("stats", "approxfun", "median") - importFrom("utils", "packageDescription", "str") -to your NAMESPACE file. - -checking Rd line widths ... NOTE -Rd file 'NSANormalization.Rd': - \examples lines wider than 100 characters: - by <- 50e3; # 50kb bins; you may want to try with other amounts of smoothing xOut <- seq(from=xRange[1], to=xRange[2], by=by); - plot(getSignals(cnCNPS), getSignals(cnSNPS), xlim=Clim, ylim=Clim); abline(a=0, b=1, col="red", lwd=2); - -These lines will be truncated in the PDF manual. -``` - -## PECA (1.12.0) -Maintainer: Tomi Suomi - -0 errors | 0 warnings | 1 note - -``` -checking Rd line widths ... NOTE -Rd file 'PECA.Rd': - \usage lines wider than 90 characters: - PECA_AffyBatch(affy=NULL, normalize=FALSE, test="t", type="median", paired=FALSE, progress=FALSE) - -These lines will be truncated in the PDF manual. -``` - -## PureCN (1.6.3) -Maintainer: Markus Riester - -0 errors | 0 warnings | 1 note - -``` -checking installed package size ... NOTE - installed size is 5.6Mb - sub-directories of 1Mb or more: - doc 1.6Mb - extdata 2.7Mb -``` - -## Repitools (1.22.0) -Maintainer: Mark Robinson - -0 errors | 0 warnings | 3 notes - -``` -checking R code for possible problems ... NOTE -Found an obsolete/platform-specific call in the following function: - ‘maskOut’ -Found the platform-specific device: - ‘windows’ -dev.new() is the preferred way to open a new device, in the unlikely -event one is needed. -.cpgBoxplots: no visible global function definition for ‘pdf’ -.cpgBoxplots: no visible global function definition for ‘par’ -.cpgBoxplots: no visible global function definition for ‘dev.off’ -... 291 lines ... - rainbow read.table rect str t.test text title verbose -Consider adding - importFrom("grDevices", "dev.off", "pdf", "rainbow") - importFrom("graphics", "abline", "axis", "barplot", "bxp", "grid", - "layout", "legend", "lines", "matlines", "matplot", "mtext", - "par", "persp", "plot", "plot.new", "plot.window", "points", - "polygon", "rect", "text", "title") - importFrom("stats", "dbeta", "embed", "filter", "kmeans", "lm", - "lowess", "p.adjust", "predict", "pt", "qnorm", "t.test") - importFrom("utils", "read.table", "str") -to your NAMESPACE file. - -checking Rd line widths ... NOTE -Rd file 'ChromaBlocks.Rd': - \usage lines wider than 90 characters: - ChromaBlocks(rs.ip, rs.input, organism, chrs, ipWidth=100, inputWidth=500, preset=NULL, blockWidth=NULL, minBlocks=NULL, extend=NULL, c ... [TRUNCATED] - -Rd file 'GCbiasPlots.Rd': - \usage lines wider than 90 characters: - cex = 0.2, pch.col = "black", line.col = "red", lty = 1, lwd = 2, verbose = TRUE) - -Rd file 'absoluteCN.Rd': -... 57 lines ... - -Rd file 'regionStats.Rd': - \usage lines wider than 90 characters: - regionStats(x, design = NULL, maxFDR=0.05, n.perm=5, window=600, mean.trim=.1, min.probes=10, max.gap=500, two.sides=TRUE, ndf, return. ... [TRUNCATED] - regionStats(x, design = NULL, maxFDR=0.05, n.perm=5, window=600, mean.trim=.1, min.probes=10, max.gap=500, two.sides=TRUE, ind=NULL, re ... [TRUNCATED] - -Rd file 'writeWig.Rd': - \usage lines wider than 90 characters: - writeWig(rs, seq.len = NULL, design=NULL, sample=20, drop.zero=TRUE, normalize=TRUE, verbose=TRUE) - -These lines will be truncated in the PDF manual. - -checking compiled code ... NOTE -File ‘Repitools/libs/Repitools.so’: - Found no call to: ‘R_useDynamicSymbols’ - -It is good practice to register native routines and to disable symbol -search. - -See ‘Writing portable packages’ in the ‘Writing R Extensions’ manual. -``` - -## TIN (1.8.0) -Maintainer: Bjarne Johannessen - -0 errors | 0 warnings | 2 notes - -``` -checking top-level files ... NOTE -Non-standard file/directory found at top level: - ‘doc’ +|tz |Europe/Stockholm | +|date |2018-08-12 | -checking R code for possible problems ... NOTE -aberrantExonUsage: no visible global function definition for ‘quantile’ -aberrantExonUsage: no visible global function definition for ‘ave’ -clusterPlot: no visible global function definition for ‘dist’ -clusterPlot: no visible global function definition for ‘hclust’ -clusterPlot: no visible global function definition for - ‘colorRampPalette’ -clusterPlot: no visible global function definition for ‘par’ -clusterPlot: no visible global function definition for ‘png’ -clusterPlot: no visible global function definition for ‘jpeg’ -... 50 lines ... - importFrom("stats", "ave", "dist", "hclust", "median", "quantile") - importFrom("utils", "data", "read.table") -to your NAMESPACE file. +# Dependencies -Found the following assignments to the global environment: -File ‘TIN/R/aberrantExonUsage.R’: - assign("quantiles", quantiles, envir = .GlobalEnv) - assign("aberrantExons", aberrantExons, envir = .GlobalEnv) -File ‘TIN/R/correlationPlot.R’: - assign("randomGeneSetsDist", B, envir = .GlobalEnv) - assign("traPermutationsDist", L, envir = .GlobalEnv) -``` +|package |old |new |Δ | +|:-----------|:------|:-----------|:--| +|PSCBS |0.63.0 |0.63.0-9000 |* | +|aroma.light |3.10.0 |3.10.0 | | +|digest |0.6.15 |0.6.15 | | +|DNAcopy |1.54.0 |1.54.0 | | +|future |1.9.0 |1.9.0 | | +|globals |0.12.1 |0.12.1 | | +|listenv |0.7.0 |0.7.0 | | +|matrixStats |0.54.0 |0.54.0 | | +|R.cache |0.13.0 |0.13.0 | | +|R.methodsS3 |1.7.1 |1.7.1 | | +|R.oo |1.22.0 |1.22.0 | | +|R.utils |2.6.0 |2.6.0 | | + +# Revdeps + +## All (4) + +|package |version |error |warning |note | +|:-----------------------------------|:-------|:-----|:-------|:----| +|[aroma.cn](problems.md#aromacn) |1.6.1 | | |1 | +|[aroma.core](problems.md#aromacore) |3.1.3 | | |2 | +|jointseg |1.0.1 | | | | +|[PureCN](problems.md#purecn) |1.11.11 | | |1 | diff -Nru r-cran-pscbs-0.63.0/revdep/run.R r-cran-pscbs-0.64.0/revdep/run.R --- r-cran-pscbs-0.63.0/revdep/run.R 1970-01-01 00:00:00.000000000 +0000 +++ r-cran-pscbs-0.64.0/revdep/run.R 2018-08-12 21:30:44.000000000 +0000 @@ -0,0 +1,47 @@ +library("revdepcheck") +options(warn = 1L) + +availableCores <- function() { + getenv <- function(name) as.integer(Sys.getenv(name, NA_character_)) + getopt <- function(name) as.integer(getOption(name, NA_integer_)) + if (is.finite(n <- getopt("mc.cores") + 1L)) return(n) + if (is.finite(n <- getopt("Ncpus") + 1L)) return(n) + if (is.finite(n <- getenv("PBS_NUM_PPN"))) return(n) + if (is.finite(n <- getenv("SLURM_CPUS_PER_TASK"))) return(n) + if (is.finite(n <- getenv("NSLOTS"))) return(n) + 1L +} + +check <- function() { + if (file_test("-f", p <- Sys.getenv("R_CHECK_ENVIRON", "~/.R/check.Renviron"))) { + cat(sprintf("R CMD check will use env vars from %s\n", sQuote(p))) + cat(sprintf("To disable, set 'R_CHECK_ENVIRON=false' (a fake pathname)\n")) + } + + envs <- grep("^_R_CHECK_", names(Sys.getenv()), value = TRUE) + if (length(envs) > 0L) { + cat(sprintf("Detected _R_CHECK_* env vars that will affect R CMD check: %s\n", + paste(sQuote(envs), collapse = ", "))) + } + + revdep_check(bioc = TRUE, num_workers = availableCores(), + timeout = as.difftime(20, units = "mins"), quiet = FALSE) +} + + +args <- base::commandArgs() +if ("--reset" %in% args) { + revdep_reset() +} else if ("--todo" %in% args) { + print(revdep_todo()) +} else if ("--add" %in% args) { + pos <- which("--add" == args) + pkgs <- args[seq(from = pos + 1L, to = length(args))] + revdep_add(packages = pkgs) + cat(sprintf("* %s\n", revdep_todo())) +} else if ("--broken" %in% args) { + revdep_add_broken() + cat(sprintf("* %s\n", revdep_todo())) +} else { + check() +} diff -Nru r-cran-pscbs-0.63.0/revdep/timing.md r-cran-pscbs-0.64.0/revdep/timing.md --- r-cran-pscbs-0.63.0/revdep/timing.md 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/revdep/timing.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,17 +0,0 @@ -# Check times - -| |package |version | check_time| -|:--|:----------------|:-------|----------:| -|9 |PureCN |1.6.3 | 755.2| -|10 |Repitools |1.22.0 | 334.9| -|11 |TIN |1.8.0 | 155.7| -|2 |aroma.affymetrix |3.1.0 | 125.1| -|8 |PECA |1.12.0 | 119.8| -|4 |aroma.core |3.1.0 | 90.9| -|6 |MPAgenomics |1.1.2 | 48.1| -|3 |aroma.cn |1.6.1 | 44.8| -|5 |calmate |0.12.1 | 39.6| -|7 |NSA |0.0.32 | 39.3| -|1 |ACNE |0.8.1 | 38.7| - - diff -Nru r-cran-pscbs-0.63.0/tests/segmentByCBS.R r-cran-pscbs-0.64.0/tests/segmentByCBS.R --- r-cran-pscbs-0.63.0/tests/segmentByCBS.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/tests/segmentByCBS.R 2018-08-12 21:30:44.000000000 +0000 @@ -126,7 +126,7 @@ # Simulate multiple chromosomes fit1 <- renameChromosomes(fit, from=0, to=1) fit2 <- renameChromosomes(fit, from=0, to=2) -fitM <- append(fit1, fit2) +fitM <- c(fit1, fit2) fitM <- segmentByCBS(fitM) sampleName(fitM) <- "CBS_Example_M" print(fitM) diff -Nru r-cran-pscbs-0.63.0/tests/segmentByCBS,report.R r-cran-pscbs-0.64.0/tests/segmentByCBS,report.R --- r-cran-pscbs-0.63.0/tests/segmentByCBS,report.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/tests/segmentByCBS,report.R 2018-08-12 21:30:44.000000000 +0000 @@ -37,7 +37,7 @@ # Fake a multi-chromosome segmentation fit1 <- fit fit2 <- renameChromosomes(fit, from=1, to=2) -fit <- append(fit1, fit2) +fit <- c(fit1, fit2) report(fit, sampleName="CBS", studyName="CBS-Ex", verbose=-10) diff -Nru r-cran-pscbs-0.63.0/tests/segmentByCBS,shiftTCN.R r-cran-pscbs-0.64.0/tests/segmentByCBS,shiftTCN.R --- r-cran-pscbs-0.63.0/tests/segmentByCBS,shiftTCN.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/tests/segmentByCBS,shiftTCN.R 2018-08-12 21:30:44.000000000 +0000 @@ -60,7 +60,7 @@ } fitList[[kk]] <- fitKK } # for (kk ...) -fitT <- Reduce(append, fitList) +fitT <- do.call(c, fitList) # Sanity check stopifnot(nbrOfSegments(fitT) == nbrOfSegments(fit)) @@ -94,7 +94,7 @@ } fitList[[kk]] <- fitKK } # for (kk ...) -fitT <- Reduce(append, fitList) +fitT <- do.call(c, fitList) # Sanity check stopifnot(nbrOfSegments(fitT) == nbrOfSegments(fit)) @@ -102,12 +102,12 @@ abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3) -segList <- seqOfSegmentsByDP(fit); -K <- length(segList); -subplots(K, ncol=2, byrow=FALSE); -par(mar=c(2,1,1,1)); +segList <- seqOfSegmentsByDP(fit) +K <- length(segList) +subplots(K, ncol=2, byrow=FALSE) +par(mar=c(2,1,1,1)) for (kk in 1:K) { - knownSegments <- segList[[kk]]; - fitKK <- resegment(fit, knownSegments=knownSegments, undo=+Inf); - plotTracks(fitKK, Clim=c(-3,3)); + knownSegments <- segList[[kk]] + fitKK <- resegment(fit, knownSegments=knownSegments, undo=+Inf) + plotTracks(fitKK, Clim=c(-3,3)) } # for (kk ...) diff -Nru r-cran-pscbs-0.63.0/tests/segmentByPairedPSCBS.R r-cran-pscbs-0.64.0/tests/segmentByPairedPSCBS.R --- r-cran-pscbs-0.63.0/tests/segmentByPairedPSCBS.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/tests/segmentByPairedPSCBS.R 2018-08-12 21:30:44.000000000 +0000 @@ -32,7 +32,7 @@ str(dataS) -fig <- 1; +fig <- 1 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -144,7 +144,7 @@ # Simulate multiple chromosomes fit1 <- fit fit2 <- renameChromosomes(fit, from=1, to=2) -fitM <- append(fit1, fit2) +fitM <- c(fit1, fit2) # Tile chromosomes fitT <- tileChromosomes(fitM) diff -Nru r-cran-pscbs-0.63.0/tests/segmentByPairedPSCBS,report.R r-cran-pscbs-0.64.0/tests/segmentByPairedPSCBS,report.R --- r-cran-pscbs-0.63.0/tests/segmentByPairedPSCBS,report.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/tests/segmentByPairedPSCBS,report.R 2018-08-12 21:30:44.000000000 +0000 @@ -34,7 +34,7 @@ # Fake a multi-chromosome segmentation fit1 <- fit fit2 <- renameChromosomes(fit, from=1, to=2) -fit <- append(fit1, fit2) +fit <- c(fit1, fit2) report(fit, sampleName="PairedPSCBS", studyName="PSCBS-Ex", verbose=-10) diff -Nru r-cran-pscbs-0.63.0/tests/segmentByPairedPSCBS,seqOfSegmentsByDP.R r-cran-pscbs-0.64.0/tests/segmentByPairedPSCBS,seqOfSegmentsByDP.R --- r-cran-pscbs-0.63.0/tests/segmentByPairedPSCBS,seqOfSegmentsByDP.R 2017-06-28 14:32:39.000000000 +0000 +++ r-cran-pscbs-0.64.0/tests/segmentByPairedPSCBS,seqOfSegmentsByDP.R 2018-08-12 21:30:44.000000000 +0000 @@ -34,28 +34,28 @@ R.oo::attachLocally(dataS) -gaps <- findLargeGaps(dataS, minLength=2e6); -knownSegments <- gapsToSegments(gaps, dropGaps=TRUE); +gaps <- findLargeGaps(dataS, minLength=2e6) +knownSegments <- gapsToSegments(gaps, dropGaps=TRUE) # Paired PSCBS segmentation fit <- segmentByPairedPSCBS(dataS, knownSegments=knownSegments, - seed=0xBEEF, verbose=-10); -print(fit); + seed=0xBEEF, verbose=-10) +print(fit) -fit1 <- fit; -fit2 <- renameChromosomes(fit1, from=1, to=2); -fit <- append(fit1, fit2); -knownSegments <- tileChromosomes(fit)$params$knownSegments; +fit1 <- fit +fit2 <- renameChromosomes(fit1, from=1, to=2) +fit <- c(fit1, fit2) +knownSegments <- tileChromosomes(fit)$params$knownSegments -segList <- seqOfSegmentsByDP(fit, verbose=-10); -K <- length(segList); -ks <- seq(from=1, to=K, length.out=min(5,K)); -subplots(length(ks), ncol=1, byrow=TRUE); -par(mar=c(2,1,1,1)); +segList <- seqOfSegmentsByDP(fit, verbose=-10) +K <- length(segList) +ks <- seq(from=1, to=K, length.out=min(5,K)) +subplots(length(ks), ncol=1, byrow=TRUE) +par(mar=c(2,1,1,1)) for (kk in ks) { - knownSegmentsKK <- segList[[kk]]; - fitKK <- resegment(fit, knownSegments=knownSegmentsKK, undoTCN=+Inf, undoDH=+Inf); - plotTracks(fitKK, tracks="tcn,c1,c2", Clim=c(0,5), add=TRUE); - abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3); - stext(side=3, pos=0, sprintf("Number of segments: %d", nrow(knownSegmentsKK))); + knownSegmentsKK <- segList[[kk]] + fitKK <- resegment(fit, knownSegments=knownSegmentsKK, undoTCN=+Inf, undoDH=+Inf) + plotTracks(fitKK, tracks="tcn,c1,c2", Clim=c(0,5), add=TRUE) + abline(v=c(knownSegments$start, knownSegments$end)/1e6, lty=3) + stext(side=3, pos=0, sprintf("Number of segments: %d", nrow(knownSegmentsKK))) } # for (kk ...)