Binary files /tmp/tmpQV1IU2/duMUrOh_TQ/r-bioc-rtracklayer-1.48.0/build/vignette.rds and /tmp/tmpQV1IU2/lZP02ztoCc/r-bioc-rtracklayer-1.50.0/build/vignette.rds differ diff -Nru r-bioc-rtracklayer-1.48.0/debian/changelog r-bioc-rtracklayer-1.50.0/debian/changelog --- r-bioc-rtracklayer-1.48.0/debian/changelog 2020-05-31 06:10:01.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/debian/changelog 2020-11-03 12:30:01.000000000 +0000 @@ -1,8 +1,10 @@ -r-bioc-rtracklayer (1.48.0-1build1) groovy; urgency=medium +r-bioc-rtracklayer (1.50.0-1) unstable; urgency=medium - * No-change rebuild against r-api-4.0 + * New upstream version + * debhelper-compat 13 (routine-update) + * No tab in license text (routine-update) - -- Steve Langasek Sun, 31 May 2020 06:10:01 +0000 + -- Andreas Tille Tue, 03 Nov 2020 13:30:01 +0100 r-bioc-rtracklayer (1.48.0-1) unstable; urgency=medium diff -Nru r-bioc-rtracklayer-1.48.0/debian/control r-bioc-rtracklayer-1.50.0/debian/control --- r-bioc-rtracklayer-1.48.0/debian/control 2020-05-31 06:10:01.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/debian/control 2020-11-03 12:30:01.000000000 +0000 @@ -1,16 +1,15 @@ Source: r-bioc-rtracklayer -Maintainer: Ubuntu Developers -XSBC-Original-Maintainer: Debian R Packages Maintainers +Maintainer: Debian R Packages Maintainers Uploaders: Andreas Tille Section: gnu-r Testsuite: autopkgtest-pkg-r Priority: optional -Build-Depends: debhelper-compat (= 12), +Build-Depends: debhelper-compat (= 13), dh-r, r-base-dev, r-bioc-genomicranges (>= 1.37.2), r-cran-xml, - r-bioc-biocgenerics (>= 0.25.1), + r-bioc-biocgenerics (>= 0.35.3), r-bioc-s4vectors (>= 0.23.18), r-bioc-iranges (>= 2.13.13), r-bioc-xvector (>= 0.19.7), diff -Nru r-bioc-rtracklayer-1.48.0/debian/copyright r-bioc-rtracklayer-1.50.0/debian/copyright --- r-bioc-rtracklayer-1.48.0/debian/copyright 2020-05-20 09:17:28.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/debian/copyright 2020-11-03 12:30:01.000000000 +0000 @@ -79,9 +79,9 @@ License: Artistic-2.0 
License: Artistic-2.0 - The "Artistic License" + The "Artistic License" . - Preamble + Preamble . 1. You may make and give away verbatim copies of the source form of the Standard Version of this Package without restriction, provided that diff -Nru r-bioc-rtracklayer-1.48.0/DESCRIPTION r-bioc-rtracklayer-1.50.0/DESCRIPTION --- r-bioc-rtracklayer-1.48.0/DESCRIPTION 2020-04-27 23:58:52.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/DESCRIPTION 2020-10-28 03:07:00.000000000 +0000 @@ -1,10 +1,10 @@ Package: rtracklayer Title: R interface to genome annotation files and the UCSC genome browser -Version: 1.48.0 +Version: 1.50.0 Author: Michael Lawrence, Vince Carey, Robert Gentleman Depends: R (>= 3.3), methods, GenomicRanges (>= 1.37.2) -Imports: XML (>= 1.98-0), BiocGenerics (>= 0.25.1), S4Vectors (>= +Imports: XML (>= 1.98-0), BiocGenerics (>= 0.35.3), S4Vectors (>= 0.23.18), IRanges (>= 2.13.13), XVector (>= 0.19.7), GenomeInfoDb (>= 1.15.2), Biostrings (>= 2.47.6), zlibbioc, RCurl (>= 1.4-2), Rsamtools (>= 1.31.2), GenomicAlignments (>= @@ -23,14 +23,14 @@ Maintainer: Michael Lawrence License: Artistic-2.0 + file LICENSE Collate: io.R web.R ranges.R trackDb.R browser.R ucsc.R readGFF.R gff.R - bed.R wig.R utils.R bigWig.R chain.R quickload.R twobit.R - fasta.R tabix.R bam.R trackTable.R index.R compression.R - test_rtracklayer_package.R ncbi.R igv.R zzz.R + bed.R wig.R utils.R bigWig.R bigBed.R chain.R quickload.R + trackhub.R twobit.R fasta.R tabix.R bam.R trackTable.R index.R + compression.R test_rtracklayer_package.R ncbi.R igv.R zzz.R biocViews: Annotation,Visualization,DataImport git_url: https://git.bioconductor.org/packages/rtracklayer -git_branch: RELEASE_3_11 -git_last_commit: a3e63e7 -git_last_commit_date: 2020-04-27 -Date/Publication: 2020-04-27 +git_branch: RELEASE_3_12 +git_last_commit: d2e61f7 +git_last_commit_date: 2020-10-27 +Date/Publication: 2020-10-27 NeedsCompilation: yes -Packaged: 2020-04-27 23:58:52 UTC; biocbuild +Packaged: 2020-10-28 03:07:00 
UTC; biocbuild Binary files /tmp/tmpQV1IU2/duMUrOh_TQ/r-bioc-rtracklayer-1.48.0/inst/doc/rtracklayer.pdf and /tmp/tmpQV1IU2/lZP02ztoCc/r-bioc-rtracklayer-1.50.0/inst/doc/rtracklayer.pdf differ Binary files /tmp/tmpQV1IU2/duMUrOh_TQ/r-bioc-rtracklayer-1.48.0/inst/tests/test.bb and /tmp/tmpQV1IU2/lZP02ztoCc/r-bioc-rtracklayer-1.50.0/inst/tests/test.bb differ diff -Nru r-bioc-rtracklayer-1.48.0/inst/tests/trackhub/genomes.txt r-bioc-rtracklayer-1.50.0/inst/tests/trackhub/genomes.txt --- r-bioc-rtracklayer-1.48.0/inst/tests/trackhub/genomes.txt 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/inst/tests/trackhub/genomes.txt 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,3 @@ +genome hg19 +trackDb hg19/trackDb.txt +organism BigFoot diff -Nru r-bioc-rtracklayer-1.48.0/inst/tests/trackhub/hg19/trackDb.txt r-bioc-rtracklayer-1.50.0/inst/tests/trackhub/hg19/trackDb.txt --- r-bioc-rtracklayer-1.48.0/inst/tests/trackhub/hg19/trackDb.txt 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/inst/tests/trackhub/hg19/trackDb.txt 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,9 @@ +track wgEncodeUWDukeDnaseGM12878FdrPeaks +bigDataUrl wgEncodeUWDukeDnaseGM12878.fdr01peaks.hg19.bb +parent uniformDnasePeaks on +type bigBed 6 + +shortLabel GM12878 +longLabel GM12878 DNaseI FDR 1% Uniform Peak Calls from UW-Duke +color 8,104,172 +subGroups view=Peaks tier=t1 cellType=GM12878 lab=UWDuke +metadata cell=GM12878 Binary files /tmp/tmpQV1IU2/duMUrOh_TQ/r-bioc-rtracklayer-1.48.0/inst/tests/trackhub/hg19/wgEncodeUWDukeDnaseGM12878.fdr01peaks.hg19.bb and /tmp/tmpQV1IU2/lZP02ztoCc/r-bioc-rtracklayer-1.50.0/inst/tests/trackhub/hg19/wgEncodeUWDukeDnaseGM12878.fdr01peaks.hg19.bb differ diff -Nru r-bioc-rtracklayer-1.48.0/inst/tests/trackhub/hub.txt r-bioc-rtracklayer-1.50.0/inst/tests/trackhub/hub.txt --- r-bioc-rtracklayer-1.48.0/inst/tests/trackhub/hub.txt 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/inst/tests/trackhub/hub.txt 2020-10-27 
17:24:51.000000000 +0000 @@ -0,0 +1,6 @@ +hub test_hub +shortLabel test_hub +longLabel test_hub +genomesFile genomes.txt +email user@domain.com +descriptionUrl http://www.somedomain.com/articles/h19 diff -Nru r-bioc-rtracklayer-1.48.0/inst/unitTests/test_bb.R r-bioc-rtracklayer-1.50.0/inst/unitTests/test_bb.R --- r-bioc-rtracklayer-1.48.0/inst/unitTests/test_bb.R 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/inst/unitTests/test_bb.R 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,70 @@ +test_bb <- function() { + if (.Platform$OS.type == "windows") + return() + + test_path <- system.file("tests", package = "rtracklayer") + test_bb <- file.path(test_path, "test.bb") + start <- c(237640, 521500 ,565725, 565900, 566760, + 119905, 122525, 173925, 179865, 180185) + ir <- IRanges(start, width = 151) + space <- factor(c(rep("chr1", 5), rep("chr10", 5))) + name <- rep(".", 10) + score <- seq.int(70L, 700L, length = 10) + signalValue <- seq(10, 100, length = 10) + peak <- rep(-1L, 10) + correct_fixed <- GRanges(space, ir, name = name, score = score, + signalValue = signalValue , peak = peak) + si <- SeqinfoForBSGenome("hg19") + seqlengths(correct_fixed) <- seqlengths(si)[levels(space)] + + ## TEST: import whole file + test <- import(test_bb) + checkIdentical(test, correct_fixed) + + ## TEST: 'which' + which <- GRanges(c("chr10"), IRanges(c(180185, 180335))) + correct_which <- subsetByOverlaps(correct_fixed, which) + test <- import(test_bb, which = which) + checkIdentical(test, correct_which) + + ## TEST: empty which + which <- GRanges() + correct_which <- subsetByOverlaps(correct_fixed, which) + test <- import(test_bb, which = which) + checkIdentical(test, correct_which) + + ## TEST: BigBedSelection (GRanges, no field) + which <- GRanges(c("chr10"), IRanges(c(180185, 180335))) + test <- import(test_bb, + selection = BigBedSelection(which, colnames = character())) + correct_subset <- subsetByOverlaps(correct_fixed, which) + correct_which <- correct_subset[, 
character()] + correct_which@elementMetadata <- DataFrame() + checkIdentical(test, correct_which) + + ## TEST: BigBedSelection (GRanges, 1 default field) + test <- import(test_bb, + selection = BigBedSelection(which, colnames = c("name"))) + correct_which <- correct_subset[, c("name")] + checkIdentical(test, correct_which) + + ## TEST: BigBedSelection (GRanges, 1 extra field) + test <- import(test_bb, + selection = BigBedSelection(which, colnames = c("peak"))) + correct_which <- correct_subset[, c("peak")] + checkIdentical(test, correct_which) + + ## TEST: BigBedSelection (GRanges, 1 default field and 1 extra field) + colnames <- c("name", "peak") + test <- import(test_bb, + selection = BigBedSelection(which, colnames =colnames)) + correct_which <- correct_subset[, colnames] + checkIdentical(test, correct_which) + + # TEST: export + test_bb_out <- file.path(tempdir(), "test_out.bb") + export(correct_fixed, test_bb_out) + on.exit(unlink(test_bb_out)) + test <- import(test_bb_out) + checkIdentical(test, correct_fixed) +} diff -Nru r-bioc-rtracklayer-1.48.0/inst/unitTests/test_trackhub.R r-bioc-rtracklayer-1.50.0/inst/unitTests/test_trackhub.R --- r-bioc-rtracklayer-1.48.0/inst/unitTests/test_trackhub.R 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/inst/unitTests/test_trackhub.R 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,139 @@ +test_trackhub <- function() { + test_trackhub_path <- system.file("tests", "trackhub", package = "rtracklayer") + + correct_trackhub_uri <- file.path("file://", test_trackhub_path) + correct_trackhub_genome <- "hg19" + correct_trackhub_length <- 1L + correct_hub <- "test_hub" + correct_shortLabel <- "test_hub" + correct_longLabel <- "test_hub" + correct_genomesFile <- "genomes.txt" + correct_email <- "user@domain.com" + correct_descriptionUrl <- "http://www.somedomain.com/articles/h19" + correct_trackDb <- "hg19/trackDb.txt" + + ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + ### TEST 
TrackHub Class + ### + + th <- TrackHub(test_trackhub_path) + + ## TEST: uri + checkIdentical(uri(th), correct_trackhub_uri) + + ## TEST: genome + checkIdentical(genome(th), correct_trackhub_genome) + + ## TEST: length + checkIdentical(length(th), correct_trackhub_length) + + # TEST: hub + checkIdentical(hub(th), correct_hub) + + # TEST: shortLabel + checkIdentical(shortLabel(th), correct_shortLabel) + + # TEST: longLabel + checkIdentical(longLabel(th), correct_longLabel) + + # TEST: genomesFile + checkIdentical(genomesFile(th), correct_genomesFile) + + # TEST: email + checkIdentical(email(th), correct_email) + + # TEST: descriptionUrl + checkIdentical(descriptionUrl(th), correct_descriptionUrl) + + # TEST: hub<- + new_hub <- "new_hub" + hub(th) <- new_hub + checkIdentical(hub(th), new_hub) + hub(th) <- correct_hub + + # TEST: shortLabel<- + new_shortLabel <- "new_hub" + shortLabel(th) <- new_shortLabel + checkIdentical(shortLabel(th), new_shortLabel) + shortLabel(th) <- correct_shortLabel + + # TEST: longLabel<- + new_longLabel <- "new_hub" + longLabel(th) <- new_longLabel + checkIdentical(longLabel(th), new_longLabel) + longLabel(th) <- correct_longLabel + + # TEST: genomesFile<- + new_genomesFile <- "newfile.txt" + genomesFile(th) <- new_genomesFile + checkIdentical(genomesFile(th), new_genomesFile) + genomesFile(th) <- correct_genomesFile + + # TEST: email<- + new_email <- "new@domail.com" + email(th) <- new_email + checkIdentical(email(th), new_email) + email(th) <- correct_email + + # TEST: descriptionUrl<- + new_descriptionUrl <- "http://newdomail.com/articles/hg19" + descriptionUrl(th) <- new_descriptionUrl + checkIdentical(descriptionUrl(th), new_descriptionUrl) + descriptionUrl(th) <- correct_descriptionUrl + + # TEST: genomeField + checkIdentical(genomeField(th, "hg19", "trackDb"), correct_trackDb) + + ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + ### TEST TrackHubGenome Class + ### + + correct_trackhubgenome_uri <- 
paste0(correct_trackhub_uri, "/hg19") + correct_trackhubgenome_genome_name <- "hg19" + correct_trackhubgenome_length <- 1L + correct_trackhubgenome_organism <- "BigFoot" + correct_trackhubgenome_names <- "wgEncodeUWDukeDnaseGM12878FdrPeaks" + correct_bigDataUrl <- "wgEncodeUWDukeDnaseGM12878.fdr01peaks.hg19.bb" + + thg <- TrackHubGenome(th, "hg19") + + # TEST: uri + checkIdentical(uri(thg), correct_trackhubgenome_uri) + + # TEST: genome + checkIdentical(genome(thg), correct_trackhubgenome_genome_name) + + # TEST: length + checkIdentical(length(thg), correct_trackhubgenome_length) + + # TEST: organism + checkIdentical(organism(thg), correct_trackhubgenome_organism) + + # TEST: names + checkIdentical(names(thg), correct_trackhubgenome_names) + + # TEST: trackNames + checkIdentical(trackNames(thg), correct_trackhubgenome_names) + + # TEST: trackField + checkIdentical(trackField(thg, "wgEncodeUWDukeDnaseGM12878FdrPeaks", "bigDataUrl"), correct_bigDataUrl) + + ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + ### TEST TrackContainer Class + ### + + correct_slot_type <- "Track" + correct_track <- Track(track = "tcell", bigDataUrl = "tcell/data.bigWig") + + # TEST: slot type + tc <- TrackContainer() + slot_type <- slot(tc, "elementType") + checkIdentical(slot_type, correct_slot_type) + + # TEST: wrong type slot error reporting + checkException(tc[[1]] <- 1) + + # TEST: names() + tc[[1]] <- correct_track + checkIdentical(names(tc), correct_track@track) +} diff -Nru r-bioc-rtracklayer-1.48.0/load.R r-bioc-rtracklayer-1.50.0/load.R --- r-bioc-rtracklayer-1.48.0/load.R 2020-04-27 20:22:29.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/load.R 1970-01-01 00:00:00.000000000 +0000 @@ -1,15 +0,0 @@ -library(RCurl) -library(XML) -library(GenomicRanges) -library(rJava) - -files <- c("web.R", "range.R", "trackSet.R", "browser.R", "gff.R", "ucsc.R", - "bed.R", "wig.R", "io.R") -sapply(files, source) - -#track <- import(system.file("inst", "test", "v1.gff", 
package = "rtracklayer")) -track <- import("../inst/tests/bed.wig") -track@genome <- "hg18" - -session <- browserSession("ucsc") -layTrack(session, track) diff -Nru r-bioc-rtracklayer-1.48.0/man/BigBedFile.Rd r-bioc-rtracklayer-1.50.0/man/BigBedFile.Rd --- r-bioc-rtracklayer-1.48.0/man/BigBedFile.Rd 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/man/BigBedFile.Rd 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,143 @@ +\name{BigBedFile-class} +\docType{class} + +%% Classes: +\alias{class:BigBedFile} +\alias{BigBedFile-class} +\alias{class:BBFile} +\alias{BBFile-class} + +%% Constructor: +\alias{BigBedFile} +\alias{BBFile} + +%% Accessors: +\alias{seqinfo,BigBedFile-method} + +%% Import: +\alias{import.bb} +\alias{import.bb,ANY-method} +\alias{import,BigBedFile,ANY,ANY-method} + +%% Export: +\alias{export.bb} +\alias{export.bb,ANY-method} +\alias{export,ANY,BigBedFile,ANY-method} +\alias{export,GenomicRanges,BigBedFile,ANY-method} + +\title{BigBed Import and Export} + +\description{ + These functions support the import and export of the UCSC BigBed + format, a compressed, binary form of BED with a spatial index + and precomputed summaries. These functions do not work on Windows. +} + +\usage{ +\S4method{import}{BigBedFile,ANY,ANY}(con, format, text, + selection = BigBedSelection(which, ...), + which = con, ...) +import.bb(con, ...) + +\S4method{export}{ANY,BigBedFile,ANY}(object, con, format, ...) +\S4method{export}{GenomicRanges,BigBedFile,ANY}(object, con, format, + compress = TRUE, extraIndexes = "") +export.bb(object, con, ...) +} + + +\arguments{ + \item{con}{A path, URL or \code{BigBedFile} object. Connections are + not supported. For the functions ending in \code{.bb}, the file + format is indicated by the function name. For the \code{export} + and \code{import} methods, the format must be indicated another + way. 
If \code{con} is a path, or URL, either the file + extension or the \code{format} argument needs to be \dQuote{bigBed} + or \dQuote{bb}. + } + \item{object}{The object to export, should be \code{GRanges}. + } + \item{format}{If not missing, should be \dQuote{bigBed} or \dQuote{bb} + (case insensitive). + } + \item{text}{Not supported. + } + \item{selection}{A \code{\linkS4class{BigBedSelection}} object + indicating the ranges to load. + } + \item{which}{A range data structure coercible to \code{IntegerRangesList}, + like a \code{GRanges}, or a \code{BigBedFile}. Only the intervals in + the file overlapping the given ranges are returned. By default, the + value is the \code{BigBedFile} itself. Its \code{Seqinfo} object is + extracted and coerced to a \code{IntegerRangesList} that represents the + entirety of the file. + } + \item{compress}{If \code{TRUE}, compress the data. No reason to change this. + } + \item{extraIndexes}{If set, make an index on each field in a comma separated list + } + \item{...}{Arguments to pass down to methods to other methods. For + import, the flow eventually reaches the \code{BigBedFile} method on + \code{import}. + } +} + +\section{\code{BigBedFile} objects}{ + A \code{BigWigFile} object, an extension of + \code{\linkS4class{RTLFile}} is a reference to a BigBed file. To cast + a path, URL or connection to a \code{BigBedFile}, pass it to the + \code{BigBedFile} constructor. + + BigBed files are more complex than most track files, and there are a + number of methods on \code{BigBedFile} for accessing the additional + information: + + \describe{ + \item{}{ + \code{seqinfo(x)}: + Gets the \code{\link[GenomeInfoDb]{Seqinfo}} object + indicating the lengths of the sequences for the intervals in the + file. No circularity or genome information is available. + } + } + + When accessing remote data, the UCSC library caches data in the + \file{/tmp/udcCache} directory. 
To clean the cache, call + \code{cleanBigBedCache(maxDays)}, where any files older than + \code{maxDays} days old will be deleted. +} + +\author{Michael Lawrence} + +\examples{ +if (.Platform$OS.type != "windows") { + test_path <- system.file("tests", package = "rtracklayer") + test_bb <- file.path(test_path, "test.bb") + + ## Returns ranges with all fields + gr <- import(test_bb) + gr + + ## Retuns ranges only for 'chr10' + ## between 180185-180185 with all fields + which <- GRanges(c("chr10"), IRanges(c(180185, 180185))) + import(test_bb, which = which) + + ## Retuns ranges only for 'chr10' + ## between 180185-180185 with name and peak fields + selection <- BigBedSelection(which, colnames = c("name", "peak")) + import(test_bb, selection = selection) + +\dontrun{ + test_bb_out <- file.path(tempdir(), "test_out.bb") + export(test, test_bb_out) + + ## make an index for 'name' + test_bb_out <- file.path(tempdir(), "test_out.bb") + export(test, test_bb_out, extraIndexes = "name") +} +} +} + +\keyword{methods} +\keyword{classes} diff -Nru r-bioc-rtracklayer-1.48.0/man/BigBedSelection.rd r-bioc-rtracklayer-1.50.0/man/BigBedSelection.rd --- r-bioc-rtracklayer-1.48.0/man/BigBedSelection.rd 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/man/BigBedSelection.rd 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,59 @@ +\name{BigBedSelection-class} +\docType{class} +\alias{BigBedSelection-class} + +% constructor +\alias{BigBedSelection} + +\alias{coerce,IntegerRangesList,BigBedSelection-method} +\alias{coerce,GenomicRanges,BigBedSelection-method} + +\title{Selection of ranges and columns} + +\description{A \code{BigBedSelection} represents a query against a + BigBed file, see \code{\link{import.bb}}. It is simply + a \link[IRanges]{RangedSelection} with \code{colnames} + parameter.\code{colnames} should be a character vector of column names. 
+ Default columns are \code{"name", "score", "thick", "itemRgb"} + and \code{"blocks"}, if non-empty, as that is the only column supported + by BigBed.} + +\section{Constructor}{ + \describe{ + \item{}{\code{BigBedSelection(ranges = GRanges(), colnames = + "score")}: Constructs a \code{BigBedSelection} with the given + \code{ranges} and \code{colnames}. + a \code{character} identifying a genome (see + \code{\link{GenomicSelection}}), or a + \code{\linkS4class{BigBedFile}}, in which case the ranges are + derived from the bounds of its sequences. + } + } +} + +\section{Coercion}{ + \describe{ + \item{}{\code{as(from, "BigBedSelection")}: Coerces \code{from} to a + \code{BigBedSelection} object. Typically, \code{from} is a + \code{\link[GenomicRanges]{GRanges}} or + a \code{\link[IRanges]{IntegerRangesList}}, the ranges of + which become the ranges in the + new \code{BigBedSelection}. + } + } +} + +\author{ Michael Lawrence } + +\examples{ + rl <- IRangesList(chr1 = IRanges::IRanges(c(1, 5), c(3, 6))) + + BigBedSelection(rl) + as(rl, "BigBedSelection") # same as above + + # do not select any column + BigBedSelection(rl, character()) +} + +\keyword{methods} +\keyword{classes} diff -Nru r-bioc-rtracklayer-1.48.0/man/TrackHub-class.Rd r-bioc-rtracklayer-1.50.0/man/TrackHub-class.Rd --- r-bioc-rtracklayer-1.48.0/man/TrackHub-class.Rd 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/man/TrackHub-class.Rd 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,167 @@ +\name{TrackHub-class} +\docType{class} + +%% Classes: +\alias{class:TrackHub} +\alias{TrackHub-class} + +%% Constructor: +\alias{TrackHub} + +%% Accessors: +\alias{$,TrackHub-method} +\alias{[[,TrackHub,ANY,ANY-method} +\alias{genome,TrackHub-method} +\alias{length,TrackHub-method} +\alias{uri} +\alias{writeTrackHub} + +%% Data Access: +\alias{hub,TrackHub-method} +\alias{hub<-,TrackHub-method} +\alias{shortLabel,TrackHub-method} +\alias{shortLabel<-,TrackHub-method} +\alias{longLabel,TrackHub-method} 
+\alias{longLabel<-,TrackHub-method} +\alias{genomeFile,TrackHub-method} +\alias{genomeFile<-,TrackHub-method} +\alias{email,TrackHub-method} +\alias{email<-,TrackHub-method} +\alias{descriptionUrl,TrackHub-method} +\alias{descriptionUrl<-,TrackHub-method} +\alias{genomeField,TrackHub-method} +\alias{genomeField<-,TrackHub-method} +\alias{genomeInfo,TrackHub-method} +\alias{genomeInfo<-,TrackHub-method} + +%% Coercion: +\alias{coerce,character,TrackHub-method} + +%% Show: +\alias{show,TrackHub-method} + +\title{TrackHub Access} + +\description{ + The \code{TrackHub} class represents a TrackHub data source, + essentially directory layout separating tracks and sequences by + genome, along with a few metadata files. This interface abstracts + those details and provides access to a TrackHub at any URL supported + by R (HTTP, FTP, and local files). This is an easy way to make data + accessible to the UCSC Genome Browser. +} + +\section{Constructor}{ + \describe{ + \item{}{ + \code{TrackHub(uri, create = FALSE)}: Constructs a + new \code{TrackHub} object, representing a repository at + \code{uri}. If \code{create} is \code{TRUE}, and \code{uri} is + writeable (i.e., local), the repository is created if it does not + already exist. If it does exist, then a message is emitted to + indicate that the repository was not recreated. + } + } +} + +\section{Accessor Methods}{ + In the code snippets below, \code{x} represents a \code{TrackHub} + object. + + \describe{ + \item{}{\code{x$genome}, \code{x[["genome"]]}: Get + the \code{\linkS4class{TrackHubGenome}} object for the genome named + \code{genome}. + } + \item{}{\code{length(x)}: number of genomes in the repository. + } + \item{}{\code{uri(x)}: + Get the URI pointing to the TrackHub repository. + } + \item{}{\code{genome(x)}: + Get the identifiers of the genomes present in the repository. + } + \item{}{\code{writeTrackHub(x)}: + Write hub content and genomes from memory representation to the hub file and genomes file. 
+ It also create resources if they are missing like genomes file and genome directory for + newly add genome. + } + } +} + +\section{Data Access}{ + Note that all storing methods(like \code{hub()<-}) are only supported for local repositories, i.e., those with a file:// URI scheme. + \describe{ + \item{}{ + \code{hub(x)}: get the value of hub. + } + \item{}{ + \code{hub(x) <- value}: store the \code{value} of hub for \code{x}. + } + \item{}{ + \code{shortLabel(x)}: get the value of hub. + } + \item{}{ + \code{shortLabel(x) <- value}: store the \code{value} of shortLabel for \code{x}. + } + \item{}{ + \code{longLabel(x)}: get the value of hub. + } + \item{}{ + \code{longLabel(x) <- value}: store the \code{value} of longLabel for \code{x}. + } + \item{}{ + \code{genomeFile(x)}: get the value of hub. + } + \item{}{ + \code{genomeFile(x) <- value}: store the \code{value} of genomesFile for \code{x}. + } + \item{}{ + \code{email(x)}: get the value of hub. + } + \item{}{ + \code{email(x) <- value}: store the \code{value} of email for \code{x}. + } + \item{}{ + \code{descriptionUrl(x)}: get the value of hub. + } + \item{}{ + \code{descriptionUrl(x) <- value}: store the \code{value} of descriptionUrl for \code{x}. + } + \item{}{ + \code{genomeField(x, name, field)}: Get the \code{value} of \code{field} for \code{name} genome. + } + \item{}{ + \code{genomeField(x, name, field) <- value}: Set or Update the \code{field} and \code{value} for \code{name} genome. + } + \item{}{ + \code{genomeInfo(x, name)}: Get the \code{Genome} object for \code{name} genome. + } + \item{}{ + \code{genomeInfo(x) <- value}: Add \code{value} (Genome object) to existing genomes list. + \code{Genome} takes named arguemnts of all UCSC supported fields for genome + file(like \code{genome, trackDb, twoBitPath}, etc). 
+ } + } +} + +\author{Michael Lawrence} + +\examples{ +th <- TrackHub(system.file("tests", "trackhub", package = "rtracklayer")) +uri(th) +genome(th) +length(th) +th$hg19 +th[["hg19"]] +hub(th) +email(th) + +\dontrun{ +hub(th) <- "new_hub" +writeTrackHub(th) +} +} + +\keyword{methods} +\keyword{classes} diff -Nru r-bioc-rtracklayer-1.48.0/man/TrackHubGenome-class.Rd r-bioc-rtracklayer-1.50.0/man/TrackHubGenome-class.Rd --- r-bioc-rtracklayer-1.48.0/man/TrackHubGenome-class.Rd 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/man/TrackHubGenome-class.Rd 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,168 @@ +\name{TrackHubGenome-class} +\docType{class} + +%% Classes: +\alias{class:TrackHubGenome} +\alias{TrackHubGenome-class} + +%% Constructor: +\alias{TrackHubGenome} + +%% Accessors: +\alias{uri,TrackHubGenome-method} +\alias{genome,TrackHubGenome-method} +\alias{length,TrackHubGenome-method} +\alias{names,TrackHubGenome-method} +\alias{getTracks,TrackHubGenome-method} +\alias{trackhub} +\alias{organism,TrackHubGenome-method} +\alias{trackField,TrackHubGenome-method} +\alias{trackField<-,TrackHubGenome-method} +\alias{writeTrackHub,TrackHubGenome-method} + +%% Data Access: +\alias{track,TrackHubGenome-method} +\alias{track<-,TrackHubGenome,ANY-method} +\alias{track<-,TrackHubGenome,RTLFile-method} +\alias{track<-,TrackHubGenome,RsamtoolsFile-method} +\alias{track<-,TrackHubGenome,character-method} +\alias{referenceSequence,TrackHubGenome-method} +\alias{referenceSequence<-,TrackHubGenome-method} + +%% Show +\alias{show,TrackHubGenome-method} + +\title{TrackHub Genome Access} + +\description{ + A TrackHub data source is a collection of tracks and sequences, + separated by genome. This class, \code{TrackHubGenome} provides + direct access to the data for one particular genome. 
+} + +\section{Constructor}{ + \describe{ + \item{}{ + \code{TrackHubGenome(trackhub, genome, create = FALSE}: Constructs a + new \code{TrackHubGenome} object, representing \code{genome} in + the repository \code{trackhub} (a URI string or a + \code{\linkS4class{TrackHub}} object). + + The \code{genome} argument can be an ID corresponding to a + genome (potentially) in \code{trackhub} or an installed + \code{BSgenome} package. + + If \code{create} is \code{TRUE}, and the trackDb file does not + already exist, it will be created. + Creation only works if the repository is local and + writeable. + } + } +} + +\section{Accessor Methods}{ + In the code snippets below, \code{x} represent a \code{TrackHubGenome} object. + + \describe{ + \item{}{ + \code{uri(x)}: Get the uri pointing to the genome directory in the + TrackHub repository. + } + \item{}{ + \code{genome(x)}: Get the name of the genome, e.g. + \dQuote{hg19}. + } + \item{}{ + \code{length(x)}: number of tracks + } + \item{}{ + \code{names(x), trackNames(x)}: names of the tracks + } + \item{}{ + \code{getTracks(x)}: Get the \code{List} of \code{Track} from the tracks + } + \item{}{ + \code{trackhub(x)}: Get the TrackHub object that contains this + genome. + } + \item{}{ + \code{organism(x)}: Get the organism name for this genome, + e.g., \dQuote{H sapiens}. + } + \item{}{ + \code{trackField(x, name, field)}: Get the \code{value} of \code{field} for \code{name} track. + } + \item{}{ + \code{trackField(x, name, field) <- value}: Store the \code{field} and \code{value} for \code{name} track. + } + \item{}{\code{writeTrackHub(x)}: + Write tracks from memory representation to the trackDb file. + } + } +} + +\section{Data Access}{ + \describe{ + \item{}{ + \code{track(x, name), x$name}: get the track called \code{name} + } + \item{}{ + \code{track(x, name, format = bestFileFormat(value)) <- + value, x$name <- value}: store the track \code{value} under + \code{name}. 
Note that track storing is only supported + for local repositories, i.e., those with a \code{file://} URI + scheme. + + Currently, supported \code{value} types include a + \code{GenomicRanges}, \code{GRangesList}, or a file resource + (copied to the repository). The file resource may be + represented as a path, URL, \code{\linkS4class{RTLFile}} or + \code{\link[Rsamtools:RsamtoolsFile-class]{RsamtoolsFile}}. If + not a file name, \code{value} is written in \code{format}. For + generic interval data, this means a BigWig file (if there is a + numeric \dQuote{score} column) or a BED file otherwise. An + \code{RleList} (e.g., coverage) is output as BigWig. For + \code{UCSCData} values, the format is chosen according to the + type of track line. For \code{RsamtoolsFile} objects, the file + and its index are copied. + } + \item{}{ + \code{referenceSequence(x)}: Get the reference sequence, as a + \code{DNAStringSet}. + } + \item{}{ + \code{referenceSequence(x) <- value}: Set the reference sequence, as a + \code{DNAStringSet}. It is written as a 2bit file. This only works + on local repositories. 
+ } + } +} + +\author{Michael Lawrence} + +\examples{ +tests_dir <- system.file("tests", package = "rtracklayer") +th <- TrackHub(file.path(tests_dir, "trackhub")) +thg <- TrackHubGenome(th, "hg19") +length(thg) +organism(thg) +names(thg) + +\dontrun{ +th <- TrackHub(file.path(tests_dir, "trackhub"), create = TRUE) +genomesFile(th) <- "genomes.txt" +genomeInfo(th) <- Genome(genome = "hg38", trackDb = "hg38/trackDb.txt") +genomeField(th, "hg38", "twoBitPath") <- "hg38/seq.2bit" +writeTrackHub(th) +thg <- TrackHubGenome(th, "hg38", create = TRUE) +seq <- import(file.path(tests_dir, "test.2bit")) +referenceSequence(thg) <- seq +track(thg, "PeaksData") <- paste0(tests_dir, "/test.bigWig") +trackField(thg, "wgEncodeUWDukeDnaseGM12878FdrPeaks", "bigDataUrl") <- "hg38/wgEncodeCshlShortRnaSeq.bigWig" +trackField(thg, "wgEncodeUWDukeDnaseGM12878FdrPeaks", "color") <- "8,104,172" +writeTrackHub(thg) +} +} + +\keyword{methods} +\keyword{classes} diff -Nru r-bioc-rtracklayer-1.48.0/man/UCSCData-class.Rd r-bioc-rtracklayer-1.50.0/man/UCSCData-class.Rd --- r-bioc-rtracklayer-1.48.0/man/UCSCData-class.Rd 2020-04-27 20:22:29.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/man/UCSCData-class.Rd 2020-10-27 17:24:51.000000000 +0000 @@ -1,10 +1,10 @@ \name{UCSCData-class} \docType{class} \alias{UCSCData-class} -\alias{export.bed,UCSCData,characterORconnection-method} +\alias{export.bed,UCSCData,character_OR_connection-method} \alias{export.bed15,UCSCData-method} -\alias{export.gff,UCSCData,characterORconnection-method} -\alias{export.ucsc,UCSCData,characterORconnection-method} +\alias{export.gff,UCSCData,character_OR_connection-method} +\alias{export.ucsc,UCSCData,character_OR_connection-method} \alias{initialize,UCSCData-method} \alias{show,UCSCData-method} \alias{coerce,GRanges,UCSCData-method} diff -Nru r-bioc-rtracklayer-1.48.0/NAMESPACE r-bioc-rtracklayer-1.50.0/NAMESPACE --- r-bioc-rtracklayer-1.48.0/NAMESPACE 2020-04-27 20:22:29.000000000 +0000 +++ 
r-bioc-rtracklayer-1.50.0/NAMESPACE 2020-10-27 17:24:51.000000000 +0000 @@ -43,7 +43,7 @@ importFrom("Biostrings", get_seqtype_conversion_lookup, writeXStringSet, DNAStringSet, DNA_BASES, DNA_ALPHABET, alphabetFrequency, - reverseComplement) + reverseComplement, uniqueLetters) importMethodsFrom("Biostrings", masks, "masks<-", getSeq) importClassesFrom("Biostrings", DNAStringSet, XStringSet) @@ -64,12 +64,13 @@ UCSCSession, UCSCView, UCSCData, TrackLine, BasicTrackLine, GraphTrackLine, Bed15TrackLine, UCSCTrackModes, BigWigSelection, - UCSCSchema, Quickload, QuickloadGenome) + BigBedSelection, UCSCSchema, Quickload, QuickloadGenome, + TrackHub, TrackHubGenome, Track, TrackContainer) ## File classes exportClasses(RTLFile, CompressedFile, GFFFile, UCSCFile, BEDFile, WIGFile, ChainFile, FastaFile, GFF1File, GFF2File, GFF3File, BEDGraphFile, - BED15File, GTFFile, GVFFile, BigWigFile, BigWigFileList, + BED15File, GTFFile, GVFFile, BigWigFile, BigWigFileList, BigBedFile, TwoBitFile, RTLFileList, BEDPEFile) exportMethods(activeView, "activeView<-", blocks, browseGenome, @@ -78,10 +79,11 @@ close, export, export.bed, export.bed15, export.bedGraph, export.gff, export.gff1, export.gff2, export.gff3, export.ucsc, export.wig, export.bw, - export.2bit, + export.bb, export.2bit, import, import.bed, import.bed15, import.bedGraph, import.gff, import.gff1, import.gff2, import.gff3, - import.ucsc, import.wig, import.bw, import.chain, import.2bit, + import.ucsc, import.wig, import.bw, import.chain, + import.2bit, import.bb, exportToTabix, "track<-", track, trackNames, "trackNames<-", getTable, @@ -98,6 +100,10 @@ referenceSequence, "referenceSequence<-", asBED, asGFF, split, fileFormat, + hub, "hub<-", shortLabel, "shortLabel<-", longLabel, "longLabel<-", + genomesFile, "genomesFile<-", email, "email<-", descriptionUrl, + "descriptionUrl<-", genomeField, "genomeField<-", getTracks , writeTrackHub, + trackField, "trackField<-", genomeInfo, "genomeInfo<-", ## from IRanges start, end, 
"start<-", "end<-", score, "score<-", @@ -113,14 +119,16 @@ score, "score<-", as.data.frame, space, ucscGenomes, readGFFPragmas, sniffGFFVersion, GFFcolnames, readGFF, readGFFAsGRanges, - BigWigSelection, GRangesForUCSCGenome, GRangesForBSGenome, - summary, seqinfo, genome, "genome<-", + BigWigSelection, BigBedSelection, + GRangesForUCSCGenome, GRangesForBSGenome, + summary, seqinfo, genome, "genome<-", Genome, uri, Quickload, quickload, QuickloadGenome, - organism, releaseDate, mcols, wigToBigWig, + organism, releaseDate, mcols, TrackHub, trackhub, TrackHubGenome, + Track, TrackContainer, wigToBigWig, SeqinfoForBSGenome, SeqinfoForUCSCGenome, resource, path, - FileForFormat, cleanupBigWigCache, viewURL) + FileForFormat, cleanupBigWigCache, cleanupBigBedCache, viewURL) export(GFFFile, UCSCFile, BEDFile, WIGFile, ChainFile, FastaFile, GFF1File, GFF2File, GFF3File, BEDGraphFile, BED15File, GTFFile, GVFFile, BigWigFile, BigWigFileList, TwoBitFile, - BEDPEFile) + BEDPEFile, BigBedFile) diff -Nru r-bioc-rtracklayer-1.48.0/R/bam.R r-bioc-rtracklayer-1.50.0/R/bam.R --- r-bioc-rtracklayer-1.48.0/R/bam.R 2020-04-27 20:22:29.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/R/bam.R 2020-10-27 17:24:51.000000000 +0000 @@ -24,6 +24,14 @@ ans }) +fillColumn <- function(x, filler) { + if (is.null(x)) + filler + else if (anyNA(x)) + ifelse(is.na(x), filler, x) + else x +} + setMethod("export", c("GAlignments", "BamFile"), function(object, con, format, index = TRUE) { sam_path <- paste(file_path_sans_ext(path(con)), ".sam", sep = "") @@ -43,19 +51,18 @@ writeLines(header, sam_con) } emd <- mcols(object) - aln <- paste(if (!is.null(names(object))) names(object) else "*", - if (!is.null(emd[["flag"]])) emd[["flag"]] else - ifelse(strand(object) == "-", "16", "0"), + aln <- paste(fillColumn(names(object), "*"), + fillColumn(emd[["flag"]], + ifelse(strand(object) == "-", "16", "0")), seqnames(object), start(object), - if (!is.null(emd[["mapq"]])) emd[["mapq"]] else "255", + 
fillColumn(emd[["mapq"]], "255"), cigar(object), - if (!is.null(emd[["mrnm"]])) emd[["mrnm"]] else "*", - if (!is.null(emd[["mpos"]])) emd[["mpos"]] else "0", - if (!is.null(emd[["isize"]])) emd[["isize"]] else "0", + fillColumn(emd[["mrnm"]], "*"), + fillColumn(emd[["mpos"]], "0"), + fillColumn(emd[["isize"]], "0"), if (is(object, "GappedReads")) object@qseq - else if (!is.null(emd[["seq"]])) emd[["seq"]] - else "*", - if (!is.null(emd[["qual"]])) emd[["qual"]] else "*", + else fillColumn(emd[["seq"]], "*"), + fillColumn(emd[["qual"]], "*"), sep = "\t") custom <- emd[nchar(names(emd)) == 2L] if (length(custom) > 0L) { diff -Nru r-bioc-rtracklayer-1.48.0/R/bigBed.R r-bioc-rtracklayer-1.50.0/R/bigBed.R --- r-bioc-rtracklayer-1.48.0/R/bigBed.R 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/R/bigBed.R 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,261 @@ +### ========================================================================= +### BigBed support +### ------------------------------------------------------------------------- + +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +### Classes +### + +setClass("BigBedFile", contains = "RTLFile") +setClass("BBFile", contains = "BigBedFile") + +BigBedFile <- function(path) { + if (!isSingleString(path)) + stop("'filename' must be a single string, specifying a path") + new("BigBedFile", resource = path) +} +BBFile <- BigBedFile + +setMethod("seqinfo", "BigBedFile", function(x) { + seqlengths <- .Call(BBDFile_seqlengths, expandPath(path(x))) + Seqinfo(names(seqlengths), seqlengths) +}) + +.defaultColNames <- c("name", "score", "thick", "itemRgb", "blocks") + +setClass("BigBedSelection", prototype = prototype(colnames = .defaultColNames), + contains = "RangedSelection") + +BigBedSelection <- function(ranges=IRangesList(), colnames = .defaultColNames) { + if (is.character(ranges)) + new("BigBedSelection", GenomicSelection(ranges, colnames = colnames)) + else { + if (is(ranges, 
"BigBedFile")) + ranges <- seqinfo(ranges) + new("BigBedSelection", ranges = as(ranges, "IntegerRangesList"), + colnames = colnames) + } +} + +setAs("IntegerRangesList", "BigBedSelection", function(from) { + new("BigBedSelection", as(from, "RangedSelection"), colnames = .defaultColNames) +}) + +setAs("GenomicRanges", "BigBedSelection", function(from) { + as(as(from, "IntegerRangesList"), "BigBedSelection") +}) + +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +### Import +### + +setGeneric("import.bb", function(con, ...) standardGeneric("import.bb")) + +setMethod("import.bb", "ANY", function(con, ...) { + import(con, "BigBed", ...) +}) + +setMethod("import", "BigBedFile", + function(con, format, text, selection = BigBedSelection(which, ...), + which = con, ...) + { + if (!missing(format)) + checkArgFormat(con, format) + si <- seqinfo(con) + selection <- as(selection, "BigBedSelection") + ranges <- ranges(selection) + badSpaces <- setdiff(names(ranges)[lengths(ranges) > 0L], seqlevels(si)) + if (length(badSpaces) > 0L) + warning("'which' contains seqnames not known to BigBed file: ", + paste(badSpaces, collapse = ", ")) + ranges <- ranges[names(ranges) %in% seqlevels(si)] + flatranges <- unlist(ranges, use.names = FALSE) + if (is.null(flatranges)) + flatranges <- IRanges() + which_rl <- split(flatranges, factor(space(ranges), seqlevels(si))) + which <- GRanges(which_rl) + allFields <- .Call(BBDFile_fieldnames, expandPath(path(con))) + defaultFields <- allFields[[1L]] + ValidextraFields <- allFields[[2L]] + selectedFields <- colnames(selection) + extraFields <- setdiff(selectedFields, defaultFields) + if (identical(colnames(BigBedSelection()), selectedFields)) { + selectedFields <- defaultFields[defaultFields != ""] + extraFields <- ValidextraFields[ValidextraFields != ""] + } + if (!identical(selectedFields, defaultFields)) { + defaultFields <- defaultFields[defaultFields != ""] + ValidextraFields <- ValidextraFields[ValidextraFields 
!= ""] + defaultFieldIndexes <- which(defaultFields %in% selectedFields) + extraFieldIndexes <- which(ValidextraFields %in% extraFields) + invalidFields <- setdiff(extraFields, ValidextraFields) + if (length(defaultFieldIndexes) == 0L) + defaultFieldIndexes <- c(0L) + if (length(extraFieldIndexes) == 0L) + extraFieldIndexes <- c(0L) + if (length(invalidFields)) + warning("Invalid ", invalidFields, " field(s)") + }else { + defaultFieldIndexes <- c() + extraFieldIndexes <- c() + } + defaultNames <- defaultFields[defaultFields %in% selectedFields] + extraNames <- ValidextraFields[ValidextraFields %in% extraFields] + C_ans <- .Call(BBDFile_query, expandPath(path(con)), + as.character(seqnames(si)), ranges(which), + defaultFieldIndexes, extraFieldIndexes) + nhits <- C_ans[[1L]] + gr <- GRanges(rep(seqnames(which), nhits), C_ans[[3L]], seqinfo=si) + if (!is.null(C_ans[[4L]])) + strand(gr) <- gsub(".", "*", C_ans[[4L]], fixed = TRUE) + blocksPosition <- which(defaultNames %in% c("blocks")) + if (length(blocksPosition)) { + blocksPosition <- 4 + blocksPosition + C_ans[[blocksPosition]] <- IRangesList(C_ans[[blocksPosition]]) + } + val <- c() + if (length(defaultFieldIndexes) && defaultFieldIndexes[1] != 0) + val <- c(Filter(Negate(is.null), C_ans[5L:length(C_ans)])) + val <- c(val, Filter(Negate(is.null), C_ans[[2L]])) + elementMetadata <- DataFrame(val) + names(elementMetadata) <- c(defaultNames ,extraNames) + gr@elementMetadata <- elementMetadata + gr + }) + +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +### Export +### + +setGeneric("export.bb", function(object, con, ...) standardGeneric("export.bb")) + +setMethod("export.bb", "ANY", + function(object, con, ...) + { + export(object, con, "BigBed", ...) + }) + +setMethod("export", c("ANY", "BigBedFile"), + function(object, con, format, ...) 
+ { + object <- as(object, "GRanges") + callGeneric() + }) + +setMethod("export", c("GenomicRanges", "BigBedFile"), + function(object, con, format, compress = TRUE, extraIndexes = "") + { + if (!missing(format)) + checkArgFormat(con, format) + con <- path.expand(path(con)) + object <- sortBySeqnameAndStart(object) + seqlengths <- seqlengths(object) + stopIfNotValidForExport(object) + if (!is.character(extraIndexes)) + stop("The extraIndexes must be character") + if (any(is.na(seqlengths))) + stop("Unable to determine seqlengths; either specify ", + "'seqlengths' or specify a genome on 'object' that ", + "is known to BSgenome or UCSC") + if (!isTRUEorFALSE(compress)) + stop("'compress' must be TRUE or FALSE") + seqlengths <- seqlengths(object) + bedString <- bedString(object) + autoSqlString <- autoSqlString(object) + extraIndexes <- gsub("[\n\t ]", "", extraIndexes, perl = TRUE) + invisible(BigBedFile(.Call(BBDFile_write, seqlengths, bedString, autoSqlString, + extraIndexes, compress, con))) + }) + +stopIfNotValidForExport <- function(x) { + elementMetadata <- elementMetadata(x) + name <- elementMetadata$name + score <- elementMetadata$score + itemRgb <- elementMetadata$itemRgb + thick <- elementMetadata$thick + blocks <- elementMetadata$blocks + if (!is.null(name) && (!is.character(name) || any(is.na(name)))) + stop("The name must be character, without any NA's") + if (isValidScore(score)) + stop("The score must be numeric, without any NA's") + if (!is.null(itemRgb) && (!is.character(itemRgb) || any(is.na(itemRgb)))) + stop("The itemRgb must be character, without any NA's") + if (!is.null(thick) && !is(thick, "IRanges")) + stop("The thick must be IRanges") + if (!is.null(blocks) && !is(blocks, "IRangesList")) + stop("The blocks must be IRangesList") +} + +bedString <- function(x) { + elementMetadata <- elementMetadata(x) + name <- elementMetadata$name + elementMetadata$name <- NULL + score <- elementMetadata$score + elementMetadata$score <- NULL + strand <- 
as.character(strand(x)) + strand <- gsub("*", ".", strand, fixed = TRUE) + thick <- elementMetadata$thick + thickStart <- NULL + thickEnd <- NULL + if (!is.null(thick)) { + thickStart <- start(ranges(thick)) + thickEnd <- end(ranges(thick)) + elementMetadata$thick <- NULL + } + itemRgb <- as.data.frame(t(col2rgb(elementMetadata$itemRgb))) + itemRgb <- do.call(paste, c(itemRgb, sep=",")) + elementMetadata$itemRgb <- NULL + blocks <- elementMetadata$blocks + blockCount <- NULL + blockSizes <- NULL + blockStarts <- NULL + if (!is.null(blocks)) { + length <- length(blocks) + blockCount <- lengths(blocks) + blockSizes <- lapply(width(blocks), function(x) paste(x, collapse=",")) + blockStarts <- lapply(start(blocks), function(x) paste(x, collapse=",")) + elementMetadata$blocks <- NULL + } + extraColumnsString <- do.call(paste, as.list(elementMetadata)) + paste(as.character(seqnames(x)), start(ranges(x)), end(ranges(x)), name, score, + strand, thickStart, thickEnd, itemRgb, blockCount, blockSizes, + blockStarts, extraColumnsString, collapse = "\n") +} + +autoSqlString <- function(x) { + asString <- c('table bed "Browser Extensible Data" (\n', + 'string chrom; "Reference sequence chromosome or scaffold"\n', + 'uint chromStart; "Start position in chromosome"\n', + 'uint chromEnd; "End position in chromosome"\n') + + names <- c("name", "itemRgb", "score", "thick", "blocks", "double", "integer", "character", "raw") + values <- c('string name; "Name of item."\n', + 'uint reserved; "Used as itemRgb as of 2004-11-22"\n', + 'uint score; "Score (0-1000)"\nchar[1] strand; "+ or - for strand"\n', + paste0('uint thickStart; "Start of where display should be thick (start codon)"\n', + 'uint thickEnd; "End of where display should be thick (stop codon)"\n'), + paste0('int blockCount; "Number of blocks"\n', + 'int[blockCount] blockSizes; "Comma separated list of block sizes"\n', + 'int[blockCount] chromStarts; "Start positions relative to chromStart"\n'), + "double ", "int ", "string ", 
"uint ") + mapping <- setNames(values, names) + metadata <- elementMetadata(x) + names <- names(metadata) + defaultFields <- colnames(BigBedSelection()) + fieldsString <- lapply(names, function(y) { + if (y %in% defaultFields) + mapping[y] + else { + typeString <- mapping[storage.mode(metadata[[y]])] + paste(typeString, y, '; ""\n') + } + }) + asString <- c(asString, fieldsString, ')') + paste(asString, collapse = "") +} + +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +### Utilities +### + +cleanupBigBedCache <- cleanupBigWigCache diff -Nru r-bioc-rtracklayer-1.48.0/R/io.R r-bioc-rtracklayer-1.50.0/R/io.R --- r-bioc-rtracklayer-1.48.0/R/io.R 2020-04-27 20:22:29.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/R/io.R 2020-10-27 17:24:51.000000000 +0000 @@ -11,7 +11,7 @@ ### wraps a low-level representation of a file, currently either a ### path/URL or connection. -setClass("RTLFile", representation(resource = "characterORconnection"), +setClass("RTLFile", representation(resource = "character_OR_connection"), contains = "VIRTUAL") setClass("RTLFileList", @@ -260,7 +260,7 @@ uriExists <- function(x) { uri <- .parseURI(x) - if (uriIsLocal(x)) { + if (uriIsLocal(uri)) { exists <- file.exists(uri$path) } else { txt <- getURL(x, header = TRUE) diff -Nru r-bioc-rtracklayer-1.48.0/R/quickload.R r-bioc-rtracklayer-1.50.0/R/quickload.R --- r-bioc-rtracklayer-1.48.0/R/quickload.R 2020-04-27 20:22:29.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/R/quickload.R 2020-10-27 17:24:51.000000000 +0000 @@ -8,19 +8,23 @@ setClass("Quickload", representation(uri = "character")) -uri <- function(x, ...) 
x@uri - Quickload_contents <- function(x) { read.table(contentsFile(x), sep = "\t", col.names = c("dir", "title"), colClasses = "character") } +setGeneric("uri", function(x) standardGeneric("uri")) + +setMethod("uri", "Quickload", function(x) { + x@uri +}) + setMethod("genome", "Quickload", function(x) { contents <- Quickload_contents(x) as.character(structure(contents$dir, names = contents$title)) }) -setMethod("names", "Quickload", genome) +setMethod("names", "Quickload", function(x) genome(x)) setMethod("length", "Quickload", function(x) length(names(x))) @@ -115,6 +119,10 @@ names = as.character(x_mcols$title)) }) +setMethod("trackNames", "QuickloadGenome", function(object) { + names(object) +}) + setMethod("mcols", "QuickloadGenome", function(x) { files <- QuickloadGenome_annotFiles(x) if (!length(xmlChildren(files))) diff -Nru r-bioc-rtracklayer-1.48.0/R/ranges.R r-bioc-rtracklayer-1.50.0/R/ranges.R --- r-bioc-rtracklayer-1.48.0/R/ranges.R 2020-04-27 20:22:29.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/R/ranges.R 2020-10-27 17:24:51.000000000 +0000 @@ -34,7 +34,7 @@ dots <- list(...) 
if (length(dots) == 1) { dots <- dots[[1L]] - if ((is(dots, "data.frame") || is(dots, "DataTable")) && + if ((is(dots, "data.frame") || is(dots, "DataFrame")) && !is.null(dots[["strand"]])) { strand <- dots[["strand"]] dots[["strand"]] <- NULL diff -Nru r-bioc-rtracklayer-1.48.0/R/trackhub.R r-bioc-rtracklayer-1.50.0/R/trackhub.R --- r-bioc-rtracklayer-1.48.0/R/trackhub.R 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/R/trackhub.R 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,996 @@ +### ========================================================================= +### TrackHub support +### ------------------------------------------------------------------------- + +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +### TrackContainer class +### + +setClass("TrackContainer", + representation("SimpleList"), + prototype(elementType = "Track") +) + +setMethod("names", "TrackContainer", function(x) { + vapply(x, function(y) y@track, character(1L) ,USE.NAMES = FALSE) +}) + +TrackContainer <- function(...) { + args <- list(...) + if (length(args) == 1 && is.list(args[[1L]])) + args <- args[[1L]] + if (!all(vapply(args, is, logical(1L), "Track"))) + stop("all elements in '...' 
must be Track objects") + S4Vectors:::new_SimpleList_from_list("TrackContainer", args) +} + +setClass("Track", + representation( + # common trackDb settings + track = "character", + type = "character", + shortLabel = "character", + longLabel = "character", + bigDataUrl = "character", + html = "character", + visibility = "character", + meta = "character", + + + # common optional settings + color = "character", + priority = "numeric", + altColor = "character", + boxedCfg = "logical", + chromosomes = "character", + darkerLabels = "logical", + dataVersion = "character", + directUrl = "character", + iframeUrl = "character", + iframeOptions = "character", + mouseOverField = "character", + otherDb = "character", + pennantIcon = "character", + tableBrowser = "character", + url = "character", + urlLabel = "character", + urls = "character", + skipEmptyFields = "logical", + skipFields = "character", + sepFields = "character", + + + ##settings by track type + + # bam settings + refUrl = "character", + bigDataIndex = "character", + bamColorMode = "character", + bamGrayMode = "character", + aliQualRange = "character", + baseQualRange = "character", + bamColorTag = "character", + noColorTag = "character", + bamSkipPrintQualScore = "character", + indelDoubleInsert = "logical", + indelQueryInsert = "logical", + indelPolyA = "logical", + minAliQual = "character", + pairEndsByName = "character", + pairSearchRange = "character", + showNames = "logical", + doWiggle = "logical", + maxWindowToDraw = "integer", + + # bigBarChart settings + barChartBars = "character", + barChartColor = "character", + barChartLabel = "character", + barChartMaxSize = "character", + barChartSizeWindows = "character", + barChartMetric = "character", + barChartUnit = "character", + barChartMatrixUrl = "character", + barChartSampleUrl = "character", + maxLimit = "character", + labelFields = "character", + defaultLabelFields = "character", + itemRgb = "logical", + colorByStrand = "character", + denseCoverage = 
"integer", + labelOnFeature = "logical", + exonArrows = "logical", + exonNumbers = "logical", + scoreFilter = "character", + scoreFilterLimits = "character", + maxItems = "integer", + minGrayLevel = "character", + noScoreFilter = "logical", + spectrum = "logical", + scoreMax = "integer", + scoreMin = "integer", + thickDrawItem = "logical", + searchIndex = "character", + searchTrix = "character", + labelSeparator = "character", + # UNSUPPORTED fields + # filter. + # filterByRange. + # filterLimits. + # filterText. + # filterType. + # filterValues. + # filterValuesDefault. + # filterType. + # filterLabel. + bedNameLabel = "character", + exonArrowsDense = "logical", + itemImagePath = "character", + itemBigImagePath = "character", + mergeSpannedItems = "logical", + linkIdInName = "logical", + nextExonText = "character", + prevExonText = "character", + scoreLabel = "character", + showTopScorers = "character", + + # bigChain settings + linkDataUrl = "character", + + # bigInteract settings + interactDirectional = "character", + interactUp = "character", + interactMultiRegion = "character", + maxHeightPixels = "character", + speciesOrder = "character", + frames = "character", + summary = "character", + + # bigNarrowPeak settings + # UNSUPPORTED fields + #scoreFilter + #pValueFilter + #qValueFilter + #signalFilter + #FilterLimits + #FilterByRange + + # bigPsl settings + baseColorUseCds = "character", + baseColorUseSequence = "character", + baseColorDefault = "character", + showDiffBasesAllScales = "logical", + + # bigWig settings + autoscale = "character", + autoScale = "character", + viewLimits = "character", + viewLimitsMax = "character", + alwaysZero = "logical", + graphTypeDefault = "character", + maxWindowToQuery = "integer", + negateValues = "logical", + smoothingWindow = "character", + transformFunc = "character", + windowingFunction = "character", + yLineMark = "character", + yLineOnOff = "logical", + gridDefault = "logical", + + + # hic settings + showSnpWidth = 
"integer", + otherSpecies = "character", + + + # vcfTabix settings + minQual = "character", + minFreq = "character", + hapClusterEnabled = "character", + hapClusterColorBy = "character", + hapClusterTreeAngle = "character", + hapClusterHeight = "character", + applyMinQual = "character", + + + ##Grouping tracks into sets and hierarchies + + # Supertrack + superTrack = "character", + parent = "character", + + # Composite Tracks + compositeTrack = "logical", + allButtonPair = "logical", + centerLabelsDense = "logical", + dragAndDrop = "character", + hideEmptySubtracks = "logical", + hideEmptySubtracksMultiBedUrl = "character", + hideEmptySubtracksSourcesUrl = "character", + hideEmptySubtracksLabel = "character", + + + # Subgroups + subGroup1 = "character", + subGroup2 = "character", + subGroup3 = "character", + subGroup4 = "character", + subGroup5 = "character", + subGroup6 = "character", + subGroup7 = "character", + subGroup8 = "character", + subGroup9 = "character", + subGroups = "character", + dimensions = "character", + filterComposite = "character", + dimensionAchecked = "character", + dimensionBchecked = "character", + sortOrder = "character", + + # Subgroups Views + view = "character", + viewUi = "logical", + configurable = "logical", + + # multiWig + container = "character", + aggregate = "character", + showSubtrackColorOnUi = "logical", + + # Miscellaneous Deprecated Settings + metadata = "character", + noInherit = "logical", + useScore = "integer" + ) +) + +Track <- function(...) { + new("Track", ...) 
+} + +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +### Genome class +### + +setClass("Genome", + representation( + genome = "character", + trackDb = "character", + metaDb = "character", + metaTab = "character", + twoBitPath = "character", + groups = "character", + description = "character", + organism = "character", + defaultPos = "character", + orderKey = "character", + htmlPath = "character" + ), + prototype( + genome = NA_character_, + trackDb = NA_character_, + metaDb = NA_character_, + metaTab = NA_character_, + twoBitPath = NA_character_, + groups = NA_character_, + description = NA_character_, + organism = NA_character_, + defaultPos = NA_character_, + orderKey = NA_character_, + htmlPath = NA_character_ + ) +) + +Genome <- function(...) { + new("Genome", ...) +} + +stopIfNotGenome <- function(x) { + if (!is(value, "Genome")) + stop("value must be Genome object") +} + +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +### GenomeContainer class +### + +setClass("GenomeContainer", + representation("SimpleList"), + prototype(elementType = "Genome") +) + +setMethod("names", "GenomeContainer", function(x) { + vapply(x, function(y) y@genome, character(1L), USE.NAMES = FALSE) +}) + +setMethod("getListElement", "GenomeContainer", function(x, i, exact = TRUE) { + genome <- x[names(x) == i] + if (length(genome) == 1L) unlist(genome)[[1L]] +}) + +GenomeContainer <- function(...) { + args <- list(...) + if (length(args) == 1 && is.list(args[[1L]])) + args <- args[[1L]] + if (!all(vapply(args, is, logical(1L), "Genome"))) + stop("all elements in '...' 
must be Genome objects") + S4Vectors:::new_SimpleList_from_list("GenomeContainer", args) +} + +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +### TrackHub class +### + +setGeneric("hub", function(x) standardGeneric("hub")) +setGeneric("hub<-", function(x, value) standardGeneric("hub<-")) +setGeneric("shortLabel", function(x) standardGeneric("shortLabel")) +setGeneric("shortLabel<-", function(x, value) standardGeneric("shortLabel<-")) +setGeneric("longLabel", function(x) standardGeneric("longLabel")) +setGeneric("longLabel<-", function(x, value) standardGeneric("longLabel<-")) +setGeneric("genomesFile", function(x) standardGeneric("genomesFile")) +setGeneric("genomesFile<-", function(x, value) standardGeneric("genomesFile<-")) +setGeneric("email", function(x) standardGeneric("email")) +setGeneric("email<-", function(x, value) standardGeneric("email<-")) +setGeneric("descriptionUrl", function(x) standardGeneric("descriptionUrl")) +setGeneric("descriptionUrl<-", function(x, value) standardGeneric("descriptionUrl<-")) +setGeneric("writeTrackHub", function(x) standardGeneric("writeTrackHub")) +setGeneric("genomeField", function(x, name, key) standardGeneric("genomeField")) +setGeneric("genomeField<-", function(x, name, key, value) standardGeneric("genomeField<-")) +setGeneric("genomeInfo", function(x, name) standardGeneric("genomeInfo")) +setGeneric("genomeInfo<-", function(x, value) standardGeneric("genomeInfo<-")) + +setClass("TrackHub", + representation( + uri = "character", + hub = "character", + shortLabel = "character", + longLabel = "character", + genomesFile = "character", + email = "character", + descriptionUrl = "character", + genomeContainer = "GenomeContainer"), + prototype( + hub = NA_character_, + shortLabel = NA_character_, + longLabel = NA_character_, + genomesFile = NA_character_, + email = NA_character_, + descriptionUrl = NA_character_), + contains = "List") + +hubFile <- function(x) paste(trimSlash(uri(x)), "hub.txt", 
sep = "/") + +stopIfNotLocal <- function(x) { + if (!uriIsWritable(x)) { + stop("Repository is read only; cannot write on remote repository") + } +} + +getHubContent <- function(x) { + content <- readLines(hubFile(x), warn = FALSE) + rexp <- "^(\\w+)\\s?(.*)$" + contentVec <- c(sub(rexp, "\\2", content)) + names(contentVec) <- sub(rexp, "\\1", content) + x@hub <- contentVec["hub"] + x@shortLabel <- contentVec["shortLabel"] + x@longLabel <- contentVec["longLabel"] + x@genomesFile <- contentVec["genomesFile"] + x@email <- contentVec["email"] + x@descriptionUrl <- contentVec["descriptionUrl"] + x +} + +setHubContent <- function(x) { + file = hubFile(x) + cat("", file = file) + if (!is.na(x@hub)) + cat("hub ", x@hub, "\n", append = TRUE, sep = "", file = file) + if (!is.na(x@shortLabel)) + cat("shortLabel ", x@shortLabel, "\n", append = TRUE, sep = "", file = file) + if (!is.na(x@longLabel)) + cat("longLabel ", x@longLabel, "\n", append = TRUE, sep = "", file = file) + if (!isFieldEmpty(x@genomesFile)) + cat("genomesFile ", x@genomesFile, "\n", append = TRUE, sep = "", file = file) + if (!is.na(x@email)) + cat("email ", x@email, "\n", append = TRUE, sep = "", file = file) + if (!is.na(x@descriptionUrl)) + cat("descriptionUrl ", x@descriptionUrl, "\n", append = TRUE, sep = "", file = file) +} + +getGenomesContent <- function(x) { + if (uriExists(hubFile(x))) { + genomesFileValue <- x@genomesFile + if (!isFieldEmpty(genomesFileValue)) { + genomesFilePath <- combineURI(uri(x), unname(genomesFileValue)) + if (file.size(parseURI(genomesFilePath)$path) == 0L) { + return(list()) + } + content <- readLines(genomesFilePath, warn = FALSE) + content_df <- read.csv(text = sub(" ", ",", content), header = FALSE) + genomesIndex <- grep("\\bgenome\\b", content_df$V1) + totalGenomes <- length(genomesIndex) + genomesIndex[length(genomesIndex) + 1] <- length(content_df$V1) + 1 + genomes <- lapply(1:totalGenomes, function(x) { + startPosition <- genomesIndex[x] + endPosition <- 
genomesIndex[x + 1] - 1 + genome <- setNames(data.frame(content_df$V1[startPosition:endPosition], + content_df$V2[startPosition:endPosition]), + c("field", "value")) + genome <- setNames(as.list(genome$value), genome$field) + genome <- do.call(Genome, genome) + }) + genomes + } + else message("hub.txt: 'genomesFile' does not contain valid reference to genomes file") + } +} + +setGenomesContent <- function(x, genomeContainer) { + genomesFilePath <- combineURI(uri(x), x@genomesFile) + slots <- slotNames(Genome()) + genomesFields <- c("twoBitPath", "groups", "htmlPath", "metaDb", "trackDb" , "metaTab") + genomes <- vapply(genomeContainer, function(y) { + uri <- combineURI(uri(x), y@genome) + if (!uriExists(uri)) + createResource(uri, dir = TRUE) + genome <- vapply(slots, function(slotName) { + slotValue <- slot(y, slotName) + if (!isEmpty(slotValue) && !is.na(slotValue)) { + if (slotName %in% genomesFields) { + filePath <- combineURI(uri(x), slotValue) + if (!uriExists(filePath)) { + createResource(filePath) + } + } + paste0(slotName, " ", slotValue) + } + else "" + }, character(1L)) + }, character(11L)) + genomes <- genomes[genomes != ""] + genomes <- gsub("\\bgenome\\b", "\ngenome", genomes) + writeLines(genomes, genomesFilePath) +} + +getGenome <- function(x, name) { + genome <- x@genomeContainer[[name]] + if (length(genome) == 1L) genome + else if (length(genome) == 0L) stop("Genome '", name, "' does not exist") + else if (length(genome) > 1L) stop("Multiple genomes match ", name) +} + +setGenome <- function(x, name, value) { + stopIfNotGenome(value) + genome <- x@genomeContainer[[name]] + if (length(genome) == 1L) genome <- value + else if (length(genome) == 0L) stop("Genome '", name, "' does not exist") + else if (length(genome) > 1L) stop("Multiple genomes match ", name) + x@genomeContainer[[name]] <- genome + x +} + +setMethod("uri", "TrackHub", function(x) { + x@uri +}) + +setMethod("hub", "TrackHub", function(x) { + unname(x@hub) +}) + 
+setReplaceMethod("hub", "TrackHub", function(x, value) { + x@hub <- value + x +}) + +setMethod("shortLabel", "TrackHub", function(x) { + unname(x@shortLabel) +}) + +setReplaceMethod("shortLabel", "TrackHub", function(x, value) { + x@shortLabel <- value + x +}) + +setMethod("longLabel", "TrackHub", function(x) { + unname(x@longLabel) +}) + +setReplaceMethod("longLabel", "TrackHub", function(x, value) { + x@longLabel <- value + x +}) + +setMethod("genomesFile", "TrackHub", function(x) { + unname(x@genomesFile) +}) + +setReplaceMethod("genomesFile", "TrackHub", function(x, value) { + x@genomesFile <- value + x +}) + +setMethod("email", "TrackHub", function(x) { + unname(x@email) +}) + +setReplaceMethod("email", "TrackHub", function(x, value) { + x@email <- value + x +}) + +setMethod("descriptionUrl", "TrackHub", function(x) { + unname(x@descriptionUrl) +}) + +setReplaceMethod("descriptionUrl", "TrackHub", function(x, value) { + x@descriptionUrl <- value + x +}) + +setMethod("genome", "TrackHub", function(x) { + genomes <- x@genomeContainer + names(genomes) +}) + +setMethod("getListElement", "TrackHub", function(x, i, exact = TRUE) { + TrackHubGenome(x, i) +}) + +setMethod("names", "TrackHub", function(x) genome(x)) + +setMethod("length", "TrackHub", function(x) length(names(x))) + +setMethod("genomeInfo", "TrackHub", function(x, name) { + names <- names(x@genomeContainer) + genome <- x@genomeContainer[[name]] + if (length(genome) == 0L) stop("Genome '", name, "' does not exist") + else genome +}) + +setReplaceMethod("genomeInfo", "TrackHub", function(x, value) { + stopIfNotGenome(value) + name <- value@genome + names <- names(x@genomeContainer) + genome <- x@genomeContainer[[name]] + if (length(genome) == 1L) stop("NOTE: Genome '", name, "' already exists") + else if (length(genome) > 1L) stop("Multiple genomes match ", name) + else { + if (!identical(value, Genome())) { + if (length(x@genomeContainer) == 0L) genomes <- value + else genomes <- 
c(unlist(x@genomeContainer), value) + x@genomeContainer <- GenomeContainer(genomes) + } + } + x +}) + +setMethod("genomeField", "TrackHub", function(x, name, key) { + genome <- getGenome(x, name) + slot(genome, key) +}) + +setReplaceMethod("genomeField", "TrackHub", function(x, name, key, value) { + genome <- getGenome(x, name) + slot(genome, key) <- value + setGenome(x, name, genome) +}) + +setMethod("writeTrackHub", "TrackHub", function(x) { + stopIfNotLocal(hubFile(x)) + setHubContent(x) + genomesFilePath <- combineURI(uri(x), genomesFile(x)) + if (!uriExists(genomesFilePath) && !is.na(genomesFile(x))) + createResource(genomesFilePath) + if (uriExists(genomesFilePath)) + setGenomesContent(x, x@genomeContainer) +}) + +setMethod("show", "TrackHub", function(object) { + cat(class(object), "repository\nuri:", uri(object), "\n") + cat(S4Vectors:::labeledLine("genomes", genome(object))) + cat("hub:", hub(object), "\n") + cat("shortLabel:", shortLabel(object), "\n") + cat("longLabel:", longLabel(object), "\n") + cat("genomesFile:", genomesFile(object), "\n") + cat("email:", email(object), "\n") + cat("descriptionUrl:", descriptionUrl(object), "\n") +}) + +TrackHub <- function(uri, create = FALSE) { + if (!isTRUEorFALSE(create)) + stop("'create' must be TRUE or FALSE") + if (create) { + if (uriExists(uri)) { + message("NOTE: '", uri, "' already exists") + create <- FALSE + } ## must create this before calling normURI (requires existence) + else createResource(uri, dir = TRUE) + } + th <- new("TrackHub") + th@uri <- normURI(uri) + if (create && !uriExists(hubFile(th))) { + createResource(hubFile(th)) + } else { + th <- getHubContent(th) + genomes <- getGenomesContent(th) + if (is.list(genomes) && length(genomes) >= 1L) + th@genomeContainer <- GenomeContainer(unlist(genomes)) + } + th +} + +setAs("character", "TrackHub", function(from) TrackHub(from)) + +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +### TrackHubGenome class +### + 
+setGeneric("getTracks", function(x) standardGeneric("getTracks")) +setGeneric("trackField", function(x, name, key) standardGeneric("trackField")) +setGeneric("trackField<-", function(x, name, key, value) standardGeneric("trackField<-")) + +setClass("TrackHubGenome", + representation(trackhub = "TrackHub", + genome = "character", + tracks = "TrackContainer", + levels = "integer"), + contains = "TrackDb") + +trackhub <- function(x) x@trackhub + +createTrack <- function(trackDf) { + fieldToType <- list( + track = "character", type = "character", shortLabel = "character", longLabel = "character", + bigDataUrl = "character", html = "character", visibility = "character", meta = "character", + color = "character", priority = "numeric", altColor = "character", boxedCfg = "logical", + chromosomes = "character", darkerLabels = "logical", dataVersion = "character", + directUrl = "character", iframeUrl = "character", iframeOptions = "character", + mouseOverField = "character", otherDb = "character", pennantIcon = "character", + tableBrowser = "character", url = "character", urlLabel = "character", urls = "character", + skipEmptyFields = "logical", skipFields = "character", sepFields = "character", + refUrl = "character", bigDataIndex = "character", bamColorMode = "character", + bamGrayMode = "character", aliQualRange = "character", baseQualRange = "character", + bamColorTag = "character", noColorTag = "character", bamSkipPrintQualScore = "character", + indelDoubleInsert = "logical", indelQueryInsert = "logical", indelPolyA = "logical", + minAliQual = "character", pairEndsByName = "character", pairSearchRange = "character", + showNames = "logical", doWiggle = "logical", maxWindowToDraw = "integer", + barChartBars = "character", barChartColor = "character", barChartLabel = "character", + barChartMaxSize = "character", barChartSizeWindows = "character", barChartMetric = "character", + barChartUnit = "character", barChartMatrixUrl = "character", barChartSampleUrl = "character", + 
maxLimit = "character", labelFields = "character", defaultLabelFields = "character", + itemRgb = "logical", colorByStrand = "character", denseCoverage = "integer", + labelOnFeature = "logical", exonArrows = "logical", exonNumbers = "logical", + scoreFilter = "character", scoreFilterLimits = "character", maxItems = "integer", + minGrayLevel = "character", noScoreFilter = "logical", spectrum = "logical", + scoreMax = "integer", scoreMin = "integer", thickDrawItem = "logical", searchIndex = "character", + searchTrix = "character", labelSeparator = "character", bedNameLabel = "character", + exonArrowsDense = "logical", itemImagePath = "character", itemBigImagePath = "character", + mergeSpannedItems = "logical", linkIdInName = "logical", nextExonText = "character", + prevExonText = "character", scoreLabel = "character", showTopScorers = "character", + linkDataUrl = "character", interactDirectional = "character", interactUp = "character", + interactMultiRegion = "character", maxHeightPixels = "character", speciesOrder = "character", + frames = "character", summary = "character", baseColorUseCds = "character", + baseColorUseSequence = "character", baseColorDefault = "character", + showDiffBasesAllScales = "logical", autoscale = "character", autoScale = "character", + viewLimits = "character", viewLimitsMax = "character", alwaysZero = "logical", + graphTypeDefault = "character", maxWindowToQuery = "integer", negateValues = "logical", + smoothingWindow = "character", transformFunc = "character", windowingFunction = "character", + yLineMark = "character", yLineOnOff = "logical", gridDefault = "logical", + showSnpWidth = "integer", otherSpecies = "character", minQual = "character", minFreq = "character", + hapClusterEnabled = "character", hapClusterColorBy = "character", hapClusterTreeAngle = "character", + hapClusterHeight = "character", applyMinQual = "character", superTrack = "character", + parent = "character", compositeTrack = "logical", allButtonPair = "logical", + 
centerLabelsDense = "logical", dragAndDrop = "character", + hideEmptySubtracks = "logical", hideEmptySubtracksMultiBedUrl = "character", + hideEmptySubtracksSourcesUrl = "character", hideEmptySubtracksLabel = "character", + subGroup1 = "character", subGroup2 = "character", subGroup3 = "character", subGroup4 = "character", + subGroup5 = "character", subGroup6 = "character", subGroup7 = "character", subGroup8 = "character", + subGroup9 = "character", subGroups = "character", dimensions = "character", + filterComposite = "character", dimensionAchecked = "character", dimensionBchecked = "character", + sortOrder = "character", view = "character", viewUi = "logical", configurable = "logical", + container = "character", aggregate = "character", showSubtrackColorOnUi = "logical", + metadata = "character", noInherit = "logical", useScore = "integer") + trackDf$value <- gsub("\\b[Oo]n\\b", "TRUE", trackDf$value) + trackDf$value <- gsub("\\b[Oo]ff\\b", "FALSE", trackDf$value) + args <- Map(as, trackDf$value, fieldToType[trackDf$field]) + names(args) <- trackDf$field + track <- do.call(Track, args) + track +} + +getTabCountList <- function(contentdf) { + matches <- gregexpr("^(\\t)+", contentdf) + tabCountList <- vapply(matches, attr, integer(1L), "match.length") + tabCountList +} + +readAndSanitize <- function(filepath) { + fileContent <- readLines(filepath, warn = FALSE) + fileContent <- gsub("^(\\t)*#(.)*", "", fileContent) # to avoid reading commented tracks + fileContent <- gsub(",", ";", fileContent) + contentDf <- read.csv(text = sub(" ", ",", fileContent), header = FALSE) + contentDf$V2 <- gsub(";", ",", contentDf$V2) + nonEmptyContent <- vapply(contentDf$V2, function(x) x!="", logical(1L)) + contentDf <- contentDf[nonEmptyContent,] + contentDf +} + +getTrackDbContent <- function(x, trackDbFilePath) { + if (file.size(parseURI(trackDbFilePath)$path) == 1L) { + x@tracks <- TrackContainer() + return(x) + } + contentDf <- readAndSanitize(trackDbFilePath) + tracksIndex <- 
grep("\\btrack\\b", contentDf$V1) + levels <- getTabCountList(contentDf$V1) + levels <- levels[tracksIndex] + levels <- as.integer(gsub(-1, 0, levels)) + totalTracks <- length(tracksIndex) + tracksIndex[length(tracksIndex) + 1] <- length(contentDf$V1) + 1 # to read last track from file + contentDf$V1 <- gsub("^(\\t)+", "", contentDf$V1) + # to speed up, reading track by track + tracks <- lapply(c(1:totalTracks), function(x) { + startPosition <- tracksIndex[x] + endPosition <- tracksIndex[x + 1] - 1 + trackDf <- setNames(data.frame(contentDf$V1[startPosition:endPosition], + contentDf$V2[startPosition:endPosition]), + c("field", "value")) + track <- createTrack(trackDf) + }) + x@tracks <- TrackContainer(tracks) + x@levels <- levels + x +} + +setMethod("genome", "TrackHubGenome", function(x) x@genome) + +setMethod("uri", "TrackHubGenome", function(x) + paste(trimSlash(uri(trackhub(x))), genome(x), sep = "/")) + +setMethod("getTracks", "TrackHubGenome", function(x) { + x@tracks +}) + +setMethod("names", "TrackHubGenome", function(x) { + as.character(names(getTracks(x))) +}) + +setMethod("trackNames", "TrackHubGenome", function(object) { + names(object) +}) + +setMethod("trackField", "TrackHubGenome", function(x, name, key) { + names <- names(x@tracks) + track <- x@tracks[names == name] + if (length(track) == 0L) stop("Track '", name, "' does not exist") + else if (length(track) > 1L) stop("Multiple tracks match ", name) + slot(track[[1L]], key) +}) + +setReplaceMethod("trackField", "TrackHubGenome", function(x, name, key, value) { + names <- names(x@tracks) + track <- x@tracks[names == name] + slot(track[[1L]], key) <- value + x@tracks[names == name] <- track + x +}) + +setMethod("organism", "TrackHubGenome", function(object) { + genome <- getGenome(trackhub(object), genome(object)) + genome@organism +}) + +setMethod("referenceSequence", "TrackHubGenome", function(x) { + trackhub <- trackhub(x) + genome <- getGenome(trackhub, genome(x)) + twoBitPathValue <- 
genome@twoBitPath + if (!isFieldEmpty(twoBitPathValue)) { + twoBitFilePath <- combineURI(uri(trackhub), twoBitPathValue) + import(twoBitFilePath) + } + else stop("genome.txt: 'twoBitPath' does not contain a reference to a file") +}) + +setReplaceMethod("referenceSequence", "TrackHubGenome", function(x, value) { + trackhub <- trackhub(x) + genomesFilePath <- combineURI(uri(trackhub), trackhub@genomesFile) + stopIfNotLocal(genomesFilePath) + genome <- getGenome(trackhub, genome(x)) + twoBitPathValue <- genome@twoBitPath + if (!isFieldEmpty(twoBitPathValue)) { + twoBitFilePath <- combineURI(uri(trackhub(x)), twoBitPathValue) + export.2bit(value, twoBitFilePath) + x + } + else stop("genome.txt: 'twoBitPath' does not contain a reference to a file") +}) + +setMethod("length", "TrackHubGenome", function(x) { + length(names(x)) +}) + +setMethod("writeTrackHub", "TrackHubGenome", function(x) { + trackhub <- trackhub(x) + stopIfNotLocal(hubFile(trackhub)) + genome <- getGenome(trackhub, genome(x)) + trackDbValue <- genome@trackDb + trackDbFilePath <- combineURI(uri(trackhub), trackDbValue) + if (file.size(parseURI(trackDbFilePath)$path) != 1L || length(x@tracks)) { + tabStrings <- vapply(x@levels, function(y) { + paste(rep("\t", y), collapse = "") + },character(1L)) + if (length(tabStrings) == 0) + tabStrings <- rep("", length(x@tracks)) + slots <- slotNames(Track()) + tracks <- vapply(seq_len(length(x@tracks)), function(i) { + track <- vapply(slots, function(slotName) { + slotValue <- slot(x@tracks[[i]], slotName) + if (!isEmpty(slotValue)) { + if (is.na(tabStrings[i])) tabStrings[i] <- "" + trackline <- paste0(tabStrings[i], slotName, " ", slotValue) + if (slotName == "track") trackline <- paste0("\n", trackline) + trackline + } + else "" + }, character(1L)) + }, character(155L)) + tracks <- tracks[tracks != ""] + tracks <- gsub("\\bTRUE\\b", "on", tracks) + tracks <- gsub("\\bFALSE\\b", "off", tracks) + writeLines(tracks, trackDbFilePath) + } +}) + +setMethod("show", 
"TrackHubGenome", function(object) { + cat(class(object), "track database\ngenome:", genome(object), "\ntrackhub:", + uri(trackhub(object)), "\n") + cat(S4Vectors:::labeledLine("names", names(object))) +}) + +TrackHubGenome <- function(trackhub, genome, create = FALSE) { + trackhub <- as(trackhub, "TrackHub") + thg <- new("TrackHubGenome") + thg@trackhub <- trackhub + thg@genome <- genome + genome <- getGenome(trackhub(thg), genome(thg)) + trackDbValue <- genome@trackDb + if (!isFieldEmpty(trackDbValue)) { + trackDbFilePath <- combineURI(uri(trackhub(thg)), trackDbValue) + absolutePath <- parseURI(trackDbFilePath)$path + if (!uriExists(trackDbFilePath) && create) { + createResource(trackDbFilePath) + }else if (file.size(absolutePath) != 1L && uriExists(trackDbFilePath)) { + thg <- getTrackDbContent(thg, trackDbFilePath) + } + } + thg +} + +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +### Import of tracks from Track Hub +### + +setMethod("track", "TrackHubGenome", function(object, name, ...) 
{ + names <- names(object@tracks) + track <- object@tracks[names == name] + if (length(track) == 0L) stop("Track '", name, "' does not exist") + else if (length(track) > 1L) stop("Multiple tracks match ", name) + + if (isEmpty(track[[1L]]@bigDataUrl)) { + stop("Track '", name, "' does not contain any data file") + } + else if (uriIsLocal(parseURI(track[[1L]]@bigDataUrl))) { + import(paste0(parseURI(uri(trackhub(object)))$path, "/", track[[1L]]@bigDataUrl)) + }else { + import(track[[1L]]@bigDataUrl) + } +}) + +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +### Export of tracks to Track Hub +### + +copyResourceToTrackHub <- function(object, uri) { + parsed_uri <- .parseURI(uri) + if (parsed_uri$scheme == "") + uri <- paste0("file://", uri) + filename <- basename(uri) + trackhub <- trackhub(object) + object_uri <- .parseURI(uri(trackhub)) + if (uriIsLocal(object_uri)) { + genome <- getGenome(trackhub, genome(object)) + trackDbValue <- genome@trackDb + trackDbValue <- sub(basename(trackDbValue), "", trackDbValue) + trackDbValue <- sub("/$", "", trackDbValue) + dest_file <- paste(object_uri$path, trackDbValue, filename, sep = "/") + dest_file <- sub("^/", "", dest_file) + if (paste(uri(object), filename, sep = "/") != uri) + ### FIXME: URLdecode() here because of R bug + download.file(URLdecode(uri), dest_file) + } + else stop("TrackHub is not local; cannot copy track") + filename +} + +.exportToTrackHub <- function(object, name, + format = bestFileFormat(value, object), + index = TRUE, ..., value) +{ + filename <- paste(name, format, sep = ".") + path <- paste(uri(object), filename, sep = "/") + file <- export(value, path, format = format, index = index, ...) 
+ track(object, name, index = FALSE) <- file + object +} + +setReplaceMethod("track",signature(object = "TrackHubGenome", value = "ANY"), + .exportToTrackHub) + +setReplaceMethod("track", + signature(object = "TrackHubGenome", value = "RsamtoolsFile"), + function(object, name, ..., value) + { + if (missing(name)) + name <- basename(path(value)) + track(object, name) <- URLencode(path(value)) + copyResourceToTrackHub(object, URLencode(index(value))) + object + }) + +setReplaceMethod("track", + signature(object = "TrackHubGenome", value = "RTLFile"), + function(object, name, ..., value) + { + if (missing(name)) + name <- basename(path(value)) + track(object, name) <- URLencode(path(value)) + object + }) + +setReplaceMethod("track", + signature(object = "TrackHubGenome", value = "character"), + function(object, name = basename(object), ..., value) + { + filename <- copyResourceToTrackHub(object, value) + genome <- getGenome(trackhub(object), genome(object)) + trackDbValue <- genome@trackDb + trackDbValue <- sub(basename(trackDbValue), "", trackDbValue) + trackDbValue <- sub("/$", "", trackDbValue) + bigDataUrlValue <- paste(trackDbValue, filename, sep = "/") + bigDataUrlValue <- sub("^/", "", bigDataUrlValue) + names <- names(object@tracks) + trackPosition <- which(names == name) + trackDf <- setNames(data.frame(c("track", "bigDataUrl"), + c(name, bigDataUrlValue)), + c("field", "value")) + track <- createTrack(trackDf) + if (isEmpty(trackPosition)) { + trackPosition <- length(object@tracks) + 1 + object@tracks[[trackPosition]] <- track + }else { + object@tracks[[trackPosition]] <- track + } + object + }) + +### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +### Utilities +### + +combineURI <- function(x,y) paste(trimSlash(x), y, sep = "/") + +isFileReference <- function(x) { + formats <- c("txt", "2bit", "html") + tools::file_ext(x) %in% formats +} + +isFieldEmpty <- function(x) { + if ((isFileReference(x) && !is.na(x)) && !is.null(x)) { + 
return(FALSE) + } + return(TRUE) +} + +trimSlash <- function(x) { + sub("/$", "", x) +} diff -Nru r-bioc-rtracklayer-1.48.0/R/trackTable.R r-bioc-rtracklayer-1.50.0/R/trackTable.R --- r-bioc-rtracklayer-1.48.0/R/trackTable.R 2020-04-27 20:22:29.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/R/trackTable.R 2020-10-27 17:24:51.000000000 +0000 @@ -15,7 +15,7 @@ standardGeneric("import.tabSeparated"), signature = "con") -setMethod("import.tabSeparated", "characterORconnection", +setMethod("import.tabSeparated", "character_OR_connection", function(con, genome = NA, seqnames = 1L, start = 2L, end = 3L, ...) { tab <- read.table(con, sep = "\t", ...) diff -Nru r-bioc-rtracklayer-1.48.0/R/ucsc.R r-bioc-rtracklayer-1.50.0/R/ucsc.R --- r-bioc-rtracklayer-1.48.0/R/ucsc.R 2020-04-27 20:22:29.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/R/ucsc.R 2020-10-27 17:24:51.000000000 +0000 @@ -1161,6 +1161,9 @@ }) chooseGraphType <- function(from) { + if (is(from, "GPos")) { + return(if (is(from, "StitchedGPos")) "bedGraph" else "wig") + } r <- ranges(from) type <- "bedGraph" ## decide whether compression is a good idea diff -Nru r-bioc-rtracklayer-1.48.0/R/wig.R r-bioc-rtracklayer-1.50.0/R/wig.R --- r-bioc-rtracklayer-1.48.0/R/wig.R 2020-04-27 20:22:29.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/R/wig.R 2020-10-27 17:24:51.000000000 +0000 @@ -80,11 +80,6 @@ doBlock <- function(chromData) { if (length(chromData) == 0L) return() - if (is(chromData, "GPos")) { - ans <- writer(chromData, con, "fixedStep", append) - append <<- TRUE - return(ans) - } if (is.unsorted(start(chromData))) chromData <- chromData[order(start(chromData)),] starts <- start(chromData) @@ -93,11 +88,13 @@ stop("Features cannot overlap. 
", "Note that WIG does not distinguish between strands - ", "try exporting two tracks, one for each strand.") - spans <- ends - starts + 1 if (length(starts) == 1) steps <- 0 else steps <- diff(starts) - fixedSpan <- all(spans[1] == spans) + fixedSpan <- is(object, "GPos") || { + spans <- ends - starts + 1L + all(spans[1L] == spans) + } if (!fixedSpan) stop("The span must be uniform for Wiggle export. ", "Consider exporting to bedGraph/bigWig, ", diff -Nru r-bioc-rtracklayer-1.48.0/src/bbiHelper.c r-bioc-rtracklayer-1.50.0/src/bbiHelper.c --- r-bioc-rtracklayer-1.48.0/src/bbiHelper.c 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/bbiHelper.c 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,23 @@ +#include "ucsc/common.h" +#include "ucsc/bbiFile.h" + +#include "bbiHelper.h" + +SEXP bbiSeqLengths(struct bbiFile *file) { + struct bbiChromInfo *chromList = bbiChromList(file); + struct bbiChromInfo *chrom = chromList; + SEXP seqlengths, seqlengthNames; + + PROTECT(seqlengths = allocVector(INTSXP, slCount(chromList))); + seqlengthNames = allocVector(STRSXP, length(seqlengths)); + setAttrib(seqlengths, R_NamesSymbol, seqlengthNames); + + for(int i = 0; i < length(seqlengths); i++) { + INTEGER(seqlengths)[i] = chrom->size; + SET_STRING_ELT(seqlengthNames, i, mkChar(chrom->name)); + chrom = chrom->next; + } + bbiChromInfoFreeList(&chromList); + UNPROTECT(1); + return seqlengths; +} diff -Nru r-bioc-rtracklayer-1.48.0/src/bbiHelper.h r-bioc-rtracklayer-1.50.0/src/bbiHelper.h --- r-bioc-rtracklayer-1.48.0/src/bbiHelper.h 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/bbiHelper.h 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,11 @@ +#ifndef BBI_HELPER_H +#define BBI_HELPER_H + +#include "ucsc/common.h" +#include "ucsc/bbiFile.h" + +#include "rtracklayer.h" + +SEXP bbiSeqLengths(struct bbiFile *file); + +#endif diff -Nru r-bioc-rtracklayer-1.48.0/src/bigBed.c r-bioc-rtracklayer-1.50.0/src/bigBed.c --- 
r-bioc-rtracklayer-1.48.0/src/bigBed.c 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/bigBed.c 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,502 @@ +#include "ucsc/common.h" +#include "ucsc/hash.h" +#include "ucsc/bigBed.h" +#include "ucsc/linefile.h" +#include "ucsc/localmem.h" + +#include "bigBed.h" +#include "handlers.h" +#include "bbiHelper.h" +#include "bigBedHelper.h" + +/* --- .Call ENTRY POINT --- */ +SEXP BBDFile_seqlengths(SEXP r_filename) +{ + pushRHandlers(); + struct bbiFile *file = bigBedFileOpen((char *)CHAR(asChar(r_filename))); + SEXP seqlengths = PROTECT(bbiSeqLengths(file)); + bigBedFileClose(&file); + popRHandlers(); + UNPROTECT(1); + return seqlengths; +} + +/* --- .Call ENTRY POINT --- */ +SEXP BBDFile_fieldnames(SEXP r_filename) +{ + pushRHandlers(); + struct bbiFile *file = bigBedFileOpen((char *)CHAR(asChar(r_filename))); + char *asText = bigBedAutoSqlText(file); + struct asObject *as = asParseText(asText); + freeMem(asText); + int fieldCount = file->fieldCount; + int definedFieldCount = getDefinedFieldCount(as); + bigBedFileClose(&file); + char *names[] = {"name", "score", "thick", "itemRgb", "blocks"}; + struct asColumn *asCol = as->columnList; + SEXP defaultFields = PROTECT(allocVector(STRSXP, definedFieldCount)); + SEXP extraFields = PROTECT(allocVector(STRSXP, fieldCount - definedFieldCount)); + for (int i = 0; i < fieldCount; ++i) { + if (i >= definedFieldCount) + SET_STRING_ELT(extraFields, i - definedFieldCount, mkChar(asCol->name)); + else if (i == 3) + SET_STRING_ELT(defaultFields, i, mkChar(names[0])); + else if (i == 4) + SET_STRING_ELT(defaultFields, i, mkChar(names[1])); + else if (i == 7) + SET_STRING_ELT(defaultFields, i, mkChar(names[2])); + else if (i == 8) + SET_STRING_ELT(defaultFields, i, mkChar(names[3])); + else if (i == 11) + SET_STRING_ELT(defaultFields, i, mkChar(names[4])); + asCol = asCol->next; + } + SEXP list = PROTECT(allocVector(VECSXP, 2)); + SET_VECTOR_ELT(list, 0, defaultFields); + 
SET_VECTOR_ELT(list, 1, extraFields); + asObjectFree(&as); + popRHandlers(); + UNPROTECT(3); + return list; +} + +/* --- .Call ENTRY POINT --- */ +SEXP BBDFile_query(SEXP r_filename, SEXP r_seqnames, SEXP r_ranges, + SEXP r_defaultindex, SEXP r_extraindex) +{ + pushRHandlers(); + struct bbiFile *file = bigBedFileOpen((char *)CHAR(asChar(r_filename))); + struct lm *lm = lmInit(0); + int n_ranges = get_IRanges_length(r_ranges); + int *start = INTEGER(get_IRanges_start(r_ranges)); + int *width = INTEGER(get_IRanges_width(r_ranges)); + + SEXP ans, n_qhits, ranges, chromStart, chromWidth, name, score, + strand = R_NilValue, thickStart, thickWidth, itemRgb, blocks, + extraFields = R_NilValue, lengthIndex; + + n_qhits = PROTECT(allocVector(INTSXP, n_ranges)); + struct bigBedInterval *hits = NULL, *tail = NULL; + /* querying records in range */ + for (int i = 0; i < n_ranges; ++i) { + struct bigBedInterval *queryHits = + bigBedIntervalQuery(file, (char *)CHAR(STRING_ELT(r_seqnames, i)), + start[i] - 1, start[i] - 1 + width[i], 0, lm); + if (!hits) { + hits = queryHits; + tail = slLastEl(hits); + } else { + tail->next = queryHits; + tail = slLastEl(tail); + } + INTEGER(n_qhits)[i] = slCount(queryHits); + } + + /* need these before closing file */ + char *asText = bigBedAutoSqlText(file); + struct asObject *as = asParseText(asText); + freeMem(asText); + int fieldCount = file->fieldCount; + int definedFieldCount = getDefinedFieldCount(as); + int extraFieldCount = fieldCount - definedFieldCount; + int n_hits = slCount(hits); + bigBedFileClose(&file); + + int presentFieldCount = 0, unprotectCount = 0; + /* mandatory default field */ + chromStart = PROTECT(allocVector(INTSXP, n_hits)); + chromWidth = PROTECT(allocVector(INTSXP, n_hits)); + /* if any of the default field is present and selected, allocate memory for it */ + if (isPresent(definedFieldCount, i_name) && isSelected(r_defaultindex, 1)) { + name = PROTECT(allocVector(STRSXP, n_hits)); + ++presentFieldCount; + 
++unprotectCount; + } + if (isPresent(definedFieldCount, i_score) && isSelected(r_defaultindex, 2)) { + score = PROTECT(allocVector(INTSXP, n_hits)); + ++presentFieldCount; + ++unprotectCount; + } + if (isPresent(definedFieldCount, i_strand)) { + strand = PROTECT(allocVector(STRSXP, n_hits)); + ++unprotectCount; + } + if (isPresent(definedFieldCount, i_thick) && isSelected(r_defaultindex, 3)) { + thickStart = PROTECT(allocVector(INTSXP, n_hits)); + thickWidth = PROTECT(allocVector(INTSXP, n_hits)); + ++presentFieldCount; + unprotectCount += 2; + } + if (isPresent(definedFieldCount, i_itemRgb) && isSelected(r_defaultindex, 4)) { + itemRgb = PROTECT(allocVector(STRSXP, n_hits)); + ++presentFieldCount; + ++unprotectCount; + } + if (isPresent(definedFieldCount, i_blocks) && isSelected(r_defaultindex, 5)) { + blocks = PROTECT(allocVector(VECSXP, n_hits)); + ++presentFieldCount; + ++unprotectCount; + } + + SEXPTYPE *typeId; + /* if extra fields are present and selected + * identify the type information and allocate memory */ + if (extraFieldCount > 0) { + int k = 0; + enum asTypes fieldType; + struct asColumn *asCol = as->columnList; + extraFields = PROTECT(allocVector(VECSXP, extraFieldCount)); + typeId = (SEXPTYPE*)R_alloc(extraFieldCount, sizeof(SEXPTYPE)); + for (int j = 0; j < fieldCount; ++j) { + fieldType = asCol->lowType->type; + if (j >= definedFieldCount) { + if (asTypesIsFloating(fieldType) || fieldType == t_uint || + fieldType == t_off) { + typeId[k] = REALSXP; + } else if (fieldType == t_int || fieldType == t_short || + fieldType == t_ushort || fieldType == t_byte) { + typeId[k] = INTSXP; + } else if (fieldType == t_char || fieldType == t_string || + fieldType == t_lstring) { + typeId[k] = STRSXP; + } else if (fieldType == t_ubyte) { + typeId[k] = RAWSXP; + } + if (isSelected(r_extraindex, (j - definedFieldCount + 1))) { + SEXP temp = PROTECT(allocVector(typeId[k], n_hits)); + SET_VECTOR_ELT(extraFields, k, temp); + ++unprotectCount; + ++k; + } + } + asCol = 
asCol->next; + } + lengthIndex = PROTECT(allocVector(INTSXP, extraFieldCount)); + memset(INTEGER(lengthIndex), 0, sizeof(int) * extraFieldCount); + unprotectCount += 2; + } + asObjectFree(&as); + + int count = 0, k = 0; + char startBuf[16], endBuf[16], *row[fieldCount], rgbBuf[8]; + for (int i = 0; i < n_hits; ++i, hits = hits->next, ++count) { + if (INTEGER(n_qhits)[k] == count && k < n_ranges) { + ++k; + count = 0; + } + bigBedIntervalToRow(hits, (char *)CHAR(STRING_ELT(r_seqnames, k)), + startBuf, endBuf, row, fieldCount); + struct bed *bed = bedLoadN(row, definedFieldCount); + /* mandatory default field */ + INTEGER(chromStart)[i] = bed->chromStart; + INTEGER(chromWidth)[i] = bed->chromEnd - bed->chromStart + 1; + /* if any of the default field is present and selected, store its value */ + if (isPresent(definedFieldCount, i_name) && isSelected(r_defaultindex, 1)) { + SET_STRING_ELT(name, i, mkChar(bed->name)); + } + if (isPresent(definedFieldCount, i_score) && isSelected(r_defaultindex, 2)) { + INTEGER(score)[i] = bed->score; + } + if (isPresent(definedFieldCount, i_strand)) { + SET_STRING_ELT(strand, i, mkChar(bed->strand)); + } + if (isPresent(definedFieldCount, i_thick) && isSelected(r_defaultindex, 3)) { + INTEGER(thickWidth)[i] = bed->thickEnd - bed->thickStart + 1; + INTEGER(thickStart)[i] = bed->thickStart; + } + if (isPresent(definedFieldCount, i_itemRgb) && isSelected(r_defaultindex, 4)) { + snprintf(rgbBuf, 8, "#%06x", bed->itemRgb); + SET_STRING_ELT(itemRgb, i, mkChar(rgbBuf)); + } + if (isPresent(definedFieldCount, i_blocks) && isSelected(r_defaultindex, 5)) { + SEXP bstart = PROTECT(allocVector(INTSXP, bed->blockCount)); + SEXP bwidth = PROTECT(allocVector(INTSXP, bed->blockCount)); + for (int j = 0; j< bed->blockCount; ++j) { + INTEGER(bwidth)[j] = bed->blockSizes[j]; + INTEGER(bstart)[j] = bed->chromStarts[j]; + } + SET_VECTOR_ELT(blocks, i, new_IRanges("IRanges", bstart, bwidth, R_NilValue)); + UNPROTECT(2); + } + bedFree(&bed); + + /* if extra 
fields are present and selected store their values */ + for (int j = definedFieldCount, efIndex = 0 ; j < fieldCount; ++j) { + if (isSelected(r_extraindex, (j - definedFieldCount + 1))) { + switch(typeId[efIndex]) { + case REALSXP: + REAL(VECTOR_ELT(extraFields, efIndex))[i] = sqlDouble(row[j]); + break; + case INTSXP: + INTEGER(VECTOR_ELT(extraFields, efIndex))[i] = sqlSigned(row[j]); + break; + case STRSXP: { + int index = INTEGER(lengthIndex)[efIndex]; + SET_STRING_ELT(VECTOR_ELT(extraFields, efIndex), index, mkChar(row[j])); + INTEGER(lengthIndex)[efIndex] = index + 1; + break; + } + case RAWSXP: + RAW(extraFields)[i] = sqlUnsigned(row[j]); + break; + } + ++efIndex; + } + } + freeMem(row[3]); + } + + ranges = PROTECT(new_IRanges("IRanges", chromStart, chromWidth, R_NilValue)); + ans = PROTECT(allocVector(VECSXP, presentFieldCount + 4)); + int index = 0; + SET_VECTOR_ELT(ans, index++, n_qhits); + SET_VECTOR_ELT(ans, index++, extraFields); + SET_VECTOR_ELT(ans, index++, ranges); + SET_VECTOR_ELT(ans, index++, strand); + if (isPresent(definedFieldCount, i_name) && isSelected(r_defaultindex, 1)) { + SET_VECTOR_ELT(ans, index++, name); + } + if (isPresent(definedFieldCount, i_score) && isSelected(r_defaultindex, 2)) { + SET_VECTOR_ELT(ans, index++, score); + } + if (isPresent(definedFieldCount, i_thick) && isSelected(r_defaultindex, 3)) { + SET_VECTOR_ELT(ans, index++, new_IRanges("IRanges", thickStart, + thickWidth, R_NilValue)); + } + if (isPresent(definedFieldCount, i_itemRgb) && isSelected(r_defaultindex, 4)) { + SET_VECTOR_ELT(ans, index++, itemRgb); + } + if (isPresent(definedFieldCount, i_blocks) && isSelected(r_defaultindex, 5)) { + SET_VECTOR_ELT(ans, index++, blocks); + } + UNPROTECT(5 + unprotectCount); + lmCleanup(&lm); + popRHandlers(); + return ans; +} + +static struct hash *createIntHash(SEXP v) { + struct hash *hash = hashNew(0); + SEXP names = getAttrib(v, R_NamesSymbol); + for (int i = 0; i < length(v); ++i) + hashAddInt(hash, (char 
*)CHAR(STRING_ELT(names, i)), INTEGER(v)[i]); + return hash; +} + +/* --- .Call ENTRY POINT --- */ +SEXP BBDFile_write(SEXP r_seqlengths, SEXP r_bedString, SEXP r_autosql, + SEXP r_indexfields, SEXP r_compress, SEXP r_outfile) +{ + pushRHandlers(); + int blockSize = 256; + int itemsPerSlot = 512; + char *bedString = cloneString((char *)CHAR(asChar(r_bedString))); + struct lineFile *lf = lineFileOnString("text", TRUE, bedString); + struct bbExIndexMaker *eim = NULL; + bool doCompress = asLogical(r_compress); + struct hash *lenHash = createIntHash(r_seqlengths); + char *asText = (char *)CHAR(asChar(r_autosql)); + struct asObject *as = asParseText(asText); + bits16 fieldCount = slCount(as->columnList); + bits16 definedFieldCount = getDefinedFieldCount(as); + char *extraIndex = (char *)CHAR(asChar(r_indexfields)); + struct slName *extraIndexList = slNameListFromString(extraIndex, ','); + bits16 extraIndexCount = slCount(extraIndexList); + if (extraIndexList != NULL) + eim = bbExIndexMakerNew(extraIndexList, as); + + /* Do first pass, mostly just scanning file and counting hits per chromosome. */ + int minDiff = 0; + double aveSize = 0; + bits64 bedCount = 0; + bits32 uncompressBufSize = 0; + struct bbiChromUsage *usageList = bbiChromUsageFromBedFile(lf, lenHash, eim, &minDiff, + &aveSize, &bedCount); + + /* Open output file and write dummy header. */ + FILE *f = mustOpen((char *)CHAR(asChar(r_outfile)), "wb"); + bbiWriteDummyHeader(f); + bbiWriteDummyZooms(f); + + /* Write out autoSql string */ + bits64 asOffset = ftell(f); + mustWrite(f, asText, strlen(asText) + 1); + + /* Write out dummy total summary. */ + struct bbiSummaryElement totalSum; + ZeroVar(&totalSum); + bits64 totalSummaryOffset = ftell(f); + bbiSummaryElementWrite(f, &totalSum); + + /* Write out dummy header extension */ + bits64 extHeaderOffset = ftell(f); + bits16 extHeaderSize = 64; + repeatCharOut(f, 0, extHeaderSize); + + /* Write out extra index stuff if need be. 
*/ + bits64 extraIndexListOffset = 0; + bits64 extraIndexListEndOffset = 0; + if (extraIndexList != NULL) { + extraIndexListOffset = ftell(f); + int extraIndexSize = 16 + 4*1; /* Fixed record size 16, plus 1 times field size of 4 */ + repeatCharOut(f, 0, extraIndexSize*extraIndexCount); + extraIndexListEndOffset = ftell(f); + } + + /* Write out chromosome/size database. */ + bits64 chromTreeOffset = ftell(f); + bbiWriteChromInfo(usageList, blockSize, f); + + /* Set up to keep track of possible initial reduction levels. */ + int resScales[bbiMaxZoomLevels], resSizes[bbiMaxZoomLevels]; + int resTryCount = bbiCalcResScalesAndSizes(aveSize, resScales, resSizes); + + /* Write out primary full resolution data in sections, collect stats to use for reductions. */ + bits64 dataOffset = ftell(f); + bits32 blockCount = 0; + bits32 maxBlockSize = 0; + struct bbiBoundsArray *boundsArray = NULL; + writeOne(f, bedCount); + if (bedCount > 0) { + blockCount = bbiCountSectionsNeeded(usageList, itemsPerSlot); + AllocArray(boundsArray, blockCount); + freez(&bedString); + bedString = cloneString((char *)CHAR(asChar(r_bedString))); + lf = lineFileOnString("text", TRUE, bedString); + if (eim) + bbExIndexMakerAllocChunkArrays(eim, bedCount); + writeBlocks(usageList, lf, as, itemsPerSlot, boundsArray, blockCount, doCompress, + f, resTryCount, resScales, resSizes, eim, bedCount, fieldCount, + definedFieldCount, &maxBlockSize); + } + + /* Write out primary data index. */ + bits64 indexOffset = ftell(f); + cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), blockCount, + blockSize, 1, NULL, bbiBoundsArrayFetchKey, + bbiBoundsArrayFetchOffset, indexOffset, f); + freez(&boundsArray); + + /* Declare arrays and vars that track the zoom levels we actually output. */ + bits32 zoomAmounts[bbiMaxZoomLevels]; + bits64 zoomDataOffsets[bbiMaxZoomLevels]; + bits64 zoomIndexOffsets[bbiMaxZoomLevels]; + + /* Call monster zoom maker library function that bedGraphToBigWig also uses. 
*/ + int zoomLevels = 0; + if (bedCount > 0) { + freez(&bedString); + bedString = cloneString((char *)CHAR(asChar(r_bedString))); + lf = lineFileOnString("text", TRUE, bedString); + zoomLevels = bbiWriteZoomLevels(lf, f, blockSize, itemsPerSlot, bedWriteReducedOnceReturnReducedTwice, + fieldCount, doCompress, indexOffset - dataOffset, usageList, + resTryCount, resScales, resSizes, zoomAmounts, zoomDataOffsets, + zoomIndexOffsets, &totalSum); + } + + /* Write out extra indexes if need be. */ + if (eim) { + int i; + for (i=0; i < eim->indexCount; ++i) { + eim->fileOffsets[i] = ftell(f); + maxBedNameSize = eim->maxFieldSize[i]; + qsort(eim->chunkArrayArray[i], bedCount, + sizeof(struct bbNamedFileChunk), bbNamedFileChunkCmpByName); + assert(sizeof(struct bbNamedFileChunk) == sizeof(eim->chunkArrayArray[i][0])); + bptFileBulkIndexToOpenFile(eim->chunkArrayArray[i], sizeof(eim->chunkArrayArray[i][0]), + bedCount, blockSize, bbNamedFileChunkKey, maxBedNameSize, + bbNamedFileChunkVal, sizeof(bits64) + sizeof(bits64), f); + } + } + + /* Figure out buffer size needed for uncompression if need be. */ + if (doCompress) { + int maxZoomUncompSize = itemsPerSlot * sizeof(struct bbiSummaryOnDisk); + uncompressBufSize = max(maxBlockSize, maxZoomUncompSize); + } + + /* Go back and rewrite header. */ + rewind(f); + bits32 sig = bigBedSig; + bits16 version = bbiCurrentVersion; + bits16 summaryCount = zoomLevels; + bits32 reserved32 = 0; + bits64 reserved64 = 0; + + /* Write fixed header */ + writeOne(f, sig); + writeOne(f, version); + writeOne(f, summaryCount); + writeOne(f, chromTreeOffset); + writeOne(f, dataOffset); + writeOne(f, indexOffset); + writeOne(f, fieldCount); + writeOne(f, definedFieldCount); + writeOne(f, asOffset); + writeOne(f, totalSummaryOffset); + writeOne(f, uncompressBufSize); + writeOne(f, extHeaderOffset); + assert(ftell(f) == 64); + + /* Write summary headers with data. 
*/ + int i; + for (i=0; ifileOffsets[i]); + repeatCharOut(f, 0, 4); // reserved + + // Write out field list - easy this time because for now always only one field. + bits16 fieldId = eim->indexFields[i]; + writeOne(f, fieldId); + repeatCharOut(f, 0, 2); // reserved + } + assert(ftell(f) == extraIndexListEndOffset); + } + + /* Write end signature. */ + fseek(f, 0L, SEEK_END); + writeOne(f, sig); + + carefulClose(&f); + freez(&bedString); + freeHash(&lenHash); + asObjectFree(&as); + lineFileClose(&lf); + bbiChromUsageFreeList(&usageList); + popRHandlers(); + return r_outfile; +} diff -Nru r-bioc-rtracklayer-1.48.0/src/bigBed.h r-bioc-rtracklayer-1.50.0/src/bigBed.h --- r-bioc-rtracklayer-1.48.0/src/bigBed.h 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/bigBed.h 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,15 @@ +#ifndef BIG_BED_H +#define BIG_BED_H + +#include "rtracklayer.h" + +/* The .Call entry points */ + +SEXP BBDFile_seqlengths(SEXP r_filename); +SEXP BBDFile_fieldnames(SEXP r_filename); +SEXP BBDFile_query(SEXP r_filename, SEXP r_seqnames, SEXP r_ranges, + SEXP r_defaultindex, SEXP r_extraindex); +SEXP BBDFile_write(SEXP r_seqlengths, SEXP r_bedString, SEXP r_autosql, + SEXP r_indexfields, SEXP r_compress, SEXP r_outfile); + +#endif diff -Nru r-bioc-rtracklayer-1.48.0/src/bigBedHelper.c r-bioc-rtracklayer-1.50.0/src/bigBedHelper.c --- r-bioc-rtracklayer-1.48.0/src/bigBedHelper.c 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/bigBedHelper.c 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,460 @@ +#include "bigBedHelper.h" + +/* + Most of the functions in this file are taken from ucscGenomeBrowser/kent/src/utils/bedToBigBed.c +*/ + +int getDefinedFieldCount(struct asObject *as) { + int definedFieldCount = 0; + struct asColumn *asCol = as->columnList; + char *asText = bedAsDef(12, 12); + struct asObject *bedAs = asParseText(asText); + freeMem(asText); + struct asColumn *bedCol = bedAs->columnList; + while (asCol && 
bedCol) { + if (strncmp(asCol->name, bedCol->name, strlen(asCol->name)) == 0) + ++definedFieldCount; + bedCol = bedCol->next; + asCol = asCol->next; + } + asObjectFree(&bedAs); + return definedFieldCount; +} + +bool isPresent(int definedFieldCount, int index) { + return (definedFieldCount - index >= 0) ? TRUE : FALSE; +} + +bool isSelected(SEXP r_selectedindex, int position) { + if (length(r_selectedindex) == 0) + return TRUE; + for (int i = 0; i < length(r_selectedindex); ++i) { + if(INTEGER(r_selectedindex)[i] == position) + return TRUE; + } + return FALSE; +} + +/* following functions are taken from the kent library */ + +struct rbTree *rangeTreeForBedChrom(struct lineFile *lf, char *chrom) +/* Read lines from bed file as long as they match chrom. Return a rangeTree that + * corresponds to the coverage. */ +{ +struct rbTree *tree = rangeTreeNew(); +char *line; +while (lineFileNextReal(lf, &line)) + { + if (!startsWithWord(chrom, line)) + { + lineFileReuse(lf); + break; + } + char *row[3]; + chopLine(line, row); + unsigned start = sqlUnsigned(row[1]); + unsigned end = sqlUnsigned(row[2]); + rangeTreeAddToCoverageDepth(tree, start, end); + } +return tree; +} + +void bbExIndexMakerAddKeysFromRow(struct bbExIndexMaker *eim, char **row, int recordIx) +/* Save the keys that are being indexed by row in eim. 
*/ +{ +int i; +for (i=0; i < eim->indexCount; ++i) + { + int rowIx = eim->indexFields[i]; + eim->chunkArrayArray[i][recordIx].name = cloneString(row[rowIx]); + } +} + +void bbExIndexMakerAddOffsetSize(struct bbExIndexMaker *eim, bits64 offset, bits64 size, + long startIx, long endIx) +/* Update offset and size fields of all file chunks between startIx and endIx */ +{ +int i; +for (i=0; i < eim->indexCount; ++i) + { + struct bbNamedFileChunk *chunks = eim->chunkArrayArray[i]; + long j; + for (j = startIx; j < endIx; ++j) + { + struct bbNamedFileChunk *chunk = chunks + j; + chunk->offset = offset; + chunk->size = size; + } + } +} + +/* Allocate the big part of the extra index maker - the part that holds which + * chunk is used for each record. */ +void bbExIndexMakerAllocChunkArrays(struct bbExIndexMaker *eim, int recordCount) { + eim->recordCount = recordCount; + int i; + for (i=0; i < eim->indexCount; ++i) + AllocArray(eim->chunkArrayArray[i], recordCount); +} + +/* Return an index maker corresponding to extraIndexList. Checks that all fields + * mentioned are in autoSql definition, and for now that they are all text fields. */ +struct bbExIndexMaker *bbExIndexMakerNew(struct slName *extraIndexList, struct asObject *as) { + /* Fill in scalar fields and return quickly if no extra indexes. */ + struct bbExIndexMaker *eim; + AllocVar(eim); + eim->indexCount = slCount(extraIndexList); + if (eim->indexCount == 0) + return eim; // Not much to do in this case + + /* Allocate arrays according field count. 
*/ + AllocArray(eim->indexFields, eim->indexCount); + AllocArray(eim->maxFieldSize, eim->indexCount); + AllocArray(eim->chunkArrayArray, eim->indexCount); + AllocArray(eim->fileOffsets, eim->indexCount); + + /* Loop through each field checking that it is indeed something we can index + * and if so saving information about it */ + int indexIx = 0; + struct slName *name; + for (name = extraIndexList; name != NULL; name = name->next) { + struct asColumn *col = asColumnFind(as, name->name); + if (col == NULL) + errAbort("extraIndex field %s not a standard bed field or found in autoSql string.", + name->name); + if (!sameString(col->lowType->name, "string")) + errAbort("Sorry for now can only index string fields."); + eim->indexFields[indexIx] = slIxFromElement(as->columnList, col); + ++indexIx; + } + return eim; +} + +/* Compare two named offset object to facilitate qsorting by name. */ +int bbNamedFileChunkCmpByName(const void *va, const void *vb) { + const struct bbNamedFileChunk *a = va, *b = vb; + return strcmp(a->name, b->name); +} + +/* Return pointer to val for bPlusTree maker. */ +void *bbNamedFileChunkVal(const void *va) { + const struct bbNamedFileChunk *item = va; + return (void *)&item->offset; +} + +/* Copy name to keyBuf for bPlusTree maker */ +void bbNamedFileChunkKey(const void *va, char *keyBuf) { + const struct bbNamedFileChunk *item = va; + strncpy(keyBuf,item->name, maxBedNameSize); +} + +struct bbiSummary *bedWriteReducedOnceReturnReducedTwice(struct bbiChromUsage *usageList, + int fieldCount, struct lineFile *lf, bits32 initialReduction, bits32 initialReductionCount, + int zoomIncrement, int blockSize, int itemsPerSlot, boolean doCompress, + struct lm *lm, FILE *f, bits64 *retDataStart, bits64 *retIndexStart, + struct bbiSummaryElement *totalSum) +/* Write out data reduced by factor of initialReduction. Also calculate and keep in memory + * next reduction level. 
This is more work than some ways, but it keeps us from having to + * keep the first reduction entirely in memory. */ +{ +struct bbiSummary *twiceReducedList = NULL; +bits32 doubleReductionSize = initialReduction * zoomIncrement; +struct bbiChromUsage *usage = usageList; +struct bbiBoundsArray *boundsArray, *boundsPt, *boundsEnd; +boundsPt = AllocArray(boundsArray, initialReductionCount); +boundsEnd = boundsPt + initialReductionCount; + +*retDataStart = ftell(f); +writeOne(f, initialReductionCount); +/* This gets a little complicated I'm afraid. The strategy is to: + * 1) Build up a range tree that represents coverage depth on that chromosome + * This also has the nice side effect of getting rid of overlaps. + * 2) Stream through the range tree, outputting the initial summary level and + * further reducing. + */ +boolean firstTime = TRUE; +struct bbiSumOutStream *stream = bbiSumOutStreamOpen(itemsPerSlot, f, doCompress); +for (usage = usageList; usage != NULL; usage = usage->next) + { + struct bbiSummary oneSummary, *sum = NULL; + struct rbTree *rangeTree = rangeTreeForBedChrom(lf, usage->name); + struct range *range, *rangeList = rangeTreeList(rangeTree); + for (range = rangeList; range != NULL; range = range->next) + { + /* Grab values we want from range. */ + double val = ptToInt(range->val); + int start = range->start; + int end = range->end; + bits32 size = end - start; + + /* Add to total summary. */ + if (firstTime) + { + totalSum->validCount = size; + totalSum->minVal = totalSum->maxVal = val; + totalSum->sumData = val*size; + totalSum->sumSquares = val*val*size; + firstTime = FALSE; + } + else + { + totalSum->validCount += size; + if (val < totalSum->minVal) totalSum->minVal = val; + if (val > totalSum->maxVal) totalSum->maxVal = val; + totalSum->sumData += val*size; + totalSum->sumSquares += val*val*size; + } + + /* If start past existing block then output it. 
*/ + if (sum != NULL && sum->end <= start && sum->end < usage->size) + { + bbiOutputOneSummaryFurtherReduce(sum, &twiceReducedList, doubleReductionSize, + &boundsPt, boundsEnd, lm, stream); + sum = NULL; + } + /* If don't have a summary we're working on now, make one. */ + if (sum == NULL) + { + oneSummary.chromId = usage->id; + oneSummary.start = start; + oneSummary.end = start + initialReduction; + if (oneSummary.end > usage->size) oneSummary.end = usage->size; + oneSummary.minVal = oneSummary.maxVal = val; + oneSummary.sumData = oneSummary.sumSquares = 0.0; + oneSummary.validCount = 0; + sum = &oneSummary; + } + /* Deal with case where might have to split an item between multiple summaries. This + * loop handles all but the final affected summary in that case. */ + while (end > sum->end) + { + /* Fold in bits that overlap with existing summary and output. */ + int overlap = rangeIntersection(start, end, sum->start, sum->end); + assert(overlap > 0); + sum->validCount += overlap; + if (sum->minVal > val) sum->minVal = val; + if (sum->maxVal < val) sum->maxVal = val; + sum->sumData += val * overlap; + sum->sumSquares += val*val * overlap; + bbiOutputOneSummaryFurtherReduce(sum, &twiceReducedList, doubleReductionSize, + &boundsPt, boundsEnd, lm, stream); + size -= overlap; + + /* Move summary to next part. */ + sum->start = start = sum->end; + sum->end = start + initialReduction; + if (sum->end > usage->size) sum->end = usage->size; + sum->minVal = sum->maxVal = val; + sum->sumData = sum->sumSquares = 0.0; + sum->validCount = 0; + } + + /* Add to summary. 
*/ + sum->validCount += size; + if (sum->minVal > val) sum->minVal = val; + if (sum->maxVal < val) sum->maxVal = val; + sum->sumData += val * size; + sum->sumSquares += val*val * size; + } + if (sum != NULL) + { + bbiOutputOneSummaryFurtherReduce(sum, &twiceReducedList, doubleReductionSize, + &boundsPt, boundsEnd, lm, stream); + } + rangeTreeFree(&rangeTree); + } +bbiSumOutStreamClose(&stream); + +/* Write out 1st zoom index. */ +int indexOffset = *retIndexStart = ftell(f); +assert(boundsPt == boundsEnd); +cirTreeFileBulkIndexToOpenFile(boundsArray, sizeof(boundsArray[0]), initialReductionCount, + blockSize, itemsPerSlot, NULL, bbiBoundsArrayFetchKey, bbiBoundsArrayFetchOffset, + indexOffset, f); + +freez(&boundsArray); +slReverse(&twiceReducedList); +return twiceReducedList; +} + +void writeBlocks(struct bbiChromUsage *usageList, struct lineFile *lf, struct asObject *as, + int itemsPerSlot, struct bbiBoundsArray *bounds, + int sectionCount, boolean doCompress, FILE *f, + int resTryCount, int resScales[], int resSizes[], + struct bbExIndexMaker *eim, int bedCount, + bits16 fieldCount, int bedN, bits32 *retMaxBlockSize) +/* Read through lf, writing it in f. Save starting points of blocks (every itemsPerSlot) + * to boundsArray */ +{ +int maxBlockSize = 0; +struct bbiChromUsage *usage = usageList; +char *line, *row[fieldCount+1]; +int lastField = fieldCount-1; +int itemIx = 0, sectionIx = 0; +bits64 blockStartOffset = 0; +int startPos = 0, endPos = 0; +bits32 chromId = 0; +struct dyString *stream = dyStringNew(0); + +/* Will keep track of some things that help us determine how much to reduce. */ +bits32 resEnds[resTryCount]; +int resTry; +for (resTry = 0; resTry < resTryCount; ++resTry) + resEnds[resTry] = 0; +boolean atEnd = FALSE, sameChrom = FALSE; +bits32 start = 0, end = 0; +char *chrom = NULL; +struct bed *bed; +AllocVar(bed); + +/* Help keep track of which beds are in current chunk so as to write out + * namedChunks to eim if need be. 
*/ +long sectionStartIx = 0, sectionEndIx = 0; + +for (;;) + { + /* Get next line of input if any. */ + if (lineFileNextReal(lf, &line)) + { + /* Chop up line and make sure the word count is right. */ + int wordCount; + wordCount = chopLine(line, row); + lineFileExpectWords(lf, fieldCount, wordCount); + loadAndValidateBedExt(row, bedN, fieldCount, lf, bed, as, FALSE, TRUE); + chrom = bed->chrom; + start = bed->chromStart; + end = bed->chromEnd; + + sameChrom = sameString(chrom, usage->name); + } + else /* No next line */ + { + atEnd = TRUE; + } + + /* Check conditions that would end block and save block info and advance to next if need be. */ + if (atEnd || !sameChrom || itemIx >= itemsPerSlot) + { + /* Save stream to file, compressing if need be. */ + if (stream->stringSize > maxBlockSize) + maxBlockSize = stream->stringSize; + if (doCompress) + { + size_t maxCompSize = zCompBufSize(stream->stringSize); + + // keep around an area of scratch memory + static int compBufSize = 0; + static char *compBuf = NULL; + // check to see if buffer needed for compression is big enough + if (compBufSize < maxCompSize) + { + // free up the old not-big-enough piece + freez(&compBuf); // freez knows bout NULL + + // get new scratch area + compBufSize = maxCompSize; + compBuf = needLargeMem(compBufSize); + } + int compSize = zCompress(stream->string, stream->stringSize, compBuf, maxCompSize); + mustWrite(f, compBuf, compSize); + } + else + mustWrite(f, stream->string, stream->stringSize); + dyStringClear(stream); + + /* Save block offset and size for all named chunks in this section. */ + if (eim != NULL) + { + bits64 blockEndOffset = ftell(f); + bbExIndexMakerAddOffsetSize(eim, blockStartOffset, blockEndOffset-blockStartOffset, + sectionStartIx, sectionEndIx); + sectionStartIx = sectionEndIx; + } + + /* Save info on existing block. 
*/ + struct bbiBoundsArray *b = &bounds[sectionIx]; + b->offset = blockStartOffset; + b->range.chromIx = chromId; + b->range.start = startPos; + b->range.end = endPos; + ++sectionIx; + itemIx = 0; + + if (atEnd) + break; + } + + /* Advance to next chromosome if need be and get chromosome id. */ + if (!sameChrom) + { + usage = usage->next; + assert(usage != NULL); + assert(sameString(chrom, usage->name)); + for (resTry = 0; resTry < resTryCount; ++resTry) + resEnds[resTry] = 0; + } + chromId = usage->id; + + /* At start of block we save a lot of info. */ + if (itemIx == 0) + { + blockStartOffset = ftell(f); + startPos = start; + endPos = end; + } + /* Otherwise just update end. */ + { + if (endPos < end) + endPos = end; + /* No need to update startPos since list is sorted. */ + } + + /* Save name into namedOffset if need be. */ + if (eim != NULL) + { + bbExIndexMakerAddKeysFromRow(eim, row, sectionEndIx); + sectionEndIx += 1; + } + + /* Write out data. */ + dyStringWriteOne(stream, chromId); + dyStringWriteOne(stream, start); + dyStringWriteOne(stream, end); + if (fieldCount > 3) + { + int i; + /* Write 3rd through next to last field and a tab separator. 
*/ + for (i=3; i= resEnd) + { + resSizes[resTry] += 1; + resEnds[resTry] = resEnd = start + resScales[resTry]; + } + while (end > resEnd) + { + resSizes[resTry] += 1; + resEnds[resTry] = resEnd = resEnd + resScales[resTry]; + } + } + } +assert(sectionIx == sectionCount); +freez(&bed); +*retMaxBlockSize = maxBlockSize; +} diff -Nru r-bioc-rtracklayer-1.48.0/src/bigBedHelper.h r-bioc-rtracklayer-1.50.0/src/bigBedHelper.h --- r-bioc-rtracklayer-1.48.0/src/bigBedHelper.h 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/bigBedHelper.h 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,54 @@ +#ifndef BIGBED_HELPER_H +#define BIGBED_HELPER_H + +#include "ucsc/sig.h" +#include "ucsc/common.h" +#include "ucsc/sqlNum.h" +#include "ucsc/asParse.h" +#include "ucsc/obscure.h" +#include "ucsc/bbiFile.h" +#include "ucsc/zlibFace.h" +#include "ucsc/basicBed.h" +#include "ucsc/bPlusTree.h" +#include "ucsc/rangeTree.h" + +#include "rtracklayer.h" + +static int maxBedNameSize; + +enum IFields +{ + i_name = 3, /* Index value of name field */ + i_score = 4, /* Index value of score field */ + i_strand = 5, /* Index value of strand field */ + i_thick = 7, /* Index value of thick field */ + i_itemRgb = 8, /* Index value of itemRgb field */ + i_blocks = 11, /* Index value of blocks field */ +}; + +int getDefinedFieldCount(struct asObject *as); +bool isPresent(int definedFieldCount, int index); +bool isSelected(SEXP r_selectedindex, int position); + +void *bbNamedFileChunkVal(const void *va); +void bbNamedFileChunkKey(const void *va, char *keyBuf); +int bbNamedFileChunkCmpByName(const void *va, const void *vb); +struct rbTree *rangeTreeForBedChrom(struct lineFile *lf, char *chrom); +void bbExIndexMakerAllocChunkArrays(struct bbExIndexMaker *eim, int recordCount); +void bbExIndexMakerAddKeysFromRow(struct bbExIndexMaker *eim, char **row, int recordIx); +struct bbExIndexMaker *bbExIndexMakerNew(struct slName *extraIndexList, struct asObject *as); +void 
bbExIndexMakerAddOffsetSize(struct bbExIndexMaker *eim, bits64 offset, bits64 size, + long startIx, long endIx); +struct bbiSummary *bedWriteReducedOnceReturnReducedTwice(struct bbiChromUsage *usageList, + int fieldCount, struct lineFile *lf, bits32 initialReduction, bits32 initialReductionCount, + int zoomIncrement, int blockSize, int itemsPerSlot, boolean doCompress, + struct lm *lm, FILE *f, bits64 *retDataStart, bits64 *retIndexStart, + struct bbiSummaryElement *totalSum); +void writeBlocks(struct bbiChromUsage *usageList, struct lineFile *lf, struct asObject *as, + int itemsPerSlot, struct bbiBoundsArray *bounds, + int sectionCount, boolean doCompress, FILE *f, + int resTryCount, int resScales[], int resSizes[], + struct bbExIndexMaker *eim, int bedCount, + bits16 fieldCount, int bedN, bits32 *retMaxBlockSize); + +#endif diff -Nru r-bioc-rtracklayer-1.48.0/src/bigWig.c r-bioc-rtracklayer-1.50.0/src/bigWig.c --- r-bioc-rtracklayer-1.48.0/src/bigWig.c 2020-04-27 20:22:29.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/bigWig.c 2020-10-27 17:24:51.000000000 +0000 @@ -7,6 +7,7 @@ #include "ucsc/bwgInternal.h" #include "bigWig.h" +#include "bbiHelper.h" #include "handlers.h" static struct bwgBedGraphItem * @@ -212,23 +213,10 @@ /* --- .Call ENTRY POINT --- */ SEXP BWGFile_seqlengths(SEXP r_filename) { pushRHandlers(); + SEXP seqlengths; struct bbiFile * file = bigWigFileOpen((char *)CHAR(asChar(r_filename))); - struct bbiChromInfo *chromList = bbiChromList(file); - struct bbiChromInfo *chrom = chromList; - SEXP seqlengths, seqlengthNames; - - PROTECT(seqlengths = allocVector(INTSXP, slCount(chromList))); - seqlengthNames = allocVector(STRSXP, length(seqlengths)); - setAttrib(seqlengths, R_NamesSymbol, seqlengthNames); - - for(int i = 0; i < length(seqlengths); i++) { - INTEGER(seqlengths)[i] = chrom->size; - SET_STRING_ELT(seqlengthNames, i, mkChar(chrom->name)); - chrom = chrom->next; - } - + PROTECT(seqlengths = bbiSeqLengths(file)); bbiFileClose(&file); - 
bbiChromInfoFreeList(&chromList); popRHandlers(); UNPROTECT(1); return seqlengths; diff -Nru r-bioc-rtracklayer-1.48.0/src/Makevars.common r-bioc-rtracklayer-1.50.0/src/Makevars.common --- r-bioc-rtracklayer-1.48.0/src/Makevars.common 2020-04-27 20:22:29.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/Makevars.common 2020-10-27 17:24:51.000000000 +0000 @@ -1,9 +1,10 @@ PKG_OBJECTS = \ S4Vectors_stubs.o IRanges_stubs.o XVector_stubs.o R_init_rtracklayer.o \ - readGFF.o bigWig.o chain_io.o twoBit.o handlers.o utils.o + readGFF.o bbiHelper.o bigWig.o bigBedHelper.o bigBed.o chain_io.o twoBit.o handlers.o utils.o UCSC_OBJECTS = \ - bPlusTree.o bbiRead.o bbiWrite.o bwgCreate.o bwgQuery.o \ + memgfx.o aliType.o binRange.o htmlColor.o sqlList.o tokenizer.o asParse.o \ + basicBed.o bigBed.o bPlusTree.o bbiRead.o bbiWrite.o bwgCreate.o bwgQuery.o \ cirTree.o common.o dnaseq.o dnautil.o errAbort.o hash.o linefile.o localmem.o\ sqlNum.o zlibFace.o dystring.o hmmstats.o obscure.o pipeline.o \ rangeTree.o rbTree.o memalloc.o dlist.o udc.o net.o bits.o twoBit.o \ diff -Nru r-bioc-rtracklayer-1.48.0/src/R_init_rtracklayer.c r-bioc-rtracklayer-1.50.0/src/R_init_rtracklayer.c --- r-bioc-rtracklayer-1.48.0/src/R_init_rtracklayer.c 2020-04-27 20:22:29.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/R_init_rtracklayer.c 2020-10-27 17:24:51.000000000 +0000 @@ -1,6 +1,7 @@ #include "rtracklayer.h" #include "readGFF.h" #include "bigWig.h" +#include "bigBed.h" #include "twoBit.h" #include "utils.h" @@ -24,6 +25,11 @@ CALLMETHOD_DEF(BWGFile_fromWIG, 4), CALLMETHOD_DEF(R_udcCleanup, 1), CALLMETHOD_DEF(R_setUserUdcDir, 1), + /* bigBed.c */ + CALLMETHOD_DEF(BBDFile_fieldnames, 1), + CALLMETHOD_DEF(BBDFile_seqlengths, 1), + CALLMETHOD_DEF(BBDFile_query, 5), + CALLMETHOD_DEF(BBDFile_write, 6), /* twobit.c */ CALLMETHOD_DEF(DNAString_to_twoBit, 3), CALLMETHOD_DEF(TwoBits_write, 2), diff -Nru r-bioc-rtracklayer-1.48.0/src/ucsc/aliType.c r-bioc-rtracklayer-1.50.0/src/ucsc/aliType.c --- 
r-bioc-rtracklayer-1.48.0/src/ucsc/aliType.c 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/ucsc/aliType.c 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,33 @@ +/* aliType - some definitions for type of alignment. */ + +/* Copyright (C) 2011 The Regents of the University of California + * See README in this or parent directory for licensing information. */ +#include "common.h" +#include "aliType.h" + + +char *gfTypeName(enum gfType type) +/* Return string representing type. */ +{ +if (type == gftDna) return "DNA"; +if (type == gftRna) return "RNA"; +if (type == gftProt) return "protein"; +if (type == gftDnaX) return "DNAX"; +if (type == gftRnaX) return "RNAX"; +internalErr(); +return NULL; +} + +enum gfType gfTypeFromName(char *name) +/* Return type from string. */ +{ +if (sameWord(name, "DNA")) return gftDna; +if (sameWord(name, "RNA")) return gftRna; +if (sameWord(name, "protein")) return gftProt; +if (sameWord(name, "prot")) return gftProt; +if (sameWord(name, "DNAX")) return gftDnaX; +if (sameWord(name, "RNAX")) return gftRnaX; +errAbort("Unknown sequence type '%s'", name); +return 0; +} + diff -Nru r-bioc-rtracklayer-1.48.0/src/ucsc/aliType.h r-bioc-rtracklayer-1.50.0/src/ucsc/aliType.h --- r-bioc-rtracklayer-1.48.0/src/ucsc/aliType.h 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/ucsc/aliType.h 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,32 @@ +/* aliType - some definitions for type of alignment. */ + +#ifndef ALITYPE_H +#define ALITYPE_H + +enum gfType +/* Types of sequence genoFind deals with. */ + { + gftDna = 0, /* DNA (genomic) */ + gftRna = 1, /* RNA */ + gftProt = 2, /* Protein. */ + gftDnaX = 3, /* Genomic DNA translated to protein */ + gftRnaX = 4, /* RNA translated to protein */ + }; + +char *gfTypeName(enum gfType type); +/* Return string representing type. */ + +enum gfType gfTypeFromName(char *name); +/* Return type from string. */ + +enum ffStringency +/* How tight of a match is required. 
*/ + { + ffExact = 0, /* Only an exact match will do. */ + + ffCdna = 1, /* Near exact. Tolerate long gaps in target (genomic) */ + ffTight = 2, /* Near exact. Not so tolerant of long gaps in target. */ + ffLoose = 3, /* Less exact. */ + }; + +#endif /* ALITYPE_H */ diff -Nru r-bioc-rtracklayer-1.48.0/src/ucsc/asParse.c r-bioc-rtracklayer-1.50.0/src/ucsc/asParse.c --- r-bioc-rtracklayer-1.48.0/src/ucsc/asParse.c 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/ucsc/asParse.c 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,705 @@ +/* asParse - parse out an autoSql .as file. */ + +/* Copyright (C) 2014 The Regents of the University of California + * See README in this or parent directory for licensing information. */ + +#include "common.h" +#include "linefile.h" +#include "tokenizer.h" +#include "dystring.h" +#include "asParse.h" +#include "sqlNum.h" + + +/* n.b. switched double/float from %f to %g to partially address losing + * precision. Values like 2e-12 were being rounded to 0.0 with %f. While %g + * doesn't match the precision of the database fields, specifying a larger + * precision with %g resulted in numbers like 1.9999999999999999597733e-12, + * which might impact load time. 
This issue needs more investigation.*/ +struct asTypeInfo asTypes[] = { + {t_double, "double", FALSE, FALSE, "double", "double", "Double", "Double", "%g", "FloatField"}, + {t_float, "float", FALSE, FALSE, "float", "float", "Float", "Float", "%g", "FloatField"}, + {t_char, "char", FALSE, FALSE, "char", "char", "Char", "Char", "%c", "CharField"}, + {t_int, "int", FALSE, FALSE, "int", "int", "Signed", "Signed", "%d", "IntegerField"}, + {t_uint, "uint", TRUE, FALSE, "int unsigned", "unsigned", "Unsigned", "Unsigned", "%u", "PositiveIntegerField"}, + {t_short, "short", FALSE, FALSE, "smallint", "short", "Short", "Signed", "%d", "SmallIntegerField"}, + {t_ushort, "ushort", TRUE, FALSE, "smallint unsigned", "unsigned short","Ushort", "Unsigned", "%u", "SmallPositiveIntegerField"}, + {t_byte, "byte", FALSE, FALSE, "tinyint", "signed char", "Byte", "Signed", "%d", "SmallIntegerField"}, + {t_ubyte, "ubyte", TRUE, FALSE, "tinyint unsigned", "unsigned char", "Ubyte", "Unsigned", "%u", "SmallPositiveIntegerField"}, + {t_off, "bigint", FALSE, FALSE, "bigint", "long long", "LongLong", "LongLong", "%lld", "BigIntegerField"}, + {t_string, "string", FALSE, TRUE, "varchar(255)", "char *", "String", "String", "%s", "CharField"}, + {t_lstring, "lstring", FALSE, TRUE, "longblob", "char *", "String", "String", "%s", "TextField"}, + {t_enum, "enum", FALSE, FALSE, "enum", "!error!", "Enum", "Enum", NULL, "CharField"}, + {t_set, "set", FALSE, FALSE, "set", "unsigned", "Set", "Set", NULL, NULL}, + {t_object, "object", FALSE, FALSE, "longblob", "!error!", "Object", "Object", NULL, "TextField"}, + {t_object, "table", FALSE, FALSE, "longblob", "!error!", "Object", "Object", NULL, "TextField"}, + {t_simple, "simple", FALSE, FALSE, "longblob", "!error!", "Simple", "Simple", NULL, "TextField"}, +}; + +struct asTypeInfo *asTypeFindLow(char *name) +/* Return asType for a low level type of given name. (Low level because may be decorated + * with array or pointer stuff at a higher level). 
Returns NULL if not found. */ +{ +int i; +for (i=0; istring); +if (type == NULL) + tokenizerErrAbort(tkz, "Unknown type '%s'", tkz->string); +return type; +} + +static void sqlSymDef(struct asColumn *col, struct dyString *dy) +/* print symbolic column definition for sql */ +{ +dyStringPrintf(dy, "%s(", col->lowType->sqlName); +struct slName *val; +for (val = col->values; val != NULL; val = val->next) + { + dyStringPrintf(dy, "\"%s\"", val->name); + if (val->next != NULL) + dyStringAppend(dy, ", "); + } +dyStringPrintf(dy, ")"); +} + +struct dyString *asColumnToSqlType(struct asColumn *col) +/* Convert column to a sql type spec in returned dyString */ +{ +struct asTypeInfo *lt = col->lowType; +struct dyString *type = dyStringNew(32); +if ((lt->type == t_enum) || (lt->type == t_set)) + sqlSymDef(col, type); +else if (col->isList || col->isArray) + dyStringPrintf(type, "longblob"); +else if (lt->type == t_char) + dyStringPrintf(type, "char(%d)", col->fixedSize ? col->fixedSize : 1); +else + dyStringPrintf(type, "%s", lt->sqlName); +return type; +} + +char *asTypeNameFromSqlType(char *sqlType) +/* Return the autoSql type name (not enum) for the given SQL type, or NULL. + * Don't attempt to free result. */ +// Unfortunately, when sqlType is longblob, we don't know whether it's a list +// of some type or an lstring. :( +{ +if (sqlType == NULL) + return NULL; +// For comparison with asTypes[*], we need to strip '(...)' strings from all types +// except 'varchar' which must be 'varchar(255)'. For 'char', we need to remember +// what was in the '(...)' so we can add back the '[...]' after type comparison. 
+boolean isArray = FALSE; +int arraySize = 0; +static char buf[1024]; +if (startsWith("varchar", sqlType)) + safecpy(buf, sizeof(buf), "varchar(255)"); +else if (sameString("blob", sqlType)) + safecpy(buf, sizeof(buf), "longblob"); +else + { + safecpy(buf, sizeof(buf), sqlType); + char *leftParen = strstr(buf, " ("); + if (leftParen == NULL) + leftParen = strchr(buf, '('); + if (leftParen != NULL) + { + isArray = startsWith("char", sqlType); + char *rightParen = strrchr(leftParen, ')'); + if (rightParen != NULL) + { + *rightParen = '\0'; + arraySize = atoi(leftParen+1); + strcpy(leftParen, rightParen+1); + } + else + errAbort("asTypeNameFromSqlType: mismatched ( in sql type def'%s'", sqlType); + } + } +int i; +for (i = 0; i < ArraySize(asTypes); i++) + if (sameString(buf, asTypes[i].sqlName)) + { + if (isArray) + { + int typeLen = strlen(buf); + safef(buf+typeLen, sizeof(buf)-typeLen, "[%d]", arraySize); + return buf; + } + else + return asTypes[i].name; + } +if (sameString(buf, "date")) + return "string"; +return NULL; +} + +static struct asColumn *findColumn(struct asObject *table, char *colName) +/* Return column or null. */ +{ +struct asColumn *col; + +for (col = table->columnList; col != NULL; col = col->next) + { + if (sameWord(col->name, colName)) + return col; + } +return NULL; +} + +static void mustNotFindColumn(struct asObject *table, char *colName) +/* Die if column found. */ +{ +struct asColumn *col = findColumn(table, colName); +if (col) + errAbort("duplicate column names found: %s, %s", col->name, colName); +} + + +static struct asColumn *mustFindColumn(struct asObject *table, char *colName) +/* Return column or die. */ +{ +struct asColumn *col = findColumn(table, colName); +if (!col) + errAbort("Couldn't find column %s", colName); +return col; +} + +static struct asObject *findObType(struct asObject *objList, char *obName) +/* Find object with given name. 
*/ +{ +struct asObject *obj; +for (obj = objList; obj != NULL; obj = obj->next) + { + if (sameWord(obj->name, obName)) + return obj; + } +return NULL; +} + +static void asParseColArraySpec(struct tokenizer *tkz, struct asObject *obj, + struct asColumn *col) +/* parse the array length specification for a column */ +{ +if (col->lowType->type == t_simple) + col->isArray = TRUE; +else + col->isList = TRUE; +tokenizerMustHaveNext(tkz); +if (isdigit(tkz->string[0])) + { + col->fixedSize = atoi(tkz->string); + tokenizerMustHaveNext(tkz); + } +else if (isalpha(tkz->string[0])) + { +#ifdef OLD + if (obj->isSimple) + tokenizerErrAbort(tkz, "simple objects can't include variable length arrays\n"); +#endif /* OLD */ + col->linkedSizeName = cloneString(tkz->string); + col->linkedSize = mustFindColumn(obj, col->linkedSizeName); + col->linkedSize->isSizeLink = TRUE; + tokenizerMustHaveNext(tkz); + } +else + tokenizerErrAbort(tkz, "must have column name or integer inside []'s\n"); +tokenizerMustMatch(tkz, "]"); +} + +static void asParseColSymSpec(struct tokenizer *tkz, struct asObject *obj, + struct asColumn *col) +/* parse the enum or set symbolic values for a column */ +{ +tokenizerMustHaveNext(tkz); +while (tkz->string[0] != ')') + { + slSafeAddHead(&col->values, slNameNew(tkz->string)); + /* look for `,' or `)', but allow `,' after last token */ + tokenizerMustHaveNext(tkz); + if (!((tkz->string[0] == ',') || (tkz->string[0] == ')'))) + tokenizerErrAbort(tkz, "expected `,' or `)' got `%s'", tkz->string); + if (tkz->string[0] != ')') + tokenizerMustHaveNext(tkz); + } +tokenizerMustMatch(tkz, ")"); +slReverse(&col->values); +} + +int tokenizerUnsignedVal(struct tokenizer *tkz) +/* Ensure current token is an unsigned integer and return value */ +{ +if (!isdigit(tkz->string[0])) + { + struct lineFile *lf = tkz->lf; + errAbort("expecting number got %s line %d of %s", tkz->string, lf->lineIx, lf->fileName); + } +return sqlUnsigned(tkz->string); +} + +struct asIndex 
*asParseIndex(struct tokenizer *tkz, struct asColumn *col) +/* See if there's an index key word and if so parse it and return an asIndex + * based on it. If not an index key word then just return NULL. */ +{ +struct asIndex *index = NULL; +if (sameString(tkz->string, "primary") || sameString(tkz->string, "unique") + || sameString(tkz->string, "index") ) + { + AllocVar(index); + index->type = cloneString(tkz->string); + tokenizerMustHaveNext(tkz); + if (tkz->string[0] == '[') + { + tokenizerMustHaveNext(tkz); + index->size = tokenizerUnsignedVal(tkz); + tokenizerMustHaveNext(tkz); + tokenizerMustMatch(tkz, "]"); + } + } +return index; +} + +static void asParseColDef(struct tokenizer *tkz, struct asObject *obj) +/* Parse a column definition */ +{ +struct asColumn *col; +AllocVar(col); + +col->lowType = findLowType(tkz); +tokenizerMustHaveNext(tkz); + +if (col->lowType->type == t_object || col->lowType->type == t_simple) + { + col->obName = cloneString(tkz->string); + tokenizerMustHaveNext(tkz); + } + +if (tkz->string[0] == '[') + asParseColArraySpec(tkz, obj, col); +else if (tkz->string[0] == '(') + asParseColSymSpec(tkz, obj, col); + +col->name = cloneString(tkz->string); +mustNotFindColumn(obj, col->name); // check for duplicate column name +tokenizerMustHaveNext(tkz); +col->index = asParseIndex(tkz, col); +if (sameString(tkz->string, "auto")) + { + col->autoIncrement = TRUE; + if (!asTypesIsInt(col->lowType->type)) + errAbort("error - auto with non-integer type for field %s", col->name); + tokenizerMustHaveNext(tkz); + } +tokenizerMustMatch(tkz, ";"); +col->comment = cloneString(tkz->string); +tokenizerMustHaveNext(tkz); +if (col->lowType->type == t_char && col->fixedSize != 0) + col->isList = FALSE; /* It's not really a list... 
*/ +slAddHead(&obj->columnList, col); +} + +static struct asObject *asParseTableDef(struct tokenizer *tkz) +/* Parse a table or object definintion */ +{ +struct asObject *obj; +AllocVar(obj); +if (sameWord(tkz->string, "table")) + obj->isTable = TRUE; +else if (sameWord(tkz->string, "simple")) + obj->isSimple = TRUE; +else if (sameWord(tkz->string, "object")) + ; +else + tokenizerErrAbort(tkz, "Expecting 'table' or 'object' got '%s'", tkz->string); +tokenizerMustHaveNext(tkz); +obj->name = cloneString(tkz->string); +tokenizerMustHaveNext(tkz); +obj->comment = cloneString(tkz->string); + +/* parse columns */ +tokenizerMustHaveNext(tkz); +tokenizerMustMatch(tkz, "("); +while (tkz->string[0] != ')') + asParseColDef(tkz, obj); +slReverse(&obj->columnList); +return obj; +} + +static void asLinkEmbeddedObjects(struct asObject *obj, struct asObject *objList) +/* Look up any embedded objects. */ +{ +struct asColumn *col; +for (col = obj->columnList; col != NULL; col = col->next) + { + if (col->obName != NULL) + { + if ((col->obType = findObType(objList, col->obName)) == NULL) + errAbort("%s used but not defined", col->obName); + if (obj->isSimple) + { + if (!col->obType->isSimple) + errAbort("Simple object %s with embedded non-simple object %s", + obj->name, col->name); + } + } + } +} + +static struct asObject *asParseTokens(struct tokenizer *tkz) +/* Parse file into a list of objects. */ +{ +struct asObject *objList = NULL; +struct asObject *obj; + +while (tokenizerNext(tkz)) + { + obj = asParseTableDef(tkz); + if (findObType(objList, obj->name)) + tokenizerErrAbort(tkz, "Duplicate definition of %s", obj->name); + slAddTail(&objList, obj); + } + +for (obj = objList; obj != NULL; obj = obj->next) + asLinkEmbeddedObjects(obj, objList); + +return objList; +} + +char *asTypesIntSizeDescription(enum asTypes type) +/* Return description of integer size. Do not free. 
*/ +{ +int size = asTypesIntSize(type); +switch (size) + { + case 1: + return "byte"; + case 2: + return "short integer"; + case 4: + return "integer"; + case 8: + return "long long integer"; + default: + errAbort("Unexpected error in asTypesIntSizeDescription: expecting integer type size of 1, 2, 4, or 8. Got %d.", size); + return NULL; // happy compiler, never gets here + + } +} + +int asTypesIntSize(enum asTypes type) +/* Return size in bytes of any integer type - short, long, unsigned, etc. */ +{ +switch (type) + { + case t_int: + case t_uint: + return 4; + case t_short: + case t_ushort: + return 2; + case t_byte: + case t_ubyte: + return 1; + case t_off: + return 8; + default: + errAbort("Unexpected error in asTypesIntSize: expecting integer type. Got %d.", type); + return 0; // happy compiler, never gets here + } +} + +boolean asTypesIsUnsigned(enum asTypes type) +/* Return TRUE if it's any integer type - short, long, unsigned, etc. */ +{ +switch (type) + { + case t_uint: + case t_ushort: + case t_ubyte: + return TRUE; + default: + return FALSE; + } +} + +boolean asTypesIsInt(enum asTypes type) +/* Return TRUE if it's any integer type - short, long, unsigned, etc. */ +{ +switch (type) + { + case t_int: + case t_uint: + case t_short: + case t_ushort: + case t_byte: + case t_ubyte: + case t_off: + return TRUE; + default: + return FALSE; + } +} + +boolean asTypesIsFloating(enum asTypes type) +/* Return TRUE if it's any floating point type - float or double. */ +{ +switch (type) + { + case t_float: + case t_double: + return TRUE; + default: + return FALSE; + } +} + +static struct asObject *asParseLineFile(struct lineFile *lf) +/* Parse open line file. Closes lf as a side effect. */ +{ +struct tokenizer *tkz = tokenizerOnLineFile(lf); +tkz->uncommentShell = TRUE; /* Take out # style comments. 
*/ +struct asObject *objList = asParseTokens(tkz); +tokenizerFree(&tkz); +return objList; +} + + +void asColumnFree(struct asColumn **pAs) +/* free a single asColumn */ +{ +struct asColumn *as = *pAs; +if (as != NULL) + { + freeMem(as->name); + freeMem(as->comment); + freez(pAs); + } +} + + +void asColumnFreeList(struct asColumn **pList) +/* free a list of asColumn */ +{ +struct asColumn *el, *next; + +for (el = *pList; el != NULL; el = next) + { + next = el->next; + asColumnFree(&el); + } +*pList = NULL; +} + +void asObjectFree(struct asObject **pAs) +/* free a single asObject */ +{ +struct asObject *as = *pAs; +if (as != NULL) + { + freeMem(as->name); + freeMem(as->comment); + asColumnFreeList(&as->columnList); + freez(pAs); + } +} + + +void asObjectFreeList(struct asObject **pList) +/* free a list of asObject */ +{ +struct asObject *el, *next; + +for (el = *pList; el != NULL; el = next) + { + next = el->next; + asObjectFree(&el); + } +*pList = NULL; +} + +struct asObject *asParseFile(char *fileName) +/* Parse autoSql .as file. */ +{ +return asParseLineFile(lineFileOpen(fileName, TRUE)); +} + + +struct asObject *asParseText(char *text) +/* Parse autoSql from text (as opposed to file). */ +{ +char *dupe = cloneString(text); +struct lineFile *lf = lineFileOnString("text", TRUE, dupe); +struct asObject *objList = asParseLineFile(lf); +freez(&dupe); +return objList; +} + +struct asColumn *asColumnFind(struct asObject *asObj, char *name) +// Return named column. +{ +struct asColumn *asCol = NULL; +if (asObj!= NULL) + { + for (asCol = asObj->columnList; asCol != NULL; asCol = asCol->next) + if (sameString(asCol->name, name)) + break; + } +return asCol; +} + +int asColumnFindIx(struct asColumn *list, char *name) +/* Return index of first element of asColumn list that matches name. + * Return -1 if not found. 
*/ +{ +struct asColumn *ac; +int ix = 0; +for (ac = list; ac != NULL; ac = ac->next, ix++) + if (sameString(name, ac->name)) + return ix; +return -1; +} + +int asColumnMustFindIx(struct asColumn *list, char *name) +/* Return index of first element of asColumn list that matches name. + * errAbort if not found. */ +{ +int ix = asColumnFindIx(list, name); +if (ix < 0) + errAbort("asColumnMustFindIx: cannot find column \"%s\" in list", name); +return ix; +} + +boolean asCompareObjs(char *name1, struct asObject *as1, char *name2, struct asObject *as2, int numColumnsToCheck, + int *retNumColumnsSame, boolean abortOnDifference) +/* Compare as-objects as1 and as2 making sure several important fields show they are the same name and type. + * If difference found, print it to stderr. If abortOnDifference, errAbort. + * Othewise, return TRUE if the objects columns match through the first numColumnsToCheck fields. + * If retNumColumnsSame is not NULL, then it will be set to the number of contiguous matching columns. 
*/ +{ +boolean differencesFound = FALSE; +struct asColumn *col1 = as1->columnList, *col2 = as2->columnList; +int checkCount = 0; +int verboseLevel = 2; +if (abortOnDifference) + verboseLevel = 1; +if (as1->isTable != as2->isTable) + { + verbose(verboseLevel,"isTable does not match: %s=[%d] %s=[%d]", name1, as1->isTable, name2, as2->isTable); + differencesFound = TRUE; + } +else if (as1->isSimple != as2->isSimple) + { + verbose(verboseLevel,"isSimple does not match: %s=[%d] %s=[%d]", name1, as1->isSimple, name2, as2->isSimple); + differencesFound = TRUE; + } +else + { + if (!as1->isTable) + { + errAbort("asCompareObjLists only supports Table .as objects at this time."); + } + for (col1 = as1->columnList, col2 = as2->columnList; + col1 != NULL && col2 != NULL && checkCount < numColumnsToCheck; + col1 = col1->next, col2 = col2->next, ++checkCount) + { + // allow reserved fields to become used + if (!(sameOk(col1->name, col2->name) || sameOk(col1->name, "reserved") || sameOk("reserved", col2->name))) + { + verbose(verboseLevel,"column #%d names do not match: %s=[%s] %s=[%s]\n" + , checkCount+1, name1, col1->name, name2, col2->name); + differencesFound = TRUE; + break; + } + else if (col1->isSizeLink != col2->isSizeLink) + { + verbose(verboseLevel,"column #%d isSizeLink do not match: %s=[%d] %s=[%d]\n" + , checkCount+1, name1, col1->isSizeLink, name2, col2->isSizeLink); + differencesFound = TRUE; + break; + } + else if (col1->isList != col2->isList) + { + verbose(verboseLevel,"column #%d isList do not match: %s=[%d] %s=[%d]\n" + , checkCount+1, name1, col1->isList, name2, col2->isList); + differencesFound = TRUE; + break; + } + else if (col1->isArray != col2->isArray) + { + verbose(verboseLevel,"column #%d isArray do not match: %s=[%d] %s=[%d]\n" + , checkCount+1, name1, col1->isArray, name2, col2->isArray); + differencesFound = TRUE; + break; + } + else if (!sameOk(col1->lowType->name, col2->lowType->name)) + { + verbose(verboseLevel,"column #%d type names do not 
match: %s=[%s] %s=[%s]\n" + , checkCount+1, name1, col1->lowType->name, name2, col2->lowType->name); + differencesFound = TRUE; + break; + } + else if (col1->fixedSize != col2->fixedSize) + { + verbose(verboseLevel,"column #%d fixedSize do not match: %s=[%d] %s=[%d]\n" + , checkCount+1, name1, col1->fixedSize, name2, col2->fixedSize); + differencesFound = TRUE; + break; + } + else if (!sameOk(col1->linkedSizeName, col2->linkedSizeName)) + { + verbose(verboseLevel,"column #%d linkedSizeName do not match: %s=[%s] %s=[%s]\n" + , checkCount+1, name1, col1->linkedSizeName, name2, col2->linkedSizeName); + differencesFound = TRUE; + break; + } + } + if (!differencesFound && checkCount < numColumnsToCheck) + errAbort("Unexpected error in asCompareObjLists: asked to compare %d columns in %s and %s, but only found %d in one or both asObjects." + , numColumnsToCheck, name1, name2, checkCount); + } +if (differencesFound) + { + if (abortOnDifference) + errAbort("asObjects differ."); + else + verbose(verboseLevel,"asObjects differ. Matching field count=%d\n", checkCount); + } +if (retNumColumnsSame) + *retNumColumnsSame = checkCount; +return (!differencesFound); +} + +boolean asColumnNamesMatchFirstN(struct asObject *as1, struct asObject *as2, int n) +/* Compare only the column names of as1 and as2, not types because if an asObj has been + * created from sql type info, longblobs are cast to lstrings but in the proper autoSql + * might be lists instead (e.g. longblob in sql, uint exonStarts[exonCount] in autoSql. */ +{ +struct asColumn *col1 = as1->columnList, *col2 = as2->columnList; +int checkCount = 0; +for (col1 = as1->columnList, col2 = as2->columnList; + col1 != NULL && col2 != NULL && checkCount < n; + col1 = col1->next, col2 = col2->next, ++checkCount) + { + char *name1 = col1->name; + char *name2 = col2->name; + // Ignore initial _ -- sometimes added to bigBed field names to suppress hgc display. 
+ if (name1 && name1[0] == '_') + name1++; + if (name2 && name2[0] == '_') + name2++; + if (!sameOk(name1, name2)) + return FALSE; + } +return TRUE; +} diff -Nru r-bioc-rtracklayer-1.48.0/src/ucsc/asParse.h r-bioc-rtracklayer-1.50.0/src/ucsc/asParse.h --- r-bioc-rtracklayer-1.48.0/src/ucsc/asParse.h 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/ucsc/asParse.h 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,155 @@ +/* asParse - parse out an autoSql .as file. */ + +#ifndef ASPARSE_H +#define ASPARSE_H + +enum asTypes +/* Different low level types (not including lists and objects) */ + { + t_double, /* double precision floating point. */ + t_float, /* single precision floating point. */ + t_char, /* character or fixed size character array. */ + t_int, /* signed 32 bit integer */ + t_uint, /* unsigned 32 bit integer */ + t_short, /* signed 16 bit integer */ + t_ushort, /* unsigned 16 bit integer */ + t_byte, /* signed 8 bit integer */ + t_ubyte, /* unsigned 8 bit integer */ + t_off, /* 64 bit integer. */ + t_string, /* varchar/char * (variable size string up to 255 chars) */ + t_lstring, /* variable sized large string. */ + t_object, /* composite object - object/table - forms lists. */ + t_simple, /* simple composite object - forms arrays. */ + t_enum, /* enumerated symbolic values */ + t_set, /* set of symbolic values */ + }; + +char *asTypesIntSizeDescription(enum asTypes type); +/* Return description of integer size. Do not free. */ + +int asTypesIntSize(enum asTypes type); +/* Return size in bytes of any integer type - short, long, unsigned, etc. */ + +boolean asTypesIsUnsigned(enum asTypes type); +/* Return TRUE if it's any integer type - short, long, unsigned, etc. */ + +boolean asTypesIsInt(enum asTypes type); +/* Return TRUE if it's any integer type - short, long, unsigned, etc. */ + +boolean asTypesIsFloating(enum asTypes type); +/* Return TRUE if it's any floating point type - float or double. 
*/ + +struct asTypeInfo + { + enum asTypes type; /* Numeric ID of low level type. */ + char *name; /* Text ID of low level type. */ + bool isUnsigned; /* True if an unsigned int of some type. */ + bool stringy; /* True if a string or blob. */ + char *sqlName; /* SQL type name. */ + char *cName; /* C type name. */ + char *listyName; /* What functions that load a list are called. */ + char *nummyName; /* What functions that load a number are called. */ + char *outFormat; /* Output format for printf. %d, %u, etc. */ + char *djangoName; /* Django type name */ + }; + +struct asTypeInfo *asTypeFindLow(char *name); +/* Return asType for a low level type of given name. (Low level because may be decorated + * with array or pointer stuff at a higher level). Returns NULL if not found. */ + +struct asIndex +/* Information about an index */ + { + struct asIndex *next; /* In case it needs to be on a list. */ + char *type; /* 'primary' 'index' or 'uniq' to pass to SQL */ + int size; /* If nonzero only index prefix of this many chars. */ + }; + +struct asColumn +/* Info on one column/field */ + { + struct asColumn *next; /* Next column. */ + char *name; /* Column name. */ + char *comment; /* Comment string on column. */ + struct asTypeInfo *lowType; /* Root type info. */ + char *obName; /* Name of object or table. */ + struct asObject *obType; /* Name of composite object. */ + int fixedSize; /* 0 if not fixed size, otherwise size of list. */ + char *linkedSizeName; /* Points to variable that holds size of list. */ + struct asColumn *linkedSize; /* Column for linked size. */ + bool isSizeLink; /* Flag to tell if have read link. */ + bool isList; /* TRUE if a list. */ + bool isArray; /* TRUE if an array. */ + bool autoIncrement; /* TRUE if we want to auto_increment this field. */ + struct slName *values; /* values for symbolic types */ + struct asIndex *index; /* Possibly null index description. */ + }; + +struct asObject +/* Info on whole asObject. 
*/ + { + struct asObject *next; + char *name; /* Name of object. */ + char *comment; /* Comment describing object. */ + struct asColumn *columnList; /* List of columns. */ + bool isTable; /* True if a table. */ + bool isSimple; /* True if a simple object. */ + }; + +struct dyString *asColumnToSqlType(struct asColumn *col); +/* Convert column to a sql type spec in returned dyString */ + +char *asTypeNameFromSqlType(char *sqlType); +/* Return the autoSql type name (not enum) for the given SQL type, or NULL. + * Don't attempt to free result. */ + +struct asObject *asParseFile(char *fileName); +/* Parse autoSql .as file. */ + +struct asObject *asParseText(char *text); +/* Parse autoSql from text (as opposed to file). */ + +void asObjectFree(struct asObject **as); +/* free a single asObject */ + +void asObjectFreeList(struct asObject **as); +/* free a list of asObject */ + +void asColumnFree(struct asColumn **as); +/* free a single asColumn */ + +void asColumnFreeList(struct asColumn **as); +/* free a list of asColumn */ + +struct asColumn *asColumnFind(struct asObject *as, char *name); +/* Return column of given name from object, or NULL if not found. */ + +int asColumnFindIx(struct asColumn *list, char *name); +/* Return index of first element of asColumn list that matches name. + * Return -1 if not found. */ + +int asColumnMustFindIx(struct asColumn *list, char *name); +/* Return index of first element of asColumn list that matches name. + * errAbort if not found. */ + +boolean asCompareObjs(char *name1, struct asObject *as1, char *name2, struct asObject *as2, int numColumnsToCheck, + int *retNumColumnsSame, boolean abortOnDifference); +/* Compare as-objects as1 and as2 making sure several important fields show they are the same name and type. + * If difference found, print it to stderr. If abortOnDifference, errAbort. + * Othewise, return TRUE if the objects columns match through the first numColumnsToCheck fields. 
+ * If retNumColumnsSame is not NULL, then it will be set to the number of contiguous matching columns. */ + +INLINE boolean asObjectsMatch(struct asObject *as1, struct asObject *as2) +{ +int colCount = slCount(as1->columnList); +if (slCount(as2->columnList) != colCount) + return FALSE; +return asCompareObjs(as1->name, as1, as2->name, as2, colCount, NULL, FALSE); +} + +boolean asColumnNamesMatchFirstN(struct asObject *as1, struct asObject *as2, int n); +/* Compare only the column names of as1 and as2, not types because if an asObj has been + * created from sql type info, longblobs are cast to lstrings but in the proper autoSql + * might be lists instead (e.g. longblob in sql, uint exonStarts[exonCount] in autoSql. */ + +#endif /* ASPARSE_H */ diff -Nru r-bioc-rtracklayer-1.48.0/src/ucsc/basicBed.c r-bioc-rtracklayer-1.50.0/src/ucsc/basicBed.c --- r-bioc-rtracklayer-1.48.0/src/ucsc/basicBed.c 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/ucsc/basicBed.c 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,1767 @@ +/* basicBed contains the basic code for Browser Extensible Data (bed) files and tables. + * The idea behind bed is that the first three fields are defined and required. + * A total of 15 fields are defined, and the file can contain any number of these. + * In addition after any number of defined fields there can be custom fields that + * are not defined in the bed spec. + * + * There's additional bed-related code in src/hg/inc/bed.h. This module contains the + * stuff that's independent of the database and other genomic structures. */ + +/* Copyright (C) 2014 The Regents of the University of California + * See README in this or parent directory for licensing information. 
*/ + + +#include "common.h" +#include "hash.h" +#include "linefile.h" +#include "dystring.h" +#include "sqlNum.h" +#include "sqlList.h" +#include "rangeTree.h" +#include "binRange.h" +#include "asParse.h" +#include "htmlColor.h" +#include "basicBed.h" +#include "memgfx.h" +#include "localmem.h" + + +void bedStaticLoad(char **row, struct bed *ret) +/* Load a row from bed table into ret. The contents of ret will + * be replaced at the next call to this function. */ +{ +ret->chrom = row[0]; +ret->chromStart = sqlUnsigned(row[1]); +ret->chromEnd = sqlUnsigned(row[2]); +ret->name = row[3]; +} + +struct bed *bedLoad(char **row) +/* Load a bed from row fetched with select * from bed + * from database. Dispose of this with bedFree(). */ +{ +struct bed *ret; +AllocVar(ret); +ret->chrom = cloneString(row[0]); +ret->chromStart = sqlUnsigned(row[1]); +ret->chromEnd = sqlUnsigned(row[2]); +ret->name = cloneString(row[3]); +return ret; +} + +struct bed *bedCommaIn(char **pS, struct bed *ret) +/* Create a bed out of a comma separated string. + * This will fill in ret if non-null, otherwise will + * return a new bed */ +{ +char *s = *pS; + +if (ret == NULL) + AllocVar(ret); +ret->chrom = sqlStringComma(&s); +ret->chromStart = sqlUnsignedComma(&s); +ret->chromEnd = sqlUnsignedComma(&s); +ret->name = sqlStringComma(&s); +*pS = s; +return ret; +} + +void bedFree(struct bed **pEl) +/* Free a single dynamically allocated bed such as created + * with bedLoad(). */ +{ +struct bed *el; + +if ((el = *pEl) == NULL) return; +freeMem(el->chrom); +freeMem(el->name); +freeMem(el->blockSizes); +freeMem(el->chromStarts); +freeMem(el->expIds); +freeMem(el->expScores); +freez(pEl); +} + +void bedFreeList(struct bed **pList) +/* Free a list of dynamically allocated bed's */ +{ +struct bed *el, *next; + +for (el = *pList; el != NULL; el = next) + { + next = el->next; + bedFree(&el); + } +*pList = NULL; +} + +void bedOutput(struct bed *el, FILE *f, char sep, char lastSep) +/* Print out bed. 
Separate fields with sep. Follow last field with lastSep. */ +{ +if (sep == ',') fputc('"',f); +fprintf(f, "%s", el->chrom); +if (sep == ',') fputc('"',f); +fputc(sep,f); +fprintf(f, "%u", el->chromStart); +fputc(sep,f); +fprintf(f, "%u", el->chromEnd); +fputc(sep,f); +if (sep == ',') fputc('"',f); +fprintf(f, "%s", el->name); +if (sep == ',') fputc('"',f); +fputc(lastSep,f); +} + +/* --------------- End of AutoSQL generated code. --------------- */ + +int bedCmp(const void *va, const void *vb) +/* Compare to sort based on chrom,chromStart. */ +{ +const struct bed *a = *((struct bed **)va); +const struct bed *b = *((struct bed **)vb); +int dif; +dif = strcmp(a->chrom, b->chrom); +if (dif == 0) + dif = a->chromStart - b->chromStart; +return dif; +} + +int bedCmpEnd(const void *va, const void *vb) +/* Compare to sort based on chrom,chromEnd. */ +{ +const struct bed *a = *((struct bed **)va); +const struct bed *b = *((struct bed **)vb); +int dif; +dif = strcmp(a->chrom, b->chrom); +if (dif == 0) + dif = a->chromEnd - b->chromEnd; +return dif; +} + +int bedCmpScore(const void *va, const void *vb) +/* Compare to sort based on score - lowest first. */ +{ +const struct bed *a = *((struct bed **)va); +const struct bed *b = *((struct bed **)vb); +return a->score - b->score; +} + +int bedCmpPlusScore(const void *va, const void *vb) +/* Compare to sort based on chrom,chromStart. 
*/ +{ +const struct bed *a = *((struct bed **)va); +const struct bed *b = *((struct bed **)vb); +int dif; +dif = strcmp(a->chrom, b->chrom); +if (dif == 0) + { + dif = (a->chromStart - b->chromStart) * 1000 +(a->score - b->score); + } +return dif; +} + +int bedCmpSize(const void *va, const void *vb) +/* Compare to sort based on size of element (end-start == size) */ +{ +const struct bed *a = *((struct bed **)va); +const struct bed *b = *((struct bed **)vb); +int a_size = a->chromEnd - a->chromStart; +int b_size = b->chromEnd - b->chromStart; +return (a_size - b_size); +} + +int bedCmpChromStrandStartName(const void *va, const void *vb) +/* Compare to sort based on chrom,strand,chromStart. */ +{ +const struct bed *a = *((struct bed **)va); +const struct bed *b = *((struct bed **)vb); +int dif; + +dif = strcmp(a->name, b->name); +if (dif == 0) + dif = strcmp(a->chrom, b->chrom); +if (dif == 0) + dif = strcmp(a->strand, b->strand); +if (dif == 0) + dif = a->chromStart - b->chromStart; +return dif; +} + +int bedCmpChromStrandStart(const void *va, const void *vb) +/* Compare to sort based on chrom,strand,chromStart. */ +{ +const struct bed *a = *((struct bed **)va); +const struct bed *b = *((struct bed **)vb); +int dif; +dif = strcmp(a->chrom, b->chrom); +if (dif == 0) + dif = strcmp(a->strand, b->strand); +if (dif == 0) + dif = a->chromStart - b->chromStart; +return dif; +} + +struct bedLine *bedLineNew(char *line) +/* Create a new bedLine based on tab-separated string s. */ +{ +struct bedLine *bl; +char *s, c; + +AllocVar(bl); +bl->chrom = cloneString(line); +s = strchr(bl->chrom, '\t'); +if (s == NULL) + errAbort("Expecting tab in bed line %s", line); +*s++ = 0; +c = *s; +if (isdigit(c) || (c == '-' && isdigit(s[1]))) + { + bl->chromStart = atoi(s); + bl->line = s; + return bl; + } +else + { + errAbort("Expecting start position in second field of %s", line); + return NULL; + } +} + +void bedLineFree(struct bedLine **pBl) +/* Free up memory associated with bedLine. 
*/ +{ +struct bedLine *bl; + +if ((bl = *pBl) != NULL) + { + freeMem(bl->chrom); + freez(pBl); + } +} + +void bedLineFreeList(struct bedLine **pList) +/* Free a list of dynamically allocated bedLine's */ +{ +struct bedLine *el, *next; + +for (el = *pList; el != NULL; el = next) + { + next = el->next; + bedLineFree(&el); + } +*pList = NULL; +} + + +int bedLineCmp(const void *va, const void *vb) +/* Compare to sort based on query. */ +{ +const struct bedLine *a = *((struct bedLine **)va); +const struct bedLine *b = *((struct bedLine **)vb); +int dif; +dif = strcmp(a->chrom, b->chrom); +if (dif == 0) + dif = a->chromStart - b->chromStart; +return dif; +} + + +void bedSortFile(char *inFile, char *outFile) +/* Sort a bed file (in place, overwrites old file. */ +{ +struct lineFile *lf = NULL; +FILE *f = NULL; +struct bedLine *blList = NULL, *bl; +char *line; +int lineSize; + +verbose(2, "Reading %s\n", inFile); +lf = lineFileOpen(inFile, TRUE); +while (lineFileNext(lf, &line, &lineSize)) + { + if (line[0] == '#') + continue; + bl = bedLineNew(line); + slAddHead(&blList, bl); + } +lineFileClose(&lf); + +verbose(2, "Sorting\n"); +slSort(&blList, bedLineCmp); + +verbose(2, "Writing %s\n", outFile); +f = mustOpen(outFile, "w"); +for (bl = blList; bl != NULL; bl = bl->next) + { + fprintf(f, "%s\t%s\n", bl->chrom, bl->line); + if (ferror(f)) + { + perror("Writing error\n"); + errAbort("%s is truncated, sorry.", outFile); + } + } +fclose(f); +} + +struct bed *bedLoad3(char **row) +/* Load first three fields of bed. */ +{ +struct bed *ret; +AllocVar(ret); +ret->chrom = cloneString(row[0]); +ret->chromStart = sqlUnsigned(row[1]); +ret->chromEnd = sqlUnsigned(row[2]); +return ret; +} + +struct bed *bedLoad5(char **row) +/* Load first five fields of bed. 
*/ +{ +struct bed *ret; +AllocVar(ret); +ret->chrom = cloneString(row[0]); +ret->chromStart = sqlUnsigned(row[1]); +ret->chromEnd = sqlUnsigned(row[2]); +ret->name = cloneString(row[3]); +ret->score = sqlSigned(row[4]); +return ret; +} + +struct bed *bedLoad6(char **row) +/* Load first six fields of bed. */ +{ +struct bed *ret = bedLoad5(row); +safef(ret->strand, sizeof(ret->strand), "%s", row[5]); +return ret; +} + +/* it turns out that it isn't just hgLoadBed and custom tracks + * that may encounter the r,g,b specification. Any program that + * reads bed files may enconter them, so take care of them + * at any time. The strchr() function is very fast which will + * be a failure in the vast majority of cases parsing integers, + * therefore, this shouldn't be too severe a performance hit. + */ +int itemRgbColumn(char *column9) +/* Convert color specification to internal format. */ +{ +int itemRgb = 0; +/* Allow comma separated list of rgb values here */ +char *comma = strchr(column9, ','); +if (comma) + { + if (-1 == (itemRgb = bedParseRgb(column9))) + errAbort("ERROR: expecting r,g,b specification, " + "found: '%s'", column9); + } +else + itemRgb = sqlUnsigned(column9); +return itemRgb; +} + +struct bed *bedLoad12(char **row) +/* Load a bed from row fetched with select * from bed + * from database. Dispose of this with bedFree(). 
*/ +{ +struct bed *ret; +int sizeOne; + +AllocVar(ret); +ret->blockCount = sqlSigned(row[9]); +ret->chrom = cloneString(row[0]); +ret->chromStart = sqlUnsigned(row[1]); +ret->chromEnd = sqlUnsigned(row[2]); +ret->name = cloneString(row[3]); +ret->score = sqlSigned(row[4]); +strcpy(ret->strand, row[5]); +ret->thickStart = sqlUnsigned(row[6]); +ret->thickEnd = sqlUnsigned(row[7]); +ret->itemRgb = itemRgbColumn(row[8]); +sqlSignedDynamicArray(row[10], &ret->blockSizes, &sizeOne); +assert(sizeOne == ret->blockCount); +sqlSignedDynamicArray(row[11], &ret->chromStarts, &sizeOne); +assert(sizeOne == ret->blockCount); +return ret; +} + +struct bed *bedLoadN(char *row[], int wordCount) +/* Convert a row of strings to a bed. */ +{ +struct bed * bed; +int count; + +AllocVar(bed); +bed->chrom = cloneString(row[0]); +bed->chromStart = sqlUnsigned(row[1]); +bed->chromEnd = sqlUnsigned(row[2]); +if (wordCount > 3) + bed->name = cloneString(row[3]); +if (wordCount > 4) + bed->score = sqlSigned(row[4]); +if (wordCount > 5) + bed->strand[0] = row[5][0]; +if (wordCount > 6) + bed->thickStart = sqlUnsigned(row[6]); +else + bed->thickStart = bed->chromStart; +if (wordCount > 7) + bed->thickEnd = sqlUnsigned(row[7]); +else + bed->thickEnd = bed->chromEnd; +if (wordCount > 8) + bed->itemRgb = itemRgbColumn(row[8]); +if (wordCount > 9) + bed->blockCount = sqlUnsigned(row[9]); +if (wordCount > 10) + sqlSignedDynamicArray(row[10], &bed->blockSizes, &count); +if (wordCount > 11) + sqlSignedDynamicArray(row[11], &bed->chromStarts, &count); +if (wordCount > 12) + bed->expCount = sqlUnsigned(row[12]); +if (wordCount > 13) + sqlSignedDynamicArray(row[13], &bed->expIds, &count); +if (wordCount > 14) + sqlFloatDynamicArray(row[14], &bed->expScores, &count); +return bed; +} + +struct bed *bedLoadNAllChrom(char *fileName, int numFields, char* chrom) +/* Load bed entries from a tab-separated file that have the given chrom. + * Dispose of this with bedFreeList(). 
*/ +{ +struct bed *list = NULL, *el; +struct lineFile *lf = lineFileOpen(fileName, TRUE); +char *row[numFields]; + +while (lineFileRow(lf, row)) + { + el = bedLoadN(row, numFields); + if(chrom == NULL || sameString(el->chrom, chrom)) + slAddHead(&list, el); + else + bedFree(&el); + } +lineFileClose(&lf); +slReverse(&list); +return list; +} + +struct bed *bedLoadNAll(char *fileName, int numFields) +/* Load all bed from a tab-separated file. + * Dispose of this with bedFreeList(). */ +{ +return bedLoadNAllChrom(fileName, numFields, NULL); +} + +struct bed *bedLoadAll(char *fileName) +/* Determines how many fields are in a bedFile and load all beds from + * a tab-separated file. Dispose of this with bedFreeList(). */ +{ +struct bed *list = NULL; +struct lineFile *lf = lineFileOpen(fileName, TRUE); +char *line, *row[bedKnownFields]; + +while (lineFileNextReal(lf, &line)) + { + int numFields = chopByWhite(line, row, ArraySize(row)); + if (numFields < 4) + errAbort("file %s doesn't appear to be in bed format. At least 4 fields required, got %d", fileName, numFields); + slAddHead(&list, bedLoadN(row, numFields)); + } +lineFileClose(&lf); +slReverse(&list); +return list; +} + +void bedLoadAllReturnFieldCountAndRgb(char *fileName, struct bed **retList, int *retFieldCount, + boolean *retRgb) +/* Load bed of unknown size and return number of fields as well as list of bed items. + * Ensures that all lines in bed file have same field count. Also returns whether + * column 9 is being used as RGB or not. */ +{ +struct bed *list = NULL; +struct lineFile *lf = lineFileOpen(fileName, TRUE); +char *line, *row[bedKnownFields]; +int fieldCount = 0; +boolean isRgb = FALSE; + +while (lineFileNextReal(lf, &line)) + { + int numFields = chopByWhite(line, row, ArraySize(row)); + if (numFields < 4) + errAbort("file %s doesn't appear to be in bed format. 
At least 4 fields required, got %d", + fileName, numFields); + if (fieldCount == 0) + { + fieldCount = numFields; + if (fieldCount >= 9) + isRgb = (strchr(row[8], ',') != NULL); + } + else + if (fieldCount != numFields) + errAbort("Inconsistent number of fields in file. %d on line %d of %s, %d previously.", + numFields, lf->lineIx, lf->fileName, fieldCount); + slAddHead(&list, bedLoadN(row, fieldCount)); + } +lineFileClose(&lf); +slReverse(&list); +*retList = list; +*retFieldCount = fieldCount; +if (retRgb != NULL) + *retRgb = isRgb; +} + +void bedLoadAllReturnFieldCount(char *fileName, struct bed **retList, int *retFieldCount) +/* Load bed of unknown size and return number of fields as well as list of bed items. + * Ensures that all lines in bed file have same field count. */ +{ +bedLoadAllReturnFieldCountAndRgb(fileName, retList, retFieldCount, NULL); +} + +void bedOutFlexible(struct bed *el, int wordCount, FILE *f, + char sep, char lastSep, boolean useItemRgb) +/* Write a bed of wordCount fields, optionally interpreting field nine as R,G,B values. 
*/ +{ +int i; +if (sep == ',') fputc('"',f); +fprintf(f, "%s", el->chrom); +if (sep == ',') fputc('"',f); +fputc(sep,f); +fprintf(f, "%u", el->chromStart); +fputc(sep,f); +fprintf(f, "%u", el->chromEnd); +if (wordCount <= 3) + { + fputc(lastSep, f); + return; + } +fputc(sep,f); +if (sep == ',') fputc('"',f); +fprintf(f, "%s", el->name); +if (sep == ',') fputc('"',f); +if (wordCount <= 4) + { + fputc(lastSep, f); + return; + } +fputc(sep,f); +fprintf(f, "%d", el->score); +if (wordCount <= 5) + { + fputc(lastSep, f); + return; + } +fputc(sep,f); +if (sep == ',') fputc('"',f); +fprintf(f, "%s", el->strand); +if (sep == ',') fputc('"',f); +if (wordCount <= 6) + { + fputc(lastSep, f); + return; + } +fputc(sep,f); +fprintf(f, "%u", el->thickStart); +if (wordCount <= 7) + { + fputc(lastSep, f); + return; + } +fputc(sep,f); +fprintf(f, "%u", el->thickEnd); +if (wordCount <= 8) + { + fputc(lastSep, f); + return; + } +fputc(sep,f); +if (useItemRgb) + fprintf(f, "%d,%d,%d", (el->itemRgb & 0xff0000) >> 16, + (el->itemRgb & 0xff00) >> 8, (el->itemRgb & 0xff)); +else + fprintf(f, "%u", el->itemRgb); +if (wordCount <= 9) + { + fputc(lastSep, f); + return; + } +fputc(sep,f); +fprintf(f, "%d", el->blockCount); +if (wordCount <= 10) + { + fputc(lastSep, f); + return; + } +fputc(sep,f); +if (sep == ',') fputc('{',f); +for (i=0; iblockCount; ++i) + { + fprintf(f, "%d", el->blockSizes[i]); + fputc(',', f); + } +if (sep == ',') fputc('}',f); +if (wordCount <= 11) + { + fputc(lastSep, f); + return; + } +fputc(sep,f); +if (sep == ',') fputc('{',f); +for (i=0; iblockCount; ++i) + { + fprintf(f, "%d", el->chromStarts[i]); + fputc(',', f); + } +if (sep == ',') fputc('}',f); + +if (wordCount <= 12) + { + fputc(lastSep, f); + return; + } +fputc(sep,f); +fprintf(f, "%d", el->expCount); + +if (wordCount <= 13) + { + fputc(lastSep, f); + return; + } +fputc(sep,f); +if (sep == ',') fputc('{',f); +for (i=0; iexpCount; ++i) + { + fprintf(f, "%d", el->expIds[i]); + fputc(',', f); + } +if (sep == ',') 
fputc('}',f); + + +if (wordCount <= 14) + { + fputc(lastSep, f); + return; + } +fputc(sep,f); +if (sep == ',') fputc('{',f); +for (i=0; iexpCount; ++i) + { + fprintf(f, "%g", el->expScores[i]); + fputc(',', f); + } +if (sep == ',') fputc('}',f); + + +fputc(lastSep,f); +} + +void bedOutputN(struct bed *el, int wordCount, FILE *f, char sep, char lastSep) +/* Write a bed of wordCount fields. */ +{ +bedOutFlexible(el, wordCount, f, sep, lastSep, FALSE); +} + +void bedOutputNitemRgb(struct bed *el, int wordCount, FILE *f, + char sep, char lastSep) +/* Write a bed of wordCount fields, interpret column 9 as RGB. */ +{ +bedOutFlexible(el, wordCount, f, sep, lastSep, TRUE); +} + + +int bedTotalBlockSize(struct bed *bed) +/* Return total size of all blocks. */ +{ +int total = 0; +int i; +if (bed->blockCount == 0) + return bed->chromEnd - bed->chromStart; +for (i=0; iblockCount; ++i) + total += bed->blockSizes[i]; +return total; +} + +int bedBlockSizeInRange(struct bed *bed, int rangeStart, int rangeEnd) +/* Get size of all parts of all exons between rangeStart and rangeEnd. */ +{ +int total = 0; +int i; +for (i=0; iblockCount; ++i) + { + int start = bed->chromStart + bed->chromStarts[i]; + int end = start + bed->blockSizes[i]; + total += positiveRangeIntersection(start, end, rangeStart, rangeEnd); + } +return total; +} + +int bedTotalThickBlockSize(struct bed *bed) +/* Return total size of all thick blocks. */ +{ +return bedBlockSizeInRange(bed, bed->thickStart, bed->thickEnd); +} + +int bedStartThinSize(struct bed *bed) +/* Return total size of all blocks before thick part. */ +{ +return bedBlockSizeInRange(bed, bed->chromStart, bed->thickStart); +} + +int bedEndThinSize(struct bed *bed) +/* Return total size of all blocks after thick part. */ +{ +return bedBlockSizeInRange(bed, bed->thickEnd, bed->chromEnd); +} + +void makeItBed12(struct bed *bedList, int numFields) +/* If it's less than bed 12, make it bed 12. 
The numFields */ +/* param is for how many fields the bed *currently* has. */ +{ +int i = 1; +struct bed *cur; +for (cur = bedList; cur != NULL; cur = cur->next) + { + /* it better be bigger than bed 3. */ + if (numFields < 4) + { + char name[50]; + safef(name, ArraySize(name), "item.%d", i+1); + cur->name = cloneString(name); + } + if (numFields < 5) + cur->score = 1000; + if (numFields < 6) + { + cur->strand[0] = '?'; + cur->strand[1] = '\0'; + } + if (numFields < 8) + { + cur->thickStart = cur->chromStart; + cur->thickEnd = cur->chromEnd; + } + if (numFields < 9) + cur->itemRgb = 0; + if (numFields < 12) + { + cur->blockSizes = needMem(sizeof(int)); + cur->chromStarts = needMem(sizeof(int)); + cur->blockCount = 1; + cur->chromStarts[0] = 0; + cur->blockSizes[0] = cur->chromEnd - cur->chromStart; + } + i++; + } +} + +struct bed *lmCloneBed(struct bed *bed, struct lm *lm) +/* Make a copy of bed in local memory. */ +{ +struct bed *newBed; +if (bed == NULL) + return NULL; +lmAllocVar(lm, newBed); +newBed->chrom = lmCloneString(lm, bed->chrom); +newBed->chromStart = bed->chromStart; +newBed->chromEnd = bed->chromEnd; +newBed->name = lmCloneString(lm, bed->name); +newBed->score = bed->score; +strncpy(newBed->strand, bed->strand, sizeof(bed->strand)); +newBed->thickStart = bed->thickStart; +newBed->thickEnd = bed->thickEnd; +newBed->itemRgb = bed->itemRgb; +newBed->blockCount = bed->blockCount; +if (bed->blockCount > 0) + { + newBed->blockSizes = lmCloneMem(lm, bed->blockSizes, + sizeof(bed->blockSizes[0]) * bed->blockCount); + newBed->chromStarts = lmCloneMem(lm, bed->chromStarts, + sizeof(bed->chromStarts[0]) * bed->blockCount); + } +newBed->expCount = bed->expCount; +if (bed->expCount > 0) + { + newBed->expIds = lmCloneMem(lm, bed->expIds, + sizeof(bed->expIds[0]) * bed->expCount); + newBed->expScores = lmCloneMem(lm, bed->expScores, + sizeof(bed->expScores[0]) * bed->expCount); + } +return(newBed); +} + + +struct bed *cloneBed(struct bed *bed) +/* Make an 
all-newly-allocated copy of a single bed record. */ +{ +struct bed *newBed; +if (bed == NULL) + return NULL; +AllocVar(newBed); +newBed->chrom = cloneString(bed->chrom); +newBed->chromStart = bed->chromStart; +newBed->chromEnd = bed->chromEnd; +newBed->name = cloneString(bed->name); +newBed->score = bed->score; +strncpy(newBed->strand, bed->strand, sizeof(bed->strand)); +newBed->thickStart = bed->thickStart; +newBed->thickEnd = bed->thickEnd; +newBed->itemRgb = bed->itemRgb; +newBed->blockCount = bed->blockCount; +if (bed->blockCount > 0) + { + newBed->blockSizes = needMem(sizeof(int) * bed->blockCount); + memcpy(newBed->blockSizes, bed->blockSizes, + sizeof(int) * bed->blockCount); + newBed->chromStarts = needMem(sizeof(int) * bed->blockCount); + memcpy(newBed->chromStarts, bed->chromStarts, + sizeof(int) * bed->blockCount); + } +newBed->expCount = bed->expCount; +if (bed->expCount > 0) + { + newBed->expIds = needMem(sizeof(int) * bed->expCount); + memcpy(newBed->expIds, bed->expIds, + sizeof(int) * bed->expCount); + newBed->expScores = needMem(sizeof(float) * bed->expCount); + memcpy(newBed->expScores, bed->expScores, + sizeof(float) * bed->expCount); + } +return(newBed); +} + + +struct bed *cloneBedList(struct bed *bedList) +/* Make an all-newly-allocated list copied from bed. */ +{ +struct bed *bedListOut = NULL, *bed=NULL; + +for (bed=bedList; bed != NULL; bed=bed->next) + { + struct bed *newBed = cloneBed(bed); + slAddHead(&bedListOut, newBed); + } + +slReverse(&bedListOut); +return bedListOut; +} + +struct bed *bedListNextDifferentChrom(struct bed *bedList) +/* Return next bed in list that is from a different chrom than the start of the list. 
*/ +{ +char *firstChrom = bedList->chrom; +struct bed *bed; +for (bed = bedList->next; bed != NULL; bed = bed->next) + if (!sameString(firstChrom, bed->chrom)) + break; +return bed; +} + +struct bed *bedCommaInN(char **pS, struct bed *ret, int fieldCount) +/* Create a bed out of a comma separated string looking for fieldCount + * fields. This will fill in ret if non-null, otherwise will return a + * new bed */ +{ +char *s = *pS; +int i; + +if (ret == NULL) + AllocVar(ret); +ret->chrom = sqlStringComma(&s); +ret->chromStart = sqlUnsignedComma(&s); +ret->chromEnd = sqlUnsignedComma(&s); +if (fieldCount > 3) + ret->name = sqlStringComma(&s); +if (fieldCount > 4) + ret->score = sqlUnsignedComma(&s); +if (fieldCount > 5) + sqlFixedStringComma(&s, ret->strand, sizeof(ret->strand)); +if (fieldCount > 6) + ret->thickStart = sqlUnsignedComma(&s); +else + ret->thickStart = ret->chromStart; +if (fieldCount > 7) + ret->thickEnd = sqlUnsignedComma(&s); +else + ret->thickEnd = ret->chromEnd; +if (fieldCount > 8) + ret->itemRgb = sqlUnsignedComma(&s); +if (fieldCount > 9) + ret->blockCount = sqlUnsignedComma(&s); +if (fieldCount > 10) + { + s = sqlEatChar(s, '{'); + AllocArray(ret->blockSizes, ret->blockCount); + for (i=0; iblockCount; ++i) + { + ret->blockSizes[i] = sqlSignedComma(&s); + } + s = sqlEatChar(s, '}'); + s = sqlEatChar(s, ','); + } +if(fieldCount > 11) + { + s = sqlEatChar(s, '{'); + AllocArray(ret->chromStarts, ret->blockCount); + for (i=0; iblockCount; ++i) + { + ret->chromStarts[i] = sqlSignedComma(&s); + } + s = sqlEatChar(s, '}'); + s = sqlEatChar(s, ','); + } +if (fieldCount > 12) + ret->expCount = sqlSignedComma(&s); +if (fieldCount > 13) + { + s = sqlEatChar(s, '{'); + AllocArray(ret->expIds, ret->expCount); + for (i=0; iexpCount; ++i) + { + ret->expIds[i] = sqlSignedComma(&s); + } + s = sqlEatChar(s, '}'); + s = sqlEatChar(s, ','); + } +if (fieldCount > 14) + { + s = sqlEatChar(s, '{'); + AllocArray(ret->expScores, ret->expCount); + for (i=0; iexpCount; 
++i) + { + ret->expScores[i] = sqlFloatComma(&s); + } + s = sqlEatChar(s, '}'); + s = sqlEatChar(s, ','); + } +*pS = s; +return ret; +} + +struct hash *readBedToBinKeeper(char *sizeFileName, char *bedFileName, int wordCount) +/* Read a list of beds and return results in hash of binKeeper structure for fast query + * See also bedsIntoKeeperHash, which takes the beds read into a list already, but + * dispenses with the need for the sizeFile. */ +{ +struct binKeeper *bk; +struct bed *bed; +struct lineFile *lf = lineFileOpen(sizeFileName, TRUE); +struct lineFile *bf = lineFileOpen(bedFileName , TRUE); +struct hash *hash = newHash(0); +char *chromRow[2]; +char *row[3] ; + +assert (wordCount == 3); +while (lineFileRow(lf, chromRow)) + { + char *name = chromRow[0]; + int size = lineFileNeedNum(lf, chromRow, 1); + + if (hashLookup(hash, name) != NULL) + warn("Duplicate %s, ignoring all but first\n", name); + else + { + bk = binKeeperNew(0, size); + assert(size > 1); + hashAdd(hash, name, bk); + } + } +while (lineFileNextRow(bf, row, ArraySize(row))) + { + bed = bedLoadN(row, wordCount); + bk = hashMustFindVal(hash, bed->chrom); + binKeeperAdd(bk, bed->chromStart, bed->chromEnd, bed); + } +lineFileClose(&bf); +lineFileClose(&lf); +return hash; +} + +void bedOutputRgb(FILE *f, unsigned int color) +/* Output a string: "r,g,b" for 24 bit number */ +{ +int colorIx = (int)color; +struct rgbColor rgb = colorIxToRgb(colorIx); +//fprintf(f, "%d,%d,%d", rgb.r, rgb.g, rgb.b); +// FIXME: endian issue ?? 
+fprintf(f, "%d,%d,%d", rgb.b, rgb.g, rgb.r); +} + +int bedParseRgb(char *itemRgb) +/* parse a string: "r,g,b" into three unsigned char values + returned as 24 bit number, or -1 for failure */ +{ +char dupe[64]; +int wordCount; +char *row[4]; + +strncpy(dupe, itemRgb, sizeof(dupe)); +wordCount = chopString(dupe, ",", row, ArraySize(row)); + +if ((wordCount != 3) || (!isdigit(row[0][0]) || + !isdigit(row[1][0]) || !isdigit(row[2][0]))) + return (-1); + +return ( ((atoi(row[0]) & 0xff) << 16) | + ((atoi(row[1]) & 0xff) << 8) | + (atoi(row[2]) & 0xff) ); +} + +int bedParseColor(char *colorSpec) +/* Parse an HTML color string, a string of 3 comma-sep unsigned color values 0-255, + * or a 6-digit hex string preceded by #. + * O/w return unsigned integer value. Return -1 on error */ +{ +if (strchr(colorSpec,',')) + return bedParseRgb(colorSpec); +unsigned rgb; +if (htmlColorForCode(colorSpec, &rgb)) + return rgb; +if (htmlColorForName(colorSpec, &rgb)) + return rgb; +return sqlUnsigned(colorSpec); +} + +long long bedTotalSize(struct bed *bedList) +/* Add together sizes of all beds in list. */ +{ +long long total=0; +struct bed *bed; +for (bed = bedList; bed != NULL; bed = bed->next) + total += (bed->chromEnd - bed->chromStart); +return total; +} + +void bedIntoRangeTree(struct bed *bed, struct rbTree *rangeTree) +/* Add all blocks in bed to range tree. For beds without blocks, + * add entire bed. */ +{ +int i; +if (bed->blockCount == 0) + rangeTreeAdd(rangeTree, bed->chromStart, bed->chromEnd); +else + { + for (i=0; i < bed->blockCount; ++i) + { + int start = bed->chromStart + bed->chromStarts[i]; + int end = start + bed->blockSizes[i]; + rangeTreeAdd(rangeTree, start, end); + } + } +} + +struct rbTree *bedToRangeTree(struct bed *bed) +/* Convert bed into a range tree. 
*/ +{ +struct rbTree *rangeTree = rangeTreeNew(); +bedIntoRangeTree(bed, rangeTree); +return rangeTree; +} + +int bedRangeTreeOverlap(struct bed *bed, struct rbTree *rangeTree) +/* Return number of bases bed overlaps with rangeTree. */ +{ +int totalOverlap = 0; +if (bed->blockCount == 0) + totalOverlap = rangeTreeOverlapSize(rangeTree, bed->chromStart, bed->chromEnd); +else + { + int i; + for (i=0; i < bed->blockCount; ++i) + { + int start = bed->chromStart + bed->chromStarts[i]; + int end = start + bed->blockSizes[i]; + totalOverlap += rangeTreeOverlapSize(rangeTree, start, end); + } + } +return totalOverlap; +} + +int bedSameStrandOverlap(struct bed *a, struct bed *b) +/* Return amount of block-level overlap on same strand between a and b */ +{ +/* Make sure on same strand, chromosome, and that overlap + * at the non-block level. */ +if (a->strand[0] != b->strand[0]) + return 0; +if (!sameString(a->chrom, b->chrom)) + return 0; +int outerOverlap = rangeIntersection(a->chromStart, a->chromEnd, + b->chromStart, b->chromEnd); +if (outerOverlap <= 0) + return 0; + +/* If both beds are non-blocked then we're pretty much done. */ +if (a->blockCount == 0 && b->blockCount == 0) + return outerOverlap; + +/* Otherwise make up a range tree containing regions covered by a, + * and figure out how much b overlaps it.. */ +struct rbTree *rangeTree = bedToRangeTree(a); +int overlap = bedRangeTreeOverlap(b, rangeTree); + +/* Clean up and return result. */ +rangeTreeFree(&rangeTree); +return overlap; +} + +boolean bedExactMatch(struct bed *oldBed, struct bed *newBed) +/* Return TRUE if it's an exact match. */ +{ +boolean oldCoding = (oldBed->thickStart != oldBed->thickEnd); +boolean newCoding = (newBed->thickStart != newBed->thickEnd); + +if (oldCoding != newCoding) + return FALSE; +/* non-coding bed's have different standards for what exactly + * goes into these fields. 
The standard just says they should + * be equal */ +if (oldCoding && ((oldBed->thickStart != newBed->thickStart) || + (oldBed->thickEnd != newBed->thickEnd))) + return FALSE; +if (oldBed->blockCount != newBed->blockCount) + return FALSE; +int oldSize = bedTotalBlockSize(oldBed); +int newSize = bedTotalBlockSize(newBed); +int overlap = bedSameStrandOverlap(oldBed, newBed); +return (oldSize == newSize && oldSize == overlap); +} + +boolean bedCompatibleExtension(struct bed *oldBed, struct bed *newBed) +/* Return TRUE if newBed is a compatible extension of oldBed, meaning + * all internal exons and all introns of old bed are contained, in the + * same order in the new bed. */ +{ +/* New bed must have at least as many exons as old bed... */ +if (oldBed->blockCount > newBed->blockCount) + return FALSE; + +/* New bed must also must also encompass old bed. */ +if (newBed->chromStart > oldBed->chromStart) + return FALSE; +if (newBed->chromEnd < oldBed->chromEnd) + return FALSE; + +/* Look for an exact match */ +int oldSize = bedTotalBlockSize(oldBed); +int newSize = bedTotalBlockSize(newBed); +int overlap = bedSameStrandOverlap(oldBed, newBed); +if (oldSize == newSize && oldSize == overlap) + return TRUE; + +/* If overlap is smaller than old size then we can't be a superset. */ +if (overlap < oldSize) + return FALSE; + +/* If we're a single exon bed then we're done. */ +if (oldBed->blockCount <= 1) + return TRUE; + +/* Otherwise we look for first intron start in old bed, and then + * flip through new bed until we find an intron that starts at the + * same place. 
*/ +int oldFirstIntronStart = oldBed->chromStart + oldBed->chromStarts[0] + oldBed->blockSizes[0]; +int newLastBlock = newBed->blockCount-1, oldLastBlock = oldBed->blockCount-1; +int newIx, oldIx; +for (newIx=0; newIx < newLastBlock; ++newIx) + { + int iStartNew = newBed->chromStart + newBed->chromStarts[newIx] + newBed->blockSizes[newIx]; + if (iStartNew == oldFirstIntronStart) + break; + } +if (newIx == newLastBlock) + return FALSE; + +/* Now we go through all introns in old bed, and make sure they match. */ +for (oldIx=0; oldIx < oldLastBlock; ++oldIx, ++newIx) + { + int iStartOld = oldBed->chromStart + oldBed->chromStarts[oldIx] + oldBed->blockSizes[oldIx]; + int iEndOld = oldBed->chromStart + oldBed->chromStarts[oldIx+1]; + int iStartNew = newBed->chromStart + newBed->chromStarts[newIx] + newBed->blockSizes[newIx]; + int iEndNew = newBed->chromStart + newBed->chromStarts[newIx+1]; + if (iStartOld != iStartNew || iEndOld != iEndNew) + return FALSE; + } + +/* Finally, make sure that the new bed doesn't contain any introns that overlap with the + * last exon of the old bed */ +for(; newIx < newLastBlock; ++newIx) + { + int iStartNew = newBed->chromStart + newBed->chromStarts[newIx] + newBed->blockSizes[newIx]; + if (iStartNew < oldBed->chromEnd) + return FALSE; + else if (iStartNew >= oldBed->chromEnd) + break; + } + +return TRUE; +} + +struct bed3 *bed3New(char *chrom, int start, int end) +/* Make new bed3. */ +{ +struct bed3 *bed; +AllocVar(bed); +bed->chrom = cloneString(chrom); +bed->chromStart = start; +bed->chromEnd = end; +return bed; +} + +struct bed *bedThickOnly(struct bed *in) +/* Return a bed that only has the thick part. (Which is usually the CDS). */ +{ +if (in->thickStart >= in->thickEnd) + return NULL; +if (in->expCount != 0 || in->expIds != NULL || in->expScores != NULL) + errAbort("Sorry, bedThickOnly only works on beds with up to 12 fields."); + +/* Allocate output, and fill in simple fields. 
*/ +struct bed *out; +AllocVar(out); +out->chrom = cloneString(in->chrom); +out->chromStart = out->thickStart = in->thickStart; +out->chromEnd = out->thickEnd = in->thickEnd; +out->name = cloneString(in->name); +out->strand[0] = in->strand[0]; +out->score = in->score; +out->itemRgb = in->itemRgb; + +/* If need be fill in blocks. */ +if (in->blockCount > 0) + { + /* Count up blocks inside CDS. */ + int i; + int outBlockCount = 0; + for (i=0; iblockCount; ++i) + { + int start = in->chromStart + in->chromStarts[i]; + int end = start + in->blockSizes[i]; + if (start < in->thickStart) start = in->thickStart; + if (end > in->thickEnd) end = in->thickEnd; + if (start < end) + outBlockCount += 1; + } + + /* This trivieal case shouldn't happen, but just in case, we deal with it. */ + if (outBlockCount == 0) + { + freeMem(out); + return NULL; + } + + /* Allocate block arrays for output. */ + out->blockCount = outBlockCount; + AllocArray(out->chromStarts, outBlockCount); + AllocArray(out->blockSizes, outBlockCount); + + /* Scan through input one more time, copying to out. */ + int outBlockIx = 0; + for (i=0; iblockCount; ++i) + { + int start = in->chromStart + in->chromStarts[i]; + int end = start + in->blockSizes[i]; + if (start < in->thickStart) start = in->thickStart; + if (end > in->thickEnd) end = in->thickEnd; + if (start < end) + { + out->chromStarts[outBlockIx] = start - out->chromStart; + out->blockSizes[outBlockIx] = end - start; + outBlockIx += 1; + } + } + } +return out; +} + +struct bed *bedThickOnlyList(struct bed *inList) +/* Return a list of beds that only are the thick part of input. */ +{ +struct bed *outList = NULL, *out, *in; +for (in = inList; in != NULL; in = in->next) + { + if ((out = bedThickOnly(in)) != NULL) + slAddHead(&outList, out); + } +slReverse(&outList); +return outList; +} + +char *bedAsDef(int bedFieldCount, int totalFieldCount) +/* Return an autoSql definition for a bed of given number of fields. 
+ * Normally totalFieldCount is equal to bedFieldCount. If there are extra + * fields they are just given the names field16, field17, etc and type string. */ +{ +if (bedFieldCount < 3 || bedFieldCount > 15) + errAbort("bedFieldCount is %d, but must be between %d and %d in bedAsDef", bedFieldCount, 3, 15); +struct dyString *dy = dyStringNew(0); +dyStringAppend(dy, + "table bed\n" + "\"Browser Extensible Data\"\n" + " (\n" + " string chrom; \"Reference sequence chromosome or scaffold\"\n" + " uint chromStart; \"Start position in chromosome\"\n" + " uint chromEnd; \"End position in chromosome\"\n" + ); +if (bedFieldCount >= 4) + dyStringAppend(dy, " string name; \"Name of item.\"\n"); +if (bedFieldCount >= 5) + dyStringAppend(dy, " uint score; \"Score (0-1000)\"\n"); +if (bedFieldCount >= 6) + dyStringAppend(dy, " char[1] strand; \"+ or - for strand\"\n"); +if (bedFieldCount >= 7) + dyStringAppend(dy, " uint thickStart; \"Start of where display should be thick (start codon)\"\n"); +if (bedFieldCount >= 8) + dyStringAppend(dy, " uint thickEnd; \"End of where display should be thick (stop codon)\"\n"); +if (bedFieldCount >= 9) + dyStringAppend(dy, " uint reserved; \"Used as itemRgb as of 2004-11-22\"\n"); +if (bedFieldCount >= 10) + dyStringAppend(dy, " int blockCount; \"Number of blocks\"\n"); +if (bedFieldCount >= 11) + dyStringAppend(dy, " int[blockCount] blockSizes; \"Comma separated list of block sizes\"\n"); +if (bedFieldCount >= 12) + dyStringAppend(dy, " int[blockCount] chromStarts; \"Start positions relative to chromStart\"\n"); +if (bedFieldCount >= 13) + dyStringAppend(dy, " int expCount; \"Experiment count\"\n"); +if (bedFieldCount >= 14) + dyStringAppend(dy, " int[expCount] expIds; \"Comma separated list of experiment ids. 
Always 0,1,2,3....\"\n"); +if (bedFieldCount >= 15) + dyStringAppend(dy, " float[expCount] expScores; \"Comma separated list of experiment scores.\"\n"); +int i; +for (i=bedFieldCount+1; i<=totalFieldCount; ++i) + dyStringPrintf(dy, "lstring field%d; \"Undocumented field\"\n", i+1); +dyStringAppend(dy, " )\n"); +return dyStringCannibalize(&dy); +} + + +boolean asCompareObjAgainstStandardBed(struct asObject *asYours, int numColumnsToCheck, boolean abortOnDifference) +/* Compare user's .as object asYours to the standard BED. + * abortOnDifference specifies whether to warn or abort if they differ within the first numColumnsToCheck columns. + * Returns TRUE if they match. */ +{ +boolean result = FALSE; +struct asObject *asStandard = NULL; +if (numColumnsToCheck > 15) + errAbort("There are only 15 standard BED columns defined and you have asked for %d.", numColumnsToCheck); +if (numColumnsToCheck < 3) + errAbort("All BED files must have at least 3 columns. (Is it possible that you provided a chrom.sizes file instead of a BED file?)"); +char *asStandardText = bedAsDef(15,15); +asStandard = asParseText(asStandardText); +result = asCompareObjs("Yours", asYours, "BED Standard", asStandard, numColumnsToCheck, NULL, abortOnDifference); +freeMem(asStandardText); +asObjectFreeList(&asStandard); +return result; +} + + +void loadAndValidateBedExt(char *row[], int bedFieldCount, int fieldCount, struct lineFile *lf, struct bed * bed, struct asObject *as, boolean isCt, boolean allow1bpOverlap) +/* Convert a row of strings to a bed and validate the contents. Abort with message if invalid data. Optionally validate bedPlus via asObject. + * If a customTrack, then some errors are tolerated. Possibly allow exons to overlap by one base. */ +{ +int count; +int *blockSizes = NULL; +int *chromStarts; + +bed->chrom = row[0]; // note this value is not cloned for speed, callers may need to clone it. 
+ +// This check is usually redundant since the caller should be checking it against actual chromInfo names +// however hgLoadBed might not always have that info available. +if (strlen(bed->chrom) >= BB_MAX_CHROM_STRING) // must leave room for 0 terminator + lineFileAbort(lf, "chrom [%s] is too long (must not exceed %d characters)", bed->chrom, BB_MAX_CHROM_STRING - 1); +if (strlen(bed->chrom) < 1) + lineFileAbort(lf, "chrom cannot be blank or empty"); + +lineFileAllInts(lf, row, 1, &bed->chromStart, FALSE, 4, "integer", FALSE); + +lineFileAllInts(lf, row, 2, &bed->chromEnd, FALSE, 4, "integer", FALSE); + +if (bed->chromEnd < bed->chromStart) + lineFileAbort(lf, "chromStart after chromEnd (%u > %u)", + bed->chromStart, bed->chromEnd); +if (bedFieldCount > 3) + { + bed->name = row[3]; + if (strlen(bed->name) > 255) + lineFileAbort(lf, "name [%s] is too long (must not exceed 255 characters)", bed->name); + if (isCt) + bed->name = cloneString(bed->name); + } +if (bedFieldCount > 4) + { + lineFileAllInts(lf, row, 4, &bed->score, TRUE, 4, "integer", FALSE); + if (!isCt && (bed->score < 0 || bed->score > 1000)) + lineFileAbort(lf, "score (%d) must be between 0 and 1000", bed->score); + } + +if (bedFieldCount > 5) + { + if (!isCt && strlen(row[5]) > 1) + lineFileAbort(lf, "Expecting + or - or . in strand, found [%s]",row[5]); + bed->strand[0] = row[5][0]; + bed->strand[1] = 0; + if (bed->strand[0] != '+' && bed->strand[0] != '-' && bed->strand[0] != '.') + lineFileAbort(lf, "Expecting + or - or . 
in strand, found [%s]",row[5]); + } +if (bedFieldCount > 6) + lineFileAllInts(lf, row, 6, &bed->thickStart, FALSE, 4, "integer", FALSE); +else + bed->thickStart = bed->chromStart; +if (bedFieldCount > 7) + { + lineFileAllInts(lf, row, 7, &bed->thickEnd, FALSE, 4, "integer", FALSE); + if (bed->thickEnd < bed->thickStart) + lineFileAbort(lf, "thickStart after thickEnd"); + if ((bed->thickStart != 0) && + ((bed->thickStart < bed->chromStart) || + (bed->thickStart > bed->chromEnd))) + lineFileAbort(lf, + "thickStart out of range (chromStart to chromEnd, or 0 if no CDS)"); + if ((bed->thickEnd != 0) && + ((bed->thickEnd < bed->chromStart) || + (bed->thickEnd > bed->chromEnd))) + lineFileAbort(lf, + "thickEnd out of range for %s:%u-%u, thick:%u-%u (chromStart to chromEnd, or 0 if no CDS)", + bed->name, bed->chromStart, bed->chromEnd, + bed->thickStart, bed->thickEnd); + } +else + bed->thickEnd = bed->chromEnd; + +if (bedFieldCount > 8) + { + if (strchr(row[8],',')) + { + unsigned char colors[4]; + char *saveColorString = cloneString(row[8]); + int numColors = lineFileAllIntsArray(lf, row, 8, colors, sizeof colors, FALSE, 1, "integer", FALSE); + if (numColors == 3) + { + bed->itemRgb = (((unsigned)colors[0]) << 2*8) | (((unsigned)colors[1]) << 1*8) | (unsigned)colors[2]; + } + else + lineFileAbort(lf, "Expecting color to consist of r,g,b values from 0 to 255. 
Got [%s]", saveColorString); + freeMem(saveColorString); + } + else + { + lineFileAllInts(lf, row, 8, &bed->itemRgb, FALSE, 4, "integer", FALSE); + } + } + +int tempArraySize = 1; // How big arrays are below +if (bedFieldCount > 9) + { + lineFileAllInts(lf, row, 9, &bed->blockCount, FALSE, 4, "integer", FALSE); + if (!(bed->blockCount >= 1)) + lineFileAbort(lf, "Expecting blockCount (%d) to be 1 or more.", bed->blockCount); + tempArraySize = bed->blockCount; + } +int tempBlockSizes[tempArraySize]; +int tempChromStarts[tempArraySize]; +int tempExpIds[tempArraySize]; +float tempExpScores[tempArraySize]; +if (bedFieldCount > 10) + { + if (isCt) + { + AllocArray(bed->blockSizes,bed->blockCount+1); // having +1 allows us to detect incorrect size + count = lineFileAllIntsArray(lf, row, 10, bed->blockSizes, bed->blockCount+1, TRUE, 4, "integer", TRUE); + blockSizes = bed->blockSizes; + } + else + { + count = lineFileAllIntsArray(lf, row, 10, tempBlockSizes, tempArraySize, TRUE, 4, "integer", TRUE); + blockSizes = tempBlockSizes; + } + if (count != bed->blockCount) + lineFileAbort(lf, "Expecting %d elements in blockSizes list, found at least %d", bed->blockCount, count); +#ifdef NOTNOW + int i; + for (i=0; i < bed->blockCount; i++) + { + if (!(blockSizes[i] > 0)) + lineFileAbort(lf, "BED blockSizes must be greater than 0, blockSize[%d] = %d", i, blockSizes[i]); + } +#endif + } +if (bedFieldCount > 11) + { + int i; + if (isCt) + { + AllocArray(bed->chromStarts,bed->blockCount+1); // having +1 allows us to detect incorrect size + count = lineFileAllIntsArray(lf, row, 11, bed->chromStarts, bed->blockCount+1, TRUE, 4, "integer", TRUE); + chromStarts = bed->chromStarts; + } + else + { + count = lineFileAllIntsArray(lf, row, 11, tempChromStarts, tempArraySize, TRUE, 4, "integer", TRUE); + chromStarts = tempChromStarts; + } + if (count != bed->blockCount) + lineFileAbort(lf, "Expecting %d elements in chromStarts list, found at least %d", bed->blockCount, count); + // tell the 
user if they appear to be using absolute starts rather than + // relative... easy to forget! Also check block order, coord ranges... + if (chromStarts[0] != 0) + lineFileAbort(lf, + "BED blocks must span chromStart to chromEnd. " + "BED chromStarts[0] = %d, must be 0 so that (chromStart + " + "chromStarts[0]) equals chromStart.", chromStarts[0]); + + for (i=1; i < bed->blockCount; i++) + { + +/* +printf("%d:%d %s %s s:%d c:%u cs:%u ce:%u csI:%d bsI:%d ls:%d le:%d
\n", lineIx, i, bed->chrom, bed->name, bed->score, bed->blockCount, bed->chromStart, bed->chromEnd, bed->chromStarts[i], bed->blockSizes[i], lastStart, lastEnd); +*/ + // extra check to give user help for a common problem + if (chromStarts[i]+bed->chromStart >= bed->chromEnd) + { + if (chromStarts[i] >= bed->chromStart) + lineFileAbort(lf, "BED chromStarts offsets must be relative to chromStart, " + "not absolute. Try subtracting chromStart from each offset " + "in chromStarts."); + else + lineFileAbort(lf, "BED chromStarts[i]+chromStart must be less than chromEnd."); + } + // chrom blocks must ascend without overlap + int fudge = 0; + if (allow1bpOverlap) + fudge = -1; + if (!(chromStarts[i] >= chromStarts[i-1] + blockSizes[i-1] + fudge)) + lineFileAbort(lf, "BED blocks must be in ascending order without overlap. Blocks %d and %d overlap.", i-1, i); + } + + // last block-end must match chromEnd + i = bed->blockCount-1; + if ((bed->chromStart + chromStarts[i] + blockSizes[i]) != bed->chromEnd) + { + lineFileAbort(lf, "BED blocks must span chromStart to chromEnd. 
(chromStart + " + "chromStarts[last] + blockSizes[last]) must equal chromEnd."); + } + } + +if (bedFieldCount > 12) + // get the microarray/colored-exon fields + { + lineFileAllInts(lf, row, 12, &bed->expCount, TRUE, 4, "integer", TRUE); + if (!(bed->expCount >= 1)) + lineFileAbort(lf, "Expecting expCount (%d) to be 1 or more.", bed->expCount); + if (isCt) + { + AllocArray(bed->expIds,bed->expCount+1); // having +1 allows us to detect incorrect size + count = lineFileAllIntsArray(lf, row, 13, bed->expIds, bed->expCount+1, TRUE, 4, "integer", TRUE); + } + else + { + count = lineFileAllIntsArray(lf, row, 13, tempExpIds, tempArraySize, TRUE, 4, "integer", TRUE); + } + if (count != bed->expCount) + lineFileAbort(lf, "expecting %d elements in expIds list (bed field 14)", bed->expCount); + if (bedFieldCount == 15) + { + if (isCt) + { + sqlFloatDynamicArray(row[14], &bed->expScores, &count); + } + else + { + count = sqlFloatArray(row[14], tempExpScores, tempArraySize); + } + if (count != bed->expCount) + lineFileAbort(lf, "expecting %d elements in expScores list (bed field 15)", bed->expCount); + } + } + +/* Check bedPlus fields are formatted right. */ +/* This could form the basis of an .as-validator independent of BED. I suppose it could go in asParse.c */ +if (as) + { + struct hash* linkHash = NULL; + /* Validate as-fields */ + struct asColumn *asCol = NULL; + asCol = as->columnList; + int i; + // Pre-scan ALL fields for linked fields + for (i=0; ilowType->type; + if (! (asCol->isList || asCol->isArray)) + { + if (asTypesIsInt(type)) + { + if (asCol->isSizeLink) // save the field value and index for later use in validating a list size. 
+ { + int listSize = 0; // big enough to hold the list count + lineFileAllInts(lf, row, i, &listSize, TRUE, 4, "integer", TRUE); + if (!linkHash) + linkHash = newHash(4); + hashAddInt(linkHash, asCol->name, listSize); + } + } + } + asCol = asCol->next; + } + /* Validate bed-plus fields */ + asCol = slElementFromIx(as->columnList, bedFieldCount); + for (i=bedFieldCount; ilowType->type; + if (! (asCol->isList || asCol->isArray)) + { + if (asTypesIsInt(type)) + lineFileAllInts(lf, row, i, NULL, !asTypesIsUnsigned(type), asTypesIntSize(type), asTypesIntSizeDescription(type), FALSE); + else if (asTypesIsFloating(type)) + lineFileNeedDouble(lf, row, i); + else if (type == t_string) + { + if (strlen(row[i]) > 255) + lineFileAbort(lf, "expecting length (%llu) of string (%s) not to exceed 255 in field %s", (unsigned long long)strlen(row[i]), row[i], asCol->name); + } + } + else if (asCol->isList) + { + if (asTypesIsFloating(type)) + { + // assure count = #items in list; lightweight validation (better than none) + int ix = asColumnFindIx(as->columnList, asCol->linkedSizeName); + int count = sqlUnsigned(row[ix]); + if (count < 0) + lineFileAbort(lf, + "expecting nonnegative number in count field for %s list, found %d", + asCol->name, asCol->fixedSize); + int itemCount = countSeparatedItems(row[i], ','); + if (count != itemCount) + lineFileAbort(lf, "expecting %d elements in %s list, found %d", + count, asCol->name, itemCount); + } + else if (asTypesIsInt(type)) + { + count = lineFileAllIntsArray(lf, row, i, NULL, countSeparatedItems(row[i], ','), + !asTypesIsUnsigned(type), asTypesIntSize(type), asTypesIntSizeDescription(type), FALSE); + if (asCol->fixedSize > 0) + { + if (asCol->fixedSize != count) + lineFileAbort(lf, "expecting %d elements in %s list, found %d", asCol->fixedSize, asCol->name, count); + } + else + { + if (!linkHash) + lineFileAbort(lf, "linked field %s was not found; it is required for determining listSize of %s" + , asCol->linkedSizeName, asCol->name); + 
int listSize = hashIntValDefault(linkHash, asCol->linkedSizeName, -1); + if (listSize == -1) + lineFileAbort(lf, "linked field %s was not found; it is required for determining listSize of %s" + , asCol->linkedSizeName, asCol->name); + if (!(listSize >= 1)) + lineFileAbort(lf, "invalid list size %d for list %s must be 1 or greater, empty lists are not allowed", listSize, asCol->name); + if (!(listSize == count)) + lineFileAbort(lf, "expecting %d elements in %s list, found %d", listSize, asCol->name, count); + } + } + } + asCol = asCol->next; + } + hashFree(&linkHash); + } + +} + +void loadAndValidateBed(char *row[], int bedFieldCount, int fieldCount, struct lineFile *lf, struct bed * bed, struct asObject *as, boolean isCt) +/* Convert a row of strings to a bed and validate the contents. Abort with message if invalid data. Optionally validate bedPlus via asObject. + * If a customTrack, then some errors are tolerated. */ +{ +loadAndValidateBedExt(row, bedFieldCount, fieldCount, lf, bed, as, isCt, FALSE); +} + + +struct bed3 *bed3LoadAll(char *fileName) +/* Load three columns from file as bed3. */ +{ +struct lineFile *lf = lineFileOpen(fileName, TRUE); +char *row[3]; +struct bed3 *list = NULL, *el; +while (lineFileRow(lf, row)) + { + AllocVar(el); + el->chrom = cloneString(row[0]); + el->chromStart = sqlUnsigned(row[1]); + el->chromEnd = sqlUnsigned(row[2]); + slAddHead(&list, el); + } +lineFileClose(&lf); +slReverse(&list); +return list; +} + +void bed3Free(struct bed3 **pBed) +/* Free up bed3 */ +{ +struct bed3 *bed = *pBed; +if (bed != NULL) + { + freeMem(bed->chrom); + freez(pBed); + } +} + +void bed3FreeList(struct bed3 **pList) +/* Free a list of dynamically allocated bed3's */ +{ +struct bed3 *el, *next; + +for (el = *pList; el != NULL; el = next) + { + next = el->next; + bed3Free(&el); + } +*pList = NULL; +} + +long long bed3TotalSize(struct bed3 *bedList) +/* Return sum of chromEnd-chromStart. 
*/ +{ +long long sum = 0; +struct bed3 *bed; +for (bed = bedList; bed != NULL; bed = bed->next) + sum += bed->chromEnd - bed->chromStart; +return sum; +} + +struct bed4 *bed4New(char *chrom, int start, int end, char *name) +/* Make new bed4. */ +{ +struct bed4 *bed; +AllocVar(bed); +bed->chrom = cloneString(chrom); +bed->chromStart = start; +bed->chromEnd = end; +bed->name = cloneString(name); +return bed; +} + +void bed4Free(struct bed4 **pBed) +/* Free up bed4 */ +{ +struct bed4 *bed = *pBed; +if (bed != NULL) + { + freeMem(bed->chrom); + freeMem(bed->name); + freez(pBed); + } +} + +void bed4FreeList(struct bed4 **pList) +/* Free a list of dynamically allocated bed4's */ +{ +struct bed4 *el, *next; + +for (el = *pList; el != NULL; el = next) + { + next = el->next; + bed4Free(&el); + } +*pList = NULL; +} + diff -Nru r-bioc-rtracklayer-1.48.0/src/ucsc/basicBed.h r-bioc-rtracklayer-1.50.0/src/ucsc/basicBed.h --- r-bioc-rtracklayer-1.48.0/src/ucsc/basicBed.h 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/ucsc/basicBed.h 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,325 @@ +/* basicBed.h contains the basic interface to Browser Extensible Data (bed) files and tables. + * The idea behind bed is that the first three fields are defined and required. + * A total of 15 fields are defined, and the file can contain any number of these. + * In addition after any number of defined fields there can be custom fields that + * are not defined in the bed spec. + * + * There's additional bed-related code in src/hg/inc/bed.h. This module contains the + * stuff that's independent of the database and other genomic structures. */ + +#ifndef BASICBED_H +#define BASICBED_H + +#include "asParse.h" + +struct bed +/* Browser extensible data */ + { + struct bed *next; /* Next in singly linked list. 
*/ + char *chrom; /* Human chromosome or FPC contig */ + unsigned chromStart; /* Start position in chromosome */ + unsigned chromEnd; /* End position in chromosome */ + char *name; /* Name of item */ + + /* The following items are not loaded by the bedLoad routines. */ + int score; /* Score - 0-1000 */ /* Should be uint but there are still some ct users with neg values, .as DOES say uint */ + char strand[2]; /* + or -. */ + unsigned thickStart; /* Start of where display should be thick (start codon for genes) */ + unsigned thickEnd; /* End of where display should be thick (stop codon for genes) */ + unsigned itemRgb; /* RGB 8 bits each */ + unsigned blockCount; /* Number of blocks. */ + int *blockSizes; /* Comma separated list of block sizes. */ + int *chromStarts; /* Start positions inside chromosome. Relative to chromStart*/ + + + int expCount; /* Experiment count */ + int *expIds; /* Comma separated list of Experiment ids */ + float *expScores; /* Comma separated list of Experiment scores. */ + char *label; /* Label to use on element if bigBed. */ + }; + +#define bedKnownFields 15 /* Maximum known fields in bed */ + +#define BB_MAX_CHROM_STRING 255 /* Maximum string length for chromosome length */ + +struct bed3 +/* Browser extensible data - first three fields */ + { + struct bed3 *next; /* Next in singly linked list. */ + char *chrom; /* Human chromosome or FPC contig */ + unsigned chromStart; /* Start position in chromosome */ + unsigned chromEnd; /* End position in chromosome */ + }; + +struct bed3 *bed3New(char *chrom, int start, int end); +/* Make new bed3. */ + +void bed3Free(struct bed3 **pBed); +/* Free up bed3 */ + +void bed3FreeList(struct bed3 **pList); +/* Free a list of dynamically allocated bed3's */ + +struct bed3 *bed3LoadAll(char *fileName); +/* Load three columns from file as bed3. */ + +long long bed3TotalSize(struct bed3 *bedList); +/* Return sum of chromEnd-chromStart. 
*/ + +struct bed4 +/* Browser extensible data - first four fields */ + { + struct bed4 *next; /* Next in singly linked list. */ + char *chrom; /* Human chromosome or FPC contig */ + unsigned chromStart; /* Start position in chromosome */ + unsigned chromEnd; /* End position in chromosome */ + char *name; /* Name of item */ + }; + + +struct bed4 *bed4New(char *chrom, int start, int end, char *name); +/* Make new bed4. */ + +void bed4Free(struct bed4 **pBed); +/* Free up bed4 */ + +void bed4FreeList(struct bed4 **pList); +/* Free a list of dynamically allocated bed4's */ + +void bedStaticLoad(char **row, struct bed *ret); +/* Load a row from bed table into ret. The contents of ret will + * be replaced at the next call to this function. */ + +struct bed *bedLoad(char **row); +/* Load a bed from row fetched with select * from bed + * from database. Dispose of this with bedFree(). + * This loads first four fields. */ + +struct bed *bedCommaIn(char **pS, struct bed *ret); +/* Create a bed out of a comma separated string. + * This will fill in ret if non-null, otherwise will + * return a new bed */ + +void bedFree(struct bed **pEl); +/* Free a single dynamically allocated bed such as created + * with bedLoad(). */ + +void bedFreeList(struct bed **pList); +/* Free a list of dynamically allocated bed's */ + +void bedOutput(struct bed *el, FILE *f, char sep, char lastSep); +/* Print out bed. Separate fields with sep. Follow last field with lastSep. */ + +#define bedTabOut(el,f) bedOutput(el,f,'\t','\n'); +/* Print out bed as a line in a tab-separated file. */ + +#define bedCommaOut(el,f) bedOutput(el,f,',',','); +/* Print out bed as a comma separated list including final comma. */ + +/* --------------- End of AutoSQL generated code. --------------- */ + +int bedCmp(const void *va, const void *vb); +/* Compare to sort based on chrom,chromStart. */ + +int bedCmpEnd(const void *va, const void *vb); +/* Compare to sort based on chrom,chromEnd. 
*/ + +int bedCmpScore(const void *va, const void *vb); +/* Compare to sort based on score - lowest first. */ + +int bedCmpPlusScore(const void *va, const void *vb); +/* Compare to sort based on chrom, chromStart and score - lowest first. */ + +int bedCmpSize(const void *va, const void *vb); +/* Compare to sort based on size of element (end-start == size) */ + +int bedCmpChromStrandStart(const void *va, const void *vb); +/* Compare to sort based on chrom,strand,chromStart. */ + +int bedCmpChromStrandStartName(const void *va, const void *vb); +/* Compare to sort based on name, chrom,strand,chromStart. */ + +struct bedLine +/* A line in a bed file with chromosome, start position parsed out. */ + { + struct bedLine *next; /* Next in list. */ + char *chrom; /* Chromosome parsed out. */ + int chromStart; /* Start position (still in rest of line). */ + char *line; /* Rest of line. */ + }; + +struct bedLine *bedLineNew(char *line); +/* Create a new bedLine based on tab-separated string line. */ + +void bedLineFree(struct bedLine **pBl); +/* Free up memory associated with bedLine. */ + +void bedLineFreeList(struct bedLine **pList); +/* Free a list of dynamically allocated bedLine's */ + +int bedLineCmp(const void *va, const void *vb); +/* Compare to sort based on chrom,chromStart. */ + +void bedSortFile(char *inFile, char *outFile); +/* Sort a bed file (in place, overwrites old file). */ + +struct bed *bedLoad3(char **row); +/* Load first three fields of bed. */ + +struct bed *bedLoad5(char **row); +/* Load first five fields of bed. */ + +struct bed *bedLoad6(char **row); +/* Load first six fields of bed. */ + +struct bed *bedLoad12(char **row); +/* Load all 12 fields of bed. */ + +struct bed *bedLoadN(char *row[], int wordCount); +/* Convert a row of strings to a bed. */ + +struct bed *bedLoadNAllChrom(char *fileName, int numFields, char* chrom); +/* Load bed entries from a tab-separated file that have the given chrom. + * Dispose of this with bedFreeList().
*/ + +struct bed *bedLoadNAll(char *fileName, int numFields); +/* Load all bed from a tab-separated file. + * Dispose of this with bedFreeList(). */ + +struct bed *bedLoadAll(char *fileName); +/* Determines how many fields are in a bedFile and load all beds from + * a tab-separated file. Dispose of this with bedFreeList(). */ + +void bedLoadAllReturnFieldCount(char *fileName, struct bed **retList, int *retFieldCount); +/* Load bed of unknown size and return number of fields as well as list of bed items. + * Ensures that all lines in bed file have same field count. */ + +void bedLoadAllReturnFieldCountAndRgb(char *fileName, struct bed **retList, int *retFieldCount, + boolean *retRgb); +/* Load bed of unknown size and return number of fields as well as list of bed items. + * Ensures that all lines in bed file have same field count. Also returns whether + * column 9 is being used as RGB or not. */ + +void bedOutputN(struct bed *el, int wordCount, FILE *f, char sep, char lastSep); +/* Write a bed of wordCount fields. */ + +void bedOutputNitemRgb(struct bed *el, int wordCount, FILE *f, + char sep, char lastSep); +/* Write a bed of wordCount fields, interpret column 9 as RGB. */ + +void bedOutFlexible(struct bed *el, int wordCount, FILE *f, + char sep, char lastSep, boolean useItemRgb); +/* Write a bed of wordCount fields, optionally interpreting field nine as R,G,B values. */ + +#define bedTabOutNitemRgb(el,wordCount, f) bedOutputNitemRgb(el,wordCount,f,'\t','\n') +/* Print out bed as a line in a tab-separated file. Interpret + column 9 as RGB */ + +#define bedTabOutN(el,wordCount, f) bedOutputN(el,wordCount,f,'\t','\n') +/* Print out bed as a line in a tab-separated file. */ + +#define bedCommaOutN(el,wordCount, f) bedOutputN(el,wordCount,f,',',',') +/* Print out bed as a comma separated list including final comma. */ + +int bedTotalBlockSize(struct bed *bed); +/* Return total size of all blocks. 
*/ + +int bedTotalThickBlockSize(struct bed *bed); +/* Return total size of all thick blocks. */ + +int bedStartThinSize(struct bed *bed); +/* Return total size of all blocks before thick part. */ + +int bedEndThinSize(struct bed *bed); +/* Return total size of all blocks after thick part. */ + +int bedBlockSizeInRange(struct bed *bed, int rangeStart, int rangeEnd); +/* Get size of all parts of all exons between rangeStart and rangeEnd. */ + +void makeItBed12(struct bed *bedList, int numFields); +/* If it's less than bed 12, make it bed 12. The numFields */ +/* param is for how many fields the bed *currently* has. */ + +struct bed *cloneBed(struct bed *bed); +/* Make an all-newly-allocated copy of a single bed record. */ + +struct bed *cloneBedList(struct bed *bed); +/* Make an all-newly-allocated list copied from bed. */ + +struct bed *bedListNextDifferentChrom(struct bed *bedList); +/* Return next bed in list that is from a different chrom than the start of the list. */ + +struct bed *lmCloneBed(struct bed *bed, struct lm *lm); +/* Make a copy of bed in local memory. */ + +struct bed *bedCommaInN(char **pS, struct bed *ret, int fieldCount); +/* Create a bed out of a comma separated string looking for fieldCount + * fields. This will fill in ret if non-null, otherwise will return a + * new bed */ + +struct hash *readBedToBinKeeper(char *sizeFileName, char *bedFileName, int wordCount); +/* Read a list of beds and return results in hash of binKeeper structure for fast query + * See also bedsIntoKeeperHash, which takes the beds read into a list already, but + * dispenses with the need for the sizeFile. */ + +int bedParseRgb(char *itemRgb); +/* parse a string: "r,g,b" into three unsigned char values + returned as 24 bit number, or -1 for failure */ + +int bedParseColor(char *colorSpec); +/* Parse an HTML color string, a string of 3 comma-sep unsigned color values 0-255, + * or a 6-digit hex string preceded by #. + * O/w return unsigned integer value. 
Return -1 on error */ + +void bedOutputRgb(FILE *f, unsigned int color); +/* Output a string: "r,g,b" for 24 bit number */ + +long long bedTotalSize(struct bed *bedList); +/* Add together sizes of all beds in list. */ + +int bedSameStrandOverlap(struct bed *a, struct bed *b); +/* Return amount of block-level overlap on same strand between a and b */ + +boolean bedExactMatch(struct bed *oldBed, struct bed *newBed); +/* Return TRUE if it's an exact match. */ + +boolean bedCompatibleExtension(struct bed *oldBed, struct bed *newBed); +/* Return TRUE if newBed is a compatible extension of oldBed, meaning + * all internal exons and all introns of old bed are contained, in the + * same order in the new bed. */ + +struct rbTree *bedToRangeTree(struct bed *bed); +/* Convert bed into a range tree. */ + +void bedIntoRangeTree(struct bed *bed, struct rbTree *rangeTree); +/* Add all blocks in bed to range tree. For beds without blocks, + * add entire bed. */ + +int bedRangeTreeOverlap(struct bed *bed, struct rbTree *rangeTree); +/* Return number of bases bed overlaps with rangeTree. */ + +struct bed *bedThickOnly(struct bed *in); +/* Return a bed that only has the thick part. (Which is usually the CDS). */ + +struct bed *bedThickOnlyList(struct bed *inList); +/* Return a list of beds that only are the thick part of input. */ + +char *bedAsDef(int bedFieldCount, int totalFieldCount); +/* Return an autoSql definition for a bed of given number of fields. + * Normally totalFieldCount is equal to bedFieldCount. If there are extra + * fields they are just given the names field16, field17, etc and type string. */ + +boolean asCompareObjAgainstStandardBed(struct asObject *asYours, int numColumnsToCheck, boolean abortOnDifference); +/* Compare user's .as object asYours to the standard BED. + * abortOnDifference specifies whether to warn or abort if they differ within the first numColumnsToCheck columns. + * Returns TRUE if they match. 
*/ + +void loadAndValidateBed(char *row[], int wordCount, int fieldCount, struct lineFile *lf, struct bed * bed, struct asObject *as, boolean isCt); +/* Convert a row of strings to a bed and validate the contents. Abort with message if invalid data. Optionally validate bedPlus via asObject. */ + +void loadAndValidateBedExt(char *row[], int bedFieldCount, int fieldCount, struct lineFile *lf, struct bed * bed, struct asObject *as, boolean isCt, boolean allow1bpOverlap); +/* Convert a row of strings to a bed and validate the contents. Abort with message if invalid data. Optionally validate bedPlus via asObject. Possibly allow one base overlap in exons */ + +int itemRgbColumn(char *column9); +/* Convert color specification to internal format. */ +#endif /* BASICBED_H */ diff -Nru r-bioc-rtracklayer-1.48.0/src/ucsc/bbiWrite.c r-bioc-rtracklayer-1.50.0/src/ucsc/bbiWrite.c --- r-bioc-rtracklayer-1.48.0/src/ucsc/bbiWrite.c 2020-04-27 20:22:29.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/ucsc/bbiWrite.c 2020-10-27 17:24:51.000000000 +0000 @@ -329,7 +329,7 @@ /* Call routine to make the initial zoom level and also a bit of work towards further levels. */ struct lm *lm = lmInit(0); int zoomIncrement = bbiResIncrement; -lineFileRewind(lf); +// lineFileRewind(lf); struct bbiSummary *rezoomedList = writeReducedOnceReturnReducedTwice(usageList, fieldCount, lf, initialReduction, initialReducedCount, zoomIncrement, blockSize, itemsPerSlot, doCompress, lm, diff -Nru r-bioc-rtracklayer-1.48.0/src/ucsc/bigBed.c r-bioc-rtracklayer-1.50.0/src/ucsc/bigBed.c --- r-bioc-rtracklayer-1.48.0/src/ucsc/bigBed.c 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/ucsc/bigBed.c 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,693 @@ +/* bigBed - interface to binary file with bed-style values (that is a bunch of + * possibly overlapping regions. 
*/ + +/* Copyright (C) 2013 The Regents of the University of California + * See README in this or parent directory for licensing information. */ + +#include "common.h" +#include "hash.h" +#include "linefile.h" +#include "obscure.h" +#include "dystring.h" +#include "rangeTree.h" +#include "cirTree.h" +#include "bPlusTree.h" +#include "basicBed.h" +#include "asParse.h" +#include "zlibFace.h" +#include "sig.h" +#include "udc.h" +#include "bbiFile.h" +#include "bigBed.h" + +struct bbiFile *bigBedFileOpen(char *fileName) +/* Open up big bed file. */ +{ +return bbiFileOpen(fileName, bigBedSig, "big bed"); +} + +boolean bigBedFileCheckSigs(char *fileName) +/* check file signatures at beginning and end of file */ +{ +return bbiFileCheckSigs(fileName, bigBedSig, "big bed"); +} + +struct bigBedInterval *bigBedIntervalQuery(struct bbiFile *bbi, char *chrom, + bits32 start, bits32 end, int maxItems, struct lm *lm) +/* Get data for interval. Return list allocated out of lm. Set maxItems to maximum + * number of items to return, or to 0 for all items. */ +{ +struct bigBedInterval *el, *list = NULL; +int itemCount = 0; +bbiAttachUnzoomedCir(bbi); +// Find blocks with padded start and end to make sure we include zero-length insertions: +bits32 paddedStart = (start > 0) ? start-1 : start; +bits32 paddedEnd = end+1; +bits32 chromId; +struct fileOffsetSize *blockList = bbiOverlappingBlocks(bbi, bbi->unzoomedCir, + chrom, paddedStart, paddedEnd, &chromId); +struct fileOffsetSize *block, *beforeGap, *afterGap; +struct udcFile *udc = bbi->udc; +boolean isSwapped = bbi->isSwapped; + +/* Set up for uncompression optionally. */ +char *uncompressBuf = NULL; +if (bbi->uncompressBufSize > 0) + uncompressBuf = needLargeMem(bbi->uncompressBufSize); + +char *mergedBuf = NULL; +for (block = blockList; block != NULL; ) + { + /* Find contiguous blocks and read them into mergedBuf.
*/ + fileOffsetSizeFindGap(block, &beforeGap, &afterGap); + bits64 mergedOffset = block->offset; + bits64 mergedSize = beforeGap->offset + beforeGap->size - mergedOffset; + udcSeek(udc, mergedOffset); + mergedBuf = needLargeMem(mergedSize); + udcMustRead(udc, mergedBuf, mergedSize); + char *blockBuf = mergedBuf; + + /* Loop through individual blocks within merged section. */ + for (;block != afterGap; block = block->next) + { + /* Uncompress if necessary. */ + char *blockPt, *blockEnd; + if (uncompressBuf) + { + blockPt = uncompressBuf; + int uncSize = zUncompress(blockBuf, block->size, uncompressBuf, bbi->uncompressBufSize); + blockEnd = blockPt + uncSize; + } + else + { + blockPt = blockBuf; + blockEnd = blockPt + block->size; + } + + while (blockPt < blockEnd) + { + /* Read next record into local variables. */ + bits32 chr = memReadBits32(&blockPt, isSwapped); + bits32 s = memReadBits32(&blockPt, isSwapped); + bits32 e = memReadBits32(&blockPt, isSwapped); + + /* calculate length of rest of bed fields */ + int restLen = strlen(blockPt); + + /* If we're actually in range then copy it into a new element and add to list. 
*/ + if (chr == chromId && + ((s < end && e > start) + // Make sure to include zero-length insertion elements at start or end: + || (s == e && (s == end || e == start)))) + { + ++itemCount; + if (maxItems > 0 && itemCount > maxItems) + break; + + lmAllocVar(lm, el); + el->start = s; + el->end = e; + if (restLen > 0) + el->rest = lmCloneStringZ(lm, blockPt, restLen); + el->chromId = chromId; + slAddHead(&list, el); + } + + // move blockPt pointer to end of previous bed + blockPt += restLen + 1; + } + if (maxItems > 0 && itemCount > maxItems) + break; + blockBuf += block->size; + } + if (maxItems > 0 && itemCount > maxItems) + break; + freez(&mergedBuf); + } +freez(&mergedBuf); +freeMem(uncompressBuf); +slFreeList(&blockList); +slReverse(&list); +return list; +} + +int bigBedIntervalToRow(struct bigBedInterval *interval, char *chrom, char *startBuf, char *endBuf, + char **row, int rowSize) +/* Convert bigBedInterval into an array of chars equivalent to what you'd get by + * parsing the bed file. The startBuf and endBuf are used to hold the ascii representation of + * start and end. Note that the row fields are chopped out of a cloneString() copy of interval->rest, not out of interval->rest itself. + */ +{ +int fieldCount = 3; +sprintf(startBuf, "%u", interval->start); +sprintf(endBuf, "%u", interval->end); +row[0] = chrom; +row[1] = startBuf; +row[2] = endBuf; +if (!isEmpty(interval->rest)) + { + int wordCount = chopByChar(cloneString(interval->rest), '\t', row+3, rowSize-3); + fieldCount += wordCount; + } +return fieldCount; +} + +static struct bbiInterval *bigBedCoverageIntervals(struct bbiFile *bbi, + char *chrom, bits32 start, bits32 end, struct lm *lm) +/* Return intervals where the val is the depth of coverage. */ +{ +/* Get list of overlapping intervals */ +struct bigBedInterval *bi, *biList = bigBedIntervalQuery(bbi, chrom, start, end, 0, lm); +if (biList == NULL) + return NULL; + +/* Make a range tree that collects coverage.
*/ +struct rbTree *rangeTree = rangeTreeNew(); +for (bi = biList; bi != NULL; bi = bi->next) + rangeTreeAddToCoverageDepth(rangeTree, bi->start, bi->end); +struct range *range, *rangeList = rangeTreeList(rangeTree); + +/* Convert rangeList to bbiInterval list. */ +struct bbiInterval *bwi, *bwiList = NULL; +for (range = rangeList; range != NULL; range = range->next) + { + lmAllocVar(lm, bwi); + bwi->start = range->start; + if (bwi->start < start) + bwi->start = start; + bwi->end = range->end; + if (bwi->end > end) + bwi->end = end; + bwi->val = ptToInt(range->val); + slAddHead(&bwiList, bwi); + } +slReverse(&bwiList); + +/* Clean up and go home. */ +rangeTreeFree(&rangeTree); +return bwiList; +} + +boolean bigBedSummaryArrayExtended(struct bbiFile *bbi, char *chrom, bits32 start, bits32 end, + int summarySize, struct bbiSummaryElement *summary) +/* Get extended summary information for summarySize evenly spaced elements into + * the summary array. */ +{ +return bbiSummaryArrayExtended(bbi, chrom, start, end, bigBedCoverageIntervals, + summarySize, summary); +} + +boolean bigBedSummaryArray(struct bbiFile *bbi, char *chrom, bits32 start, bits32 end, + enum bbiSummaryType summaryType, int summarySize, double *summaryValues) +/* Fill in summaryValues with data from indicated chromosome range in bigBed file. + * Be sure to initialize summaryValues to a default value, which will not be touched + * for regions without data in file. (Generally you want the default value to either + * be 0.0 or nan("") depending on the application.) Returns FALSE if no data + * at that position. */ +{ +return bbiSummaryArray(bbi, chrom, start, end, bigBedCoverageIntervals, + summaryType, summarySize, summaryValues); +} + +struct offsetSize +/* Simple file offset and file size. */ + { + bits64 offset; + bits64 size; + }; + +static int cmpOffsetSizeRef(const void *va, const void *vb) +/* Compare to sort slRef pointing to offsetSize. 
Sort is kind of hokey, + * but guarantees all items that are the same will be next to each other + * at least, which is all we care about. */ +{ +const struct slRef *a = *((struct slRef **)va); +const struct slRef *b = *((struct slRef **)vb); +return memcmp(a->val, b->val, sizeof(struct offsetSize)); +} + +static struct fileOffsetSize *fosFromRedundantBlockList(struct slRef **pBlockList, + boolean isSwapped) +/* Convert from list of references to offsetSize format to list of fileOffsetSize + * format, while removing redundancy. Sorts *pBlockList as a side effect. */ +{ +/* Sort input so it is easy to uniquify. */ +slSort(pBlockList, cmpOffsetSizeRef); +struct slRef *blockList = *pBlockList; + +/* Make new fileOffsetSize for each unique offsetSize. */ +struct fileOffsetSize *fosList = NULL, *fos; +struct offsetSize lastOffsetSize = {0,0}; +struct slRef *blockRef; +for (blockRef = blockList; blockRef != NULL; blockRef = blockRef->next) + { + if (memcmp(&lastOffsetSize, blockRef->val, sizeof(lastOffsetSize)) != 0) + { + memcpy(&lastOffsetSize, blockRef->val, sizeof(lastOffsetSize)); + AllocVar(fos); + if (isSwapped) + { + fos->offset = byteSwap64(lastOffsetSize.offset); + fos->size = byteSwap64(lastOffsetSize.size); + } + else + { + fos->offset = lastOffsetSize.offset; + fos->size = lastOffsetSize.size; + } + slAddHead(&fosList, fos); + } + } +slReverse(&fosList); +return fosList; +} + + +static struct fileOffsetSize *bigBedChunksMatchingName(struct bbiFile *bbi, + struct bptFile *index, char *name) +/* Get list of file chunks that match name. Can slFreeList this when done.
*/ +{ +struct slRef *blockList = bptFileFindMultiple(index, + name, strlen(name), sizeof(struct offsetSize)); +struct fileOffsetSize *fosList = fosFromRedundantBlockList(&blockList, bbi->isSwapped); +slRefFreeListAndVals(&blockList); +return fosList; +} + +static struct fileOffsetSize *bigBedChunksMatchingNames(struct bbiFile *bbi, + struct bptFile *index, char **names, int nameCount) +/* Get list of file chunks that match any of the names. Can slFreeList this when done. */ +{ +/* Go through all names and make a blockList that includes all blocks with any hit to any name. + * Many of these blocks will occur multiple times. */ +struct slRef *blockList = NULL; +int nameIx; +for (nameIx = 0; nameIx < nameCount; ++nameIx) + { + char *name = names[nameIx]; + struct slRef *oneList = bptFileFindMultiple(index, + name, strlen(name), sizeof(struct offsetSize)); + blockList = slCat(oneList, blockList); + } + +/* Create nonredundant list of blocks. */ +struct fileOffsetSize *fosList = fosFromRedundantBlockList(&blockList, bbi->isSwapped); + +/* Clean up and return result. */ +slRefFreeListAndVals(&blockList); +return fosList; +} + +typedef boolean (*BbFirstWordMatch)(char *line, int fieldIx, void *target); +/* A function that returns TRUE if first word in tab-separated line matches target. */ + +static void extractField(char *line, int fieldIx, char **retField, int *retFieldSize) +/* Go through tab separated line and figure out start and size of given field. */ +{ +int i; +fieldIx -= 3; /* Skip over chrom/start/end, which are not in line. */ +for (i=0; iisSwapped; +for (fos = fosList; fos != NULL; fos = fos->next) + { + /* Read in raw data */ + udcSeek(bbi->udc, fos->offset); + char *rawData = needLargeMem(fos->size); + udcRead(bbi->udc, rawData, fos->size); + + /* Optionally uncompress data, and set data pointer to uncompressed version.
*/ + char *uncompressedData = NULL; + char *data = NULL; + int dataSize = 0; + if (bbi->uncompressBufSize > 0) + { + data = uncompressedData = needLargeMem(bbi->uncompressBufSize); + dataSize = zUncompress(rawData, fos->size, uncompressedData, bbi->uncompressBufSize); + } + else + { + data = rawData; + dataSize = fos->size; + } + + /* Set up for "memRead" routines to more or less treat memory block like file */ + char *blockPt = data, *blockEnd = data + dataSize; + struct dyString *dy = dyStringNew(32); // Keep bits outside of chrom/start/end here + + + /* Read next record into local variables. */ + while (blockPt < blockEnd) + { + bits32 chromIx = memReadBits32(&blockPt, isSwapped); + bits32 s = memReadBits32(&blockPt, isSwapped); + bits32 e = memReadBits32(&blockPt, isSwapped); + int c; + dyStringClear(dy); + // TODO - can simplify this probably just to for (;;) {if ((c = *blockPt++) == 0) ... + while ((c = *blockPt++) >= 0) + { + if (c == 0) + break; + dyStringAppendC(dy, c); + } + if ((*matcher)(dy->string, fieldIx, target)) + { + lmAllocVar(lm, interval); + interval->start = s; + interval->end = e; + interval->rest = cloneString(dy->string); + interval->chromId = chromIx; + slAddHead(&intervalList, interval); + } + } + + /* Clean up temporary buffers. */ + dyStringFree(&dy); + freez(&uncompressedData); + freez(&rawData); + } +slReverse(&intervalList); +return intervalList; +} + + + +struct bigBedInterval *bigBedNameQuery(struct bbiFile *bbi, struct bptFile *index, + int fieldIx, char *name, struct lm *lm) +/* Return list of intervals matching file. These intervals will be allocated out of lm. 
*/ +{ +struct fileOffsetSize *fosList = bigBedChunksMatchingName(bbi, index, name); +struct bigBedInterval *intervalList = bigBedIntervalsMatchingName(bbi, fosList, + bbWordMatchesName, fieldIx, name, lm); +slFreeList(&fosList); +return intervalList; +} + +struct bigBedInterval *bigBedMultiNameQuery(struct bbiFile *bbi, struct bptFile *index, + int fieldIx, char **names, int nameCount, struct lm *lm) +/* Fetch all records matching any of the names. Using given index on given field. + * Return list is allocated out of lm. */ +{ +/* Set up name index and get list of chunks that match any of our names. */ +struct fileOffsetSize *fosList = bigBedChunksMatchingNames(bbi, index, names, nameCount); + +/* Create hash of all names. */ +struct hash *hash = newHash(0); +int nameIx; +for (nameIx=0; nameIx < nameCount; ++nameIx) + hashAdd(hash, names[nameIx], NULL); + + +/* Get intervals where name matches hash target. */ +struct bigBedInterval *intervalList = bigBedIntervalsMatchingName(bbi, fosList, + bbWordIsInHash, fieldIx, hash, lm); + +/* Clean up and return results. */ +slFreeList(&fosList); +hashFree(&hash); +return intervalList; +} + +void bigBedIntervalListToBedFile(struct bbiFile *bbi, struct bigBedInterval *intervalList, FILE *f) +/* Write out big bed interval list to bed file, looking up chromosome. 
*/ +{ +char chromName[bbi->chromBpt->keySize+1]; +int lastChromId = -1; +struct bigBedInterval *interval; +for (interval = intervalList; interval != NULL; interval = interval->next) + { + bbiCachedChromLookup(bbi, interval->chromId, lastChromId, chromName, sizeof(chromName)); + lastChromId = interval->chromId; + fprintf(f, "%s\t%u\t%u\t%s\n", chromName, interval->start, interval->end, interval->rest); + } +} + +int bigBedIntervalToRowLookupChrom(struct bigBedInterval *interval, + struct bigBedInterval *prevInterval, struct bbiFile *bbi, + char *chromBuf, int chromBufSize, char *startBuf, char *endBuf, char **row, int rowSize) +/* Convert bigBedInterval to array of chars equivalend to what you'd get by parsing the + * bed file. If you already know what chromosome the interval is on use the simpler + * bigBedIntervalToRow. This one will look up the chromosome based on the chromId field + * of the interval, which is relatively time consuming. To avoid doing this unnecessarily + * pass in a non-NULL prevInterval, and if the chromId is the same on prevInterval as this, + * it will avoid the lookup. The chromBufSize should be at greater or equal to + * bbi->chromBpt->keySize+1. The startBuf and endBuf are used to hold the ascii representation of + * start and end, and should be 16 bytes. Note that the interval->rest string will have zeroes + * inserted as a side effect. Returns number of fields in row. */ +{ +int lastChromId = (prevInterval == NULL ? -1 : prevInterval->chromId); +bbiCachedChromLookup(bbi, interval->chromId, lastChromId, chromBuf, chromBufSize); +return bigBedIntervalToRow(interval, chromBuf, startBuf, endBuf, row, rowSize); +} + +char *bigBedAutoSqlText(struct bbiFile *bbi) +/* Get autoSql text if any associated with file. Do a freeMem of this when done. 
*/ +{ +if (bbi->asOffset == 0) + return NULL; +struct udcFile *f = bbi->udc; +udcSeek(f, bbi->asOffset); +return udcReadStringAndZero(f); +} + +struct asObject *bigBedAs(struct bbiFile *bbi) +/* Get autoSql object definition if any associated with file. */ +{ +if (bbi->asOffset == 0) + return NULL; +char *asText = bigBedAutoSqlText(bbi); +struct asObject *as = asParseText(asText); +freeMem(asText); +return as; +} + +struct asObject *bigBedAsOrDefault(struct bbiFile *bbi) +// Get asObject associated with bigBed - if none exists in file make it up from field counts. +{ +struct asObject *as = bigBedAs(bbi); +if (as == NULL) + as = asParseText(bedAsDef(bbi->definedFieldCount, bbi->fieldCount)); +return as; +} + +struct asObject *bigBedFileAsObjOrDefault(char *fileName) +// Get asObject associated with bigBed file, or the default. +{ +struct bbiFile *bbi = bigBedFileOpen(fileName); +if (bbi) + { + struct asObject *as = bigBedAsOrDefault(bbi); + bbiFileClose(&bbi); + return as; + } +return NULL; +} + +int bbFieldIndex(struct bbiFile *bbi, char* fieldName) +/* return the index of a given field */ +{ +if (fieldName==NULL) + return -1; +struct asObject *as = bigBedAsOrDefault(bbi); +if (as == NULL) + return -1; + +// search for field name, return index if found +struct asColumn *col = as->columnList; +int ix = 0; +for (;col != NULL;col=col->next, ix+=1) + if (sameString(col->name, fieldName)) + return ix; +return -1; +} + +int bbExtraFieldIndex(struct bbiFile *bbi, char* fieldName) +/* return the index of a given extra field */ +{ +if (fieldName==NULL) + return 0; +struct asObject *as = bigBedAsOrDefault(bbi); +if (as == NULL) + return 0; + +// search for field name, return index if found +struct asColumn *col = as->columnList; +int ix = 0; +for (;col != NULL;col=col->next, ix+=1) + if (sameString(col->name, fieldName)) + return max(ix-3, 0); // never return a negative value +return 0; +} + +bits64 bigBedItemCount(struct bbiFile *bbi) +/* Return total items in file. 
*/ +{ +udcSeek(bbi->udc, bbi->unzoomedDataOffset); +return udcReadBits64(bbi->udc, bbi->isSwapped); +} + +struct slName *bigBedListExtraIndexes(struct bbiFile *bbi) +/* Return list of names of extra indexes beyond primary chrom:start-end one" */ +{ +struct udcFile *udc = bbi->udc; +boolean isSwapped = bbi->isSwapped; + +/* See if we have any extra indexes, and if so seek to there. */ +bits64 offset = bbi->extraIndexListOffset; +if (offset == 0) + return NULL; +udcSeek(udc, offset); + +/* Construct list of field that are being indexed. List is list of + * field numbers within asObj. */ +int i; +struct slInt *intList = NULL, *intEl; +for (i=0; iextraIndexCount; ++i) + { + bits16 fieldCount; + udcReadBits16(udc, isSwapped); // type + fieldCount = udcReadBits16(udc, isSwapped); + udcSeekCur(udc, sizeof(bits64)); // skip over fileOffset + udcSeekCur(udc, 4); // skip over reserved bits + if (fieldCount == 1) + { + bits16 fieldId = udcReadBits16(udc, isSwapped); + udcSeekCur(udc, 2); // skip over reserved bits + intEl = slIntNew(fieldId); + slAddHead(&intList, intEl); + } + else + { + warn("Not yet understanding indexes on multiple fields at once."); + internalErr(); + } + } + +/* Now have to make an asObject to find out name that corresponds to this field. */ +struct asObject *as = bigBedAsOrDefault(bbi); + +/* Make list of field names out of list of field numbers */ +struct slName *nameList = NULL; +for (intEl = intList; intEl != NULL; intEl = intEl->next) + { + struct asColumn *col = slElementFromIx(as->columnList, intEl->val); + if (col == NULL) + { + warn("Inconsistent bigBed file %s", bbi->fileName); + internalErr(); + } + slNameAddHead(&nameList, col->name); + } + +asObjectFree(&as); +return nameList; +} + +struct bptFile *bigBedOpenExtraIndex(struct bbiFile *bbi, char *fieldName, int *retFieldIx) +/* Return index associated with fieldName. Aborts if no such index. Optionally return + * index in a row of this field. 
*/ +{ +struct udcFile *udc = bbi->udc; +boolean isSwapped = bbi->isSwapped; +struct asObject *as = bigBedAsOrDefault(bbi); +struct asColumn *col = asColumnFind(as, fieldName); +if (col == NULL) + errAbort("No field %s in %s", fieldName, bbi->fileName); +int colIx = slIxFromElement(as->columnList, col); +if (retFieldIx != NULL) + *retFieldIx = colIx; +asObjectFree(&as); + +/* See if we have any extra indexes, and if so seek to there. */ +bits64 offset = bbi->extraIndexListOffset; +if (offset == 0) + errAbort("%s has no indexes", bbi->fileName); +udcSeek(udc, offset); + +/* Go through each extra index and see if it's a match */ +int i; +for (i=0; iextraIndexCount; ++i) + { + bits16 type = udcReadBits16(udc, isSwapped); + bits16 fieldCount = udcReadBits16(udc, isSwapped); + bits64 fileOffset = udcReadBits64(udc, isSwapped); + udcSeekCur(udc, 4); // skip over reserved bits + + if (type != 0) + { + warn("Don't understand type %d", type); + internalErr(); + } + if (fieldCount == 1) + { + bits16 fieldId = udcReadBits16(udc, isSwapped); + udcSeekCur(udc, 2); // skip over reserved bits + if (fieldId == colIx) + { + udcSeek(udc, fileOffset); + struct bptFile *bpt = bptFileAttach(bbi->fileName, udc); + return bpt; + } + } + else + { + warn("Not yet understanding indexes on multiple fields at once."); + internalErr(); + } + } + +errAbort("%s is not indexed in %s", fieldName, bbi->fileName); +return NULL; +} + + + diff -Nru r-bioc-rtracklayer-1.48.0/src/ucsc/binRange.c r-bioc-rtracklayer-1.50.0/src/ucsc/binRange.c --- r-bioc-rtracklayer-1.48.0/src/ucsc/binRange.c 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/ucsc/binRange.c 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,392 @@ +/* binRange Stuff to handle binning - which helps us restrict + * our attention to the parts of database that contain info + * about a particular window on a chromosome. This scheme + * will work without modification for chromosome sizes up + * to half a gigaBase. 
The finest sized bin is 128k (1<<17). + * The next coarsest is 8x as big (1<<13). There's a hierarchy + * of bins with the chromosome itself being the final bin. + * Features are put in the finest bin they'll fit in. + * + * This file is copyright 2002 Jim Kent, but license is hereby + * granted for all use - public, private or commercial. */ + +#include "common.h" +#include "binRange.h" + + +/* add one new level to get coverage past chrom sizes of 512 Mb + * effective limit is now the size of an integer since chrom start + * and end coordinates are always being used in int's == 2Gb-1 */ +static int binOffsetsExtended[] = + {4096+512+64+8+1, 512+64+8+1, 64+8+1, 8+1, 1, 0}; + +static int binOffsets[] = {512+64+8+1, 64+8+1, 8+1, 1, 0}; +#define _binFirstShift 17 /* How much to shift to get to finest bin. */ +#define _binNextShift 3 /* How much to shift to get to next larger bin. */ + +int binLevelsExtended() +/* Return number of levels to bins. */ +{ +return ArraySize(binOffsetsExtended); +} + +int binLevels() +/* Return number of levels to bins. */ +{ +return ArraySize(binOffsets); +} + +int binFirstShift() +/* Return amount to shift a number to get to finest bin. */ +{ +return _binFirstShift; +} + +int binNextShift() +/* Return amount to shift a number to get to next coarser bin. */ +{ +return _binNextShift; +} + +int binOffsetExtended(int level) +/* Return offset for bins of a given level. */ +{ +assert(level >= 0 && level < ArraySize(binOffsetsExtended)); +return binOffsetsExtended[level] + _binOffsetOldToExtended; +} + +int binOffset(int level) +/* Return offset for bins of a given level. */ +{ +assert(level >= 0 && level < ArraySize(binOffsets)); +return binOffsets[level]; +} + +static int binFromRangeStandard(int start, int end) +/* Given start,end in chromosome coordinates assign it + * a bin. 
There's a bin for each 128k segment, for each + * 1M segment, for each 8M segment, for each 64M segment, + * and for each chromosome (which is assumed to be less than + * 512M.) A range goes into the smallest bin it will fit in. */ +{ +int startBin = start, endBin = end-1, i; +startBin >>= _binFirstShift; +endBin >>= _binFirstShift; +for (i=0; i>= _binNextShift; + endBin >>= _binNextShift; + } +errAbort("start %d, end %d out of range in findBin (max is 512M)", start, end); +return 0; +} + +static int binFromRangeExtended(int start, int end) +/* Given start,end in chromosome coordinates assign it + * a bin. There's a bin for each 128k segment, for each + * 1M segment, for each 8M segment, for each 64M segment, + * for each 512M segment, and one top level bin for 4Gb. + * Note, since start and end are int's, the practical limit + * is up to 2Gb-1, and thus, only four result bins on the second + * level. + * A range goes into the smallest bin it will fit in. */ +{ +int startBin = start, endBin = end-1, i; +startBin >>= _binFirstShift; +endBin >>= _binFirstShift; +for (i=0; i>= _binNextShift; + endBin >>= _binNextShift; + } +errAbort("start %d, end %d out of range in findBin (max is 2Gb)", start, end); +return 0; +} + +int binFromRange(int start, int end) +/* return bin that this start-end segment is in */ +{ +if (end <= BINRANGE_MAXEND_512M) + return binFromRangeStandard(start, end); +else + return binFromRangeExtended(start, end); +} + +static int binFromRangeBinKeeperExtended(int start, int end) +/* This is just like binFromRangeExtended() above, but it doesn't limit + * the answers to the range from _binOffsetOldToExtended and up. + * It simply uses the whole new bin scheme as if it was the only + * one. 
+ */ +{ +int startBin = start, endBin = end-1, i; +startBin >>= _binFirstShift; +endBin >>= _binFirstShift; +for (i=0; i>= _binNextShift; + endBin >>= _binNextShift; + } +errAbort("start %d, end %d out of range in findBin (max is 2Gb)", start, end); +return 0; +} + +struct binKeeper *binKeeperNew(int minPos, int maxPos) +/* Create new binKeeper that can cover range. */ +{ +int binCount; +struct binKeeper *bk; +if (minPos < 0 || maxPos < 0 || minPos > maxPos) + errAbort("bad range %d,%d in binKeeperNew", minPos, maxPos); + +binCount = binFromRangeBinKeeperExtended(maxPos-1, maxPos) + 1; +AllocVar(bk); +bk->minPos = minPos; +bk->maxPos = maxPos; +bk->binCount = binCount; +AllocArray(bk->binLists, binCount); +return bk; +} + +void binKeeperFree(struct binKeeper **pBk) +/* Free up a bin keeper. */ +{ +struct binKeeper *bk = *pBk; +if (bk != NULL) + { + int i; + for (i=0; ibinCount; ++i) + slFreeList(&bk->binLists[i]); + freeMem(bk->binLists); + freez(pBk); + } +} + +void binKeeperAdd(struct binKeeper *bk, int start, int end, void *val) +/* Add item to binKeeper. */ +{ +int bin; +struct binElement *el; +if (start < bk->minPos || end > bk->maxPos || start > end) + errAbort("(%d %d) out of range (%d %d) in binKeeperAdd", + start, end, bk->minPos, bk->maxPos); +bin = binFromRangeBinKeeperExtended(start, end); +assert(bin < bk->binCount); +AllocVar(el); +el->start = start; +el->end = end; +el->val = val; +slAddHead(&bk->binLists[bin], el); +} + +int binElementCmpStart(const void *va, const void *vb) +/* Compare to sort based on start. */ +{ +const struct binElement *a = *((struct binElement **)va); +const struct binElement *b = *((struct binElement **)vb); +return a->start - b->start; +} + +struct binElement *binKeeperFind(struct binKeeper *bk, int start, int end) +/* Return a list of all items in binKeeper that intersect range. + * Free this list with slFreeList. 
*/ +{ +struct binElement *list = NULL, *newEl, *el; +int startBin, endBin; +int i,j; + +if (start < bk->minPos) start = bk->minPos; +if (end > bk->maxPos) end = bk->maxPos; +if (start >= end) return NULL; +startBin = (start>>_binFirstShift); +endBin = ((end-1)>>_binFirstShift); +for (i=0; ibinLists[j]; el != NULL; el = el->next) + { + if (rangeIntersection(el->start, el->end, start, end) > 0) + { + newEl = CloneVar(el); + slAddHead(&list, newEl); + } + } + } + startBin >>= _binNextShift; + endBin >>= _binNextShift; + } +return list; +} + +boolean binKeeperAnyOverlap(struct binKeeper *bk, int start, int end) +/* Return TRUE if start/end overlaps with any items in binKeeper. */ +{ +struct binElement *el; +int startBin, endBin; +int i,j; + +if (start < bk->minPos) start = bk->minPos; +if (end > bk->maxPos) end = bk->maxPos; +if (start >= end) return FALSE; +startBin = (start>>_binFirstShift); +endBin = ((end-1)>>_binFirstShift); +for (i=0; ibinLists[j]; el != NULL; el = el->next) + { + if (rangeIntersection(el->start, el->end, start, end) > 0) + { + return TRUE; + } + } + } + startBin >>= _binNextShift; + endBin >>= _binNextShift; + } +return FALSE; +} + +void binKeeperReplaceVal(struct binKeeper *bk, int start, int end, + void *oldVal, void *newVal) +/* Replace occurences of old val in range from start->end with newVal */ +{ +struct binElement *el; +int startBin, endBin; +int i,j; + +if (start < bk->minPos) start = bk->minPos; +if (end > bk->maxPos) end = bk->maxPos; +if (start >= end) return; +startBin = (start>>_binFirstShift); +endBin = ((end-1)>>_binFirstShift); +for (i=0; ibinLists[j]; el != NULL; el = el->next) + { + if (rangeIntersection(el->start, el->end, start, end) > 0) + { + if (el->val == oldVal) + { + el->val = newVal; + } + } + } + } + startBin >>= _binNextShift; + endBin >>= _binNextShift; + } +} + + +struct binElement *binKeeperFindSorted(struct binKeeper *bk, int start, int end) +/* Like binKeeperFind, but sort list on start coordinates. 
*/ +{ +struct binElement *list = binKeeperFind(bk, start, end); +slSort(&list, binElementCmpStart); +return list; +} + +struct binElement *binKeeperFindAll(struct binKeeper *bk) +/* Get all elements sorted. */ +{ +return binKeeperFindSorted(bk, bk->minPos, bk->maxPos); +} + +struct binElement *binKeeperFindLowest(struct binKeeper *bk, int start, int end) +/* Find the lowest overlapping range. Quick even if search range large */ +{ +struct binElement *first = NULL, *el; +int startBin = (start>>_binFirstShift), endBin = ((end-1)>>_binFirstShift); +int i,j; + +/* Search the possible range of bins at each level, looking for lowest. Once + * an overlaping range is found at a level, continue with next level, however + * must search an entire bin as they are not ordered. */ +for (i=0; ibinLists[j]; el != NULL; el = el->next) + { + if ((rangeIntersection(el->start, el->end, start, end) > 0) + && ((first == NULL) || (el->start < first->start) + || ((el->start == first->start) + && (el->end < first->end)))) + { + first = el; + foundOne = TRUE; + } + } + } + startBin >>= _binNextShift; + endBin >>= _binNextShift; + } +return first; +} + + +void binKeeperRemove(struct binKeeper *bk, int start, int end, void *val) +/* Remove item from binKeeper. */ +{ +int bin = binFromRangeBinKeeperExtended(start, end); +struct binElement **pList = &bk->binLists[bin], *newList = NULL, *el, *next; +for (el = *pList; el != NULL; el = next) + { + next = el->next; + if (el->val == val && el->start == start && el->end == end) + { + freeMem(el); + } + else + { + slAddHead(&newList, el); + } + } +slReverse(&newList); +*pList = newList; +} + +struct binKeeperCookie binKeeperFirst(struct binKeeper *bk) +/* Return an object to use by binKeeperNext() to traverse the binElements. + * The first call to binKeeperNext() will return the first entry in the + * table. 
*/ +{ +struct binKeeperCookie cookie; +cookie.bk = bk; +cookie.blIdx = 0; +cookie.nextBel = bk->binLists[0]; +return cookie; +} + +struct binElement* binKeeperNext(struct binKeeperCookie *cookie) +/* Return the next entry in the binKeeper table. */ +{ +/* if we don't have a next, move down bin list until we find one */ +while ((cookie->nextBel == NULL) + && (++cookie->blIdx < cookie->bk->binCount)) + cookie->nextBel = cookie->bk->binLists[cookie->blIdx]; +if (cookie->blIdx >= cookie->bk->binCount) + return NULL; /* no more */ +else + { + struct binElement* bel = cookie->nextBel; + cookie->nextBel = cookie->nextBel->next; + return bel; + } +} diff -Nru r-bioc-rtracklayer-1.48.0/src/ucsc/binRange.h r-bioc-rtracklayer-1.50.0/src/ucsc/binRange.h --- r-bioc-rtracklayer-1.48.0/src/ucsc/binRange.h 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/ucsc/binRange.h 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,120 @@ +#ifndef BINRANGE_H +#define BINRANGE_H + +/* binRange Stuff to handle binning - which helps us restrict + * our attention to the parts of database that contain info + * about a particular window on a chromosome. This scheme + * will work without modification for chromosome sizes up + * to half a gigaBase. The finest sized bin is 128k (1<<17). + * The next coarsest is 8x as big (1<<13). There's a hierarchy + * of bins with the chromosome itself being the final bin. + * Features are put in the finest bin they'll fit in. + * + * This file is copyright 2002 Jim Kent, but license is hereby + * granted for all use - public, private or commercial. */ + +#define BINRANGE_MAXEND_512M (512*1024*1024) +#define _binOffsetOldToExtended 4681 + +int binLevelsExtended(); +/* Return number of levels to bins. */ + +int binLevels(); +/* Return number of levels to bins. */ + +int binFirstShift(); +/* Return amount to shift a number to get to finest bin. */ + +int binNextShift(); +/* Return amount to shift a number to get to next coarser bin. 
*/ + +int binOffsetExtended(int level); +/* Return offset for bins of a given level. */ + +int binOffset(int level); +/* Return offset for bins of a given level. */ + +/***** And now for some higher level stuff - useful for binning + ***** things in memory. ******/ + +int binFromRange(int start, int end); +/* Given start,end in chromosome coordinates assign it + * a bin. There's a bin for each 128k segment, for each + * 1M segment, for each 8M segment, for each 64M segment, + * and for each chromosome (which is assumed to be less than + * 512M.) A range goes into the smallest bin it will fit in. */ + +struct binElement +/* An element in a bin. */ + { + struct binElement *next; + int start, end; /* 0 based, half open range */ + void *val; /* Actual bin item. */ + }; + +int binElementCmpStart(const void *va, const void *vb); +/* Compare to sort based on start. */ + +struct binKeeper +/* This keeps things in bins in memory */ + { + struct binKeeper *next; + int minPos; /* Minimum position to bin. */ + int maxPos; /* Maximum position to bin. */ + int binCount; /* Count of bins. */ + struct binElement **binLists; /* A list for each bin. */ + }; + +struct binKeeperCookie +/* used by binKeeperFirst/binKeeperNext in tracking location in traversing bins */ + { + struct binKeeper *bk; /* binKeeper we are associated with */ + int blIdx; /* current bin list index */ + struct binElement *nextBel; /* next binElement */ + }; + +struct binKeeper *binKeeperNew(int minPos, int maxPos); +/* Create new binKeeper that can cover range. */ + +void binKeeperFree(struct binKeeper **pBk); +/* Free up a bin keeper. */ + +void binKeeperAdd(struct binKeeper *bk, int start, int end, void *val); +/* Add item to binKeeper. */ + +void binKeeperRemove(struct binKeeper *bk, int start, int end, void *val); +/* Remove item from binKeeper. */ + +struct binElement *binKeeperFind(struct binKeeper *bk, int start, int end); +/* Return a list of all items in binKeeper that intersect range. 
+ * Free this list with slFreeList. */ + +struct binElement *binKeeperFindSorted(struct binKeeper *bk, int start, int end); +/* Like binKeeperFind, but sort list on start coordinates. */ + +struct binElement *binKeeperFindAll(struct binKeeper *bk); +/* Get all elements sorted. */ + +boolean binKeeperAnyOverlap(struct binKeeper *bk, int start, int end); +/* Return TRUE if start/end overlaps with any items in binKeeper. */ + +void binKeeperReplaceVal(struct binKeeper *bk, int start, int end, + void *oldVal, void *newVal); +/* Replace occurences of old val in range from start->end with newVal */ + +struct binElement *binKeeperFindLowest(struct binKeeper *bk, int start, int end); +/* Find the lowest overlapping range. Quick even if search range large */ + +void binKeeperRemove(struct binKeeper *bk, int start, int end, void *val); +/* Remove item from binKeeper. */ + +struct binKeeperCookie binKeeperFirst(struct binKeeper *bk); +/* Return an object to use by binKeeperNext() to traverse the binElements. + * The first call to binKeeperNext() will return the first entry in the + * table. */ + +struct binElement* binKeeperNext(struct binKeeperCookie *cookie); +/* Return the next entry in the binKeeper table. */ + +#endif /* BINRANGE_H */ + diff -Nru r-bioc-rtracklayer-1.48.0/src/ucsc/htmlColor.c r-bioc-rtracklayer-1.50.0/src/ucsc/htmlColor.c --- r-bioc-rtracklayer-1.48.0/src/ucsc/htmlColor.c 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/ucsc/htmlColor.c 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,101 @@ +/* HTML colors */ + +/* Copyright (C) 2017 The Regents of the University of California + * See README in this or parent directory for licensing information. 
*/ + +#include "common.h" + +struct htmlColor { + char *name; + unsigned rgb; +}; + +static struct htmlColor htmlColors[] = { +/* The 16 HTML basic colors, as per //www.w3.org/TR/css3-color/#html4 */ + { "black", 0 }, + { "silver", 0xc0c0c0 }, + { "gray", 0x808080 }, + { "white", 0xffffff }, + { "maroon", 0x800000 }, + { "red", 0xff0000 }, + { "purple", 0x800080 }, + { "fuchsia", 0xff00ff }, + { "green", 0x008000 }, + { "lime", 0x00ff00 }, + { "olive", 0x808000 }, + { "yellow", 0xffff00 }, + { "navy", 0x000080 }, + { "blue", 0x0000ff }, + { "teal", 0x008080 }, + { "aqua", 0x00ffff } +}; + +int htmlColorCount() +/* Return count of defined HTML colors */ +{ +return (sizeof(htmlColors) / (sizeof(struct htmlColor))); +} + +struct slName *htmlColorNames() +/* Return list of defined HTML colors */ +{ +int count = htmlColorCount(); +int i; +struct slName *colors = NULL; +for (i=0; i> 16) & 0xff; + if (g != NULL) + *g = (value >> 8) & 0xff; + if (b != NULL) + *b = value & 0xff; +} diff -Nru r-bioc-rtracklayer-1.48.0/src/ucsc/htmlColor.h r-bioc-rtracklayer-1.50.0/src/ucsc/htmlColor.h --- r-bioc-rtracklayer-1.48.0/src/ucsc/htmlColor.h 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/ucsc/htmlColor.h 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,28 @@ +/* HTML colors */ + +/* Copyright (C) 2017 The Regents of the University of California + * See README in this or parent directory for licensing information. */ + +#ifndef HTMLCOLOR_H +#define HTMLCOLOR_H + +int htmlColorCount(); +/* Return count of defined HTML colors */ + +boolean htmlColorExists(char *name); +/* Determine if color name is one of the defined HTML basic set */ + +struct slName *htmlColorNames(); +/* Return list of defined HTML colors */ + +boolean htmlColorForName(char *name, unsigned *value); +/* Lookup color for name. 
Return false if not a valid color name */ + +boolean htmlColorForCode(char *code, unsigned *value); +/* Convert value to decimal and return true if code is valid #NNNNNN hex code */ + +void htmlColorToRGB(unsigned value, int *r, int *g, int *b); +/* Convert an unsigned RGB value into separate R, G, and B components */ + +#endif + diff -Nru r-bioc-rtracklayer-1.48.0/src/ucsc/memgfx.c r-bioc-rtracklayer-1.50.0/src/ucsc/memgfx.c --- r-bioc-rtracklayer-1.48.0/src/ucsc/memgfx.c 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/ucsc/memgfx.c 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,25 @@ +/* memgfx - routines for drawing on bitmaps in memory. + * Currently limited to 256 color bitmaps. + * + * This file is copyright 2002 Jim Kent, but license is hereby + * granted for all use - public, private or commercial. */ + +#include "memgfx.h" + + +struct rgbColor colorIxToRgb(int colorIx) +/* Return rgb value at color index. */ +{ +static struct rgbColor rgb; +#ifdef MEMGFX_BIGENDIAN +rgb.r = (colorIx >> 24) & 0xff; +rgb.g = (colorIx >> 16) & 0xff; +rgb.b = (colorIx >> 8) & 0xff; +#else +rgb.r = (colorIx >> 0) & 0xff; +rgb.g = (colorIx >> 8) & 0xff; +rgb.b = (colorIx >> 16) & 0xff; +#endif +return rgb; +} + diff -Nru r-bioc-rtracklayer-1.48.0/src/ucsc/memgfx.h r-bioc-rtracklayer-1.50.0/src/ucsc/memgfx.h --- r-bioc-rtracklayer-1.48.0/src/ucsc/memgfx.h 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/ucsc/memgfx.h 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,60 @@ +/* Memgfx - stuff to do graphics in memory buffers. + * Typically will just write these out as .gif or .png files. + * This stuff is byte-a-pixel for simplicity. + * It can do 256 colors. + * + * This file is copyright 2000 Jim Kent, but license is hereby + * granted for all use - public, private or commercial. 
*/ + +#ifndef MEMGFX_H +#define MEMGFX_H + +#if defined(__sgi__) || defined(__sgi) || defined(__powerpc__) || defined(sparc) || defined(__ppc__) || defined(__s390__) || defined(__s390x__) + +// BIGENDIAN machines: + +#define MEMGFX_BIGENDIAN 1 +#define MG_WHITE 0xffffffff +#define MG_BLACK 0x000000ff +#define MG_RED 0xff0000ff +#define MG_GREEN 0x00ff00ff +#define MG_BLUE 0x0000ffff +#define MG_CYAN 0x00ffffff +#define MG_MAGENTA 0xff00ffff +#define MG_YELLOW 0xffff00ff +#define MG_GRAY 0x808080ff + +#define MAKECOLOR_32(r,g,b) (((unsigned int)0xff) | ((unsigned int)b<<8) | ((unsigned int)g << 16) | ((unsigned int)r << 24)) +#define COLOR_32_RED(c) (((c)>>24)&0xff) +#define COLOR_32_GREEN(c) (((c)>>16)&0xff) +#define COLOR_32_BLUE(c) (((c)>>8)&0xff) + +#else + +// LITTLE ENDIAN machines: + +#define MG_WHITE 0xffffffff +#define MG_BLACK 0xff000000 +#define MG_RED 0xff0000ff +#define MG_GREEN 0xff00ff00 +#define MG_BLUE 0xffff0000 +#define MG_CYAN 0xffffff00 +#define MG_MAGENTA 0xffff00ff +#define MG_YELLOW 0xff00ffff +#define MG_GRAY 0xff808080 + +#define MAKECOLOR_32(r,g,b) (((unsigned int)0xff<<24) | ((unsigned int)b<<16) | ((unsigned int)g << 8) | (unsigned int)r) +#define COLOR_32_RED(c) ((c)&0xff) +#define COLOR_32_GREEN(c) (((c)>>8)&0xff) +#define COLOR_32_BLUE(c) (((c)>>16)&0xff) +#endif + +struct rgbColor + { + unsigned char r, g, b; + }; + +struct rgbColor colorIxToRgb(int colorIx); +/* Return rgb value at color index. */ + +#endif /* MEMGFX_H */ diff -Nru r-bioc-rtracklayer-1.48.0/src/ucsc/sqlList.c r-bioc-rtracklayer-1.50.0/src/ucsc/sqlList.c --- r-bioc-rtracklayer-1.48.0/src/ucsc/sqlList.c 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/ucsc/sqlList.c 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,1293 @@ +/* Stuff for processing comma separated lists - a little long so + * in a separate module from jksql.c though interface is still + * in jksql.c. 
+ * + * This file is copyright 2002 Jim Kent, but license is hereby + * granted for all use - public, private or commercial. */ + +/* The various static routines sqlStaticArray are NOT thread-safe. */ + +#include "common.h" +#include "sqlNum.h" +#include "sqlList.h" +#include "dystring.h" +#include "hash.h" + + +int sqlByteArray(char *s, signed char *array, int arraySize) +/* Convert comma separated list of numbers to an array. Pass in + * array an max size of array. */ +{ +unsigned count = 0; +for (;;) + { + char *e; + if (s == NULL || s[0] == 0 || count == arraySize) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + array[count++] = sqlSigned(s); + s = e; + } +return count; +} + +void sqlByteStaticArray(char *s, signed char **retArray, int *retSize) +/* Convert comma separated list of numbers to an array which will be + * overwritten next call to this function, but need not be freed. */ +{ +static signed char *array = NULL; +static unsigned alloc = 0; +unsigned count = 0; + +for (;;) + { + char *e; + if (s == NULL || s[0] == 0) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + if (count >= alloc) + { + if (alloc == 0) + alloc = 64; + else + alloc <<= 1; + ExpandArray(array, count, alloc); + } + array[count++] = sqlSigned(s); + s = e; + } +*retSize = count; +*retArray = array; +} + +void sqlByteDynamicArray(char *s, signed char **retArray, int *retSize) +/* Convert comma separated list of numbers to an dynamically allocated + * array, which should be freeMem()'d when done. Thread-safe. 
*/ +{ +signed char *array = NULL; +int count = 0; + +if (s) + { + count = countSeparatedItems(s, ','); + if (count > 0) + { + AllocArray(array, count); + count = 0; + for (;;) + { + array[count++] = sqlSignedInList(&s); + if (*s++ == 0) + break; + if (*s == 0) + break; + } + } + } +*retArray = array; +*retSize = count; +} + +/*-------------------------*/ + +int sqlUbyteArray(char *s, unsigned char *array, int arraySize) +/* Convert comma separated list of numbers to an array. Pass in + * array an max size of array. */ +{ +unsigned count = 0; +for (;;) + { + char *e; + if (s == NULL || s[0] == 0 || count == arraySize) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + array[count++] = sqlUnsigned(s); + s = e; + } +return count; +} + +void sqlUbyteStaticArray(char *s, unsigned char **retArray, int *retSize) +/* Convert comma separated list of numbers to an array which will be + * overwritten next call to this function, but need not be freed. */ +{ +static unsigned char *array = NULL; +static unsigned alloc = 0; +unsigned count = 0; + +for (;;) + { + char *e; + if (s == NULL || s[0] == 0) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + if (count >= alloc) + { + if (alloc == 0) + alloc = 64; + else + alloc <<= 1; + ExpandArray(array, count, alloc); + } + array[count++] = sqlUnsigned(s); + s = e; + } +*retSize = count; +*retArray = array; +} + +void sqlUbyteDynamicArray(char *s, unsigned char **retArray, int *retSize) +/* Convert comma separated list of numbers to an dynamically allocated + * array, which should be freeMem()'d when done. Thread-safe. 
*/ +{ +unsigned char *array = NULL; +int count = 0; + +if (s) + { + count = countSeparatedItems(s, ','); + if (count > 0) + { + AllocArray(array, count); + count = 0; + for (;;) + { + array[count++] = sqlUnsignedInList(&s); + if (*s++ == 0) + break; + if (*s == 0) + break; + } + } + } +*retArray = array; +*retSize = count; +} + +/*-------------------------*/ + +int sqlCharArray(char *s, char *array, int arraySize) +/* Convert comma separated list of chars to an array. Pass in + * array and max size of array. */ +{ +unsigned count = 0; +for (;;) + { + char *e; + if (s == NULL || s[0] == 0 || count == arraySize) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + array[count++] = s[0]; + s = e; + } +return count; +} + +void sqlCharStaticArray(char *s, char **retArray, int *retSize) +/* Convert comma separated list of chars to an array which will be + * overwritten next call to this function, but need not be freed. */ +{ +static char *array = NULL; +static unsigned alloc = 0; +unsigned count = 0; + +for (;;) + { + char *e; + if (s == NULL || s[0] == 0) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + if (count >= alloc) + { + if (alloc == 0) + alloc = 64; + else + alloc <<= 1; + ExpandArray(array, count, alloc); + } + array[count++] = s[0]; + s = e; + } +*retSize = count; +*retArray = array; +} + +void sqlCharDynamicArray(char *s, char **retArray, int *retSize) +/* Convert comma separated list of chars to a dynamically allocated + * array, which should be freeMem()'d when done. Thread-safe. */ +{ +char *array = NULL; +int count = 0; + +if (s) + { + count = countSeparatedItems(s, ','); + if (count > 0) + { + AllocArray(array, count); + count = 0; + for (;;) + { + if (*s == ',') + errAbort("Empty element in list. 
Each element should contain one character."); + array[count++] = *s++; + if (!(*s == 0 || *s == ',')) + { + --s; + char *e = strchr(s, ','); + if (e) + *e = 0; + errAbort("Invalid character: %s", s); + } + if (*s++ == 0) + break; + if (*s == 0) + break; + } + } + } +*retArray = array; +*retSize = count; +} + +/*-------------------------*/ + +int sqlShortArray(char *s, short *array, int arraySize) +/* Convert comma separated list of numbers to an array. Pass in + * array an max size of array. */ +{ +unsigned count = 0; +for (;;) + { + char *e; + if (s == NULL || s[0] == 0 || count == arraySize) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + array[count++] = sqlSigned(s); + s = e; + } +return count; +} + +void sqlShortStaticArray(char *s, short **retArray, int *retSize) +/* Convert comma separated list of numbers to an array which will be + * overwritten next call to this function, but need not be freed. */ +{ +static short *array = NULL; +static unsigned alloc = 0; +unsigned count = 0; + +for (;;) + { + char *e; + if (s == NULL || s[0] == 0) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + if (count >= alloc) + { + if (alloc == 0) + alloc = 64; + else + alloc <<= 1; + ExpandArray(array, count, alloc); + } + array[count++] = sqlSigned(s); + s = e; + } +*retSize = count; +*retArray = array; +} + +void sqlShortDynamicArray(char *s, short **retArray, int *retSize) +/* Convert comma separated list of numbers to an dynamically allocated + * array, which should be freeMem()'d when done. Thread-safe. 
*/ +{ +short *array = NULL; +int count = 0; + +if (s) + { + count = countSeparatedItems(s, ','); + if (count > 0) + { + AllocArray(array, count); + count = 0; + for (;;) + { + array[count++] = sqlSignedInList(&s); + if (*s++ == 0) + break; + if (*s == 0) + break; + } + } + } +*retArray = array; +*retSize = count; +} + +/*-------------------------*/ + +int sqlUshortArray(char *s, unsigned short *array, int arraySize) +/* Convert comma separated list of numbers to an array. Pass in + * array an max size of array. */ +{ +unsigned count = 0; +for (;;) + { + char *e; + if (s == NULL || s[0] == 0 || count == arraySize) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + array[count++] = sqlUnsigned(s); + s = e; + } +return count; +} + +void sqlUshortStaticArray(char *s, unsigned short **retArray, int *retSize) +/* Convert comma separated list of numbers to an array which will be + * overwritten next call to this function, but need not be freed. */ +{ +static unsigned short *array = NULL; +static unsigned alloc = 0; +unsigned count = 0; + +for (;;) + { + char *e; + if (s == NULL || s[0] == 0) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + if (count >= alloc) + { + if (alloc == 0) + alloc = 64; + else + alloc <<= 1; + ExpandArray(array, count, alloc); + } + array[count++] = sqlUnsigned(s); + s = e; + } +*retSize = count; +*retArray = array; +} + +void sqlUshortDynamicArray(char *s, unsigned short **retArray, int *retSize) +/* Convert comma separated list of numbers to an dynamically allocated + * array, which should be freeMem()'d when done. Thread-safe. 
*/ +{ +unsigned short *array = NULL; +int count = 0; + +if (s) + { + count = countSeparatedItems(s, ','); + if (count > 0) + { + AllocArray(array, count); + count = 0; + for (;;) + { + array[count++] = sqlUnsignedInList(&s); + if (*s++ == 0) + break; + if (*s == 0) + break; + } + } + } +*retArray = array; +*retSize = count; +} + +/*-------------------------*/ +int sqlDoubleArray(char *s, double *array, int maxArraySize) +/* Convert comma separated list of floating point numbers to an array. + * Pass in array and max size of array. */ +{ +unsigned count = 0; +for (;;) + { + char *e; + if (s == NULL || s[0] == 0 || count == maxArraySize) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + array[count++] = atof(s); + s = e; + } +return count; +} + +double sqlSumDoublesCommaSep(char *s) +/* Return sum of double values in a comma-separated list */ +{ +int count = 0; +char *p = s; +while (*p) + if (*p++ == ',') + count++; +double *array = NULL; +int arraySize = count + 1; +AllocArray(array, arraySize); +char *t = cloneString(s); +count = sqlDoubleArray(cloneString(s), array, arraySize); +freez(&t); +int i; +double sum = 0.0; +for (i = 0; i < count; i++) + sum += array[i]; +return sum; +} + +int sqlFloatArray(char *s, float *array, int maxArraySize) +/* Convert comma separated list of floating point numbers to an array. + * Pass in array and max size of array. */ +{ +unsigned count = 0; +for (;;) + { + char *e; + if (s == NULL || s[0] == 0 || count == maxArraySize) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + array[count++] = atof(s); + s = e; + } +return count; +} + +void sqlDoubleStaticArray(char *s, double **retArray, int *retSize) +/* Convert comma separated list of numbers to an array which will be + * overwritten next call to this function, but need not be freed. 
*/ +{ +static double *array = NULL; +static unsigned alloc = 0; +unsigned count = 0; + +for (;;) + { + char *e; + if (s == NULL || s[0] == 0) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + if (count >= alloc) + { + if (alloc == 0) + alloc = 64; + else + alloc <<= 1; + ExpandArray(array, count, alloc); + } + array[count++] = atof(s); + s = e; + } +*retSize = count; +*retArray = array; +} + +void sqlFloatStaticArray(char *s, float **retArray, int *retSize) +/* Convert comma separated list of numbers to an array which will be + * overwritten next call to this function, but need not be freed. */ +{ +static float *array = NULL; +static unsigned alloc = 0; +unsigned count = 0; + +for (;;) + { + char *e; + if (s == NULL || s[0] == 0) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + if (count >= alloc) + { + if (alloc == 0) + alloc = 128; + else + alloc <<= 1; + ExpandArray(array, count, alloc); + } + array[count++] = atof(s); + s = e; + } +*retSize = count; +*retArray = array; +} + +void sqlDoubleDynamicArray(char *s, double **retArray, int *retSize) +/* Convert comma separated list of numbers to an dynamically allocated + * array, which should be freeMem()'d when done. Thread-safe.*/ +{ +double *array = NULL; +int count = 0; + +if (s) + { + count = countSeparatedItems(s, ','); + if (count > 0) + { + AllocArray(array, count); + count = 0; + for (;;) + { + array[count++] = sqlDoubleInList(&s); + if (*s++ == 0) + break; + if (*s == 0) + break; + } + } + } +*retArray = array; +*retSize = count; +} + +void sqlFloatDynamicArray(char *s, float **retArray, int *retSize) +/* Convert comma separated list of numbers to an dynamically allocated + * array, which should be freeMem()'d when done. Thread-safe. 
*/ +{ +float *array = NULL; +int count = 0; + +if (s) + { + count = countSeparatedItems(s, ','); + if (count > 0) + { + AllocArray(array, count); + count = 0; + for (;;) + { + array[count++] = sqlFloatInList(&s); + if (*s++ == 0) + break; + if (*s == 0) + break; + } + } + } +*retArray = array; +*retSize = count; +} + +/*-------------------------*/ + +int sqlUnsignedArray(char *s, unsigned *array, int arraySize) +/* Convert comma separated list of numbers to an array. Pass in + * array and max size of array. */ +{ +unsigned count = 0; +for (;;) + { + char *e; + if (s == NULL || s[0] == 0 || count == arraySize) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + array[count++] = sqlUnsigned(s); + s = e; + } +return count; +} + +void sqlUnsignedStaticArray(char *s, unsigned **retArray, int *retSize) +/* Convert comma separated list of numbers to an array which will be + * overwritten next call to this function, but need not be freed. */ +{ +static unsigned *array = NULL; +static unsigned alloc = 0; +unsigned count = 0; + +for (;;) + { + char *e; + if (s == NULL || s[0] == 0) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + if (count >= alloc) + { + if (alloc == 0) + alloc = 64; + else + alloc <<= 1; + ExpandArray(array, count, alloc); + } + array[count++] = sqlUnsigned(s); + s = e; + } +*retSize = count; +*retArray = array; +} + +void sqlUnsignedDynamicArray(char *s, unsigned **retArray, int *retSize) +/* Convert comma separated list of numbers to an dynamically allocated + * array, which should be freeMem()'d when done. Thread-safe. 
*/ +{ +unsigned *array = NULL; +int count = 0; + +if (s) + { + count = countSeparatedItems(s, ','); + if (count > 0) + { + AllocArray(array, count); + count = 0; + for (;;) + { + array[count++] = sqlUnsignedInList(&s); + if (*s++ == 0) + break; + if (*s == 0) + break; + } + } + } +*retArray = array; +*retSize = count; +} + +/*-------------------------*/ + +int sqlSignedArray(char *s, int *array, int arraySize) +/* Convert comma separated list of numbers to an array. Pass in + * array an max size of array. */ +{ +int count = 0; +for (;;) + { + char *e; + if (s == NULL || s[0] == 0 || count == arraySize) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + array[count++] = sqlSigned(s); + s = e; + } +return count; +} + +void sqlSignedStaticArray(char *s, int **retArray, int *retSize) +/* Convert comma separated list of numbers to an array which will be + * overwritten next call to this function, but need not be freed. */ +{ +static int *array = NULL; +static int alloc = 0; +int count = 0; + +for (;;) + { + char *e; + if (s == NULL || s[0] == 0) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + if (count >= alloc) + { + if (alloc == 0) + alloc = 64; + else + alloc <<= 1; + ExpandArray(array, count, alloc); + } + array[count++] = sqlSigned(s); + s = e; + } +*retSize = count; +*retArray = array; +} + +void sqlSignedDynamicArray(char *s, int **retArray, int *retSize) +/* Convert comma separated list of numbers to an dynamically allocated + * array, which should be freeMem()'d when done. Thread-safe. 
*/ +{ +int *array = NULL; +int count = 0; + +if (s) + { + count = countSeparatedItems(s, ','); + if (count > 0) + { + AllocArray(array, count); + count = 0; + for (;;) + { + array[count++] = sqlSignedInList(&s); + if (*s++ == 0) + break; + if (*s == 0) + break; + } + } + } +*retArray = array; +*retSize = count; +} + + +/*-------------------------*/ + +int sqlLongLongArray(char *s, long long *array, int arraySize) +/* Convert comma separated list of numbers to an array. Pass in + * array and max size of array. */ +{ +unsigned count = 0; +for (;;) + { + char *e; + if (s == NULL || s[0] == 0 || count == arraySize) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + array[count++] = sqlLongLong(s); + s = e; + } +return count; +} + +void sqlLongLongStaticArray(char *s, long long **retArray, int *retSize) +/* Convert comma separated list of numbers to an array which will be + * overwritten next call to this function, but need not be freed. */ +{ +static long long *array = NULL; +static unsigned alloc = 0; +unsigned count = 0; + +for (;;) + { + char *e; + if (s == NULL || s[0] == 0) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + if (count >= alloc) + { + if (alloc == 0) + alloc = 64; + else + alloc <<= 1; + ExpandArray(array, count, alloc); + } + array[count++] = sqlLongLong(s); + s = e; + } +*retSize = count; +*retArray = array; +} + +void sqlLongLongDynamicArray(char *s, long long **retArray, int *retSize) +/* Convert comma separated list of numbers to an dynamically allocated + * array, which should be freeMem()'d when done. Thread-safe. 
*/ +{ +long long *array = NULL; +int count = 0; + +if (s) + { + count = countSeparatedItems(s, ','); + if (count > 0) + { + AllocArray(array, count); + count = 0; + for (;;) + { + array[count++] = sqlLongLongInList(&s); + if (*s++ == 0) + break; + if (*s == 0) + break; + } + } + } +*retArray = array; +*retSize = count; +} + +/*-------------------------*/ + + +int sqlStringArray(char *s, char **array, int maxArraySize) +/* Convert comma separated list of strings to an array. Pass in + * array and max size of array. Returns actual size*/ +{ +int count = 0; +for (;;) + { + char *e; + if (s == NULL || s[0] == 0 || count == maxArraySize) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + array[count++] = s; + s = e; + } +return count; +} + +void sqlStringStaticArray(char *s, char ***retArray, int *retSize) +/* Convert comma separated list of strings to an array which will be + * overwritten next call to this function, but need not be freed. */ +{ +static char **array = NULL; +static int alloc = 0; +int count = 0; + +for (;;) + { + char *e; + if (s == NULL || s[0] == 0) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + if (count >= alloc) + { + if (alloc == 0) + alloc = 64; + else + alloc <<= 1; + ExpandArray(array, count, alloc); + } + array[count++] = s; + s = e; + } +*retSize = count; +*retArray = array; +} + +void sqlStringDynamicArray(char *s, char ***retArray, int *retSize) +/* Convert comma separated list of strings to an dynamically allocated + * array, which should be freeMem()'d when done. As a speed option all + * of the elements in the array are needMem()'d at the same time. This + * means that all the entries are free()'d by calling freeMem() on the + * first element. For example: + * sqlStringDynamicArray(s, &retArray, &retSize); + * DoSomeFunction(retArray, retSize); + * freeMem(retArray[0]); + * freeMem(retArray); + * Thread-safe. 
*/ +{ +char **array = NULL; +int count = 0; +if (s) + { + count = countSeparatedItems(s, ','); + if (count > 0) + { + AllocArray(array, count); + count = 0; + s = cloneString(s); + for (;;) + { + char *e; + if (s == NULL || s[0] == 0) + break; + e = strchr(s, ','); + if (e != NULL) + *e++ = 0; + array[count++] = s; + s = e; + } + } + } +*retArray = array; +*retSize = count; +} + +char *sqlDoubleArrayToString( double *array, int arraySize) +{ +int i; +struct dyString *string = newDyString(256); +for( i = 0 ; i < arraySize; i++ ) + { + dyStringPrintf(string, "%f,", array[i]); + } +return dyStringCannibalize(&string); +} + +char *sqlFloatArrayToString( float *array, int arraySize) +{ +int i; +struct dyString *string = newDyString(256); +for( i = 0 ; i < arraySize; i++ ) + { + dyStringPrintf(string, "%f,", array[i]); + } +return dyStringCannibalize(&string); +} + +char *sqlUnsignedArrayToString( unsigned *array, int arraySize) +{ +int i; +struct dyString *string = newDyString(256); +for( i = 0 ; i < arraySize; i++ ) + { + dyStringPrintf(string, "%u,", array[i]); + } +return dyStringCannibalize(&string); +} + +char *sqlSignedArrayToString( int *array, int arraySize) +{ +int i; +struct dyString *string = newDyString(256); +for( i = 0 ; i < arraySize; i++ ) + { + dyStringPrintf(string, "%d,", array[i]); + } +return dyStringCannibalize(&string); +} + +char *sqlShortArrayToString( short *array, int arraySize) +{ +int i; +struct dyString *string = newDyString(256); +for( i = 0 ; i < arraySize; i++ ) + { + dyStringPrintf(string, "%d,", array[i]); + } +return dyStringCannibalize(&string); +} + +char *sqlUshortArrayToString( unsigned short *array, int arraySize) +{ +int i; +struct dyString *string = newDyString(256); +for( i = 0 ; i < arraySize; i++ ) + { + dyStringPrintf(string, "%u,", array[i]); + } +return dyStringCannibalize(&string); +} + +char *sqlByteArrayToString( signed char *array, int arraySize) +{ +int i; +struct dyString *string = newDyString(256); +for( i = 0 ; i 
< arraySize; i++ ) + { + dyStringPrintf(string, "%d,", array[i]); + } +return dyStringCannibalize(&string); +} + +char *sqlUbyteArrayToString( unsigned char *array, int arraySize) +{ +int i; +struct dyString *string = newDyString(256); +for( i = 0 ; i < arraySize; i++ ) + { + dyStringPrintf(string, "%u,", array[i]); + } +return dyStringCannibalize(&string); +} + +char *sqlCharArrayToString( char *array, int arraySize) +{ +int i; +struct dyString *string = newDyString(256); +for( i = 0 ; i < arraySize; i++ ) + { + dyStringPrintf(string, "%c,", array[i]); + } +return dyStringCannibalize(&string); +} + +char *sqlLongLongArrayToString( long long *array, int arraySize) +{ +int i; +struct dyString *string = newDyString(256); +for( i = 0 ; i < arraySize; i++ ) + { + dyStringPrintf(string, "%lld,", array[i]); + } +return dyStringCannibalize(&string); +} + +char *sqlStringArrayToString( char **array, int arraySize) +{ +int i; +struct dyString *string = newDyString(256); +for( i = 0 ; i < arraySize; i++ ) + { + dyStringPrintf(string, "%s,", array[i]); + } +return dyStringCannibalize(&string); +} + + +/* -------------- */ + + + +void sqlStringFreeDynamicArray(char ***pArray) +/* Free up a dynamic array (ends up freeing array and first string on it.) */ +{ +char **array; +if ((array = *pArray) != NULL) + { + freeMem(array[0]); + freez(pArray); + } +} + +int sqlUnsignedComma(char **pS) +/* Return signed number at *pS. Advance *pS past comma at end */ +{ +char *s = *pS; +char *e = strchr(s, ','); +unsigned ret; + +*e++ = 0; +*pS = e; +ret = sqlUnsigned(s); +return ret; +} + + +int sqlSignedComma(char **pS) +/* Return signed number at *pS. Advance *pS past comma at end */ +{ +char *s = *pS; +char *e = strchr(s, ','); +int ret; + +*e++ = 0; +*pS = e; +ret = sqlSigned(s); +return ret; +} + +char sqlCharComma(char **pS) +/* Return char at *pS. 
Advance *pS past comma after char */ +{ +char *s = *pS; +char *e = strchr(s, ','); +int ret; + +*e++ = 0; +*pS = e; +ret = s[0]; +return ret; +} + +long long sqlLongLongComma(char **pS) +/* Return offset (often 64 bits) at *pS. Advance *pS past comma at + * end */ +{ +char *s = *pS; +char *e = strchr(s, ','); +long long ret; + +*e++ = 0; +*pS = e; +ret = sqlLongLong(s); +return ret; +} + +float sqlFloatComma(char **pS) +/* Return signed number at *pS. Advance *pS past comma at end */ +{ +char *s = *pS; +char *e = strchr(s, ','); +float ret; + +*e++ = 0; +*pS = e; +ret = atof(s); +return ret; +} + +double sqlDoubleComma(char **pS) +/* Return signed number at *pS. Advance *pS past comma at end */ +{ +char *s = *pS; +char *e = strchr(s, ','); +double ret; + +*e++ = 0; +*pS = e; +ret = atof(s); +return ret; +} + + +static char *findStringEnd(char *start, char endC) +/* Return end of string. */ +{ +char c; +char *s = start; + +for (;;) + { + c = *s; + if (c == endC) + return s; + else if (c == 0) + errAbort("Unterminated string"); + ++s; + } +} + +static char *sqlGetOptQuoteString(char **pS) +/* Return string at *pS. (Either quoted or not.) Advance *pS. */ +{ +char *s = *pS; +char *e; +char c = *s; + +if (c == '"' || c == '\'') + { + s += 1; + e = findStringEnd(s, c); + *e++ = 0; + if (*e++ != ',') + errAbort("Expecting comma after string"); + } +else + { + e = strchr(s, ','); + *e++ = 0; + } +*pS = e; +return s; +} + +char *sqlStringComma(char **pS) +/* Return string at *pS. (Either quoted or not.) Advance *pS. */ +{ +return cloneString(sqlGetOptQuoteString(pS)); +} + +void sqlFixedStringComma(char **pS, char *buf, int bufSize) +/* Copy string at *pS to buf. Advance *pS. */ +{ +strncpy(buf, sqlGetOptQuoteString(pS), bufSize); +} + +char *sqlEatChar(char *s, char c) +/* Make sure next character is 'c'. 
Return past next char */ +{ +if (*s++ != c) + errAbort("Expecting %c got %c (%d) in database", c, s[-1], s[-1]); +return s; +} + +static struct hash *buildSymHash(char **values, boolean isEnum) +/* build a hash of values for either enum or set symbolic column */ +{ +struct hash *valHash = hashNew(0); +unsigned setVal = 1; /* not used for enum */ +int iVal; +for (iVal = 0; values[iVal] != NULL; iVal++) + { + if (isEnum) + hashAddInt(valHash, values[iVal], iVal); + else + { + hashAddInt(valHash, values[iVal], setVal); + setVal = setVal << 1; + } + } +return valHash; +} + +unsigned sqlEnumParse(char *valStr, char **values, struct hash **valHashPtr) +/* parse an enumerated column value */ +{ +if (*valHashPtr == NULL) + *valHashPtr = buildSymHash(values, TRUE); +return hashIntVal(*valHashPtr, valStr); +} + +unsigned sqlEnumComma(char **pS, char **values, struct hash **valHashPtr) +/* Return enum at *pS. (Either quoted or not.) Advance *pS. */ +{ +return sqlEnumParse(sqlGetOptQuoteString(pS), values, valHashPtr); +} + +void sqlEnumPrint(FILE *f, unsigned value, char **values) +/* print an enumerated column value */ +{ +fputs(values[value], f); +} + +unsigned sqlSetParse(char *valStr, char **values, struct hash **valHashPtr) +/* parse a set column value */ +{ +if (*valHashPtr == NULL) + *valHashPtr = buildSymHash(values, FALSE); +/* parse comma separated string */ +unsigned value = 0; +char *val = strtok(valStr, ","); +while (val != NULL) + { + value |= hashIntVal(*valHashPtr, val); + val = strtok(NULL, ","); + } + +return value; +} + +unsigned sqlSetComma(char **pS, char **values, struct hash **valHashPtr) +/* Return set at *pS. (Either quoted or not.) Advance *pS. 
*/ +{ +return sqlSetParse(sqlGetOptQuoteString(pS), values, valHashPtr); +} + +void sqlSetPrint(FILE *f, unsigned value, char **values) +/* print a set column value */ +{ +int iVal; +unsigned curVal = 1; +int cnt = 0; +for (iVal = 0; values[iVal] != NULL; iVal++, curVal = curVal << 1) + { + if (curVal & value) + { + if (cnt > 0) + fputc(',', f); + fputs(values[iVal], f); + cnt++; + } + } +} diff -Nru r-bioc-rtracklayer-1.48.0/src/ucsc/sqlList.h r-bioc-rtracklayer-1.50.0/src/ucsc/sqlList.h --- r-bioc-rtracklayer-1.48.0/src/ucsc/sqlList.h 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/ucsc/sqlList.h 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,145 @@ +/* Stuff for processing comma separated lists . + * + * This file is copyright 2002 Jim Kent, but license is hereby + * granted for all use - public, private or commercial. */ + +#ifndef SQLLIST_H +#define SQLLIST_H +struct hash; + +int sqlDoubleArray(char *s, double *array, int maxArraySize); +double sqlSumDoublesCommaSep(char *s); +/* Return sum of double values in a comma-separated list */ + +int sqlFloatArray(char *s, float *array, int maxArraySize); +int sqlUnsignedArray(char *s, unsigned *array, int maxArraySize); +int sqlSignedArray(char *s, int *array, int maxArraySize); +int sqlShortArray(char *s, short *array, int arraySize); +int sqlUshortArray(char *s, unsigned short *array, int arraySize); +int sqlByteArray(char *s, signed char *array, int arraySize); +int sqlUbyteArray(char *s, unsigned char *array, int arraySize); +int sqlCharArray(char *s, char *array, int arraySize); +int sqlLongLongArray(char *s, long long *array, int arraySize); +/* Convert comma separated list of numbers to an array. Pass in + * array and max size of array. 
Returns actual array size.*/ + +void sqlDoubleStaticArray(char *s, double **retArray, int *retSize); +void sqlFloatStaticArray(char *s, float **retArray, int *retSize); +void sqlUnsignedStaticArray(char *s, unsigned **retArray, int *retSize); +void sqlSignedStaticArray(char *s, int **retArray, int *retSize); +void sqlShortStaticArray(char *s, short **retArray, int *retSize); +void sqlUshortStaticArray(char *s, unsigned short **retArray, int *retSize); +void sqlByteStaticArray(char *s, signed char **retArray, int *retSize); +void sqlUbyteStaticArray(char *s, unsigned char **retArray, int *retSize); +void sqlCharStaticArray(char *s, char **retArray, int *retSize); +void sqlLongLongStaticArray(char *s, long long **array, int *retSize); +/* Convert comma separated list of numbers to an array which will be + * overwritten next call to this function or to sqlXxxxxxDynamicArray, + * but need not be freed. */ + +void sqlDoubleDynamicArray(char *s, double **retArray, int *retSize); +void sqlFloatDynamicArray(char *s, float **retArray, int *retSize); +void sqlUnsignedDynamicArray(char *s, unsigned **retArray, int *retSize); +void sqlSignedDynamicArray(char *s, int **retArray, int *retSize); +void sqlShortDynamicArray(char *s, short **retArray, int *retSize); +void sqlUshortDynamicArray(char *s, unsigned short **retArray, int *retSize); +void sqlByteDynamicArray(char *s, signed char **retArray, int *retSize); +void sqlUbyteDynamicArray(char *s, unsigned char **retArray, int *retSize); +void sqlCharDynamicArray(char *s, char **retArray, int *retSize); +void sqlLongLongDynamicArray(char *s, long long **retArray, int *retSize); +/* Convert comma separated list of numbers to an dynamically allocated + * array, which should be freeMem()'d when done. */ + + +int sqlStringArray(char *s, char **array, int maxArraySize); +/* Convert comma separated list of strings to an array. Pass in + * array and max size of array. Returns actual size. 
This will + * only persist as long as s persists.... Use sqlStringDynamicArray + * if calling repeatedly. */ + +void sqlStringStaticArray(char *s, char ***retArray, int *retSize); +/* Convert comma separated list of strings to an array which will be + * overwritten next call to this function or to sqlUnsignedDynamicArray, + * but need not be freed. */ + +void sqlStringDynamicArray(char *s, char ***retArray, int *retSize); +/* Convert comma separated list of strings to an dynamically allocated + * array, which should be freeMem()'d when done. */ + +void sqlStringFreeDynamicArray(char ***pArray); +/* Free up a dynamic array (ends up freeing array and first string on it.) */ + +char *sqlDoubleArrayToString( double *array, int arraySize); +char *sqlFloatArrayToString( float *array, int arraySize); +char *sqlUnsignedArrayToString( unsigned *array, int arraySize); +char *sqlSignedArrayToString( int *array, int arraySize); +char *sqlShortArrayToString( short *array, int arraySize); +char *sqlUshortArrayToString( unsigned short *array, int arraySize); +char *sqlByteArrayToString( signed char *array, int arraySize); +char *sqlUbyteArrayToString( unsigned char *array, int arraySize); +char *sqlCharArrayToString( char *array, int arraySize); +char *sqlLongLongArrayToString( long long *array, int arraySize); +char *sqlStringArrayToString( char **array, int arraySize); +/* Convert arrays into comma separated strings. The char *'s returned + * should be freeMem()'d when done */ + +char *sqlEscapeString(const char *orig); +/* Prepares string for inclusion in a SQL statement . Remember to free + * returned string. returned string contains strlen(length)*2+1 as many bytes + * as orig because in worst case every character has to be escaped. + * Example 1: The Gene's Name -> The Gene''s Name + * Example 2: he said "order and orient" -> he said ""order and orient"" */ + +char *sqlEscapeString2(char *to, const char* from); +/* Prepares a string for inclusion in a sql statement. 
Output string + * must be 2*strlen(from)+1 */ + +int sqlUnsignedComma(char **pS); +/* Return signed number at *pS. Advance *pS past comma at end. + * This function is used by the system that automatically loads + * structured object from longblobs. */ + +int sqlSignedComma(char **pS); +/* Return signed number at *pS. Advance *pS past comma at end */ + +char sqlCharComma(char **pS); +/* Return char at *pS. Advance *pS past comma after char */ + +long long sqlLongLongComma(char **pS); +/* Return long long number at *pS. Advance *pS past comma at end */ + +double sqlDoubleComma(char **pS); +/* Return double floating number at *pS. Advance *pS past comma at end */ + +float sqlFloatComma(char **pS); +/* Return floating point number at *pS. Advance *pS past comma at end */ + +char *sqlStringComma(char **pS); +/* Return string at *pS. (Either quoted or not.) Advance *pS. */ + +void sqlFixedStringComma(char **pS, char *buf, int bufSize); +/* Copy string at *pS to buf. Advance *pS. */ + +char *sqlEatChar(char *s, char c); +/* Make sure next character is 'c'. Return past next char */ + +unsigned sqlEnumParse(char *valStr, char **values, struct hash **valHashPtr); +/* parse an enumerated column value */ + +unsigned sqlEnumComma(char **pS, char **values, struct hash **valHashPtr); +/* Return enum at *pS. (Either quoted or not.) Advance *pS. */ + +void sqlEnumPrint(FILE *f, unsigned value, char **values); +/* print an enumerated column value */ + +unsigned sqlSetParse(char *valStr, char **values, struct hash **valHashPtr); +/* parse a set column value */ + +unsigned sqlSetComma(char **pS, char **values, struct hash **valHashPtr); +/* Return set at *pS. (Either quoted or not.) Advance *pS. 
*/ + +void sqlSetPrint(FILE *f, unsigned value, char **values); +/* print a set column value */ + +#endif /* SQLLIST_H */ + diff -Nru r-bioc-rtracklayer-1.48.0/src/ucsc/tokenizer.c r-bioc-rtracklayer-1.50.0/src/ucsc/tokenizer.c --- r-bioc-rtracklayer-1.48.0/src/ucsc/tokenizer.c 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/ucsc/tokenizer.c 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,216 @@ +/* tokenizer - A tokenizer structure that will chop up file into + * tokens. It is aware of quoted strings and otherwise tends to return + * white-space or punctuated-separated words, with punctuation in + * a separate token. This is used by autoSql. */ + +/* Copyright (C) 2011 The Regents of the University of California + * See README in this or parent directory for licensing information. */ + +#include "common.h" +#include "errAbort.h" +#include "linefile.h" +#include "tokenizer.h" + + +struct tokenizer *tokenizerOnLineFile(struct lineFile *lf) +/* Create a new tokenizer on open lineFile. */ +{ +struct tokenizer *tkz; +AllocVar(tkz); +tkz->sAlloc = 128; +tkz->string = needMem(tkz->sAlloc); +tkz->lf = lf; +tkz->curLine = tkz->linePt = ""; +return tkz; +} + +struct tokenizer *tokenizerNew(char *fileName) +/* Return a new tokenizer. */ +{ +return tokenizerOnLineFile(lineFileOpen(fileName, TRUE)); +} + +void tokenizerFree(struct tokenizer **pTkz) +/* Tear down a tokenizer. */ +{ +struct tokenizer *tkz; +if ((tkz = *pTkz) != NULL) + { + freeMem(tkz->string); + lineFileClose(&tkz->lf); + freez(pTkz); + } +} + +void tokenizerReuse(struct tokenizer *tkz) +/* Reuse token. */ +{ +if (!tkz->eof) + tkz->reuse = TRUE; +} + +int tokenizerLineCount(struct tokenizer *tkz) +/* Return line of current token. */ +{ +return tkz->lf->lineIx; +} + +char *tokenizerFileName(struct tokenizer *tkz) +/* Return name of file. 
*/ +{ +return tkz->lf->fileName; +} + +char *tokenizerNext(struct tokenizer *tkz) +/* Return token's next string (also available as tkz->string) or + * NULL at EOF. */ +{ +char *start, *end; +char c, *s; +int size; +if (tkz->reuse) + { + tkz->reuse = FALSE; + return tkz->string; + } +tkz->leadingSpaces = 0; +for (;;) /* Skip over white space and comments. */ + { + int lineSize; + s = start = skipLeadingSpaces(tkz->linePt); + tkz->leadingSpaces += s - tkz->linePt; + if ((c = start[0]) != 0) + { + if (tkz->uncommentC && c == '/') + { + if (start[1] == '/') + ; /* Keep going in loop effectively ignoring rest of line. */ + else if (start[1] == '*') + { + start += 2; + for (;;) + { + char *end = stringIn("*/", start); + if (end != NULL) + { + tkz->linePt = end+2; + break; + } + if (!lineFileNext(tkz->lf, &tkz->curLine, &lineSize)) + errAbort("End of file (%s) in comment", tokenizerFileName(tkz)); + start = tkz->curLine; + } + continue; + } + else + break; + } + else if (tkz->uncommentShell && c == '#') + ; /* Keep going in loop effectively ignoring rest of line. */ + else + break; /* Got something real. 
*/ + } + if (!lineFileNext(tkz->lf, &tkz->curLine, &lineSize)) + { + tkz->eof = TRUE; + return NULL; + } + tkz->leadingSpaces += 1; + tkz->linePt = tkz->curLine; + } +if (isalnum(c) || (c == '_')) + { + for (;;) + { + s++; + if (!(isalnum(*s) || (*s == '_'))) + break; + } + end = s; + } +else if (c == '"' || c == '\'') + { + char quot = c; + if (tkz->leaveQuotes) + start = s++; + else + start = ++s; + for (;;) + { + c = *s; + if (c == quot) + { + if (s[-1] == '\\') + { + if (s >= start+2 && s[-2] == '\\') + break; + } + else + break; + } + else if (c == 0) + { + break; + } + ++s; + } + end = s; + if (c != 0) + ++s; + if (tkz->leaveQuotes) + end += 1; + } +else + { + end = ++s; + } +tkz->linePt = s; +size = end - start; +if (size >= tkz->sAlloc) + { + tkz->sAlloc = size+128; + tkz->string = needMoreMem(tkz->string, 0, tkz->sAlloc); + } +memcpy(tkz->string, start, size); +tkz->string[size] = 0; +return tkz->string; +} + + +void tokenizerErrAbort(struct tokenizer *tkz, char *format, ...) +/* Print error message followed by file and line number and + * abort. */ +{ +va_list args; +va_start(args, format); +vaWarn(format, args); +errAbort("line %d of %s:\n%s", + tokenizerLineCount(tkz), tokenizerFileName(tkz), tkz->curLine); +} + +void tokenizerNotEnd(struct tokenizer *tkz) +/* Squawk if at end. */ +{ +if (tkz->eof) + errAbort("Unexpected end of input line %d of %s", tkz->lf->lineIx, tkz->lf->fileName); +} + +char *tokenizerMustHaveNext(struct tokenizer *tkz) +/* Get next token, which must be there. */ +{ +char *s = tokenizerNext(tkz); +if (s == NULL) + errAbort("Unexpected end of input line %d of %s", tkz->lf->lineIx, tkz->lf->fileName); +return s; +} + +void tokenizerMustMatch(struct tokenizer *tkz, char *string) +/* Require next token to match string. Return next token + * if it does, otherwise abort. 
*/ +{ +if (sameWord(tkz->string, string)) + tokenizerMustHaveNext(tkz); +else + tokenizerErrAbort(tkz, "Expecting %s got %s", string, tkz->string); +} + diff -Nru r-bioc-rtracklayer-1.48.0/src/ucsc/tokenizer.h r-bioc-rtracklayer-1.50.0/src/ucsc/tokenizer.h --- r-bioc-rtracklayer-1.48.0/src/ucsc/tokenizer.h 1970-01-01 00:00:00.000000000 +0000 +++ r-bioc-rtracklayer-1.50.0/src/ucsc/tokenizer.h 2020-10-27 17:24:51.000000000 +0000 @@ -0,0 +1,71 @@ +/* tokenizer - A tokenizer structure that will chop up file into + * tokens. It is aware of quoted strings and otherwise tends to return + * white-space or punctuated-separated words, with punctuation in + * a separate token. This is used by autoSql. */ + +#ifndef TOKENIZER_H +#define TOKENIZER_H + +struct tokenizer +/* This handles reading in tokens. */ + { + bool reuse; /* True if want to reuse this token. */ + bool eof; /* True at end of file. */ + int leadingSpaces; /* Number of leading spaces before token. */ + struct lineFile *lf; /* Underlying file. */ + char *curLine; /* Current line of text. */ + char *linePt; /* Start position within current line. */ + char *string; /* String value of token */ + int sSize; /* Size of string. */ + int sAlloc; /* Allocated string size. */ + /* Some variables set after tokenizerNew to control details of + * parsing. */ + bool leaveQuotes; /* Leave quotes in string. */ + bool uncommentC; /* Take out C (and C++) style comments. */ + bool uncommentShell; /* Take out # style comments. */ + }; + +struct tokenizer *tokenizerNew(char *fileName); +/* Return a new tokenizer. */ + +struct tokenizer *tokenizerOnLineFile(struct lineFile *lf); +/* Create a new tokenizer on open lineFile. */ + +void tokenizerFree(struct tokenizer **pTkz); +/* Tear down a tokenizer. */ + +void tokenizerReuse(struct tokenizer *tkz); +/* Reuse token. */ + +int tokenizerLineCount(struct tokenizer *tkz); +/* Return line of current token. */ + +char *tokenizerFileName(struct tokenizer *tkz); +/* Return name of file. 
*/ + +char *tokenizerNext(struct tokenizer *tkz); +/* Return token's next string (also available as tkz->string) or + * NULL at EOF. This string will be overwritten with the next call + * to tokenizerNext, so cloneString if you need to save it. */ + +void tokenizerErrAbort(struct tokenizer *tkz, char *format, ...) +/* Print error message followed by file and line number and + * abort. */ +#if defined(__GNUC__) +__attribute__((format(printf, 2, 3))) +#endif +; + + +void tokenizerNotEnd(struct tokenizer *tkz); +/* Squawk if at end. */ + +char *tokenizerMustHaveNext(struct tokenizer *tkz); +/* Get next token, which must be there. */ + +void tokenizerMustMatch(struct tokenizer *tkz, char *string); +/* Require next token to match string. Return next token + * if it does, otherwise abort. */ + +#endif /* TOKENIZER_H */ +