diff -Nru libvcflib-1.0.1+dfsg/CMakeLists.txt libvcflib-1.0.2+dfsg/CMakeLists.txt --- libvcflib-1.0.1+dfsg/CMakeLists.txt 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/CMakeLists.txt 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,318 @@ +cmake_minimum_required(VERSION 3.1) +project(vcflib) + +include(ExternalProject) +include(FeatureSummary) + +find_package(PkgConfig REQUIRED) + +include(FindBZip2) +include(FindLibLZMA) +include(FindZLIB) + +feature_summary( + FATAL_ON_MISSING_REQUIRED_PACKAGES + WHAT REQUIRED_PACKAGES_NOT_FOUND) + +# ---- Options + +option(BUILD_DOC "Build documentation" ON) +option(OPENMP "Enable OpenMP" OFF) +option(PROFILING "Enable profiling" OFF) +option(GPROF "Enable gprof profiling" OFF) + +# ---- Dependencies + +find_package(ZLIB) +set_package_properties(ZLIB PROPERTIES TYPE REQUIRED) +#find_package(LIBLZMA) +#set_package_properties(LIBLZMA PROPERTIES TYPE REQUIRED) +find_package(Threads) +set_package_properties(Threads PROPERTIES TYPE REQUIRED) + +pkg_check_modules(HTSLIB htslib) +pkg_check_modules(TABIXPP tabixpp) # Optional + +# ---- Build switches + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -std=c++0x -D_FILE_OFFSET_BITS=64 -Isrc -O3") + +if(OPENMP) + find_package(OpenMP REQUIRED) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +endif(OPENMP) + +if(PROFILING) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g") +endif(PROFILING) + +if(GPROF) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pg") +endif(GPROF) + +# ---- Include files + +include_directories(include) +include_directories(fastahack) +include_directories(intervaltree) +include_directories(smithwaterman) +include_directories(multichoose) +include_directories(filevercmp) + +if(HTSLIB_LOCAL) + include_directories( + ${HTSLIB_LOCAL} + tabixpp/htslib + contrib/htslib) +endif(HTSLIB_LOCAL) + +file(GLOB INCLUDES + src/*.h* + multichoose/*.h* + intervaltree/*.h* + smithwaterman/*.h* + fastahack/*.h* + filevercmp/*.h*) + +add_library(vcflib STATIC + 
src/Variant.h + src/split.h + src/pdflib.hpp + src/var.hpp + src/cdflib.hpp + src/rnglib.hpp + src/join.h + src/Variant.cpp + src/rnglib.cpp + src/var.cpp + src/pdflib.cpp + src/cdflib.cpp + src/split.cpp + src/ssw.hpp + src/ssw_cpp.hpp + fastahack/Fasta.cpp + smithwaterman/SmithWatermanGotoh.cpp + smithwaterman/Repeats.cpp + smithwaterman/IndelAllele.cpp + smithwaterman/disorder.cpp + smithwaterman/LeftAlign.cpp + fsom/fsom.c + filevercmp/filevercmp.c + ) + +set(BINS + vcfecho + dumpContigsFromHeader + bFst + pVst + hapLrt + popStats + wcFst + iHS + segmentFst + segmentIhs + genotypeSummary + sequenceDiversity + pFst + smoother + vcfld + plotHaps + abba-baba + permuteGPAT++ + permuteSmooth + normalize-iHS + meltEHH + vcfaltcount + vcfhetcount + vcfhethomratio + vcffilter + vcf2tsv + vcfgenotypes + vcfannotategenotypes + vcfcommonsamples + vcfremovesamples + vcfkeepsamples + vcfsamplenames + vcfgenotypecompare + vcffixup + vcfclassify + vcfsamplediff + vcfremoveaberrantgenotypes + vcfrandom + vcfparsealts + vcfstats + vcfflatten + vcfprimers + vcfnumalt + vcfcleancomplex + vcfintersect + vcfannotate + vcfallelicprimitives + vcfoverlay + vcfaddinfo + vcfkeepinfo + vcfkeepgeno + vcfafpath + vcfcountalleles + vcflength + vcfdistance + vcfrandomsample + vcfentropy + vcfglxgt + vcfroc + vcfcheck + vcfstreamsort + vcfuniq + vcfuniqalleles + vcfremap + vcf2fasta + vcfsitesummarize + vcfbreakmulti + vcfcreatemulti + vcfevenregions + vcfcat + vcfgenosummarize + vcfgenosamplenames + vcfgeno2haplo + vcfleftalign + vcfcombine + vcfgeno2alleles + vcfindex + vcf2dag + vcfsample2info + vcfqual2info + vcfinfo2qual + vcfglbound + vcfinfosummarize + ) + +# ---- Get version + +file (STRINGS "VERSION" BUILD_NUMBER) +add_definitions(-DVCFLIB_VERSION="${BUILD_NUMBER}") +add_definitions(-DVERSION="${BUILD_NUMBER}") + +# ---- Build htslib +# +# Note by default we use the distributed htslib! 
These are +# the old instructions: + +if (HTSLIB_LOCAL) + +ExternalProject_Add(htslib-build + SOURCE_DIR ${HTSLIB_LOCAL} + BINARY_DIR ${HTSLIB_LOCAL} + PREFIX ${CMAKE_CURRENT_BINARY_DIR}/htslib + CONFIGURE_COMMAND "" + BUILD_COMMAND $(MAKE) lib-static + INSTALL_COMMAND "" + ) + +add_library(htslib STATIC IMPORTED) + +set_property(TARGET htslib PROPERTY + IMPORTED_LOCATION ${HTSLIB_LOCAL}/libhts.a) +add_dependencies(htslib htslib-build) + +# If the user wants to configure our HTSlib to build with libddeflate, we need +# to make sure to link against libdeflate as a transitive dependency. To do +# that, pass -DHTSLIB_EXTRA_LIBS="-ldeflate" when configuring the project with +# cmake. +# TODO: Stop vendoring in htslib and just use find_package +set(HTSLIB_EXTRA_LIBS "-lcurl" CACHE STRING "Library flags needed to link with htslib's dependencies, for chosen configuration") +set_property(TARGET htslib PROPERTY INTERFACE_LINK_LIBRARIES ${HTSLIB_EXTRA_LIBS}) + +endif(HTSLIB_LOCAL) + +# ---- Build all + +if (NOT BUILD_ONLY_LIB) + foreach(BIN ${BINS}) + add_executable(${BIN} src/${BIN}.cpp) + target_include_directories(${BIN} PUBLIC ${TABIXPP_LIBRARIES} ${HTSLIB_INCLUDE_DIRS}) + target_link_libraries(${BIN} vcflib tabixpp -DGIT_VERSION=0.9.0 ${TABIXPP_LIBRARIES} ${HTSLIB_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} ${ZLIB_LIBRARIES} ${LIBLZMA_LIBRARIES} ${BZIP2_LIBRARIES}) + endforeach(BIN ${BINS}) + install(TARGETS ${BINS} RUNTIME DESTINATION bin) +endif() + +enable_testing() + +# ---- Test + +add_test( + NAME RunTests + COMMAND "make" + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/test +) + +function(add_pytest TEST_FILE) + add_test( + NAME ${TEST_FILE} + COMMAND python3 pytest/${TEST_FILE}.py + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/test + ) +endfunction() + +function(add_pydoctest TEST_FILE) + add_test( + NAME ${TEST_FILE} + COMMAND python3 -m doctest -o NORMALIZE_WHITESPACE -o REPORT_UDIFF pytest/${TEST_FILE}.md + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/test + ) 
+endfunction() + +function(add_doctest TEST_FILE) + add_test( + NAME ${TEST_FILE} + COMMAND python3 -m doctest -o NORMALIZE_WHITESPACE -o REPORT_UDIFF ../${TEST_FILE}.md + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/test + ) +endfunction() + +function(add_pydoctest_fullname TEST_FILE) + add_test( + NAME ${TEST_FILE} + COMMAND python3 -m doctest -o NORMALIZE_WHITESPACE -o REPORT_UDIFF ${TEST_FILE} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/test + ) +endfunction() + +# add_pytest(vcf2tsv-test) +add_pydoctest(vcf2tsv) +add_doctest(doc/vcfintersect) +add_doctest(doc/vcffilter) +# add_pydoctest_fullname(../README.md) + +# ---- Build docs +# +# Generates man pages for the python doctests. Don't need +# to run every time so it is a separate command. For pandoc logic see +# https://www.howtogeek.com/682871/how-to-create-a-man-page-on-linux/ +# +# cmake --build . --target man ; cmake --install . + +find_program(PANDOC pandoc) + +if (PANDOC) + # note the option ALL which allows to build the docs together with the application + add_custom_target( man + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + # COMMAND ${PANDOC} ./test/pytest/vcf2tsv.md -s -t man -o ./man/vcf2tsv.1 + COMMAND ruby ./test/scripts/bin2md.rb --man test/scripts/bin2md-template.erb + COMMAND ruby ./test/scripts/bin2md.rb --man --index + COMMAND ruby ./test/scripts/md2man + # regenerate to allow for URLs in markdown docs + COMMAND ruby ./test/scripts/bin2md.rb test/scripts/bin2md-template.erb + COMMAND ruby ./test/scripts/bin2md.rb --index + ) +else (PANDOC) + message("Pandoc needs to be installed to generate the man pages") +endif (PANDOC) + +# ---- Install + +install(TARGETS vcflib ARCHIVE DESTINATION lib) + +install(FILES ${INCLUDES} DESTINATION include) + +install(DIRECTORY ${CMAKE_SOURCE_DIR}/man DESTINATION ${CMAKE_INSTALL_PREFIX}/man/man1) diff -Nru libvcflib-1.0.1+dfsg/debian/changelog libvcflib-1.0.2+dfsg/debian/changelog --- libvcflib-1.0.1+dfsg/debian/changelog 2020-10-31 20:57:30.000000000 
+0000 +++ libvcflib-1.0.2+dfsg/debian/changelog 2021-01-28 12:03:40.000000000 +0000 @@ -1,3 +1,26 @@ +libvcflib (1.0.2+dfsg-2) unstable; urgency=medium + + * Team upload. + * debian/rules: unit tests: Use DEB_HOST_GNU_TYPE instead of + DEB_HOST_MULTIARCH for finding the build directory + + -- Michael R. Crusoe Thu, 28 Jan 2021 13:03:40 +0100 + +libvcflib (1.0.2+dfsg-1) unstable; urgency=medium + + [ Steffen Moeller ] + * Team upload. + * Added binaries expected by bcbio to /usr/bin + * d/rules - clean: Removing generated README.html + + [ Michael R. Crusoe ] + * debian/copyright: Update Files-Excluded + * Standards-Version: 4.5.1 (routine-update) + * New upstream release + * Switch to cmake, update patches + + -- Michael R. Crusoe Thu, 28 Jan 2021 11:16:19 +0100 + libvcflib (1.0.1+dfsg-4) unstable; urgency=medium * Team upload. diff -Nru libvcflib-1.0.1+dfsg/debian/control libvcflib-1.0.2+dfsg/debian/control --- libvcflib-1.0.1+dfsg/debian/control 2020-10-31 20:57:29.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/control 2021-01-28 12:03:38.000000000 +0000 @@ -4,7 +4,7 @@ Section: science Priority: optional Build-Depends: debhelper-compat (= 13), - dh-exec, + cmake, markdown , libtabixpp-dev, libbz2-dev, @@ -13,8 +13,8 @@ libssw-dev, libfastahack-dev (>= 0.0+git20160702.bbc645f+dfsg-5~), pkg-config, - libipc-run3-perl -Standards-Version: 4.5.0 + libgtest-dev +Standards-Version: 4.5.1 Vcs-Browser: https://salsa.debian.org/med-team/libvcflib Vcs-Git: https://salsa.debian.org/med-team/libvcflib.git Homepage: https://github.com/vcflib/vcflib @@ -73,7 +73,6 @@ Architecture: any Depends: ${shlibs:Depends}, ${misc:Depends}, - ${perl:Depends}, python3:any, r-base-core, r-cran-plyr, diff -Nru libvcflib-1.0.1+dfsg/debian/copyright libvcflib-1.0.2+dfsg/debian/copyright --- libvcflib-1.0.1+dfsg/debian/copyright 2020-10-31 20:57:29.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/copyright 2021-01-28 12:03:38.000000000 +0000 @@ -10,7 +10,7 @@ googletest libVCFH smithwaterman - 
tabixpp + src/simde Files: * Copyright: © 2009-2012 Erik Garrison diff -Nru libvcflib-1.0.1+dfsg/debian/libvcflib1.install libvcflib-1.0.2+dfsg/debian/libvcflib1.install --- libvcflib-1.0.1+dfsg/debian/libvcflib1.install 2019-12-10 11:34:18.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/libvcflib1.install 2021-01-28 10:09:38.000000000 +0000 @@ -1,2 +1 @@ -#! /usr/bin/dh-exec -lib/lib*.so.1 usr/lib/${DEB_HOST_MULTIARCH}/ +usr/lib/*/libvcflib.so.* diff -Nru libvcflib-1.0.1+dfsg/debian/libvcflib-dev.install libvcflib-1.0.2+dfsg/debian/libvcflib-dev.install --- libvcflib-1.0.1+dfsg/debian/libvcflib-dev.install 2019-12-10 11:34:18.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/libvcflib-dev.install 2021-01-28 09:42:40.000000000 +0000 @@ -1,3 +1,3 @@ -#! /usr/bin/dh-exec -lib/lib*.so usr/lib/${DEB_HOST_MULTIARCH}/ -include/*.h usr/include/vcflib +usr/lib/*/lib*.so +usr/include/*.h usr/include/vcflib +usr/include/*.hpp usr/include/vcflib diff -Nru libvcflib-1.0.1+dfsg/debian/libvcflib-tools.install libvcflib-1.0.2+dfsg/debian/libvcflib-tools.install --- libvcflib-1.0.1+dfsg/debian/libvcflib-tools.install 2019-12-10 15:07:52.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/libvcflib-tools.install 2021-01-28 09:15:16.000000000 +0000 @@ -1,6 +1,9 @@ -bin/vcfkeepinfo usr/bin -bin/vcf2tsv usr/bin -bin/vcffilter usr/bin -bin/*.R usr/lib/R/site-library/vcflib -bin usr/lib/vcflib +usr/bin/vcfkeepinfo +usr/bin/vcf2tsv +usr/bin/vcffilter +usr/bin/vcffixup +usr/bin/vcfuniqalleles +usr/bin/vcfallelicprimitives +scripts/*.R usr/lib/R/site-library/vcflib +usr/bin usr/lib/vcflib debian/wrapper/* usr/bin diff -Nru libvcflib-1.0.1+dfsg/debian/libvcflib-tools.manpages libvcflib-1.0.2+dfsg/debian/libvcflib-tools.manpages --- libvcflib-1.0.1+dfsg/debian/libvcflib-tools.manpages 2019-12-10 11:34:18.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/libvcflib-tools.manpages 2021-01-28 09:25:05.000000000 +0000 @@ -1 +1 @@ -debian/*.1 +usr/man/man1/man/*.1 diff -Nru 
libvcflib-1.0.1+dfsg/debian/patches/crossbuild libvcflib-1.0.2+dfsg/debian/patches/crossbuild --- libvcflib-1.0.1+dfsg/debian/patches/crossbuild 2020-10-31 17:41:05.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/patches/crossbuild 1970-01-01 00:00:00.000000000 +0000 @@ -1,15 +0,0 @@ -Author: Michael R. Crusoe -Description: Enable cross-building -Forwarded: not-needed ---- libvcflib.orig/Makefile -+++ libvcflib/Makefile -@@ -1,4 +1,9 @@ - #OBJ_DIR = ./ -+PKG_CONFIG ?= pkg-config -+HTS_INCLUDES=$(shell $(PKG_CONFIG) --cflags libsmithwaterman) $(shell $(PKG_CONFIG) --cflags libfastahack) -+LDLIBS=-ltabixpp -lhts $(shell $(PKG_CONFIG) --libs libsmithwaterman) $(shell $(PKG_CONFIG) --libs libfastahack) -lstdc++ -lm -+ -+ - HEADERS = src/Variant.h \ - src/split.h \ - src/pdflib.hpp \ diff -Nru libvcflib-1.0.1+dfsg/debian/patches/disorder_include libvcflib-1.0.2+dfsg/debian/patches/disorder_include --- libvcflib-1.0.1+dfsg/debian/patches/disorder_include 2020-10-31 17:41:05.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/patches/disorder_include 1970-01-01 00:00:00.000000000 +0000 @@ -1,23 +0,0 @@ -From: Michael R. 
Crusoe -Subject: Put the disorder.h include where it is used -Forwarded: https://github.com/vcflib/vcflib/pull/299 ---- libvcflib.orig/src/Variant.h -+++ libvcflib/src/Variant.h -@@ -19,7 +19,6 @@ - #include "join.h" - #include "tabix.hpp" - #include --#include "disorder.h" - #include - #include "convert.h" - #include "multichoose.h" ---- libvcflib.orig/src/vcfentropy.cpp -+++ libvcflib/src/vcfentropy.cpp -@@ -2,6 +2,7 @@ - #include "split.h" - #include "Fasta.h" - #include -+#include "disorder.h" - - using namespace std; - using namespace vcflib; diff -Nru libvcflib-1.0.1+dfsg/debian/patches/dont_link_pthread libvcflib-1.0.2+dfsg/debian/patches/dont_link_pthread --- libvcflib-1.0.1+dfsg/debian/patches/dont_link_pthread 2020-10-31 17:41:05.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/patches/dont_link_pthread 1970-01-01 00:00:00.000000000 +0000 @@ -1,19 +0,0 @@ -Author: Michael R. Crusoe -Description: pthread is needed by the libraries we link to, but not us -Forwarded: not-needed ---- libvcflib.orig/Makefile -+++ libvcflib/Makefile -@@ -125,11 +125,11 @@ - # Use the one we ship in tabixpp unless told otherwise by the environment - HTS_LIB ?= $(VCF_LIB_LOCAL)/tabixpp/htslib/libhts.a - HTS_INCLUDES ?= -I$(VCF_LIB_LOCAL)/tabixpp/htslib --HTS_LDFLAGS ?= -L$(VCF_LIB_LOCAL)/tabixpp/htslib -lhts -lbz2 -lm -lz -llzma -pthread -+HTS_LDFLAGS ?= -L$(VCF_LIB_LOCAL)/tabixpp/htslib -lhts -lbz2 -lm -lz -llzma - - - INCLUDES = $(HTS_INCLUDES) -I$(INC_DIR) --LDFLAGS = -L$(LIB_DIR) -lvcflib $(HTS_LDFLAGS) -lpthread -lz -lm -llzma -lbz2 -lssw -+LDFLAGS = -L$(LIB_DIR) -lvcflib $(HTS_LDFLAGS) -lz -lm -llzma -lbz2 -lssw - - all: $(OBJECTS) $(BINS) scriptToBin - diff -Nru libvcflib-1.0.1+dfsg/debian/patches/fix_ssw_cpp_header_name.patch libvcflib-1.0.2+dfsg/debian/patches/fix_ssw_cpp_header_name.patch --- libvcflib-1.0.1+dfsg/debian/patches/fix_ssw_cpp_header_name.patch 2020-10-31 17:41:05.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/patches/fix_ssw_cpp_header_name.patch 
1970-01-01 00:00:00.000000000 +0000 @@ -1,15 +0,0 @@ -Author: Andreas Tille -Last-Update: Thu, 13 Sep 2018 19:35:33 +0200 -Description: The header file is named ssw_cpp.h -Forwarded: not-needed ---- libvcflib.orig/src/Variant.h -+++ libvcflib/src/Variant.h -@@ -20,7 +20,7 @@ - #include "tabix.hpp" - #include - #include "disorder.h" --#include "ssw_cpp.hpp" -+#include - #include "convert.h" - #include "multichoose.h" - #include "Fasta.h" diff -Nru libvcflib-1.0.1+dfsg/debian/patches/hardening-flags libvcflib-1.0.2+dfsg/debian/patches/hardening-flags --- libvcflib-1.0.1+dfsg/debian/patches/hardening-flags 2020-10-31 17:41:05.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/patches/hardening-flags 2021-01-28 08:36:48.000000000 +0000 @@ -1,77 +1,6 @@ From: Michael R. Crusoe Subject: Pass CPPFLAGS Forwarded: not-needed ---- libvcflib.orig/Makefile -+++ libvcflib/Makefile -@@ -125,7 +125,7 @@ - # Use the one we ship in tabixpp unless told otherwise by the environment - HTS_LIB ?= $(VCF_LIB_LOCAL)/tabixpp/htslib/libhts.a - HTS_INCLUDES ?= -I$(VCF_LIB_LOCAL)/tabixpp/htslib --HTS_LDFLAGS ?= -L$(VCF_LIB_LOCAL)/tabixpp/htslib -lhts -lbz2 -lm -lz -llzma -+HTS_LDFLAGS ?= -L$(VCF_LIB_LOCAL)/tabixpp/htslib -lhts -lbz2 -lm -lz -llzma $(LDFLAGS) - - - INCLUDES = $(HTS_INCLUDES) -I$(INC_DIR) -@@ -138,7 +138,7 @@ - - GIT_VERSION ?= $(shell git describe --abbrev=4 --dirty --always) - --CXXFLAGS = -Ofast -D_FILE_OFFSET_BITS=64 -std=c++0x -+CXXFLAGS += -Ofast -D_FILE_OFFSET_BITS=64 -std=c++0x - #CXXFLAGS = -O2 - #CXXFLAGS = -pedantic -Wall -Wshadow -Wpointer-arith -Wcast-qual - -@@ -152,28 +152,28 @@ - $(MAKE) CXXFLAGS="$(CXXFLAGS) -pg" all - - $(OBJECTS): $(SOURCES) $(HEADERS) multichoose pre $(FILEVERCMP) -- $(CXX) -c $(CFLAGS) -fPIC -o $@ src/$(*F).cpp $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) && $(CP) src/*.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ -+ $(CXX) $(CPPFLAGS) -c $(CFLAGS) -fPIC -o $@ src/$(*F).cpp $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) $(LDLIBS) && $(CP) src/*.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ - 
- multichoose: pre -- cd multichoose && $(MAKE) && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ -+ cd multichoose && LDFLAGS="$(HTS_LDFLAGS)" $(MAKE) && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ - - intervaltree: pre -- cd intervaltree && $(MAKE) && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ -+ cd intervaltree && LDFLAGS="$(HTS_LDFLAGS)" $(MAKE) && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ - - $(TABIX): pre - echo "No need to create separately packaged tabixpp" - - $(FILEVERCMP): pre -- cd filevercmp && make && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && $(CP) *.o $(VCF_LIB_LOCAL)/$(INC_DIR)/ -+ cd filevercmp && LDFLAGS="$(HTS_LDFLAGS)" $(MAKE) && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && $(CP) *.o $(VCF_LIB_LOCAL)/$(INC_DIR)/ - - $(SHORTBINS): pre - $(MAKE) $(BIN_DIR)/$@ - - $(BINS): $(BIN_SOURCES) libvcflib.so $(OBJECTS) $(FILEVERCMP) pre intervaltree -- $(CXX) src/$(notdir $@).cpp -o $@ $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) -DVERSION=\"$(GIT_VERSION)\" -+ $(CXX) $(CPPFLAGS) src/$(notdir $@).cpp -o $@ $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) -DVERSION=\"$(GIT_VERSION)\" $(LDLIBS) - - libvcflib.so: $(OBJECTS) $(FILEVERCMP) pre -- $(CC) -shared -Wl,-soname -Wl,libvcflib.so.1 -o libvcflib.so.1 $(OBJECTS) $(FILEVERCMP) -+ $(CC) $(HTS_LDFLAGS) -shared -Wl,-soname -Wl,libvcflib.so.1 -o libvcflib.so.1 $(OBJECTS) $(FILEVERCMP) $(LDLIBS) - ln -s libvcflib.so.1 libvcflib.so - $(CP) -a libvcflib.so* $(LIB_DIR) - -@@ -199,10 +199,10 @@ - $(RM) -r $(LIB_DIR) - $(RM) -r $(INC_DIR) - $(RM) -r $(OBJ_DIR) -- $(MAKE) clean -C multichoose -- $(MAKE) clean -C libVCFH -- $(MAKE) clean -C test -- $(MAKE) clean -C filevercmp -- $(MAKE) clean -C intervaltree -+ +$(MAKE) clean -C multichoose -+ +$(MAKE) clean -C libVCFH -+ +$(MAKE) clean -C test -+ +$(MAKE) clean -C filevercmp -+ +$(MAKE) clean -C intervaltree - - .PHONY: clean all test pre --- libvcflib.orig/filevercmp/Makefile +++ libvcflib/filevercmp/Makefile @@ -7,7 +7,7 @@ diff -Nru libvcflib-1.0.1+dfsg/debian/patches/no_fsom 
libvcflib-1.0.2+dfsg/debian/patches/no_fsom --- libvcflib-1.0.1+dfsg/debian/patches/no_fsom 2020-10-31 17:41:05.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/patches/no_fsom 2021-01-28 08:20:25.000000000 +0000 @@ -1,99 +1,13 @@ From: Michael R. Crusoe Subject: fsom is not used Forwarded: not-needed ---- libvcflib.orig/Makefile -+++ libvcflib/Makefile -@@ -114,9 +114,6 @@ - src/vcfnull2ref.cpp \ - src/vcfinfosummarize.cpp +--- libvcflib.orig/CMakeLists.txt ++++ libvcflib/CMakeLists.txt +@@ -96,7 +96,6 @@ + smithwaterman/IndelAllele.cpp + smithwaterman/disorder.cpp + smithwaterman/LeftAlign.cpp +- fsom/fsom.c + filevercmp/filevercmp.c + ) --# when we can figure out how to build on mac --# src/vcfsom.cpp -- - #BINS = $(BIN_SOURCES:.cpp=) - BINS = $(addprefix $(BIN_DIR)/,$(notdir $(BIN_SOURCES:.cpp=))) - SHORTBINS = $(notdir $(BIN_SOURCES:.cpp=)) -@@ -128,7 +125,6 @@ - INDELALLELE = smithwaterman/IndelAllele.o - DISORDER = smithwaterman/disorder.o - LEFTALIGN = smithwaterman/LeftAlign.o --FSOM = fsom/fsom.o - FILEVERCMP = filevercmp/filevercmp.o - - # Work out how to find htslib -@@ -194,9 +190,6 @@ - $(FASTAHACK): pre - cd fastahack && $(MAKE) && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && $(CP) Fasta.o $(VCF_LIB_LOCAL)/$(OBJ_DIR)/ - --#$(FSOM): --# cd fsom && $(CXX) $(CXXFLAGS) -c fsom.c -lm -- - $(FILEVERCMP): pre - cd filevercmp && make && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && $(CP) *.o $(VCF_LIB_LOCAL)/$(INC_DIR)/ - -@@ -238,7 +231,6 @@ - $(MAKE) clean -C smithwaterman - $(MAKE) clean -C fastahack - $(MAKE) clean -C multichoose -- $(MAKE) clean -C fsom - $(MAKE) clean -C libVCFH - $(MAKE) clean -C test - $(MAKE) clean -C filevercmp ---- libvcflib.orig/README.md -+++ libvcflib/README.md -@@ -116,7 +116,6 @@ - * Generate potential **primers** from VCF records ([vcfprimers](#vcfprimers)), to check for genome uniqueness. 
- * Convert the numerical represenation of genotypes provided by the GT field to a **human-readable genotype format** ([vcfgenotypes](#vcfgenotypes)). - * Observe how different alignment parameters, including context and entropy-dependent ones, influence **variant classification and interpretation** ([vcfremap](#vcfremap)). -- * **Classify variants** by annotations in the INFO field using a self-organizing map ([vcfsom](#vcfsom)); **re-estimate their quality** given known variants. - - - A number of "helper" perl and python3 scripts (e.g. vcf2bed.py, vcfbiallelic) further extend functionality. -@@ -571,47 +570,6 @@ - Prints the names of the samples in the VCF file. - - --### vcfsom -- -- usage: vcfsom [options] [vcf file] -- -- training: -- vcfsom -s output.som -f "AF DP ABP" training.vcf -- -- application: -- vcfsom -a output.som -f "AF DP ABP" test.vcf >results.vcf -- --vcfsom trains and/or applies a self-organizing map to the input VCF data on stdin, adding two columns for the x and y coordinates of the winning neuron in the network and an optional euclidean distance from a given node (--center). -- --If a map is provided via --apply, map will be applied to input without training. --Automated filtering to an estimated FP rate is -- -- options: -- -- -h, --help this dialog -- -- training: -- -- -f, --fields "FIELD ..." 
INFO fields to provide to the SOM -- -a, --apply FILE apply the saved map to input data to FILE -- -s, --save FILE train on input data and save the map to FILE -- -t, --print-training-results -- print results of SOM on training input -- (you can also just use --apply on the same input) -- -x, --width X width in columns of the output array -- -y, --height Y height in columns of the output array -- -i, --iterations N number of training iterations or epochs -- -d, --debug print timing information -- -- recalibration: -- -- -c, --center X,Y annotate with euclidean distance from center -- -p, --paint-true VCF use VCF file to annotate true variants (multiple) -- -f, --paint-false VCF use VCF file to annotate false variants (multiple) -- -R, --paint-tag TAG provide estimated FDR% in TAG in variant INFO -- -N, --false-negative replace FDR% (false detection) with FNR% (false negative) -- -- - ### vcfstats - - usage: vcfstats [options] diff -Nru libvcflib-1.0.1+dfsg/debian/patches/no_libVCFH libvcflib-1.0.2+dfsg/debian/patches/no_libVCFH --- libvcflib-1.0.1+dfsg/debian/patches/no_libVCFH 2020-10-31 17:41:05.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/patches/no_libVCFH 1970-01-01 00:00:00.000000000 +0000 @@ -1,13 +0,0 @@ -From: Michael R. Crusoe -Subject: libVCFH is not used -Forwarded: not-needed ---- libvcflib.orig/Makefile -+++ libvcflib/Makefile -@@ -200,7 +200,6 @@ - $(RM) -r $(INC_DIR) - $(RM) -r $(OBJ_DIR) - +$(MAKE) clean -C multichoose -- +$(MAKE) clean -C libVCFH - +$(MAKE) clean -C test - +$(MAKE) clean -C filevercmp - +$(MAKE) clean -C intervaltree diff -Nru libvcflib-1.0.1+dfsg/debian/patches/override_version libvcflib-1.0.2+dfsg/debian/patches/override_version --- libvcflib-1.0.1+dfsg/debian/patches/override_version 2020-10-31 17:41:05.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/patches/override_version 1970-01-01 00:00:00.000000000 +0000 @@ -1,14 +0,0 @@ -Author: Michael R. 
Crusoe -Description: allow us to specify the package version -Forwarded: not-needed ---- libvcflib.orig/Makefile -+++ libvcflib/Makefile -@@ -136,7 +136,7 @@ - scriptToBin: $(BINS) - $(CP) scripts/* $(BIN_DIR) - --GIT_VERSION += $(shell git describe --abbrev=4 --dirty --always) -+GIT_VERSION ?= $(shell git describe --abbrev=4 --dirty --always) - - CXXFLAGS = -Ofast -D_FILE_OFFSET_BITS=64 -std=c++0x - #CXXFLAGS = -O2 diff -Nru libvcflib-1.0.1+dfsg/debian/patches/series libvcflib-1.0.2+dfsg/debian/patches/series --- libvcflib-1.0.1+dfsg/debian/patches/series 2020-10-31 17:23:22.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/patches/series 2021-01-28 10:49:43.000000000 +0000 @@ -1,14 +1,10 @@ +spelling no_fsom -use_debian_packaged_tabixpp.patch use_debian_packaged_smithwaterman.patch use_debian_packaged_fastahack.patch use_debian_packaged_libssw.patch shared_lib.patch pkg-config.patch -fix_ssw_cpp_header_name.patch -dont_link_pthread -override_version hardening-flags -no_libVCFH -disorder_include -crossbuild +tabix_linking +tests diff -Nru libvcflib-1.0.1+dfsg/debian/patches/shared_lib.patch libvcflib-1.0.2+dfsg/debian/patches/shared_lib.patch --- libvcflib-1.0.1+dfsg/debian/patches/shared_lib.patch 2020-10-31 17:41:05.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/patches/shared_lib.patch 2021-01-28 10:09:38.000000000 +0000 @@ -2,42 +2,25 @@ Last-Update: Thu, 15 Sep 2016 22:26:26 +0200 Description: Create shared lib instead of static Forwarded: not-needed ---- libvcflib.orig/Makefile -+++ libvcflib/Makefile -@@ -152,7 +152,7 @@ - $(MAKE) CXXFLAGS="$(CXXFLAGS) -pg" all +--- libvcflib.orig/CMakeLists.txt ++++ libvcflib/CMakeLists.txt +@@ -72,7 +72,7 @@ + intervaltree/*.h* + filevercmp/*.h*) + +-add_library(vcflib STATIC ++add_library(vcflib SHARED + src/Variant.h + src/split.h + src/pdflib.hpp +@@ -186,6 +186,10 @@ + file (STRINGS "VERSION" BUILD_NUMBER) + add_definitions(-DVCFLIB_VERSION="${BUILD_NUMBER}") + add_definitions(-DVERSION="${BUILD_NUMBER}") 
++string(REGEX MATCH "^[0-9]+" MAJOR_BUILD_NUMBER ${BUILD_NUMBER}) ++set_target_properties(vcflib PROPERTIES ++ SOVERSION ${MAJOR_BUILD_NUMBER} ++ ) - $(OBJECTS): $(SOURCES) $(HEADERS) multichoose pre $(FILEVERCMP) -- $(CXX) -c -o $@ src/$(*F).cpp $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) && $(CP) src/*.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ -+ $(CXX) -c $(CFLAGS) -fPIC -o $@ src/$(*F).cpp $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) && $(CP) src/*.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ - - multichoose: pre - cd multichoose && $(MAKE) && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ -@@ -169,13 +169,13 @@ - $(SHORTBINS): pre - $(MAKE) $(BIN_DIR)/$@ - --$(BINS): $(BIN_SOURCES) libvcflib.a $(OBJECTS) $(FILEVERCMP) pre intervaltree -+$(BINS): $(BIN_SOURCES) libvcflib.so $(OBJECTS) $(FILEVERCMP) pre intervaltree - $(CXX) src/$(notdir $@).cpp -o $@ $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) -DVERSION=\"$(GIT_VERSION)\" - --libvcflib.a: $(OBJECTS) $(FILEVERCMP) pre -- ar rs libvcflib.a $(OBJECTS) $(FILEVERCMP) -- $(CP) libvcflib.a $(LIB_DIR) -- -+libvcflib.so: $(OBJECTS) $(FILEVERCMP) pre -+ $(CC) -shared -Wl,-soname -Wl,libvcflib.so.1 -o libvcflib.so.1 $(OBJECTS) $(FILEVERCMP) -+ ln -s libvcflib.so.1 libvcflib.so -+ $(CP) -a libvcflib.so* $(LIB_DIR) - - test: $(BINS) - @prove -Itests/lib -w tests/*.t -@@ -194,7 +194,7 @@ - - clean: - $(RM) $(BINS) $(OBJECTS) -- $(RM) libvcflib.a -+ $(RM) libvcflib.so* - $(RM) -r $(BIN_DIR) - $(RM) -r $(LIB_DIR) - $(RM) -r $(INC_DIR) + # ---- Build htslib + # diff -Nru libvcflib-1.0.1+dfsg/debian/patches/spelling libvcflib-1.0.2+dfsg/debian/patches/spelling --- libvcflib-1.0.1+dfsg/debian/patches/spelling 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/patches/spelling 2021-01-28 10:18:58.000000000 +0000 @@ -0,0 +1,102 @@ +From: Michael R. 
Crusoe +Subject: fix typos +Forwarded: https://github.com/vcflib/vcflib/pull/308 +--- libvcflib.orig/doc/popStats.md ++++ libvcflib/doc/popStats.md +@@ -21,7 +21,7 @@ + + + +- Calculates basic population statistics at bi-allelic sites. The allele frequency is the number of non-reference alleles divided by the total number of alleles. The expected hetrozygosity is 2*p*q, where p is the non-reference allele frequency and q is 1-p. The observed heterozgosity is the fraction of 0/1 genotypes out of all genotypes. The inbreeding coefficent, Fis, is the relative heterozygosity of each individual vs. compared to the target group. ++ Calculates basic population statistics at bi-allelic sites. The allele frequency is the number of non-reference alleles divided by the total number of alleles. The expected hetrozygosity is 2*p*q, where p is the non-reference allele frequency and q is 1-p. The observed heterozgosity is the fraction of 0/1 genotypes out of all genotypes. The inbreeding coefficient, Fis, is the relative heterozygosity of each individual vs. compared to the target group. + + Output : 9 columns : + 1. seqid +--- libvcflib.orig/man/popStats.1 ++++ libvcflib/man/popStats.1 +@@ -18,7 +18,7 @@ + + + +- Calculates basic population statistics at bi-allelic sites. The allele frequency is the number of non-reference alleles divided by the total number of alleles. The expected hetrozygosity is 2*p*q, where p is the non-reference allele frequency and q is 1-p. The observed heterozgosity is the fraction of 0/1 genotypes out of all genotypes. The inbreeding coefficent, Fis, is the relative heterozygosity of each individual vs. compared to the target group. ++ Calculates basic population statistics at bi-allelic sites. The allele frequency is the number of non-reference alleles divided by the total number of alleles. The expected hetrozygosity is 2*p*q, where p is the non-reference allele frequency and q is 1-p. 
The observed heterozgosity is the fraction of 0/1 genotypes out of all genotypes. The inbreeding coefficient, Fis, is the relative heterozygosity of each individual vs. compared to the target group. + + Output : 9 columns : + 1. seqid +--- libvcflib.orig/src/popStats.cpp ++++ libvcflib/src/popStats.cpp +@@ -36,7 +36,7 @@ + cerr << " General population genetic statistics for each SNP " << endl << endl; + + cerr << R"( +- Calculates basic population statistics at bi-allelic sites. The allele frequency is the number of non-reference alleles divided by the total number of alleles. The expected hetrozygosity is 2*p*q, where p is the non-reference allele frequency and q is 1-p. The observed heterozgosity is the fraction of 0/1 genotypes out of all genotypes. The inbreeding coefficent, Fis, is the relative heterozygosity of each individual vs. compared to the target group. )" << endl << endl; ++ Calculates basic population statistics at bi-allelic sites. The allele frequency is the number of non-reference alleles divided by the total number of alleles. The expected hetrozygosity is 2*p*q, where p is the non-reference allele frequency and q is 1-p. The observed heterozgosity is the fraction of 0/1 genotypes out of all genotypes. The inbreeding coefficient, Fis, is the relative heterozygosity of each individual vs. compared to the target group. )" << endl << endl; + + cerr << "Output : 9 columns : " << endl; + cerr << " 1. seqid " << endl; +--- libvcflib.orig/README.md ++++ libvcflib/README.md +@@ -117,7 +117,7 @@ + + | filter command | description | + | :-------------- | :---------- | +- | [vcfuniq](./doc/vcfuniq.md) | List unique genotypes. Like GNU uniq, but for VCF records. Remove records which have the same positon, ref, and alt as the previous record. | ++ | [vcfuniq](./doc/vcfuniq.md) | List unique genotypes. Like GNU uniq, but for VCF records. Remove records which have the same position, ref, and alt as the previous record. 
| + | [vcfuniqalleles](./doc/vcfuniqalleles.md) | List unique alleles For each record, remove any duplicate alternate alleles that may have resulted from merging separate VCF files. | + | [vcffilter](./doc/vcffilter.md) | VCF filter the specified vcf file using the set of filters | + +--- libvcflib.orig/doc/vcflib.md ++++ libvcflib/doc/vcflib.md +@@ -37,7 +37,7 @@ + + | filter command | description | + | :-------------- | :---------- | +- | [vcfuniq](./vcfuniq.md) | List unique genotypes. Like GNU uniq, but for VCF records. Remove records which have the same positon, ref, and alt as the previous record. | ++ | [vcfuniq](./vcfuniq.md) | List unique genotypes. Like GNU uniq, but for VCF records. Remove records which have the same position, ref, and alt as the previous record. | + | [vcfuniqalleles](./vcfuniqalleles.md) | List unique alleles For each record, remove any duplicate alternate alleles that may have resulted from merging separate VCF files. | + | [vcffilter](./vcffilter.md) | VCF filter the specified vcf file using the set of filters | + +--- libvcflib.orig/doc/vcfuniq.md ++++ libvcflib/doc/vcfuniq.md +@@ -11,7 +11,7 @@ + + # DESCRIPTION + +-List unique genotypes. Like GNU uniq, but for VCF records. Remove records which have the same positon, ref, and alt as the previous record. ++List unique genotypes. Like GNU uniq, but for VCF records. Remove records which have the same position, ref, and alt as the previous record. + + + +--- libvcflib.orig/man/vcflib.1 ++++ libvcflib/man/vcflib.1 +@@ -48,7 +48,7 @@ + T}@T{ + List unique genotypes. + Like GNU uniq, but for VCF records. +-Remove records which have the same positon, ref, and alt as the previous ++Remove records which have the same position, ref, and alt as the previous + record. + T} + T{ +--- libvcflib.orig/man/vcfuniq.1 ++++ libvcflib/man/vcfuniq.1 +@@ -12,7 +12,7 @@ + .PP + List unique genotypes. + Like GNU uniq, but for VCF records. 
+-Remove records which have the same positon, ref, and alt as the previous ++Remove records which have the same position, ref, and alt as the previous + record. + .SH OPTIONS + .IP +--- libvcflib.orig/src/vcfuniq.cpp ++++ libvcflib/src/vcfuniq.cpp +@@ -22,7 +22,7 @@ + Usage: vcfuniq + + List unique genotypes. Like GNU uniq, but for VCF records. Remove +-records which have the same positon, ref, and alt as the previous ++records which have the same position, ref, and alt as the previous + record. + + Type: filter diff -Nru libvcflib-1.0.1+dfsg/debian/patches/tabix_linking libvcflib-1.0.2+dfsg/debian/patches/tabix_linking --- libvcflib-1.0.1+dfsg/debian/patches/tabix_linking 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/patches/tabix_linking 2021-01-28 10:49:43.000000000 +0000 @@ -0,0 +1,13 @@ +Author: Michael R. Crusoe +Description: link against tabixpp +Forwarded: not-needed +--- libvcflib.orig/CMakeLists.txt ++++ libvcflib/CMakeLists.txt +@@ -93,6 +93,7 @@ + PkgConfig::SMITHWATERMAN + PkgConfig::FASTAHACK + ssw ++ tabixpp + ) + + set(BINS diff -Nru libvcflib-1.0.1+dfsg/debian/patches/tests libvcflib-1.0.2+dfsg/debian/patches/tests --- libvcflib-1.0.1+dfsg/debian/patches/tests 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/patches/tests 2021-01-28 11:36:39.000000000 +0000 @@ -0,0 +1,32 @@ +Author: Michael R. 
Crusoe +Description: Use packaged gtest and update paths +Forwarded: not-needed +--- libvcflib.orig/test/Makefile ++++ libvcflib/test/Makefile +@@ -7,22 +7,17 @@ + CXX=g++ + CFLAGS=-Wall -std=c++0x + INCLUDE=-I../src -I../googletest/googletest/include/ -I../fastahack -I../smithwaterman/ -I../multichoose/ -I../filevercmp/ -I../googletest/googletest/make/ +-LIB=-L../build -L../googletest/googletest/make/ -lm -ltabixpp -lhts -lpthread +-LIBGTEST=../googletest/googletest/make/gtest_main.a +-LIBVCF=../build/libvcflib.a ++LIB=-L../obj-$(shell dpkg-architecture -qDEB_HOST_GNU_TYPE)/ -L../googletest/googletest/make/ -lm -ltabixpp -lhts -lpthread ++LIBGTEST=-lgtest_main -lgtest ++LIBVCF=-lvcflib + + all: run + +-../googletest/googletest/make/gtest_main.a: +- cd ../googletest/googletest/make && make +- +-tests/main: ../googletest/googletest/make/gtest_main.a ++tests/main: + $(CXX) $(CFLAGS) $(INCLUDE) tests/mainTest.cpp -o tests/main $(LIBVCF) $(LIBGTEST) $(LIB) + + run: tests/main + ./tests/main +- rm -v ../googletest/googletest/make/gtest_main.a +- rm -v ../googletest/googletest/make/sample1_unittest + + clean: + rm -f tests/main diff -Nru libvcflib-1.0.1+dfsg/debian/patches/use_debian_packaged_fastahack.patch libvcflib-1.0.2+dfsg/debian/patches/use_debian_packaged_fastahack.patch --- libvcflib-1.0.1+dfsg/debian/patches/use_debian_packaged_fastahack.patch 2020-10-31 17:41:05.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/patches/use_debian_packaged_fastahack.patch 2021-01-28 10:14:12.000000000 +0000 @@ -1,55 +1,209 @@ Author: Andreas Tille -Last-Update: Thu, 23 Jun 2016 09:10:54 +0200 +Last-Update: 2021-01-28 Description: Use Debian packaged libfastahack (and libdisorder) Forwarded: not-needed ---- libvcflib.orig/Makefile -+++ libvcflib/Makefile -@@ -119,7 +119,6 @@ - SHORTBINS = $(notdir $(BIN_SOURCES:.cpp=)) - - # TABIX = tabixpp/tabix.o --# FASTAHACK = fastahack/Fasta.o - FILEVERCMP = filevercmp/filevercmp.o - - # Work out how to find htslib -@@ -159,7 +158,7 @@ - 
gprof: - $(MAKE) CXXFLAGS="$(CXXFLAGS) -pg" all - --$(OBJECTS): $(SOURCES) $(HEADERS) multichoose pre $(FILEVERCMP) $(FASTAHACK) -+$(OBJECTS): $(SOURCES) $(HEADERS) multichoose pre $(FILEVERCMP) - $(CXX) -c -o $@ src/$(*F).cpp $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) && $(CP) src/*.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ - - multichoose: pre -@@ -171,20 +170,17 @@ - $(TABIX): pre - echo "No need to create separately packaged tabixpp" - --$(FASTAHACK): pre -- cd fastahack && $(MAKE) && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && $(CP) Fasta.o $(VCF_LIB_LOCAL)/$(OBJ_DIR)/ -- - $(FILEVERCMP): pre - cd filevercmp && make && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && $(CP) *.o $(VCF_LIB_LOCAL)/$(INC_DIR)/ - - $(SHORTBINS): pre - $(MAKE) $(BIN_DIR)/$@ - --$(BINS): $(BIN_SOURCES) libvcflib.a $(OBJECTS) $(FASTAHACK) $(SSW) $(FILEVERCMP) pre intervaltree -+$(BINS): $(BIN_SOURCES) libvcflib.a $(OBJECTS) $(SSW) $(FILEVERCMP) pre intervaltree - $(CXX) src/$(notdir $@).cpp -o $@ $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) -DVERSION=\"$(GIT_VERSION)\" - --libvcflib.a: $(OBJECTS) $(FASTAHACK) $(SSW) $(FILEVERCMP) pre -- ar rs libvcflib.a $(OBJECTS) $(FASTAHACK) $(SSW) $(FILEVERCMP) -+libvcflib.a: $(OBJECTS) $(SSW) $(FILEVERCMP) pre -+ ar rs libvcflib.a $(OBJECTS) $(SSW) $(FILEVERCMP) - $(CP) libvcflib.a $(LIB_DIR) - - -@@ -211,7 +207,6 @@ - $(RM) -r $(LIB_DIR) - $(RM) -r $(INC_DIR) - $(RM) -r $(OBJ_DIR) -- $(MAKE) clean -C fastahack - $(MAKE) clean -C multichoose - $(MAKE) clean -C libVCFH - $(MAKE) clean -C test +--- libvcflib.orig/CMakeLists.txt ++++ libvcflib/CMakeLists.txt +@@ -33,6 +33,7 @@ + pkg_check_modules(HTSLIB htslib) + pkg_check_modules(TABIXPP tabixpp) # Optional + pkg_check_modules(SMITHWATERMAN REQUIRED IMPORTED_TARGET libsmithwaterman) ++pkg_check_modules(FASTAHACK REQUIRED IMPORTED_TARGET libfastahack) + + # ---- Build switches + +@@ -54,7 +55,6 @@ + # ---- Include files + + include_directories(include) +-include_directories(fastahack) + include_directories(intervaltree) + 
include_directories(multichoose) + include_directories(filevercmp) +@@ -70,7 +70,6 @@ + src/*.h* + multichoose/*.h* + intervaltree/*.h* +- fastahack/*.h* + filevercmp/*.h*) + + add_library(vcflib STATIC +@@ -89,12 +88,12 @@ + src/split.cpp + src/ssw.hpp + src/ssw_cpp.hpp +- fastahack/Fasta.cpp + filevercmp/filevercmp.c + ) + + target_link_libraries(vcflib + PkgConfig::SMITHWATERMAN ++ PkgConfig::FASTAHACK + ) + + set(BINS +--- libvcflib.orig/src/Variant.h ++++ libvcflib/src/Variant.h +@@ -31,7 +31,7 @@ + #include "ssw_cpp.hpp" + #include "convert.h" + #include "multichoose.h" +-#include ++#include + extern "C" { + #include "filevercmp.h" + } +--- libvcflib.orig/src/vcf2dag.cpp ++++ libvcflib/src/vcf2dag.cpp +@@ -11,7 +11,7 @@ + #include "BedReader.h" + #include "IntervalTree.h" + #include +-#include "Fasta.h" ++#include + #include + #include + #include +--- libvcflib.orig/src/vcf2fasta.cpp ++++ libvcflib/src/vcf2fasta.cpp +@@ -13,7 +13,7 @@ + #include "split.h" + #include + #include +-#include "Fasta.h" ++#include + #include + #include + +--- libvcflib.orig/src/vcfcheck.cpp ++++ libvcflib/src/vcfcheck.cpp +@@ -9,7 +9,7 @@ + + #include "Variant.h" + #include "split.h" +-#include "Fasta.h" ++#include + #include "gpatInfo.hpp" + #include + +--- libvcflib.orig/src/vcfentropy.cpp ++++ libvcflib/src/vcfentropy.cpp +@@ -9,7 +9,7 @@ + + #include "Variant.h" + #include "split.h" +-#include "Fasta.h" ++#include + #include + #include "disorder.h" + +--- libvcflib.orig/src/vcfevenregions.cpp ++++ libvcflib/src/vcfevenregions.cpp +@@ -9,7 +9,7 @@ + + #include "Variant.h" + #include "split.h" +-#include "Fasta.h" ++#include + #include + #include + +--- libvcflib.orig/src/vcfgeno2haplo.cpp ++++ libvcflib/src/vcfgeno2haplo.cpp +@@ -9,7 +9,7 @@ + + #include "Variant.h" + #include +-#include "Fasta.h" ++#include + #include "gpatInfo.hpp" + #include + #include +--- libvcflib.orig/src/vcfinfosummarize.cpp ++++ libvcflib/src/vcfinfosummarize.cpp +@@ -9,7 +9,7 @@ + + #include 
"Variant.h" + #include "split.h" +-#include "Fasta.h" ++#include + #include "gpatInfo.hpp" + #include + #include +--- libvcflib.orig/src/vcfintersect.cpp ++++ libvcflib/src/vcfintersect.cpp +@@ -11,7 +11,7 @@ + #include "BedReader.h" + #include "IntervalTree.h" + #include +-#include "Fasta.h" ++#include + #include + #include + #include +--- libvcflib.orig/src/vcfleftalign.cpp ++++ libvcflib/src/vcfleftalign.cpp +@@ -11,7 +11,7 @@ + #include "convert.h" + #include "join.h" + #include "split.h" +-#include "Fasta.h" ++#include + #include + #include + #include +--- libvcflib.orig/src/vcfnormalizesvs.cpp ++++ libvcflib/src/vcfnormalizesvs.cpp +@@ -9,7 +9,7 @@ + + #include "Variant.h" + #include +-#include "Fasta.h" ++#include + + using namespace std; + using namespace vcflib; +--- libvcflib.orig/src/vcfprimers.cpp ++++ libvcflib/src/vcfprimers.cpp +@@ -9,7 +9,7 @@ + + #include "Variant.h" + #include "split.h" +-#include "Fasta.h" ++#include + #include + + using namespace std; +--- libvcflib.orig/src/vcfremap.cpp ++++ libvcflib/src/vcfremap.cpp +@@ -11,7 +11,7 @@ + #include "BedReader.h" + #include "IntervalTree.h" + #include +-#include "Fasta.h" ++#include + #include + #include + #include +--- libvcflib.orig/src/vcfroc.cpp ++++ libvcflib/src/vcfroc.cpp +@@ -11,7 +11,7 @@ + #include "BedReader.h" + #include "IntervalTree.h" + #include +-#include "Fasta.h" ++#include + #include + #include + #include +--- libvcflib.orig/src/vcfsample2info.cpp ++++ libvcflib/src/vcfsample2info.cpp +@@ -9,7 +9,7 @@ + + #include "Variant.h" + #include "split.h" +-#include "Fasta.h" ++#include + #include + #include + #include diff -Nru libvcflib-1.0.1+dfsg/debian/patches/use_debian_packaged_libssw.patch libvcflib-1.0.2+dfsg/debian/patches/use_debian_packaged_libssw.patch --- libvcflib-1.0.1+dfsg/debian/patches/use_debian_packaged_libssw.patch 2020-10-31 17:41:05.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/patches/use_debian_packaged_libssw.patch 2021-01-28 10:14:23.000000000 +0000 @@ -1,54 
+1,35 @@ Author: Andreas Tille -Last-Update: Tue, 22 Nov 2016 10:56:09 +0100 +Last-Update: 2021-01-28 Bug-Debian: https://bugs.debian.org/838513 Description: Use Debian packaged libssw (while its not clear why two implementations of Smith-Waterman are needed at all) Forwarded: not-needed ---- libvcflib.orig/Makefile -+++ libvcflib/Makefile -@@ -129,9 +129,7 @@ - - - INCLUDES = $(HTS_INCLUDES) -I$(INC_DIR) --LDFLAGS = -L$(LIB_DIR) -lvcflib $(HTS_LDFLAGS) -lpthread -lz -lm -llzma -lbz2 -- -- -+LDFLAGS = -L$(LIB_DIR) -lvcflib $(HTS_LDFLAGS) -lpthread -lz -lm -llzma -lbz2 -lssw - - all: $(OBJECTS) $(BINS) scriptToBin - -@@ -144,11 +142,6 @@ - #CXXFLAGS = -O2 - #CXXFLAGS = -pedantic -Wall -Wshadow -Wpointer-arith -Wcast-qual - --SSW = src/ssw.o src/ssw_cpp.o -- --ssw.o: src/ssw.hpp --ssw_cpp.o:src/ssw_cpp.hpp -- - openmp: - $(MAKE) CXXFLAGS="$(CXXFLAGS) -fopenmp -D HAS_OPENMP" - -@@ -176,11 +169,11 @@ - $(SHORTBINS): pre - $(MAKE) $(BIN_DIR)/$@ - --$(BINS): $(BIN_SOURCES) libvcflib.a $(OBJECTS) $(SSW) $(FILEVERCMP) pre intervaltree -+$(BINS): $(BIN_SOURCES) libvcflib.a $(OBJECTS) $(FILEVERCMP) pre intervaltree - $(CXX) src/$(notdir $@).cpp -o $@ $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) -DVERSION=\"$(GIT_VERSION)\" - --libvcflib.a: $(OBJECTS) $(SSW) $(FILEVERCMP) pre -- ar rs libvcflib.a $(OBJECTS) $(SSW) $(FILEVERCMP) -+libvcflib.a: $(OBJECTS) $(FILEVERCMP) pre -+ ar rs libvcflib.a $(OBJECTS) $(FILEVERCMP) - $(CP) libvcflib.a $(LIB_DIR) - - -@@ -201,7 +194,6 @@ - - clean: - $(RM) $(BINS) $(OBJECTS) -- $(RM) ssw_cpp.o ssw.o - $(RM) libvcflib.a - $(RM) -r $(BIN_DIR) - $(RM) -r $(LIB_DIR) +--- libvcflib.orig/CMakeLists.txt ++++ libvcflib/CMakeLists.txt +@@ -86,14 +86,13 @@ + src/pdflib.cpp + src/cdflib.cpp + src/split.cpp +- src/ssw.hpp +- src/ssw_cpp.hpp + filevercmp/filevercmp.c + ) + + target_link_libraries(vcflib + PkgConfig::SMITHWATERMAN + PkgConfig::FASTAHACK ++ ssw + ) + + set(BINS +--- libvcflib.orig/src/Variant.h ++++ libvcflib/src/Variant.h +@@ -28,7 +28,7 @@ + 
#include "join.h" + #include + #include +-#include "ssw_cpp.hpp" ++#include + #include "convert.h" + #include "multichoose.h" + #include diff -Nru libvcflib-1.0.1+dfsg/debian/patches/use_debian_packaged_smithwaterman.patch libvcflib-1.0.2+dfsg/debian/patches/use_debian_packaged_smithwaterman.patch --- libvcflib-1.0.1+dfsg/debian/patches/use_debian_packaged_smithwaterman.patch 2020-10-31 17:41:05.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/patches/use_debian_packaged_smithwaterman.patch 2021-01-28 10:14:37.000000000 +0000 @@ -1,79 +1,60 @@ Author: Andreas Tille -Last-Update: Thu, 23 Jun 2016 09:10:54 +0200 +Last-Update: 2021-01-28 Description: Use Debian packaged libsmithwaterman Forwarded: not-needed ---- libvcflib.orig/Makefile -+++ libvcflib/Makefile -@@ -120,11 +120,6 @@ - - # TABIX = tabixpp/tabix.o - # FASTAHACK = fastahack/Fasta.o --SMITHWATERMAN = smithwaterman/SmithWatermanGotoh.o --REPEATS = smithwaterman/Repeats.o --INDELALLELE = smithwaterman/IndelAllele.o --DISORDER = smithwaterman/disorder.o --LEFTALIGN = smithwaterman/LeftAlign.o - FILEVERCMP = filevercmp/filevercmp.o - - # Work out how to find htslib -@@ -164,7 +159,7 @@ - gprof: - $(MAKE) CXXFLAGS="$(CXXFLAGS) -pg" all - --$(OBJECTS): $(SOURCES) $(HEADERS) multichoose pre $(SMITHWATERMAN) $(FILEVERCMP) $(FASTAHACK) -+$(OBJECTS): $(SOURCES) $(HEADERS) multichoose pre $(FILEVERCMP) $(FASTAHACK) - $(CXX) -c -o $@ src/$(*F).cpp $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) && $(CP) src/*.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ - - multichoose: pre -@@ -176,17 +171,6 @@ - $(TABIX): pre - echo "No need to create separately packaged tabixpp" - --$(SMITHWATERMAN): pre -- cd smithwaterman && $(MAKE) && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && $(CP) *.o $(VCF_LIB_LOCAL)/$(OBJ_DIR)/ -- --$(DISORDER): $(SMITHWATERMAN) -- --$(REPEATS): $(SMITHWATERMAN) -- --$(LEFTALIGN): $(SMITHWATERMAN) -- --$(INDELALLELE): $(SMITHWATERMAN) -- - $(FASTAHACK): pre - cd fastahack && $(MAKE) && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && $(CP) 
Fasta.o $(VCF_LIB_LOCAL)/$(OBJ_DIR)/ - -@@ -196,11 +180,11 @@ - $(SHORTBINS): pre - $(MAKE) $(BIN_DIR)/$@ - --$(BINS): $(BIN_SOURCES) libvcflib.a $(OBJECTS) $(SMITHWATERMAN) $(FASTAHACK) $(DISORDER) $(LEFTALIGN) $(INDELALLELE) $(SSW) $(FILEVERCMP) pre intervaltree -+$(BINS): $(BIN_SOURCES) libvcflib.a $(OBJECTS) $(FASTAHACK) $(SSW) $(FILEVERCMP) pre intervaltree - $(CXX) src/$(notdir $@).cpp -o $@ $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) -DVERSION=\"$(GIT_VERSION)\" - --libvcflib.a: $(OBJECTS) $(SMITHWATERMAN) $(REPEATS) $(FASTAHACK) $(DISORDER) $(LEFTALIGN) $(INDELALLELE) $(SSW) $(FILEVERCMP) pre -- ar rs libvcflib.a $(OBJECTS) smithwaterman/sw.o $(FASTAHACK) $(SSW) $(FILEVERCMP) -+libvcflib.a: $(OBJECTS) $(FASTAHACK) $(SSW) $(FILEVERCMP) pre -+ ar rs libvcflib.a $(OBJECTS) $(FASTAHACK) $(SSW) $(FILEVERCMP) - $(CP) libvcflib.a $(LIB_DIR) - - -@@ -227,7 +211,6 @@ - $(RM) -r $(LIB_DIR) - $(RM) -r $(INC_DIR) - $(RM) -r $(OBJ_DIR) -- $(MAKE) clean -C smithwaterman - $(MAKE) clean -C fastahack - $(MAKE) clean -C multichoose - $(MAKE) clean -C libVCFH --- libvcflib.orig/src/Variant.h +++ libvcflib/src/Variant.h -@@ -18,7 +18,7 @@ +@@ -27,7 +27,7 @@ #include "split.h" #include "join.h" - #include "tabix.hpp" + #include -#include "SmithWatermanGotoh.h" +#include - #include "disorder.h" #include "ssw_cpp.hpp" #include "convert.h" + #include "multichoose.h" +--- libvcflib.orig/CMakeLists.txt ++++ libvcflib/CMakeLists.txt +@@ -32,6 +32,7 @@ + + pkg_check_modules(HTSLIB htslib) + pkg_check_modules(TABIXPP tabixpp) # Optional ++pkg_check_modules(SMITHWATERMAN REQUIRED IMPORTED_TARGET libsmithwaterman) + + # ---- Build switches + +@@ -55,7 +56,6 @@ + include_directories(include) + include_directories(fastahack) + include_directories(intervaltree) +-include_directories(smithwaterman) + include_directories(multichoose) + include_directories(filevercmp) + +@@ -70,7 +70,6 @@ + src/*.h* + multichoose/*.h* + intervaltree/*.h* +- smithwaterman/*.h* + fastahack/*.h* + filevercmp/*.h*) + +@@ 
-91,14 +90,13 @@ + src/ssw.hpp + src/ssw_cpp.hpp + fastahack/Fasta.cpp +- smithwaterman/SmithWatermanGotoh.cpp +- smithwaterman/Repeats.cpp +- smithwaterman/IndelAllele.cpp +- smithwaterman/disorder.cpp +- smithwaterman/LeftAlign.cpp + filevercmp/filevercmp.c + ) + ++target_link_libraries(vcflib ++ PkgConfig::SMITHWATERMAN ++ ) ++ + set(BINS + vcfecho + dumpContigsFromHeader diff -Nru libvcflib-1.0.1+dfsg/debian/patches/use_debian_packaged_tabixpp.patch libvcflib-1.0.2+dfsg/debian/patches/use_debian_packaged_tabixpp.patch --- libvcflib-1.0.1+dfsg/debian/patches/use_debian_packaged_tabixpp.patch 2020-10-31 17:41:05.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/patches/use_debian_packaged_tabixpp.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,55 +0,0 @@ -Author: Andreas Tille -Last-Update: Thu, 23 Jun 2016 09:10:54 +0200 -Description: Use Debian packaged libtabixpp -Forwarded: not-needed - ---- libvcflib.orig/Makefile -+++ libvcflib/Makefile -@@ -118,8 +118,8 @@ - BINS = $(addprefix $(BIN_DIR)/,$(notdir $(BIN_SOURCES:.cpp=))) - SHORTBINS = $(notdir $(BIN_SOURCES:.cpp=)) - --TABIX = tabixpp/tabix.o --FASTAHACK = fastahack/Fasta.o -+# TABIX = tabixpp/tabix.o -+# FASTAHACK = fastahack/Fasta.o - SMITHWATERMAN = smithwaterman/SmithWatermanGotoh.o - REPEATS = smithwaterman/Repeats.o - INDELALLELE = smithwaterman/IndelAllele.o -@@ -164,7 +164,7 @@ - gprof: - $(MAKE) CXXFLAGS="$(CXXFLAGS) -pg" all - --$(OBJECTS): $(SOURCES) $(HEADERS) $(TABIX) multichoose pre $(SMITHWATERMAN) $(FILEVERCMP) $(FASTAHACK) -+$(OBJECTS): $(SOURCES) $(HEADERS) multichoose pre $(SMITHWATERMAN) $(FILEVERCMP) $(FASTAHACK) - $(CXX) -c -o $@ src/$(*F).cpp $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) && $(CP) src/*.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ - - multichoose: pre -@@ -174,7 +174,7 @@ - cd intervaltree && $(MAKE) && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ - - $(TABIX): pre -- cd tabixpp && INCLUDES="$(HTS_INCLUDES)" LIBPATH="-L. 
$(HTS_LDFLAGS)" HTSLIB="$(HTS_LIB)" $(MAKE) && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ -+ echo "No need to create separately packaged tabixpp" - - $(SMITHWATERMAN): pre - cd smithwaterman && $(MAKE) && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && $(CP) *.o $(VCF_LIB_LOCAL)/$(OBJ_DIR)/ -@@ -199,8 +199,8 @@ - $(BINS): $(BIN_SOURCES) libvcflib.a $(OBJECTS) $(SMITHWATERMAN) $(FASTAHACK) $(DISORDER) $(LEFTALIGN) $(INDELALLELE) $(SSW) $(FILEVERCMP) pre intervaltree - $(CXX) src/$(notdir $@).cpp -o $@ $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) -DVERSION=\"$(GIT_VERSION)\" - --libvcflib.a: $(OBJECTS) $(SMITHWATERMAN) $(REPEATS) $(FASTAHACK) $(DISORDER) $(LEFTALIGN) $(INDELALLELE) $(SSW) $(FILEVERCMP) $(TABIX) pre -- ar rs libvcflib.a $(OBJECTS) smithwaterman/sw.o $(FASTAHACK) $(SSW) $(FILEVERCMP) $(TABIX) -+libvcflib.a: $(OBJECTS) $(SMITHWATERMAN) $(REPEATS) $(FASTAHACK) $(DISORDER) $(LEFTALIGN) $(INDELALLELE) $(SSW) $(FILEVERCMP) pre -+ ar rs libvcflib.a $(OBJECTS) smithwaterman/sw.o $(FASTAHACK) $(SSW) $(FILEVERCMP) - $(CP) libvcflib.a $(LIB_DIR) - - -@@ -227,7 +227,6 @@ - $(RM) -r $(LIB_DIR) - $(RM) -r $(INC_DIR) - $(RM) -r $(OBJ_DIR) -- $(MAKE) clean -C tabixpp - $(MAKE) clean -C smithwaterman - $(MAKE) clean -C fastahack - $(MAKE) clean -C multichoose diff -Nru libvcflib-1.0.1+dfsg/debian/rules libvcflib-1.0.2+dfsg/debian/rules --- libvcflib-1.0.1+dfsg/debian/rules 2020-10-31 17:06:48.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/rules 2021-01-28 11:34:57.000000000 +0000 @@ -9,11 +9,18 @@ export GIT_VERSION=$(DEB_VERSION) %: - dh $@ + dh $@ --buildsystem cmake override_dh_clean: dh_clean find . 
-name "*.o" -delete + rm -f README.html + +override_dh_auto_clean: + dh_auto_clean || true + +override_dh_auto_configure: + dh_auto_configure -- -DOPENMP=ON override_dh_auto_build: dh_auto_build @@ -23,7 +30,7 @@ override_dh_auto_test: ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS))) - export LD_LIBRARY_PATH=$(CURDIR)/lib/:$LD_LIBRARY_PATH dh_auto_test + export LD_LIBRARY_PATH=$(CURDIR)/obj-${DEB_HOST_GNU_TYPE}/:$$LD_LIBRARY_PATH && cd test && $(MAKE) endif override_dh_install: diff -Nru libvcflib-1.0.1+dfsg/debian/vcflib.1 libvcflib-1.0.2+dfsg/debian/vcflib.1 --- libvcflib-1.0.1+dfsg/debian/vcflib.1 2019-12-10 11:34:18.000000000 +0000 +++ libvcflib-1.0.2+dfsg/debian/vcflib.1 1970-01-01 00:00:00.000000000 +0000 @@ -1,28 +0,0 @@ -.TH VCFLIB 1 "April 16, 2001" -.SH NAME -vcflib \- runs various programs using libvcflib -.SH SYNOPSIS -.B vcflib -\fIprogram\fR [arguments] -.SH DESCRIPTION -This manual page documents briefly the -.B vcflib -wrapper to the libvcflib programs. -This manual page was written for the Debian GNU/Linux distribution -because the original program did not have a manual page. -.PP -\fBvcflib\fP is just a wrapper that invokes the various programs -that are part of the libvcflib source. If you call these without -any argument each tool will print its help page. -.PP -If you call \fBvcflib\fR without any tool argument it prints a list -of tools. -.P -If you set your your PATH to -.IP -export PATH=$PATH:/usr/lib/vcflib/bin -.P -you can call all tools directly without the \fBvcflib\fR wrapper. -.SH AUTHOR -This manual page was written by Andreas Tille -for the Debian GNU/Linux system (but may be used by others). 
diff -Nru libvcflib-1.0.1+dfsg/doc/abba-baba.md libvcflib-1.0.2+dfsg/doc/abba-baba.md --- libvcflib-1.0.1+dfsg/doc/abba-baba.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/abba-baba.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,79 @@ +% ABBA-BABA(1) abba-baba (vcflib) | abba-baba (VCF genotype) +% Erik Garrison and vcflib contributors + +# NAME + +**abba-baba** + +# SYNOPSIS + +**abba-baba** --tree 0,1,2,3 --file my.vcf --type PL + +# DESCRIPTION + +**abba-baba** calculates the tree pattern for four individuals. This tool assumes reference is ancestral and ignores non **abba-baba** sites. The output is a boolean value: 1 = true, 0 = false for abba and baba. The tree argument should be specified from the most basal taxa to the most derived. + + + +# OPTIONS + +``` + + + Example: + D C B A + \ / / / + \ / / + \ / + \ / + / + / + --tree A,B,C,D + +Output : 4 columns : + 1. seqid + 2. position + 3. abba + 4. baba +required: t,tree -- a zero based comma separated list of target individuals corresponding to VCF columns +required: f,file -- a properly formatted VCF. +required: y,type -- genotype likelihood format ; genotypes: GP,GL or PL; + + +type: genotype + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[abba-baba.cpp](https://github.com/vcflib/vcflib/blob/master/src/abba-baba.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed.
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/bFst.md libvcflib-1.0.2+dfsg/doc/bFst.md --- libvcflib-1.0.1+dfsg/doc/bFst.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/bFst.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,78 @@ +% BFST(1) bFst (vcflib) | bFst (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**bFst** + +# SYNOPSIS + +**bFst** --target 0,1,2,3,4,5,6,7 --background 11,12,13,16,17,19,22 --file my.vcf --deltaaf 0.1 + +# DESCRIPTION + +**bFst** is a Bayesian approach to Fst. Importantly **bFst** accounts for genotype uncertainty in the model using genotype likelihoods. For a more detailed description see: "A Bayesian approach to inferring population structure from dominant markers" by Holsinger et al. Molecular Ecology Vol 11, issue 7 2002. The likelihood function has been modified to use genotype likelihoods provided by variant callers. There are five free parameters estimated in the model: each subpopulation's allele frequency and Fis (fixation index, within each subpopulation), a free parameter for the total population's allele frequency, and Fst. + + + +# OPTIONS + +``` + + +Output : 11 columns : + 1. Seqid + 2. Position + 3. Observed allele frequency in target. + 4. Estimated allele frequency in target. + 5. Observed allele frequency in background. + 6. Estimated allele frequency in background. + 7. Observed allele frequency combined. + 8. Estimated allele frequency in combined. + 9. ML estimate of Fst (mean) + 10. Lower bound of the 95% credible interval + 11. Upper bound of the 95% credible interval + +required: t,target -- a zero based comma separated list of target individuals corresponding to VCF columns +required: b,background -- a zero based comma separated list of background individuals corresponding to VCF columns +required: f,file a -- a proper formatted VCF file.
the FORMAT field MUST contain "PL" +required: d,deltaaf -- skip sites where the difference in allele frequency is less than deltaaf + + +Type: statistics + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[bFst.cpp](https://github.com/vcflib/vcflib/blob/master/src/bFst.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/dumpContigsFromHeader.md libvcflib-1.0.2+dfsg/doc/dumpContigsFromHeader.md --- libvcflib-1.0.1+dfsg/doc/dumpContigsFromHeader.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/dumpContigsFromHeader.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,73 @@ +% DUMPCONTIGSFROMHEADER(1) dumpContigsFromHeader (vcflib) | dumpContigsFromHeader (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**dumpContigsFromHeader** + +# SYNOPSIS + +**dumpContigsFromHeader** file + +# DESCRIPTION + +Dump contigs from header + + + + + +# EXAMPLES + +``` + +Example: + + **dumpContigsFromHeader** samples/scaffold612.vcf + + ##contig=<ID=scaffold4,length=1524> + ##contig=<ID=scaffold12,length=56895> + (...) + + output + + scaffold4 1524 + scaffold12 56895 + (...) + +Type: transformation + + +``` + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[dumpContigsFromHeader.cpp](https://github.com/vcflib/vcflib/blob/master/src/dumpContigsFromHeader.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed.
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/genotypeSummary.md libvcflib-1.0.2+dfsg/doc/genotypeSummary.md --- libvcflib-1.0.1+dfsg/doc/genotypeSummary.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/genotypeSummary.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,68 @@ +% GENOTYPESUMMARY(1) genotypeSummary (vcflib) | genotypeSummary (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**genotypeSummary** + +# SYNOPSIS + +**genotypeSummary** --type PL --target 0,1,2,3,4,5,6,7 --file my.vcf --snp + +# DESCRIPTION + +Generates a table of genotype counts. Summarizes genotype counts for bi-allelic SNVs and indels. + + + +# OPTIONS + +``` + + +output: table of genotype counts for each individual. +required: t,target -- a zero based comma separated list of target individuals corresponding to VCF columns +required: f,file -- proper formatted VCF +required, y,type -- genotype likelihood format; genotype : GL,PL,GP +optional, r,region -- a tabix compliant region : chr1:1-1000 or chr1 +optional, s,snp -- Only count SNPs +optional, a,ancestral -- describe counts relative to the ancestral allele defined as AA in INFO + +Type: statistics + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[genotypeSummary.cpp](https://github.com/vcflib/vcflib/blob/master/src/genotypeSummary.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed.
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/hapLrt.md libvcflib-1.0.2+dfsg/doc/hapLrt.md --- libvcflib-1.0.1+dfsg/doc/hapLrt.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/hapLrt.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,74 @@ +% HAPLRT(1) hapLrt (vcflib) | hapLrt (VCF genotype) +% Erik Garrison and vcflib contributors + +# NAME + +**hapLrt** + +# SYNOPSIS + +**hapLrt** --target 0,1,2,3,4,5,6,7 --background 11,12,13,16,17,19,22 --type GP --file my.vcf + +# DESCRIPTION + +HapLRT is a likelihood ratio test for haplotype lengths. The lengths are modeled with an exponential distribution. The sign denotes if the target has longer haplotypes (1) or the background (-1). + + + +# OPTIONS + +``` + + +Output : 6 columns : + 1. seqid + 2. position + 3. mean target haplotype length + 4. mean background haplotype length + 5. p-value from LRT + 6. sign + +required: t,target -- argument: a zero based comma separated list of target individuals corresponding to VCF columns +required: b,background -- argument: a zero based comma separated list of background individuals corresponding to VCF columns +required: f,file -- argument: a properly formatted phased VCF file +required: y,type -- argument: type of genotype likelihood: PL, GL or GP +optional: r,region -- argument: a genomic range to calculate **hapLrt** on in the format : "seqid:start-end" or "seqid" + + +Type: genotype + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[hapLrt.cpp](https://github.com/vcflib/vcflib/blob/master/src/hapLrt.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed.
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/iHS.md libvcflib-1.0.2+dfsg/doc/iHS.md --- libvcflib-1.0.1+dfsg/doc/iHS.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/iHS.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,92 @@ +% IHS(1) iHS (vcflib) | iHS (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**iHS** + +# SYNOPSIS + +**iHS** --target 0,1,2,3,4,5,6,7 --file my.phased.vcf \ --region chr1:1-1000 > STDOUT 2> STDERR + +# DESCRIPTION + +**iHS** calculates the integrated haplotype score which measures the relative decay of extended haplotype homozygosity (EHH) for the reference and alternative alleles at a site (see: Voight et al. 2006, Szpiech & Hernandez 2014). + + + +# OPTIONS + +``` + + +Our code is highly concordant with both implementations mentioned. However, we do not set an upper limit to the allele frequency. **iHS** can be run without a genetic map, in which case the change in EHH is integrated over a constant. Human genetic maps for GRCh36 and GRCh37 (hg18 & hg19) can be found at: http://bochet.gcc.biostat.washington.edu/beagle/genetic_maps/ . **iHS** by default interpolates SNV positions to genetic position (you don't need a genetic position for every VCF entry in the map file). + +**iHS** analysis requires normalization by allele frequency. It is important that **iHS** is calculated over large regions so that the normalization does not down weight real signals. For genome-wide runs it is recommended to run slightly overlapping windows, throw out values that fail integration (columns 7 & 8 in the output) and then remove duplicates by using the 'sort' and 'uniq' Linux commands. Normalization of the output is as simple as running 'normalize-**iHS**'. + + + + **iHS** calculates the integrated ratio of haplotype decay between the reference and non-reference allele. +Output : 8 columns : + 1. seqid + 2. position + 3. target allele frequency + 4. integrated EHH (alternative) + 5. integrated EHH (reference) + 6.
**iHS** ln(iEHHalt/iEHHref) + 7. != 0 integration failure + 8. != 0 integration failure + +Params: + required: t,target A zero based comma separated list of target + individuals corresponding to VCF columns + required: r,region A tabix compliant genomic range + format: "seqid:start-end" or "seqid" + required: f,file Proper formatted and phased VCF. + required: y,type Genotype likelihood format: GT,PL,GL,GP + optional: a,af Alternative alleles with frequencies less + than [0.05] are skipped. + optional: x,threads Number of CPUs [1]. + recommended: g,gen A PLINK formatted map file. + + + +Type: statistics + + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[iHS.cpp](https://github.com/vcflib/vcflib/blob/master/src/iHS.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/meltEHH.md libvcflib-1.0.2+dfsg/doc/meltEHH.md --- libvcflib-1.0.1+dfsg/doc/meltEHH.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/meltEHH.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,86 @@ +% MELTEHH(1) meltEHH (vcflib) | meltEHH (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**meltEHH** + +# SYNOPSIS + +**meltEHH** --target 0,1,2,3,4,5,6,7 --pos 10 --file my.phased.vcf \ --region chr1:1-1000 > STDOUT 2> STDERR + +# DESCRIPTION + + + + + +# OPTIONS + +``` + + + **meltEHH** provides the data to plot extended haplotype homozygosity +(EHH) curves and produces the data to generate the following plot: + + + + + **meltEHH** provides the data to plot EHH curves. +Output : 4 columns : + 1. seqid + 2. position + 3. EHH + 4.
ref or alt [0 == ref] +Params: + required: t,target A zero based comma separated list of target + individuals corresponding to VCF columns + required: r,region A tabix compliant genomic range + format: "seqid:start-end" or "seqid" + required: f,file Properly formatted and phased VCF. + required: y,type Genotype likelihood format: GT,PL,GL,GP + required: p,position Variant position to melt. + optional: a,af Alternative alleles with frequencies less + than [0.05] are skipped. + + + +Type: statistics + + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[meltEHH.cpp](https://github.com/vcflib/vcflib/blob/master/src/meltEHH.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/normalize-iHS.md libvcflib-1.0.2+dfsg/doc/normalize-iHS.md --- libvcflib-1.0.1+dfsg/doc/normalize-iHS.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/normalize-iHS.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,74 @@ +% NORMALIZE-IHS(1) normalize-iHS (vcflib) | normalize-iHS (VCF genotype) +% Erik Garrison and vcflib contributors + +# NAME + +**normalize-iHS** + +# SYNOPSIS + +**normalize-iHS** -s 0.01 -f input.txt + +# DESCRIPTION + +Normalizes iHS or XP-EHH scores. + + + +# OPTIONS + +``` + + + + +A cross-population extended haplotype homozygosity (XP-EHH) score is +directional: a positive score suggests selection is likely to have +happened in population A, whereas a negative score suggests the same +about population B. See for example +https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2687721/ + + +Output : **normalize-iHS** adds one additional column to input (normalized score).
+required: -f -- Output from iHS or XPEHH +optional: -s -- Max AF diff for window [0.01] + +Type: genotype + + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[normalize-iHS.cpp](https://github.com/vcflib/vcflib/blob/master/src/normalize-iHS.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/permuteGPAT++.md libvcflib-1.0.2+dfsg/doc/permuteGPAT++.md --- libvcflib-1.0.1+dfsg/doc/permuteGPAT++.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/permuteGPAT++.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,70 @@ +% PERMUTEGPAT++(1) permuteGPAT++ (vcflib) | permuteGPAT++ (VCF phenotype) +% Erik Garrison and vcflib contributors + +# NAME + +**permuteGPAT++** + +# SYNOPSIS + +**permuteGPAT++** -f gpat.txt -n 5 -s 1 + +# DESCRIPTION + +**permuteGPAT++** is a method for adding empirical p-values to a GPAT++ score. + + + +# OPTIONS + +``` + + + Currently **permuteGPAT++** only supports wcFst, but will be extended. + +OUTPUT: **permuteGPAT++** will append three additional columns: + 1. The number of successes + 2. The number of trials + 3. The empirical p-value + +file: f -- argument: the input file +number: n -- argument: the number of permutations to run for each value [1000] +success: s -- argument: stop permutations after 's' successes [1] + +Type: phenotype + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[permuteGPAT++.cpp](https://github.com/vcflib/vcflib/blob/master/src/permuteGPAT++.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. 
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/permuteSmooth.md libvcflib-1.0.2+dfsg/doc/permuteSmooth.md --- libvcflib-1.0.1+dfsg/doc/permuteSmooth.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/permuteSmooth.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,75 @@ +% PERMUTESMOOTH(1) permuteSmooth (vcflib) | permuteSmooth (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**permuteSmooth** + +# SYNOPSIS + +**permuteSmooth** -s wcFst.smooth.txt -f wcFst.txt -n 5 -u 1 + +# DESCRIPTION + +**permuteSmooth** is a method for adding empirical p-values to smoothed wcFst scores. + + + +# OPTIONS + +``` + + +Required: + file: f -- argument: original wcFst data + smoothed: s -- argument: smoothed wcFst data + format: y -- argument: [swcFst, segwcFst] +Optional: + number: n -- argument: the number of permutations to run for each value [1000] + success: u -- argument: stop permutations after 'u' successes [1] + threads: x -- argument: number of threads [1] + +OUTPUT: **permuteSmooth** will append three additional columns: + 1. The number of successes + 2. The number of trials + 3. The empirical p-value + + +Type: statistics + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[permuteSmooth.cpp](https://github.com/vcflib/vcflib/blob/master/src/permuteSmooth.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed.
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/pFst.md libvcflib-1.0.2+dfsg/doc/pFst.md --- libvcflib-1.0.1+dfsg/doc/pFst.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/pFst.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,85 @@ +% PFST(1) pFst (vcflib) | pFst (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**pFst** + +# SYNOPSIS + +**pFst** --target 0,1,2,3,4,5,6,7 --background 11,12,13,16,17,19,22 --file my.vcf --deltaaf 0.1 --type PL + +# DESCRIPTION + +**pFst** is a probabilistic approach for detecting differences in allele frequencies between two populations. + + + +# OPTIONS + +``` + + + + +**pFst** is a likelihood ratio test (LRT) quantifying allele frequency +differences between populations. The LRT by default uses the binomial +distribution. If Genotype likelihoods are provided it uses a modified +binomial that weights each allele count by its certainty. If type is +set to 'PO' the LRT uses a beta distribution to fit the allele +frequency spectrum of the target and background. PO requires the AD +and DP genotype fields and requires at least two pools for the target +and background. The p-value calculated in **pFst** is based on the +chi-squared distribution with one degree of freedom. + + +Output : 3 columns : + 1. seqid + 2. position + 3. **pFst** probability + +required: t,target -- argument: a zero based comma separated list of target individuals corresponding to VCF columns +required: b,background -- argument: a zero based comma separated list of background individuals corresponding to VCF columns +required: f,file -- argument: a properly formatted VCF. 
+required: y,type -- argument: genotype likelihood format ; genotypes: GP, GL or PL; pooled: PO +optional: d,deltaaf -- argument: skip sites where the difference in allele frequencies is less than deltaaf, default is zero +optional: r,region -- argument: a tabix compliant genomic range : seqid or seqid:start-end +optional: c,counts -- switch : use genotype counts rather than genotype likelihoods to estimate parameters, default false + +Type: statistics + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[pFst.cpp](https://github.com/vcflib/vcflib/blob/master/src/pFst.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/plotHaps.md libvcflib-1.0.2+dfsg/doc/plotHaps.md --- libvcflib-1.0.1+dfsg/doc/plotHaps.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/plotHaps.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,68 @@ +% PLOTHAPS(1) plotHaps (vcflib) | plotHaps (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**plotHaps** + +# SYNOPSIS + + + +# DESCRIPTION + +**plotHaps** provides the formatted output that can be used with 'bin/plotHaplotypes.R'. 
+ + + +# OPTIONS + +``` + + +Output : haplotype matrix and positions + +**plotHaps** --target 0,1,2,3,4,5,6,7 --file my.phased.vcf.gz + +required: t,target -- argument: a zero based comma separated list of target individuals corresponding to VCF columns +required: r,region -- argument: a tabix compliant genomic range : "seqid:start-end" or "seqid" +required: f,file -- argument: properly formatted phased VCF file +required: y,type -- argument: genotype likelihood format: PL,GL,GP + +Type: statistics + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[plotHaps.cpp](https://github.com/vcflib/vcflib/blob/master/src/plotHaps.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/popStats.md libvcflib-1.0.2+dfsg/doc/popStats.md --- libvcflib-1.0.1+dfsg/doc/popStats.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/popStats.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,78 @@ +% POPSTATS(1) popStats (vcflib) | popStats (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**popStats** + +# SYNOPSIS + +**popStats** --type PL --target 0,1,2,3,4,5,6,7 --file my.vcf + +# DESCRIPTION + +General population genetic statistics for each SNP + + + +# OPTIONS + +``` + + + + Calculates basic population statistics at bi-allelic sites. The allele frequency is the number of non-reference alleles divided by the total number of alleles. The expected heterozygosity is 2*p*q, where p is the non-reference allele frequency and q is 1-p. The observed heterozygosity is the fraction of 0/1 genotypes out of all genotypes. The inbreeding coefficient, Fis, is the relative heterozygosity of each individual compared to the target group. + +Output : 9 columns : + 1. seqid + 2. position + 3. target allele frequency + 4. expected heterozygosity + 5. observed heterozygosity + 6.
number of hets + 7. number of homozygous ref + 8. number of homozygous alt + 9. target Fis +required: t,target -- a zero based comma separated list of target individuals corresponding to VCF columns +required: f,file -- proper formatted VCF +required: y,type -- genotype likelihood format; genotype : GL,PL,GP +optional: r,region -- a tabix compliant region : chr1:1-1000 or chr1 + +Type: statistics + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[popStats.cpp](https://github.com/vcflib/vcflib/blob/master/src/popStats.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/pVst.md libvcflib-1.0.2+dfsg/doc/pVst.md --- libvcflib-1.0.1+dfsg/doc/pVst.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/pVst.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,83 @@ +% PVST(1) pVst (vcflib) | pVst (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**pVst** + +# SYNOPSIS + +**pVst** --target 0,1,2,3,4,5,6,7 --background 11,12,13,16,17,19,22 --file my.vcf --type CN + +# DESCRIPTION + +**pVst** calculates vst, a measure of CNV stratification. + + + +# OPTIONS + +``` + + + + +The statistic Vst is used to test the difference in copy numbers at +each SV between two groups: Vst = (Vt-Vs)/Vt, where Vt is the overall +variance of copy number and Vs the average variance within +populations. + +Output : 5 columns : + 1. seqid + 2. position + 3. end + 4. vst + 5. probability + +required: t,target -- argument: a zero based comma separated list of target individuals corresponding to VCF columns +required: b,background -- argument: a zero based comma separated list of background individuals corresponding to VCF columns +required: f,file -- argument: a properly formatted VCF. +required: y,type -- argument: the genotype field with the copy number: e.g.
CN|CNF +optional: r,region -- argument: a tabix compliant genomic range : seqid or seqid:start-end +optional: x,cpu -- argument: number of CPUs [1] +optional: n,per -- argument: number of permutations [1000] + +Type: statistics + + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[pVst.cpp](https://github.com/vcflib/vcflib/blob/master/src/pVst.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/segmentFst.md libvcflib-1.0.2+dfsg/doc/segmentFst.md --- libvcflib-1.0.1+dfsg/doc/segmentFst.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/segmentFst.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,74 @@ +% SEGMENTFST(1) segmentFst (vcflib) | segmentFst (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**segmentFst** + +# SYNOPSIS + +**segmentFst** -s 0.7 -f wcFst.output.txt + +# DESCRIPTION + +**segmentFst** creates genomic segments (bed file) for regions with high wcFst + + + +# OPTIONS + +``` + + +**segmentFst** provides a way to find continuous regions with high Fst values. It takes the output of wcFst and produces a BED file. These high Fst regions can be permuted with 'permuteGPATwindow' +Output : 8 columns : + 1. Seqid + 2. Start (zero based) + 3. End (zero based) + 4. Average Fst + 5. Average high Fst (Fst > -s) + 6. N Fst values in segment + 7. N high fst values in segment + 8. Segment length +required: -f -- Output from wcFst +optional: -s -- High Fst cutoff [0.8] + +Type: statistics + + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[segmentFst.cpp](https://github.com/vcflib/vcflib/blob/master/src/segmentFst.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed.
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/segmentIhs.md libvcflib-1.0.2+dfsg/doc/segmentIhs.md --- libvcflib-1.0.1+dfsg/doc/segmentIhs.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/segmentIhs.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,72 @@ +% SEGMENTIHS(1) segmentIhs (vcflib) | segmentIhs (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**segmentIhs** + +# SYNOPSIS + +**segmentIhs** -s 2 -f iHS.normalized.output.txt + +# DESCRIPTION + +Creates genomic segments (bed file) for regions with high iHS + + + +# OPTIONS + +``` + +Output : 8 columns : + 1. Seqid + 2. Start (zero based) + 3. End (zero based) + 4. Average iHS + 5. Average high iHS (iHS > -s) + 6. N iHS values in segment + 7. N high iHS values in segment + 8. Segment length +required: -f -- Output from normalizeIHS +optional: -s -- High absolute iHS cutoff [2] + +Type: statistics + + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[segmentIhs.cpp](https://github.com/vcflib/vcflib/blob/master/src/segmentIhs.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/sequenceDiversity.md libvcflib-1.0.2+dfsg/doc/sequenceDiversity.md --- libvcflib-1.0.1+dfsg/doc/sequenceDiversity.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/sequenceDiversity.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,76 @@ +% SEQUENCEDIVERSITY(1) sequenceDiversity (vcflib) | sequenceDiversity (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**sequenceDiversity** + +# SYNOPSIS + +**sequenceDiversity** --target 0,1,2,3,4,5,6,7 --file my.vcf + +# DESCRIPTION + +The **sequenceDiversity** program calculates two popular metrics of haplotype diversity: pi and extended haplotype homozygosity (eHH). Pi is calculated using the Nei and Li 1979 formulation.
eHH is a convenient way to think about haplotype diversity. When eHH = 0 all haplotypes in the window are unique and when eHH = 1 all haplotypes in the window are identical. + + + +# OPTIONS + +``` + + +Output : 5 columns: + 1. seqid + 2. start of window + 3. end of window + 4. pi + 5. eHH + + +required: t,target -- argument: a zero based comma separated list of target individuals corresponding to VCF columns +required: f,file -- argument: a properly formatted phased VCF file +required: y,type -- argument: type of genotype likelihood: PL, GL or GP +optional: a,af -- sites less than af are filtered out; default is 0 +optional: r,region -- argument: a tabix compliant region : "seqid:0-100" or "seqid" +optional: w,window -- argument: the number of SNPs per window; default is 20 + +Type: statistics + + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[sequenceDiversity.cpp](https://github.com/vcflib/vcflib/blob/master/src/sequenceDiversity.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/smoother.md libvcflib-1.0.2+dfsg/doc/smoother.md --- libvcflib-1.0.1+dfsg/doc/smoother.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/smoother.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,79 @@ +% SMOOTHER(1) smoother (vcflib) | smoother (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**smoother** + +# SYNOPSIS + +**smoother** --format pFst --file GPA.output.txt + +# DESCRIPTION + +**smoother** is a method for window smoothing many of the GPAT++ formats. + + + +# OPTIONS + +``` + + + **smoother** averages a set of scores over a sliding genomic window. + **smoother** slides over genomic positions not the SNP indices. In other words + the number of scores within a window will not be constant.
The last + window for each seqid can be smaller than the defined window size. + **smoother** automatically analyses different seqids separately. +Output : 4 columns : + 1. seqid + 2. window start + 3. window end + 4. averaged score + +required: f,file -- argument: a file created by GPAT++ +required: o,format -- argument: format of input file, case sensitive + available format options: + wcFst, pFst, bFst, iHS, xpEHH, abba-baba, col3 +optional: w,window -- argument: size of genomic window in base pairs (default 5000) +optional: s,step -- argument: window step size in base pairs (default 1000) +optional: t,truncate -- flag : end last window at last position (zero based) + +Type: transformation + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[smoother.cpp](https://github.com/vcflib/vcflib/blob/master/src/smoother.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcf2dag.md libvcflib-1.0.2+dfsg/doc/vcf2dag.md --- libvcflib-1.0.1+dfsg/doc/vcf2dag.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcf2dag.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,65 @@ +% VCF2DAG(1) vcf2dag (vcflib) | vcf2dag (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcf2dag** + +# SYNOPSIS + +**vcf2dag** [options] [] + +# DESCRIPTION + +Modify VCF to be able to build a directed acyclic graph (DAG) + + + +# OPTIONS + +``` + +options: + -r, --reference FILE FASTA reference file. + +Modify the VCF file so that homozygous regions are included as REF/. calls. +For each ref and alt allele, assign an index. These steps are sufficient to +enable use of the VCF as a DAG (specifically a partially-ordered graph).
+ +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcf2dag.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcf2dag.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcf2fasta.md libvcflib-1.0.2+dfsg/doc/vcf2fasta.md --- libvcflib-1.0.1+dfsg/doc/vcf2fasta.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcf2fasta.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,64 @@ +% VCF2FASTA(1) vcf2fasta (vcflib) | vcf2fasta (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcf2fasta** + +# SYNOPSIS + +**vcf2fasta** [options] [file] + +# DESCRIPTION + +Generates sample_seq:N.fa for each sample, reference sequence, and chromosomal copy N in [0,1... ploidy]. Each sequence in the fasta file is named using the same pattern used for the file name, allowing them to be combined. + + + +# OPTIONS + +``` + +options: + -f, --reference REF Use this reference when decomposing samples. + -p, --prefix PREFIX Affix this output prefix to each file, none by default + -P, --default-ploidy N Set a default ploidy for samples which do not have information in the first record (2). + + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcf2fasta.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcf2fasta.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. 
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/vcf2tsv.md libvcflib-1.0.2+dfsg/doc/vcf2tsv.md --- libvcflib-1.0.1+dfsg/doc/vcf2tsv.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcf2tsv.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,59 @@ +% VCF2TSV(1) vcf2tsv (vcflib) | vcf2tsv (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcf2tsv** + +# SYNOPSIS + +**vcf2tsv** [-n null_string] [-g] [vcf file] + +# DESCRIPTION + +Converts VCF to per-allele or per-genotype tab-delimited format, using null string to replace empty values in the table. Specifying -g will output one line per sample with genotype information. When there is more than one alt allele there will be multiple rows, one for each allele, and the info will match the 'A' index + + + +# OPTIONS + +``` + + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcf2tsv.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcf2tsv.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfaddinfo.md libvcflib-1.0.2+dfsg/doc/vcfaddinfo.md --- libvcflib-1.0.1+dfsg/doc/vcfaddinfo.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfaddinfo.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,59 @@ +% VCFADDINFO(1) vcfaddinfo (vcflib) | vcfaddinfo (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfaddinfo** + +# SYNOPSIS + +**vcfaddinfo** + +# DESCRIPTION + +Adds info fields from the second file which are not present in the first vcf file.
+ + + +# OPTIONS + +``` + + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfaddinfo.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfaddinfo.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfafpath.md libvcflib-1.0.2+dfsg/doc/vcfafpath.md --- libvcflib-1.0.1+dfsg/doc/vcfafpath.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfafpath.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,79 @@ +% VCFAFPATH(1) vcfafpath (vcflib) | vcfafpath (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfafpath** + +# SYNOPSIS + +**vcfafpath** + +# DESCRIPTION + +Display genotype paths + + + + + +# EXAMPLES + +``` + +Example: + + **vcfafpath** samples/scaffold612.vcf + +``` + +T -> A +A -> G +T -> C +C -> A +C -> T +A -> G +T -> C +G -> C +C -> CAGA +A -> G +``` + + +Type: transformation + + + +``` + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfafpath.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfafpath.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. 
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfallelicprimitives.md libvcflib-1.0.2+dfsg/doc/vcfallelicprimitives.md --- libvcflib-1.0.1+dfsg/doc/vcfallelicprimitives.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfallelicprimitives.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,70 @@ +% VCFALLELICPRIMITIVES(1) vcfallelicprimitives (vcflib) | vcfallelicprimitives (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfallelicprimitives** + +# SYNOPSIS + +**vcfallelicprimitives** [options] [file] + +# DESCRIPTION + +If multiple allelic primitives (gaps or mismatches) are specified in a single VCF record, split the record into multiple lines, but drop all INFO fields. Does not handle genotypes (yet). MNPs are split into multiple SNPs unless the -m flag is provided. Records generated by splits have th + + + +# OPTIONS + +``` + +options: + -m, --use-mnps Retain MNPs as separate events (default: false). + -t, --tag-parsed FLAG Tag records which are split apart of a complex allele with this flag. + -L, --max-length LEN Do not manipulate records in which either the ALT or + REF is longer than LEN (default: 200). + -k, --keep-info Maintain site and allele-level annotations when decomposing. + Note that in many cases, such as multisample VCFs, these won't + be valid post-decomposition. For biallelic loci in single-sample + VCFs, they should be usable with caution. + -g, --keep-geno Maintain genotype-level annotations when decomposing. Similar + caution should be used for this as for --keep-info. + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfallelicprimitives.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfallelicprimitives.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. 
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfaltcount.md libvcflib-1.0.2+dfsg/doc/vcfaltcount.md --- libvcflib-1.0.1+dfsg/doc/vcfaltcount.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfaltcount.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,59 @@ +% VCFALTCOUNT(1) vcfaltcount (vcflib) | vcfaltcount (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfaltcount** + +# SYNOPSIS + +**vcfaltcount** + +# DESCRIPTION + +count the number of alternate alleles in all records in the vcf file + + + +# OPTIONS + +``` + + +Type: statistics + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfaltcount.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfaltcount.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfannotategenotypes.md libvcflib-1.0.2+dfsg/doc/vcfannotategenotypes.md --- libvcflib-1.0.1+dfsg/doc/vcfannotategenotypes.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfannotategenotypes.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,59 @@ +% VCFANNOTATEGENOTYPES(1) vcfannotategenotypes (vcflib) | vcfannotategenotypes (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfannotategenotypes** + +# SYNOPSIS + +**vcfannotategenotypes** + +# DESCRIPTION + +Examine genotype correspondence. Annotate genotypes in the first file with genotypes in the second adding the genotype as another flag to each sample field in the first file. annotation-tag is the name of the sample flag which is added to store the annotation. Also adds a 'has_variant' flag for sites where the second file has a variant.
+ + + +# OPTIONS + +``` + + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfannotategenotypes.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfannotategenotypes.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfannotate.md libvcflib-1.0.2+dfsg/doc/vcfannotate.md --- libvcflib-1.0.1+dfsg/doc/vcfannotate.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfannotate.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,64 @@ +% VCFANNOTATE(1) vcfannotate (vcflib) | vcfannotate (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfannotate** + +# SYNOPSIS + +**vcfannotate** [options] [] + +# DESCRIPTION + +Intersect the records in the VCF file with targets provided in a BED file. Intersections are done on the reference sequences in the VCF file. If no VCF filename is specified on the command line (last argument) the VCF is read from stdin. + + + +# OPTIONS + +``` + + +options: + -b, --bed use annotations provided by this BED file + -k, --key use this INFO field key for the annotations + -d, --default use this INFO field key for records without annotations + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfannotate.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfannotate.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed.
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfbreakmulti.md libvcflib-1.0.2+dfsg/doc/vcfbreakmulti.md --- libvcflib-1.0.1+dfsg/doc/vcfbreakmulti.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfbreakmulti.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,59 @@ +% VCFBREAKMULTI(1) vcfbreakmulti (vcflib) | vcfbreakmulti (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfbreakmulti** + +# SYNOPSIS + +**vcfbreakmulti** [options] [file] + +# DESCRIPTION + +If multiple alleles are specified in a single record, break the record into multiple lines, preserving allele-specific INFO fields. + + + +# OPTIONS + +``` + + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfbreakmulti.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfbreakmulti.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfcat.md libvcflib-1.0.2+dfsg/doc/vcfcat.md --- libvcflib-1.0.1+dfsg/doc/vcfcat.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfcat.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,61 @@ +% VCFCAT(1) vcfcat (vcflib) | vcfcat (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfcat** + +# SYNOPSIS + +**vcfcat** [file1] [file2] ... [fileN] + +# DESCRIPTION + +Concatenates VCF files + + + +# OPTIONS + +``` + + +Type: transformation + + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfcat.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfcat.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. 
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfcheck.md libvcflib-1.0.2+dfsg/doc/vcfcheck.md --- libvcflib-1.0.1+dfsg/doc/vcfcheck.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfcheck.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,66 @@ +% VCFCHECK(1) vcfcheck (vcflib) | vcfcheck (VCF metrics) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfcheck** + +# SYNOPSIS + +**vcfcheck** [options] + +# DESCRIPTION + +Validate integrity and identity of the VCF by verifying that the VCF record's REF matches a given reference file. + + + +# OPTIONS + +``` + +options: + -f, --fasta-reference FASTA reference file to use to obtain primer sequences + -x, --exclude-failures If a record fails, don't print it. Otherwise do. + -k, --keep-failures Print if the record fails, otherwise not. + -h, --help Print this message. + -v, --version Print version. + + +Type: metrics + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfcheck.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfcheck.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. 
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfclassify.md libvcflib-1.0.2+dfsg/doc/vcfclassify.md --- libvcflib-1.0.1+dfsg/doc/vcfclassify.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfclassify.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,61 @@ +% VCFCLASSIFY(1) vcfclassify (vcflib) | vcfclassify (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfclassify** + +# SYNOPSIS + +**vcfclassify** + +# DESCRIPTION + +Creates a new VCF where each variant is tagged by allele class: snp, ts/tv, indel, mnp + + + +# OPTIONS + +``` + + +Type: transformation + + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfclassify.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfclassify.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfcleancomplex.md libvcflib-1.0.2+dfsg/doc/vcfcleancomplex.md --- libvcflib-1.0.1+dfsg/doc/vcfcleancomplex.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfcleancomplex.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,62 @@ +% VCFCLEANCOMPLEX(1) vcfcleancomplex (vcflib) | vcfcleancomplex (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfcleancomplex** + +# SYNOPSIS + +**vcfcleancomplex** + +# DESCRIPTION + +Removes reference-matching sequence from complex alleles and adjusts records to reflect positional change. + + + +# OPTIONS + +``` + + +Generate a VCF stream in which 'long' non-complex alleles have their position corrected. 
+assumes that VCF records can't overlap 5'->3' + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfcleancomplex.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfcleancomplex.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfcombine.md libvcflib-1.0.2+dfsg/doc/vcfcombine.md --- libvcflib-1.0.1+dfsg/doc/vcfcombine.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfcombine.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,64 @@ +% VCFCOMBINE(1) vcfcombine (vcflib) | vcfcombine (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfcombine** + +# SYNOPSIS + +**vcfcombine** [vcf file] [vcf file] ... + +# DESCRIPTION + +Combine VCF files positionally, combining samples when sites and alleles are identical. Any number of VCF files may be combined. The INFO field and other columns are taken from one of the files which are combined when records in multiple files match. Alleles must have identical ordering to be combined into one record. If they do not, multiple records will be emitted. + + + +# OPTIONS + +``` + + +options: + -h --help This text. + -v --version Print version. + -r --region REGION A region specifier of the form chrN:x-y to bound the merge + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfcombine.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfcombine.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. 
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfcommonsamples.md libvcflib-1.0.2+dfsg/doc/vcfcommonsamples.md --- libvcflib-1.0.1+dfsg/doc/vcfcommonsamples.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfcommonsamples.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,59 @@ +% VCFCOMMONSAMPLES(1) vcfcommonsamples (vcflib) | vcfcommonsamples (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfcommonsamples** + +# SYNOPSIS + +**vcfcommonsamples** + +# DESCRIPTION + +Generates each record in the first file, removing samples not present in the second + + + +# OPTIONS + +``` + + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfcommonsamples.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfcommonsamples.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfcountalleles.md libvcflib-1.0.2+dfsg/doc/vcfcountalleles.md --- libvcflib-1.0.1+dfsg/doc/vcfcountalleles.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfcountalleles.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,65 @@ +% VCFCOUNTALLELES(1) vcfcountalleles (vcflib) | vcfcountalleles (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfcountalleles** + +# SYNOPSIS + +**vcfcountalleles** + +# DESCRIPTION + +Count alleles + + + + + +# EXAMPLES + +``` + +Example: + +**vcfcountalleles** samples/scaffold612.vcf +42603 + +Type: statistics + + + +``` + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfcountalleles.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfcountalleles.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. 
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfcreatemulti.md libvcflib-1.0.2+dfsg/doc/vcfcreatemulti.md --- libvcflib-1.0.1+dfsg/doc/vcfcreatemulti.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfcreatemulti.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,59 @@ +% VCFCREATEMULTI(1) vcfcreatemulti (vcflib) | vcfcreatemulti (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfcreatemulti** + +# SYNOPSIS + +**vcfcreatemulti** [options] [file] + +# DESCRIPTION + +If overlapping alleles are represented across multiple records, merge them into a single record. Currently only for indels. + + + +# OPTIONS + +``` + + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfcreatemulti.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfcreatemulti.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfdistance.md libvcflib-1.0.2+dfsg/doc/vcfdistance.md --- libvcflib-1.0.1+dfsg/doc/vcfdistance.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfdistance.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,59 @@ +% VCFDISTANCE(1) vcfdistance (vcflib) | vcfdistance (VCF metrics) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfdistance** + +# SYNOPSIS + +**vcfdistance** [customtagname] < [vcf file] + +# DESCRIPTION + +Adds a tag to each variant record which indicates the distance to the nearest variant (defaults to BasesToClosestVariant if no custom tag name is given). 
+ + + +# OPTIONS + +``` + + +Type: metrics + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfdistance.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfdistance.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfecho.md libvcflib-1.0.2+dfsg/doc/vcfecho.md --- libvcflib-1.0.1+dfsg/doc/vcfecho.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfecho.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,61 @@ +% VCFECHO(1) vcfecho (vcflib) | vcfecho (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfecho** + +# SYNOPSIS + +**vcfecho** + +# DESCRIPTION + +Echo VCF to stdout (simple demo) + + + +# OPTIONS + +``` + + +Type: transformation + + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfecho.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfecho.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfentropy.md libvcflib-1.0.2+dfsg/doc/vcfentropy.md --- libvcflib-1.0.1+dfsg/doc/vcfentropy.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfentropy.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,64 @@ +% VCFENTROPY(1) vcfentropy (vcflib) | vcfentropy (VCF metrics) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfentropy** + +# SYNOPSIS + +**vcfentropy** [options] + +# DESCRIPTION + +Annotate VCF records with the Shannon entropy of flanking sequence. Annotates the output VCF file with, for each record, EntropyLeft, EntropyRight, EntropyCenter, which are the entropies of the sequence of the given window size to the left, right, and center of the record. 
Also adds EntropyRef and EntropyAlt for each alt. + + + +# OPTIONS + +``` + +options: + -f, --fasta-reference FASTA reference file to use to obtain flanking sequences + -w, --window-size Size of the window over which to calculate entropy + + + +Type: metrics + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfentropy.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfentropy.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfevenregions.md libvcflib-1.0.2+dfsg/doc/vcfevenregions.md --- libvcflib-1.0.1+dfsg/doc/vcfevenregions.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfevenregions.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,67 @@ +% VCFEVENREGIONS(1) vcfevenregions (vcflib) | vcfevenregions (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfevenregions** + +# SYNOPSIS + +**vcfevenregions** [options] + +# DESCRIPTION + +Generates a list of regions, e.g. chr20:10..30 using the variant density information provided in the VCF file to ensure that the regions have even numbers of variants. This can be used to reduce the variance in runtime when dividing variant detection or genotyping by genomic coordinates. + + + +# OPTIONS + +``` + +options: + -f, --fasta-reference REF FASTA reference file to use to obtain primer sequences. + -n, --number-of-regions N The number of desired regions. + -p, --number-of-positions N The number of positions per region. + -o, --offset N Add an offset to region positioning, to avoid boundary + related artifacts in downstream processing. + -l, --overlap N The number of sites to overlap between regions. Default 0. + -s, --separator SEQ Specify string to use to separate region output. 
Default '-' + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfevenregions.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfevenregions.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcffilter.md libvcflib-1.0.2+dfsg/doc/vcffilter.md --- libvcflib-1.0.1+dfsg/doc/vcffilter.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcffilter.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,90 @@ +% VCFFILTER(1) vcffilter (vcflib) | vcffilter (VCF filter) +% Erik Garrison and vcflib contributors + +# NAME + +**vcffilter** + +# SYNOPSIS + +**vcffilter** [options] + +# DESCRIPTION + +VCF filter the specified vcf file using the set of filters + + + +# OPTIONS + +``` + + +options: + -f, --info-filter specifies a filter to apply to the info fields of records, + removes alleles which do not pass the filter + -g, --genotype-filter specifies a filter to apply to the genotype fields of records + -k, --keep-info used in conjunction with '-g', keeps variant info, but removes genotype + -s, --filter-sites filter entire records, not just alleles + -t, --tag-pass tag vcf records as positively filtered with this tag, print all records + -F, --tag-fail tag vcf records as negatively filtered with this tag, print all records + -A, --append-filter append the existing filter tag, don't just replace it + -a, --allele-tag apply -t on a per-allele basis. adds or sets the corresponding INFO field tag + -v, --invert inverts the filter, e.g. grep -v + -o, --or use logical OR instead of AND to combine filters + -r, --region specify a region on which to target the filtering, requires a BGZF + compressed file which has been indexed with tabix. any number of + regions may be specified. + +Filter the specified vcf file using the set of filters. 
+Filters are specified in the form "<ID> <operator> <value>": + -f "DP > 10" # for info fields + -g "GT = 1|1" # for genotype fields + -f "CpG" # for 'flag' fields + +Operators can be any of: =, !, <, >, |, & + +Any number of filters may be specified. They are combined via logical AND +unless --or is specified on the command line. Obtain logical negation through +the use of parentheses, e.g. "! ( DP = 10 )" + +For convenience, you can specify "QUAL" to refer to the quality of the site, even +though it does not appear in the INFO fields. + +type: filter + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcffilter.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcffilter.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcffixup.md libvcflib-1.0.2+dfsg/doc/vcffixup.md --- libvcflib-1.0.1+dfsg/doc/vcffixup.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcffixup.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,72 @@ +% VCFFIXUP(1) vcffixup (vcflib) | vcffixup (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcffixup** + +# SYNOPSIS + +**vcffixup** + +# DESCRIPTION + +Generates a VCF stream where AC and NS have been generated for each record using sample genotypes + + + +# OPTIONS + +``` + + + + +Count the allele frequencies across alleles present in each record in the VCF file. (Similar to vcftools --freq.) + +Uses genotypes from the VCF file to correct AC (alternate allele count), AF +(alternate allele frequency), NS (number of called), in the VCF records. For +example: + + % vcfkeepsamples file.vcf NA12878 | **vcffixup** - | vcffilter -f "AC > 0" + +Would downsample file.vcf to only NA12878, removing sites for which the sample +was not called as polymorphic. 
+ +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcffixup.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcffixup.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfflatten.md libvcflib-1.0.2+dfsg/doc/vcfflatten.md --- libvcflib-1.0.1+dfsg/doc/vcfflatten.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfflatten.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,59 @@ +% VCFFLATTEN(1) vcfflatten (vcflib) | vcfflatten (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfflatten** + +# SYNOPSIS + +**vcfflatten** [file] + +# DESCRIPTION + +Removes multi-allelic sites by picking the most common alternate. Requires allele frequency specification 'AF' and use of 'G' and 'A' to specify the fields which vary according to the Allele or Genotype. VCF file may be specified on the command line or piped as stdin. + + + +# OPTIONS + +``` + + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfflatten.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfflatten.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. 
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfgeno2alleles.md libvcflib-1.0.2+dfsg/doc/vcfgeno2alleles.md --- libvcflib-1.0.1+dfsg/doc/vcfgeno2alleles.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfgeno2alleles.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,59 @@ +% VCFGENO2ALLELES(1) vcfgeno2alleles (vcflib) | vcfgeno2alleles (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfgeno2alleles** + +# SYNOPSIS + +**vcfgeno2alleles** <[vcf file] + +# DESCRIPTION + +modifies the genotypes field to provide the literal alleles rather than indexes + + + +# OPTIONS + +``` + + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfgeno2alleles.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfgeno2alleles.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfgeno2haplo.md libvcflib-1.0.2+dfsg/doc/vcfgeno2haplo.md --- libvcflib-1.0.1+dfsg/doc/vcfgeno2haplo.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfgeno2haplo.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,68 @@ +% VCFGENO2HAPLO(1) vcfgeno2haplo (vcflib) | vcfgeno2haplo (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfgeno2haplo** + +# SYNOPSIS + +**vcfgeno2haplo** [options] [] + +# DESCRIPTION + +Convert genotype-based phased alleles within --window-size into haplotype alleles. Will break haplotype construction when encountering non-phased genotypes on input. 
+ + + +# OPTIONS + +``` + +options: + -h, --help Print this message + -v, --version Print version + -r, --reference FILE FASTA reference file + -w, --window-size N Merge variants at most this many bp apart (default 30) + -o, --only-variants Don't output the entire haplotype, just concatenate + REF/ALT strings (delimited by ":") + + + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfgeno2haplo.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfgeno2haplo.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfgenosamplenames.md libvcflib-1.0.2+dfsg/doc/vcfgenosamplenames.md --- libvcflib-1.0.1+dfsg/doc/vcfgenosamplenames.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfgenosamplenames.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,69 @@ +% VCFGENOSAMPLENAMES(1) vcfgenosamplenames (vcflib) | vcfgenosamplenames (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfgenosamplenames** + +# SYNOPSIS + +**vcfgenosamplenames** + +# DESCRIPTION + +Get samplenames + + + + + +# EXAMPLES + +``` + +Example: + +vcfsamplenames samples/sample.vcf + +NA00001 +NA00002 +NA00003 + + +Type: transformation + + + +``` + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfgenosamplenames.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfgenosamplenames.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. 
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfgenosummarize.md libvcflib-1.0.2+dfsg/doc/vcfgenosummarize.md --- libvcflib-1.0.1+dfsg/doc/vcfgenosummarize.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfgenosummarize.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,59 @@ +% VCFGENOSUMMARIZE(1) vcfgenosummarize (vcflib) | vcfgenosummarize (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfgenosummarize** + +# SYNOPSIS + +**vcfgenosummarize** <[input file] >[output vcf] + +# DESCRIPTION + +Adds summary statistics to each record summarizing qualities reported in called genotypes. Uses: RO (reference observation count), QR (quality sum reference observations) AO (alternate observation count), QA (quality sum alternate observations) + + + +# OPTIONS + +``` + + +Type: statistics + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfgenosummarize.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfgenosummarize.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfgenotypecompare.md libvcflib-1.0.2+dfsg/doc/vcfgenotypecompare.md --- libvcflib-1.0.1+dfsg/doc/vcfgenotypecompare.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfgenotypecompare.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,59 @@ +% VCFGENOTYPECOMPARE(1) vcfgenotypecompare (vcflib) | vcfgenotypecompare (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfgenotypecompare** + +# SYNOPSIS + +**vcfgenotypecompare** + +# DESCRIPTION + +Adds statistics to the INFO field of the vcf file describing the amount of discrepancy between the genotypes (GT) in the vcf file and the genotypes reported in the `<other-genotype-tag>`. Use this after vcfannotategenotypes to get correspondence statistics for two vcfs. 
+ + + +# OPTIONS + +``` + + +Type: statistics + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfgenotypecompare.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfgenotypecompare.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfgenotypes.md libvcflib-1.0.2+dfsg/doc/vcfgenotypes.md --- libvcflib-1.0.1+dfsg/doc/vcfgenotypes.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfgenotypes.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,83 @@ +% VCFGENOTYPES(1) vcfgenotypes (vcflib) | vcfgenotypes (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfgenotypes** + +# SYNOPSIS + +**vcfgenotypes** + +# DESCRIPTION + +Report the genotypes for each sample, for each variant in the VCF. Convert the numerical representation of genotypes provided by the GT field to a human-readable genotype format. + + + +# OPTIONS + +``` + + + + +``` + + + + + +# EXAMPLES + +``` + +Example: + + **vcfgenotypes** samples/sample.vcf + +19 111 A C A,C NA00001:A/A NA00002:A/A NA00003:A/C +19 112 A G A,G NA00001:A/A NA00002:A/A NA00003:A/G +20 14370 G A G,A NA00001:G/G NA00002:G/A NA00003:A/A +20 17330 T A T,A NA00001:T/T NA00002:T/A NA00003:T/T +20 1110696 A G,T A,G,T NA00001:G/T NA00002:G/T NA00003:T/T +20 1230237 T . T,. NA00001:T/T NA00002:T/T NA00003:T/T +20 1234567 G GA,GAC G,GA,GAC NA00001:G/GA NA00002:G/GAC NA00003:GA/GA +20 1235237 T . T,. NA00001:T/T NA00002:T/T NA00003:./. 
+X 10 AC A,ATG AC,A,ATG NA00001:AC NA00002:AC/A NA00003:AC/ATG + +Type: statistics + +``` + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfgenotypes.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfgenotypes.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfglbound.md libvcflib-1.0.2+dfsg/doc/vcfglbound.md --- libvcflib-1.0.1+dfsg/doc/vcfglbound.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfglbound.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,64 @@ +% VCFGLBOUND(1) vcfglbound (vcflib) | vcfglbound (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfglbound** + +# SYNOPSIS + +**vcfglbound** [options] + +# DESCRIPTION + +Adjust GLs so that the maximum GL is 0 by dividing all GLs for each sample by the max. + + + +# OPTIONS + +``` + + +Then cap (bound) at N (e.g. -10). options: + -b, --bound N Bound GLs to this limit. + -x, --exclude-broken If GLs are > 0, remove site. + + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfglbound.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfglbound.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. 
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfglxgt.md libvcflib-1.0.2+dfsg/doc/vcfglxgt.md --- libvcflib-1.0.1+dfsg/doc/vcfglxgt.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfglxgt.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,63 @@ +% VCFGLXGT(1) vcfglxgt (vcflib) | vcfglxgt (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfglxgt** + +# SYNOPSIS + +**vcfglxgt** [options] + +# DESCRIPTION + +Set genotypes using the maximum genotype likelihood for each sample. + + + +# OPTIONS + +``` + +options: + -n, --fix-null-genotypes only apply to null and partly-null genotypes + + + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfglxgt.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfglxgt.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfhetcount.md libvcflib-1.0.2+dfsg/doc/vcfhetcount.md --- libvcflib-1.0.1+dfsg/doc/vcfhetcount.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfhetcount.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,60 @@ +% VCFHETCOUNT(1) vcfhetcount (vcflib) | vcfhetcount (VCF metrics) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfhetcount** + +# SYNOPSIS + +**vcfhetcount** + +# DESCRIPTION + +Calculate the heterozygosity rate: count the number of alternate alleles in heterozygous genotypes in all records in the vcf file + + + +# OPTIONS + +``` + +outputs a count for each individual in the file + +Type: metrics + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfhetcount.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfhetcount.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. 
MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfhethomratio.md libvcflib-1.0.2+dfsg/doc/vcfhethomratio.md --- libvcflib-1.0.1+dfsg/doc/vcfhethomratio.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfhethomratio.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,59 @@ +% VCFHETHOMRATIO(1) vcfhethomratio (vcflib) | vcfhethomratio (VCF metrics) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfhethomratio** + +# SYNOPSIS + +**vcfhethomratio** + +# DESCRIPTION + +Generates the het/hom ratio for each individual in the file + + + +# OPTIONS + +``` + + +Type: metrics + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfhethomratio.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfhethomratio.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfindex.md libvcflib-1.0.2+dfsg/doc/vcfindex.md --- libvcflib-1.0.1+dfsg/doc/vcfindex.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfindex.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,61 @@ +% VCFINDEX(1) vcfindex (vcflib) | vcfindex (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfindex** + +# SYNOPSIS + +**vcfindex** + +# DESCRIPTION + +Adds an index number to the INFO field (id=position) + + + +# OPTIONS + +``` + + +Type: transformation + + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfindex.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfindex.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. 
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfinfo2qual.md libvcflib-1.0.2+dfsg/doc/vcfinfo2qual.md --- libvcflib-1.0.1+dfsg/doc/vcfinfo2qual.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfinfo2qual.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,59 @@ +% VCFINFO2QUAL(1) vcfinfo2qual (vcflib) | vcfinfo2qual (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfinfo2qual** + +# SYNOPSIS + +**vcfinfo2qual** [key] [vcf_file] + +# DESCRIPTION + +Sets QUAL from info field tag keyed by [key]. The VCF file may be omitted and read from stdin. The average of the field is used if it contains multiple values. + + + +# OPTIONS + +``` + + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfinfo2qual.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfinfo2qual.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfinfosummarize.md libvcflib-1.0.2+dfsg/doc/vcfinfosummarize.md --- libvcflib-1.0.1+dfsg/doc/vcfinfosummarize.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfinfosummarize.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,67 @@ +% VCFINFOSUMMARIZE(1) vcfinfosummarize (vcflib) | vcfinfosummarize (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfinfosummarize** + +# SYNOPSIS + +**vcfinfosummarize** [options] + +# DESCRIPTION + +Take annotations given in the per-sample fields and add the mean, median, min, or max to the site-level INFO. 
+ + + +# OPTIONS + +``` + +options: + -f, --field Summarize this field in the INFO column + -i, --info Store the computed statistic in this info field + -a, --average Take the mean for field (default) + -m, --median Use the median + -n, --min Use the min + -x, --max Use the max + -h, --help Print this message + -v, --version Print version + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfinfosummarize.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfinfosummarize.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfintersect.md libvcflib-1.0.2+dfsg/doc/vcfintersect.md --- libvcflib-1.0.1+dfsg/doc/vcfintersect.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfintersect.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,84 @@ +% VCFINTERSECT(1) vcfintersect (vcflib) | vcfintersect (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfintersect** + +# SYNOPSIS + +**vcfintersect** [options] [] + +# DESCRIPTION + +VCF set analysis + + + +# OPTIONS + +``` + + +options: + -b, --bed FILE use intervals provided by this BED file + -R, --region REGION use 1-based tabix-style region (e.g. 
chrZ:10-20), multiples allowed + -S, --start-only don't use the reference length information in the record to determine + overlap status, just use the start position + -v, --invert invert the selection, printing only records which would + not have been printed out + -i, --intersect-vcf FILE use this VCF for set intersection generation + -u, --union-vcf FILE use this VCF for set union generation + -w, --window-size N compare records up to this many bp away (default 30) + -r, --reference FILE FASTA reference file, required with -i and -u + -l, --loci output whole loci when one alternate allele matches + -m, --ref-match intersect on the basis of record REF string + -t, --tag TAG attach TAG to each record's info field if it would intersect + -V, --tag-value VAL use this value to indicate that the allele is passing + '.' will be used otherwise. default: 'PASS' + -M, --merge-from FROM-TAG + -T, --merge-to TO-TAG merge from FROM-TAG used in the -i file, setting TO-TAG + in the current file. + +For bed-vcf intersection, alleles which fall into the targets are retained. + +Haplotype aware intersection, union and complement. Use for intersection and union of VCF files: unify on equivalent alleles within window-size bp +as determined by haplotype comparison alleles. + +type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfintersect.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfintersect.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. 
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfkeepgeno.md libvcflib-1.0.2+dfsg/doc/vcfkeepgeno.md --- libvcflib-1.0.1+dfsg/doc/vcfkeepgeno.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfkeepgeno.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,59 @@ +% VCFKEEPGENO(1) vcfkeepgeno (vcflib) | vcfkeepgeno (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfkeepgeno** + +# SYNOPSIS + +**vcfkeepgeno** [FIELD1] [FIELD2] ... + +# DESCRIPTION + +Reduce file size by removing FORMAT fields not listed on the command line from sample specifications in the output + + + +# OPTIONS + +``` + + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfkeepgeno.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfkeepgeno.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfkeepinfo.md libvcflib-1.0.2+dfsg/doc/vcfkeepinfo.md --- libvcflib-1.0.1+dfsg/doc/vcfkeepinfo.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfkeepinfo.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,59 @@ +% VCFKEEPINFO(1) vcfkeepinfo (vcflib) | vcfkeepinfo (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfkeepinfo** + +# SYNOPSIS + +**vcfkeepinfo** [FIELD1] [FIELD2] ... + +# DESCRIPTION + +To decrease file size remove INFO fields not listed on the command line + + + +# OPTIONS + +``` + + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfkeepinfo.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfkeepinfo.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. 
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfkeepsamples.md libvcflib-1.0.2+dfsg/doc/vcfkeepsamples.md --- libvcflib-1.0.1+dfsg/doc/vcfkeepsamples.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfkeepsamples.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,48 @@ +% VCFKEEPSAMPLES(1) vcfkeepsamples (vcflib) | vcfkeepsamples (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfkeepsamples** + +# SYNOPSIS + +**vcfkeepsamples** [SAMPLE1] [SAMPLE2] ... + +# DESCRIPTION + +outputs each record in the vcf file, removing samples not listed on the command line + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfkeepsamples.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfkeepsamples.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfld.md libvcflib-1.0.2+dfsg/doc/vcfld.md --- libvcflib-1.0.1+dfsg/doc/vcfld.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfld.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,69 @@ +% VCFLD(1) vcfld (vcflib) | vcfld (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfld** + +# SYNOPSIS + +**vcfld** --target 0,1,2,3,4,5,6,7 --background 11,12,13,16,17,19,22 --file my.vcf -e -d -r + +# DESCRIPTION + +Compute LD + + + +# OPTIONS + +``` + + +required: t,target -- argument: a zero base comma separated list of target individuals corresponding to VCF columns +required: b,background -- argument: a zero base comma separated list of background individuals corresponding to VCF columns +required: f,file -- argument: a properly formatted phased VCF file +required: y,type -- argument: type of genotype likelihood: PL, GL or GP +optional: w,window -- argument: window size to average LD; default is 1000 +optional: e,external -- switch: population to calculate LD 
expectation; default is target +optional: d,derived -- switch: which haplotype to count "00" vs "11"; default "00", + + +Type: transformation + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfld.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfld.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfleftalign.md libvcflib-1.0.2+dfsg/doc/vcfleftalign.md --- libvcflib-1.0.1+dfsg/doc/vcfleftalign.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfleftalign.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,75 @@ +% VCFLEFTALIGN(1) vcfleftalign (vcflib) | vcfleftalign (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfleftalign** + +# SYNOPSIS + +**vcfleftalign** [options] [file] + +# DESCRIPTION + +Left-align indels and complex variants in the input using a pairwise ref/alt alignment followed by a heuristic, iterative left realignment process that shifts indel representations to their absolute leftmost (5') extent. + + + +# OPTIONS + +``` + + +This is the same procedure used in the internal left alignment in +freebayes, and can be used when preparing VCF files for input to +freebayes to decrease positional representation differences between +the input alleles and left-realigned alignments. + +options: + + -r, --reference FILE Use this reference as a basis for realignment. + -w, --window N Use a window of this many bp when left aligning (150). + +Left-aligns variants in the specified input file or stdin. Window +size is determined dynamically according to the entropy of the regions +flanking the indel. These must have entropy > 1 bit/bp, or be shorter +than ~5kb. 
+ + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfleftalign.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfleftalign.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcflength.md libvcflib-1.0.2+dfsg/doc/vcflength.md --- libvcflib-1.0.1+dfsg/doc/vcflength.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcflength.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,76 @@ +% VCFLENGTH(1) vcflength (vcflib) | vcflength (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcflength** + +# SYNOPSIS + +**vcflength** + +# DESCRIPTION + +Add length info field + + + + + +# EXAMPLES + +``` + +Example: + +**vcflength** samples/sample.vcf +##fileformat=VCFv4.0 +(...) +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +19 111 . A C 9.6 . length=0;length.alt=1;length.ref=1 GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 +19 112 . A G 10 . length=0;length.alt=1;length.ref=1 GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 +20 14370 rs6054257 G A 29 PASS AF=0.5;DP=14;NS=3;length=0;length.alt=1;length.ref=1;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. +20 17330 . T A 3 q10 AF=0.017;DP=11;NS=3;length=0;length.alt=1;length.ref=1 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3:.,. +20 1110696 rs6040355 A G,T 67 PASS AA=T;AF=0.333,0.667;DP=10;NS=2;length=0,0;length.alt=1,1;length.ref=1;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4:.,. +20 1230237 . T . 47 PASS AA=T;DP=13;NS=3;length=0;length.alt=1;length.ref=1GT:GQ:DP:HQ 0|0:54:.:56,60 0|0:48:4:51,51 0/0:61:2:.,. +20 1234567 microsat1 G GA,GAC 50 PASS AA=G;AC=3,1;AN=6;DP=9;NS=3;length=1,2;length.alt=2,3;length.ref=1 GT:GQ:DP 0/1:.:4 0/2:17:2 1/1:40:3 +20 1235237 . T . 0 . length=0;length.alt=1;length.ref=1 GT 0/00|0 ./. 
+X 10 rsTest AC A,ATG 10 PASS length=-1,1;length.alt=1,3;length.ref=2 GT 0 0/1 0|2 + +Type: transformation + + + +``` + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcflength.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcflength.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcflib.md libvcflib-1.0.2+dfsg/doc/vcflib.md --- libvcflib-1.0.1+dfsg/doc/vcflib.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcflib.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,156 @@ +% vcflib(1) vcflib | vcflib (index) +% Erik Garrison and vcflib contributors + +# NAME + +**vcflib** index + +# DESCRIPTION + +vcflib contains tools and libraries for dealing with the Variant Call +Format (VCF) which is a flat-file, tab-delimited textual format +intended to describe reference-indexed variations between +individuals. + +VCF provides a common interchange format for the description of +variation in individuals and populations of samples, and has become +the de facto standard reporting format for a wide array of genomic +variant detectors. + +vcflib provides methods to manipulate and interpret sequence variation +as it can be described by VCF. It is both: + +* an API for parsing and operating on records of genomic variation as it can be described by the VCF format, +* and a collection of command-line utilities for executing complex manipulations on VCF files. + +The API itself provides a quick and extremely permissive method to +read and write VCF files. Extensions and applications of the library +provided in the included utilities (*.cpp) comprise the vast bulk of +the library's utility for most users. + + + + +## filter + +| filter command | description | +| :-------------- | :---------- | + | [vcfuniq](./vcfuniq.md) | List unique genotypes. 
Like GNU uniq, but for VCF records. Remove records which have the same position, ref, and alt as the previous record. | + | [vcfuniqalleles](./vcfuniqalleles.md) | List unique alleles. For each record, remove any duplicate alternate alleles that may have resulted from merging separate VCF files. | + | [vcffilter](./vcffilter.md) | VCF filter the specified vcf file using the set of filters | + +## metrics + +| metrics command | description | +| :-------------- | :---------- | + | [vcfcheck](./vcfcheck.md) | Validate integrity and identity of the VCF by verifying that the VCF record's REF matches a given reference file. | + | [vcfhethomratio](./vcfhethomratio.md) | Generates the het/hom ratio for each individual in the file | + | [vcfhetcount](./vcfhetcount.md) | Calculate the heterozygosity rate: count the number of alternate alleles in heterozygous genotypes in all records in the vcf file | + | [vcfdistance](./vcfdistance.md) | Adds a tag to each variant record which indicates the distance to the nearest variant. (Defaults to BasesToClosestVariant if no custom tag name is given.) | + | [vcfentropy](./vcfentropy.md) | Annotate VCF records with the Shannon entropy of flanking sequence. Annotates the output VCF file with, for each record, EntropyLeft, EntropyRight, EntropyCenter, which are the entropies of the sequence of the given window size to the left, right, and center of the record. Also adds EntropyRef and EntropyAlt for each alt. | + +## phenotype + +| phenotype command | description | +| :-------------- | :---------- | + | [permuteGPAT++](./permuteGPAT++.md) | **permuteGPAT++** is a method for adding empirical p-values to a GPAT++ score. | + +## genotype + +| genotype command | description | +| :-------------- | :---------- | + | [normalize-iHS](./normalize-iHS.md) | normalizes iHS or XP-EHH scores. | + | [hapLrt](./hapLrt.md) | HapLRT is a likelihood ratio test for haplotype lengths. The lengths are modeled with an exponential distribution. 
The sign denotes if the target has longer haplotypes (1) or the background (-1). | + | [abba-baba](./abba-baba.md) | **abba-baba** calculates the tree pattern for four individuals. This tool assumes reference is ancestral and ignores non **abba-baba** sites. The output is a boolean value: 1 = true, 0 = false for abba and baba. The tree argument should be specified from the most basal taxa to the most derived. | + +## transformation + +| transformation command | description | +| :-------------- | :---------- | + | [vcfinfo2qual](./vcfinfo2qual.md) | Sets QUAL from info field tag keyed by [key]. The VCF file may be omitted and read from stdin. The average of the field is used if it contains multiple values. | + | [vcfsamplediff](./vcfsamplediff.md) | Establish putative somatic variants using reported differences between germline and somatic samples. Tags each record where the listed sample genotypes differ with . The first sample is assumed to be germline, the second somatic. Each record is tagged with ={germline,somatic,loh} to specify the type of variant given the genotype difference between the two samples. | + | [vcfaddinfo](./vcfaddinfo.md) | Adds info fields from the second file which are not present in the first vcf file. | + | [vcfremoveaberrantgenotypes](./vcfremoveaberrantgenotypes.md) | strips samples which are homozygous but have observations implying heterozygosity. Remove samples for which the reported genotype (GT) and observation counts disagree (AO, RO). | + | [vcfglxgt](./vcfglxgt.md) | Set genotypes using the maximum genotype likelihood for each sample. | + | [dumpContigsFromHeader](./dumpContigsFromHeader.md) | Dump contigs from header | + | [vcfevenregions](./vcfevenregions.md) | Generates a list of regions, e.g. chr20:10..30 using the variant density information provided in the VCF file to ensure that the regions have even numbers of variants. 
This can be used to reduce the variance in runtime when dividing variant detection or genotyping by genomic coordinates. | + | [vcfcat](./vcfcat.md) | Concatenates VCF files | + | [vcfannotategenotypes](./vcfannotategenotypes.md) | Examine genotype correspondence. Annotate genotypes in the first file with genotypes in the second adding the genotype as another flag to each sample field in the first file. annotation-tag is the name of the sample flag which is added to store the annotation. Also adds a 'has_variant' flag for sites where the second file has a variant. | + | [vcfafpath](./vcfafpath.md) | Display genotype paths | + | [vcfclassify](./vcfclassify.md) | Creates a new VCF where each variant is tagged by allele class: snp, ts/tv, indel, mnp | + | [vcfallelicprimitives](./vcfallelicprimitives.md) | If multiple allelic primitives (gaps or mismatches) are specified in a single VCF record, split the record into multiple lines, but drop all INFO fields. Does not handle genotypes (yet). MNPs are split into multiple SNPs unless the -m flag is provided. Records generated by splits have th | + | [vcfqual2info](./vcfqual2info.md) | Puts QUAL into an info field tag keyed by [key]. | + | [vcfcreatemulti](./vcfcreatemulti.md) | If overlapping alleles are represented across multiple records, merge them into a single record. Currently only for indels. | + | [vcfgeno2alleles](./vcfgeno2alleles.md) | modifies the genotypes field to provide the literal alleles rather than indexes | + | [vcfsample2info](./vcfsample2info.md) | Take annotations given in the per-sample fields and add the mean, median, min, or max to the site-level INFO. | + | [vcfld](./vcfld.md) | Compute LD | + | [vcfnumalt](./vcfnumalt.md) | outputs a VCF stream where NUMALT has been generated for each record using sample genotypes | + | [vcfstreamsort](./vcfstreamsort.md) | Sorts the input (either stdin or file) using a streaming sort algorithm. 
Guarantees that the positional order is correct provided out-of-order variants are no more than 100 positions in the VCF file apart. | + | [vcfinfosummarize](./vcfinfosummarize.md) | Take annotations given in the per-sample fields and add the mean, median, min, or max to the site-level INFO. | + | [vcflength](./vcflength.md) | Add length info field | + | [vcfkeepgeno](./vcfkeepgeno.md) | Reduce file size by removing FORMAT fields not listed on the command line from sample specifications in the output | + | [vcfcombine](./vcfcombine.md) | Combine VCF files positionally, combining samples when sites and alleles are identical. Any number of VCF files may be combined. The INFO field and other columns are taken from one of the files which are combined when records in multiple files match. Alleles must have identical ordering to be combined into one record. If they do not, multiple records will be emitted. | + | [vcfprimers](./vcfprimers.md) | For each VCF record, extract the flanking sequences, and write them to stdout as FASTA records suitable for alignment. | + | [vcfflatten](./vcfflatten.md) | Removes multi-allelic sites by picking the most common alternate. Requires allele frequency specification 'AF' and use of 'G' and 'A' to specify the fields which vary according to the Allele or Genotype. VCF file may be specified on the command line or piped as stdin. | + | [vcf2dag](./vcf2dag.md) | Modify VCF to be able to build a directed acyclic graph (DAG) | + | [vcfcleancomplex](./vcfcleancomplex.md) | Removes reference-matching sequence from complex alleles and adjusts records to reflect positional change. | + | [vcfbreakmulti](./vcfbreakmulti.md) | If multiple alleles are specified in a single record, break the record into multiple lines, preserving allele-specific INFO fields. 
| + | [vcfindex](./vcfindex.md) | Adds an index number to the INFO field (id=position) | + | [vcfkeepinfo](./vcfkeepinfo.md) | To decrease file size remove INFO fields not listed on the command line | + | [vcfgeno2haplo](./vcfgeno2haplo.md) | Convert genotype-based phased alleles within --window-size into haplotype alleles. Will break haplotype construction when encountering non-phased genotypes on input. | + | [vcfintersect](./vcfintersect.md) | VCF set analysis | + | [vcfannotate](./vcfannotate.md) | Intersect the records in the VCF file with targets provided in a BED file. Intersections are done on the reference sequences in the VCF file. If no VCF filename is specified on the command line (last argument) the VCF is read from stdin. | + | [smoother](./smoother.md) | smoother is a method for window smoothing many of the GPAT++ formats. | + | [vcf2fasta](./vcf2fasta.md) | Generates sample_seq:N.fa for each sample, reference sequence, and chromosomal copy N in [0,1... ploidy]. Each sequence in the fasta file is named using the same pattern used for the file name, allowing them to be combined. | + | [vcfsamplenames](./vcfsamplenames.md) | List sample names | + | [vcfleftalign](./vcfleftalign.md) | Left-align indels and complex variants in the input using a pairwise ref/alt alignment followed by a heuristic, iterative left realignment process that shifts indel representations to their absolute leftmost (5') extent. | + | [vcfglbound](./vcfglbound.md) | Adjust GLs so that the maximum GL is 0 by dividing all GLs for each sample by the max. 
| + | [vcfcommonsamples](./vcfcommonsamples.md) | Generates each record in the first file, removing samples not present in the second | + | [vcfecho](./vcfecho.md) | Echo VCF to stdout (simple demo) | + | [vcfkeepsamples](./vcfkeepsamples.md) | outputs each record in the vcf file, removing samples not listed on the command line | + | [vcf2tsv](./vcf2tsv.md) | Converts VCF to per-allele or per-genotype tab-delimited format, using null string to replace empty values in the table. Specifying -g will output one line per sample with genotype information. When there is more than one alt allele there will be multiple rows, one for each allele, and the info will match the 'A' index | + | [vcfoverlay](./vcfoverlay.md) | Overlay records in the input vcf files with order as precedence. | + | [vcfgenosamplenames](./vcfgenosamplenames.md) | Get samplenames | + | [vcfremovesamples](./vcfremovesamples.md) | outputs each record in the vcf file, removing samples listed on the command line | + | [vcfremap](./vcfremap.md) | For each alternate allele, attempt to realign against the reference with lowered gap open penalty. If realignment is possible, adjust the cigar and reference/alternate alleles. Observe how different alignment parameters, including context and entropy-dependent ones, influence variant classification and interpretation. | + | [vcffixup](./vcffixup.md) | Generates a VCF stream where AC and NS have been generated for each record using sample genotypes | + +## statistics + +| statistics command | description | +| :-------------- | :---------- | + | [vcfgenosummarize](./vcfgenosummarize.md) | Adds summary statistics to each record summarizing qualities reported in called genotypes. 
Uses: RO (reference observation count), QR (quality sum reference observations), AO (alternate observation count), QA (quality sum alternate observations) | + | [vcfcountalleles](./vcfcountalleles.md) | Count alleles | + | [meltEHH](./meltEHH.md) | | + | [genotypeSummary](./genotypeSummary.md) | Generates a table of genotype counts. Summarizes genotype counts for bi-allelic SNVs and indels | + | [vcfrandomsample](./vcfrandomsample.md) | Randomly sample sites from an input VCF file, which may be provided as stdin. Scale the sampling probability by the field specified in KEY. This may be used to provide uniform sampling across allele frequencies, for instance. | + | [pVst](./pVst.md) | **pVst** calculates vst, a measure of CNV stratification. | + | [vcfrandom](./vcfrandom.md) | Generate a random VCF file | + | [segmentFst](./segmentFst.md) | **segmentFst** creates genomic segments (bed file) for regions with high wcFst | + | [sequenceDiversity](./sequenceDiversity.md) | The **sequenceDiversity** program calculates two popular metrics of haplotype diversity: pi and extended haplotype homozygosity (eHH). Pi is calculated using the Nei and Li 1979 formulation. eHH is a convenient way to think about haplotype diversity. When eHH = 0 all haplotypes in the window are unique and when eHH = 1 all haplotypes in the window are identical. | + | [segmentIhs](./segmentIhs.md) | Creates genomic segments (bed file) for regions with high wcFst | + | [vcfgenotypes](./vcfgenotypes.md) | Report the genotypes for each sample, for each variant in the VCF. Convert the numerical representation of genotypes provided by the GT field to a human-readable genotype format. | + | [vcfaltcount](./vcfaltcount.md) | count the number of alternate alleles in all records in the vcf file | + | [plotHaps](./plotHaps.md) | **plotHaps** provides the formatted output that can be used with 'bin/plotHaplotypes.R'. 
| + | [vcfsitesummarize](./vcfsitesummarize.md) | Summarize by site | + | [vcfgenotypecompare](./vcfgenotypecompare.md) | adds statistics to the INFO field of the vcf file describing the amount of discrepancy between the genotypes (GT) in the vcf file and the genotypes reported in the . use this after vcfannotategenotypes to get correspondence statistics for two vcfs. | + | [vcfstats](./vcfstats.md) | Prints statistics about variants in the input VCF file. | + | [wcFst](./wcFst.md) | **wcFst** is Weir & Cockerham's Fst for two populations. Negative values are VALID, they are sites which can be treated as zero Fst. For more information see Evolution, Vol. 38 N. 6 Nov 1984. Specifically **wcFst** uses equations 1,2,3,4. | + | [permuteSmooth](./permuteSmooth.md) | **permuteSmooth** is a method for adding empirical p-values to smoothed wcFst scores. | + | [bFst](./bFst.md) | **bFst** is a Bayesian approach to Fst. Importantly **bFst** accounts for genotype uncertainty in the model using genotype likelihoods. For a more detailed description see: `A Bayesian approach to inferring population structure from dominant markers' by Holsinger et al. Molecular Ecology Vol 11, issue 7 2002. The likelihood function has been modified to use genotype likelihoods provided by variant callers. There are five free parameters estimated in the model: each subpopulation's allele frequency and Fis (fixation index, within each subpopulation), a free parameter for the total population's allele frequency, and Fst. | + | [vcfroc](./vcfroc.md) | Generates a pseudo-ROC curve using sensitivity and specificity estimated against a putative truth set. Thresholding is provided by successive QUAL cutoffs. | + | [vcfparsealts](./vcfparsealts.md) | Alternate allele parsing method. This method uses pairwise alignment of REF and ALTs to determine component allelic primitives for each alternate allele. 
| + | [pFst](./pFst.md) | **pFst** is a probabilistic approach for detecting differences in allele frequencies between two populations. | + | [iHS](./iHS.md) | **iHS** calculates the integrated haplotype score which measures the relative decay of extended haplotype homozygosity (EHH) for the reference and alternative alleles at a site (see: voight et al. 2006, Spiech & Hernandez 2014). | + | [popStats](./popStats.md) | General population genetic statistics for each SNP | + +# SOURCE CODE + +See the source code repository at https://github.com/vcflib/vcflib + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfnumalt.md libvcflib-1.0.2+dfsg/doc/vcfnumalt.md --- libvcflib-1.0.1+dfsg/doc/vcfnumalt.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfnumalt.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,48 @@ +% VCFNUMALT(1) vcfnumalt (vcflib) | vcfnumalt (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfnumalt** + +# SYNOPSIS + +**vcfnumalt** + +# DESCRIPTION + +outputs a VCF stream where NUMALT has been generated for each record using sample genotypes + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfnumalt.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfnumalt.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfoverlay.md libvcflib-1.0.2+dfsg/doc/vcfoverlay.md --- libvcflib-1.0.1+dfsg/doc/vcfoverlay.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfoverlay.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,61 @@ +% VCFOVERLAY(1) vcfoverlay (vcflib) | vcfoverlay (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfoverlay** + +# SYNOPSIS + +**vcfoverlay** [options] [ ...] 
+ +# DESCRIPTION + +Overlay records in the input vcf files with order as precedence. + + + +# OPTIONS + +``` + +options: + -h, --help this dialog + -v, --version prints version + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfoverlay.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfoverlay.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfparsealts.md libvcflib-1.0.2+dfsg/doc/vcfparsealts.md --- libvcflib-1.0.1+dfsg/doc/vcfparsealts.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfparsealts.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,86 @@ +% VCFPARSEALTS(1) vcfparsealts (vcflib) | vcfparsealts (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfparsealts** + +# SYNOPSIS + +**vcfparsealts** + +# DESCRIPTION + +Alternate allele parsing method. This method uses pairwise alignment of REF and ALTs to determine component allelic primitives for each alternate allele. + + + + + +# EXAMPLES + +``` + +Example: + +**vcfparsealts** samples/sample.vcf +##fileformat=VCFv4.0 +(...) +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +19 111 . A C 9.6 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 + ( A :: 111 A -> A; ) ( C :: 111 A -> C; ) +19 112 . A G 10 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 + ( A :: 112 A -> A; ) ( G :: 112 A -> G; ) +20 14370 rs6054257 G A 29 PASS AF=0.5;DP=14;NS=3;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. + ( A :: 14370 G -> A; ) ( G :: 14370 G -> G; ) +20 17330 . T A 3 q10 AF=0.017;DP=11;NS=3 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3:.,. + ( A :: 17330 T -> A; ) ( T :: 17330 T -> T; ) +20 1110696 rs6040355 A G,T 67 PASS AA=T;AF=0.333,0.667;DP=10;NS=2;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4:.,. 
+ ( A :: 1110696 A -> A; ) ( G :: 1110696 A -> G; ) ( T :: 1110696 A -> T; ) +20 1230237 . T . 47 PASS AA=T;DP=13;NS=3 GT:GQ:DP:HQ 0|0:54:.:56,60 0|0:48:4:51,51 0/0:61:2:.,. + ( . :: 1230237 T -> .; ) ( T :: 1230237 T -> T; ) +20 1234567 microsat1 G GA,GAC 50 PASS AA=G;AC=3,1;AN=6;DP=9;NS=3 GT:GQ:DP 0/1:.:4 0/2:17:2 1/1:40:3 + ( G :: 1234567 G -> G; ) ( GA :: 1234567 G -> G; 1234568 -> A; ) ( GAC :: 1234567 G -> G; 1234568 -> AC; ) +20 1235237 . T . 0 . . GT 0/0 0|0 ./. + ( . :: 1235237 T -> .; ) ( T :: 1235237 T -> T; ) +X 10 rsTest AC A,ATG 10 PASS . GT 0 0/1 0|2 + ( A :: 10 A -> A; 11 C -> ; ) ( AC :: 10 AC -> AC; ) ( ATG :: 10 A -> A; 11 -> T; 11 C -> G; ) + + +Type: statistics + + + +``` + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfparsealts.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfparsealts.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfprimers.md libvcflib-1.0.2+dfsg/doc/vcfprimers.md --- libvcflib-1.0.1+dfsg/doc/vcfprimers.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfprimers.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,74 @@ +% VCFPRIMERS(1) vcfprimers (vcflib) | vcfprimers (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfprimers** + +# SYNOPSIS + +**vcfprimers** [options] + +# DESCRIPTION + +For each VCF record, extract the flanking sequences, and write them to stdout as FASTA records suitable for alignment. + + + +# OPTIONS + +``` + +options: + -f, --fasta-reference FASTA reference file to use to obtain primer sequences + -l, --primer-length The length of the primer sequences on each side of the variant + +This tool is intended for use in designing validation +experiments. Primers extracted which would flank all of the alleles at multi-allelic +sites. 
The name of the FASTA "reads" indicates the VCF record which they apply to. +The form is >CHROM_POS_LEFT for the 3' primer and >CHROM_POS_RIGHT for the 5' primer, +for example: + +>20_233255_LEFT +CCATTGTATATATAGACCATAATTTCTTTATCCAATCATCTGTTGATGGA +>20_233255_RIGHT +ACTCAGTTGATTCCATACCTTTGCCATCATGAATCATGTTGTAATAAACA + + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfprimers.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfprimers.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfqual2info.md libvcflib-1.0.2+dfsg/doc/vcfqual2info.md --- libvcflib-1.0.1+dfsg/doc/vcfqual2info.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfqual2info.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,60 @@ +% VCFQUAL2INFO(1) vcfqual2info (vcflib) | vcfqual2info (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfqual2info** + +# SYNOPSIS + +**vcfqual2info** [key] [vcf_file] + +# DESCRIPTION + +Puts QUAL into an info field tag keyed by [key]. + + + +# OPTIONS + +``` + + + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfqual2info.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfqual2info.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. 
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfrandom.md libvcflib-1.0.2+dfsg/doc/vcfrandom.md --- libvcflib-1.0.1+dfsg/doc/vcfrandom.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfrandom.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,88 @@ +% VCFRANDOM(1) vcfrandom (vcflib) | vcfrandom (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfrandom** + +# SYNOPSIS + +**vcfrandom** + +# DESCRIPTION + +Generate a random VCF file + + + + + +# EXAMPLES + +``` + +Example: + + **vcfrandom** + +##fileformat=VCFv4.0 +##source=**vcfrandom** +##reference=/d2/data/references/build_37/human_reference_v37.fa +##phasing=none +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT bill +one 1 . G G,A 100 . DP=83 GT:DP 0/1:1 +one 2 . G G,A 100 . DP=3 GT:DP 0/1:49 +one 3 . G C,T 100 . DP=5 GT:DP 0/1:12 +one 4 . C G,T 100 . DP=51 GT:DP 0/1:60 +one 5 . A T,A 100 . DP=31 GT:DP 0/1:89 +one 6 . T T,A 100 . DP=56 GT:DP 0/1:60 +one 7 . T A,C 100 . DP=78 GT:DP 0/1:75 +one 8 . T G,A 100 . DP=73 GT:DP 0/1:78 +one 9 . C C,G 100 . DP=42 GT:DP 0/1:67 + + +Type: statistics + + + +``` + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfrandom.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfrandom.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. 
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfrandomsample.md libvcflib-1.0.2+dfsg/doc/vcfrandomsample.md --- libvcflib-1.0.1+dfsg/doc/vcfrandomsample.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfrandomsample.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,63 @@ +% VCFRANDOMSAMPLE(1) vcfrandomsample (vcflib) | vcfrandomsample (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfrandomsample** + +# SYNOPSIS + +**vcfrandomsample** [options] [] + +# DESCRIPTION + +Randomly sample sites from an input VCF file, which may be provided as stdin. Scale the sampling probability by the field specified in KEY. This may be used to provide uniform sampling across allele frequencies, for instance. + + + +# OPTIONS + +``` + +options: + -r, --rate RATE base sampling probability per locus + -s, --scale-by KEY scale sampling likelihood by this Float info field + -p, --random-seed N use this random seed (by default read from /dev/random) + -q, --pseudorandom-seed use a pseudorandom seed (by default read from /dev/random) + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfrandomsample.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfrandomsample.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfremap.md libvcflib-1.0.2+dfsg/doc/vcfremap.md --- libvcflib-1.0.1+dfsg/doc/vcfremap.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfremap.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,69 @@ +% VCFREMAP(1) vcfremap (vcflib) | vcfremap (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfremap** + +# SYNOPSIS + +**vcfremap** [options] [] + +# DESCRIPTION + +For each alternate allele, attempt to realign against the reference with lowered gap open penalty. 
If realignment is possible, adjust the cigar and reference/alternate alleles. Observe how different alignment parameters, including context and entropy-dependent ones, influence variant classification and interpretation. + + + +# OPTIONS + +``` + +options: + -w, --ref-window-size N align using this many bases flanking each side of the reference allele + -s, --alt-window-size N align using this many flanking bases from the reference around each alternate allele + -r, --reference FILE FASTA reference file, required with -i and -u + -m, --match-score N match score for SW algorithm + -x, --mismatch-score N mismatch score for SW algorithm + -o, --gap-open-penalty N gap open penalty for SW algorithm + -e, --gap-extend-penalty N gap extension penalty for SW algorithm + -z, --entropy-gap-open use entropy scaling for the gap open penalty + -R, --repeat-gap-extend N penalize non-repeat-unit gaps in repeat sequence + -a, --adjust-vcf TAG supply a new cigar as TAG in the output VCF + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfremap.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfremap.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. 
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfremoveaberrantgenotypes.md libvcflib-1.0.2+dfsg/doc/vcfremoveaberrantgenotypes.md --- libvcflib-1.0.1+dfsg/doc/vcfremoveaberrantgenotypes.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfremoveaberrantgenotypes.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,59 @@ +% VCFREMOVEABERRANTGENOTYPES(1) vcfremoveaberrantgenotypes (vcflib) | vcfremoveaberrantgenotypes (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfremoveaberrantgenotypes** + +# SYNOPSIS + +**vcfremoveaberrantgenotypes** + +# DESCRIPTION + +strips samples which are homozygous but have observations implying heterozygosity. Remove samples for which the reported genotype (GT) and observation counts disagree (AO, RO). + + + +# OPTIONS + +``` + + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfremoveaberrantgenotypes.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfremoveaberrantgenotypes.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfremovesamples.md libvcflib-1.0.2+dfsg/doc/vcfremovesamples.md --- libvcflib-1.0.1+dfsg/doc/vcfremovesamples.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfremovesamples.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,48 @@ +% VCFREMOVESAMPLES(1) vcfremovesamples (vcflib) | vcfremovesamples (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfremovesamples** + +# SYNOPSIS + +**vcfremovesamples** [SAMPLE1] [SAMPLE2] ... 
+ +# DESCRIPTION + +outputs each record in the vcf file, removing samples listed on the command line + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfremovesamples.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfremovesamples.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfroc.md libvcflib-1.0.2+dfsg/doc/vcfroc.md --- libvcflib-1.0.1+dfsg/doc/vcfroc.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfroc.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,63 @@ +% VCFROC(1) vcfroc (vcflib) | vcfroc (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfroc** + +# SYNOPSIS + +**vcfroc** [options] [] + +# DESCRIPTION + +Generates a pseudo-ROC curve using sensitivity and specificity estimated against a putative truth set. Thresholding is provided by successive QUAL cutoffs. + + + +# OPTIONS + +``` + +options: + -t, --truth-vcf FILE use this VCF as ground truth for ROC generation + -w, --window-size N compare records up to this many bp away (default 30) + -c, --complex directly compare complex alleles, don't parse into primitives + -r, --reference FILE FASTA reference file + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfroc.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfroc.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. 
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfsample2info.md libvcflib-1.0.2+dfsg/doc/vcfsample2info.md --- libvcflib-1.0.1+dfsg/doc/vcfsample2info.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfsample2info.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,65 @@ +% VCFSAMPLE2INFO(1) vcfsample2info (vcflib) | vcfsample2info (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfsample2info** + +# SYNOPSIS + +**vcfsample2info** [options] + +# DESCRIPTION + +Take annotations given in the per-sample fields and add the mean, median, min, or max to the site-level INFO. + + + +# OPTIONS + +``` + +options: + -f, --field Add information about this field in samples to INFO column + -i, --info Store the computed statistic in this info field + -a, --average Take the mean of samples for field (default) + -m, --median Use the median + -n, --min Use the min + -x, --max Use the max + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfsample2info.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfsample2info.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfsamplediff.md libvcflib-1.0.2+dfsg/doc/vcfsamplediff.md --- libvcflib-1.0.1+dfsg/doc/vcfsamplediff.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfsamplediff.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,63 @@ +% VCFSAMPLEDIFF(1) vcfsamplediff (vcflib) | vcfsamplediff (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfsamplediff** + +# SYNOPSIS + +**vcfsamplediff** [options] [ ... ] + +# DESCRIPTION + +Establish putative somatic variants using reported differences between germline and somatic samples. Tags each record where the listed sample genotypes differ with `<tag>`. 
The first sample is assumed to be germline, the second somatic. Each record is tagged with `<tag>`={germline,somatic,loh} to specify the type of variant given the genotype difference between the two samples. + + + +# OPTIONS + +``` + + +options: + -s --strict Require that no observations in the germline support the somatic alternate. + + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfsamplediff.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfsamplediff.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfsamplenames.md libvcflib-1.0.2+dfsg/doc/vcfsamplenames.md --- libvcflib-1.0.1+dfsg/doc/vcfsamplenames.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfsamplenames.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,61 @@ +% VCFSAMPLENAMES(1) vcfsamplenames (vcflib) | vcfsamplenames (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfsamplenames** + +# SYNOPSIS + +**vcfsamplenames** + +# DESCRIPTION + +List sample names + + + +# OPTIONS + +``` + + +Type: transformation + + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfsamplenames.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfsamplenames.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. 
+ + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfsitesummarize.md libvcflib-1.0.2+dfsg/doc/vcfsitesummarize.md --- libvcflib-1.0.1+dfsg/doc/vcfsitesummarize.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfsitesummarize.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,76 @@ +% VCFSITESUMMARIZE(1) vcfsitesummarize (vcflib) | vcfsitesummarize (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfsitesummarize** + +# SYNOPSIS + +**vcfsitesummarize** + +# DESCRIPTION + +Summarize by site + + + + + +# EXAMPLES + +``` + +Example: + +**vcfsitesummarize** samples/sample.vcf + +CHROM POS ID REF QUAL FILTER AA AC AF AN DP NS DB H2 +19 111 . A 9.6 . 0 0 +19 112 . A 10 . 0 0 +20 14370 rs6054257 G 29 PASS 0.5 14 3 1 1 +20 17330 . T 3 q10 0.017 11 3 0 0 +20 1110696 rs6040355 A 67 PASS T 10 2 1 0 +20 1230237 . T 47 PASS T 13 3 0 0 +20 1234567 microsat1 G 50 PASS G 6 9 3 0 0 +20 1235237 . T 0 . 0 0 +X 10 rsTest AC 10 PASS + + +Type: statistics + + + +``` + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfsitesummarize.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfsitesummarize.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfstats.md libvcflib-1.0.2+dfsg/doc/vcfstats.md --- libvcflib-1.0.1+dfsg/doc/vcfstats.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfstats.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,72 @@ +% VCFSTATS(1) vcfstats (vcflib) | vcfstats (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfstats** + +# SYNOPSIS + +**vcfstats** [options] + +# DESCRIPTION + +Prints statistics about variants in the input VCF file. + + + +# OPTIONS + +``` + + + -r, --region specify a region on which to target the stats, requires a BGZF + compressed file which has been indexed with tabix. 
any number of + regions may be specified. + -a, --add-info add the statistics intermediate information to the VCF file, + writing out VCF records instead of summary statistics + -t, --add-type only add the type= field to the info (faster than -a) + -l, --no-length-frequency don't output the indel and mnp length-frequency spectra + -m, --match-score N match score for SW algorithm + -x, --mismatch-score N mismatch score for SW algorithm + -o, --gap-open-penalty N gap open penalty for SW algorithm + -e, --gap-extend-penalty N gap extension penalty for SW algorithm + + +Type: statistics + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfstats.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfstats.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfstreamsort.md libvcflib-1.0.2+dfsg/doc/vcfstreamsort.md --- libvcflib-1.0.1+dfsg/doc/vcfstreamsort.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfstreamsort.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,64 @@ +% VCFSTREAMSORT(1) vcfstreamsort (vcflib) | vcfstreamsort (VCF transformation) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfstreamsort** + +# SYNOPSIS + +**vcfstreamsort** [options] [vcf file] + +# DESCRIPTION + +Sorts the input (either stdin or file) using a streaming sort algorithm. Guarantees that the positional order is correct provided out-of-order variants are no more than 100 positions in the VCF file apart. 
+ + + +# OPTIONS + +``` + +options: + + -h, --help this dialog + -w, --window N number of sites to sort (default 10000) + -a, --all load all sites and then sort in memory + +Type: transformation + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfstreamsort.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfstreamsort.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfuniqalleles.md libvcflib-1.0.2+dfsg/doc/vcfuniqalleles.md --- libvcflib-1.0.1+dfsg/doc/vcfuniqalleles.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfuniqalleles.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,61 @@ +% VCFUNIQALLELES(1) vcfuniqalleles (vcflib) | vcfuniqalleles (VCF filter) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfuniqalleles** + +# SYNOPSIS + +**vcfuniqalleles** + +# DESCRIPTION + +List unique alleles. For each record, remove any duplicate alternate alleles that may have resulted from merging separate VCF files. + + + +# OPTIONS + +``` + + +Type: filter + + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfuniqalleles.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfuniqalleles.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/vcfuniq.md libvcflib-1.0.2+dfsg/doc/vcfuniq.md --- libvcflib-1.0.1+dfsg/doc/vcfuniq.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/vcfuniq.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,61 @@ +% VCFUNIQ(1) vcfuniq (vcflib) | vcfuniq (VCF filter) +% Erik Garrison and vcflib contributors + +# NAME + +**vcfuniq** + +# SYNOPSIS + +**vcfuniq** + +# DESCRIPTION + +List unique genotypes. 
Like GNU uniq, but for VCF records. Remove records which have the same position, ref, and alt as the previous record. + + + +# OPTIONS + +``` + + +Type: filter + + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[vcfuniq.cpp](https://github.com/vcflib/vcflib/blob/master/src/vcfuniq.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/doc/wcFst.md libvcflib-1.0.2+dfsg/doc/wcFst.md --- libvcflib-1.0.1+dfsg/doc/wcFst.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/doc/wcFst.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,74 @@ +% WCFST(1) wcFst (vcflib) | wcFst (VCF statistics) +% Erik Garrison and vcflib contributors + +# NAME + +**wcFst** + +# SYNOPSIS + +**wcFst** --target 0,1,2,3,4,5,6,7 --background 11,12,13,16,17,19,22 --file my.vcf --deltaaf 0.1 --type PL + +# DESCRIPTION + +**wcFst** is Weir & Cockerham's Fst for two populations. Negative values are VALID, they are sites which can be treated as zero Fst. For more information see Evolution, Vol. 38 N. 6 Nov 1984. Specifically **wcFst** uses equations 1,2,3,4. + + + +# OPTIONS + +``` + + +Output : 5 columns : + 1. seqid + 2. position + 3. target allele frequency + 4. background allele frequency + 5. 
**wcFst** + +required: t,target -- argument: a zero based comma separated list of target individuals corresponding to VCF columns +required: b,background -- argument: a zero based comma separated list of background individuals corresponding to VCF columns +required: f,file -- argument: proper formatted VCF +required: y,type -- argument: genotype likelihood format; genotype : GT,GL,PL,GP +optional: r,region -- argument: a tabix compliant genomic range: seqid or seqid:start-end +optional: d,deltaaf -- argument: skip sites where the difference in allele frequencies is less than deltaaf, default is zero + +Type: statistics + + +``` + + + + + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# SEE ALSO + + + +[vcflib](./vcflib.md)(1) + + + +# OTHER + +## Source code + +[wcFst.cpp](https://github.com/vcflib/vcflib/blob/master/src/wcFst.cpp) + +# LICENSE + +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. MIT licensed. + + diff -Nru libvcflib-1.0.1+dfsg/.github/ISSUE_TEMPLATE/bug_report.md libvcflib-1.0.2+dfsg/.github/ISSUE_TEMPLATE/bug_report.md --- libvcflib-1.0.1+dfsg/.github/ISSUE_TEMPLATE/bug_report.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/.github/ISSUE_TEMPLATE/bug_report.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,44 @@ +--- +name: Bug report 🐞 +about: Create a report to help us improve +title: '' +labels: bug +assignees: '' +--- +**Only bug reports!** + +The C++ version of VCFLIB is in *maintenance* mode. Use the github issue +tracker to report bugs *only*. For comments, questions and features, +please use the google group mailing list as stated on the +[README](https://github.com/vcflib/vcflib/blob/master/README.md)! + +**Describe the bug** + +A clear and concise description of what the bug is. + +**To Reproduce** + +Include all steps to reproduce the behavior and paste any complete +errors from the terminal. + +**Expected behavior** + +A clear and concise description of what you expected to happen. 
+ +**Screenshots** + +If applicable, add screenshots to help explain your problem. + +**Additional context** + +Add any other context about the problem here. + +Include a set of VCF files to reproduce the issue + ++ bonus points if you try to minimize the test case yourself, as issues are often localized: + - try to use sambamba or samtools slice to first extract the reference where the error occurs + - if that succeeds (the error is still reproducible), continue to crop the file in binary-search fashion + +**Finally** + +Please check the README and docs carefully. Everyone working on vcflib is doing that for free. Please respect our time (too). diff -Nru libvcflib-1.0.1+dfsg/.github/workflows/ci_test.yml libvcflib-1.0.2+dfsg/.github/workflows/ci_test.yml --- libvcflib-1.0.1+dfsg/.github/workflows/ci_test.yml 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/.github/workflows/ci_test.yml 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,30 @@ +name: CI +on: [push, pull_request] +jobs: + arch: + name: vcflib-CI + runs-on: ubuntu-latest + strategy: + matrix: + os: [ubuntu-latest] + python-version: [3.8] + steps: + - name: Install dependencies + run: sudo apt-get install cmake libhts-dev libtabixpp-dev libtabixpp0 + - uses: actions/checkout@v2 + - uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Checkout submodules + run: git submodule update --init --recursive + - name: Configure + run: | + mkdir -p build + cd build + cmake .. 
+ - name: Compile + run: cmake --build build/ --verbose + - name: Run tests + run: | + cd build + ctest --verbose diff -Nru libvcflib-1.0.1+dfsg/.github/workflows/issue-tracker-bot.yaml libvcflib-1.0.2+dfsg/.github/workflows/issue-tracker-bot.yaml --- libvcflib-1.0.1+dfsg/.github/workflows/issue-tracker-bot.yaml 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/.github/workflows/issue-tracker-bot.yaml 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,19 @@ +name: "Close stale issues" +on: + schedule: + - cron: "33 3 * * *" + +jobs: + stale: + runs-on: ubuntu-latest + steps: + - uses: actions/stale@v3 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + stale-issue-message: 'This issue is marked stale because it has been open 120 days with no activity. Remove stale label or comment or this will be closed in 5 days' + close-issue-message: 'This issue was closed for lack of activity. Feel free to re-open if someone feels like working on it.' + days-before-stale: 120 + days-before-close: 5 + exempt-issue-labels: "bug,build,enhancement,help wanted,in progress" + debug-only: false + operations-per-run: 100 diff -Nru libvcflib-1.0.1+dfsg/.github/workflows/support-tracker-bot.yaml libvcflib-1.0.2+dfsg/.github/workflows/support-tracker-bot.yaml --- libvcflib-1.0.1+dfsg/.github/workflows/support-tracker-bot.yaml 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/.github/workflows/support-tracker-bot.yaml 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,20 @@ +name: "Close support issues" +on: + schedule: + - cron: "0 1 * * *" + +jobs: + stale: + runs-on: ubuntu-latest + steps: + - uses: actions/stale@v3 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + stale-issue-message: 'This issue is marked stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 5 days' + close-issue-message: 'This issue was closed for lack of activity. Feel free to re-open if someone feels like working on it.' 
+ days-before-stale: 30 + days-before-close: 5 + only-labels: "please file bugs only - try biostars.org instead" + exempt-issue-labels: "build,enhancement,help wanted,in progress" + debug-only: false + operations-per-run: 100 diff -Nru libvcflib-1.0.1+dfsg/.gitignore libvcflib-1.0.2+dfsg/.gitignore --- libvcflib-1.0.1+dfsg/.gitignore 2019-10-01 07:06:01.000000000 +0000 +++ libvcflib-1.0.2+dfsg/.gitignore 2021-01-28 07:04:12.000000000 +0000 @@ -3,6 +3,8 @@ .*swp .nfs* *.o +*.cmake +tmp BedReader.cpp Fasta.cpp Fasta.h @@ -33,7 +35,6 @@ test.vcf test.vcf.gz test.vcf.gz.tbi -test/ vcf2tsv vcfaddinfo vcfaddtag.cpp @@ -120,3 +121,10 @@ bin/ obj/ include/ +build/ +*.aux +*.log +*.synctex.gz +test/tests/main +tabixpp/htslib +contrib/htslib diff -Nru libvcflib-1.0.1+dfsg/.gitmodules libvcflib-1.0.2+dfsg/.gitmodules --- libvcflib-1.0.1+dfsg/.gitmodules 2019-10-01 07:06:01.000000000 +0000 +++ libvcflib-1.0.2+dfsg/.gitmodules 2021-01-28 07:04:12.000000000 +0000 @@ -1,6 +1,3 @@ -[submodule "tabixpp"] - path = tabixpp - url = https://github.com/ekg/tabixpp.git [submodule "smithwaterman"] path = smithwaterman url = https://github.com/ekg/smithwaterman.git @@ -25,3 +22,6 @@ [submodule "libVCFH"] path = libVCFH url = https://github.com/edawson/libVCFH.git +[submodule "src/simde"] + path = src/simde + url = https://github.com/simd-everywhere/simde-no-tests.git diff -Nru libvcflib-1.0.1+dfsg/guix.scm libvcflib-1.0.2+dfsg/guix.scm --- libvcflib-1.0.1+dfsg/guix.scm 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/guix.scm 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,68 @@ +;; To use this file to build HEAD of vcflib: +;; +;; guix build -f guix.scm +;; +;; To get a development container (emacs shell will work) +;; +;; guix environment -C -l guix.scm + +(use-modules + ((guix licenses) #:prefix license:) + (guix gexp) + (guix packages) + (guix git-download) + (guix build-system cmake) + (gnu packages algebra) + (gnu packages base) + (gnu packages compression) + (gnu 
packages bioinformatics) + (gnu packages build-tools) + (gnu packages curl) + (gnu packages haskell-xyz) ; pandoc + (gnu packages llvm) + (gnu packages python) + ;; (gnu packages ninja) + (gnu packages parallel) + (gnu packages perl) + (gnu packages perl6) + (gnu packages pkg-config) + (gnu packages ruby) + (srfi srfi-1) + (ice-9 popen) + (ice-9 rdelim)) + +(define %source-dir (dirname (current-filename))) + +(define %git-commit + (read-string (open-pipe "git show HEAD | head -1 | cut -d ' ' -f 2" OPEN_READ))) + +(define-public vcflib-git + (package + (name "vcflib-git") + (version (git-version "1.0.2" "HEAD" %git-commit)) + (source (local-file %source-dir #:recursive? #t)) + (build-system cmake-build-system) + (inputs + `(("curl" ,curl) + ("fastahack" ,fastahack) + ("htslib" ,htslib) + ("pandoc" ,pandoc) ;; for generation man pages + ("perl" ,perl) + ("python" ,python) + ("ruby" ,ruby) ;; for generating man pages + ("smithwaterman" ,smithwaterman) + ("tabixpp" ,tabixpp) + ("xz" ,xz) + ("zlib" ,zlib))) + (native-inputs + `(("pkg-config" ,pkg-config))) + (home-page "https://github.com/vcflib/vcflib/") + (synopsis "Library for parsing and manipulating VCF files") + (description "Vcflib provides methods to manipulate and interpret +sequence variation as it can be described by VCF. 
It is both an API for parsing +and operating on records of genomic variation as it can be described by the VCF +format, and a collection of command-line utilities for executing complex +manipulations on VCF files.") + (license license:expat))) + +vcflib-git diff -Nru libvcflib-1.0.1+dfsg/LICENSE libvcflib-1.0.2+dfsg/LICENSE --- libvcflib-1.0.1+dfsg/LICENSE 2019-10-01 07:06:01.000000000 +0000 +++ libvcflib-1.0.2+dfsg/LICENSE 2021-01-28 07:04:12.000000000 +0000 @@ -1,4 +1,4 @@ -Copyright (c) 2012 Erik Garrison +Copyright (c) 2012-2020 Erik Garrison Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff -Nru libvcflib-1.0.1+dfsg/Makefile libvcflib-1.0.2+dfsg/Makefile --- libvcflib-1.0.1+dfsg/Makefile 2019-10-01 07:06:01.000000000 +0000 +++ libvcflib-1.0.2+dfsg/Makefile 2021-01-28 07:04:12.000000000 +0000 @@ -1,247 +1,39 @@ -#OBJ_DIR = ./ -HEADERS = src/Variant.h \ - src/split.h \ - src/pdflib.hpp \ - src/var.hpp \ - src/cdflib.hpp \ - src/rnglib.hpp \ - src/join.h -SOURCES = src/Variant.cpp \ - src/rnglib.cpp \ - src/var.cpp \ - src/pdflib.cpp \ - src/cdflib.cpp \ - src/split.cpp -OBJECTS= $(SOURCES:.cpp=.o) +# Convenience Makefile invokes cmake. 
See also the README VCF_LIB_LOCAL:=$(shell pwd) -BIN_DIR:=bin -LIB_DIR:=lib -SRC_DIR=src -INC_DIR:=include -OBJ_DIR:=obj - -RM = rm -f -CP = cp -f - -# TODO -#vcfstats.cpp - -BIN_SOURCES = src/vcfecho.cpp \ - src/vcfnormalizesvs.cpp \ - src/dumpContigsFromHeader.cpp \ - src/bFst.cpp \ - src/pVst.cpp \ - src/hapLrt.cpp \ - src/popStats.cpp \ - src/wcFst.cpp \ - src/iHS.cpp \ - src/segmentFst.cpp \ - src/segmentIhs.cpp \ - src/genotypeSummary.cpp \ - src/sequenceDiversity.cpp \ - src/pFst.cpp \ - src/smoother.cpp \ - src/vcfld.cpp \ - src/plotHaps.cpp \ - src/abba-baba.cpp \ - src/permuteGPAT++.cpp \ - src/permuteSmooth.cpp \ - src/normalize-iHS.cpp \ - src/meltEHH.cpp \ - src/vcfaltcount.cpp \ - src/vcfhetcount.cpp \ - src/vcfhethomratio.cpp \ - src/vcffilter.cpp \ - src/vcf2tsv.cpp \ - src/vcfgenotypes.cpp \ - src/vcfannotategenotypes.cpp \ - src/vcfcommonsamples.cpp \ - src/vcfremovesamples.cpp \ - src/vcfkeepsamples.cpp \ - src/vcfsamplenames.cpp \ - src/vcfgenotypecompare.cpp \ - src/vcffixup.cpp \ - src/vcfclassify.cpp \ - src/vcfsamplediff.cpp \ - src/vcfremoveaberrantgenotypes.cpp \ - src/vcfrandom.cpp \ - src/vcfparsealts.cpp \ - src/vcfstats.cpp \ - src/vcfflatten.cpp \ - src/vcfprimers.cpp \ - src/vcfnumalt.cpp \ - src/vcfcleancomplex.cpp \ - src/vcfintersect.cpp \ - src/vcfannotate.cpp \ - src/vcfallelicprimitives.cpp \ - src/vcfoverlay.cpp \ - src/vcfaddinfo.cpp \ - src/vcfkeepinfo.cpp \ - src/vcfkeepgeno.cpp \ - src/vcfafpath.cpp \ - src/vcfcountalleles.cpp \ - src/vcflength.cpp \ - src/vcfdistance.cpp \ - src/vcfrandomsample.cpp \ - src/vcfentropy.cpp \ - src/vcfglxgt.cpp \ - src/vcfroc.cpp \ - src/vcfcheck.cpp \ - src/vcfstreamsort.cpp \ - src/vcfuniq.cpp \ - src/vcfuniqalleles.cpp \ - src/vcfremap.cpp \ - src/vcf2fasta.cpp \ - src/vcfsitesummarize.cpp \ - src/vcfbreakmulti.cpp \ - src/vcfcreatemulti.cpp \ - src/vcfevenregions.cpp \ - src/vcfcat.cpp \ - src/vcfgenosummarize.cpp \ - src/vcfgenosamplenames.cpp \ - src/vcfgeno2haplo.cpp \ - 
src/vcfleftalign.cpp \ - src/vcfcombine.cpp \ - src/vcfgeno2alleles.cpp \ - src/vcfindex.cpp \ - src/vcf2dag.cpp \ - src/vcfsample2info.cpp \ - src/vcfqual2info.cpp \ - src/vcfinfo2qual.cpp \ - src/vcfglbound.cpp \ - src/vcfunphase.cpp \ - src/vcfnull2ref.cpp \ - src/vcfinfosummarize.cpp - -# when we can figure out how to build on mac -# src/vcfsom.cpp - -#BINS = $(BIN_SOURCES:.cpp=) -BINS = $(addprefix $(BIN_DIR)/,$(notdir $(BIN_SOURCES:.cpp=))) -SHORTBINS = $(notdir $(BIN_SOURCES:.cpp=)) - -TABIX = tabixpp/tabix.o -FASTAHACK = fastahack/Fasta.o -SMITHWATERMAN = smithwaterman/SmithWatermanGotoh.o -REPEATS = smithwaterman/Repeats.o -INDELALLELE = smithwaterman/IndelAllele.o -DISORDER = smithwaterman/disorder.o -LEFTALIGN = smithwaterman/LeftAlign.o -FSOM = fsom/fsom.o -FILEVERCMP = filevercmp/filevercmp.o - -# Work out how to find htslib -# Use the one we ship in tabixpp unless told otherwise by the environment -HTS_LIB ?= $(VCF_LIB_LOCAL)/tabixpp/htslib/libhts.a -HTS_INCLUDES ?= -I$(VCF_LIB_LOCAL)/tabixpp/htslib -HTS_LDFLAGS ?= -L$(VCF_LIB_LOCAL)/tabixpp/htslib -lhts -lbz2 -lm -lz -llzma -pthread - - -INCLUDES = $(HTS_INCLUDES) -I$(INC_DIR) -LDFLAGS = -L$(LIB_DIR) -lvcflib $(HTS_LDFLAGS) -lpthread -lz -lm -llzma -lbz2 - - - -all: $(OBJECTS) $(BINS) scriptToBin - -scriptToBin: $(BINS) - $(CP) scripts/* $(BIN_DIR) - -GIT_VERSION += $(shell git describe --abbrev=4 --dirty --always) - -CXXFLAGS = -Ofast -D_FILE_OFFSET_BITS=64 -std=c++0x -#CXXFLAGS = -O2 -#CXXFLAGS = -pedantic -Wall -Wshadow -Wpointer-arith -Wcast-qual - -SSW = src/ssw.o src/ssw_cpp.o - -ssw.o: src/ssw.hpp -ssw_cpp.o:src/ssw_cpp.hpp +BUILD_DIR:=$(VCF_LIB_LOCAL)/build +BIN_DIR:=$(VCF_LIB_LOCAL)/bin +LIB_DIR:=$(VCF_LIB_LOCAL)/lib +INC_DIR:=$(VCF_LIB_LOCAL)/include +CMAKE_FLAGS?= + +all: + if [ ! 
-d $(BUILD_DIR) ]; then mkdir -p $(BUILD_DIR); fi + cd $(BUILD_DIR); \ + cmake $(CMAKE_FLAGS) -DCMAKE_INSTALL_PREFIX=$(VCF_LIB_LOCAL) $(VCF_LIB_LOCAL); \ + $(MAKE) && $(MAKE) install openmp: - $(MAKE) CXXFLAGS="$(CXXFLAGS) -fopenmp -D HAS_OPENMP" + CMAKE_FLAGS=-DOPENMP=ON $(MAKE) all profiling: - $(MAKE) CXXFLAGS="$(CXXFLAGS) -g" all + CMAKE_FLAGS=-DPROFILING=ON $(MAKE) all gprof: - $(MAKE) CXXFLAGS="$(CXXFLAGS) -pg" all - -$(OBJECTS): $(SOURCES) $(HEADERS) $(TABIX) multichoose pre $(SMITHWATERMAN) $(FILEVERCMP) $(FASTAHACK) - $(CXX) -c -o $@ src/$(*F).cpp $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) && $(CP) src/*.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ - -multichoose: pre - cd multichoose && $(MAKE) && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ - -intervaltree: pre - cd intervaltree && $(MAKE) && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ - -$(TABIX): pre - cd tabixpp && INCLUDES="$(HTS_INCLUDES)" LIBPATH="-L. $(HTS_LDFLAGS)" HTSLIB="$(HTS_LIB)" $(MAKE) && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ - -$(SMITHWATERMAN): pre - cd smithwaterman && $(MAKE) && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && $(CP) *.o $(VCF_LIB_LOCAL)/$(OBJ_DIR)/ - -$(DISORDER): $(SMITHWATERMAN) - -$(REPEATS): $(SMITHWATERMAN) - -$(LEFTALIGN): $(SMITHWATERMAN) - -$(INDELALLELE): $(SMITHWATERMAN) - -$(FASTAHACK): pre - cd fastahack && $(MAKE) && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && $(CP) Fasta.o $(VCF_LIB_LOCAL)/$(OBJ_DIR)/ - -#$(FSOM): -# cd fsom && $(CXX) $(CXXFLAGS) -c fsom.c -lm - -$(FILEVERCMP): pre - cd filevercmp && make && $(CP) *.h* $(VCF_LIB_LOCAL)/$(INC_DIR)/ && $(CP) *.o $(VCF_LIB_LOCAL)/$(INC_DIR)/ - -$(SHORTBINS): pre - $(MAKE) $(BIN_DIR)/$@ - -$(BINS): $(BIN_SOURCES) libvcflib.a $(OBJECTS) $(SMITHWATERMAN) $(FASTAHACK) $(DISORDER) $(LEFTALIGN) $(INDELALLELE) $(SSW) $(FILEVERCMP) pre intervaltree - $(CXX) src/$(notdir $@).cpp -o $@ $(INCLUDES) $(LDFLAGS) $(CXXFLAGS) -DVERSION=\"$(GIT_VERSION)\" - -libvcflib.a: $(OBJECTS) $(SMITHWATERMAN) $(REPEATS) $(FASTAHACK) $(DISORDER) $(LEFTALIGN) 
$(INDELALLELE) $(SSW) $(FILEVERCMP) $(TABIX) pre - ar rs libvcflib.a $(OBJECTS) smithwaterman/sw.o $(FASTAHACK) $(SSW) $(FILEVERCMP) $(TABIX) - $(CP) libvcflib.a $(LIB_DIR) - + CMAKE_FLAGS=-DGPROF=ON $(MAKE) all test: $(BINS) @prove -Itests/lib -w tests/*.t -pre: - if [ ! -d $(BIN_DIR) ]; then mkdir -p $(BIN_DIR); fi - if [ ! -d $(LIB_DIR) ]; then mkdir -p $(LIB_DIR); fi - if [ ! -d $(INC_DIR) ]; then mkdir -p $(INC_DIR); fi - if [ ! -d $(OBJ_DIR) ]; then mkdir -p $(OBJ_DIR); fi - - pull: git pull update: pull all clean: - $(RM) $(BINS) $(OBJECTS) - $(RM) ssw_cpp.o ssw.o - $(RM) libvcflib.a - $(RM) -r $(BIN_DIR) - $(RM) -r $(LIB_DIR) - $(RM) -r $(INC_DIR) - $(RM) -r $(OBJ_DIR) - $(MAKE) clean -C tabixpp - $(MAKE) clean -C smithwaterman - $(MAKE) clean -C fastahack - $(MAKE) clean -C multichoose - $(MAKE) clean -C fsom - $(MAKE) clean -C libVCFH - $(MAKE) clean -C test - $(MAKE) clean -C filevercmp - $(MAKE) clean -C intervaltree - + rm -f $(BINS) $(OBJECTS) + rm -rf $(BIN_DIR) + rm -rf $(LIB_DIR) + rm -rf $(INC_DIR) + .PHONY: clean all test pre diff -Nru libvcflib-1.0.1+dfsg/man/abba-baba.1 libvcflib-1.0.2+dfsg/man/abba-baba.1 --- libvcflib-1.0.1+dfsg/man/abba-baba.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/abba-baba.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,64 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "ABBA-BABA" "1" "" "abba-baba (vcflib)" "abba-baba (VCF genotype)" +.hy +.SH NAME +.PP +\f[B]abba-baba\f[R] +.SH SYNOPSIS +.PP +\f[B]abba-baba\f[R] \[en]tree 0,1,2,3 \[en]file my.vcf \[en]type PL +.SH DESCRIPTION +.PP +\f[B]abba-baba\f[R] calculates the tree pattern for four individuals. +This tool assumes reference is ancestral and ignores non +\f[B]abba-baba\f[R] sites. +The output is a boolean value: 1 = true , 0 = false for abba and baba. +The tree argument should be specified from the most basal taxa to the +most derived.
+.SH OPTIONS +.IP +.nf +\f[C] + + + Example: + D C B A + \[rs] / / / + \[rs] / / + \[rs] / + \[rs] / + / + / + --tree A,B,C,D + +Output : 4 columns : + 1. seqid + 2. position + 3. abba + 4. baba +required: t,tree -- a zero based comma separated list of target individuals corresponding to VCF columns +required: f,file -- a properly formatted VCF. +required: y,type -- genotype likelihood format ; genotypes: GP,GL or PL; + + +type: genotype +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/bFst.1 libvcflib-1.0.2+dfsg/man/bFst.1 --- libvcflib-1.0.1+dfsg/man/bFst.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/bFst.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,70 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "BFST" "1" "" "bFst (vcflib)" "bFst (VCF statistics)" +.hy +.SH NAME +.PP +\f[B]bFst\f[R] +.SH SYNOPSIS +.PP +\f[B]bFst\f[R] \[en]target 0,1,2,3,4,5,6,7 \[en]background +11,12,13,16,17,19,22 \[en]file my.vcf \[en]deltaaf 0.1 +.SH DESCRIPTION +.PP +\f[B]bFst\f[R] is a Bayesian approach to Fst. +Importantly \f[B]bFst\f[R] accounts for genotype uncertainty in the +model using genotype likelihoods. +For a more detailed description see: \[ga]A Bayesian approach to +inferring population structure from dominant markers\[cq] by Holsinger +et al.\ Molecular Ecology Vol 11, issue 7 2002. +The likelihood function has been modified to use genotype likelihoods +provided by variant callers. +There are five free parameters estimated in the model: each +subpopulation\[cq]s allele frequency and Fis (fixation index, within +each subpopulation), a free parameter for the total population\[cq]s +allele frequency, and Fst.
+.SH OPTIONS +.IP +.nf +\f[C] + + +Output : 11 columns : + 1. Seqid + 2. Position + 3. Observed allele frequency in target. + 4. Estimated allele frequency in target. + 5. Observed allele frequency in background. + 6. Estimated allele frequency in background. + 7. Observed allele frequency combined. + 8. Estimated allele frequency in combined. + 9. ML estimate of Fst (mean) + 10. Lower bound of the 95% credible interval + 11. Upper bound of the 95% credible interval + +required: t,target -- a zero based comma separated list of target individuals corresponding to VCF columns +required: b,background -- a zero based comma separated list of background individuals corresponding to VCF columns +required: f,file -- a properly formatted VCF file. The FORMAT field MUST contain \[dq]PL\[dq] +required: d,deltaaf -- skip sites where the difference in allele frequency is less than deltaaf + + +Type: statistics +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/dumpContigsFromHeader.1 libvcflib-1.0.2+dfsg/man/dumpContigsFromHeader.1 --- libvcflib-1.0.1+dfsg/man/dumpContigsFromHeader.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/dumpContigsFromHeader.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,53 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "DUMPCONTIGSFROMHEADER" "1" "" "dumpContigsFromHeader (vcflib)" "dumpContigsFromHeader (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]dumpContigsFromHeader\f[R] +.SH SYNOPSIS +.PP +\f[B]dumpContigsFromHeader\f[R] file +.SH DESCRIPTION +.PP +Dump contigs from header +.SH EXAMPLES +.IP +.nf +\f[C] + +Example: + + **dumpContigsFromHeader** samples/scaffold612.vcf + + ##contig=<ID=scaffold4,length=1524> + ##contig=<ID=scaffold12,length=56895> + (...)
+ + output + + scaffold4 1524 + scaffold12 56895 + (...) + +Type: transformation + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/genotypeSummary.1 libvcflib-1.0.2+dfsg/man/genotypeSummary.1 --- libvcflib-1.0.1+dfsg/man/genotypeSummary.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/genotypeSummary.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,51 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "GENOTYPESUMMARY" "1" "" "genotypeSummary (vcflib)" "genotypeSummary (VCF +statistics)" +.hy +.SH NAME +.PP +\f[B]genotypeSummary\f[R] +.SH SYNOPSIS +.PP +genotypeSummary \[en]type PL \[en]target 0,1,2,3,4,5,6,7 \[en]file +my.vcf \[en]snp +.SH DESCRIPTION +.PP +Generates a table of genotype counts. +Summarizes genotype counts for bi-allelic SNVs and indels. +.SH OPTIONS +.IP +.nf +\f[C] + + +output: table of genotype counts for each individual. +required: t,target -- a zero based comma separated list of target individuals corresponding to VCF columns +required: f,file -- proper formatted VCF +required, y,type -- genotype likelihood format; genotype : GL,PL,GP +optional, r,region -- a tabix compliant region : chr1:1-1000 or chr1 +optional, s,snp -- Only count SNPs +optional, a,ancestral -- describe counts relative to the ancestral allele defined as AA in INFO + +Type: statistics + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors.
diff -Nru libvcflib-1.0.1+dfsg/man/hapLrt.1 libvcflib-1.0.2+dfsg/man/hapLrt.1 --- libvcflib-1.0.1+dfsg/man/hapLrt.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/hapLrt.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,58 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "HAPLRT" "1" "" "hapLrt (vcflib)" "hapLrt (VCF genotype)" +.hy +.SH NAME +.PP +\f[B]hapLrt\f[R] +.SH SYNOPSIS +.PP +hapLrt \[en]target 0,1,2,3,4,5,6,7 \[en]background 11,12,13,16,17,19,22 +\[en]type GP \[en]file my.vcf +.SH DESCRIPTION +.PP +HapLRT is a likelihood ratio test for haplotype lengths. +The lengths are modeled with an exponential distribution. +The sign denotes if the target has longer haplotypes (1) or the +background (-1). +.SH OPTIONS +.IP +.nf +\f[C] + + +Output : 6 columns : + 1. seqid + 2. position + 3. mean target haplotype length + 4. mean background haplotype length + 5. p-value from LRT + 6. sign + +required: t,target -- argument: a zero based comma separated list of target individuals corresponding to VCF columns +required: b,background -- argument: a zero based comma separated list of background individuals corresponding to VCF columns +required: f,file -- argument: a properly formatted phased VCF file +required: y,type -- argument: type of genotype likelihood: PL, GL or GP +optional: r,region -- argument: a genomic range to calculate **hapLrt** on in the format : \[dq]seqid:start-end\[dq] or \[dq]seqid\[dq] + + +Type: genotype +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors.
diff -Nru libvcflib-1.0.1+dfsg/man/iHS.1 libvcflib-1.0.2+dfsg/man/iHS.1 --- libvcflib-1.0.1+dfsg/man/iHS.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/iHS.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,76 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "IHS" "1" "" "iHS (vcflib)" "iHS (VCF statistics)" +.hy +.SH NAME +.PP +\f[B]iHS\f[R] +.SH SYNOPSIS +.PP +\f[B]iHS\f[R] \[en]target 0,1,2,3,4,5,6,7 \[en]file my.phased.vcf +\ \[en]region chr1:1-1000 > STDOUT 2> STDERR +.SH DESCRIPTION +.PP +\f[B]iHS\f[R] calculates the integrated haplotype score which measures +the relative decay of extended haplotype homozygosity (EHH) for the +reference and alternative alleles at a site (see: Voight et al.\ 2006, +Szpiech & Hernandez 2014). +.SH OPTIONS +.IP +.nf +\f[C] + + +Our code is highly concordant with both implementations mentioned. However, we do not set an upper limit to the allele frequency. **iHS** can be run without a genetic map, in which case the change in EHH is integrated over a constant. Human genetic maps for GRCh36 and GRCh37 (hg18 & hg19) can be found at: http://bochet.gcc.biostat.washington.edu/beagle/genetic_maps/ . **iHS** by default interpolates SNV positions to genetic position (you don\[aq]t need a genetic position for every VCF entry in the map file). + +**iHS** analysis requires normalization by allele frequency. It is important that **iHS** is calculated over large regions so that the normalization does not down weight real signals. For genome-wide runs it is recommended to run slightly overlapping windows, throw out values that fail integration (columns 7 & 8 in the output), and then remove duplicates by using the \[aq]sort\[aq] and \[aq]uniq\[aq] Linux commands. Normalization of the output is as simple as running \[aq]normalize-**iHS**\[aq]. + + + + **iHS** calculates the integrated ratio of haplotype decay between the reference and non-reference allele. +Output : 8 columns : + 1. seqid + 2. position + 3.
target allele frequency + 4. integrated EHH (alternative) + 5. integrated EHH (reference) + 6. **iHS** ln(iEHHalt/iEHHref) + 7. != 0 integration failure + 8. != 0 integration failure + +Params: + required: t,target A zero based comma separated list of target + individuals corresponding to VCF columns + required: r,region A tabix compliant genomic range + format: \[dq]seqid:start-end\[dq] or \[dq]seqid\[dq] + required: f,file Proper formatted and phased VCF. + required: y,type Genotype likelihood format: GT,PL,GL,GP + optional: a,af Alternative alleles with frequencies less + than [0.05] are skipped. + optional: x,threads Number of CPUs [1]. + recommended: g,gen A PLINK formatted map file. + + + +Type: statistics + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/meltEHH.1 libvcflib-1.0.2+dfsg/man/meltEHH.1 --- libvcflib-1.0.1+dfsg/man/meltEHH.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/meltEHH.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,65 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "MELTEHH" "1" "" "meltEHH (vcflib)" "meltEHH (VCF statistics)" +.hy +.SH NAME +.PP +\f[B]meltEHH\f[R] +.SH SYNOPSIS +.PP +\f[B]meltEHH\f[R] \[en]target 0,1,2,3,4,5,6,7 \[en]pos 10 \[en]file +my.phased.vcf \ \[en]region chr1:1-1000 > STDOUT 2> STDERR +.SH DESCRIPTION +.SH OPTIONS +.IP +.nf +\f[C] + + + **meltEHH** provides the data to plot extended haplotype homozygosity +(EHH) curves and produces the data to generate the following plot: +\[dq]\[dq] + + + + **meltEHH** provides the data to plot EHH curves. +Output : 4 columns : + 1. seqid + 2. position + 3. EHH + 4.
ref or alt [0 == ref] +Params: + required: t,target A zero based comma separated list of target + individuals corresponding to VCF columns + required: r,region A tabix compliant genomic range + format: \[dq]seqid:start-end\[dq] or \[dq]seqid\[dq] + required: f,file Proper formatted and phased VCF. + required: y,type Genotype likelihood format: GT,PL,GL,GP + required: p,position Variant position to melt. + optional: a,af Alternative alleles with frequencies less + than [0.05] are skipped. + + + +Type: statistics + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/normalize-iHS.1 libvcflib-1.0.2+dfsg/man/normalize-iHS.1 --- libvcflib-1.0.1+dfsg/man/normalize-iHS.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/normalize-iHS.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,54 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "NORMALIZE-IHS" "1" "" "normalize-iHS (vcflib)" "normalize-iHS (VCF genotype)" +.hy +.SH NAME +.PP +\f[B]normalize-iHS\f[R] +.SH SYNOPSIS +.PP +normalize-iHS -s 0.01 -f input.txt +.SH DESCRIPTION +.PP +Normalizes iHS or XP-EHH scores. +.SH OPTIONS +.IP +.nf +\f[C] + + + + +A cross-population extended haplotype homozygosity (XP-EHH) score is +directional: a positive score suggests selection is likely to have +happened in population A, whereas a negative score suggests the same +about population B. See for example +https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2687721/ + + +Output : **normalize-iHS** adds one additional column to input (normalized score).
+required: -f -- Output from iHS or XPEHH +optional: -s -- Max AF diff for window [0.01] + +Type: genotype + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/permuteGPAT++.1 libvcflib-1.0.2+dfsg/man/permuteGPAT++.1 --- libvcflib-1.0.1+dfsg/man/permuteGPAT++.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/permuteGPAT++.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,51 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "PERMUTEGPAT++" "1" "" "permuteGPAT++ (vcflib)" "permuteGPAT++ (VCF phenotype)" +.hy +.SH NAME +.PP +\f[B]permuteGPAT++\f[R] +.SH SYNOPSIS +.PP +\f[B]permuteGPAT++\f[R] -f gpat.txt -n 5 -s 1 +.SH DESCRIPTION +.PP +\f[B]permuteGPAT++\f[R] is a method for adding empirical p-values to a +GPAT++ score. +.SH OPTIONS +.IP +.nf +\f[C] + + + Currently **permuteGPAT++** only supports wcFst, but will be extended. + +OUTPUT: **permuteGPAT++** will append three additional columns: + 1. The number of successes + 2. The number of trials + 3. The empirical p-value + +file: f -- argument: the input file +number: n -- argument: the number of permutations to run for each value [1000] +success: s -- argument: stop permutations after \[aq]s\[aq] successes [1] + +Type: phenotype +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. 
diff -Nru libvcflib-1.0.1+dfsg/man/permuteSmooth.1 libvcflib-1.0.2+dfsg/man/permuteSmooth.1 --- libvcflib-1.0.1+dfsg/man/permuteSmooth.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/permuteSmooth.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,56 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "PERMUTESMOOTH" "1" "" "permuteSmooth (vcflib)" "permuteSmooth (VCF statistics)" +.hy +.SH NAME +.PP +\f[B]permuteSmooth\f[R] +.SH SYNOPSIS +.PP +\f[B]permuteSmooth\f[R] -s wcFst.smooth.txt -f wcFst.txt -n 5 -u 1 +.SH DESCRIPTION +.PP +\f[B]permuteSmooth\f[R] is a method for adding empirical p-values to +smoothed wcFst scores. +.SH OPTIONS +.IP +.nf +\f[C] + + +Required: + file: f -- argument: original wcFst data + smoothed: s -- argument: smoothed wcFst data + format: y -- argument: [swcFst, segwcFst] +Optional: + number: n -- argument: the number of permutations to run for each value [1000] + success: u -- argument: stop permutations after \[aq]u\[aq] successes [1] + threads: x -- argument: number of threads [1] + +OUTPUT: **permuteSmooth** will append three additional columns: + 1. The number of successes + 2. The number of trials + 3. The empirical p-value + + +Type: statistics + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors.
diff -Nru libvcflib-1.0.1+dfsg/man/pFst.1 libvcflib-1.0.2+dfsg/man/pFst.1 --- libvcflib-1.0.1+dfsg/man/pFst.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/pFst.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,67 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "PFST" "1" "" "pFst (vcflib)" "pFst (VCF statistics)" +.hy +.SH NAME +.PP +\f[B]pFst\f[R] +.SH SYNOPSIS +.PP +\f[B]pFst\f[R] \[en]target 0,1,2,3,4,5,6,7 \[en]background +11,12,13,16,17,19,22 \[en]file my.vcf \[en]deltaaf 0.1 \[en]type PL +.SH DESCRIPTION +.PP +\f[B]pFst\f[R] is a probabilistic approach for detecting differences in +allele frequencies between two populations. +.SH OPTIONS +.IP +.nf +\f[C] + + + + +**pFst** is a likelihood ratio test (LRT) quantifying allele frequency +differences between populations. The LRT by default uses the binomial +distribution. If Genotype likelihoods are provided it uses a modified +binomial that weights each allele count by its certainty. If type is +set to \[aq]PO\[aq] the LRT uses a beta distribution to fit the allele +frequency spectrum of the target and background. PO requires the AD +and DP genotype fields and requires at least two pools for the target +and background. The p-value calculated in **pFst** is based on the +chi-squared distribution with one degree of freedom. + + +Output : 3 columns : + 1. seqid + 2. position + 3. **pFst** probability + +required: t,target -- argument: a zero based comma separated list of target individuals corresponding to VCF columns +required: b,background -- argument: a zero based comma separated list of background individuals corresponding to VCF columns +required: f,file -- argument: a properly formatted VCF. 
+required: y,type -- argument: genotype likelihood format ; genotypes: GP, GL or PL; pooled: PO +optional: d,deltaaf -- argument: skip sites where the difference in allele frequencies is less than deltaaf, default is zero +optional: r,region -- argument: a tabix compliant genomic range : seqid or seqid:start-end +optional: c,counts -- switch : use genotype counts rather than genotype likelihoods to estimate parameters, default false + +Type: statistics +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/plotHaps.1 libvcflib-1.0.2+dfsg/man/plotHaps.1 --- libvcflib-1.0.1+dfsg/man/plotHaps.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/plotHaps.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,47 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "PLOTHAPS" "1" "" "plotHaps (vcflib)" "plotHaps (VCF statistics)" +.hy +.SH NAME +.PP +\f[B]plotHaps\f[R] +.SH SYNOPSIS +.SH DESCRIPTION +.PP +\f[B]plotHaps\f[R] provides the formatted output that can be used with +`bin/plotHaplotypes.R'. 
+.SH OPTIONS +.IP +.nf +\f[C] + + +Output : haplotype matrix and positions + +**plotHaps** --target 0,1,2,3,4,5,6,7 --file my.phased.vcf.gz + +required: t,target -- argument: a zero based comma separated list of target individuals corresponding to VCF columns +required: r,region -- argument: a tabix compliant genomic range : \[dq]seqid:start-end\[dq] or \[dq]seqid\[dq] +required: f,file -- argument: proper formatted phased VCF file +required: y,type -- argument: genotype likelihood format: PL,GL,GP + +Type: statistics +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/popStats.1 libvcflib-1.0.2+dfsg/man/popStats.1 --- libvcflib-1.0.1+dfsg/man/popStats.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/popStats.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,58 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "POPSTATS" "1" "" "popStats (vcflib)" "popStats (VCF statistics)" +.hy +.SH NAME +.PP +\f[B]popStats\f[R] +.SH SYNOPSIS +.PP +popStats \[en]type PL \[en]target 0,1,2,3,4,5,6,7 \[en]file my.vcf +.SH DESCRIPTION +.PP +General population genetic statistics for each SNP +.SH OPTIONS +.IP +.nf +\f[C] + + + + Calculates basic population statistics at bi-allelic sites. The allele frequency is the number of non-reference alleles divided by the total number of alleles. The expected heterozygosity is 2*p*q, where p is the non-reference allele frequency and q is 1-p. The observed heterozygosity is the fraction of 0/1 genotypes out of all genotypes. The inbreeding coefficient, Fis, is the relative heterozygosity of each individual compared to the target group. + +Output : 9 columns : + 1. seqid + 2. position + 3. target allele frequency + 4. expected heterozygosity + 5.
observed heterozygosity + 6. number of hets + 7. number of homozygous ref + 8. number of homozygous alt + 9. target Fis +required: t,target -- a zero based comma separated list of target individuals corresponding to VCF columns +required: f,file -- proper formatted VCF +required, y,type -- genotype likelihood format; genotype : GL,PL,GP +optional, r,region -- a tabix compliant region : chr1:1-1000 or chr1 + +Type: statistics + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/pVst.1 libvcflib-1.0.2+dfsg/man/pVst.1 --- libvcflib-1.0.1+dfsg/man/pVst.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/pVst.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,64 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "PVST" "1" "" "pVst (vcflib)" "pVst (VCF statistics)" +.hy +.SH NAME +.PP +\f[B]pVst\f[R] +.SH SYNOPSIS +.PP +\f[B]pVst\f[R] \[en]target 0,1,2,3,4,5,6,7 \[en]background +11,12,13,16,17,19,22 \[en]file my.vcf \[en]type CN +.SH DESCRIPTION +.PP +\f[B]pVst\f[R] calculates vst, a measure of CNV stratification. +.SH OPTIONS +.IP +.nf +\f[C] + + + + +The statistic Vst is used to test the difference in copy numbers at +each SV between two groups: Vst = (Vt-Vs)/Vt, where Vt is the overall +variance of copy number and Vs the average variance within +populations. + +Output : 5 columns : + 1. seqid + 2. position + 3. end + 4. vst + 5. probability + +required: t,target -- argument: a zero based comma separated list of target individuals corresponding to VCF columns +required: b,background -- argument: a zero based comma separated list of background individuals corresponding to VCF columns +required: f,file -- argument: a properly formatted VCF.
+required: y,type -- argument: the genotype field with the copy number: e.g. CN|CNF +optional: r,region -- argument: a tabix compliant genomic range : seqid or seqid:start-end +optional: x,cpu -- argument: number of CPUs [1] +optional: n,per -- argument: number of permutations [1000] + +Type: statistics + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/segmentFst.1 libvcflib-1.0.2+dfsg/man/segmentFst.1 --- libvcflib-1.0.1+dfsg/man/segmentFst.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/segmentFst.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,55 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "SEGMENTFST" "1" "" "segmentFst (vcflib)" "segmentFst (VCF statistics)" +.hy +.SH NAME +.PP +\f[B]segmentFst\f[R] +.SH SYNOPSIS +.PP +\f[B]segmentFst\f[R] -s 0.7 -f wcFst.output.txt +.SH DESCRIPTION +.PP +\f[B]segmentFst\f[R] creates genomic segments (bed file) for regions +with high wcFst +.SH OPTIONS +.IP +.nf +\f[C] + + +**segmentFst** provides a way to find continuous regions with high Fst values. It takes the output of wcFst and produces a BED file. These high Fst regions can be permuted with \[aq]permuteGPATwindow\[aq] +Output : 8 columns : + 1. Seqid + 2. Start (zero based) + 3. End (zero based) + 4. Average Fst + 5. Average high Fst (Fst > -s) + 6. N Fst values in segment + 7. N high fst values in segment + 8. Segment length +required: -f -- Output from wcFst +optional: -s -- High Fst cutoff [0.8] + +Type: statistics + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed.
+.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/segmentIhs.1 libvcflib-1.0.2+dfsg/man/segmentIhs.1 --- libvcflib-1.0.1+dfsg/man/segmentIhs.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/segmentIhs.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,52 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "SEGMENTIHS" "1" "" "segmentIhs (vcflib)" "segmentIhs (VCF statistics)" +.hy +.SH NAME +.PP +\f[B]segmentIhs\f[R] +.SH SYNOPSIS +.PP +segmentIhs -s 2 -f iHS.normalized.output.txt +.SH DESCRIPTION +.PP +Creates genomic segments (bed file) for regions with high iHS +.SH OPTIONS +.IP +.nf +\f[C] + +Output : 8 columns : + 1. Seqid + 2. Start (zero based) + 3. End (zero based) + 4. Average iHS + 5. Average high iHS (iHS > -s) + 6. N iHS values in segment + 7. N high iHS values in segment + 8. Segment length +required: -f -- Output from normalize-iHS +optional: -s -- High absolute iHS cutoff [2] + +Type: statistics + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors.
diff -Nru libvcflib-1.0.1+dfsg/man/sequenceDiversity.1 libvcflib-1.0.2+dfsg/man/sequenceDiversity.1 --- libvcflib-1.0.1+dfsg/man/sequenceDiversity.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/sequenceDiversity.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,62 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "SEQUENCEDIVERSITY" "1" "" "sequenceDiversity (vcflib)" "sequenceDiversity (VCF +statistics)" +.hy +.SH NAME +.PP +\f[B]sequenceDiversity\f[R] +.SH SYNOPSIS +.PP +\f[B]sequenceDiversity\f[R] \[en]target 0,1,2,3,4,5,6,7 \[en]file my.vcf +.SH DESCRIPTION +.PP +The \f[B]sequenceDiversity\f[R] program calculates two popular metrics +of haplotype diversity: pi and extended haplotype homozygosity (eHH). +Pi is calculated using the Nei and Li 1979 formulation. +eHH is a convenient way to think about haplotype diversity. +When eHH = 0 all haplotypes in the window are unique and when eHH = 1 +all haplotypes in the window are identical. +.SH OPTIONS +.IP +.nf +\f[C] + + +Output : 5 columns: + 1. seqid + 2. start of window + 3. end of window + 4. pi + 5. eHH + + +required: t,target -- argument: a zero based comma separated list of target individuals corresponding to VCF columns +required: f,file -- argument: a properly formatted phased VCF file +required: y,type -- argument: type of genotype likelihood: PL, GL or GP +optional: a,af -- sites less than af are filtered out; default is 0 +optional: r,region -- argument: a tabix compliant region : \[dq]seqid:0-100\[dq] or \[dq]seqid\[dq] +optional: w,window -- argument: the number of SNPs per window; default is 20 + +Type: statistics + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors.
diff -Nru libvcflib-1.0.1+dfsg/man/smoother.1 libvcflib-1.0.2+dfsg/man/smoother.1 --- libvcflib-1.0.1+dfsg/man/smoother.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/smoother.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,59 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "SMOOTHER" "1" "" "smoother (vcflib)" "smoother (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]smoother\f[R] +.SH SYNOPSIS +.PP +\f[B]smoother\f[R] \[en]format pFst \[en]file GPA.output.txt +.SH DESCRIPTION +.PP +\f[B]smoother\f[R] is a method for window smoothing many of the GPAT++ formats. +.SH OPTIONS +.IP +.nf +\f[C] + + + **smoother** averages a set of scores over a sliding genomic window. + **smoother** slides over genomic positions not the SNP indices. In other words + the number of scores within a window will not be constant. The last + window for each seqid can be smaller than the defined window size. + **smoother** automatically analyses different seqids separately. +Output : 4 columns : + 1. seqid + 2. window start + 3. window end + 4. averaged score + +required: f,file -- argument: a file created by GPAT++ +required: o,format -- argument: format of input file, case sensitive + available format options: + wcFst, pFst, bFst, iHS, xpEHH, abba-baba, col3 +optional: w,window -- argument: size of genomic window in base pairs (default 5000) +optional: s,step -- argument: window step size in base pairs (default 1000) +optional: t,truncate -- flag : end last window at last position (zero based) + +Type: transformation + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors.
diff -Nru libvcflib-1.0.1+dfsg/man/vcf2dag.1 libvcflib-1.0.2+dfsg/man/vcf2dag.1 --- libvcflib-1.0.1+dfsg/man/vcf2dag.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcf2dag.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,45 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCF2DAG" "1" "" "vcf2dag (vcflib)" "vcf2dag (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcf2dag\f[R] +.SH SYNOPSIS +.PP +\f[B]vcf2dag\f[R] options [] +.SH DESCRIPTION +.PP +Modify VCF to be able to build a directed acyclic graph (DAG) +.SH OPTIONS +.IP +.nf +\f[C] + +options: + -r, --reference FILE FASTA reference file. + +Modify the VCF file so that homozygous regions are included as REF/. calls. +For each ref and alt allele, assign an index. These steps are sufficient to +enable use of the VCF as a DAG (specifically a partially-ordered graph). + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcf2fasta.1 libvcflib-1.0.2+dfsg/man/vcf2fasta.1 --- libvcflib-1.0.1+dfsg/man/vcf2fasta.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcf2fasta.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,47 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCF2FASTA" "1" "" "vcf2fasta (vcflib)" "vcf2fasta (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcf2fasta\f[R] +.SH SYNOPSIS +.PP +\f[B]vcf2fasta\f[R] options [file] +.SH DESCRIPTION +.PP +Generates sample_seq:N.fa for each sample, reference sequence, and +chromosomal copy N in [0,1\&... ploidy]. +Each sequence in the fasta file is named using the same pattern used for +the file name, allowing them to be combined. 
+.SH OPTIONS +.IP +.nf +\f[C] + +options: + -f, --reference REF Use this reference when decomposing samples. + -p, --prefix PREFIX Affix this output prefix to each file, none by default + -P, --default-ploidy N Set a default ploidy for samples which do not have information in the first record (2). + + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcf2tsv.1 libvcflib-1.0.2+dfsg/man/vcf2tsv.1 --- libvcflib-1.0.1+dfsg/man/vcf2tsv.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcf2tsv.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,43 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCF2TSV" "1" "" "vcf2tsv (vcflib)" "vcf2tsv (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcf2tsv\f[R] +.SH SYNOPSIS +.PP +\f[B]vcf2tsv\f[R] [-n null_string] [-g] [vcf file] +.SH DESCRIPTION +.PP +Converts VCF to per-allele or per-genotype tab-delimited format, using +null string to replace empty values in the table. +Specifying -g will output one line per sample with genotype information. +When there is more than one alt allele there will be multiple rows, one +for each allele, and the info will match the `A' index. +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors.
diff -Nru libvcflib-1.0.1+dfsg/man/vcfaddinfo.1 libvcflib-1.0.2+dfsg/man/vcfaddinfo.1 --- libvcflib-1.0.1+dfsg/man/vcfaddinfo.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfaddinfo.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,40 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFADDINFO" "1" "" "vcfaddinfo (vcflib)" "vcfaddinfo (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfaddinfo\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfaddinfo\f[R] +.SH DESCRIPTION +.PP +Adds info fields from the second file which are not present in the first +vcf file. +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfafpath.1 libvcflib-1.0.2+dfsg/man/vcfafpath.1 --- libvcflib-1.0.1+dfsg/man/vcfafpath.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfafpath.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,52 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFAFPATH" "1" "" "vcfafpath (vcflib)" "vcfafpath (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfafpath\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfafpath\f[R] +.SH DESCRIPTION +.PP +Display genotype paths +.SH EXAMPLES +.IP +.nf +\f[C] + +Example: + + **vcfafpath** samples/scaffold612.vcf +\f[R] +.fi +.PP +T -> A A -> G T -> C C -> A C -> T A -> G T -> C G -> C C -> CAGA A -> G +.IP +.nf +\f[C] + + +Type: transformation + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. 
diff -Nru libvcflib-1.0.1+dfsg/man/vcfallelicprimitives.1 libvcflib-1.0.2+dfsg/man/vcfallelicprimitives.1 --- libvcflib-1.0.1+dfsg/man/vcfallelicprimitives.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfallelicprimitives.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,55 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFALLELICPRIMITIVES" "1" "" "vcfallelicprimitives (vcflib)" "vcfallelicprimitives (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfallelicprimitives\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfallelicprimitives\f[R] options [file] +.SH DESCRIPTION +.PP +If multiple allelic primitives (gaps or mismatches) are specified in a +single VCF record, split the record into multiple lines, but drop all +INFO fields. +Does not handle genotypes (yet). +MNPs are split into multiple SNPs unless the -m flag is provided. +Records generated by splits have th +.SH OPTIONS +.IP +.nf +\f[C] + +options: + -m, --use-mnps Retain MNPs as separate events (default: false). + -t, --tag-parsed FLAG Tag records which are split apart of a complex allele with this flag. + -L, --max-length LEN Do not manipulate records in which either the ALT or + REF is longer than LEN (default: 200). + -k, --keep-info Maintain site and allele-level annotations when decomposing. + Note that in many cases, such as multisample VCFs, these won\[aq]t + be valid post-decomposition. For biallelic loci in single-sample + VCFs, they should be usable with caution. + -g, --keep-geno Maintain genotype-level annotations when decomposing. Similar + caution should be used for this as for --keep-info. + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. 
diff -Nru libvcflib-1.0.1+dfsg/man/vcfaltcount.1 libvcflib-1.0.2+dfsg/man/vcfaltcount.1 --- libvcflib-1.0.1+dfsg/man/vcfaltcount.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfaltcount.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,39 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFALTCOUNT" "1" "" "vcfaltcount (vcflib)" "vcfaltcount (VCF statistics)" +.hy +.SH NAME +.PP +\f[B]vcfaltcount\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfaltcount\f[R] +.SH DESCRIPTION +.PP +count the number of alternate alleles in all records in the vcf file +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: statistics +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfannotate.1 libvcflib-1.0.2+dfsg/man/vcfannotate.1 --- libvcflib-1.0.1+dfsg/man/vcfannotate.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfannotate.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,48 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFANNOTATE" "1" "" "vcfannotate (vcflib)" "vcfannotate (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfannotate\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfannotate\f[R] options [] +.SH DESCRIPTION +.PP +Intersect the records in the VCF file with targets provided in a BED +file. +Intersections are done on the reference sequences in the VCF file. +If no VCF filename is specified on the command line (last argument) the +VCF is read from stdin.
+.SH OPTIONS +.IP +.nf +\f[C] + + +options: + -b, --bed use annotations provided by this BED file + -k, --key use this INFO field key for the annotations + -d, --default use this INFO field key for records without annotations + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfannotategenotypes.1 libvcflib-1.0.2+dfsg/man/vcfannotategenotypes.1 --- libvcflib-1.0.1+dfsg/man/vcfannotategenotypes.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfannotategenotypes.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,45 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFANNOTATEGENOTYPES" "1" "" "vcfannotategenotypes (vcflib)" "vcfannotategenotypes (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfannotategenotypes\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfannotategenotypes\f[R] +.SH DESCRIPTION +.PP +Examine genotype correspondence. +Annotate genotypes in the first file with genotypes in the second adding +the genotype as another flag to each sample field in the first file. +annotation-tag is the name of the sample flag which is added to store +the annotation. +Also adds a `has_variant' flag for sites where the second file has a +variant. +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors.
diff -Nru libvcflib-1.0.1+dfsg/man/vcfbreakmulti.1 libvcflib-1.0.2+dfsg/man/vcfbreakmulti.1 --- libvcflib-1.0.1+dfsg/man/vcfbreakmulti.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfbreakmulti.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,41 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFBREAKMULTI" "1" "" "vcfbreakmulti (vcflib)" "vcfbreakmulti (VCF +transformation)" +.hy +.SH NAME +.PP +\f[B]vcfbreakmulti\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfbreakmulti\f[R] options [file] +.SH DESCRIPTION +.PP +If multiple alleles are specified in a single record, break the record +into multiple lines, preserving allele-specific INFO fields. +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfcat.1 libvcflib-1.0.2+dfsg/man/vcfcat.1 --- libvcflib-1.0.1+dfsg/man/vcfcat.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfcat.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,41 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFCAT" "1" "" "vcfcat (vcflib)" "vcfcat (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfcat\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfcat\f[R] [file1] [file2] \&... [fileN] +.SH DESCRIPTION +.PP +Concatenates VCF files +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: transformation + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. 
diff -Nru libvcflib-1.0.1+dfsg/man/vcfcheck.1 libvcflib-1.0.2+dfsg/man/vcfcheck.1 --- libvcflib-1.0.1+dfsg/man/vcfcheck.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfcheck.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,47 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFCHECK" "1" "" "vcfcheck (vcflib)" "vcfcheck (VCF metrics)" +.hy +.SH NAME +.PP +\f[B]vcfcheck\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfcheck\f[R] options +.SH DESCRIPTION +.PP +Validate integrity and identity of the VCF by verifying that the VCF +record\[cq]s REF matches a given reference file. +.SH OPTIONS +.IP +.nf +\f[C] + +options: + -f, --fasta-reference FASTA reference file to use to obtain primer sequences + -x, --exclude-failures If a record fails, don\[aq]t print it. Otherwise do. + -k, --keep-failures Print if the record fails, otherwise not. + -h, --help Print this message. + -v, --version Print version. + + +Type: metrics +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. 
diff -Nru libvcflib-1.0.1+dfsg/man/vcfclassify.1 libvcflib-1.0.2+dfsg/man/vcfclassify.1 --- libvcflib-1.0.1+dfsg/man/vcfclassify.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfclassify.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,42 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFCLASSIFY" "1" "" "vcfclassify (vcflib)" "vcfclassify (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfclassify\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfclassify\f[R] +.SH DESCRIPTION +.PP +Creates a new VCF where each variant is tagged by allele class: snp, +ts/tv, indel, mnp +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: transformation + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfcleancomplex.1 libvcflib-1.0.2+dfsg/man/vcfcleancomplex.1 --- libvcflib-1.0.1+dfsg/man/vcfcleancomplex.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfcleancomplex.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,44 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFCLEANCOMPLEX" "1" "" "vcfcleancomplex (vcflib)" "vcfcleancomplex (VCF +transformation)" +.hy +.SH NAME +.PP +\f[B]vcfcleancomplex\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfcleancomplex\f[R] +.SH DESCRIPTION +.PP +Removes reference-matching sequence from complex alleles and adjusts +records to reflect positional change. +.SH OPTIONS +.IP +.nf +\f[C] + + +Generate a VCF stream in which \[aq]long\[aq] non-complexalleles have their position corrected. 
+assumes that VCF records can\[aq]t overlap 5\[aq]->3\[aq] + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfcombine.1 libvcflib-1.0.2+dfsg/man/vcfcombine.1 --- libvcflib-1.0.1+dfsg/man/vcfcombine.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfcombine.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,50 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFCOMBINE" "1" "" "vcfcombine (vcflib)" "vcfcombine (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfcombine\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfcombine\f[R] [vcf file] [vcf file] \&... +.SH DESCRIPTION +.PP +Combine VCF files positionally, combining samples when sites and alleles +are identical. +Any number of VCF files may be combined. +The INFO field and other columns are taken from one of the files which +are combined when records in multiple files match. +Alleles must have identical ordering to be combined into one record. +If they do not, multiple records will be emitted. +.SH OPTIONS +.IP +.nf +\f[C] + + +options: + -h --help This text. + -v --version Print version. + -r --region REGION A region specifier of the form chrN:x-y to bound the merge + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. 
diff -Nru libvcflib-1.0.1+dfsg/man/vcfcommonsamples.1 libvcflib-1.0.2+dfsg/man/vcfcommonsamples.1 --- libvcflib-1.0.1+dfsg/man/vcfcommonsamples.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfcommonsamples.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,41 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFCOMMONSAMPLES" "1" "" "vcfcommonsamples (vcflib)" "vcfcommonsamples (VCF +transformation)" +.hy +.SH NAME +.PP +\f[B]vcfcommonsamples\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfcommonsamples\f[R] +.SH DESCRIPTION +.PP +Generates each record in the first file, removing samples not present in +the second +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfcountalleles.1 libvcflib-1.0.2+dfsg/man/vcfcountalleles.1 --- libvcflib-1.0.1+dfsg/man/vcfcountalleles.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfcountalleles.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,46 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFCOUNTALLELES" "1" "" "vcfcountalleles (vcflib)" "vcfcountalleles (VCF +statistics)" +.hy +.SH NAME +.PP +\f[B]vcfcountalleles\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfcountalleles\f[R] +.SH DESCRIPTION +.PP +Count alleles +.SH EXAMPLES +.IP +.nf +\f[C] + +Example: + +**vcfcountalleles** samples/scaffold612.vcf +42603 + +Type: statistics + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. 
diff -Nru libvcflib-1.0.1+dfsg/man/vcfcreatemulti.1 libvcflib-1.0.2+dfsg/man/vcfcreatemulti.1 --- libvcflib-1.0.1+dfsg/man/vcfcreatemulti.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfcreatemulti.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,42 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFCREATEMULTI" "1" "" "vcfcreatemulti (vcflib)" "vcfcreatemulti (VCF +transformation)" +.hy +.SH NAME +.PP +\f[B]vcfcreatemulti\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfcreatemulti\f[R] options [file] +.SH DESCRIPTION +.PP +If overlapping alleles are represented across multiple records, merge +them into a single record. +Currently only for indels. +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfdistance.1 libvcflib-1.0.2+dfsg/man/vcfdistance.1 --- libvcflib-1.0.1+dfsg/man/vcfdistance.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfdistance.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,41 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFDISTANCE" "1" "" "vcfdistance (vcflib)" "vcfdistance (VCF metrics)" +.hy +.SH NAME +.PP +\f[B]vcfdistance\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfdistance\f[R] [customtagname] < [vcf file] +.SH DESCRIPTION +.PP +Adds a tag to each variant record which indicates the distance to the +nearest variant. +(Defaults to BasesToClosestVariant if no custom tag name is given.) +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: metrics +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed.
+.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfecho.1 libvcflib-1.0.2+dfsg/man/vcfecho.1 --- libvcflib-1.0.1+dfsg/man/vcfecho.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfecho.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,41 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFECHO" "1" "" "vcfecho (vcflib)" "vcfecho (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfecho\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfecho\f[R] +.SH DESCRIPTION +.PP +Echo VCF to stdout (simple demo) +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: transformation + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfentropy.1 libvcflib-1.0.2+dfsg/man/vcfentropy.1 --- libvcflib-1.0.1+dfsg/man/vcfentropy.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfentropy.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,48 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFENTROPY" "1" "" "vcfentropy (vcflib)" "vcfentropy (VCF metrics)" +.hy +.SH NAME +.PP +\f[B]vcfentropy\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfentropy\f[R] options +.SH DESCRIPTION +.PP +Annotate VCF records with the Shannon entropy of flanking sequence. +Annotates the output VCF file with, for each record, EntropyLeft, +EntropyRight, EntropyCenter, which are the entropies of the sequence of +the given window size to the left, right, and center of the record. +Also adds EntropyRef and EntropyAlt for each alt.
+.SH OPTIONS +.IP +.nf +\f[C] + +options: + -f, --fasta-reference FASTA reference file to use to obtain flanking sequences + -w, --window-size Size of the window over which to calculate entropy + + + +Type: metrics +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfevenregions.1 libvcflib-1.0.2+dfsg/man/vcfevenregions.1 --- libvcflib-1.0.1+dfsg/man/vcfevenregions.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfevenregions.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,52 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFEVENREGIONS" "1" "" "vcfevenregions (vcflib)" "vcfevenregions (VCF +transformation)" +.hy +.SH NAME +.PP +\f[B]vcfevenregions\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfevenregions\f[R] options +.SH DESCRIPTION +.PP +Generates a list of regions, e.g.\ chr20:10..30 using the variant +density information provided in the VCF file to ensure that the regions +have even numbers of variants. +This can be used to reduce the variance in runtime when dividing variant +detection or genotyping by genomic coordinates. +.SH OPTIONS +.IP +.nf +\f[C] + +options: + -f, --fasta-reference REF FASTA reference file to use to obtain primer sequences. + -n, --number-of-regions N The number of desired regions. + -p, --number-of-positions N The number of positions per region. + -o, --offset N Add an offset to region positioning, to avoid boundary + related artifacts in downstream processing. + -l, --overlap N The number of sites to overlap between regions. Default 0. + -s, --separator SEQ Specify string to use to separate region output.
Default \[aq]-\[aq] + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcffilter.1 libvcflib-1.0.2+dfsg/man/vcffilter.1 --- libvcflib-1.0.1+dfsg/man/vcffilter.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcffilter.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,70 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFFILTER" "1" "" "vcffilter (vcflib)" "vcffilter (VCF filter)" +.hy +.SH NAME +.PP +\f[B]vcffilter\f[R] +.SH SYNOPSIS +.PP +\f[B]vcffilter\f[R] options +.SH DESCRIPTION +.PP +VCF filter the specified vcf file using the set of filters +.SH OPTIONS +.IP +.nf +\f[C] + + +options: + -f, --info-filter specifies a filter to apply to the info fields of records, + removes alleles which do not pass the filter + -g, --genotype-filter specifies a filter to apply to the genotype fields of records + -k, --keep-info used in conjunction with \[aq]-g\[aq], keeps variant info, but removes genotype + -s, --filter-sites filter entire records, not just alleles + -t, --tag-pass tag vcf records as positively filtered with this tag, print all records + -F, --tag-fail tag vcf records as negatively filtered with this tag, print all records + -A, --append-filter append the existing filter tag, don\[aq]t just replace it + -a, --allele-tag apply -t on a per-allele basis. adds or sets the corresponding INFO field tag + -v, --invert inverts the filter, e.g. grep -v + -o, --or use logical OR instead of AND to combine filters + -r, --region specify a region on which to target the filtering, requires a BGZF + compressed file which has been indexed with tabix. any number of + regions may be specified. + +Filter the specified vcf file using the set of filters. 
+Filters are specified in the form \[dq] : + -f \[dq]DP > 10\[dq] # for info fields + -g \[dq]GT = 1|1\[dq] # for genotype fields + -f \[dq]CpG\[dq] # for \[aq]flag\[aq] fields + +Operators can be any of: =, !, <, >, |, & + +Any number of filters may be specified. They are combined via logical AND +unless --or is specified on the command line. Obtain logical negation through +the use of parentheses, e.g. \[dq]! ( DP = 10 )\[dq] + +For convenience, you can specify \[dq]QUAL\[dq] to refer to the quality of the site, even +though it does not appear in the INFO fields. + +type: filter +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcffixup.1 libvcflib-1.0.2+dfsg/man/vcffixup.1 --- libvcflib-1.0.1+dfsg/man/vcffixup.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcffixup.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,53 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFFIXUP" "1" "" "vcffixup (vcflib)" "vcffixup (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcffixup\f[R] +.SH SYNOPSIS +.PP +\f[B]vcffixup\f[R] +.SH DESCRIPTION +.PP +Generates a VCF stream where AC and NS have been generated for each +record using sample genotypes +.SH OPTIONS +.IP +.nf +\f[C] + + + + +Count the allele frequencies across alleles present in each record in the VCF file. (Similar to vcftools --freq.) + +Uses genotypes from the VCF file to correct AC (alternate allele count), AF +(alternate allele frequency), NS (number of called), in the VCF records. For +example: + + % vcfkeepsamples file.vcf NA12878 | **vcffixup** - | vcffilter -f \[dq]AC > 0\[dq] + +Would downsample file.vcf to only NA12878, removing sites for which the sample +was not called as polymorphic. 
+ +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfflatten.1 libvcflib-1.0.2+dfsg/man/vcfflatten.1 --- libvcflib-1.0.1+dfsg/man/vcfflatten.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfflatten.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,42 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFFLATTEN" "1" "" "vcfflatten (vcflib)" "vcfflatten (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfflatten\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfflatten\f[R] [file] +.SH DESCRIPTION +.PP +Removes multi-allelic sites by picking the most common alternate. +Requires allele frequency specification `AF' and use of `G' and `A' to +specify the fields which vary according to the Allele or Genotype. +VCF file may be specified on the command line or piped as stdin. +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. 
diff -Nru libvcflib-1.0.1+dfsg/man/vcfgeno2alleles.1 libvcflib-1.0.2+dfsg/man/vcfgeno2alleles.1 --- libvcflib-1.0.1+dfsg/man/vcfgeno2alleles.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfgeno2alleles.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,41 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFGENO2ALLELES" "1" "" "vcfgeno2alleles (vcflib)" "vcfgeno2alleles (VCF +transformation)" +.hy +.SH NAME +.PP +\f[B]vcfgeno2alleles\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfgeno2alleles\f[R] <[vcf file] +.SH DESCRIPTION +.PP +modifies the genotypes field to provide the literal alleles rather than +indexes +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfgeno2haplo.1 libvcflib-1.0.2+dfsg/man/vcfgeno2haplo.1 --- libvcflib-1.0.1+dfsg/man/vcfgeno2haplo.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfgeno2haplo.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,52 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFGENO2HAPLO" "1" "" "vcfgeno2haplo (vcflib)" "vcfgeno2haplo (VCF +transformation)" +.hy +.SH NAME +.PP +\f[B]vcfgeno2haplo\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfgeno2haplo\f[R] options [] +.SH DESCRIPTION +.PP +Convert genotype-based phased alleles within --window-size into +haplotype alleles. +Will break haplotype construction when encountering non-phased genotypes +on input.
+.SH OPTIONS +.IP +.nf +\f[C] + +options: + -h, --help Print this message + -v, --version Print version + -r, --reference FILE FASTA reference file + -w, --window-size N Merge variants at most this many bp apart (default 30) + -o, --only-variants Don\[aq]t output the entire haplotype, just concatenate + REF/ALT strings (delimited by \[dq]:\[dq]) + + + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfgenosamplenames.1 libvcflib-1.0.2+dfsg/man/vcfgenosamplenames.1 --- libvcflib-1.0.1+dfsg/man/vcfgenosamplenames.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfgenosamplenames.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,50 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFGENOSAMPLENAMES" "1" "" "vcfgenosamplenames (vcflib)" "vcfgenosamplenames +(VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfgenosamplenames\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfgenosamplenames\f[R] +.SH DESCRIPTION +.PP +Get samplenames +.SH EXAMPLES +.IP +.nf +\f[C] + +Example: + +vcfsamplenames samples/sample.vcf + +NA00001 +NA00002 +NA00003 + + +Type: transformation + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. 
diff -Nru libvcflib-1.0.1+dfsg/man/vcfgenosummarize.1 libvcflib-1.0.2+dfsg/man/vcfgenosummarize.1 --- libvcflib-1.0.1+dfsg/man/vcfgenosummarize.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfgenosummarize.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,44 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFGENOSUMMARIZE" "1" "" "vcfgenosummarize (vcflib)" "vcfgenosummarize (VCF +statistics)" +.hy +.SH NAME +.PP +\f[B]vcfgenosummarize\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfgenosummarize\f[R] <[input file] >[output vcf] +.SH DESCRIPTION +.PP +Adds summary statistics to each record summarizing qualities reported in +called genotypes. +Uses: RO (reference observation count), QR (quality sum reference +observations) AO (alternate observation count), QA (quality sum +alternate observations) +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: statistics +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfgenotypecompare.1 libvcflib-1.0.2+dfsg/man/vcfgenotypecompare.1 --- libvcflib-1.0.1+dfsg/man/vcfgenotypecompare.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfgenotypecompare.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,44 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFGENOTYPECOMPARE" "1" "" "vcfgenotypecompare (vcflib)" "vcfgenotypecompare +(VCF statistics)" +.hy +.SH NAME +.PP +\f[B]vcfgenotypecompare\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfgenotypecompare\f[R] +.SH DESCRIPTION +.PP +adds statistics to the INFO field of the vcf file describing the amount +of discrepancy between the genotypes (GT) in the vcf file and the +genotypes reported in the . +use this after vcfannotategenotypes to get correspondence statistics for +two vcfs. 
+.SH OPTIONS +.IP +.nf +\f[C] + + +Type: statistics +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfgenotypes.1 libvcflib-1.0.2+dfsg/man/vcfgenotypes.1 --- libvcflib-1.0.1+dfsg/man/vcfgenotypes.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfgenotypes.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,63 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFGENOTYPES" "1" "" "vcfgenotypes (vcflib)" "vcfgenotypes (VCF statistics)" +.hy +.SH NAME +.PP +\f[B]vcfgenotypes\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfgenotypes\f[R] +.SH DESCRIPTION +.PP +Report the genotypes for each sample, for each variant in the VCF. +Convert the numerical representation of genotypes provided by the GT +field to a human-readable genotype format. +.SH OPTIONS +.IP +.nf +\f[C] + + + +\f[R] +.fi +.SH EXAMPLES +.IP +.nf +\f[C] + +Example: + + **vcfgenotypes** samples/sample.vcf + +19 111 A C A,C NA00001:A/A NA00002:A/A NA00003:A/C +19 112 A G A,G NA00001:A/A NA00002:A/A NA00003:A/G +20 14370 G A G,A NA00001:G/G NA00002:G/A NA00003:A/A +20 17330 T A T,A NA00001:T/T NA00002:T/A NA00003:T/T +20 1110696 A G,T A,G,T NA00001:G/T NA00002:G/T NA00003:T/T +20 1230237 T . T,. NA00001:T/T NA00002:T/T NA00003:T/T +20 1234567 G GA,GAC G,GA,GAC NA00001:G/GA NA00002:G/GAC NA00003:GA/GA +20 1235237 T . T,. NA00001:T/T NA00002:T/T NA00003:./. +X 10 AC A,ATG AC,A,ATG NA00001:AC NA00002:AC/A NA00003:AC/ATG + +Type: statistics +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. 
+.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfglbound.1 libvcflib-1.0.2+dfsg/man/vcfglbound.1 --- libvcflib-1.0.1+dfsg/man/vcfglbound.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfglbound.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,45 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFGLBOUND" "1" "" "vcfglbound (vcflib)" "vcfglbound (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfglbound\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfglbound\f[R] options +.SH DESCRIPTION +.PP +Adjust GLs so that the maximum GL is 0 by dividing all GLs for each +sample by the max. +.SH OPTIONS +.IP +.nf +\f[C] + + +Then cap (bound) at N (e.g. -10).options: + -b, --bound N Bound GLs to this limit. + -x, --exclude-broken If GLs are > 0, remove site. + + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfglxgt.1 libvcflib-1.0.2+dfsg/man/vcfglxgt.1 --- libvcflib-1.0.1+dfsg/man/vcfglxgt.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfglxgt.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,43 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFGLXGT" "1" "" "vcfglxgt (vcflib)" "vcfglxgt (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfglxgt\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfglxgt\f[R] options +.SH DESCRIPTION +.PP +Set genotypes using the maximum genotype likelihood for each sample. 
+.SH OPTIONS +.IP +.nf +\f[C] + +options: + -n, --fix-null-genotypes only apply to null and partly-null genotypes + + + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfhetcount.1 libvcflib-1.0.2+dfsg/man/vcfhetcount.1 --- libvcflib-1.0.1+dfsg/man/vcfhetcount.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfhetcount.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,41 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFHETCOUNT" "1" "" "vcfhetcount (vcflib)" "vcfhetcount (VCF metrics)" +.hy +.SH NAME +.PP +\f[B]vcfhetcount\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfhetcount\f[R] +.SH DESCRIPTION +.PP +Calculate the heterozygosity rate: count the number of alternate alleles +in heterozygous genotypes in all records in the vcf file +.SH OPTIONS +.IP +.nf +\f[C] + +outputs a count for each individual in the file + +Type: metrics +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. 
diff -Nru libvcflib-1.0.1+dfsg/man/vcfhethomratio.1 libvcflib-1.0.2+dfsg/man/vcfhethomratio.1 --- libvcflib-1.0.1+dfsg/man/vcfhethomratio.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfhethomratio.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,39 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFHETHOMRATIO" "1" "" "vcfhethomratio (vcflib)" "vcfhethomratio (VCF metrics)" +.hy +.SH NAME +.PP +\f[B]vcfhethomratio\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfhethomratio\f[R] +.SH DESCRIPTION +.PP +Generates the het/hom ratio for each individual in the file +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: metrics +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfindex.1 libvcflib-1.0.2+dfsg/man/vcfindex.1 --- libvcflib-1.0.1+dfsg/man/vcfindex.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfindex.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,41 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFINDEX" "1" "" "vcfindex (vcflib)" "vcfindex (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfindex\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfindex\f[R] +.SH DESCRIPTION +.PP +Adds an index number to the INFO field (id=position) +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: transformation + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. 
diff -Nru libvcflib-1.0.1+dfsg/man/vcfinfo2qual.1 libvcflib-1.0.2+dfsg/man/vcfinfo2qual.1 --- libvcflib-1.0.1+dfsg/man/vcfinfo2qual.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfinfo2qual.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,42 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFINFO2QUAL" "1" "" "vcfinfo2qual (vcflib)" "vcfinfo2qual (VCF +transformation)" +.hy +.SH NAME +.PP +\f[B]vcfinfo2qual\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfinfo2qual\f[R] [key] [vcf_file] +.SH DESCRIPTION +.PP +Sets QUAL from info field tag keyed by [key]. +The VCF file may be omitted and read from stdin. +The average of the field is used if it contains multiple values. +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfinfosummarize.1 libvcflib-1.0.2+dfsg/man/vcfinfosummarize.1 --- libvcflib-1.0.1+dfsg/man/vcfinfosummarize.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfinfosummarize.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,49 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFINFOSUMMARIZE" "1" "" "vcfinfosummarize (vcflib)" "vcfinfosummarize (VCF +transformation)" +.hy +.SH NAME +.PP +\f[B]vcfinfosummarize\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfinfosummarize\f[R] options +.SH DESCRIPTION +.PP +Take annotations given in the per-sample fields and add the mean, +median, min, or max to the site-level INFO. 
+.SH OPTIONS +.IP +.nf +\f[C] + +options: + -f, --field Summarize this field in the INFO column + -i, --info Store the computed statistic in this info field + -a, --average Take the mean for field (default) + -m, --median Use the median + -n, --min Use the min + -x, --max Use the max + -h, --help Print this message + -v, --version Print version + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfintersect.1 libvcflib-1.0.2+dfsg/man/vcfintersect.1 --- libvcflib-1.0.1+dfsg/man/vcfintersect.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfintersect.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,65 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFINTERSECT" "1" "" "vcfintersect (vcflib)" "vcfintersect (VCF +transformation)" +.hy +.SH NAME +.PP +\f[B]vcfintersect\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfintersect\f[R] options [] +.SH DESCRIPTION +.PP +VCF set analysis +.SH OPTIONS +.IP +.nf +\f[C] + + +options: + -b, --bed FILE use intervals provided by this BED file + -R, --region REGION use 1-based tabix-style region (e.g. 
chrZ:10-20), multiples allowed + -S, --start-only don\[aq]t use the reference length information in the record to determine + overlap status, just use the start position + -v, --invert invert the selection, printing only records which would + not have been printed out + -i, --intersect-vcf FILE use this VCF for set intersection generation + -u, --union-vcf FILE use this VCF for set union generation + -w, --window-size N compare records up to this many bp away (default 30) + -r, --reference FILE FASTA reference file, required with -i and -u + -l, --loci output whole loci when one alternate allele matches + -m, --ref-match intersect on the basis of record REF string + -t, --tag TAG attach TAG to each record\[aq]s info field if it would intersect + -V, --tag-value VAL use this value to indicate that the allele is passing + \[aq].\[aq] will be used otherwise. default: \[aq]PASS\[aq] + -M, --merge-from FROM-TAG + -T, --merge-to TO-TAG merge from FROM-TAG used in the -i file, setting TO-TAG + in the current file. + +For bed-vcf intersection, alleles which fall into the targets are retained. + +Haplotype aware intersection, union and complement. Use for intersection and union of VCF files: unify on equivalent alleles within window-size bp +as determined by haplotype comparison alleles. + +type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. 
diff -Nru libvcflib-1.0.1+dfsg/man/vcfkeepgeno.1 libvcflib-1.0.2+dfsg/man/vcfkeepgeno.1 --- libvcflib-1.0.1+dfsg/man/vcfkeepgeno.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfkeepgeno.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,40 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFKEEPGENO" "1" "" "vcfkeepgeno (vcflib)" "vcfkeepgeno (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfkeepgeno\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfkeepgeno\f[R] [FIELD1] [FIELD2] \&... +.SH DESCRIPTION +.PP +Reduce file size by removing FORMAT fields not listed on the command +line from sample specifications in the output +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfkeepinfo.1 libvcflib-1.0.2+dfsg/man/vcfkeepinfo.1 --- libvcflib-1.0.1+dfsg/man/vcfkeepinfo.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfkeepinfo.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,39 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFKEEPINFO" "1" "" "vcfkeepinfo (vcflib)" "vcfkeepinfo (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfkeepinfo\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfkeepinfo\f[R] [FIELD1] [FIELD2] \&... +.SH DESCRIPTION +.PP +To decrease file size remove INFO fields not listed on the command line +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. 
diff -Nru libvcflib-1.0.1+dfsg/man/vcfkeepsamples.1 libvcflib-1.0.2+dfsg/man/vcfkeepsamples.1 --- libvcflib-1.0.1+dfsg/man/vcfkeepsamples.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfkeepsamples.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,32 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFKEEPSAMPLES" "1" "" "vcfkeepsamples (vcflib)" "vcfkeepsamples (VCF +transformation)" +.hy +.SH NAME +.PP +\f[B]vcfkeepsamples\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfkeepsamples\f[R] [SAMPLE1] [SAMPLE2] \&... +.SH DESCRIPTION +.PP +outputs each record in the vcf file, removing samples not listed on the +command line +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfld.1 libvcflib-1.0.2+dfsg/man/vcfld.1 --- libvcflib-1.0.1+dfsg/man/vcfld.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfld.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,50 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFLD" "1" "" "vcfld (vcflib)" "vcfld (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfld\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfld\f[R] \[en]target 0,1,2,3,4,5,6,7 \[en]background +11,12,13,16,17,19,22 \[en]file my.vcf -e -d -r +.SH DESCRIPTION +.PP +Compute LD +.SH OPTIONS +.IP +.nf +\f[C] + + +required: t,target -- argument: a zero base comma separated list of target individuals corresponding to VCF columns +required: b,background -- argument: a zero base comma separated list of background individuals corresponding to VCF columns +required: f,file -- argument: a properly formatted phased VCF file +required: y,type -- argument: type of genotype likelihood: PL, GL or GP +optional: w,window -- argument: window size to average LD; default is 1000 +optional: e,external -- 
switch: population to calculate LD expectation; default is target +optional: d,derived -- switch: which haplotype to count \[dq]00\[dq] vs \[dq]11\[dq]; default \[dq]00\[dq], + + +Type: transformation + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfleftalign.1 libvcflib-1.0.2+dfsg/man/vcfleftalign.1 --- libvcflib-1.0.1+dfsg/man/vcfleftalign.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfleftalign.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,59 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFLEFTALIGN" "1" "" "vcfleftalign (vcflib)" "vcfleftalign (VCF +transformation)" +.hy +.SH NAME +.PP +\f[B]vcfleftalign\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfleftalign\f[R] options [file] +.SH DESCRIPTION +.PP +Left-align indels and complex variants in the input using a pairwise +ref/alt alignment followed by a heuristic, iterative left realignment +process that shifts indel representations to their absolute leftmost +(5\[cq]) extent. +.SH OPTIONS +.IP +.nf +\f[C] + + +This is the same procedure used in the internal left alignment in +freebayes, and can be used when preparing VCF files for input to +freebayes to decrease positional representation differences between +the input alleles and left-realigned alignments. + +options: + + -r, --reference FILE Use this reference as a basis for realignment. + -w, --window N Use a window of this many bp when left aligning (150). + +Left-aligns variants in the specified input file or stdin. Window +size is determined dynamically according to the entropy of the regions +flanking the indel. These must have entropy > 1 bit/bp, or be shorter +than \[ti]5kb. 
+ + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcflength.1 libvcflib-1.0.2+dfsg/man/vcflength.1 --- libvcflib-1.0.1+dfsg/man/vcflength.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcflength.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,56 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFLENGTH" "1" "" "vcflength (vcflib)" "vcflength (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcflength\f[R] +.SH SYNOPSIS +.PP +\f[B]vcflength\f[R] +.SH DESCRIPTION +.PP +Add length info field +.SH EXAMPLES +.IP +.nf +\f[C] + +Example: + +**vcflength** samples/sample.vcf +##fileformat=VCFv4.0 +(...) +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +19 111 . A C 9.6 . length=0;length.alt=1;length.ref=1 GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 +19 112 . A G 10 . length=0;length.alt=1;length.ref=1 GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 +20 14370 rs6054257 G A 29 PASS AF=0.5;DP=14;NS=3;length=0;length.alt=1;length.ref=1;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. +20 17330 . T A 3 q10 AF=0.017;DP=11;NS=3;length=0;length.alt=1;length.ref=1 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3:.,. +20 1110696 rs6040355 A G,T 67 PASS AA=T;AF=0.333,0.667;DP=10;NS=2;length=0,0;length.alt=1,1;length.ref=1;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4:.,. +20 1230237 . T . 47 PASS AA=T;DP=13;NS=3;length=0;length.alt=1;length.ref=1GT:GQ:DP:HQ 0|0:54:.:56,60 0|0:48:4:51,51 0/0:61:2:.,. +20 1234567 microsat1 G GA,GAC 50 PASS AA=G;AC=3,1;AN=6;DP=9;NS=3;length=1,2;length.alt=2,3;length.ref=1 GT:GQ:DP 0/1:.:4 0/2:17:2 1/1:40:3 +20 1235237 . T . 0 . length=0;length.alt=1;length.ref=1 GT 0/00|0 ./. 
+X 10 rsTest AC A,ATG 10 PASS length=-1,1;length.alt=1,3;length.ref=2 GT 0 0/1 0|2 + +Type: transformation + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcflib.1 libvcflib-1.0.2+dfsg/man/vcflib.1 --- libvcflib-1.0.1+dfsg/man/vcflib.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcflib.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,674 @@ +.\"t +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "vcflib" "1" "" "vcflib" "vcflib (index)" +.hy +.SH NAME +.PP +\f[B]vcflib\f[R] index +.SH DESCRIPTION +.PP +vcflib contains tools and libraries for dealing with the Variant Call +Format (VCF) which is a flat-file, tab-delimited textual format intended +to describe reference-indexed variations between individuals. +.PP +VCF provides a common interchange format for the description of +variation in individuals and populations of samples, and has become the +de facto standard reporting format for a wide array of genomic variant +detectors. +.PP +vcflib provides methods to manipulate and interpret sequence variation +as it can be described by VCF. +It is both: +.IP \[bu] 2 +an API for parsing and operating on records of genomic variation as it +can be described by the VCF format, +.IP \[bu] 2 +and a collection of command-line utilities for executing complex +manipulations on VCF files. +.PP +The API itself provides a quick and extremely permissive method to read +and write VCF files. +Extensions and applications of the library provided in the included +utilities (*.cpp) comprise the vast bulk of the library\[cq]s utility +for most users. +.SS filter +.PP +.TS +tab(@); +lw(40.4n) lw(29.6n). 
+T{ +filter command +T}@T{ +description +T} +_ +T{ +\f[B]vcfuniq\f[R] +T}@T{ +List unique genotypes. +Like GNU uniq, but for VCF records. +Remove records which have the same position, ref, and alt as the previous +record. +T} +T{ +\f[B]vcfuniqalleles\f[R] +T}@T{ +List unique alleles For each record, remove any duplicate alternate +alleles that may have resulted from merging separate VCF files. +T} +T{ +\f[B]vcffilter\f[R] +T}@T{ +VCF filter the specified vcf file using the set of filters +T} +.TE +.SS metrics +.PP +.TS +tab(@); +lw(40.4n) lw(29.6n). +T{ +metrics command +T}@T{ +description +T} +_ +T{ +\f[B]vcfcheck\f[R] +T}@T{ +Validate integrity and identity of the VCF by verifying that the VCF +record\[cq]s REF matches a given reference file. +T} +T{ +\f[B]vcfhethomratio\f[R] +T}@T{ +Generates the het/hom ratio for each individual in the file +T} +T{ +\f[B]vcfhetcount\f[R] +T}@T{ +Calculate the heterozygosity rate: count the number of alternate alleles +in heterozygous genotypes in all records in the vcf file +T} +T{ +\f[B]vcfdistance\f[R] +T}@T{ +Adds a tag to each variant record which indicates the distance to the +nearest variant. +(defaults to BasesToClosestVariant if no custom tag name is given.) +T} +T{ +\f[B]vcfentropy\f[R] +T}@T{ +Annotate VCF records with the Shannon entropy of flanking sequence. +Annotates the output VCF file with, for each record, EntropyLeft, +EntropyRight, EntropyCenter, which are the entropies of the sequence of +the given window size to the left, right, and center of the record. +Also adds EntropyRef and EntropyAlt for each alt. +T} +.TE +.SS phenotype +.PP +.TS +tab(@); +lw(40.4n) lw(29.6n). +T{ +phenotype command +T}@T{ +description +T} +_ +T{ +\f[B]permuteGPAT++\f[R] +T}@T{ +\f[B]permuteGPAT++\f[R] is a method for adding empirical p-values to a +GPAT++ score. +T} +.TE +.SS genotype +.PP +.TS +tab(@); +lw(40.4n) lw(29.6n). 
+T{ +genotype command +T}@T{ +description +T} +_ +T{ +\f[B]normalize-iHS\f[R] +T}@T{ +normalizes iHS or XP-EHH scores. +T} +T{ +\f[B]hapLrt\f[R] +T}@T{ +HapLRT is a likelihood ratio test for haplotype lengths. +The lengths are modeled with an exponential distribution. +The sign denotes if the target has longer haplotypes (1) or the +background (-1). +T} +T{ +\f[B]abba-baba\f[R] +T}@T{ +\f[B]abba-baba\f[R] calculates the tree pattern for four individuals. +This tool assumes reference is ancestral and ignores non +\f[B]abba-baba\f[R] sites. +The output is a boolean value: 1 = true , 0 = false for abba and baba. +the tree argument should be specified from the most basal taxa to the +most derived. +T} +.TE +.SS transformation +.PP +.TS +tab(@); +lw(40.4n) lw(29.6n). +T{ +transformation command +T}@T{ +description +T} +_ +T{ +\f[B]vcfinfo2qual\f[R] +T}@T{ +Sets QUAL from info field tag keyed by [key]. +The VCF file may be omitted and read from stdin. +The average of the field is used if it contains multiple values. +T} +T{ +\f[B]vcfsamplediff\f[R] +T}@T{ +Establish putative somatic variants using reported differences between +germline and somatic samples. +Tags each record where the listed sample genotypes differ with . +The first sample is assumed to be germline, the second somatic. +Each record is tagged with ={germline,somatic,loh} to specify the type +of variant given the genotype difference between the two samples. +T} +T{ +\f[B]vcfaddinfo\f[R] +T}@T{ +Adds info fields from the second file which are not present in the first +vcf file. +T} +T{ +\f[B]vcfremoveaberrantgenotypes\f[R] +T}@T{ +strips samples which are homozygous but have observations implying +heterozygosity. +Remove samples for which the reported genotype (GT) and observation +counts disagree (AO, RO). +T} +T{ +\f[B]vcfglxgt\f[R] +T}@T{ +Set genotypes using the maximum genotype likelihood for each sample. 
+T} +T{ +\f[B]dumpContigsFromHeader\f[R] +T}@T{ +Dump contigs from header +T} +T{ +\f[B]vcfevenregions\f[R] +T}@T{ +Generates a list of regions, e.g.\ chr20:10..30 using the variant +density information provided in the VCF file to ensure that the regions +have even numbers of variants. +This can be used to reduce the variance in runtime when dividing variant +detection or genotyping by genomic coordinates. +T} +T{ +\f[B]vcfcat\f[R] +T}@T{ +Concatenates VCF files +T} +T{ +\f[B]vcfannotategenotypes\f[R] +T}@T{ +Examine genotype correspondence. +Annotate genotypes in the first file with genotypes in the second adding +the genotype as another flag to each sample field in the first file. +annotation-tag is the name of the sample flag which is added to store +the annotation. +also adds a `has_variant' flag for sites where the second file has a +variant. +T} +T{ +\f[B]vcfafpath\f[R] +T}@T{ +Display genotype paths +T} +T{ +\f[B]vcfclassify\f[R] +T}@T{ +Creates a new VCF where each variant is tagged by allele class: snp, +ts/tv, indel, mnp +T} +T{ +\f[B]vcfallelicprimitives\f[R] +T}@T{ +If multiple allelic primitives (gaps or mismatches) are specified in a +single VCF record, split the record into multiple lines, but drop all +INFO fields. +Does not handle genotypes (yet). +MNPs are split into multiple SNPs unless the -m flag is provided. +Records generated by splits have th +T} +T{ +\f[B]vcfqual2info\f[R] +T}@T{ +Puts QUAL into an info field tag keyed by [key]. +T} +T{ +\f[B]vcfcreatemulti\f[R] +T}@T{ +If overlapping alleles are represented across multiple records, merge +them into a single record. +Currently only for indels. +T} +T{ +\f[B]vcfgeno2alleles\f[R] +T}@T{ +modifies the genotypes field to provide the literal alleles rather than +indexes +T} +T{ +\f[B]vcfsample2info\f[R] +T}@T{ +Take annotations given in the per-sample fields and add the mean, +median, min, or max to the site-level INFO. 
+T} +T{ +\f[B]vcfld\f[R] +T}@T{ +Compute LD +T} +T{ +\f[B]vcfnumalt\f[R] +T}@T{ +outputs a VCF stream where NUMALT has been generated for each record +using sample genotypes +T} +T{ +\f[B]vcfstreamsort\f[R] +T}@T{ +Sorts the input (either stdin or file) using a streaming sort algorithm. +Guarantees that the positional order is correct provided out-of-order +variants are no more than 100 positions in the VCF file apart. +T} +T{ +\f[B]vcfinfosummarize\f[R] +T}@T{ +Take annotations given in the per-sample fields and add the mean, +median, min, or max to the site-level INFO. +T} +T{ +\f[B]vcflength\f[R] +T}@T{ +Add length info field +T} +T{ +\f[B]vcfkeepgeno\f[R] +T}@T{ +Reduce file size by removing FORMAT fields not listed on the command +line from sample specifications in the output +T} +T{ +\f[B]vcfcombine\f[R] +T}@T{ +Combine VCF files positionally, combining samples when sites and alleles +are identical. +Any number of VCF files may be combined. +The INFO field and other columns are taken from one of the files which +are combined when records in multiple files match. +Alleles must have identical ordering to be combined into one record. +If they do not, multiple records will be emitted. +T} +T{ +\f[B]vcfprimers\f[R] +T}@T{ +For each VCF record, extract the flanking sequences, and write them to +stdout as FASTA records suitable for alignment. +T} +T{ +\f[B]vcfflatten\f[R] +T}@T{ +Removes multi-allelic sites by picking the most common alternate. +Requires allele frequency specification `AF' and use of `G' and `A' to +specify the fields which vary according to the Allele or Genotype. +VCF file may be specified on the command line or piped as stdin. +T} +T{ +\f[B]vcf2dag\f[R] +T}@T{ +Modify VCF to be able to build a directed acyclic graph (DAG) +T} +T{ +\f[B]vcfcleancomplex\f[R] +T}@T{ +Removes reference-matching sequence from complex alleles and adjusts +records to reflect positional change. 
+T} +T{ +\f[B]vcfbreakmulti\f[R] +T}@T{ +If multiple alleles are specified in a single record, break the record +into multiple lines, preserving allele-specific INFO fields. +T} +T{ +\f[B]vcfindex\f[R] +T}@T{ +Adds an index number to the INFO field (id=position) +T} +T{ +\f[B]vcfkeepinfo\f[R] +T}@T{ +To decrease file size remove INFO fields not listed on the command line +T} +T{ +\f[B]vcfgeno2haplo\f[R] +T}@T{ +Convert genotype-based phased alleles within \[en]window-size into +haplotype alleles. +Will break haplotype construction when encountering non-phased genotypes +on input. +T} +T{ +\f[B]vcfintersect\f[R] +T}@T{ +VCF set analysis +T} +T{ +\f[B]vcfannotate\f[R] +T}@T{ +Intersect the records in the VCF file with targets provided in a BED +file. +Intersections are done on the reference sequences in the VCF file. +If no VCF filename is specified on the command line (last argument) the +VCF read from stdin. +T} +T{ +\f[B]smoother\f[R] +T}@T{ +smoothes is a method for window smoothing many of the GPAT++ formats. +T} +T{ +\f[B]vcf2fasta\f[R] +T}@T{ +Generates sample_seq:N.fa for each sample, reference sequence, and +chromosomal copy N in [0,1\&... ploidy]. +Each sequence in the fasta file is named using the same pattern used for +the file name, allowing them to be combined. +T} +T{ +\f[B]vcfsamplenames\f[R] +T}@T{ +List sample names +T} +T{ +\f[B]vcfleftalign\f[R] +T}@T{ +Left-align indels and complex variants in the input using a pairwise +ref/alt alignment followed by a heuristic, iterative left realignment +process that shifts indel representations to their absolute leftmost +(5\[cq]) extent. +T} +T{ +\f[B]vcfglbound\f[R] +T}@T{ +Adjust GLs so that the maximum GL is 0 by dividing all GLs for each +sample by the max. 
+T} +T{ +\f[B]vcfcommonsamples\f[R] +T}@T{ +Generates each record in the first file, removing samples not present in +the second +T} +T{ +\f[B]vcfecho\f[R] +T}@T{ +Echo VCF to stdout (simple demo) +T} +T{ +\f[B]vcfkeepsamples\f[R] +T}@T{ +outputs each record in the vcf file, removing samples not listed on the +command line +T} +T{ +\f[B]vcf2tsv\f[R] +T}@T{ +Converts VCF to per-allele or per-genotype tab-delimited format, using +null string to replace empty values in the table. +Specifying -g will output one line per sample with genotype information. +When there is more than one alt allele there will be multiple rows, one +for each allele, and the info will match the `A' index +T} +T{ +\f[B]vcfoverlay\f[R] +T}@T{ +Overlay records in the input vcf files with order as precedence. +T} +T{ +\f[B]vcfgenosamplenames\f[R] +T}@T{ +Get samplenames +T} +T{ +\f[B]vcfremovesamples\f[R] +T}@T{ +outputs each record in the vcf file, removing samples listed on the +command line +T} +T{ +\f[B]vcfremap\f[R] +T}@T{ +For each alternate allele, attempt to realign against the reference with +lowered gap open penalty. +If realignment is possible, adjust the cigar and reference/alternate +alleles. +Observe how different alignment parameters, including context and +entropy-dependent ones, influence variant classification and +interpretation. +T} +T{ +\f[B]vcffixup\f[R] +T}@T{ +Generates a VCF stream where AC and NS have been generated for each +record using sample genotypes +T} +.TE +.SS statistics +.PP +.TS +tab(@); +lw(40.4n) lw(29.6n). +T{ +statistics command +T}@T{ +description +T} +_ +T{ +\f[B]vcfgenosummarize\f[R] +T}@T{ +Adds summary statistics to each record summarizing qualities reported in +called genotypes.
+Uses: RO (reference observation count), QR (quality sum reference +observations) AO (alternate observation count), QA (quality sum +alternate observations) +T} +T{ +\f[B]vcfcountalleles\f[R] +T}@T{ +Count alleles +T} +T{ +\f[B]meltEHH\f[R] +T}@T{ +T} +T{ +\f[B]genotypeSummary\f[R] +T}@T{ +Generates a table of genotype counts. +Summarizes genotype counts for bi-allelic SNVs and indel +T} +T{ +\f[B]vcfrandomsample\f[R] +T}@T{ +Randomly sample sites from an input VCF file, which may be provided as +stdin. +Scale the sampling probability by the field specified in KEY. +This may be used to provide uniform sampling across allele frequencies, +for instance. +T} +T{ +\f[B]pVst\f[R] +T}@T{ +\f[B]pVst\f[R] calculates vst, a measure of CNV stratification. +T} +T{ +\f[B]vcfrandom\f[R] +T}@T{ +Generate a random VCF file +T} +T{ +\f[B]segmentFst\f[R] +T}@T{ +\f[B]segmentFst\f[R] creates genomic segments (bed file) for regions +with high wcFst +T} +T{ +\f[B]sequenceDiversity\f[R] +T}@T{ +The \f[B]sequenceDiversity\f[R] program calculates two popular metrics +of haplotype diversity: pi and extended haplotype homozygosity (eHH). +Pi is calculated using the Nei and Li 1979 formulation. +eHH is a convenient way to think about haplotype diversity. +When eHH = 0 all haplotypes in the window are unique and when eHH = 1 +all haplotypes in the window are identical. +T} +T{ +\f[B]segmentIhs\f[R] +T}@T{ +Creates genomic segments (bed file) for regions with high wcFst +T} +T{ +\f[B]vcfgenotypes\f[R] +T}@T{ +Report the genotypes for each sample, for each variant in the VCF. +Convert the numerical representation of genotypes provided by the GT +field to a human-readable genotype format. +T} +T{ +\f[B]vcfaltcount\f[R] +T}@T{ +count the number of alternate alleles in all records in the vcf file +T} +T{ +\f[B]plotHaps\f[R] +T}@T{ +\f[B]plotHaps\f[R] provides the formatted output that can be used with +`bin/plotHaplotypes.R'.
+T} +T{ +\f[B]vcfsitesummarize\f[R] +T}@T{ +Summarize by site +T} +T{ +\f[B]vcfgenotypecompare\f[R] +T}@T{ +adds statistics to the INFO field of the vcf file describing the amount +of discrepancy between the genotypes (GT) in the vcf file and the +genotypes reported in the . +use this after vcfannotategenotypes to get correspondence statistics for +two vcfs. +T} +T{ +\f[B]vcfstats\f[R] +T}@T{ +Prints statistics about variants in the input VCF file. +T} +T{ +\f[B]wcFst\f[R] +T}@T{ +\f[B]wcFst\f[R] is Weir & Cockerham\[cq]s Fst for two populations. +Negative values are VALID, they are sites which can be treated as zero +Fst. +For more information see Evolution, Vol. +38 N. +6 Nov 1984. +Specifically \f[B]wcFst\f[R] uses equations 1,2,3,4. +T} +T{ +\f[B]permuteSmooth\f[R] +T}@T{ +\f[B]permuteSmooth\f[R] is a method for adding empirical p-values +smoothed wcFst scores. +T} +T{ +\f[B]bFst\f[R] +T}@T{ +\f[B]bFst\f[R] is a Bayesian approach to Fst. +Importantly \f[B]bFst\f[R] accounts for genotype uncertainty in the +model using genotype likelihoods. +For a more detailed description see: \[ga]A Bayesian approach to +inferring population structure from dominant markers\[cq] by Holsinger +et al.\ Molecular Ecology Vol 11, issue 7 2002. +The likelihood function has been modified to use genotype likelihoods +provided by variant callers. +There are five free parameters estimated in the model: each +subpopulation\[cq]s allele frequency and Fis (fixation index, within +each subpopulation), a free parameter for the total population\[cq]s +allele frequency, and Fst. +T} +T{ +\f[B]vcfroc\f[R] +T}@T{ +Generates a pseudo-ROC curve using sensitivity and specificity estimated +against a putative truth set. +Thresholding is provided by successive QUAL cutoffs. +T} +T{ +\f[B]vcfparsealts\f[R] +T}@T{ +Alternate allele parsing method. +This method uses pairwise alignment of REF and ALTs to determine +component allelic primitives for each alternate allele. 
+T} +T{ +\f[B]pFst\f[R] +T}@T{ +\f[B]pFst\f[R] is a probabilistic approach for detecting differences in +allele frequencies between two populations. +T} +T{ +\f[B]iHS\f[R] +T}@T{ +\f[B]iHS\f[R] calculates the integrated haplotype score which measures +the relative decay of extended haplotype homozygosity (EHH) for the +reference and alternative alleles at a site (see: Voight et al.\ 2006, +Szpiech & Hernandez 2014). +T} +T{ +\f[B]popStats\f[R] +T}@T{ +General population genetic statistics for each SNP +T} +.TE +.SH SOURCE CODE +.PP +See the source code repository at https://github.com/vcflib/vcflib +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfnumalt.1 libvcflib-1.0.2+dfsg/man/vcfnumalt.1 --- libvcflib-1.0.1+dfsg/man/vcfnumalt.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfnumalt.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,31 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFNUMALT" "1" "" "vcfnumalt (vcflib)" "vcfnumalt (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfnumalt\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfnumalt\f[R] +.SH DESCRIPTION +.PP +outputs a VCF stream where NUMALT has been generated for each record +using sample genotypes +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors.
diff -Nru libvcflib-1.0.1+dfsg/man/vcfoverlay.1 libvcflib-1.0.2+dfsg/man/vcfoverlay.1 --- libvcflib-1.0.1+dfsg/man/vcfoverlay.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfoverlay.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,41 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFOVERLAY" "1" "" "vcfoverlay (vcflib)" "vcfoverlay (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfoverlay\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfoverlay\f[R] options [ \&...] +.SH DESCRIPTION +.PP +Overlay records in the input vcf files with order as precedence. +.SH OPTIONS +.IP +.nf +\f[C] + +options: + -h, --help this dialog + -v, --version prints version + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfparsealts.1 libvcflib-1.0.2+dfsg/man/vcfparsealts.1 --- libvcflib-1.0.1+dfsg/man/vcfparsealts.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfparsealts.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,68 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFPARSEALTS" "1" "" "vcfparsealts (vcflib)" "vcfparsealts (VCF statistics)" +.hy +.SH NAME +.PP +\f[B]vcfparsealts\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfparsealts\f[R] +.SH DESCRIPTION +.PP +Alternate allele parsing method. +This method uses pairwise alignment of REF and ALTs to determine +component allelic primitives for each alternate allele. +.SH EXAMPLES +.IP +.nf +\f[C] + +Example: + +**vcfparsealts** samples/sample.vcf +##fileformat=VCFv4.0 +(...) +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +19 111 . A C 9.6 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 + ( A :: 111 A -> A; ) ( C :: 111 A -> C; ) +19 112 . A G 10 . . 
GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 + ( A :: 112 A -> A; ) ( G :: 112 A -> G; ) +20 14370 rs6054257 G A 29 PASS AF=0.5;DP=14;NS=3;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. + ( A :: 14370 G -> A; ) ( G :: 14370 G -> G; ) +20 17330 . T A 3 q10 AF=0.017;DP=11;NS=3 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3:.,. + ( A :: 17330 T -> A; ) ( T :: 17330 T -> T; ) +20 1110696 rs6040355 A G,T 67 PASS AA=T;AF=0.333,0.667;DP=10;NS=2;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4:.,. + ( A :: 1110696 A -> A; ) ( G :: 1110696 A -> G; ) ( T :: 1110696 A -> T; ) +20 1230237 . T . 47 PASS AA=T;DP=13;NS=3 GT:GQ:DP:HQ 0|0:54:.:56,60 0|0:48:4:51,51 0/0:61:2:.,. + ( . :: 1230237 T -> .; ) ( T :: 1230237 T -> T; ) +20 1234567 microsat1 G GA,GAC 50 PASS AA=G;AC=3,1;AN=6;DP=9;NS=3 GT:GQ:DP 0/1:.:4 0/2:17:2 1/1:40:3 + ( G :: 1234567 G -> G; ) ( GA :: 1234567 G -> G; 1234568 -> A; ) ( GAC :: 1234567 G -> G; 1234568 -> AC; ) +20 1235237 . T . 0 . . GT 0/0 0|0 ./. + ( . :: 1235237 T -> .; ) ( T :: 1235237 T -> T; ) +X 10 rsTest AC A,ATG 10 PASS . GT 0 0/1 0|2 + ( A :: 10 A -> A; 11 C -> ; ) ( AC :: 10 AC -> AC; ) ( ATG :: 10 A -> A; 11 -> T; 11 C -> G; ) + + +Type: statistics + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. 
diff -Nru libvcflib-1.0.1+dfsg/man/vcfprimers.1 libvcflib-1.0.2+dfsg/man/vcfprimers.1 --- libvcflib-1.0.1+dfsg/man/vcfprimers.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfprimers.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,55 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFPRIMERS" "1" "" "vcfprimers (vcflib)" "vcfprimers (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfprimers\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfprimers\f[R] options +.SH DESCRIPTION +.PP +For each VCF record, extract the flanking sequences, and write them to +stdout as FASTA records suitable for alignment. +.SH OPTIONS +.IP +.nf +\f[C] + +options: + -f, --fasta-reference FASTA reference file to use to obtain primer sequences + -l, --primer-length The length of the primer sequences on each side of the variant + +This tool is intended for use in designing validation +experiments. Primers extracted which would flank all of the alleles at multi-allelic +sites. The name of the FASTA \[dq]reads\[dq] indicates the VCF record which they apply to. +The form is >CHROM_POS_LEFT for the 3\[aq] primer and >CHROM_POS_RIGHT for the 5\[aq] primer, +for example: + +>20_233255_LEFT +CCATTGTATATATAGACCATAATTTCTTTATCCAATCATCTGTTGATGGA +>20_233255_RIGHT +ACTCAGTTGATTCCATACCTTTGCCATCATGAATCATGTTGTAATAAACA + + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. 
diff -Nru libvcflib-1.0.1+dfsg/man/vcfqual2info.1 libvcflib-1.0.2+dfsg/man/vcfqual2info.1 --- libvcflib-1.0.1+dfsg/man/vcfqual2info.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfqual2info.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,41 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFQUAL2INFO" "1" "" "vcfqual2info (vcflib)" "vcfqual2info (VCF +transformation)" +.hy +.SH NAME +.PP +\f[B]vcfqual2info\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfqual2info\f[R] [key] [vcf_file] +.SH DESCRIPTION +.PP +Puts QUAL into an info field tag keyed by [key]. +.SH OPTIONS +.IP +.nf +\f[C] + + + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfrandom.1 libvcflib-1.0.2+dfsg/man/vcfrandom.1 --- libvcflib-1.0.1+dfsg/man/vcfrandom.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfrandom.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,68 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFRANDOM" "1" "" "vcfrandom (vcflib)" "vcfrandom (VCF statistics)" +.hy +.SH NAME +.PP +\f[B]vcfrandom\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfrandom\f[R] +.SH DESCRIPTION +.PP +Generate a random VCF file +.SH EXAMPLES +.IP +.nf +\f[C] + +Example: + + **vcfrandom** + +##fileformat=VCFv4.0 +##source=**vcfrandom** +##reference=/d2/data/references/build_37/human_reference_v37.fa +##phasing=none +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT bill +one 1 . G G,A 100 . DP=83 GT:DP 0/1:1 +one 2 . G G,A 100 . DP=3 GT:DP 0/1:49 +one 3 . G C,T 100 . DP=5 GT:DP 0/1:12 +one 4 . C G,T 100 . DP=51 GT:DP 0/1:60 +one 5 . A T,A 100 . DP=31 GT:DP 0/1:89 +one 6 . T T,A 100 . 
DP=56 GT:DP 0/1:60 +one 7 . T A,C 100 . DP=78 GT:DP 0/1:75 +one 8 . T G,A 100 . DP=73 GT:DP 0/1:78 +one 9 . C C,G 100 . DP=42 GT:DP 0/1:67 + + +Type: statistics + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfrandomsample.1 libvcflib-1.0.2+dfsg/man/vcfrandomsample.1 --- libvcflib-1.0.1+dfsg/man/vcfrandomsample.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfrandomsample.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,48 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFRANDOMSAMPLE" "1" "" "vcfrandomsample (vcflib)" "vcfrandomsample (VCF +statistics)" +.hy +.SH NAME +.PP +\f[B]vcfrandomsample\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfrandomsample\f[R] options [] +.SH DESCRIPTION +.PP +Randomly sample sites from an input VCF file, which may be provided as +stdin. +Scale the sampling probability by the field specified in KEY. +This may be used to provide uniform sampling across allele frequencies, +for instance. +.SH OPTIONS +.IP +.nf +\f[C] + +options: + -r, --rate RATE base sampling probability per locus + -s, --scale-by KEY scale sampling likelihood by this Float info field + -p, --random-seed N use this random seed (by default read from /dev/random) + -q, --pseudorandom-seed use a pseudorandom seed (by default read from /dev/random) + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. 
diff -Nru libvcflib-1.0.1+dfsg/man/vcfremap.1 libvcflib-1.0.2+dfsg/man/vcfremap.1 --- libvcflib-1.0.1+dfsg/man/vcfremap.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfremap.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,55 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFREMAP" "1" "" "vcfremap (vcflib)" "vcfremap (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfremap\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfremap\f[R] options [] +.SH DESCRIPTION +.PP +For each alternate allele, attempt to realign against the reference with +lowered gap open penalty. +If realignment is possible, adjust the cigar and reference/alternate +alleles. +Observe how different alignment parameters, including context and +entropy-dependent ones, influence variant classification and +interpretation. +.SH OPTIONS +.IP +.nf +\f[C] + +options: + -w, --ref-window-size N align using this many bases flanking each side of the reference allele + -s, --alt-window-size N align using this many flanking bases from the reference around each alternate allele + -r, --reference FILE FASTA reference file, required with -i and -u + -m, --match-score N match score for SW algorithm + -x, --mismatch-score N mismatch score for SW algorithm + -o, --gap-open-penalty N gap open penalty for SW algorithm + -e, --gap-extend-penalty N gap extension penalty for SW algorithm + -z, --entropy-gap-open use entropy scaling for the gap open penalty + -R, --repeat-gap-extend N penalize non-repeat-unit gaps in repeat sequence + -a, --adjust-vcf TAG supply a new cigar as TAG in the output VCF + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. 
diff -Nru libvcflib-1.0.1+dfsg/man/vcfremoveaberrantgenotypes.1 libvcflib-1.0.2+dfsg/man/vcfremoveaberrantgenotypes.1 --- libvcflib-1.0.1+dfsg/man/vcfremoveaberrantgenotypes.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfremoveaberrantgenotypes.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,42 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFREMOVEABERRANTGENOTYPES" "1" "" "vcfremoveaberrantgenotypes (vcflib)" "vcfremoveaberrantgenotypes (VCF transformation)" +.hy +.SH NAME +.PP +\f[B]vcfremoveaberrantgenotypes\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfremoveaberrantgenotypes\f[R] +.SH DESCRIPTION +.PP +strips samples which are homozygous but have observations implying +heterozygosity. +Remove samples for which the reported genotype (GT) and observation +counts disagree (AO, RO). +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfremovesamples.1 libvcflib-1.0.2+dfsg/man/vcfremovesamples.1 --- libvcflib-1.0.1+dfsg/man/vcfremovesamples.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfremovesamples.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,32 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFREMOVESAMPLES" "1" "" "vcfremovesamples (vcflib)" "vcfremovesamples (VCF +transformation)" +.hy +.SH NAME +.PP +\f[B]vcfremovesamples\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfremovesamples\f[R] [SAMPLE1] [SAMPLE2] \&... 
+.SH DESCRIPTION +.PP +outputs each record in the vcf file, removing samples listed on the +command line +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfroc.1 libvcflib-1.0.2+dfsg/man/vcfroc.1 --- libvcflib-1.0.1+dfsg/man/vcfroc.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfroc.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,45 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFROC" "1" "" "vcfroc (vcflib)" "vcfroc (VCF statistics)" +.hy +.SH NAME +.PP +\f[B]vcfroc\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfroc\f[R] options [] +.SH DESCRIPTION +.PP +Generates a pseudo-ROC curve using sensitivity and specificity estimated +against a putative truth set. +Thresholding is provided by successive QUAL cutoffs. +.SH OPTIONS +.IP +.nf +\f[C] + +options: + -t, --truth-vcf FILE use this VCF as ground truth for ROC generation + -w, --window-size N compare records up to this many bp away (default 30) + -c, --complex directly compare complex alleles, don\[aq]t parse into primitives + -r, --reference FILE FASTA reference file + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. 
diff -Nru libvcflib-1.0.1+dfsg/man/vcfsample2info.1 libvcflib-1.0.2+dfsg/man/vcfsample2info.1 --- libvcflib-1.0.1+dfsg/man/vcfsample2info.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfsample2info.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,47 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFSAMPLE2INFO" "1" "" "vcfsample2info (vcflib)" "vcfsample2info (VCF +transformation)" +.hy +.SH NAME +.PP +\f[B]vcfsample2info\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfsample2info\f[R] options +.SH DESCRIPTION +.PP +Take annotations given in the per-sample fields and add the mean, +median, min, or max to the site-level INFO. +.SH OPTIONS +.IP +.nf +\f[C] + +options: + -f, --field Add information about this field in samples to INFO column + -i, --info Store the computed statistic in this info field + -a, --average Take the mean of samples for field (default) + -m, --median Use the median + -n, --min Use the min + -x, --max Use the max + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfsamplediff.1 libvcflib-1.0.2+dfsg/man/vcfsamplediff.1 --- libvcflib-1.0.1+dfsg/man/vcfsamplediff.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfsamplediff.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,49 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFSAMPLEDIFF" "1" "" "vcfsamplediff (vcflib)" "vcfsamplediff (VCF +transformation)" +.hy +.SH NAME +.PP +\f[B]vcfsamplediff\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfsamplediff\f[R] options [ \&... ] +.SH DESCRIPTION +.PP +Establish putative somatic variants using reported differences between +germline and somatic samples. +Tags each record where the listed sample genotypes differ with <tag>.
+The first sample is assumed to be germline, the second somatic. +Each record is tagged with <tag>={germline,somatic,loh} to specify the type +of variant given the genotype difference between the two samples. +.SH OPTIONS +.IP +.nf +\f[C] + + +options: + -s --strict Require that no observations in the germline support the somatic alternate. + + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfsamplenames.1 libvcflib-1.0.2+dfsg/man/vcfsamplenames.1 --- libvcflib-1.0.1+dfsg/man/vcfsamplenames.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfsamplenames.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,42 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFSAMPLENAMES" "1" "" "vcfsamplenames (vcflib)" "vcfsamplenames (VCF +transformation)" +.hy +.SH NAME +.PP +\f[B]vcfsamplenames\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfsamplenames\f[R] +.SH DESCRIPTION +.PP +List sample names +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: transformation + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors.
diff -Nru libvcflib-1.0.1+dfsg/man/vcfsitesummarize.1 libvcflib-1.0.2+dfsg/man/vcfsitesummarize.1 --- libvcflib-1.0.1+dfsg/man/vcfsitesummarize.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfsitesummarize.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,57 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFSITESUMMARIZE" "1" "" "vcfsitesummarize (vcflib)" "vcfsitesummarize (VCF +statistics)" +.hy +.SH NAME +.PP +\f[B]vcfsitesummarize\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfsitesummarize\f[R] +.SH DESCRIPTION +.PP +Summarize by site +.SH EXAMPLES +.IP +.nf +\f[C] + +Example: + +**vcfsitesummarize** samples/sample.vcf + +CHROM POS ID REF QUAL FILTER AA AC AF AN DP NS DB H2 +19 111 . A 9.6 . 0 0 +19 112 . A 10 . 0 0 +20 14370 rs6054257 G 29 PASS 0.5 14 3 1 1 +20 17330 . T 3 q10 0.017 11 3 0 0 +20 1110696 rs6040355 A 67 PASS T 10 2 1 0 +20 1230237 . T 47 PASS T 13 3 0 0 +20 1234567 microsat1 G 50 PASS G 6 9 3 0 0 +20 1235237 . T 0 . 0 0 +X 10 rsTest AC 10 PASS + + +Type: statistics + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfstats.1 libvcflib-1.0.2+dfsg/man/vcfstats.1 --- libvcflib-1.0.1+dfsg/man/vcfstats.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfstats.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,52 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFSTATS" "1" "" "vcfstats (vcflib)" "vcfstats (VCF statistics)" +.hy +.SH NAME +.PP +\f[B]vcfstats\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfstats\f[R] options +.SH DESCRIPTION +.PP +Prints statistics about variants in the input VCF file. 
+.SH OPTIONS +.IP +.nf +\f[C] + + + -r, --region specify a region on which to target the stats, requires a BGZF + compressed file which has been indexed with tabix. any number of + regions may be specified. + -a, --add-info add the statistics intermediate information to the VCF file, + writing out VCF records instead of summary statistics + -t, --add-type only add the type= field to the info (faster than -a) + -l, --no-length-frequency don\[aq]t out the indel and mnp length-frequency spectra + -m, --match-score N match score for SW algorithm + -x, --mismatch-score N mismatch score for SW algorithm + -o, --gap-open-penalty N gap open penalty for SW algorithm + -e, --gap-extend-penalty N gap extension penalty for SW algorithm + + +Type: statistics +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfstreamsort.1 libvcflib-1.0.2+dfsg/man/vcfstreamsort.1 --- libvcflib-1.0.1+dfsg/man/vcfstreamsort.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfstreamsort.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,47 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFSTREAMSORT" "1" "" "vcfstreamsort (vcflib)" "vcfstreamsort (VCF +transformation)" +.hy +.SH NAME +.PP +\f[B]vcfstreamsort\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfstreamsort\f[R] options [vcf file] +.SH DESCRIPTION +.PP +Sorts the input (either stdin or file) using a streaming sort algorithm. +Guarantees that the positional order is correct provided out-of-order +variants are no more than 100 positions in the VCF file apart. 
+.SH OPTIONS +.IP +.nf +\f[C] + +options: + + -h, --help this dialog + -w, --window N number of sites to sort (default 10000) + -a, --all load all sites and then sort in memory + +Type: transformation +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/vcfuniq.1 libvcflib-1.0.2+dfsg/man/vcfuniq.1 --- libvcflib-1.0.1+dfsg/man/vcfuniq.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfuniq.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,44 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFUNIQ" "1" "" "vcfuniq (vcflib)" "vcfuniq (VCF filter)" +.hy +.SH NAME +.PP +\f[B]vcfuniq\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfuniq\f[R] +.SH DESCRIPTION +.PP +List unique genotypes. +Like GNU uniq, but for VCF records. +Remove records which have the same position, ref, and alt as the previous +record. +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: filter + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors.
diff -Nru libvcflib-1.0.1+dfsg/man/vcfuniqalleles.1 libvcflib-1.0.2+dfsg/man/vcfuniqalleles.1 --- libvcflib-1.0.1+dfsg/man/vcfuniqalleles.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/vcfuniqalleles.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,42 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "VCFUNIQALLELES" "1" "" "vcfuniqalleles (vcflib)" "vcfuniqalleles (VCF filter)" +.hy +.SH NAME +.PP +\f[B]vcfuniqalleles\f[R] +.SH SYNOPSIS +.PP +\f[B]vcfuniqalleles\f[R] +.SH DESCRIPTION +.PP +List unique alleles. For each record, remove any duplicate alternate +alleles that may have resulted from merging separate VCF files. +.SH OPTIONS +.IP +.nf +\f[C] + + +Type: filter + + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/man/wcFst.1 libvcflib-1.0.2+dfsg/man/wcFst.1 --- libvcflib-1.0.1+dfsg/man/wcFst.1 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/man/wcFst.1 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,61 @@ +.\" Automatically generated by Pandoc 2.7.3 +.\" +.TH "WCFST" "1" "" "wcFst (vcflib)" "wcFst (VCF statistics)" +.hy +.SH NAME +.PP +\f[B]wcFst\f[R] +.SH SYNOPSIS +.PP +\f[B]wcFst\f[R] \-\-target 0,1,2,3,4,5,6,7 \-\-background +11,12,13,16,17,19,22 \-\-file my.vcf \-\-deltaaf 0.1 \-\-type PL +.SH DESCRIPTION +.PP +\f[B]wcFst\f[R] is Weir & Cockerham\[cq]s Fst for two populations. +Negative values are VALID, they are sites which can be treated as zero +Fst. +For more information see Evolution, Vol. +38 N. +6 Nov 1984. +Specifically \f[B]wcFst\f[R] uses equations 1,2,3,4. +.SH OPTIONS +.IP +.nf +\f[C] + + +Output : 5 columns : + 1. seqid + 2. position + 3. target allele frequency + 4. background allele frequency + 5.
**wcFst** + +required: t,target -- argument: a zero based comma separated list of target individuals corresponding to VCF columns +required: b,background -- argument: a zero based comma separated list of background individuals corresponding to VCF columns +required: f,file -- argument: properly formatted VCF +required: y,type -- argument: genotype likelihood format; genotype : GT,GL,PL,GP +optional: r,region -- argument: a tabix compliant genomic range: seqid or seqid:start-end +optional: d,deltaaf -- argument: skip sites where the difference in allele frequencies is less than deltaaf, default is zero + +Type: statistics + +\f[R] +.fi +.SH EXIT VALUES +.TP +.B \f[B]0\f[R] +Success +.TP +.B \f[B]not 0\f[R] +Failure +.SH SEE ALSO +.PP +\f[B]vcflib\f[R](1) +.SH OTHER +.SH LICENSE +.PP +Copyright 2011-2020 (C) Erik Garrison and vcflib contributors. +MIT licensed. +.SH AUTHORS +Erik Garrison and vcflib contributors. diff -Nru libvcflib-1.0.1+dfsg/README.md libvcflib-1.0.2+dfsg/README.md --- libvcflib-1.0.1+dfsg/README.md 2019-10-01 07:06:01.000000000 +0000 +++ libvcflib-1.0.2+dfsg/README.md 2021-01-28 07:04:12.000000000 +0000 @@ -1,923 +1,354 @@ -# vcflib -### A C++ library for parsing and manipulating VCF files. - -#### author: Erik Garrison +# vcflib -#### license: MIT +### A C++ library for parsing and manipulating VCF files. 
-[![Build Status](https://travis-ci.org/vcflib/vcflib.svg?branch=master)](https://travis-ci.org/vcflib/vcflib) +![Github-CI](https://github.com/vcflib/vcflib/workflows/CI/badge.svg) [![Travis-CI](https://travis-ci.org/vcflib/vcflib.svg?branch=master)](https://travis-ci.org/vcflib/vcflib) [![AnacondaBadge](https://anaconda.org/bioconda/vcflib/badges/installer/conda.svg)](https://anaconda.org/bioconda/vcflib) [![DL](https://anaconda.org/bioconda/vcflib/badges/downloads.svg)](https://anaconda.org/bioconda/vcflib) [![BrewBadge](https://img.shields.io/badge/%F0%9F%8D%BAbrew-vcflib-brightgreen.svg)](https://github.com/brewsci/homebrew-bio) [![GuixBadge](https://img.shields.io/badge/gnuguix-vcflib-brightgreen.svg)](https://www.gnu.org/software/guix/packages/V/) [![DebianBadge](https://badges.debian.net/badges/debian/testing/libvcflib-dev/version.svg)](https://packages.debian.org/testing/libvcflib-dev) [![C++0x](https://img.shields.io/badge/Language-C++0x-steelblue.svg)](https://www.cprogramming.com/c++11/what-is-c++0x.html) [![Gitter](https://badges.gitter.im/ekg/vcflib.svg)](https://gitter.im/ekg/vcflib?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) ## overview -The [Variant Call Format (VCF)](http://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-41) -is a flat-file, tab-delimited textual format -intended to concisely describe reference-indexed variations between individuals. -VCF provides a common interchange format for the description of variation in individuals and populations of samples, -and has become the _defacto_ standard reporting format for a wide array of genomic variant detectors. +The [Variant Call Format +(VCF)](http://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-41) +is a flat-file, tab-delimited textual format that +describes reference-indexed variations between individuals. 
VCF +provides a common interchange format for the description of variation +in individuals and populations of samples, and has become the +*de facto* standard reporting format for a wide array of genomic +variant detectors. -vcflib provides methods to manipulate and interpret sequence variation as it can be described by VCF. -It is both: +vcflib provides methods to manipulate and interpret sequence variation +described by VCF. It is both: - * an API for parsing and operating on records of genomic variation as it can be described by the VCF format, - * and a collection of command-line utilities for executing complex manipulations on VCF files. + * an API for parsing and operating on records of genomic variation as + it can be described by the VCF format + * a collection of command-line utilities for executing complex + manipulations on VCF files -The API itself provides a quick and extremely permissive method to read and write VCF files. -Extensions and applications of the library provided in the included utilities (*.cpp) comprise the vast bulk of the library's utility for most users. +vcflib is both a library (with an API) and a collection of useful +tools. The API provides a quick and extremely permissive method to +read and write VCF files. Extensions and applications of the library +provided in the included utilities (*.cpp) comprise the vast bulk of +the library's utility. 
-## installation +--- -### [bioconda](https://bioconda.github.io/user/install.html) -``` -conda install -c conda-forge -c bioconda -c defaults vcflib -``` +Short index: -### [homebrew](https://brew.sh) -``` -brew install brewsci/bio/vcflib -``` +- [Install](#INSTALL) +- [Usage](#USAGE) +- [TOOLS](#TOOLS) + * [Filter](#filter) + * [Metrics](#metrics) + * [Phenotype](#phenotype) + * [Genotype](#genotype) + * [Transformation](#transformation) + * [Statistics](#statistics) + * [Scripts](#scripts) +- [Link library](#link-library) +- [Build from source](#build-from-source) +- [Development](#Development) +- [LICENSE](#LICENSE) -### source -``` -git clone --recursive https://github.com/ekg/vcflib.git -cd vcflib -make -j -``` -Executables are built into the `./bin` directory in the repository. -A number of shell, perl, python3, and R scripts already reside there. -This makes installation easy, as users can add vcflib/bin -to their path, or copy the contained executables to a directory already in their path. - -## usage - -vcflib provides a variety of functions for VCF manipulation: - -### comparison - - * Generate **haplotype-aware intersections** ([vcfintersect](#vcfintersect) -i), **unions** (vcfintersect -u), and **complements** (vcfintersect -v -i). - * **Overlay-merge** multiple VCF files together, using provided order as precedence ([vcfoverlay](#vcfoverlay)). - * **Combine** multiple VCF files together, handling samples when alternate allele descriptions are identical ([vcfcombine](#vcfcombine)). - * **Validate** the integrity and identity of the VCF by verifying that the VCF record's REF matches a given reference file ([vcfcheck](#vcfcheck)). - -### format conversion - - * Convert a VCF file into a per-allele or per-genotype **tab-separated (.tsv)** file ([vcf2tsv](#vcf2tsv)). - * Store a VCF file in an **SQLite3** database (vcf2sqlite.py). - * Make a **BED file** from the intervals in a VCF file (vcf2bed.py). 
- -### filtering and subsetting - - * **Filter** variants and genotypes using arbitrary expressions based on values in the INFO and sample fields ([vcffilter](#vcffilter)). - * **Randomly sample** a subset of records from a VCF file, given a rate ([vcfrandomsample](#vcfrandomsample)). - * **Select variants** of a certain type (vcfsnps, vcfbiallelic, vcfindels, vcfcomplex, etc.) - -### annotation - - * **Annotate** one VCF file with fields from the INFO column of another, based on position ([vcfaddinfo](#vcfaddinfo), [vcfintersect](#vcfintersect)). - * Incorporate annotations or targets provided by a *BED* file ([vcfannotate](#vcfannotate), [vcfintersect](#vcfintersect)). - * Examine **genotype correspondence** between two VCF files by annotating samples in one file with genotypes from another ([vcfannotategenotypes](#vcfannotategenotypes)). - * Annotate variants with the **distance** to the nearest variant ([vcfdistance](#vcfdistance)). - * Count the number of alternate alleles represented in samples at each variant record ([vcfaltcount](#vcfaltcount)). - * **Subset INFO fields** to decrease file size and processing time ([vcfkeepinfo](#vcfkeepinfo)). - * Lighten up VCF files by keeping only a **subset of per-sample information** ([vcfkeepgeno](#vcfkeepgeno)). - * **Numerically index** alleles in a VCF file ([vcfindex](#vcfindex)). - -### samples - - * Quickly obtain the **list of samples** in a given VCF file ([vcfsamplenames](#vcfsamplenames)). - * **Remove samples** from a VCF file ([vcfkeepsamples](#vcfkeepsamples), [vcfremovesamples](#vcfremovesamples)). - -### ordering - - * **Sort variants** by genome coordinate ([vcfstreamsort](#vcfstreamsort)). - * **Remove duplicate** variants in vcfstreamsort'ed files according to their REF and ALT fields ([vcfuniq](#vcfuniq)). - -### variant representation - - * **Break multiallelic** records into multiple records ([vcfbreakmulti](#vcfbreakmulti)), retaining allele-specific INFO fields. 
- * **Combine overlapping biallelic** records into a single record ([vcfcreatemulti](#vcfcreatemulti)). - * **Decompose complex variants** into a canonical SNP and indel representation ([vcfallelicprimitives](#vcfallelicprimitives)), generating phased genotypes for available samples. - * **Reconstitute complex variants** provided a phased VCF with samples ([vcfgeno2haplo](#vcfgeno2haplo)). - * **Left-align indel and complex variants** ([vcfleftalign](#vcfleftalign)). - -### genotype manipulation - - * **Set genotypes** in a VCF file provided genotype likelihoods in the GL field ([vcfglxgt](#vcfglxgt)). - * Establish putative **somatic variants** using reported differences between germline and somatic samples ([vcfsamplediff](#vcfsamplediff)). - * Remove samples for which the reported genotype (GT) and observation counts disagree (AO, RO) ([vcfremoveaberrantgenotypes](#vcfremoveaberrantgenotypes)). - -### interpretation and classification of variants - - * Obtain aggregate **statistics** about VCF files ([vcfstats](#vcfstats)). - * Print the **receiver-operating characteristic (ROC)** of one VCF given a truth set ([vcfroc](#vcfroc)). - * Annotate VCF records with the **Shannon entropy** of flanking sequence ([vcfentropy](#vcfentropy)). - * Calculate the heterozygosity rate ([vcfhetcount](#vcfhetcount)). - * Generate potential **primers** from VCF records ([vcfprimers](#vcfprimers)), to check for genome uniqueness. - * Convert the numerical represenation of genotypes provided by the GT field to a **human-readable genotype format** ([vcfgenotypes](#vcfgenotypes)). - * Observe how different alignment parameters, including context and entropy-dependent ones, influence **variant classification and interpretation** ([vcfremap](#vcfremap)). - * **Classify variants** by annotations in the INFO field using a self-organizing map ([vcfsom](#vcfsom)); **re-estimate their quality** given known variants. - - -A number of "helper" perl and python3 scripts (e.g. 
vcf2bed.py, vcfbiallelic) further extend functionality. - -In practice, users are encouraged to drive the utilities in the library in a streaming fashion, using pipes, to fully utilize resources on multi-core systems during interactive work. Piping provides a convenient method to interface with other libraries (vcf-tools, BedTools, GATK, htslib, bcftools, freebayes) which interface via VCF files, allowing the composition of an immense variety of processing functions. - -## development - -See src/vcfecho.cpp for basic usage. src/Variant.h and src/Variant.cpp describe methods available in the API. -vcflib is incorporated into several projects, such as [freebayes](https://github.com/ekg/freebayes), which may provide a point of reference for prospective developers. -Additionally, developers should be aware of that vcflib contains submodules (git repositories) comprising its dependencies (outside of lzib and a *nix environment). - - - -## executables - -### vcf2tsv - - usage: vcf2tsv [-n null_string] [-g] [vcf file] -Converts stdin or given VCF file to tab-delimited format, using null string to replace empty values in the table. -Specifying -g will output one line per sample with genotype information. - -### vcfaddinfo - - usage: vcfaddinfo -Adds info fields from the second file which are not present in the first vcf file. - - -### vcfafpath - -Uses allele frequencies in the AF info column to estimate phylogeny at multiallelic sites. - - -### vcfallelicprimitives - - usage: vcfallelicprimitives [options] [file] - - options: - -m, --use-mnps Retain MNPs as separate events (default: false) - -t, --tag-parsed FLAG Tag records which are split apart of a complex allele - with this flag - -If multiple alleleic primitives (gaps or mismatches) are specified in a single VCF record, split the record into multiple lines, but drop all INFO fields. -"Pure" MNPs are split into multiple SNPs unless the -m flag is provided. 
-Genotypes are phased where complex alleles have been decomposed, provided genotypes in the input. - - -### vcfaltcount - -Counts the number of alternate alleles in the record. - - -### vcfannotate - - usage: vcfannotate [options] [] - - options: - -b, --bed use annotations provided by this BED file - -k, --key use this INFO field key for the annotations - -d, --default use this INFO field key for records without annotations - -Intersect the records in the VCF file with targets provided in a BED file. -Intersections are done on the reference sequences in the VCF file. -If no VCF filename is specified on the command line (last argument) the VCF read from stdin. - - -### vcfannotategenotypes - - usage: vcfannotategenotypes - -Annotates genotypes in the first file with genotypes in the second adding the genotype as another flag to each sample filed in the first file. -Annotation-tag is the name of the sample flag which is added to store the annotation. -Also adds a 'has\_variant' flag for sites where the second file has a variant. - - -### vcfbreakmulti - - usage: vcfbreakmulti [options] [file] - -If multiple alleles are specified in a single record, break the record into multiple lines, preserving allele-specific INFO fields. - - -### vcfcheck - - usage: vcfcheck [options] - - options: -f, --fasta-reference FASTA reference file to use to obtain - primer sequences - -Verifies that the VCF REF field matches the reference as described. - - - -### vcfcleancomplex - -Removes reference-matching sequence from complex alleles and adjusts records to -reflect positional change. - - -### vcfcombine - - usage: vcfcombine [vcf file] [vcf file] ... - - options: - -h --help This text. - -r --region REGION A region specifier of the form chrN:x-y to bound the merge - -Combines VCF files positionally, combining samples when sites and alleles are identical. -Any number of VCF files may be combined. 
-The INFO field and other columns are taken from one of the files which are combined when records in multiple files match. -Alleles must -have identical ordering to be combined into one record. -If they do not, multiple records will be emitted. - - -### vcfcommonsamples - - usage: vcfcommonsamples - -Outputs each record in the first file, removing samples not present in the second. - - -### vcfcountalleles - -Counts the total number of alleles in the input. - - -### vcfcreatemulti - -If overlapping alleles are represented across multiple records, merge them into a single record. - -### vcfdistance - -Adds a value to each VCF record indicating the distance to the nearest variant -in the file. - - -### vcfentropy - - usage: vcfentropy [options] - - options: - -f, --fasta-reference FASTA reference file to use to obtain primer sequences - -w, --window-size Size of the window over which to calculate entropy - -Anotates the output VCF file with, for each record, EntropyLeft, -EntropyRight, EntropyCenter, which are the entropies of the sequence of the -given window size to the left, right, and center of the record. - - - -### vcffilter - - usage: vcffilter [options] - - options: - -f, --info-filter specifies a filter to apply to the info fields of records, - removes alleles which do not pass the filter - -g, --genotype-filter specifies a filter to apply to the genotype fields of records - -s, --filter-sites filter entire records, not just alleles - -t, --tag-pass tag vcf records as positively filtered with this tag, print all records - -F, --tag-fail tag vcf records as negatively filtered with this tag, print all records - -A, --append-filter append the existing filter tag, don't just replace it - -a, --allele-tag apply -t on a per-allele basis. adds or sets the corresponding INFO field tag - -v, --invert inverts the filter, e.g. 
grep -v - -o, --or use logical OR instead of AND to combine filters - -r, --region specify a region on which to target the filtering, requires a BGZF - compressed file which has been indexed with tabix. any number of - regions may be specified. - -Filter the specified VCF file using the set of filters. -Filters are specified in the form " ": - -f "DP > 10" # for info fields - -g "GT = 1|1" # for genotype fields - -f "CpG" # for 'flag' fields - -Operators can be any of: =, !, <, >, |, & - -Any number of filters may be specified. They are combined via logical AND -unless --or is specified on the command line. Obtain logical negation -through the use of parentheses, and negative numbers using 0-N: - -f "! ( DP = 10 )" # depth not-equal 10 - -f "GL = ( 0 - 1 )" # genotype-ll equal -1 - -For convenience, you can specify "QUAL" to refer to the quality of the site, -even though it does not appear in the INFO fields. - - -### vcffixup - -Count the allele frequencies across alleles present in each record in the VCF file. (Similar to vcftools --freq.) - -Uses genotypes from the VCF file to correct AC (alternate allele count), AF -(alternate allele frequency), NS (number of called), in the VCF records. For -example: - - % vcfkeepsamples file.vcf NA12878 | vcffixup - | vcffilter -f "AC > 0" - -Would downsample file.vcf to only NA12878, removing sites for which the sample -was not called as polymorphic. - - -### vcfflatten - - usage: vcfflatten [file] - -Removes multi-allelic sites by picking the most common alternate. -Requires allele frequency specification 'AF' and use of 'G' and 'A' to specify the fields which vary according to the Allele or Genotype. -VCF file may be specified on the command line or piped as stdin. 
- - -### vcfgeno2haplo - - usage: vcfgeno2haplo [options] [] - - options: - -w, --window-size N compare records up to this many bp away (default 30) - -r, --reference FILE FASTA reference file, required with -i and -u - -Convert genotype-based phased alleles within --window-size into haplotype alleles. - - - -### vcfgenotypecompare - - usage: vcfgenotypecompare - -Adds statistics to the INFO field of the vcf file describing the amount of discrepancy between the genotypes (GT) in the vcf file and the genotypes reported in the . -Use this after vcfannotategenotypes to get correspondence statistics for two vcfs. - - -### vcfgenotypes - -Converts numerical representation of genotypes (standard in GT field) to the -alleles provided in the call's ALT/REF fields. - - -### vcfglxgt - - usage: vcfglxgt [options] - - options: - -n, --fix-null-genotypes only apply to null and partly-null genotypes - -Set genotypes using the maximum genotype likelihood for each sample. - - - -### vcfhetcount - -Count the number of heterozygotes in the input VCF. - - -### vcfhethomratio - -Provides the ratio between heterozygotes and homozygotes. - -### vcfindex - -Adds a field (id) which contains an allele-specific numerical index. 
- -### vcfintersect - - usage: vcfintersect [options] [] - - options: - -b, --bed FILE use intervals provided by this BED file - -v, --invert invert the selection, printing only records which would - not have been printed out - -i, --intersect-vcf FILE use this VCF for set intersection generation - -u, --union-vcf FILE use this VCF for set union generation - -w, --window-size N compare records up to this many bp away (default 30) - -r, --reference FILE FASTA reference file, required with -i and -u - -l, --loci output whole loci when one alternate allele matches - -m, --ref-match intersect on the basis of record REF string - -t, --tag TAG attach TAG to each record's info field if it would intersect - -V, --tag-value VAL use this value to indicate that the allele is passing - '.' will be used otherwise. default: 'PASS' - -M, --merge-from FROM-TAG - -T, --merge-to TO-TAG merge from FROM-TAG used in the -i file, setting TO-TAG - in the current file. - -For bed-vcf intersection, alleles which fall into the targets are retained. - -For vcf-vcf intersection and union, unify on equivalent alleles within window-size bp as determined by haplotype comparison alleles. - - -### vcfkeepgeno - - usage: vcfkeepgeno [FIELD1] [FIELD2] ... - -Outputs each record in the vcf file, removing FORMAT fields not listed on the command line from sample specifications in the output. - - -### vcfkeepinfo - - usage: vcfkeepinfo [FIELD1] [FIELD2] ... - - Outputs each record in the vcf file, removing INFO fields not listed on the command line. - - -### vcfkeepsamples - - usage: vcfkeepsamples [SAMPLE1] [SAMPLE2] ... - -Outputs each record in the vcf file, removing samples not listed on the command line. - - -### vcfleftalign - -Left-align indels and complex variants in the input using a pairwise ref/alt -alignment followed by a heuristic, iterative left realignment process that -shifts indel representations to their absolute leftmost (5') extent. 
This is -the same procedure used in the internal left alignment in freebayes, and can be -used when preparing VCF files for input to freebayes to decrease positional -representation differences between the input alleles and left-realigned -alignments. - - usage: vcfleftalign [options] [file] - - options: - -r, --reference FILE Use this reference as a basis for realignment. - -w, --window N Use a window of this many bp when left aligning (150). - -Left-aligns variants in the specified input file or stdin. -Window size is determined dynamically according to the entropy of the regions flanking the indel. -These must have entropy > 1 bit/bp, or be shorter than ~5kb. - - -### vcflength - -Adds the length of the variant record (in [-/+]) relative to the reference allele to each VCF record. - - -### vcfnumalt - -Annotates the VCF stream on stdin with the number of alternate alleles at the site. - - -### vcfoverlay - - usage: vcfoverlay [options] [ ...] - - options: - -h, --help this dialog - - Overlays records in the input vcf files in the order in which they appear. - - -### vcfparsealts - -Demonstration of alternate allele parsing method. This method uses pairwise -alignment of REF and ALTs to determine component allelic primitives for each -alternate allele. - -Use `vcfallelicprimitives` to decompose records while preserving format. - - -### vcfprimers - - usage: vcfprimers [options] - - options: - -f, --fasta-reference FASTA reference file to use to obtain primer sequences - -l, --primer-length The length of the primer sequences on each side of the variant - -For each VCF record, extract the flanking sequences, and write them to stdout as FASTA records suitable for alignment. -This tool is intended for use in designing validation experiments. -Primers extracted which would flank all of the alleles at multi-allelic sites. -The name of the FASTA "reads" indicates the VCF record which they apply to. 
-The form is >CHROM_POS_LEFT for the 3' primer and >CHROM_POS_RIGHT for the 5' primer, for example: - - >20_233255_LEFT - CCATTGTATATATAGACCATAATTTCTTTATCCAATCATCTGTTGATGGA - >20_233255_RIGHT - ACTCAGTTGATTCCATACCTTTGCCATCATGAATCATGTTGTAATAAACA - - - -### vcfrandomsample - - usage: vcfrandomsample [options] [] - - options: - -r, --rate RATE base sampling probability per locus - -s, --scale-by KEY scale sampling likelihood by this Float info field - -p, --random-seed N use this random seed - -Randomly sample sites from an input VCF file, which may be provided as stdin. -Scale the sampling probability by the field specified in KEY. -This may be used to provide uniform sampling across allele frequencies, for instance. - - -### vcfremap - - usage: vcfremap [options] [] - - options: - -w, --ref-window-size N align using this many bases flanking each side of the reference allele - -s, --alt-window-size N align using this many flanking bases from the reference around each alternate allele - -r, --reference FILE FASTA reference file, required with -i and -u - -m, --match-score N match score for SW algorithm - -x, --mismatch-score N mismatch score for SW algorithm - -o, --gap-open-penalty N gap open penalty for SW algorithm - -e, --gap-extend-penalty N gap extension penalty for SW algorithm - -z, --entropy-gap-open use entropy scaling for the gap open penalty - -R, --repeat-gap-extend N penalize non-repeat-unit gaps in repeat sequence - -a, --adjust-vcf TAG supply a new cigar as TAG in the output VCF - -For each alternate allele, attempt to realign against the reference with lowered gap open penalty. -If realignment is possible, adjust the cigar and reference/alternate alleles. - - -### vcfremoveaberrantgenotypes - -Strips genotypes which are homozygous but have observations implying -heterozygosity. Requires RA (reference allele observation) and AA (alternate -allele observation) for each genotype. 
- - -### vcfremovesamples - - usage: vcfremovesamples [SAMPLE1] [SAMPLE2] ... - -Outputs each record in the vcf file, removing samples listed on the command line. - - -### vcfroc - - usage: vcfroc [options] [] - - options: - -t, --truth-vcf FILE use this VCF as ground truth for ROC generation - -w, --window-size N compare records up to this many bp away (default 30) - -r, --reference FILE FASTA reference file - -Generates a pseudo-ROC curve using sensitivity and specificity estimated against a putative truth set. -Thresholding is provided by successive QUAL cutoffs. - - -### vcfsamplediff - - usage: vcfsamplediff [ ... ] - -Tags each record where the listed sample genotypes differ with -The first sample is assumed to be germline, the second somatic. -Each record is tagged with ={germline,somatic,loh} to specify the type of variant given the genotype difference between the two samples. - - -### vcfsamplenames - -Prints the names of the samples in the VCF file. - - -### vcfsom - - usage: vcfsom [options] [vcf file] - - training: - vcfsom -s output.som -f "AF DP ABP" training.vcf - - application: - vcfsom -a output.som -f "AF DP ABP" test.vcf >results.vcf - -vcfsom trains and/or applies a self-organizing map to the input VCF data on stdin, adding two columns for the x and y coordinates of the winning neuron in the network and an optional euclidean distance from a given node (--center). - -If a map is provided via --apply, map will be applied to input without training. -Automated filtering to an estimated FP rate is - - options: - - -h, --help this dialog - - training: - - -f, --fields "FIELD ..." 
INFO fields to provide to the SOM - -a, --apply FILE apply the saved map to input data to FILE - -s, --save FILE train on input data and save the map to FILE - -t, --print-training-results - print results of SOM on training input - (you can also just use --apply on the same input) - -x, --width X width in columns of the output array - -y, --height Y height in columns of the output array - -i, --iterations N number of training iterations or epochs - -d, --debug print timing information - - recalibration: - - -c, --center X,Y annotate with euclidean distance from center - -p, --paint-true VCF use VCF file to annotate true variants (multiple) - -f, --paint-false VCF use VCF file to annotate false variants (multiple) - -R, --paint-tag TAG provide estimated FDR% in TAG in variant INFO - -N, --false-negative replace FDR% (false detection) with FNR% (false negative) - - -### vcfstats - - usage: vcfstats [options] - - -r, --region specify a region on which to target the stats, requires a BGZF - compressed file which has been indexed with tabix. any number of - regions may be specified. - -a, --add-info add the statistics intermediate information to the VCF file, - writing out VCF records instead of summary statistics - -l, --no-length-frequency don't out the indel and mnp length-frequency spectra - -m, --match-score N match score for SW algorithm - -x, --mismatch-score N mismatch score for SW algorithm - -o, --gap-open-penalty N gap open penalty for SW algorithm - -e, --gap-extend-penalty N gap extension penalty for SW algorithm - -Prints statistics about variants in the input VCF file. - - -### vcfstreamsort - -Reads VCF on stdin and guarantees that the positional order is correct provided out-of-order -variants are no more than 100 positions in the VCF file apart. - - -### vcfuniq - -Like GNU uniq, but for VCF records. Remove records which have the same positon, ref, and alt -as the previous record. 
- - -### vcfuniqalleles - -For each record, remove any duplicate alternate alleles that may have resulted from merging -separate VCF files. - -## GPAT++ - -The application of population genomics to non-model organisms is greatly facilitated by the low cost of next generation sequencing (NGS). Barriers, however, exist for using NGS data for population level analyses. Traditional population genetic metrics, such as Fst, are not robust to the genotyping errors inherent in noisy NGS data. Additionally, many older software tools were never designed to handle the volume of data produced by NGS pipelines. To overcome these limitations we have developed a flexible software library designed specifically for large and noisy NGS datasets. The Genotype Phenotype Association Toolkit (GPAT++) implements both traditional and novel population genetic methods in a single user-friendly framework. GPAT consists of a suite of command-line tools and a Perl API that programmers can use to develop new applications. To date GPAT++ has been used successfully to identity genotype-phenotype associations in several real-world datasets including: domestic pigeons, Pox virus and pine rust fungus. GPAT++ is open source and freely available for academic use. - -### Functions - - [X] Basic population stats (Af, Pi, eHH, oHet, genotypeCounts) - - [X] Several flavors of Fst - - [X] Linkage - - [X] Association testing (genotypic and pooled data) - - [X] Haplotype methods (hapLrt) - - [X] Smoothing - - [X] Permutation - - [X] Plotting - -### Documentation , basic usage, FAQ - - -1. Most GPAT++ tools write to both STDERR and STDOUT. -2. All GPAT++ tools group individuals using a zero-based comma separated index (e.g. 0,1,2 ; first three individuals in VCF) -3. Some GPAT++ tools (haplotype methods) require a region. -4. What is the genotype likelihood format? When in doubt use GT! Only a few GPAT++ tools make use of the genotype likelihoods. 
- GT: The genotype is correct - GL: Genotype likelihood (Freebayes) - GP: Genotype probability (Beagle) - PL: Scaled genotype likelihood (GATK) -5. pFst is the only tool that will work on pooled data. - -### wcFst +--- -Calculates Weir and Cockerham's Fst estimator bi-allelic genotype data (Weir and Cockerham 1984). Sites with less than five genotypes in the target and background are skipped because they provide unreliable estimates of Fst. Fix sites are also ignored. +## INSTALL -``` -INFO: help -INFO: description: - wcFst is Weir & Cockerham's Fst for two populations. Negative values are VALID, - they are sites which can be treated as zero Fst. For more information see Evolution, Vol. 38 N. 6 Nov 1984. - Specifically wcFst uses equations 1,2,3,4. - -Output : 3 columns : - 1. seqid - 2. position - 3. target allele frequency - 4. background allele frequency - 5. wcFst - -INFO: usage: wcFst --target 0,1,2,3,4,5,6,7 --background 11,12,13,16,17,19,22 --file my.vcf --deltaaf 0.1 --type PL - -INFO: required: t,target -- argument: a zero based comma separated list of target individuals corrisponding to VCF columns -INFO: required: b,background -- argument: a zero based comma separated list of background individuals corrisponding to VCF columns -INFO: required: f,file -- argument: proper formatted VCF -INFO: required, y,type -- argument: genotype likelihood format; genotype : GT,GL,PL,GP -INFO: optional: r,region -- argument: a tabix compliant genomic range: seqid or seqid:start-end -INFO: optional: d,deltaaf -- argument: skip sites where the difference in allele frequencies is less than deltaaf, default is zero +### [Bioconda](https://bioconda.github.io/user/install.html) + +Conda installs in user land without root access + +```sh +conda install -c bioconda vcflib ``` -### segmentFst +### [Homebrew](https://brew.sh) -This program provides a way to find continious regions with high Fst values. It takes the output of wcFst and produces a BED file. 
These high Fst region can be permutated with 'permuteGPATwindow'. +Homebrew installs on Linux and Mac OSX +```sh +brew install brewsci/bio/vcflib ``` -INFO: help -INFO: description: - Creates genomic segments (bed file) for regions with high wcFst -Output : 8 columns : - 1. Seqid - 2. Start (zero based) - 3. End (zero based) - 4. Average Fst - 5. Average high Fst (Fst > -s) - 6. N Fst values in segment - 7. N high fst values in segment - 8. Segment length -INFO: usage: segmentFst -s 0.7 -f wcFst.output.txt -INFO: required: -f -- Output from wcFst -INFO: optional: -s -- High Fst cutoff [0.8] +### [Debian](https://debian.org/) +For Debian and Ubuntu + +```sh +apt-get install libvcflib-tools libvcflib-dev ``` +### [GNU Guix](https://guix.gnu.org/) + +We develop against guix + +```sh +guix package -i vcflib +``` + +## USAGE + +Users are encouraged to drive the utilities in the library in a +streaming fashion, using Unix pipes to fully utilize resources on +multi-core systems. Piping provides a convenient method to interface +with other libraries (vcf-tools, BedTools, GATK, htslib, +[bio-vcf](https://github.com/vcflib/bio-vcf), bcftools, +[freebayes](https://github.com/freebayes)) which interface via VCF +files, allowing the composition of an immense variety of processing +functions. Examples can be found in the scripts, +e.g. [script](./scripts/vcfgtcompare.sh). + + +# TOOLS + + + + + + + +## filter + +| filter command | description | +| :-------------- | :---------- | + | [vcfuniq](./doc/vcfuniq.md) | List unique genotypes. Like GNU uniq, but for VCF records. Remove records which have the same position, ref, and alt as the previous record. | + | [vcfuniqalleles](./doc/vcfuniqalleles.md) | List unique alleles For each record, remove any duplicate alternate alleles that may have resulted from merging separate VCF files.
| + | [vcffilter](./doc/vcffilter.md) | VCF filter the specified vcf file using the set of filters | + +## metrics + +| metrics command | description | +| :-------------- | :---------- | + | [vcfcheck](./doc/vcfcheck.md) | Validate integrity and identity of the VCF by verifying that the VCF record's REF matches a given reference file. | + | [vcfhethomratio](./doc/vcfhethomratio.md) | Generates the het/hom ratio for each individual in the file | + | [vcfhetcount](./doc/vcfhetcount.md) | Calculate the heterozygosity rate: count the number of alternate alleles in heterozygous genotypes in all records in the vcf file | + | [vcfdistance](./doc/vcfdistance.md) | Adds a tag to each variant record which indicates the distance to the nearest variant. (defaults to BasesToClosestVariant if no custom tag name is given). | + | [vcfentropy](./doc/vcfentropy.md) | Annotate VCF records with the Shannon entropy of flanking sequence. Annotates the output VCF file with, for each record, EntropyLeft, EntropyRight, EntropyCenter, which are the entropies of the sequence of the given window size to the left, right, and center of the record. Also adds EntropyRef and EntropyAlt for each alt. | + +## phenotype + +| phenotype command | description | +| :-------------- | :---------- | + | [permuteGPAT++](./doc/permuteGPAT++.md) | **permuteGPAT++** is a method for adding empirical p-values to a GPAT++ score. | + +## genotype + +| genotype command | description | +| :-------------- | :---------- | + | [normalize-iHS](./doc/normalize-iHS.md) | normalizes iHS or XP-EHH scores. | + | [hapLrt](./doc/hapLrt.md) | HapLRT is a likelihood ratio test for haplotype lengths. The lengths are modeled with an exponential distribution. The sign denotes if the target has longer haplotypes (1) or the background (-1). | + | [abba-baba](./doc/abba-baba.md) | **abba-baba** calculates the tree pattern for four individuals. This tool assumes reference is ancestral and ignores non **abba-baba** sites.
The output is a boolean value: 1 = true , 0 = false for abba and baba. The tree argument should be specified from the most basal taxa to the most derived. | + +## transformation + +| transformation command | description | +| :-------------- | :---------- | + | [vcfinfo2qual](./doc/vcfinfo2qual.md) | Sets QUAL from info field tag keyed by [key]. The VCF file may be omitted and read from stdin. The average of the field is used if it contains multiple values. | + | [vcfsamplediff](./doc/vcfsamplediff.md) | Establish putative somatic variants using reported differences between germline and somatic samples. Tags each record where the listed sample genotypes differ with `<tag>`. The first sample is assumed to be germline, the second somatic. Each record is tagged with `<tag>`={germline,somatic,loh} to specify the type of variant given the genotype difference between the two samples. | + | [vcfaddinfo](./doc/vcfaddinfo.md) | Adds info fields from the second file which are not present in the first vcf file. | + | [vcfremoveaberrantgenotypes](./doc/vcfremoveaberrantgenotypes.md) | strips samples which are homozygous but have observations implying heterozygosity. Remove samples for which the reported genotype (GT) and observation counts disagree (AO, RO). | + | [vcfglxgt](./doc/vcfglxgt.md) | Set genotypes using the maximum genotype likelihood for each sample. | + | [dumpContigsFromHeader](./doc/dumpContigsFromHeader.md) | Dump contigs from header | + | [vcfevenregions](./doc/vcfevenregions.md) | Generates a list of regions, e.g. chr20:10..30 using the variant density information provided in the VCF file to ensure that the regions have even numbers of variants. This can be used to reduce the variance in runtime when dividing variant detection or genotyping by genomic coordinates. | + | [vcfcat](./doc/vcfcat.md) | Concatenates VCF files | + | [vcfannotategenotypes](./doc/vcfannotategenotypes.md) | Examine genotype correspondence.
Annotate genotypes in the first file with genotypes in the second adding the genotype as another flag to each sample filed in the first file. annotation-tag is the name of the sample flag which is added to store the annotation. also adds a 'has_variant' flag for sites where the second file has a variant. | + | [vcfafpath](./doc/vcfafpath.md) | Display genotype paths | + | [vcfclassify](./doc/vcfclassify.md) | Creates a new VCF where each variant is tagged by allele class: snp, ts/tv, indel, mnp | + | [vcfallelicprimitives](./doc/vcfallelicprimitives.md) | If multiple allelic primitives (gaps or mismatches) are specified in a single VCF record, split the record into multiple lines, but drop all INFO fields. Does not handle genotypes (yet). MNPs are split into multiple SNPs unless the -m flag is provided. Records generated by splits have th | + | [vcfqual2info](./doc/vcfqual2info.md) | Puts QUAL into an info field tag keyed by [key]. | + | [vcfcreatemulti](./doc/vcfcreatemulti.md) | If overlapping alleles are represented across multiple records, merge them into a single record. Currently only for indels. | + | [vcfgeno2alleles](./doc/vcfgeno2alleles.md) | modifies the genotypes field to provide the literal alleles rather than indexes | + | [vcfsample2info](./doc/vcfsample2info.md) | Take annotations given in the per-sample fields and add the mean, median, min, or max to the site-level INFO. | + | [vcfld](./doc/vcfld.md) | Compute LD | + | [vcfnumalt](./doc/vcfnumalt.md) | outputs a VCF stream where NUMALT has been generated for each record using sample genotypes | + | [vcfstreamsort](./doc/vcfstreamsort.md) | Sorts the input (either stdin or file) using a streaming sort algorithm. Guarantees that the positional order is correct provided out-of-order variants are no more than 100 positions in the VCF file apart. 
| + | [vcfinfosummarize](./doc/vcfinfosummarize.md) | Take annotations given in the per-sample fields and add the mean, median, min, or max to the site-level INFO. | + | [vcflength](./doc/vcflength.md) | Add length info field | + | [vcfkeepgeno](./doc/vcfkeepgeno.md) | Reduce file size by removing FORMAT fields not listed on the command line from sample specifications in the output | + | [vcfcombine](./doc/vcfcombine.md) | Combine VCF files positionally, combining samples when sites and alleles are identical. Any number of VCF files may be combined. The INFO field and other columns are taken from one of the files which are combined when records in multiple files match. Alleles must have identical ordering to be combined into one record. If they do not, multiple records will be emitted. | + | [vcfprimers](./doc/vcfprimers.md) | For each VCF record, extract the flanking sequences, and write them to stdout as FASTA records suitable for alignment. | + | [vcfflatten](./doc/vcfflatten.md) | Removes multi-allelic sites by picking the most common alternate. Requires allele frequency specification 'AF' and use of 'G' and 'A' to specify the fields which vary according to the Allele or Genotype. VCF file may be specified on the command line or piped as stdin. | + | [vcf2dag](./doc/vcf2dag.md) | Modify VCF to be able to build a directed acyclic graph (DAG) | + | [vcfcleancomplex](./doc/vcfcleancomplex.md) | Removes reference-matching sequence from complex alleles and adjusts records to reflect positional change. | + | [vcfbreakmulti](./doc/vcfbreakmulti.md) | If multiple alleles are specified in a single record, break the record into multiple lines, preserving allele-specific INFO fields. 
| + | [vcfindex](./doc/vcfindex.md) | Adds an index number to the INFO field (id=position) | + | [vcfkeepinfo](./doc/vcfkeepinfo.md) | To decrease file size remove INFO fields not listed on the command line | + | [vcfgeno2haplo](./doc/vcfgeno2haplo.md) | Convert genotype-based phased alleles within --window-size into haplotype alleles. Will break haplotype construction when encountering non-phased genotypes on input. | + | [vcfintersect](./doc/vcfintersect.md) | VCF set analysis | + | [vcfannotate](./doc/vcfannotate.md) | Intersect the records in the VCF file with targets provided in a BED file. Intersections are done on the reference sequences in the VCF file. If no VCF filename is specified on the command line (last argument) the VCF read from stdin. | + | [smoother](./doc/smoother.md) | smoothes is a method for window smoothing many of the GPAT++ formats. | + | [vcf2fasta](./doc/vcf2fasta.md) | Generates sample_seq:N.fa for each sample, reference sequence, and chromosomal copy N in [0,1... ploidy]. Each sequence in the fasta file is named using the same pattern used for the file name, allowing them to be combined. | + | [vcfsamplenames](./doc/vcfsamplenames.md) | List sample names | + | [vcfleftalign](./doc/vcfleftalign.md) | Left-align indels and complex variants in the input using a pairwise ref/alt alignment followed by a heuristic, iterative left realignment process that shifts indel representations to their absolute leftmost (5') extent. | + | [vcfglbound](./doc/vcfglbound.md) | Adjust GLs so that the maximum GL is 0 by dividing all GLs for each sample by the max. 
| + | [vcfcommonsamples](./doc/vcfcommonsamples.md) | Generates each record in the first file, removing samples not present in the second | + | [vcfecho](./doc/vcfecho.md) | Echo VCF to stdout (simple demo) | + | [vcfkeepsamples](./doc/vcfkeepsamples.md) | outputs each record in the vcf file, removing samples not listed on the command line | + | [vcf2tsv](./doc/vcf2tsv.md) | Converts VCF to per-allelle or per-genotype tab-delimited format, using null string to replace empty values in the table. Specifying -g will output one line per sample with genotype information. When there is more than one alt allele there will be multiple rows, one for each allele and, the info will match the 'A' index | + | [vcfoverlay](./doc/vcfoverlay.md) | Overlay records in the input vcf files with order as precedence. | + | [vcfgenosamplenames](./doc/vcfgenosamplenames.md) | Get samplenames | + | [vcfremovesamples](./doc/vcfremovesamples.md) | outputs each record in the vcf file, removing samples listed on the command line | + | [vcfremap](./doc/vcfremap.md) | For each alternate allele, attempt to realign against the reference with lowered gap open penalty. If realignment is possible, adjust the cigar and reference/alternate alleles. Observe how different alignment parameters, including context and entropy-dependent ones, influence variant classification and interpretation. | + | [vcffixup](./doc/vcffixup.md) | Generates a VCF stream where AC and NS have been generated for each record using sample genotypes | + +## statistics + +| statistics command | description | +| :-------------- | :---------- | + | [vcfgenosummarize](./doc/vcfgenosummarize.md) | Adds summary statistics to each record summarizing qualities reported in called genotypes. 
Uses: RO (reference observation count), QR (quality sum reference observations), AO (alternate observation count), QA (quality sum alternate observations) | + | [vcfcountalleles](./doc/vcfcountalleles.md) | Count alleles | + | [meltEHH](./doc/meltEHH.md) | | + | [genotypeSummary](./doc/genotypeSummary.md) | Generates a table of genotype counts. Summarizes genotype counts for bi-allelic SNVs and indels | + | [vcfrandomsample](./doc/vcfrandomsample.md) | Randomly sample sites from an input VCF file, which may be provided as stdin. Scale the sampling probability by the field specified in KEY. This may be used to provide uniform sampling across allele frequencies, for instance. | + | [pVst](./doc/pVst.md) | **pVst** calculates vst, a measure of CNV stratification. | + | [vcfrandom](./doc/vcfrandom.md) | Generate a random VCF file | + | [segmentFst](./doc/segmentFst.md) | **segmentFst** creates genomic segments (bed file) for regions with high wcFst | + | [sequenceDiversity](./doc/sequenceDiversity.md) | The **sequenceDiversity** program calculates two popular metrics of haplotype diversity: pi and extended haplotype homozygosity (eHH). Pi is calculated using the Nei and Li 1979 formulation. eHH is a convenient way to think about haplotype diversity. When eHH = 0 all haplotypes in the window are unique and when eHH = 1 all haplotypes in the window are identical. | + | [segmentIhs](./doc/segmentIhs.md) | Creates genomic segments (bed file) for regions with high wcFst | + | [vcfgenotypes](./doc/vcfgenotypes.md) | Report the genotypes for each sample, for each variant in the VCF. Convert the numerical representation of genotypes provided by the GT field to a human-readable genotype format. | + | [vcfaltcount](./doc/vcfaltcount.md) | count the number of alternate alleles in all records in the vcf file | + | [plotHaps](./doc/plotHaps.md) | **plotHaps** provides the formatted output that can be used with 'bin/plotHaplotypes.R'.
| + | [vcfsitesummarize](./doc/vcfsitesummarize.md) | Summarize by site | + | [vcfgenotypecompare](./doc/vcfgenotypecompare.md) | adds statistics to the INFO field of the vcf file describing the amount of discrepancy between the genotypes (GT) in the vcf file and the genotypes reported in the . use this after vcfannotategenotypes to get correspondence statistics for two vcfs. | + | [vcfstats](./doc/vcfstats.md) | Prints statistics about variants in the input VCF file. | + | [wcFst](./doc/wcFst.md) | **wcFst** is Weir & Cockerham's Fst for two populations. Negative values are VALID, they are sites which can be treated as zero Fst. For more information see Evolution, Vol. 38 N. 6 Nov 1984. Specifically **wcFst** uses equations 1,2,3,4. | + | [permuteSmooth](./doc/permuteSmooth.md) | **permuteSmooth** is a method for adding empirical p-values smoothed wcFst scores. | + | [bFst](./doc/bFst.md) | **bFst** is a Bayesian approach to Fst. Importantly **bFst** accounts for genotype uncertainty in the model using genotype likelihoods. For a more detailed description see: `A Bayesian approach to inferring population structure from dominant markers' by Holsinger et al. Molecular Ecology Vol 11, issue 7 2002. The likelihood function has been modified to use genotype likelihoods provided by variant callers. There are five free parameters estimated in the model: each subpopulation's allele frequency and Fis (fixation index, within each subpopulation), a free parameter for the total population's allele frequency, and Fst. | + | [vcfroc](./doc/vcfroc.md) | Generates a pseudo-ROC curve using sensitivity and specificity estimated against a putative truth set. Thresholding is provided by successive QUAL cutoffs. | + | [vcfparsealts](./doc/vcfparsealts.md) | Alternate allele parsing method. This method uses pairwise alignment of REF and ALTs to determine component allelic primitives for each alternate allele. 
| + | [pFst](./doc/pFst.md) | **pFst** is a probabilistic approach for detecting differences in allele frequencies between two populations. | + | [iHS](./doc/iHS.md) | **iHS** calculates the integrated haplotype score which measures the relative decay of extended haplotype homozygosity (EHH) for the reference and alternative alleles at a site (see: voight et al. 2006, Spiech & Hernandez 2014). | + | [popStats](./doc/popStats.md) | General population genetic statistics for each SNP | + +See also [vcflib.md](./doc/vcflib.md). + +## scripts + +The vcflib source repository contains a number of additional scripts. +Click on the link to see the source code. + +| script | description | +| :-------------- | :---------- | +| [vcfclearinfo](./scripts/vcfclearinfo) | clear INFO field | +| [vcfqualfilter](./scripts/vcfqualfilter) | quality filter | +| [vcfnulldotslashdot](./scripts/vcfnulldotslashdot) | rewrite null genotypes to ./. | +| [vcfprintaltdiscrepancy.r](./scripts/vcfprintaltdiscrepancy.r) | show ALT discrepancies in a table | +| [vcfremovenonATGC](./scripts/vcfremovenonATGC) | remove non-nucleotides in REF or ALT | +| [plotSmoothed.R](./scripts/plotSmoothed.R) | smooth plot of wcFst, pFst or abba-baba | +| [vcf_strip_extra_headers](./scripts/vcf_strip_extra_headers) | strip headers | +| [plotHapLrt.R](./scripts/plotHapLrt.R) | plot results of pFst | +| [vcfbiallelic](./scripts/vcfbiallelic) | remove anything that is not biallelic | +| [vcfsort](./scripts/vcfsort) | sort VCF using shell script | +| [vcfnosnps](./scripts/vcfnosnps) | remove SNPs | +| [vcfmultiwayscripts](./scripts/vcfmultiwayscripts) | more multiway comparisons | +| [vcfgtcompare.sh](./scripts/vcfgtcompare.sh) | annotates records in the first file with genotypes and sites from the second | +| [plotPfst.R](./scripts/plotPfst.R) | plot pFst | +| [vcfregionreduce_and_cut](./scripts/vcfregionreduce_and_cut) | reduce, gzip, and tabix | +| [plotBfst.R](./scripts/plotBfst.R) | plot results of pFst | +| 
[vcfnobiallelicsnps](./scripts/vcfnobiallelicsnps) | remove biallelic SNPs | +| [vcfindels](./scripts/vcfindels) | show INDELS | +| [vcfmultiway](./scripts/vcfmultiway) | multiway comparison | +| [vcfregionreduce](./scripts/vcfregionreduce) | reduce VCFs using a BED File, gzip them up and create tabix index | +| [vcfprintaltdiscrepancy.sh](./scripts/vcfprintaltdiscrepancy.sh) | runner | +| [vcfclearid](./scripts/vcfclearid) | clear ID field | +| [vcfcomplex](./scripts/vcfcomplex) | remove all SNPs but keep SVs | +| [vcffirstheader](./scripts/vcffirstheader) | show first header | +| [plotXPEHH.R](./scripts/plotXPEHH.R) | plot XPEHH | +| [vcfregionreduce_pipe](./scripts/vcfregionreduce_pipe) | reduce, gzip and tabix in a pipe | +| [vcfplotaltdiscrepancy.sh](./scripts/vcfplotaltdiscrepancy.sh) | plot ALT discrepancy runner | +| [vcfplottstv.sh](./scripts/vcfplottstv.sh) | runner | +| [vcfnoindels](./scripts/vcfnoindels) | remove INDELs | +| [bgziptabix](./scripts/bgziptabix) | runs bgzip on the input and tabix indexes the result | +| [plotHaplotypes.R](./scripts/plotHaplotypes.R) | plot results | +| [vcfplotsitediscrepancy.r](./scripts/vcfplotsitediscrepancy.r) | plot site discrepancy | +| [vcfindelproximity](./scripts/vcfindelproximity) | show SNPs around an INDEL | +| [bed2region](./scripts/bed2region) | convert VCF CHROM column in VCF file to region | +| [vcfplotaltdiscrepancy.r](./scripts/vcfplotaltdiscrepancy.r) | plot ALT discrepancies | +| [plot_roc.r](./scripts/plot_roc.r) | plot ROC | +| [vcfmultiallelic](./scripts/vcfmultiallelic) | remove anything that is not multiallelic | +| [vcfsnps](./scripts/vcfsnps) | show SNPs | +| [vcfvarstats](./scripts/vcfvarstats) | use fastahack to get stats | +| [vcfregionreduce_uncompressed](./scripts/vcfregionreduce_uncompressed) | reduce, gzip and tabix | +| [plotWCfst.R](./scripts/plotWCfst.R) | plot wcFst | +| [vcf2bed.py](./scripts/vcf2bed.py) | transform VCF to BED file | +| [vcfjoincalls](./scripts/vcfjoincalls) | 
overlay files using QUAL and GT from a second VCF | +| [vcf2sqlite.py](./scripts/vcf2sqlite.py) | push VCF file into SQLite3 database using dbname | + +# Development -### popStats -Calculates basic population statistics at bi-allelic sites. The allele frequency is the number of non-reference alleles divided by the total number of alleles. The expected hetrozygosity is 2*p*q, where p is the non-reference allele frequency and q is 1-p. The observed heterozgosity is the fraction of 0/1 genotypes out of all genotypes. The inbreeding coefficent, Fis, is the relative heterozygosity of each individual vs. compared to the target group. +## build from source +VCFLIB uses the cmake build system, after a recursive checkout +of the sources make the files in the ./build directory with: + +```sh +git clone --recursive https://github.com/vcflib/vcflib.git +cd vcflib +mkdir -p build && cd build +cmake .. +cmake --build . +cmake --install . ``` -INFO: help -INFO: description: - General population genetic statistics for each SNP - -Output : 9 columns : - 1. seqid - 2. position - 3. target allele frequency - 4. expected heterozygosity - 5. observed heterozygosity - 6. number of hets - 7. number of homozygous ref - 8. number of homozygous alt - 9. target Fis -INFO: usage: popStat --type PL --target 0,1,2,3,4,5,6,7 --file my.vcf - -INFO: required: t,target -- a zero based comma separated list of target individuals corresponding to VCF columns -INFO: required: f,file -- proper formatted VCF -INFO: required, y,type -- genotype likelihood format; genotype : GL,PL,GP -INFO: optional, r,region -- a tabix compliant region : chr1:1-1000 or chr1 -``` -### genotypeSummary -Generates a table of genotype counts. 
+and to run the tests +```sh +ctest --verbose ``` -INFO: help -INFO: description: - Summarizes genotype counts for bi-allelic SNVs and indel - -INFO: usage: genotypeSummmary --type PL --target 0,1,2,3,4,5,6,7 --file my.vcf --snp - -INFO: required: t,target -- a zero based comma separated list of target individuals corresponding to VCF columns -INFO: required: f,file -- proper formatted VCF -INFO: required, y,type -- genotype likelihood format; genotype : GL,PL,GP -INFO: optional, r,region -- a tabix compliant region : chr1:1-1000 or chr1 -INFO: optional, s,snp -- Only count SNPs -``` +Executables are built into the `./build` directory in the repository. -### pFst +Build dependencies can be viewed in the Travis-CI and github-CI +scripts (see badges above), as well as [guix.scm](./guix.scm) used by +us to create the build environment (for instructions see the header of +guix.scm). Essentially: -pFst is a likelihood ratio test (LRT) quantifying allele frequency differences between populations. The LRT by default uses the binomial distribution. If Genotype likelihoods are provided it uses a modified binomial that weights each allele count by its certainty. If type is set to 'PO' the LRT uses a beta distribution to fit the allele frequency spectrum of the target and background. PO requires the AD and DP genotype fields and requires at least two pools for the target and background. The p-value calculated in pFst is based on the chi-squared distribution with one degree of freedom. +- C++ compiler +- htslib +- tabixpp -``` -INFO: help -INFO: description: - pFst is a probabilistic approach for detecting differences in allele frequencies between two populations. - -Output : 3 columns : - 1. seqid - 2. position - 3. 
pFst probability - -INFO: usage: pFst --target 0,1,2,3,4,5,6,7 --background 11,12,13,16,17,19,22 --file my.vcf --deltaaf 0.1 --type PL - -INFO: required: t,target -- argument: a zero based comma separated list of target individuals corresponding to VCF columns -INFO: required: b,background -- argument: a zero based comma separated list of background individuals corresponding to VCF columns -INFO: required: f,file -- argument: a properly formatted VCF. -INFO: required: y,type -- argument: genotype likelihood format ; genotypes: GP, GL or PL; pooled: PO -INFO: optional: d,deltaaf -- argument: skip sites where the difference in allele frequencies is less than deltaaf, default is zero -INFO: optional: r,region -- argument: a tabix compliant genomic range : seqid or seqid:start-end -INFO: optional: c,counts -- switch : use genotype counts rather than genotype likelihoods to estimate parameters, default false -``` +For include files add -### EHH and PI +- libhts-dev +- libtabixpp-dev +- libtabixpp0 -The 'sequenceDiversity' program calculates extended haplotype homozygosity and pi within a fixed-width sliding window. This requires phased data. +And for some of the VCF executables -``` -INFO: help -INFO: description: - The sequenceDiversity program calculates two popular metrics of haplotype diversity: pi and - extended haplotype homozygoisty (eHH). Pi is calculated using the Nei and Li 1979 formulation. - eHH a convenient way to think about haplotype diversity. When eHH = 0 all haplotypes in the window - are unique and when eHH = 1 all haplotypes in the window are identical. - -Output : 5 columns: - 1. seqid - 2. start of window - 3. end of window - 4. pi - 5. 
eHH - - -INFO: usage: sequenceDiversity --target 0,1,2,3,4,5,6,7 --file my.vcf - -INFO: required: t,target -- argument: a zero base comma separated list of target individuals corresponding to VCF columns -INFO: required: f,file -- argument: a properly formatted phased VCF file -INFO: required: y,type -- argument: type of genotype likelihood: PL, GL or GP -INFO: optional: a,af -- sites less than af are filtered out; default is 0 -INFO: optional: r,region -- argument: a tabix compliant region : "seqid:0-100" or "seqid" -INFO: optional: w,window -- argument: the number of SNPs per window; default is 20 +- python +- perl -``` +### Using a different htslib -### meltEHH +Check out htslib in tabixpp (recursively) and -The program 'meltEHH' produces the data to generate the following plot: + cmake -DHTSLIB_LOCAL:STRING=./tabixpp/htslib/ .. + cmake --build . - +## link library -``` -INFO: help -INFO: description: - meltEHH provides the data to plot EHH curves. -Output : 4 columns : - 1. seqid - 2. position - 3. EHH - 4. ref or alt [0 == ref] -Usage: - meltEHH --target 0,1,2,3,4,5,6,7 --pos 10 --file my.phased.vcf \ - --region chr1:1-1000 > STDOUT 2> STDERR - -Params: - required: t,target A zero base comma separated list of target - individuals corresponding to VCF columns - required: r,region A tabix compliant genomic range - format: "seqid:start-end" or "seqid" - required: f,file Proper formatted and phased VCF. - required: y,type Genotype likelihood format: GT,PL,GL,GP - required: p,position Variant position to melt. - optional: a,af Alternative alleles with frequencies less - than [0.05] are skipped. -``` -### iHS +The standard build creates `build/vcflib.a`. Take a hint from the +[cmake](./CMakeLists.txt) file that builds all the vcflib tools. -iHS calculates the integrated haplotype score which measures the relative decay of extended haplotype homozygosity (EHH) for the reference and alternative alleles at a site (see: voight et al. 2006, Spiech & Hernandez 2014). 
Our code is highly concordant with both implementations mentioned. However, we do not set an upper limit to the allele frequency. iHS can be run without a genetic map, in which case the change in EHH is integrated over a constant. Human genetic maps for GRCh36 and GRCh37 (hg18 & hg19) can be found at: http://bochet.gcc.biostat.washington.edu/beagle/genetic_maps/ . iHS by default interpolates SNV positions to genetic position (you don't need a genetic position for every VCF entry in the map file). +## source code -iHS analyses requires normalization by allele frequency. It is important that iHS is calculated over large regions so that the normalization does not down weight real signals. For genome-wide runs it is recommended to run slightly overlapping windows and throwing out values that fail integration (columns 7 & 8 in the output) and then removing duplicates by using the 'sort' and 'uniq' linux commands. Normalization of the output is as simple as running 'normalize-iHS'. +See [vcfecho.cpp](./src/vcfecho.cpp) for basic usage. +[Variant.h](./src/Variant.h) and [Variant.cpp](./src/Variant.cpp) +describe methods available in the API. vcflib is incorporated into +several projects, such as +[freebayes](https://github.com/freebayes/freebayes), which may provide +a point of reference for prospective developers. Note vcflib contains +submodules (git repositories) comprising some dependencies. A full +Guix development environment we use is defined [here](./guix.scm). -``` -INFO: help -INFO: description: - iHS calculates the integrated ratio of haplotype decay between the reference and non-reference allele. -Output : 4 columns : - 1. seqid - 2. position - 3. target allele frequency - 4. integrated EHH (alternative) - 5. integrated EHH (reference) - 6. iHS ln(iEHHalt/iEHHref) - 7. != 0 integration failure - 8. 
!= 0 integration failure - -Usage: - iHS --target 0,1,2,3,4,5,6,7 --file my.phased.vcf \ - --region chr1:1-1000 > STDOUT 2> STDERR - -Params: - required: t,target A zero base comma separated list of target - individuals corresponding to VCF columns - required: r,region A tabix compliant genomic range - format: "seqid:start-end" or "seqid" - required: f,file Proper formatted and phased VCF. - required: y,type Genotype likelihood format: GT,PL,GL,GP - optional: a,af Alternative alleles with frquences less - than [0.05] are skipped. - optional: x,threads Number of CPUS [1]. - recommended: g,gen A PLINK formatted map file. -``` -### smoother -``` -A method for window smoothing many of the GPAT++ formats. +# LICENSE -INFO: help -INFO: description: - Smoother averages a set of scores over a sliding genomic window. - Smoother slides over genomic positions not the SNP indices. In other words - the number of scores within a window will not be constant. The last - window for each seqid can be smaller than the defined window size. - Smoother automatically analyses different seqids separately. -Output : 4 columns : - 1. seqid - 2. window start - 2. window end - 3. averaged score - -INFO: usage: smoother --format pFst --file GPA.output.txt - -INFO: required: f,file -- argument: a file created by GPAT++ -INFO: required: o,format -- argument: format of input file, case sensitive - available format options: - wcFst, pFst, bFst, iHS, xpEHH, abba-baba -INFO: optional: w,window -- argument: size of genomic window in base pairs (default 5000) -INFO: optional: s,step -- argument: window step size in base pairs (default 1000) -INFO: optional: t,truncate -- flag : end last window at last position (zero based) last window at last position (zero based) -``` +This software is distributed under the free software [MIT +LICENSE](./LICENSE). 
diff -Nru libvcflib-1.0.1+dfsg/RELEASE_NOTES.md libvcflib-1.0.2+dfsg/RELEASE_NOTES.md --- libvcflib-1.0.1+dfsg/RELEASE_NOTES.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/RELEASE_NOTES.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,22 @@ +## ChangeLog v1.0.2 (20210104) + +This is a maintenance release of vcflib, mostly improving the build +system, CI and generating markdown docs as well as man pages. + ++ Removed tabixpp and htslib source dependencies, i.e., we are now using + the distro provided libraries and include files through pkg-config. + See also the [README](README.md#build-from-source) ++ Removed the tabixpp+htslib git submodules ++ Generalise and document the cmake build system ++ Added tests to the cmake build system and build instructions to README ++ Added support for ARM64 and PowerPC, see #292 (thanks @genisysram and @mr-c) ++ Added github actions for the issue tracker ++ Added github CI ++ Updated header files in src with copyright/license info, see #16 ++ Created markdown [docs](./doc/vcflib.md) and [man pages](./man/) for + all utilities. Created a script bin2md for markdown generation and + use pandoc for the man page generation. + +## Older changes + +For older changes view the git [log](https://github.com/vcflib/vcflib/commits/master).
diff -Nru libvcflib-1.0.1+dfsg/samples/empty.vcf libvcflib-1.0.2+dfsg/samples/empty.vcf --- libvcflib-1.0.1+dfsg/samples/empty.vcf 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/samples/empty.vcf 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,6 @@ +##fileformat=VCFv4.1 +##fileDate=20191001 +##source=copyPasta +##INFO= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 diff -Nru libvcflib-1.0.1+dfsg/scripts/bed2region libvcflib-1.0.2+dfsg/scripts/bed2region --- libvcflib-1.0.1+dfsg/scripts/bed2region 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/bed2region 2021-01-28 07:04:12.000000000 +0000 @@ -1,4 +1,10 @@ #!/usr/bin/env perl +# convert VCF CHROM column in VCF file to region + +# Example +# +# #CHROM:POS-ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 ... +# 20:1110696-rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB ... while () { $_ =~ /^(.+?)\s(.+?)\s(.+)\s*/; diff -Nru libvcflib-1.0.1+dfsg/scripts/bgziptabix libvcflib-1.0.2+dfsg/scripts/bgziptabix --- libvcflib-1.0.1+dfsg/scripts/bgziptabix 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/bgziptabix 2021-01-28 07:04:12.000000000 +0000 @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# runs bgzip on the input and tabix indexes the result if [ $# -ne 1 ]; then diff -Nru libvcflib-1.0.1+dfsg/scripts/plotBfst.R libvcflib-1.0.2+dfsg/scripts/plotBfst.R --- libvcflib-1.0.1+dfsg/scripts/plotBfst.R 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/plotBfst.R 2021-01-28 07:04:12.000000000 +0000 @@ -1,4 +1,6 @@ -#usage: nohup R --vanilla < plotPfst --args pFst.txt +# plot results of pFst +# +# usage: nohup R --vanilla < plotPfst --args pFst.txt cmd_args <- commandArgs(trailingOnly = TRUE) @@ -12,4 +14,4 @@ ggsave(filename=pngName, width=20, height=4, units="in", theplot) } -plotPfst(cmd_args) \ No newline at end of file +plotPfst(cmd_args) diff -Nru libvcflib-1.0.1+dfsg/scripts/plotHaplotypes.R 
libvcflib-1.0.2+dfsg/scripts/plotHaplotypes.R --- libvcflib-1.0.1+dfsg/scripts/plotHaplotypes.R 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/plotHaplotypes.R 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,5 @@ +# plot results +# #usage: nohup R --vanilla < plotPfst --args plotHapOutput.txt cmd_args <- commandArgs(trailingOnly = TRUE) @@ -24,4 +26,4 @@ } -imageHap(cmd_args) \ No newline at end of file +imageHap(cmd_args) diff -Nru libvcflib-1.0.1+dfsg/scripts/plotHapLrt.R libvcflib-1.0.2+dfsg/scripts/plotHapLrt.R --- libvcflib-1.0.1+dfsg/scripts/plotHapLrt.R 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/plotHapLrt.R 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,5 @@ +# plot results of pFst +# #usage: nohup R --vanilla < plotPfst --args pFst.txt cmd_args <- commandArgs(trailingOnly = TRUE) @@ -11,4 +13,4 @@ ggsave(filename=pngName, width=20, height=4, units="in", theplot) } -plotPfst(cmd_args) \ No newline at end of file +plotPfst(cmd_args) diff -Nru libvcflib-1.0.1+dfsg/scripts/plotPfst.R libvcflib-1.0.2+dfsg/scripts/plotPfst.R --- libvcflib-1.0.1+dfsg/scripts/plotPfst.R 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/plotPfst.R 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,5 @@ +# Plot pFst +# #usage: nohup R --vanilla < plotPfst --args pFst.txt cmd_args <- commandArgs(trailingOnly = TRUE) @@ -12,4 +14,4 @@ ggsave(filename=pngName, width=20, height=4, units="in", theplot) } -plotPfst(cmd_args) \ No newline at end of file +plotPfst(cmd_args) diff -Nru libvcflib-1.0.1+dfsg/scripts/plot_roc.r libvcflib-1.0.2+dfsg/scripts/plot_roc.r --- libvcflib-1.0.1+dfsg/scripts/plot_roc.r 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/plot_roc.r 2021-01-28 07:04:12.000000000 +0000 @@ -1,7 +1,5 @@ #!/usr/bin/env Rscript - - - +# Plot ROC require(plyr) require(ggplot2) diff -Nru libvcflib-1.0.1+dfsg/scripts/plotSmoothed.R libvcflib-1.0.2+dfsg/scripts/plotSmoothed.R --- 
libvcflib-1.0.1+dfsg/scripts/plotSmoothed.R 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/plotSmoothed.R 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,5 @@ +# Smooth plot of wcFst, pFst or abba-baba +# #usage: nohup R --vanilla < plotPfst --args smoothedpFst.txt wcFst|pFst|abba-baba cmd_args <- commandArgs(trailingOnly = TRUE) diff -Nru libvcflib-1.0.1+dfsg/scripts/plotWCfst.R libvcflib-1.0.2+dfsg/scripts/plotWCfst.R --- libvcflib-1.0.1+dfsg/scripts/plotWCfst.R 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/plotWCfst.R 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,5 @@ +# plot wcFst +# #usage: nohup R --vanilla < plotPfst --args pFst.txt cmd_args <- commandArgs(trailingOnly = TRUE) @@ -11,4 +13,4 @@ ggsave(filename=pngName, width=20, height=4, units="in", theplot) } -plotPfst(cmd_args) \ No newline at end of file +plotPfst(cmd_args) diff -Nru libvcflib-1.0.1+dfsg/scripts/plotXPEHH.R libvcflib-1.0.2+dfsg/scripts/plotXPEHH.R --- libvcflib-1.0.1+dfsg/scripts/plotXPEHH.R 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/plotXPEHH.R 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,5 @@ +# Plot XPEHH +# #usage: nohup R --vanilla < plotPfst --args pFst.txt cmd_args <- commandArgs(trailingOnly = TRUE) @@ -11,4 +13,4 @@ ggsave(filename=pngName, width=20, height=4, units="in", theplot) } -plotPfst(cmd_args) \ No newline at end of file +plotPfst(cmd_args) diff -Nru libvcflib-1.0.1+dfsg/scripts/vcf2bed.py libvcflib-1.0.2+dfsg/scripts/vcf2bed.py --- libvcflib-1.0.1+dfsg/scripts/vcf2bed.py 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcf2bed.py 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,6 @@ #!/usr/bin/env python3 - +# Transform VCF to BED file +# import sys for line in sys.stdin: diff -Nru libvcflib-1.0.1+dfsg/scripts/vcf2sqlite.py libvcflib-1.0.2+dfsg/scripts/vcf2sqlite.py --- libvcflib-1.0.1+dfsg/scripts/vcf2sqlite.py 2019-10-01 07:06:02.000000000 +0000 +++ 
libvcflib-1.0.2+dfsg/scripts/vcf2sqlite.py 2021-01-28 07:04:12.000000000 +0000 @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# Push VCF file into SQLite3 database using dbname import sys import re diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfbiallelic libvcflib-1.0.2+dfsg/scripts/vcfbiallelic --- libvcflib-1.0.1+dfsg/scripts/vcfbiallelic 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfbiallelic 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,5 @@ #!/usr/bin/env perl -# +# Remove anything that is not biallelic while () { if ($_ =~ /^#/) { diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfclearid libvcflib-1.0.2+dfsg/scripts/vcfclearid --- libvcflib-1.0.1+dfsg/scripts/vcfclearid 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfclearid 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,6 @@ #!/usr/bin/env python3 - +# Clear ID field +# import sys for line in sys.stdin: diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfclearinfo libvcflib-1.0.2+dfsg/scripts/vcfclearinfo --- libvcflib-1.0.1+dfsg/scripts/vcfclearinfo 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfclearinfo 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,6 @@ #!/usr/bin/env python3 - +# Clear INFO field +# import sys for line in sys.stdin: diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfcomplex libvcflib-1.0.2+dfsg/scripts/vcfcomplex --- libvcflib-1.0.1+dfsg/scripts/vcfcomplex 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfcomplex 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,5 @@ #!/usr/bin/env perl -# +# Remove all SNPs but keep SVs while () { if ($_ =~ /^#/) { diff -Nru libvcflib-1.0.1+dfsg/scripts/vcffirstheader libvcflib-1.0.2+dfsg/scripts/vcffirstheader --- libvcflib-1.0.1+dfsg/scripts/vcffirstheader 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcffirstheader 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,6 @@ #!/usr/bin/env python3 - +# Show first header +# import sys header=True diff -Nru 
libvcflib-1.0.1+dfsg/scripts/vcfgtcompare.sh libvcflib-1.0.2+dfsg/scripts/vcfgtcompare.sh --- libvcflib-1.0.1+dfsg/scripts/vcfgtcompare.sh 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfgtcompare.sh 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,6 @@ #!/bin/bash - +# annotates records in the first file with genotypes and sites from the second +# if [ $# != 3 ]; then echo "usage: $0 [annotation] [fileA] [fileB]" diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfindelproximity libvcflib-1.0.2+dfsg/scripts/vcfindelproximity --- libvcflib-1.0.1+dfsg/scripts/vcfindelproximity 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfindelproximity 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,5 @@ #!/usr/bin/env perl -# +# Show SNPs around an INDEL diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfindels libvcflib-1.0.2+dfsg/scripts/vcfindels --- libvcflib-1.0.1+dfsg/scripts/vcfindels 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfindels 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,5 @@ #!/usr/bin/env perl -# +# Show INDELS while () { if ($_ =~ /^#/) { diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfjoincalls libvcflib-1.0.2+dfsg/scripts/vcfjoincalls --- libvcflib-1.0.1+dfsg/scripts/vcfjoincalls 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfjoincalls 2021-01-28 07:04:12.000000000 +0000 @@ -1,4 +1,5 @@ #!/bin/bash +# overlay files using QUAL and GT from a second VCF if [ $# -ne 5 ]; then diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfmultiallelic libvcflib-1.0.2+dfsg/scripts/vcfmultiallelic --- libvcflib-1.0.1+dfsg/scripts/vcfmultiallelic 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfmultiallelic 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,5 @@ #!/usr/bin/env perl -# +# remove anything that is not multiallelic while () { if ($_ =~ /^#/) { diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfmultiway libvcflib-1.0.2+dfsg/scripts/vcfmultiway --- libvcflib-1.0.1+dfsg/scripts/vcfmultiway 
2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfmultiway 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,6 @@ #!/bin/bash - +# Multiway comparison +# reference=$1 shift diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfmultiwayscripts libvcflib-1.0.2+dfsg/scripts/vcfmultiwayscripts --- libvcflib-1.0.1+dfsg/scripts/vcfmultiwayscripts 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfmultiwayscripts 2021-01-28 07:04:12.000000000 +0000 @@ -1,4 +1,5 @@ #!/bin/bash +# More multiway comparisons reference=$1 outdir=$2 diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfnobiallelicsnps libvcflib-1.0.2+dfsg/scripts/vcfnobiallelicsnps --- libvcflib-1.0.1+dfsg/scripts/vcfnobiallelicsnps 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfnobiallelicsnps 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,5 @@ #!/usr/bin/env perl -# +# Remove biallelic SNPs while () { if ($_ =~ /^#/) { diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfnoindels libvcflib-1.0.2+dfsg/scripts/vcfnoindels --- libvcflib-1.0.1+dfsg/scripts/vcfnoindels 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfnoindels 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,5 @@ #!/usr/bin/env perl -# +# Remove INDELs while () { if ($_ =~ /^#/) { diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfnosnps libvcflib-1.0.2+dfsg/scripts/vcfnosnps --- libvcflib-1.0.1+dfsg/scripts/vcfnosnps 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfnosnps 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,5 @@ #!/usr/bin/env perl -# +# Remove SNPs while () { if ($_ =~ /^#/) { diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfnulldotslashdot libvcflib-1.0.2+dfsg/scripts/vcfnulldotslashdot --- libvcflib-1.0.1+dfsg/scripts/vcfnulldotslashdot 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfnulldotslashdot 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,6 @@ #!/usr/bin/env python3 - +# Rewrite null genotypes to ./. 
+# import sys import math diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfplotaltdiscrepancy.r libvcflib-1.0.2+dfsg/scripts/vcfplotaltdiscrepancy.r --- libvcflib-1.0.1+dfsg/scripts/vcfplotaltdiscrepancy.r 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfplotaltdiscrepancy.r 2021-01-28 07:04:12.000000000 +0000 @@ -1,4 +1,5 @@ #!/usr/bin/env Rscript +# Plot ALT discrepancies # helper functions diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfplotaltdiscrepancy.sh libvcflib-1.0.2+dfsg/scripts/vcfplotaltdiscrepancy.sh --- libvcflib-1.0.1+dfsg/scripts/vcfplotaltdiscrepancy.sh 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfplotaltdiscrepancy.sh 2021-01-28 07:04:12.000000000 +0000 @@ -1,4 +1,5 @@ #!/bin/bash +# Plot ALT discrepancy runner filename=$1 tag=$2 diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfplotsitediscrepancy.r libvcflib-1.0.2+dfsg/scripts/vcfplotsitediscrepancy.r --- libvcflib-1.0.1+dfsg/scripts/vcfplotsitediscrepancy.r 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfplotsitediscrepancy.r 2021-01-28 07:04:12.000000000 +0000 @@ -1,4 +1,5 @@ #!/usr/bin/env Rscript --vanilla --slave +# Plot site discrepancy # get the input VCF tabular format, assert that sites must have AC > 0 vcf <- subset(read.table(pipe('cat /dev/stdin'), header=T), AC > 0) diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfplottstv.sh libvcflib-1.0.2+dfsg/scripts/vcfplottstv.sh --- libvcflib-1.0.1+dfsg/scripts/vcfplottstv.sh 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfplottstv.sh 2021-01-28 07:04:12.000000000 +0000 @@ -1,4 +1,5 @@ #!/bin/bash +# Runner filename=$1 title=$2 diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfprintaltdiscrepancy.r libvcflib-1.0.2+dfsg/scripts/vcfprintaltdiscrepancy.r --- libvcflib-1.0.1+dfsg/scripts/vcfprintaltdiscrepancy.r 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfprintaltdiscrepancy.r 2021-01-28 07:04:12.000000000 +0000 @@ -1,4 +1,5 @@ #!/usr/bin/env Rscript 
--vanilla --slave +# Show ALT discrepancies in a table # get the input VCF tabular format, assert that sites must have AC > 0 vcf <- subset(read.table(pipe('cat /dev/stdin'), header=T), AC > 0) diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfprintaltdiscrepancy.sh libvcflib-1.0.2+dfsg/scripts/vcfprintaltdiscrepancy.sh --- libvcflib-1.0.1+dfsg/scripts/vcfprintaltdiscrepancy.sh 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfprintaltdiscrepancy.sh 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,6 @@ #!/bin/bash - +# Runner +# tag=$1 vcf2tsv \ diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfqualfilter libvcflib-1.0.2+dfsg/scripts/vcfqualfilter --- libvcflib-1.0.1+dfsg/scripts/vcfqualfilter 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfqualfilter 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,5 @@ #!/usr/bin/env perl -# +# Quality filter # use Getopt::Long; diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfregionreduce libvcflib-1.0.2+dfsg/scripts/vcfregionreduce --- libvcflib-1.0.1+dfsg/scripts/vcfregionreduce 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfregionreduce 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,6 @@ #!/bin/bash - +# Reduce VCFs using a BED File, gzip them up and create tabix index +# if [ $# -ne 2 ]; then echo "Usage: $0 [region file] [directory]" diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfregionreduce_and_cut libvcflib-1.0.2+dfsg/scripts/vcfregionreduce_and_cut --- libvcflib-1.0.1+dfsg/scripts/vcfregionreduce_and_cut 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfregionreduce_and_cut 2021-01-28 07:04:12.000000000 +0000 @@ -1,4 +1,5 @@ #!/bin/bash +# Reduce, gzip, and tabix if [ $# -ne 2 ]; then diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfregionreduce_pipe libvcflib-1.0.2+dfsg/scripts/vcfregionreduce_pipe --- libvcflib-1.0.1+dfsg/scripts/vcfregionreduce_pipe 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfregionreduce_pipe 2021-01-28 
07:04:12.000000000 +0000 @@ -1,5 +1,6 @@ #!/bin/bash - +# Reduce, gzip and tabix in a pipe +# if [ $# -ne 2 ]; then echo "Usage: $0 [region file] [directory]" diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfregionreduce_uncompressed libvcflib-1.0.2+dfsg/scripts/vcfregionreduce_uncompressed --- libvcflib-1.0.1+dfsg/scripts/vcfregionreduce_uncompressed 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfregionreduce_uncompressed 2021-01-28 07:04:12.000000000 +0000 @@ -1,4 +1,5 @@ #!/bin/bash +# Reduce, gzip and tabix if [ $# -ne 2 ]; then diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfremovenonATGC libvcflib-1.0.2+dfsg/scripts/vcfremovenonATGC --- libvcflib-1.0.1+dfsg/scripts/vcfremovenonATGC 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfremovenonATGC 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,5 @@ #!/usr/bin/env perl -# +# Remove non-nucleotides in REF or ALT while () { if ($_ =~ /^#/) { diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfsnps libvcflib-1.0.2+dfsg/scripts/vcfsnps --- libvcflib-1.0.1+dfsg/scripts/vcfsnps 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfsnps 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,5 @@ #!/usr/bin/env perl -# +# Show SNPs while () { if ($_ =~ /^#/) { diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfsort libvcflib-1.0.2+dfsg/scripts/vcfsort --- libvcflib-1.0.1+dfsg/scripts/vcfsort 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfsort 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,4 @@ #!/bin/bash +# Sort VCF using shell script head -1000 $1 | grep "^#"; cat $@ | grep -v "^#" | sort -k1,1d -k2,2n diff -Nru libvcflib-1.0.1+dfsg/scripts/vcf_strip_extra_headers libvcflib-1.0.2+dfsg/scripts/vcf_strip_extra_headers --- libvcflib-1.0.1+dfsg/scripts/vcf_strip_extra_headers 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcf_strip_extra_headers 2021-01-28 07:04:12.000000000 +0000 @@ -1,4 +1,5 @@ #!/usr/bin/env perl +# Strip headers my $seen_non_header 
= 0; diff -Nru libvcflib-1.0.1+dfsg/scripts/vcfvarstats libvcflib-1.0.2+dfsg/scripts/vcfvarstats --- libvcflib-1.0.1+dfsg/scripts/vcfvarstats 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/scripts/vcfvarstats 2021-01-28 07:04:12.000000000 +0000 @@ -1,5 +1,5 @@ #!/usr/bin/env perl -# +# Use fastahack to get stats use IPC::Open2; diff -Nru libvcflib-1.0.1+dfsg/src/abba-baba.cpp libvcflib-1.0.2+dfsg/src/abba-baba.cpp --- libvcflib-1.0.1+dfsg/src/abba-baba.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/abba-baba.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "cdflib.hpp" @@ -6,7 +15,7 @@ #include #include -#include +#include #include #include #include @@ -35,9 +44,9 @@ cerr << " \\ / " << endl; cerr << " / " << endl; cerr << " / " << endl; - + cerr << " --tree A,B,C,D" << endl << endl; - + cerr << "Output : 4 columns : " << endl; cerr << " 1. seqid " << endl; @@ -50,7 +59,7 @@ cerr << "INFO: required: t,tree -- a zero based comma separated list of target individuals corrisponding to VCF columns" << endl; cerr << "INFO: required: f,file -- a properly formatted VCF. 
" << endl; cerr << "INFO: required: y,type -- genotype likelihood format ; genotypes: GP,GL or PL; " << endl; - cerr << endl; + cerr << endl << endl << "type: genotype" << endl; printVersion() ; } @@ -65,12 +74,13 @@ return v; } -/*random sample heterozygous genotypes could eventually be weighted +/*random sample heterozygous genotypes could eventually be weighted by genotype likelihoods and added complexity for linked, phased genos random sampling adds noise but will not affect the overall measurement of D-statistic */ int sample_het(int &rv){ rv = rand() % 2 ; // pick from 0/1 het with 50-50 odds + return rv; } @@ -119,7 +129,7 @@ } void loadIndices(vector & tree, string set){ - + vector indviduals = split(set, ","); if(indviduals.size() < 4){ @@ -127,7 +137,7 @@ exit(1); } for( vector::iterator it = indviduals.begin(); it != indviduals.end(); it++){ - + int indx = atoi((*it).c_str()); cerr << indx << endl; //print sample index for user check tree.push_back(indx); @@ -137,7 +147,7 @@ int main(int argc, char** argv) { // pooled or genotyped - + int pool = 0; // the filename @@ -146,22 +156,22 @@ // set region to scaffold - string region = "NA"; + string region = "NA"; - // using vcflib; thanks to Erik Garrison + // using vcflib; thanks to Erik Garrison VariantCallFile variantFile; // zero based index for the tree - + vector tree; - - // deltaaf is the difference of allele frequency we bother to look at + + // deltaaf is the difference of allele frequency we bother to look at string deltaaf ; double daf = 0; - // + // int counts = 0; @@ -169,7 +179,7 @@ string type = "NA"; - const struct option longopts[] = + const struct option longopts[] = { {"version" , 0, 0, 'v'}, {"help" , 0, 0, 'h'}, @@ -188,7 +198,7 @@ while(iarg != -1) { iarg = getopt_long(argc, argv, "r:d:t:f:y:hv", longopts, &index); - + switch (iarg) { case 'h': @@ -197,7 +207,7 @@ case 'v': printVersion(); return 0; - case 'y': + case 'y': type = optarg; cerr << "INFO: genotype likelihoods set to: " << 
type << endl; break; @@ -210,22 +220,22 @@ break; case 'r': cerr << "INFO: set seqid region to : " << optarg << endl; - region = optarg; + region = optarg; break; default: break; } } - + if(filename == "NA"){ cerr << "FATAL: did not specify the file\n"; printHelp(); exit(1); } - + variantFile.open(filename); - + if(region != "NA"){ variantFile.setRegion(region); } @@ -250,10 +260,10 @@ cerr << "FATAL: genotype likelihood is incorrectly formatted, only use: PL or GL" << endl; printHelp(); return 1; - } + } Variant var(variantFile); - + vector sampleNames = variantFile.sampleNames; srand(time(0)); //initialize random number generator @@ -263,7 +273,7 @@ if(var.alt.size() > 1){ continue; } - + map > tA, tB, tC, tD; tA = var.samples[sampleNames[tree[0]]]; @@ -274,7 +284,7 @@ if(tA["GT"].front() == "./." || tB["GT"].front() == "./." || tC["GT"].front() == "./." || tD["GT"].front() == "./."){ continue; } - + int A = 0,B = 0,C = 0,D = 0; // set default allelic state to zero double abba = 0; //booleans for abab or baba state. @@ -309,5 +319,5 @@ // above is alternate print to check that we are getting observed // ABBA or BABA patterns } - return 0; + return 0; } diff -Nru libvcflib-1.0.1+dfsg/src/BedReader.h libvcflib-1.0.2+dfsg/src/BedReader.h --- libvcflib-1.0.1+dfsg/src/BedReader.h 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/BedReader.h 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #ifndef BEDREADER_H #define BEDREADER_H diff -Nru libvcflib-1.0.1+dfsg/src/bFst.cpp libvcflib-1.0.2+dfsg/src/bFst.cpp --- libvcflib-1.0.1+dfsg/src/bFst.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/bFst.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,10 +1,19 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "pdflib.hpp" #include #include -#include +#include #include #include #include @@ -19,7 +28,7 @@ double nalt ; double nref ; - double af ; + double af ; double nhomr; double nhoma; double nhet ; @@ -27,11 +36,11 @@ double fis ; vector questionable; vector geno_index ; - vector< vector< double > > unphred_p; - + vector< vector< double > > unphred_p; + }; -double unphred(string phred){ +double unphred(string phred){ double unphred = atof(phred.c_str()); unphred = unphred / -10; return unphred; @@ -46,7 +55,7 @@ population.nhet = 0; population.ngeno = 0; population.fis = 0; - + } void loadPop( vector< map< string, vector > >& group, pop & population){ @@ -56,27 +65,27 @@ int index = 0; for(; targ_it != group.end(); targ_it++){ - + string genotype = (*targ_it)["GT"].front(); - + vector phreds; phreds.push_back( unphred((*targ_it)["PL"][0])); phreds.push_back( unphred((*targ_it)["PL"][1])); phreds.push_back( unphred((*targ_it)["PL"][2])); - + double scaled ; double norm = log(exp(phreds[0]) + exp(phreds[1]) + exp(phreds[2])); - + population.unphred_p.push_back(phreds); - + while(1){ if(genotype == "0/0"){ population.ngeno += 1; population.nhomr += 1; population.nref += 2; - population.geno_index.push_back(0); - scaled = exp(phreds[0] - norm); + population.geno_index.push_back(0); + scaled = exp(phreds[0] - norm); break; } if(genotype == "0/1"){ @@ -85,7 +94,7 @@ population.nref += 1; population.nalt += 1; 
population.geno_index.push_back(1); - scaled = exp(phreds[1] - norm); + scaled = exp(phreds[1] - norm); break; } if(genotype == "1/1"){ @@ -93,7 +102,7 @@ population.nhoma += 1; population.nalt += 2; population.geno_index.push_back(2); - scaled = exp(phreds[2] - norm); + scaled = exp(phreds[2] - norm); break; } if(genotype == "0|0"){ @@ -101,7 +110,7 @@ population.nhomr += 1; population.nref += 2; population.geno_index.push_back(0); - scaled = exp(phreds[0] - norm); + scaled = exp(phreds[0] - norm); break; } if(genotype == "0|1"){ @@ -110,7 +119,7 @@ population.nref += 1; population.nalt += 1; population.geno_index.push_back(1); - scaled = exp(phreds[1] - norm); + scaled = exp(phreds[1] - norm); break; } if(genotype == "1|1"){ @@ -118,26 +127,26 @@ population.nhoma += 1; population.nalt += 2; population.geno_index.push_back(2); - scaled = exp(phreds[2] - norm); + scaled = exp(phreds[2] - norm); break; } cerr << "FATAL: unknown genotype" << endl; exit(1); } - + if(scaled < 0.75){ population.questionable.push_back(index); - } + } index += 1; } - + if(population.nalt == 0 && population.nref == 0){ population.af = -1; } else{ - + population.af = (population.nalt / (population.nref + population.nalt)); - + if(population.nhet > 0){ population.fis = ( 1 - ((population.nhet/population.ngeno) / (2*population.af*(1 - population.af)))); } @@ -147,7 +156,7 @@ if(population.fis < 0){ population.fis = 0.00001; } - } + } } double bound(double v){ @@ -162,15 +171,15 @@ void phardy(vector& results, double af, double fis){ - + double p0 = pow((1 - af),2) + ((1 - af)*af*fis); double p1 = 2*(1 - af)*af*(1 - fis); double p2 = pow(af,2) + ((1 - af)*af*fis); - + results.push_back(p0); results.push_back(p1); results.push_back(p2); - + } double likelihood(pop & population, double af, double fis){ @@ -200,14 +209,14 @@ double norm = exp(aa) + exp(ab) + exp(bb); double prop = population.unphred_p[geno_indx][*it] + log(genotypeProbs[*it]); - + loglikelihood += (prop - norm); geno_indx++; } - 
+ return loglikelihood; - + } double FullProb(pop & target, pop & back, vector& p) @@ -222,7 +231,7 @@ double afprior = log( r8_normal_pdf (p[6], 0.1, p[4])); double afpriorT = log( r8_normal_pdf (target.af, 0.05, p[0])); double afpriorB = log( r8_normal_pdf (back.af, 0.05, p[1])); - + if(std::isinf(afprior) || std::isnan(afprior)){ return -100000; } @@ -239,9 +248,9 @@ double llb = likelihood(back, p[1], p[3]); double full = llt + llb + ptaf + pbaf + afprior + afpriorT + afpriorB; - + return full; - + } @@ -255,46 +264,46 @@ double accept = ((double)rand() / (double)(RAND_MAX)); double up = ((double)rand() / (double)(RAND_MAX))/10 - 0.05; double updatep = parameters[pindx] + up; - + // cerr << accept << "\t" << up << endl; if(updatep >= 1 || updatep <= 0){ return; } - + double llB = FullProb(target, background, parameters); parameters[pindx] = updatep; double llT = FullProb(target, background, parameters); - + if((llT - llB) > accept){ - return; + return; } else{ parameters[pindx] = origpar; } -} +} void updateGenotypes(pop & target, pop & background, vector& parameters, int gindex, int tbindex){ - + // tbindex indicates if the subroutine will update the target or background genotype; double accept = ((double)rand() / (double)(RAND_MAX)); int newGindex = rand() % 3; - + //cerr << newGindex << endl; //cerr << "gindex " << gindex << endl; - //cerr << "gsize t:" << target.geno_index.size() << endl; - //cerr << "gsize b:" << background.geno_index.size() << endl; - + //cerr << "gsize t:" << target.geno_index.size() << endl; + //cerr << "gsize b:" << background.geno_index.size() << endl; + int oldtindex = target.geno_index[gindex] ; - int oldbindex = background.geno_index[gindex] ; - + int oldbindex = background.geno_index[gindex] ; + double llB = FullProb(target, background, parameters); - + if(tbindex == 0){ - //udate target + //udate target target.geno_index[gindex] = newGindex; } else{ @@ -302,7 +311,7 @@ background.geno_index[gindex] = newGindex; } double llT = 
FullProb(target, background, parameters); - + if((llT - llB) > accept){ return; } @@ -313,7 +322,7 @@ else{ target.geno_index[gindex] = oldbindex; } - } + } } @@ -327,11 +336,11 @@ void loadIndices(map & index, string set){ - + vector indviduals = split(set, ","); vector::iterator it = indviduals.begin(); - + for(; it != indviduals.end(); it++){ index[ atoi( (*it).c_str() ) ] = 1; } @@ -348,20 +357,20 @@ string filename = "NA"; - // using vcflib; thanks to Erik Garrison - + // using vcflib; thanks to Erik Garrison + VariantCallFile variantFile ; - // zero based index for the target and background indivudals - + // zero based index for the target and background indivudals + map it, ib; - - // deltaaf is the difference of allele frequency we bother to look at + + // deltaaf is the difference of allele frequency we bother to look at string deltaaf ; double daf = -1; - const struct option longopts[] = + const struct option longopts[] = { {"version" , 0, 0, 'v'}, {"help" , 0, 0, 'h'}, @@ -378,7 +387,7 @@ while(iarg != -1) { iarg = getopt_long(argc, argv, "d:t:b:f:hv", longopts, &index); - + switch (iarg) { case 0: @@ -386,15 +395,15 @@ case 'h': cerr << endl; - cerr << "INFO: help: " << endl << endl; + cerr << "INFO: help" << endl << endl; - cerr << " bFst is a Bayesian approach to Fst. Importantly bFst account for genotype uncertainty in the model using genotype likelihoods." << endl; - cerr << " For a more detailed description see: Holsinger et al. Molecular Ecology Vol 11, issue 7 2002. The likelihood function has been " << endl; + cerr << " bFst is a Bayesian approach to Fst. Importantly bFst accounts for genotype uncertainty in the model using genotype likelihoods." << endl; + cerr << " For a more detailed description see: `A Bayesian approach to inferring population structure from dominant markers' by Holsinger et al. Molecular Ecology Vol 11, issue 7 2002. 
The likelihood function has been " << endl; cerr << " modified to use genotype likelihoods provided by variant callers. There are five free parameters estimated in the model: each " << endl; cerr << " subpopulation's allele frequency and Fis (fixation index, within each subpopulation), a free parameter for the total population\'s " << endl; cerr << " allele frequency, and Fst. " << endl << endl; - - cerr << "Output : 11 columns : " << endl; + + cerr << "Output : 11 columns : " << endl; cerr << " 1. Seqid " << endl; cerr << " 2. Position " << endl; cerr << " 3. Observed allele frequency in target. " << endl; @@ -406,15 +415,15 @@ cerr << " 9. ML estimate of Fst (mean) " << endl; cerr << " 10. Lower bound of the 95% credible interval " << endl; cerr << " 11. Upper bound of the 95% credible interval " << endl << endl; - + cerr << "INFO: usage: bFst --target 0,1,2,3,4,5,6,7 --background 11,12,13,16,17,19,22 --file my.vcf --deltaaf 0.1" << endl; cerr << endl; cerr << "INFO: required: t,target -- a zero bases comma separated list of target individuals corrisponding to VCF columns" << endl; cerr << "INFO: required: b,background -- a zero bases comma separated list of background individuals corrisponding to VCF columns" << endl; - cerr << "INFO: required: f,file a -- a proper formatted VCF file. the FORMAT field MUST contain \"PL\"" << endl; + cerr << "INFO: required: f,file a -- a proper formatted VCF file. 
the FORMAT field MUST contain \"PL\"" << endl; cerr << "INFO: required: d,deltaaf -- skip sites were the difference in allele frequency is less than deltaaf" << endl; - cerr << endl; + cerr << endl << endl << "Type: statistics" << endl; printVersion(); cerr << endl << endl; return 0; @@ -441,13 +450,13 @@ case 'd': cerr << "INFO: difference in allele frequency : " << optarg << endl; deltaaf = optarg; - daf = atof(deltaaf.c_str()); + daf = atof(deltaaf.c_str()); + break; + default: break; - default: - break; cerr << endl; cerr << "FATAL: unknown command line option " << optarg << endl << endl ; - cerr << "INFO: please use bFst --help " << endl; + cerr << "INFO: please use bFst --help " << endl; cerr << endl; return(1); } @@ -457,7 +466,7 @@ if(daf == -1){ cerr << endl; cerr << "FATAL: did not specify deltaaf" << endl; - cerr << "INFO: please use bFst --help " << endl; + cerr << "INFO: please use bFst --help " << endl; cerr << endl; return(1); } @@ -465,25 +474,25 @@ if(filename == "NA"){ cerr << endl; cerr << "FATAL: did not specify VCF file" << endl; - cerr << "INFO: please use bFst --help " << endl; + cerr << "INFO: please use bFst --help " << endl; cerr << endl; return(1); } variantFile.open(filename); - + if (!variantFile.is_open()) { cerr << endl; cerr << "FATAL: could not open VCF file" << endl; - cerr << "INFO: please use bFst --help" << endl; + cerr << "INFO: please use bFst --help" << endl; cerr << endl; return(1); } if(it.size() < 2){ cerr << endl; - cerr << "FATAL: target not specified or less than two indviduals" << endl; - cerr << "INFO: please use bFst --help " << endl; + cerr << "FATAL: target not specified or less than two indviduals" << endl; + cerr << "INFO: please use bFst --help " << endl; cerr << endl; } if(ib.size() < 2){ @@ -492,50 +501,50 @@ cerr << "INFO: please use bFst --help " << endl; cerr << endl; } - + Variant var(variantFile); vector samples = variantFile.sampleNames; int nsamples = samples.size(); while 
(variantFile.getNextVariant(var)) { - - // biallelic sites naturally + + // biallelic sites naturally if(var.alt.size() > 1){ continue; } - + vector < map< string, vector > > target, background, total; - + int index = 0; for(int nsamp = 0; nsamp < nsamples; nsamp++){ map > sample = var.samples[ samples[nsamp]]; - + if(sample["GT"].front() != "./."){ if(it.find(index) != it.end() ){ target.push_back(sample); total.push_back(sample); - + } if(ib.find(index) != ib.end()){ background.push_back(sample); total.push_back(sample); } } - + index += 1; } - + if(target.size() < 2 || background.size() < 2 ){ continue; } - + pop popt, popb, popTotal; - + initPop(popt); initPop(popb); initPop(popTotal); @@ -559,8 +568,8 @@ if(afdiff < daf){ continue; } - - + + cerr << "INFO: target has " << popt.questionable.size() << " questionable genotypes " << endl; cerr << "INFO: background has " << popb.questionable.size() << " questionable genotypes " << endl; @@ -578,18 +587,18 @@ double fsts [10000] ; for(int i = 0; i < 15000; i++){ - + // update each of j parameters - + for(int j = 0; j < 6; j++ ){ - + updateParameters(popt, popb, parameters, j); if(i > 4999){ - sums[j] += parameters[j]; + sums[j] += parameters[j]; } } if(i > 4999){ - fsts[i - 5000] = parameters[5]; + fsts[i - 5000] = parameters[5]; } for(vector::iterator itt = popt.questionable.begin(); itt != popt.questionable.end(); itt++){ updateGenotypes(popt, popb, parameters, (*itt), 0); @@ -599,23 +608,23 @@ updateGenotypes(popt, popb, parameters, (*itb) , 1); } } - + qsort (fsts, sizeof(fsts)/sizeof(fsts[0]), sizeof(fsts[0]), cmp ); - + double lcredint = fsts[500]; - double hcredint = fsts[9500]; - - cout << var.sequenceName << "\t" << var.position + double hcredint = fsts[9500]; + + cout << var.sequenceName << "\t" << var.position << "\t" << popt.af << "\t" << sums[0]/10000 - << "\t" << popb.af + << "\t" << popb.af << "\t" << sums[1]/10000 - << "\t" << popTotal.af + << "\t" << popTotal.af << "\t" << sums[4]/10000 << "\t" << 
sums[5]/10000 << "\t" << lcredint << "\t" << hcredint << endl; } - return 0; + return 0; } diff -Nru libvcflib-1.0.1+dfsg/src/cdflib.cpp libvcflib-1.0.2+dfsg/src/cdflib.cpp --- libvcflib-1.0.1+dfsg/src/cdflib.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/cdflib.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + # include # include # include diff -Nru libvcflib-1.0.1+dfsg/src/cdflib.hpp libvcflib-1.0.2+dfsg/src/cdflib.hpp --- libvcflib-1.0.1+dfsg/src/cdflib.hpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/cdflib.hpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + double algdiv ( double *a, double *b ); double alnrel ( double *a ); double apser ( double *a, double *b, double *x, double *eps ); diff -Nru libvcflib-1.0.1+dfsg/src/convert.h libvcflib-1.0.2+dfsg/src/convert.h --- libvcflib-1.0.1+dfsg/src/convert.h 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/convert.h 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #ifndef __CONVERT_H #define __CONVERT_H diff -Nru libvcflib-1.0.1+dfsg/src/dumpContigsFromHeader.cpp libvcflib-1.0.2+dfsg/src/dumpContigsFromHeader.cpp --- libvcflib-1.0.1+dfsg/src/dumpContigsFromHeader.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/dumpContigsFromHeader.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "var.hpp" @@ -12,18 +21,46 @@ int main(int argc, char** argv) { - string filename = argv[1]; + if (argc == 2) { + string h_flag = argv[1]; + + if (argc == 2 && (h_flag == "-h" || h_flag == "--help")) { + cerr << R"( +Dump contigs from header + +Usage: dumpContigsFromHeader file + +Example: + + dumpContigsFromHeader samples/scaffold612.vcf + + ##contig= + ##contig= + (...) + output + + scaffold4 1524 + scaffold12 56895 + (...) 
+ +Type: transformation + )"; + exit(1); + } + } + + string filename = argv[1]; VariantCallFile variantFile; variantFile.open(filename); vector headerLines = split (variantFile.header, "\n"); - + for(vector::iterator it = headerLines.begin(); it != headerLines.end(); it++){ // cerr << "h:" << (*it) << endl; - + if((*it).substr(0,8) == "##contig"){ string contigInfo = (*it).substr(10, (*it).length() -11); // cerr << contigInfo << endl; @@ -38,7 +75,7 @@ cout << subfield[1] << endl; } } - + } diff -Nru libvcflib-1.0.1+dfsg/src/genotypeSummary.cpp libvcflib-1.0.2+dfsg/src/genotypeSummary.cpp --- libvcflib-1.0.1+dfsg/src/genotypeSummary.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/genotypeSummary.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,8 +1,18 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "cdflib.hpp" #include "pdflib.hpp" #include "var.hpp" +#include "makeUnique.h" #include #include @@ -31,7 +41,7 @@ cerr << endl << endl; cerr << "INFO: help" << endl; cerr << "INFO: description:" << endl; - cerr << " Summarizes genotype counts for bi-allelic SNVs and indel " << endl; + cerr << "Generates a table of genotype counts. Summarizes genotype counts for bi-allelic SNVs and indel " << endl << endl; cerr << "INFO: output: table of genotype counts for each individual." 
<< endl; @@ -44,6 +54,7 @@ cerr << "INFO: optional, r,region -- a tabix compliant region : chr1:1-1000 or chr1 " << endl; cerr << "INFO: optional, s,snp -- Only count SNPs " << endl; cerr << "INFO: optional, a,ancestral -- describe counts relative to the ancestral allele defined as AA in INFO" << endl; + cerr << endl << "Type: statistics" << endl << endl; printVersion(); } @@ -83,7 +94,7 @@ string filename; - // open standardout + // open standardout // set region to scaffold @@ -190,7 +201,7 @@ printHelp(); } - bool is_open; + bool is_open; if (filename == "-") { @@ -198,10 +209,10 @@ } else { - is_open=variantFile.open(filename); - + is_open=variantFile.open(filename); + } - + if (!is_open) { cerr << "FATAL: could not open file for reading" << endl; printHelp(); @@ -249,7 +260,7 @@ for ( map::iterator x=it.begin(); x!=it.end(); ++x) { - countDataSampleName.push_back(samples[x->first] ); + countDataSampleName.push_back(samples[x->first] ); } @@ -317,19 +328,21 @@ index += 1; } - genotype * populationTarget ; + using Detail::makeUnique; + + unique_ptr populationTarget ; if(type == "PL"){ - populationTarget = new pl(); + populationTarget = makeUnique(); } if(type == "GL"){ - populationTarget = new gl(); + populationTarget = makeUnique(); } if(type == "GP"){ - populationTarget = new gp(); + populationTarget = makeUnique(); } if(type == "GT"){ - populationTarget = new gt(); + populationTarget = makeUnique(); } populationTarget->loadPop(target, var.sequenceName, var.position); @@ -362,7 +375,7 @@ exit(1); } } - delete populationTarget; + ; } diff -Nru libvcflib-1.0.1+dfsg/src/gl-XPEHH.cpp libvcflib-1.0.2+dfsg/src/gl-XPEHH.cpp --- libvcflib-1.0.1+dfsg/src/gl-XPEHH.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/gl-XPEHH.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the 
MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "cdflib.hpp" diff -Nru libvcflib-1.0.1+dfsg/src/gpatInfo.hpp libvcflib-1.0.2+dfsg/src/gpatInfo.hpp --- libvcflib-1.0.1+dfsg/src/gpatInfo.hpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/gpatInfo.hpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #ifndef gpatInfo_H #define gpatInfo_H @@ -6,15 +15,15 @@ #include void printBasicVersion(void){ - std::cout << "vcflib" << "\t" << VERSION << std::endl; + std::cout << "vcflib" << "\t" << VCFLIB_VERSION << std::endl; } void printVersion(void){ std::cerr << "------------------------------------------------------" << std::endl; std::cerr << " This is a vcflib::GPAT++ tool " << std::endl; - - std::cerr << "-Version : " << VERSION << std::endl; + + std::cerr << "-Version : " << VCFLIB_VERSION << std::endl; std::cerr << "-Contact : zev.kronenberg [at] gmail.com " << std::endl; std::cerr << "-Notes : If you find a bug, please open a report on github!" << std::endl; std::cerr << "-Support : Please post questions to biostars.org " << std::endl; diff -Nru libvcflib-1.0.1+dfsg/src/hapLrt.cpp libvcflib-1.0.2+dfsg/src/hapLrt.cpp --- libvcflib-1.0.1+dfsg/src/hapLrt.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/hapLrt.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,12 +1,22 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" #include "split.h" #include "cdflib.hpp" #include "pdflib.hpp" #include "var.hpp" +#include "makeUnique.h" #include #include -#include +#include #include #include #include @@ -28,14 +38,14 @@ cerr << " The sign denotes if the target has longer haplotypes (1) or the background (-1). " << endl << endl; cerr << "Output : 4 columns : " << endl; - cerr << " 1. seqid " << endl; - cerr << " 2. position " << endl; - cerr << " 3. mean target haplotype length " << endl; - cerr << " 4. mean background haplotype length " << endl; - cerr << " 5. p-value from LRT " << endl; + cerr << " 1. seqid " << endl; + cerr << " 2. position " << endl; + cerr << " 3. mean target haplotype length " << endl; + cerr << " 4. mean background haplotype length " << endl; + cerr << " 5. p-value from LRT " << endl; cerr << " 6. sign " << endl << endl; - cerr << "INFO: hapLRT --target 0,1,2,3,4,5,6,7 --background 11,12,13,16,17,19,22 --type GP --file my.vcf " << endl; + cerr << "INFO: Usage: hapLRT --target 0,1,2,3,4,5,6,7 --background 11,12,13,16,17,19,22 --type GP --file my.vcf " << endl; cerr << endl; cerr << "INFO: required: t,target -- argument: a zero base comma separated list of target individuals corrisponding to VCF columns " << endl; @@ -44,7 +54,8 @@ cerr << "INFO: required: y,type -- argument: type of genotype likelihood: PL, GL or GP " << endl; cerr << "INFO: optional: r,region -- argument: a genomice range to calculate hapLrt on in the format : \"seqid:start-end\" or \"seqid\" " << endl; cerr << endl; - + cerr << endl << "Type: genotype" << endl << endl; + printVersion(); exit(1); @@ -58,10 +69,10 @@ } void loadIndices(map & index, string set){ - + vector indviduals = split(set, ","); vector::iterator it = indviduals.begin(); - + for(; it != indviduals.end(); it++){ index[ atoi( (*it).c_str() ) ] = 1; } @@ -76,7 +87,7 @@ for(int i = 0 ; i < gmax*2; i++){ lengths[i] = 0; } - + for(int i = 0; i < gmax*2; i++){ //get group member 1 int g = (i < gmax) ? 
group[i] : group[i-gmax]; int c = (i < gmax) ? 0 : 1; @@ -120,7 +131,7 @@ if(*citE != *aitE) break; to_add++; } - + len += to_add; } @@ -172,7 +183,7 @@ for (i=1.0; i<=n; i++){ fact += log(i); } - + return fact; } @@ -184,13 +195,13 @@ // R example dnbinom(x=6, size=0.195089, mu=7.375, log=TRUE) double ans = lgamma( r+k ) - ( lfactorial(k) + lgamma(r) ) ; - ans += log(pow((m/(r+m)),k)) ; + ans += log(pow((m/(r+m)),k)) ; ans += log(pow((r/(r+m)),r)) ; - + //cerr << "k: " << k << "\t" << "m: " << m << "\t" << "r: " << r << "\t" << "ans: " << ans << endl; - + return ans; - + } double lexp(double x, double lambda){ @@ -198,11 +209,11 @@ double ans = lambda * pow ( exp(1) , (-lambda * x)); return log(ans); - + } double totalLL(int dat[], int n, double m){ - + double ll = 0; for(int j = 0; j < n; j++){ @@ -213,17 +224,17 @@ } double var(int dat[], int n, double mean){ - + double sum = 0; for(int i = 0; i < n; i++){ sum += pow( (double(dat[i]) - mean), 2); } - + double var = sum / (n - 1); - + return var; - + } void calc(string **haplotypes, int nhaps, vector pos, vector afs, vector & target, vector & background, vector total, string seqid){ @@ -239,7 +250,7 @@ for(int snp = 0; snp < haplotypes[0][0].length(); snp++){ int targetLengths[tl]; - int backgroundLengths[bl]; + int backgroundLengths[bl]; int totalLengths[al]; //changed (carson) --> findLengths(haplotypes, target, snp, targetLengths, tl); @@ -249,8 +260,8 @@ copy(targetLengths, targetLengths + tl, totalLengths); copy(backgroundLengths, backgroundLengths +bl, totalLengths + tl); - - + + double tm = mean(targetLengths, tl); double bm = mean(backgroundLengths, bl); double am = mean(totalLengths, al); @@ -263,18 +274,18 @@ double Alt = totalLL(targetLengths, 2*target.size(), tm) - + totalLL(backgroundLengths, 2*background.size(), bm); - + + totalLL(backgroundLengths, 2*background.size(), bm); + double Null = totalLL(targetLengths, 2*target.size(), am) - + totalLL(backgroundLengths, 2*background.size(), am); + + 
totalLL(backgroundLengths, 2*background.size(), am); double l = 2 * (Alt - Null); if(l < 0){ continue; } - + int which = 1; double p ; double q ; @@ -286,12 +297,12 @@ cdfchi(&which, &p, &q, &x, &df, &status, &bound ); cout << seqid << "\t" << pos[snp] << "\t" << tm << "\t" << bm << "\t" << 1-p << "\t" << dir << endl; - + } } void loadPhased(string **haplotypes, genotype * pop, int ntarget){ - + int indIndex = 0; for(vector::iterator ind = pop->gts.begin(); ind != pop->gts.end(); ind++){ @@ -301,7 +312,7 @@ // } vector< string > gs = split(g, "|"); haplotypes[indIndex][0].append(gs[0]); - haplotypes[indIndex][1].append(gs[1]); + haplotypes[indIndex][1].append(gs[1]); indIndex += 1; } } @@ -318,27 +329,27 @@ // set region to scaffold - string region = "NA"; + string region = "NA"; - // using vcflib; thanks to Erik Garrison + // using vcflib; thanks to Erik Garrison VariantCallFile variantFile; - // zero based index for the target and background indivudals - + // zero based index for the target and background indivudals + map targetIndex, backgroundIndex; - - // deltaaf is the difference of allele frequency we bother to look at + + // deltaaf is the difference of allele frequency we bother to look at // ancestral state is set to zero by default - - // phased + + // phased int phased = 0; string type = "NA"; - const struct option longopts[] = + const struct option longopts[] = { {"version" , 0, 0, 'v'}, {"help" , 0, 0, 'h'}, @@ -357,7 +368,7 @@ while(iarg != -1) { iarg = getopt_long(argc, argv, "y:r:t:b:f:hv", longopts, &findex); - + switch (iarg) { case 'h': @@ -383,7 +394,7 @@ break; case 'r': cerr << "INFO: set seqid region to : " << optarg << endl; - region = optarg; + region = optarg; break; default: break; @@ -395,7 +406,7 @@ okayGenotypeLikelihoods["GL"] = 1; okayGenotypeLikelihoods["GP"] = 1; okayGenotypeLikelihoods["GT"] = 1; - + if(type == "NA"){ cerr << "FATAL: failed to specify genotype likelihood format : PL or GL" << endl; @@ -416,9 +427,9 @@ 
variantFile.open(filename); - + if(region != "NA"){ - if(!variantFile.setRegion(region)){ //check if region is even specified in header + if(!variantFile.setRegion(region)){ //check if region is even specified in header bool region_exists = false; vector headerLines = split (variantFile.header, "\n"); for(vector::iterator it = headerLines.begin(); it != headerLines.end(); it++){ @@ -459,20 +470,20 @@ if (!variantFile.is_open()) { return 1; } - + Variant var(variantFile); vector samples = variantFile.sampleNames; int nsamples = samples.size(); - + vector ibi, iti, itot; int index, indexi = 0; for(vector::iterator samp = samples.begin(); samp != samples.end(); samp++){ - + string samplename = (*samp) ; if(targetIndex.find(index) != targetIndex.end() ){ @@ -489,22 +500,22 @@ } // itot.insert(itot.end(), iti.begin(), iti.end()); - + itot = iti; itot.insert(itot.end(), ibi.begin(), ibi.end()); vector positions; vector afs; - //string haplotypes [nsamples][2]; + //string haplotypes [nsamples][2]; string **haplotypes = new string*[nsamples]; for (int i = 0; i < nsamples; i++) { haplotypes[i] = new string[2]; } - + string currentSeqid = "NA"; - int count = 0; + int count = 0; while (variantFile.getNextVariant(var)) { count++; //cerr << count << endl; @@ -528,97 +539,83 @@ currentSeqid = var.sequenceName; afs.clear(); } - + vector < map< string, vector > > target, background, total; - + int sindex = 0; for(int nsamp = 0; nsamp < nsamples; nsamp++){ - + map > sample = var.samples[ samples[nsamp]]; - + if(targetIndex.find(sindex) != targetIndex.end() ){ target.push_back(sample); - total.push_back(sample); + total.push_back(sample); } if(backgroundIndex.find(sindex) != backgroundIndex.end()){ background.push_back(sample); - total.push_back(sample); + total.push_back(sample); } - + sindex += 1; } - - genotype * populationTarget ; - genotype * populationBackground; - genotype * populationTotal ; - + + using Detail::makeUnique; + + unique_ptr populationTarget ; + unique_ptr 
populationBackground; + unique_ptr populationTotal ; + if(type == "PL"){ - populationTarget = new pl(); - populationBackground = new pl(); - populationTotal = new pl(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); + populationTotal = makeUnique(); } if(type == "GL"){ - populationTarget = new gl(); - populationBackground = new gl(); - populationTotal = new gl(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); + populationTotal = makeUnique(); } if(type == "GP"){ - populationTarget = new gp(); - populationBackground = new gp(); - populationTotal = new gp(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); + populationTotal = makeUnique(); } if(type == "GT"){ - populationTarget = new gt(); - populationBackground = new gt(); - populationTotal = new gt(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); + populationTotal = makeUnique(); } - + populationTarget->loadPop(target, var.sequenceName, var.position); - + populationBackground->loadPop(background, var.sequenceName, var.position); - + populationTotal->loadPop(total, var.sequenceName, var.position); - - + + if(populationTotal->af > 0.95 || populationTotal->af < 0.05){ - delete populationTarget; - delete populationBackground; - delete populationTotal; + ; + ; - populationTarget = NULL; - populationBackground = NULL; - populationTotal = NULL; continue; } - + afs.push_back(populationTotal->af); positions.push_back(var.position); - loadPhased(haplotypes, populationTotal, nsamples); - - delete populationTarget; - delete populationBackground; - delete populationTotal; - - populationTarget = NULL; - populationBackground = NULL; - populationTotal = NULL; + loadPhased(haplotypes, populationTotal.get(), nsamples); + + ; + ; - } -// delete populationTarget; -// delete populationBackground; -// delete populationTotal; -// -// populationTarget = NULL; -// populationBackground = NULL; -// populationTotal = NULL; + } 
calc(haplotypes, nsamples, positions, afs, iti, ibi, itot, currentSeqid); - - return 0; + + return 0; } diff -Nru libvcflib-1.0.1+dfsg/src/iHS.cpp libvcflib-1.0.2+dfsg/src/iHS.cpp --- libvcflib-1.0.1+dfsg/src/iHS.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/iHS.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "cdflib.hpp" @@ -6,13 +15,14 @@ #include #include -#include +#include #include #include #include #include #include #include "gpatInfo.hpp" +#include "makeUnique.h" // maaas speed #if defined HAS_OPENMP @@ -40,40 +50,45 @@ using namespace vcflib; void printHelp(void){ - cerr << endl << endl; - cerr << "INFO: help" << endl; - cerr << "INFO: description:" << endl; - cerr << " iHS calculates the integrated ratio of haplotype decay between the reference and non-reference allele. " << endl; - - - cerr << "Output : 4 columns : " << endl; - cerr << " 1. seqid " << endl; - cerr << " 2. position " << endl; - cerr << " 3. target allele frequency " << endl; - cerr << " 4. integrated EHH (alternative) " << endl; - cerr << " 5. integrated EHH (reference) " << endl; - cerr << " 6. iHS ln(iEHHalt/iEHHref) " << endl << endl; - cerr << " 7. != 0 integration failure " << endl << endl; - cerr << " 8. 
!= 0 integration failure " << endl << endl; - - cerr << "Usage:" << endl; - - cerr << " iHS --target 0,1,2,3,4,5,6,7 --file my.phased.vcf \\" << endl; - cerr << " --region chr1:1-1000 > STDOUT 2> STDERR " << endl << endl; - - cerr << "Params:" << endl; - cerr << " required: t,target A zero base comma separated list of target" << endl; - cerr << " individuals corresponding to VCF columns " << endl; - cerr << " required: r,region A tabix compliant genomic range " << endl; - cerr << " format: \"seqid:start-end\" or \"seqid\" " << endl; - cerr << " required: f,file Proper formatted and phased VCF. " << endl; - cerr << " required: y,type Genotype likelihood format: GT,PL,GL,GP " << endl; - cerr << " optional: a,af Alternative alleles with frequencies less " << endl; - cerr << " than [0.05] are skipped. " << endl; - cerr << " optional: x,threads Number of CPUS [1]. " << endl; - cerr << " recommended: g,gen A PLINK formatted map file. " << endl; + cerr << R"( +iHS calculates the integrated haplotype score which measures the relative decay of extended haplotype homozygosity (EHH) for the reference and alternative alleles at a site (see: voight et al. 2006, Spiech & Hernandez 2014). + +Our code is highly concordant with both implementations mentioned. However, we do not set an upper limit to the allele frequency. iHS can be run without a genetic map, in which case the change in EHH is integrated over a constant. Human genetic maps for GRCh36 and GRCh37 (hg18 & hg19) can be found at: http://bochet.gcc.biostat.washington.edu/beagle/genetic_maps/ . iHS by default interpolates SNV positions to genetic position (you don't need a genetic position for every VCF entry in the map file). + +iHS analyses requires normalization by allele frequency. It is important that iHS is calculated over large regions so that the normalization does not down weight real signals. 
For genome-wide runs it is recommended to run slightly overlapping windows and throwing out values that fail integration (columns 7 & 8 in the output) and then removing duplicates by using the 'sort' and 'uniq' linux commands. Normalization of the output is as simple as running 'normalize-iHS'. + +INFO: help +INFO: description: + iHS calculates the integrated ratio of haplotype decay between the reference and non-reference allele. +Output : 4 columns : + 1. seqid + 2. position + 3. target allele frequency + 4. integrated EHH (alternative) + 5. integrated EHH (reference) + 6. iHS ln(iEHHalt/iEHHref) + 7. != 0 integration failure + 8. != 0 integration failure + +Usage: iHS --target 0,1,2,3,4,5,6,7 --file my.phased.vcf \ + --region chr1:1-1000 > STDOUT 2> STDERR + +Params: + required: t,target A zero base comma separated list of target + individuals corresponding to VCF columns + required: r,region A tabix compliant genomic range + format: "seqid:start-end" or "seqid" + required: f,file Proper formatted and phased VCF. + required: y,type Genotype likelihood format: GT,PL,GL,GP + optional: a,af Alternative alleles with frquences less + than [0.05] are skipped. + optional: x,threads Number of CPUS [1]. + recommended: g,gen A PLINK formatted map file. 
+ +)" << endl ; + cerr << endl << "Type: statistics" << endl << endl; cerr << endl; - + printVersion(); exit(1); @@ -81,7 +96,7 @@ bool gDist(int start, int end, double * gd){ - + if(globalOpts.geneticMap.find(start) == globalOpts.geneticMap.end()){ return false; } @@ -101,7 +116,7 @@ } ifstream featureFile (globalOpts.geneticMapFile.c_str()); - + string line; int lastpos = 0; @@ -125,7 +140,7 @@ lastpos = pos; continue; } - + int diff = abs(pos - lastpos); double vdiff = abs(lastvalue - cm ); double chunk = vdiff/double(diff); @@ -164,20 +179,20 @@ } void loadIndices(map & index, string set){ - + vector indviduals = split(set, ","); vector::iterator it = indviduals.begin(); - + for(; it != indviduals.end(); it++){ index[ atoi( (*it).c_str() ) ] = 1; } } -void countHaps(int nhaps, map & targetH, +void countHaps(int nhaps, map & targetH, string **haplotypes, int start, int end){ for(int i = 0; i < nhaps; i++){ - + std::string h1 = haplotypes[i][0].substr(start, (end - start)) ; std::string h2 = haplotypes[i][1].substr(start, (end - start)) ; @@ -196,12 +211,12 @@ } } -void computeNs(map & targetH, int start, +void computeNs(map & targetH, int start, int end, double * sumT, char ref, bool dir){ - - for( map::iterator th = targetH.begin(); + + for( map::iterator th = targetH.begin(); th != targetH.end(); th++){ - + if(th->second < 2){ continue; } @@ -209,15 +224,15 @@ // end is extending ; check first base if(dir){ - if( th->first[0] == ref){ + if( th->first[0] == ref){ // std::cerr << "count dat: " << th->first << " " << th->second << " " << ref << " " << dir << endl; - + *sumT += r8_choose(th->second, 2); } } - + // start is extending ; check last base else{ @@ -232,13 +247,13 @@ } } -bool calcEhh(string **haplotypes, int start, - int end, char ref, int nhaps, +bool calcEhh(string **haplotypes, int start, + int end, char ref, int nhaps, double * ehh, double div, bool dir){ double sum = 0 ; - map refH; - + map refH; + countHaps(nhaps, refH, haplotypes, start, end); 
computeNs(refH, start, end, &sum, ref, dir ); @@ -254,16 +269,16 @@ return true; } -int integrate(string **haplotypes , +int integrate(string **haplotypes , vector & pos, bool direction, - int maxl, - int snp, + int maxl, + int snp, char ref, - int nhaps, + int nhaps, double * iHH, double denom ){ - + double ehh = 1; int start = snp; @@ -289,10 +304,10 @@ return 1; } double ehhRT = 0; - if(!calcEhh(haplotypes, - start, end, - ref, nhaps, - &ehhRT, denom, + if(!calcEhh(haplotypes, + start, end, + ref, nhaps, + &ehhRT, denom, direction)){ return 1; } @@ -307,13 +322,13 @@ double dist = 0 ; if(direction){ - gDist(pos[end-1], pos[end], &delta_gDist); + gDist(pos[end-1], pos[end], &delta_gDist); dist = abs(pos[end-1] - pos[end]); } else{ gDist(pos[start + 1], pos[start], &delta_gDist); dist = abs(pos[end-1] - pos[end]); - + } if(dist > 10000){ @@ -332,8 +347,8 @@ return 10; } -void calc(string **haplotypes, int nhaps, - vector & afs, vector & pos, +void calc(string **haplotypes, int nhaps, + vector & afs, vector & pos, vector & target, vector & background, string seqid){ int maxl = haplotypes[0][0].length(); @@ -341,7 +356,7 @@ #if defined HAS_OPENMP #pragma omp parallel for schedule(dynamic, 20) #endif - + for(int snp = 0; snp < maxl; snp++){ double ihhR = 0; @@ -350,17 +365,17 @@ map refH; countHaps(nhaps, refH, haplotypes, snp, snp+1); - + double denomP1 = double(refH["0"]); double denomP2 = double(refH["1"]); int refFail = 0; int altFail = 0; - + refFail += integrate(haplotypes, pos, true, maxl, snp, '0', nhaps, &ihhR, denomP1); - + refFail += integrate(haplotypes, pos, false, maxl, snp, '0', nhaps, &ihhR, denomP1); altFail += integrate(haplotypes, pos, true, maxl, snp, '1', nhaps, &ihhA, denomP2); @@ -374,23 +389,23 @@ #if defined HAS_OPENMP omp_set_lock(&lock); #endif - cout << seqid - << "\t" << pos[snp] - << "\t" << afs[snp] - << "\t" << ihhR - << "\t" << ihhA - << "\t" << log(ihhA/ihhR) - << "\t" << refFail + cout << seqid + << "\t" << pos[snp] + << "\t" << 
afs[snp] + << "\t" << ihhR + << "\t" << ihhA + << "\t" << log(ihhA/ihhR) + << "\t" << refFail << "\t" << altFail << std::endl; -#if defined HAS_OPENMP +#if defined HAS_OPENMP omp_unset_lock(&lock); #endif } } void loadPhased(string **haplotypes, genotype * pop, int ntarget){ - + int indIndex = 0; for(vector::iterator ind = pop->gts.begin(); ind != pop->gts.end(); ind++){ @@ -407,11 +422,11 @@ globalOpts.threads = 1 ; globalOpts.af = 0.05; - // zero based index for the target and background indivudals - + // zero based index for the target and background indivudals + map it, ib; - - const struct option longopts[] = + + const struct option longopts[] = { {"version" , 0, 0, 'v'}, {"help" , 0, 0, 'h'}, @@ -431,7 +446,7 @@ while(iarg != -1) { iarg = getopt_long(argc, argv, "a:x:g:y:r:d:t:b:f:hv", longopts, &findex); - + switch (iarg) { case 'a': @@ -480,7 +495,7 @@ case 'r': { cerr << "INFO: set seqid region to : " << optarg << endl; - globalOpts.region = optarg; + globalOpts.region = optarg; break; default: break; @@ -495,12 +510,12 @@ okayGenotypeLikelihoods["GL"] = 1; okayGenotypeLikelihoods["GP"] = 1; okayGenotypeLikelihoods["GT"] = 1; - + // add an option for dumping // for(std::map::iterator gm = geneticMap.begin(); gm != geneticMap.end(); gm++){ -// cerr << "pos: " << gm->first << " cm: " << gm->second << endl; +// cerr << "pos: " << gm->first << " cm: " << gm->second << endl; // } if(globalOpts.type.empty()){ @@ -526,12 +541,12 @@ exit(1); } - // using vcflib; thanksErik + // using vcflib; thanksErik VariantCallFile variantFile; variantFile.open(globalOpts.filename); - + if(globalOpts.region.empty()){ cerr << "FATAL: region required" << endl; exit(1); @@ -544,11 +559,11 @@ if (!variantFile.is_open()) { exit(1); } - + Variant var( variantFile ); vector target_h, background_h; - int index = 0; + int index = 0; int indexi = 0; @@ -556,21 +571,21 @@ int nsamples = samples.size(); for(vector::iterator samp = samples.begin(); samp != samples.end(); samp++){ - + string 
sampleName = (*samp); - + if(it.find(index) != it.end() ){ target_h.push_back(indexi); indexi++; } index++; } - - + + vector positions; - + vector afs; - + string **haplotypes = new string*[target_h.size()]; for (int i = 0; i < target_h.size(); i++) { haplotypes[i] = new string[2]; @@ -591,48 +606,47 @@ } vector < map< string, vector > > target, background, total; - + int sindex = 0; - + for(int nsamp = 0; nsamp < nsamples; nsamp++){ map > sample = var.samples[ samples[nsamp]]; - + if(it.find(sindex) != it.end() ){ target.push_back(sample); - } + } sindex += 1; } - - genotype * populationTarget ; - + + using Detail::makeUnique; + + unique_ptr populationTarget ; + if(globalOpts.type == "PL"){ - populationTarget = new pl(); + populationTarget = makeUnique(); } if(globalOpts.type == "GL"){ - populationTarget = new gl(); + populationTarget = makeUnique(); } if(globalOpts.type == "GP"){ - populationTarget = new gp(); + populationTarget = makeUnique(); } if(globalOpts.type == "GT"){ - populationTarget = new gt(); + populationTarget = makeUnique(); } populationTarget->loadPop(target, var.sequenceName, var.position); - - if(populationTarget->af <= globalOpts.af - || populationTarget->nref < 2 + + if(populationTarget->af <= globalOpts.af + || populationTarget->nref < 2 || populationTarget->nalt < 2){ - delete populationTarget; + ; continue; } positions.push_back(var.position); afs.push_back(populationTarget->af); - loadPhased(haplotypes, populationTarget, populationTarget->gts.size()); - - populationTarget = NULL; - delete populationTarget; + loadPhased(haplotypes, populationTarget.get(), populationTarget->gts.size()); } if(!globalOpts.geneticMapFile.empty()){ @@ -641,10 +655,10 @@ cerr << "INFO: finished loading genetics map" << endl; } - calc(haplotypes, target_h.size(), afs, positions, + calc(haplotypes, target_h.size(), afs, positions, target_h, background_h, globalOpts.seqid); clearHaplotypes(haplotypes, target_h.size()); - exit(0); + exit(0); } diff -Nru 
libvcflib-1.0.1+dfsg/src/join.h libvcflib-1.0.2+dfsg/src/join.h --- libvcflib-1.0.1+dfsg/src/join.h 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/join.h 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #ifndef __JOIN_H #define __JOIN_H diff -Nru libvcflib-1.0.1+dfsg/src/makeUnique.h libvcflib-1.0.2+dfsg/src/makeUnique.h --- libvcflib-1.0.1+dfsg/src/makeUnique.h 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/makeUnique.h 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,8 @@ +#include + +namespace Detail { + template + std::unique_ptr makeUnique(Args &&... args) { + return unique_ptr(new T(forward(args)...)); + } +} diff -Nru libvcflib-1.0.1+dfsg/src/meltEHH.cpp libvcflib-1.0.2+dfsg/src/meltEHH.cpp --- libvcflib-1.0.1+dfsg/src/meltEHH.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/meltEHH.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "cdflib.hpp" @@ -6,20 +15,21 @@ #include #include -#include +#include #include #include #include #include #include #include "gpatInfo.hpp" +#include "makeUnique.h" // maaas speed #if defined HAS_OPENMP #include // print lock omp_lock_t lock; -#endif +#endif struct opts{ @@ -44,32 +54,40 @@ cerr << endl << endl; cerr << "INFO: help" << endl; cerr << "INFO: description:" << endl; - cerr << " meltEHH provides the data to plot EHH curves. 
" << endl; - + cerr << " " << endl << endl; + + cerr << R"( meltEHH provides the data to plot extended haplotype homozygosity +(EHH) curves and produces the data to generate the following plot: + + +INFO: help +INFO: description: + meltEHH provides the data to plot EHH curves. +Output : 4 columns : + 1. seqid + 2. position + 3. EHH + 4. ref or alt [0 == ref] +Usage: + meltEHH --target 0,1,2,3,4,5,6,7 --pos 10 --file my.phased.vcf \ + --region chr1:1-1000 > STDOUT 2> STDERR + +Params: + required: t,target A zero base comma separated list of target + individuals corresponding to VCF columns + required: r,region A tabix compliant genomic range + format: "seqid:start-end" or "seqid" + required: f,file Proper formatted and phased VCF. + required: y,type Genotype likelihood format: GT,PL,GL,GP + required: p,position Variant position to melt. + optional: a,af Alternative alleles with frequencies less + than [0.05] are skipped. + +)" << endl; - cerr << "Output : 4 columns : " << endl; - cerr << " 1. seqid " << endl; - cerr << " 2. position " << endl; - cerr << " 3. EHH " << endl; - cerr << " 4. ref or alt [0 == ref] " << endl; - - cerr << "Usage:" << endl; - - cerr << " meltEHH --target 0,1,2,3,4,5,6,7 --pos 10 --file my.phased.vcf \\" << endl; - cerr << " --region chr1:1-1000 > STDOUT 2> STDERR " << endl << endl; - - cerr << "Params:" << endl; - cerr << " required: t,target A zero base comma separated list of target" << endl; - cerr << " individuals corresponding to VCF columns " << endl; - cerr << " required: r,region A tabix compliant genomic range " << endl; - cerr << " format: \"seqid:start-end\" or \"seqid\" " << endl; - cerr << " required: f,file Proper formatted and phased VCF. " << endl; - cerr << " required: y,type Genotype likelihood format: GT,PL,GL,GP " << endl; - cerr << " required: p,position Variant position to melt. " << endl; - cerr << " optional: a,af Alternative alleles with frequencies less " << endl; - cerr << " than [0.05] are skipped. 
" << endl; + cerr << endl << "Type: statistics" << endl << endl; cerr << endl; - + printVersion(); exit(1); @@ -77,7 +95,7 @@ bool gDist(int start, int end, double * gd){ - + if(globalOpts.geneticMap.find(start) == globalOpts.geneticMap.end()){ return false; } @@ -97,7 +115,7 @@ } ifstream featureFile (globalOpts.geneticMapFile.c_str()); - + string line; int lastpos = 0; @@ -121,7 +139,7 @@ lastpos = pos; continue; } - + int diff = abs(pos - lastpos); double vdiff = abs(lastvalue - cm ); double chunk = vdiff/double(diff); @@ -160,20 +178,20 @@ } void loadIndices(map & index, string set){ - + vector indviduals = split(set, ","); vector::iterator it = indviduals.begin(); - + for(; it != indviduals.end(); it++){ index[ atoi( (*it).c_str() ) ] = 1; } } -void countHaps(int nhaps, map & targetH, +void countHaps(int nhaps, map & targetH, string **haplotypes, int start, int end){ for(int i = 0; i < nhaps; i++){ - + std::string h1 = haplotypes[i][0].substr(start, (end - start)) ; std::string h2 = haplotypes[i][1].substr(start, (end - start)) ; @@ -192,12 +210,12 @@ } } -void computeNs(map & targetH, int start, +void computeNs(map & targetH, int start, int end, double * sumT, char ref, bool dir){ - - for( map::iterator th = targetH.begin(); + + for( map::iterator th = targetH.begin(); th != targetH.end(); th++){ - + if(th->second < 2){ continue; } @@ -205,15 +223,15 @@ // end is extending ; check first base if(dir){ - if( th->first[0] == ref){ + if( th->first[0] == ref){ // std::cerr << "count dat: " << th->first << " " << th->second << " " << ref << " " << dir << endl; - + *sumT += r8_choose(th->second, 2); } } - + // start is extending ; check last base else{ @@ -228,13 +246,13 @@ } } -bool calcEhh(string **haplotypes, int start, - int end, char ref, int nhaps, +bool calcEhh(string **haplotypes, int start, + int end, char ref, int nhaps, double * ehh, double div, bool dir){ double sum = 0 ; - map refH; - + map refH; + countHaps(nhaps, refH, haplotypes, start, end); 
computeNs(refH, start, end, &sum, ref, dir ); @@ -250,16 +268,16 @@ return true; } -int integrate(string ** haplotypes, +int integrate(string ** haplotypes, vector & pos, bool direction, - int maxl, - int snp, + int maxl, + int snp, char ref, - int nhaps, + int nhaps, double * iHH, double denom ){ - + double ehh = 1; int start = snp; @@ -285,10 +303,10 @@ return 1; } double ehhRT = 0; - if(!calcEhh(haplotypes, - start, end, - ref, nhaps, - &ehhRT, denom, + if(!calcEhh(haplotypes, + start, end, + ref, nhaps, + &ehhRT, denom, direction)){ return 1; } @@ -306,7 +324,7 @@ gDist(pos[start + 1], pos[start], &delta_gDist); } *iHH += ((ehh + ehhRT)/2)*delta_gDist; - + if(direction){ std::cout << pos[end] << "\t" << ehh << "\t" << ref << "\t" << direction << std::endl; } @@ -322,8 +340,8 @@ return 10; } -void calc(string ** haplotypes, int nhaps, - vector & afs, vector & pos, +void calc(string ** haplotypes, int nhaps, + vector & afs, vector & pos, vector & target, vector & background, string seqid){ int maxl = haplotypes[0][0].length(); @@ -334,7 +352,7 @@ if(pos[snp] != globalOpts.pos ){ continue; } - + double ihhR = 0; double ihhA = 0; @@ -342,20 +360,20 @@ map refH; countHaps(nhaps, refH, haplotypes, snp, snp+1); - + double denomP1 = double(refH["0"]); double denomP2 = double(refH["1"]); int refFail = 0; int altFail = 0; - + std::cout << pos[snp] << "\t" << "1" << "\t" << "0" << "\t" << "0" << std::endl; refFail += integrate(haplotypes, pos, true, maxl, snp, '0', nhaps, &ihhR, denomP1); - + refFail += integrate(haplotypes, pos, false, maxl, snp, '0', nhaps, &ihhR, denomP1); altFail += integrate(haplotypes, pos, true, maxl, snp, '1', nhaps, &ihhA, denomP2); @@ -366,7 +384,7 @@ } void loadPhased(string **haplotypes, genotype * pop, int ntarget){ - + int indIndex = 0; for(vector::iterator ind = pop->gts.begin(); ind != pop->gts.end(); ind++){ @@ -383,11 +401,11 @@ globalOpts.threads = 1 ; globalOpts.af = 0.05; - // zero based index for the target and background indivudals 
- + // zero based index for the target and background indivudals + map it, ib; - - const struct option longopts[] = + + const struct option longopts[] = { {"version" , 0, 0, 'v'}, {"help" , 0, 0, 'h'}, @@ -408,7 +426,7 @@ while(iarg != -1) { iarg = getopt_long(argc, argv, "a:x:g:y:r:d:t:b:f:p:hv", longopts, &findex); - + switch (iarg) { case 'p': @@ -463,7 +481,7 @@ case 'r': { cerr << "INFO: set seqid region to : " << optarg << endl; - globalOpts.region = optarg; + globalOpts.region = optarg; break; default: break; @@ -474,18 +492,18 @@ #if defined HAS_OPENMP omp_set_num_threads(globalOpts.threads); #endif - + map okayGenotypeLikelihoods; okayGenotypeLikelihoods["PL"] = 1; okayGenotypeLikelihoods["GL"] = 1; okayGenotypeLikelihoods["GP"] = 1; okayGenotypeLikelihoods["GT"] = 1; - + // add an option for dumping // for(std::map::iterator gm = geneticMap.begin(); gm != geneticMap.end(); gm++){ -// cerr << "pos: " << gm->first << " cm: " << gm->second << endl; +// cerr << "pos: " << gm->first << " cm: " << gm->second << endl; // } if(globalOpts.type.empty()){ @@ -511,12 +529,12 @@ exit(1); } - // using vcflib; thanksErik + // using vcflib; thanksErik VariantCallFile variantFile; variantFile.open(globalOpts.filename); - + if(globalOpts.region.empty()){ cerr << "FATAL: region required" << endl; exit(1); @@ -529,11 +547,11 @@ if (!variantFile.is_open()) { exit(1); } - + Variant var( variantFile ); vector target_h, background_h; - int index = 0; + int index = 0; int indexi = 0; @@ -541,26 +559,26 @@ int nsamples = samples.size(); for(vector::iterator samp = samples.begin(); samp != samples.end(); samp++){ - + string sampleName = (*samp); - + if(it.find(index) != it.end() ){ target_h.push_back(indexi); indexi++; } index++; } - - + + vector positions; - + vector afs; string **haplotypes = new string*[target_h.size()]; for (int i = 0; i < target_h.size(); i++) { haplotypes[i] = new string[2]; } - + while (variantFile.getNextVariant(var)) { @@ -576,48 +594,47 @@ } vector < map< 
string, vector > > target, background, total; - + int sindex = 0; - + for(int nsamp = 0; nsamp < nsamples; nsamp++){ map > sample = var.samples[ samples[nsamp]]; - + if(it.find(sindex) != it.end() ){ target.push_back(sample); - } + } sindex += 1; } - - genotype * populationTarget ; - + + using Detail::makeUnique; + + unique_ptr populationTarget; + if(globalOpts.type == "PL"){ - populationTarget = new pl(); + populationTarget = makeUnique(); } if(globalOpts.type == "GL"){ - populationTarget = new gl(); + populationTarget = makeUnique(); } if(globalOpts.type == "GP"){ - populationTarget = new gp(); + populationTarget = makeUnique(); } if(globalOpts.type == "GT"){ - populationTarget = new gt(); + populationTarget = makeUnique(); } populationTarget->loadPop(target, var.sequenceName, var.position); - - if(populationTarget->af <= globalOpts.af - || populationTarget->nref < 2 + + if(populationTarget->af <= globalOpts.af + || populationTarget->nref < 2 || populationTarget->nalt < 2){ - delete populationTarget; + ; continue; } positions.push_back(var.position); afs.push_back(populationTarget->af); - loadPhased(haplotypes, populationTarget, populationTarget->gts.size()); - - populationTarget = NULL; - delete populationTarget; + loadPhased(haplotypes, populationTarget.get(), populationTarget->gts.size()); } if(!globalOpts.geneticMapFile.empty()){ @@ -626,10 +643,10 @@ cerr << "INFO: finished loading genetics map" << endl; } - calc(haplotypes, target_h.size(), afs, positions, + calc(haplotypes, target_h.size(), afs, positions, target_h, background_h, globalOpts.seqid); clearHaplotypes(haplotypes, target_h.size()); - exit(0); + exit(0); } diff -Nru libvcflib-1.0.1+dfsg/src/mt19937ar.h libvcflib-1.0.2+dfsg/src/mt19937ar.h --- libvcflib-1.0.1+dfsg/src/mt19937ar.h 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/mt19937ar.h 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 
Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + /* A C-program for MT19937, with initialization improved 2002/1/26. Coded by Takuji Nishimura and Makoto Matsumoto. diff -Nru libvcflib-1.0.1+dfsg/src/normalize-iHS.cpp libvcflib-1.0.2+dfsg/src/normalize-iHS.cpp --- libvcflib-1.0.1+dfsg/src/normalize-iHS.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/normalize-iHS.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + /* @@ -78,7 +87,17 @@ cerr << endl << endl; cerr << "INFO: help" << endl; cerr << "INFO: description:" << endl; - cerr << " normalizes iHS or XP-EHH scores " << endl; + cerr << " normalizes iHS or XP-EHH scores. " << endl << endl ; + + cerr << R"( + +A cross-population extended haplotype homozygosity (XP-EHH) score is +directional: a positive score suggests selection is likely to have +happened in population A, whereas a negative score suggests the same +about population B. See for example +https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2687721/ + +)" << endl ; cerr << "Output : normalize-iHS adds one additional column to input (normalized score)." 
<< endl; @@ -86,6 +105,7 @@ cerr << endl; cerr << "INFO: required: -f -- Output from iHS or XPEHH " << endl; cerr << "INFO: optional: -s -- Max AF diff for window [0.01]" << endl; + cerr << endl << "Type: genotype" << endl << endl; cerr << endl; @@ -101,7 +121,7 @@ switch(opt){ case 's': { - string op = optarg; + string op = optarg; globalOpts.afDiff = atof(op.c_str()); break; } @@ -111,7 +131,7 @@ exit(1); break; } - + case 'f': { globalOpts.file = optarg; @@ -121,8 +141,8 @@ { break; } - } - opt = getopt( argc, argv, optString ); + } + opt = getopt( argc, argv, optString ); } return 1; } @@ -176,7 +196,7 @@ s += *it; n += 1; } - + return (s/n); } @@ -185,7 +205,7 @@ //------------------------------- SUBROUTINE -------------------------------- /* - Function input : vector of iHS data + Function input : vector of iHS data Function does : normalizes @@ -194,13 +214,13 @@ */ void normalize(std::vector & data, int * pos){ - + std::vector windat; int start = *pos; int end = *pos; - while((abs(data[start]->af - data[end]->af ) < globalOpts.afDiff) + while((abs(data[start]->af - data[end]->af ) < globalOpts.afDiff) && end < data.size() -1 ){ end += 1; } @@ -208,22 +228,22 @@ for(int i = start; i <= end; i++){ windat.push_back(data[i]->iHS); } - + double avg = windowAvg(windat); double sd = sqrt(var(windat, avg)); - std::cerr << "start: " << data[start]->af << " " - << "end: " << data[end]->af << " " - << "n iHS scores: " << windat.size() << " " - << "mean: " << avg << " " + std::cerr << "start: " << data[start]->af << " " + << "end: " << data[end]->af << " " + << "n iHS scores: " << windat.size() << " " + << "mean: " << avg << " " << "sd: " << sd << std::endl; for(int i = start; i <= end; i++){ data[i]->niHS = (data[i]->iHS - avg) / (sd); } - + *pos = end; - + } //------------------------------- MAIN -------------------------------- @@ -264,14 +284,14 @@ data.push_back(tp); } - + myfile.close(); } else{ cerr << "FATAL: could not open file: " << globalOpts.file << endl; 
exit(1); } - + std::cerr << "INFO: sorting " << data.size() << " scores by AF" << std::endl; @@ -294,7 +314,7 @@ << data[i]->niHS << "\t" << data[i]->F1 << "\t" << data[i]->F2 << std::endl; - + } diff -Nru libvcflib-1.0.1+dfsg/src/pdflib.cpp libvcflib-1.0.2+dfsg/src/pdflib.cpp --- libvcflib-1.0.1+dfsg/src/pdflib.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/pdflib.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + # include # include # include diff -Nru libvcflib-1.0.1+dfsg/src/pdflib.hpp libvcflib-1.0.2+dfsg/src/pdflib.hpp --- libvcflib-1.0.1+dfsg/src/pdflib.hpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/pdflib.hpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + double i4_binomial_pdf ( int n, double p, int k ); int i4_binomial_sample ( int n, double pp ); double i4vec_multinomial_pdf ( int n, double p[], int m, int x[] ); diff -Nru libvcflib-1.0.1+dfsg/src/permuteGPAT++.cpp libvcflib-1.0.2+dfsg/src/permuteGPAT++.cpp --- libvcflib-1.0.1+dfsg/src/permuteGPAT++.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/permuteGPAT++.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,4 +1,14 @@ /* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2015 Zev N. Kronenberg + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + +/* This program was created at: Fri Apr 17 14:59:53 2015 This program was created by: Zev N. 
Kronenberg @@ -51,7 +61,7 @@ struct options{ std::string file; int npermutation; - int nsuc; + int nsuc; }globalOpts; static const char *optString = "f:n:s:"; @@ -67,7 +77,7 @@ globalOpts.nsuc = 1; globalOpts.npermutation = 1000; - + opt = getopt(argc, argv, optString); while(opt != -1){ switch(opt){ @@ -93,8 +103,8 @@ break; } } - - opt = getopt( argc, argv, optString ); + + opt = getopt( argc, argv, optString ); } return 1; } @@ -113,13 +123,13 @@ cerr << endl << endl; cerr << "INFO: help" << endl; cerr << "INFO: description:" << endl; - cerr << " permuteGPAT++ is a method for adding empirical p-values to a GPAT++ score." << endl ; + cerr << " permuteGPAT++ is a method for adding empirical p-values to a GPAT++ score." << endl << endl ; cerr << " Currently permuteGPAT++ only supports wcFst, but will be extended. " << endl ; cerr << endl; cerr << "OUTPUT: permuteGPAT++ will append three additional columns:" << endl; cerr << " 1. The number of successes " << endl; cerr << " 2. The number of trials " << endl; - cerr << " 3. The empirical p-value " << endl << endl; + cerr << " 3. 
The empirical p-value " << endl << endl; cerr << "INFO: usage: permuteGPAT++ -f gpat.txt -n 5 -s 1 "<< endl; cerr << endl; @@ -127,7 +137,7 @@ cerr << "INFO: number: n -- argument: the number of permutations to run for each value [1000]" << endl; cerr << "INFO: success: s -- argument: stop permutations after \'s\' successes [1]" << endl; - + cerr << endl << "Type: phenotype" << endl << endl; cerr << endl; } @@ -139,10 +149,16 @@ int main( int argc, char** argv) { -int parse = parseOpts(argc, argv); + if (argc == 2) { + string h_flag = argv[1]; + if (argc == 2 && (h_flag == "-h" || h_flag == "--help")) { + printHelp(); + exit(1); + } + } + int parse = parseOpts(argc, argv); if(globalOpts.file.compare("NA") == 0){ - cerr << "FATAL: no file was provided" << endl; printHelp(); exit(1); } @@ -186,20 +202,20 @@ vector region = split(line, "\t"); double value = atof(region[4].c_str()); - + if(value < 0){ value = 0; } - + double suc = 0; double per = 0; int datas = data.size(); - double pv = (1.0 / globalOpts.npermutation); + double pv = (1.0 / globalOpts.npermutation); while( suc < globalOpts.nsuc && per < globalOpts.npermutation){ per += 1.0; - + int r = rand() % datas; if(value < data[r]){ @@ -211,7 +227,7 @@ } cout << line << "\t" << suc << "\t" << per << "\t" << pv << endl; } - + } else{ cerr << "FATAL: could not open file: " << globalOpts.file << endl; diff -Nru libvcflib-1.0.1+dfsg/src/permuteGPATsmoother.cpp libvcflib-1.0.2+dfsg/src/permuteGPATsmoother.cpp --- libvcflib-1.0.1+dfsg/src/permuteGPATsmoother.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/permuteGPATsmoother.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,4 +1,13 @@ /* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + +/* This program was created at: Fri Apr 17 14:59:53 2015 This program was created by: Zev N. 
Kronenberg diff -Nru libvcflib-1.0.1+dfsg/src/permuteRegions.cpp libvcflib-1.0.2+dfsg/src/permuteRegions.cpp --- libvcflib-1.0.1+dfsg/src/permuteRegions.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/permuteRegions.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,4 +1,13 @@ /* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + +/* This program was created at: Fri Apr 17 14:59:53 2015 This program was created by: Zev N. Kronenberg diff -Nru libvcflib-1.0.1+dfsg/src/permuteSmooth.cpp libvcflib-1.0.2+dfsg/src/permuteSmooth.cpp --- libvcflib-1.0.1+dfsg/src/permuteSmooth.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/permuteSmooth.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,4 +1,14 @@ /* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2015 Zev N. Kronenberg + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + +/* This program was created at: Fri Apr 17 14:59:53 2015 This program was created by: Zev N. Kronenberg @@ -122,6 +132,7 @@ cerr << " 2. The number of trials " << endl; cerr << " 3. 
The empirical p-value " << endl; cerr << endl; + cerr << endl << "Type: statistics" << endl << endl; printVersion(); } @@ -136,6 +147,15 @@ globalOpts.nsuc = 1; globalOpts.npermutation = 1000; + if (argc == 2) { + string h_flag = argv[1]; + + if (argc == 2 && (h_flag == "-h" || h_flag == "--help")) { + printHelp(); + exit(1); + } + } + opt = getopt(argc, argv, optString); while(opt != -1){ switch(opt){ diff -Nru libvcflib-1.0.1+dfsg/src/pFst.cpp libvcflib-1.0.2+dfsg/src/pFst.cpp --- libvcflib-1.0.1+dfsg/src/pFst.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/pFst.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "cdflib.hpp" @@ -6,13 +15,14 @@ #include #include -#include +#include #include #include #include #include #include #include "gpatInfo.hpp" +#include "makeUnique.h" using namespace std; using namespace vcflib; @@ -23,7 +33,20 @@ cerr << "INFO: help" << endl; cerr << "INFO: description:" << endl; cerr << " pFst is a probabilistic approach for detecting differences in allele frequencies between two populations." << endl << endl; - + + cerr << R"( + +pFst is a likelihood ratio test (LRT) quantifying allele frequency +differences between populations. The LRT by default uses the binomial +distribution. If Genotype likelihoods are provided it uses a modified +binomial that weights each allele count by its certainty. If type is +set to 'PO' the LRT uses a beta distribution to fit the allele +frequency spectrum of the target and background. PO requires the AD +and DP genotype fields and requires at least two pools for the target +and background. The p-value calculated in pFst is based on the +chi-squared distribution with one degree of freedom. 
+ +)" << endl ; cerr << "Output : 3 columns : " << endl; cerr << " 1. seqid " << endl; @@ -40,7 +63,7 @@ cerr << "INFO: optional: r,region -- argument: a tabix compliant genomic range : seqid or seqid:start-end " << endl; cerr << "INFO: optional: c,counts -- switch : use genotype counts rather than genotype likelihoods to estimate parameters, default false " << endl; - cerr << endl; + cerr << endl << "Type: statistics" << endl; printVersion() ; } @@ -56,11 +79,11 @@ } void loadIndices(map & index, string set){ - + vector indviduals = split(set, ","); vector::iterator it = indviduals.begin(); - + for(; it != indviduals.end(); it++){ index[ atoi( (*it).c_str() ) ] = 1; } @@ -70,7 +93,7 @@ double ans = lgamma(n+1)-lgamma(x+1)-lgamma(n-x+1) + x * log(p) + (n-x) * log(1-p); return ans; - + } int main(int argc, char** argv) { @@ -85,22 +108,22 @@ // set region to scaffold - string region = "NA"; + string region = "NA"; - // using vcflib; thanks to Erik Garrison + // using vcflib; thanks to Erik Garrison VariantCallFile variantFile; - // zero based index for the target and background indivudals - + // zero based index for the target and background indivudals + map it, ib; - - // deltaaf is the difference of allele frequency we bother to look at + + // deltaaf is the difference of allele frequency we bother to look at string deltaaf ; double daf = 0; - // + // int counts = 0; @@ -108,7 +131,7 @@ string type = "NA"; - const struct option longopts[] = + const struct option longopts[] = { {"version" , 0, 0, 'v'}, {"help" , 0, 0, 'h'}, @@ -128,7 +151,7 @@ while(iarg != -1) { iarg = getopt_long(argc, argv, "r:d:t:b:f:y:chv", longopts, &index); - + switch (iarg) { case 'h': @@ -137,7 +160,7 @@ case 'v': printVersion(); return 0; - case 'y': + case 'y': type = optarg; cerr << "INFO: genotype likelihoods set to: " << type << endl; if(type == "GT"){ @@ -165,26 +188,26 @@ case 'd': cerr << "INFO: only scoring sites where the allele frequency difference is greater than: " << optarg 
<< endl; deltaaf = optarg; - daf = atof(deltaaf.c_str()); + daf = atof(deltaaf.c_str()); break; case 'r': cerr << "INFO: set seqid region to : " << optarg << endl; - region = optarg; + region = optarg; break; default: break; } } - + if(filename == "NA"){ cerr << "FATAL: did not specify the file\n"; printHelp(); exit(1); } - + variantFile.open(filename); - + if(region != "NA"){ if(! variantFile.setRegion(region)){ cerr <<"FATAL: unable to set region" << endl; @@ -215,7 +238,7 @@ cerr << "FATAL: genotype likelihood is incorrectly formatted, only use: PL,PO,GL,GP" << endl; printHelp(); return 1; - } + } Variant var(variantFile); @@ -227,10 +250,10 @@ if(var.alt.size() > 1){ continue; } - - + + vector < map< string, vector > > target, background, total; - + int index = 0; for(int nsamp = 0; nsamp < nsamples; nsamp++){ @@ -239,70 +262,63 @@ if(sample["GT"].front() != "./."){ if(it.find(index) != it.end() ){ - target.push_back(sample); - total.push_back(sample); + target.push_back(sample); + total.push_back(sample); } if(ib.find(index) != ib.end()){ background.push_back(sample); - total.push_back(sample); + total.push_back(sample); } } - + index += 1; } - - zvar * populationTarget ; - zvar * populationBackground ; - zvar * populationTotal ; + + unique_ptr populationTarget ; + unique_ptr populationBackground ; + unique_ptr populationTotal ; + + using Detail::makeUnique; if(type == "PO"){ - populationTarget = new pooled(); - populationBackground = new pooled(); - populationTotal = new pooled(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); + populationTotal = makeUnique(); } if(type == "PL"){ - populationTarget = new pl(); - populationBackground = new pl(); - populationTotal = new pl(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); + populationTotal = makeUnique(); } if(type == "GL"){ - populationTarget = new gl(); - populationBackground = new gl(); - populationTotal = new gl(); + populationTarget = makeUnique(); + 
populationBackground = makeUnique(); + populationTotal = makeUnique(); } if(type == "GP"){ - populationTarget = new gp(); - populationBackground = new gp(); - populationTotal = new gp(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); + populationTotal = makeUnique(); } if(type == "GT"){ - populationTarget = new gt(); - populationBackground = new gt(); - populationTotal = new gt(); - } + populationTarget = makeUnique(); + populationBackground = makeUnique(); + populationTotal = makeUnique(); + } - populationTotal->loadPop(total , var.sequenceName, var.position); + populationTotal->loadPop(total , var.sequenceName, var.position); populationTarget->loadPop(target , var.sequenceName, var.position); populationBackground->loadPop(background, var.sequenceName, var.position); if(populationTarget->npop < 2 || populationBackground->npop < 2){ - delete populationTarget; - delete populationBackground; - delete populationTotal; - continue; } - populationTotal->estimatePosterior(); + populationTotal->estimatePosterior(); populationTarget->estimatePosterior(); populationBackground->estimatePosterior(); if(populationTarget->alpha == -1 || populationBackground->alpha == -1){ - delete populationTarget; - delete populationBackground; - delete populationTotal; - - continue; } @@ -310,7 +326,7 @@ populationTotal->alpha = 0.001 + populationTotal->nref; populationTotal->beta = 0.001 + populationTotal->nalt; - + populationTarget->alpha = 0.001 + populationTarget->nref; populationTarget->beta = 0.001 + populationTarget->nalt; @@ -331,15 +347,10 @@ logLbinomial(populationBackground->beta, (populationBackground->alpha + populationBackground->beta), populationTotalEstAF) ; double alt = logLbinomial(populationTarget->beta, (populationTarget->alpha + populationTarget->beta), populationTargetEstAF) + logLbinomial(populationBackground->beta, (populationBackground->alpha + populationBackground->beta), populationBackgroundEstAF) ; - - double l = 2 * (alt - null); - - if(l 
<= 0){ - delete populationTarget; - delete populationBackground; - delete populationTotal; + double l = 2 * (alt - null); + if(l <= 0){ continue; } @@ -350,19 +361,10 @@ double df = 1; int status; double bound ; - + cdfchi(&which, &p, &q, &x, &df, &status, &bound ); - - cout << var.sequenceName << "\t" << var.position << "\t" << 1-p << endl ; - - delete populationTarget; - delete populationBackground; - delete populationTotal; - - populationTarget = NULL; - populationBackground = NULL; - populationTotal = NULL; + cout << var.sequenceName << "\t" << var.position << "\t" << 1-p << endl ; } - return 0; + return 0; } diff -Nru libvcflib-1.0.1+dfsg/src/plotHaps.cpp libvcflib-1.0.2+dfsg/src/plotHaps.cpp --- libvcflib-1.0.1+dfsg/src/plotHaps.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/plotHaps.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,12 +1,22 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "cdflib.hpp" #include "pdflib.hpp" #include "var.hpp" +#include "makeUnique.h" #include #include -#include +#include #include #include #include @@ -24,7 +34,7 @@ cerr << endl << endl; cerr << "INFO: help" << endl; cerr << "INFO: description:" << endl; - cerr << " plotHaps provides the formatted output that can be used with \'bin/plotHaplotypes.R\'. " << endl; + cerr << " plotHaps provides the formatted output that can be used with \'bin/plotHaplotypes.R\'. 
" << endl << endl ; cerr << "Output : haplotype matrix and positions" << endl << endl; @@ -33,9 +43,8 @@ cerr << "INFO: required: r,region -- argument: a tabix compliant genomic range : \"seqid:start-end\" or \"seqid\" " << endl; cerr << "INFO: required: f,file -- argument: proper formatted phased VCF file " << endl; cerr << "INFO: required: y,type -- argument: genotype likelihood format: PL,GP,GP " << endl; + cerr << endl << "Type: statistics" << endl << endl; cerr << endl; - - printVersion(); exit(1); } @@ -48,10 +57,10 @@ } void loadIndices(map & index, string set){ - + vector indviduals = split(set, ","); vector::iterator it = indviduals.begin(); - + for(; it != indviduals.end(); it++){ index[ atoi( (*it).c_str() ) ] = 1; } @@ -60,7 +69,7 @@ void calc(string **haplotypes, int nhaps, vector afs, vector pos, vector & target, vector & background, string seqid){ for(int snp = 0; snp < haplotypes[0][0].length(); snp++){ - + double ehhA = 1; double ehhR = 1; @@ -69,20 +78,20 @@ int start = snp; int end = snp; - int core = snp; + int core = snp; while( ehhA > 0.05 && ehhR > 0.05 ) { - + start -= 1; end += 1; - + if(start == -1){ break; } if(end == haplotypes[0][0].length() - 1){ break; } - + map targetH; double sumrT = 0; @@ -93,14 +102,14 @@ for(int i = 0; i < nhaps; i++){ targetH[ haplotypes[i][0].substr(start, (end - start)) ]++; targetH[ haplotypes[i][1].substr(start, (end - start)) ]++; - } - for( map::iterator th = targetH.begin(); th != targetH.end(); th++){ - if( (*th).first.substr((end-start)/2, 1) == "1"){ - sumaT += r8_choose(th->second, 2); + } + for( map::iterator th = targetH.begin(); th != targetH.end(); th++){ + if( (*th).first.substr((end-start)/2, 1) == "1"){ + sumaT += r8_choose(th->second, 2); naltT += th->second; } else{ - sumrT += r8_choose(th->second, 2); + sumrT += r8_choose(th->second, 2); nrefT += th->second; } } @@ -110,9 +119,9 @@ iHSA += ehhA; iHSR += ehhR; - } + } cout << seqid << "\t" << pos[snp] << "\t" << afs[snp] << "\t" << iHSA << 
"\t" << iHSR << "\t" << iHSA/iHSR << endl; - } + } } double EHH(string **haplotypes, int nhaps){ @@ -128,18 +137,18 @@ double nh = 0; for( map::iterator it = hapcounts.begin(); it != hapcounts.end(); it++){ - nh += it->second; + nh += it->second; sum += r8_choose(it->second, 2); } double max = (sum / r8_choose(nh, 2)); - + return max; } void loadPhased(string **haplotypes, genotype * pop, int ntarget){ - + int indIndex = 0; for(vector::iterator ind = pop->gts.begin(); ind != pop->gts.end(); ind++){ @@ -174,30 +183,30 @@ // set region to scaffold - string region = "NA"; + string region = "NA"; - // using vcflib; thanks to Erik Garrison + // using vcflib; thanks to Erik Garrison VariantCallFile variantFile; - // zero based index for the target and background indivudals - + // zero based index for the target and background indivudals + map it, ib; - - // deltaaf is the difference of allele frequency we bother to look at + + // deltaaf is the difference of allele frequency we bother to look at // ancestral state is set to zero by default int counts = 0; - - // phased + + // phased int phased = 0; string type = "NA"; - const struct option longopts[] = + const struct option longopts[] = { {"version" , 0, 0, 'v'}, {"help" , 0, 0, 'h'}, @@ -214,7 +223,7 @@ while(iarg != -1) { iarg = getopt_long(argc, argv, "y:r:t:f:hv", longopts, &findex); - + switch (iarg) { case 'h': @@ -235,7 +244,7 @@ break; case 'r': cerr << "INFO: set seqid region to : " << optarg << endl; - region = optarg; + region = optarg; break; default: break; @@ -247,7 +256,7 @@ okayGenotypeLikelihoods["GL"] = 1; okayGenotypeLikelihoods["GP"] = 1; okayGenotypeLikelihoods["GT"] = 1; - + if(type == "NA"){ cerr << "FATAL: failed to specify genotype likelihood format : PL or GL" << endl; @@ -273,7 +282,7 @@ printHelp(); return 1; } - + if(region != "NA"){ if(! 
variantFile.setRegion(region)){ cerr <<"FATAL: unable to set region" << endl; @@ -285,7 +294,7 @@ printHelp(); return 1; } - + Variant var(variantFile); vector samples = variantFile.sampleNames; @@ -297,26 +306,26 @@ int indexi = 0; for(vector::iterator samp = samples.begin(); samp != samples.end(); samp++){ - + if(it.find(index) != it.end() ){ target_h.push_back(indexi); indexi++; } index++; } - + vector positions; - + vector afs; string **haplotypes = new string*[target_h.size()]; for (int i = 0; i < target_h.size(); i++) { haplotypes[i] = new string[2]; } - + string currentSeqid = "NA"; - - + + while (variantFile.getNextVariant(var)) { if(!var.isPhased()){ @@ -328,44 +337,46 @@ continue; } - + vector < map< string, vector > > target, background, total; - + int sindex = 0; for(int nsamp = 0; nsamp < nsamples; nsamp++){ map > sample = var.samples[ samples[nsamp]]; - + if(it.find(sindex) != it.end() ){ target.push_back(sample); - } + } sindex += 1; } - - genotype * populationTarget ; + + using Detail::makeUnique; + + unique_ptr populationTarget ; if(type == "PL"){ - populationTarget = new pl(); + populationTarget = makeUnique(); } if(type == "GL"){ - populationTarget = new gl(); + populationTarget = makeUnique(); } if(type == "GP"){ - populationTarget = new gp(); + populationTarget = makeUnique(); } if(type == "GT"){ - populationTarget = new gt(); + populationTarget = makeUnique(); } - + populationTarget->loadPop(target, var.sequenceName, var.position); - + positions.push_back(var.position); afs.push_back(populationTarget->af); - loadPhased(haplotypes, populationTarget, populationTarget->gts.size()); + loadPhased(haplotypes, populationTarget.get(), populationTarget->gts.size()); } - + printHaplotypes( haplotypes, target_h, positions); - - return 0; + + return 0; } diff -Nru libvcflib-1.0.1+dfsg/src/popStats.cpp libvcflib-1.0.2+dfsg/src/popStats.cpp --- libvcflib-1.0.1+dfsg/src/popStats.cpp 2019-10-01 07:06:02.000000000 +0000 +++ 
libvcflib-1.0.2+dfsg/src/popStats.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "cdflib.hpp" @@ -6,13 +15,14 @@ #include #include -#include +#include #include #include #include #include #include #include "gpatInfo.hpp" +#include "makeUnique.h" using namespace std; using namespace vcflib; @@ -25,6 +35,9 @@ cerr << "INFO: description:" << endl; cerr << " General population genetic statistics for each SNP " << endl << endl; + cerr << R"( + Calculates basic population statistics at bi-allelic sites. The allele frequency is the number of non-reference alleles divided by the total number of alleles. The expected hetrozygosity is 2*p*q, where p is the non-reference allele frequency and q is 1-p. The observed heterozgosity is the fraction of 0/1 genotypes out of all genotypes. The inbreeding coefficent, Fis, is the relative heterozygosity of each individual vs. compared to the target group. )" << endl << endl; + cerr << "Output : 9 columns : " << endl; cerr << " 1. seqid " << endl; cerr << " 2. 
position " << endl; @@ -43,7 +56,7 @@ cerr << "INFO: required: f,file -- proper formatted VCF " << endl; cerr << "INFO: required, y,type -- genotype likelihood format; genotype : GL,PL,GP " << endl; cerr << "INFO: optional, r,region -- a tabix compliant region : chr1:1-1000 or chr1 " << endl; - + cerr << endl << "Type: statistics" << endl << endl; printVersion(); } @@ -59,11 +72,11 @@ } void loadIndices(map & index, string set){ - + vector indviduals = split(set, ","); vector::iterator it = indviduals.begin(); - + for(; it != indviduals.end(); it++){ index[ atoi( (*it).c_str() ) ] = 1; } @@ -82,21 +95,21 @@ // set region to scaffold - string region = "NA"; + string region = "NA"; - // using vcflib; thanks to Erik Garrison + // using vcflib; thanks to Erik Garrison VariantCallFile variantFile; - // zero based index for the target and background indivudals - + // zero based index for the target and background indivudals + map it, ib; - + // genotype likelihood format string type = "NA"; - const struct option longopts[] = + const struct option longopts[] = { {"version" , 0, 0, 'v'}, {"help" , 0, 0, 'h'}, @@ -113,7 +126,7 @@ while(iarg != -1) { iarg = getopt_long(argc, argv, "y:r:d:t:b:f:chv", longopts, &index); - + switch (iarg) { case 'h': @@ -138,7 +151,7 @@ break; case 'r': cerr << "INFO: set seqid region to : " << optarg << endl; - region = optarg; + region = optarg; break; case 'y': type = optarg; @@ -154,12 +167,12 @@ cerr << "FATAL: failed to specify a file" << endl; printHelp(); } - + if(!variantFile.open(filename)){ cerr << "FATAL: could not open file for reading" << endl; printHelp(); } - + if(region != "NA"){ if(! 
variantFile.setRegion(region)){ cerr <<"FATAL: unable to set region" << endl; @@ -196,15 +209,15 @@ int nsamples = samples.size(); while (variantFile.getNextVariant(var)) { - - // biallelic sites naturally + + // biallelic sites naturally if(var.alt.size() > 1){ continue; } - + vector < map< string, vector > > target, background, total; - + int index = 0; for(int nsamp = 0; nsamp < nsamples; nsamp++){ @@ -215,26 +228,28 @@ if(it.find(index) != it.end() ){ target.push_back(sample); } - } + } index += 1; } - - genotype * populationTarget ; - genotype * populationBackground ; + + using Detail::makeUnique; + + unique_ptr populationTarget ; + unique_ptr populationBackground ; if(type == "PL"){ - populationTarget = new pl(); + populationTarget = makeUnique(); } if(type == "GL"){ - populationTarget = new gl(); + populationTarget = makeUnique(); } if(type == "GP"){ - populationTarget = new gp(); + populationTarget = makeUnique(); } if(type == "GT"){ - populationTarget = new gt(); + populationTarget = makeUnique(); } - + populationTarget->loadPop(target, var.sequenceName, var.position); //cerr << " 3. target allele frequency " << endl; @@ -246,13 +261,12 @@ //cerr << " 9. 
target Fis " << endl; if(populationTarget->af == -1){ - delete populationTarget; continue; } double ehet = 2*(populationTarget->af * (1 - populationTarget->af)); - - cout << var.sequenceName << "\t" << var.position << "\t" + + cout << var.sequenceName << "\t" << var.position << "\t" << populationTarget->af << "\t" << ehet << "\t" << populationTarget->hfrq << "\t" @@ -260,9 +274,6 @@ << populationTarget->nhomr << "\t" << populationTarget->nhoma << "\t" << populationTarget->fis << endl; - - delete populationTarget; - } - return 0; + return 0; } diff -Nru libvcflib-1.0.1+dfsg/src/pVst.cpp libvcflib-1.0.2+dfsg/src/pVst.cpp --- libvcflib-1.0.1+dfsg/src/pVst.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/pVst.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "cdflib.hpp" @@ -6,15 +15,15 @@ #include #include -#include +#include #include #include #include #include #include -#include -#include -#include +#include +#include +#include #include "gpatInfo.hpp" #if defined HAS_OPENMP @@ -43,8 +52,16 @@ cerr << endl << endl; cerr << "INFO: help" << endl; cerr << "INFO: description:" << endl; - cerr << " vFst calculates vst, a measure of CNV stratification." << endl << endl; - + cerr << " pVst calculates vst, a measure of CNV stratification." << endl << endl; + cerr << R"( + +The statistic Vst is used to test the difference in copy numbers at +each SV between two groups: Vst = (Vt-Vs)/Vt, where Vt is the overall +variance of copy number and Vs the average variance within +populations. + +)"; + cerr << "Output : 4 columns : " << endl; cerr << " 1. seqid " << endl; cerr << " 2. 
position " << endl; @@ -60,8 +77,9 @@ cerr << "INFO: required: y,type -- argument: the genotype field with the copy number: e.g. CN|CNF " << endl; cerr << "INFO: optional: r,region -- argument: a tabix compliant genomic range : seqid or seqid:start-end " << endl; cerr << "INFO: optional: x,cpu -- argument: number of CPUs [1] " << endl ; - cerr << "INFO: optional: n,per -- argument: number of permutations [1000] " << endl; - + cerr << "INFO: optional: n,per -- argument: number of permutations [1000] " << endl; + + cerr << endl << "Type: statistics" << endl << endl; cerr << endl; printVersion() ; @@ -128,10 +146,10 @@ } double vst(copyNcounts * d){ - + double muA = mean(d->total ); double vA = var(d->total, muA ); - + if(vA <= 0){ return 0; } @@ -144,7 +162,7 @@ double cT = double(d->target.size()) / double(d->total.size()); double cB = double(d->background.size()) / double(d->total.size()); - + double ans = (vA - ( (cT * vT) + (cB * vB) )) / vA; //purge negative @@ -167,16 +185,16 @@ if (nsuc > 1){ break; } - + trials += 1; std::random_shuffle(d->total.begin(), d->total.end()); - + int tsize = d->target.size(); - + d->target.clear(); d->background.clear(); - + int counter = 0; for(std::vector::iterator it = d->total.begin(); @@ -189,9 +207,9 @@ } counter+=1; } - + // std::cerr << "PER\t" << v << "\t" << vst(d) << std::endl; - + if (vst(d) >= v ){ nsuc += 1; } @@ -202,12 +220,12 @@ p = double(nsuc) / double(trials); } - - d->results << d->seqid - << "\t" - << d->pos - << "\t" + + d->results << d->seqid + << "\t" + << d->pos + << "\t" << d->end << "\t" << d->type @@ -215,8 +233,8 @@ << v << "\t" << p ; - - + + } @@ -226,36 +244,36 @@ //------------------------------- SUBROUTINE -------------------------------- void loadIndices(map & index, string set){ - + vector indviduals = split(set, ","); vector::iterator it = indviduals.begin(); - + for(; it != indviduals.end(); it++){ index[ atoi( (*it).c_str() ) ] = 1; } } // gotta load the dat so that we can permute using open 
MP -void loadDat(copyNcounts * d, +void loadDat(copyNcounts * d, std::string & type, vector < map< string, vector > > & target, vector < map< string, vector > > & background){ - for(vector < map< string, vector > >::iterator it + for(vector < map< string, vector > >::iterator it = target.begin(); it!= target.end(); it++){ d->target.push_back( atof((*it)[type].front().c_str()) ); d->total.push_back( atof((*it)[type].front().c_str()) ); - + d->targetV += (*it)[type].front(); d->targetV += ","; - + } - for(vector < map< string, vector > >::iterator it + for(vector < map< string, vector > >::iterator it = background.begin(); it!= background.end(); it++){ d->background.push_back( atof((*it)[type].front().c_str()) ); - d->total.push_back( atof((*it)[type].front().c_str()) ); - + d->total.push_back( atof((*it)[type].front().c_str()) ); + d->backgroundV += (*it)[type].front(); d->backgroundV += ","; @@ -275,22 +293,22 @@ // set region to scaffold - string region = "NA"; + string region = "NA"; - // using vcflib; thanks to Erik Garrison + // using vcflib; thanks to Erik Garrison VariantCallFile variantFile; - // zero based index for the target and background indivudals - + // zero based index for the target and background indivudals + map it, ib; - - // deltaaf is the difference of allele frequency we bother to look at + + // deltaaf is the difference of allele frequency we bother to look at string deltaaf ; double daf = 0; - // + // int counts = 0; @@ -298,7 +316,7 @@ string type = "NA"; - const struct option longopts[] = + const struct option longopts[] = { {"version" , 0, 0, 'v'}, {"help" , 0, 0, 'h'}, @@ -309,7 +327,7 @@ {"background", 1, 0, 'b'}, {"region" , 1, 0, 'r'}, {"type" , 1, 0, 'y'}, - + {0,0,0,0} }; @@ -338,7 +356,7 @@ return 0; } case 'y': - { + { type = optarg; cerr << "INFO: Copy number will be found in : " << type << endl; break; @@ -373,12 +391,12 @@ { cerr << "INFO: only scoring sites where the allele frequency difference is greater than: " << optarg << 
endl; deltaaf = optarg; - daf = atof(deltaaf.c_str()); + daf = atof(deltaaf.c_str()); break; } case 'r': cerr << "INFO: set seqid region to : " << optarg << endl; - region = optarg; + region = optarg; break; default: break; @@ -389,15 +407,15 @@ omp_set_num_threads(cpu); #endif - + if(filename == "NA"){ cerr << "FATAL: did not specify the file\n"; printHelp(); exit(1); } - + variantFile.open(filename); - + if(region != "NA"){ if(! variantFile.setRegion(region)){ cerr <<"FATAL: unable to set region" << endl; @@ -408,13 +426,13 @@ if (!variantFile.is_open()) { exit(1); } - + if(type == "NA"){ cerr << "FATAL: failed to specify copy number genotype field" << endl; printHelp(); return 1; } - + Variant var(variantFile); vector samples = variantFile.sampleNames; @@ -437,14 +455,14 @@ if(formatMap.find(type) == formatMap.end()){ continue; } - - + + copyNcounts * varDat = new copyNcounts; varDat->pos = var.position ; varDat->seqid = var.sequenceName; varDat->type = var.alt.front() ; - + if(var.info.find("CALLERS") != var.info.end()){ stringstream caller; for(std::vector::iterator z = var.info["CALLERS"].begin(); @@ -452,7 +470,7 @@ caller << (*z); caller << ','; } - + varDat->callers = caller.str(); } @@ -464,31 +482,31 @@ } vector < map< string, vector > > target, background, total; - + int index = 0; - - + + for(int nsamp = 0; nsamp < nsamples; nsamp++){ - + map > sample = var.samples[ samples[nsamp]]; - + if(sample[type].front() != "."){ if(it.find(index) != it.end() ){ - target.push_back(sample); - total.push_back(sample); + target.push_back(sample); + total.push_back(sample); } if(ib.find(index) != ib.end()){ background.push_back(sample); - total.push_back(sample); + total.push_back(sample); } } - + index += 1; } - + loadDat(varDat, type, target, background); - if(varDat->target.size() < 2 + if(varDat->target.size() < 2 || varDat->background.size() < 2){ delete varDat; continue; @@ -498,7 +516,7 @@ // this odd pattern is for open MP ... 
later if(dataBin.size() == cpu){ - + #if defined HAS_OPENMP #pragma omp parallel for schedule(dynamic, 1) #endif @@ -507,19 +525,19 @@ } for(int i = 0 ; i < dataBin.size(); i++){ std::cout << dataBin[i]->results.str() << "\t" << dataBin[i]->targetV << "\t" << dataBin[i]->backgroundV ; - + if(!dataBin[i]->callers.empty()){ std::cout << "\t" << dataBin[i]->callers << std::endl; } else{ std::cout << std::endl; } - + delete dataBin[i]; } dataBin.clear(); } } - return 0; + return 0; } diff -Nru libvcflib-1.0.1+dfsg/src/rnglib.cpp libvcflib-1.0.2+dfsg/src/rnglib.cpp --- libvcflib-1.0.1+dfsg/src/rnglib.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/rnglib.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + # include # include # include diff -Nru libvcflib-1.0.1+dfsg/src/rnglib.hpp libvcflib-1.0.2+dfsg/src/rnglib.hpp --- libvcflib-1.0.1+dfsg/src/rnglib.hpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/rnglib.hpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + void advance_state ( int k ); bool antithetic_get ( ); void antithetic_memory ( int i, bool &value ); diff -Nru libvcflib-1.0.1+dfsg/src/segmentFst.cpp libvcflib-1.0.2+dfsg/src/segmentFst.cpp --- libvcflib-1.0.1+dfsg/src/segmentFst.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/segmentFst.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + /* @@ -64,7 +73,8 @@ cerr << endl << endl; cerr << "INFO: help" << endl; cerr << "INFO: description:" << endl; - cerr << " Creates genomic segments (bed file) for regions with high wcFst " << endl; + cerr << "segmentFst creates genomic segments (bed file) for regions with high wcFst" << endl << endl ; + cerr << "segmentFst provides a way to find continious regions with high Fst values. It takes the output of wcFst and produces a BED file. These high Fst region can be permutated with 'permuteGPATwindow'" << endl ; cerr << "Output : 8 columns : " << endl; cerr << " 1. Seqid " << endl; @@ -80,7 +90,7 @@ cerr << endl; cerr << "INFO: required: -f -- Output from wcFst " << endl; cerr << "INFO: optional: -s -- High Fst cutoff [0.8] " << endl; - + cerr << endl << "Type: statistics" << endl << endl; cerr << endl; printVersion(); diff -Nru libvcflib-1.0.1+dfsg/src/segmentIhs.cpp libvcflib-1.0.2+dfsg/src/segmentIhs.cpp --- libvcflib-1.0.1+dfsg/src/segmentIhs.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/segmentIhs.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + /* @@ -80,7 +89,7 @@ cerr << endl; cerr << "INFO: required: -f -- Output from normalizeIHS " << endl; cerr << "INFO: optional: -s -- High absolute iHS cutoff [2] " << endl; - + cerr << endl << "Type: statistics" << endl << endl; cerr << endl; printVersion(); @@ -96,7 +105,7 @@ switch(opt){ case 's': { - string op = optarg; + string op = optarg; globalOpts.cut = atof(op.c_str()); break; } @@ -106,7 +115,7 @@ exit(1); break; } - + case 'f': { globalOpts.file = optarg; @@ -116,8 +125,8 @@ { break; } - } - opt = getopt( argc, argv, optString ); + } + opt = getopt( argc, argv, optString ); } return 1; } @@ -131,10 +140,10 @@ */ -bool growWindow(vector & values, - int * begin , - int * end , - int * nhigh , +bool growWindow(vector & values, + int * begin , + int * end , + int * nhigh , int * nlow , double * hSum , double * lSum ){ @@ -178,7 +187,7 @@ void process(vector & pos, vector & value, vector & seqid) { - + // i is the index of the outter loop/aka variant sites. // always start the seed with 9 SNPs the window grows to 10 in "growWindow" @@ -186,16 +195,16 @@ int begin = i -9; int end = i +9; - + int nHigh = 0; int nLow = 0; - + double HighSum = 0; double LowSum = 0; bool anyGroth = false; - - while(growWindow(value, &begin, &end, + + while(growWindow(value, &begin, &end, &nHigh, &nLow, &HighSum, &LowSum)){ anyGroth = true; } @@ -224,9 +233,9 @@ << nHigh + nLow << "\t" << nHigh << "\t" << (pos[end] - pos[begin]) - << endl; + << endl; - } + } } } @@ -279,7 +288,7 @@ lastPos = atoi(lineDat[1].c_str()); seqid.push_back(lineDat[0]); pos.push_back(atoi(lineDat[1].c_str())); - value.push_back(abs(atof(lineDat[6].c_str()))); + value.push_back(abs(atof(lineDat[6].c_str()))); } } diff -Nru libvcflib-1.0.1+dfsg/src/sequenceDiversity.cpp libvcflib-1.0.2+dfsg/src/sequenceDiversity.cpp --- libvcflib-1.0.1+dfsg/src/sequenceDiversity.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/sequenceDiversity.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 
@@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "cdflib.hpp" @@ -6,12 +15,13 @@ #include #include -#include +#include #include #include #include #include #include +#include "makeUnique.h" #include "gpatInfo.hpp" using namespace std; @@ -40,11 +50,12 @@ cerr << "INFO: required: t,target -- argument: a zero base comma separated list of target individuals corresponding to VCF columns " << endl; cerr << "INFO: required: f,file -- argument: a properly formatted phased VCF file " << endl; cerr << "INFO: required: y,type -- argument: type of genotype likelihood: PL, GL or GP " << endl; - cerr << "INFO: optional: a,af -- sites less than af are filtered out; default is 0 " << endl; - cerr << "INFO: optional: r,region -- argument: a tabix compliant region : \"seqid:0-100\" or \"seqid\" " << endl; - cerr << "INFO: optional: w,window -- argument: the number of SNPs per window; default is 20 " << endl; + cerr << "INFO: optional: a,af -- sites less than af are filtered out; default is 0 " << endl; + cerr << "INFO: optional: r,region -- argument: a tabix compliant region : \"seqid:0-100\" or \"seqid\" " << endl; + cerr << "INFO: optional: w,window -- argument: the number of SNPs per window; default is 20 " << endl; + cerr << endl << "Type: statistics" << endl << endl; cerr << endl; - + printVersion(); exit(1); @@ -58,10 +69,10 @@ } void loadIndices(map & index, string set){ - + vector indviduals = split(set, ","); vector::iterator it = indviduals.begin(); - + for(; it != indviduals.end(); it++){ index[ atoi( (*it).c_str() ) ] = 1; } @@ -71,18 +82,18 @@ double nchooseSum = 0; // summing over all possible haplotypes - for(std::map::iterator it = hapWin.begin(); + for(map::iterator it = hapWin.begin(); it != hapWin.end(); it++){ nchooseSum += r8_choose(it->second, 
2); } double piSum = 0; - // all unique pairwise - for(std::map::iterator it = hapWin.begin(); + // all unique pairwise + for(map::iterator it = hapWin.begin(); it != hapWin.end(); it++){ - + // advancing it - std::map::iterator iz = it; + map::iterator iz = it; iz++; for(;iz != hapWin.end(); iz++){ // different bases @@ -95,14 +106,14 @@ double f1 = double(it->second)/double(nHaps); double f2 = double(iz->second)/double(nHaps); double perBaseDiff = double(ndiff)/double(wlen); - + piSum += f1*f2*perBaseDiff; } } - + *pi = piSum; - *eHH = nchooseSum / r8_choose(nHaps, 2); + *eHH = nchooseSum / r8_choose(nHaps, 2); } @@ -112,25 +123,25 @@ if(haplotypes[0][0].length() < (window-1) ){ return; } - + for(int snpA = 0; snpA < haplotypes[0][0].length() - window; snpA += 1){ - + map targetHaplotypes; - + for(int targetIndex = 0; targetIndex < target.size(); targetIndex++ ){ - + string haplotypeA; string haplotypeB; - + haplotypeA += haplotypes[target[targetIndex]][0].substr(snpA, window) ; haplotypeB += haplotypes[target[targetIndex]][1].substr(snpA, window) ; - + targetHaplotypes[haplotypeA]++; targetHaplotypes[haplotypeB]++; - + } - + double piEst; double eHH = 0; @@ -138,16 +149,16 @@ int wlen = pos[snpA + window] - pos[snpA]; - pi(targetHaplotypes, target.size()*2, &piEst, &eHH, wlen); + pi(targetHaplotypes, target.size()*2, &piEst, &eHH, wlen); cout << seqid << "\t" << pos[snpA] << "\t" << pos[snpA + window] << "\t" << piEst << "\t" << eHH << endl; - + } } void loadPhased(string **haplotypes, genotype * pop, int ntarget){ - + int indIndex = 0; for(vector::iterator ind = pop->gts.begin(); ind != pop->gts.end(); ind++){ @@ -171,27 +182,27 @@ // set region to scaffold - string region = "NA"; + string region = "NA"; - // using vcflib; thanks to Erik Garrison + // using vcflib; thanks to Erik Garrison VariantCallFile variantFile; - // zero based index for the target and background indivudals - + // zero based index for the target and background indivudals + map targetIndex, 
backgroundIndex; - - // deltaaf is the difference of allele frequency we bother to look at + + // deltaaf is the difference of allele frequency we bother to look at // ancestral state is set to zero by default int counts = 0; - - // phased + + // phased int phased = 0; - + // use the background allele frequency int external = 0; @@ -207,7 +218,7 @@ string type = "NA"; - const struct option longopts[] = + const struct option longopts[] = { {"version" , 0, 0, 'v'}, {"help" , 0, 0, 'h'}, @@ -228,7 +239,7 @@ while(iarg != -1) { iarg = getopt_long(argc, argv, "a:w:y:r:t:b:f:edhv", longopts, &findex); - + switch (iarg) { case 'h': @@ -268,7 +279,7 @@ case 'r': { cerr << "INFO: set seqid region to : " << optarg << endl; - region = optarg; + region = optarg; break; } case 'e': @@ -283,7 +294,7 @@ cerr << "INFO: count haplotypes \"11\" rather than \"00\"" << endl; break; } - case 'w': + case 'w': { string win = optarg; windowSize = atof( win.c_str() ); @@ -327,15 +338,15 @@ cerr << "INFO: window size: " << windowSize << endl; variantFile.open(filename); - + if(region != "NA"){ - variantFile.setRegion(region); + variantFile.setRegion(region); } - + if (!variantFile.is_open()) { return 1; } - + Variant var(variantFile); vector samples = variantFile.sampleNames; @@ -344,7 +355,7 @@ vector target_h, background_h; int index, indexi = 0; - + for(vector::iterator samp = samples.begin(); samp != samples.end(); samp++){ string sampleName = (*samp); if(targetIndex.find(index) != targetIndex.end() ){ @@ -357,7 +368,7 @@ } index++; } - + vector positions; vector targetAFS; vector backgroundAFS; @@ -366,9 +377,9 @@ for (int i = 0; i < nsamples; i++) { haplotypes[i] = new string[2]; } - + string currentSeqid = "NA"; - + while (variantFile.getNextVariant(var)) { if(!var.isPhased()){ @@ -390,73 +401,71 @@ backgroundAFS.clear(); } - + vector < map< string, vector > > target, background, total; - + int sindex = 0; - + for(int nsamp = 0; nsamp < nsamples; nsamp++){ map > sample = var.samples[ 
samples[nsamp]]; if(targetIndex.find(sindex) != targetIndex.end() ){ target.push_back(sample); - total.push_back(sample); + total.push_back(sample); } if(backgroundIndex.find(sindex) != backgroundIndex.end()){ background.push_back(sample); - total.push_back(sample); - } + total.push_back(sample); + } sindex += 1; } - - genotype * populationTarget ; - genotype * populationBackground; - genotype * populationTotal ; - + + unique_ptr populationTarget ; + unique_ptr populationBackground; + unique_ptr populationTotal ; + + using Detail::makeUnique; + if(type == "PL"){ - populationTarget = new pl(); - populationBackground = new pl(); - populationTotal = new pl(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); + populationTotal = makeUnique(); } if(type == "GL"){ - populationTarget = new gl(); - populationBackground = new gl(); - populationTotal = new gl(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); + populationTotal = makeUnique(); } if(type == "GP"){ - populationTarget = new gp(); - populationBackground = new gp(); - populationTotal = new gp(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); + populationTotal = makeUnique(); } if(type == "GT"){ - populationTarget = new gt(); - populationBackground = new gt(); - populationTotal = new gt(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); + populationTotal = makeUnique(); } - + populationTarget->loadPop(target, var.sequenceName, var.position); - + populationBackground->loadPop(background, var.sequenceName, var.position); - + populationTotal->loadPop(total, var.sequenceName, var.position); if(populationTotal->af < af_filt){ - - delete populationTarget; - delete populationBackground; - delete populationTotal; continue; } - + targetAFS.push_back(populationTarget->af); backgroundAFS.push_back(populationBackground->af); positions.push_back(var.position); - loadPhased(haplotypes, populationTotal, nsamples); - + 
loadPhased(haplotypes, populationTotal.get(), nsamples); + } calc(haplotypes, nsamples, positions, targetAFS, backgroundAFS, external, derived, windowSize, target_h, background_h, currentSeqid); - - return 0; + + return 0; } diff -Nru libvcflib-1.0.1+dfsg/src/smoother.cpp libvcflib-1.0.2+dfsg/src/smoother.cpp --- libvcflib-1.0.1+dfsg/src/smoother.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/smoother.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include #include #include @@ -6,10 +15,10 @@ #include #include #include "split.h" -#include +#include #include #include "gpatInfo.hpp" -#include +#include using namespace std; @@ -19,7 +28,7 @@ long int step; long int size; int seqid; - int pos ; + int pos ; int value; }; @@ -34,11 +43,12 @@ cerr << endl << endl; cerr << "INFO: help" << endl; cerr << "INFO: description:" << endl; - cerr << " Smoother averages a set of scores over a sliding genomic window. " << endl; - cerr << " Smoother slides over genomic positions not the SNP indices. In other words " << endl; + cerr << "smoothes is a method for window smoothing many of the GPAT++ formats." << endl << endl ; + cerr << " smoother averages a set of scores over a sliding genomic window. " << endl; + cerr << " smoother slides over genomic positions not the SNP indices. In other words " << endl; cerr << " the number of scores within a window will not be constant. The last " << endl; cerr << " window for each seqid can be smaller than the defined window size. " << endl; - cerr << " Smoother automatically analyses different seqids separately. " << endl; + cerr << " smoother automatically analyses different seqids separately. " << endl; cerr << "Output : 4 columns : " << endl; cerr << " 1. 
seqid " << endl; @@ -55,6 +65,7 @@ cerr << "INFO: optional: w,window -- argument: size of genomic window in base pairs (default 5000)" << endl; cerr << "INFO: optional: s,step -- argument: window step size in base pairs (default 1000) " << endl; cerr << "INFO: optional: t,truncate -- flag : end last window at last position (zero based) " << endl; + cerr << endl << "Type: transformation" << endl << endl; printVersion(); cerr << endl << endl; } @@ -62,13 +73,13 @@ double ngreater(list & rangeData, double val){ double n = 0; - - for(list::iterator it = rangeData.begin(); + + for(list::iterator it = rangeData.begin(); it != rangeData.end(); it++ ){ if(it->score >= val){ n += 1; - } + } } return n; } @@ -105,24 +116,24 @@ } void processSeqid(ifstream & file, string seqid, streampos offset, opts & opt){ - + string line ; - + long int windowSize = opt.size; long int start = 0; long int end = windowSize; list windowDat; - + file.clear(); - + file.seekg(offset); - + vector sline; while(getline(file, line)){ - sline = split(line, '\t'); + sline = split(line, '\t'); score current ; if(seqid != sline[opt.seqid]){ break; @@ -136,7 +147,7 @@ - // add in if abba-baba to process second score. + // add in if abba-baba to process second score. 
if(current.position > end){ @@ -157,8 +168,8 @@ } std::cout << std::endl; } - - + + } while(end < current.position){ start += opt.step; @@ -167,14 +178,14 @@ windowDat.pop_front(); } } - windowDat.push_back(current); + windowDat.push_back(current); } // add function for D-stat if abba-baba double finalMean = windowAvg(windowDat); - + if(opt.truncate && (finalMean == finalMean) ){ cout << seqid << "\t" << start << "\t" << windowDat.back().position - 1 << "\t" << windowDat.size() << "\t" << finalMean; - + if(opt.format == "iHS"){ std::cout << "\t" << ngreater(windowDat, 2.5) ; } @@ -194,7 +205,7 @@ } int main(int argc, char** argv) { - + map acceptableFormats; acceptableFormats["pFst"] = 1; acceptableFormats["col3"] = 1; @@ -214,7 +225,7 @@ string filename = "NA"; - static struct option longopts[] = + static struct option longopts[] = { {"version" , 0, 0, 'v'}, {"help" , 0, 0, 'h'}, @@ -335,23 +346,23 @@ cerr << "INFO: please use smoother --help" << endl; return 1; } - + ifstream ifs(filename.c_str()); - + string currentSeqid = "NA"; string line; map seqidIndex; - + if(ifs){ while(getline(ifs, line)){ vector sline = split(line, '\t'); if(sline[opt.seqid] != currentSeqid){ - + long int bline = ifs.tellg() ; bline -= ( line.size() +1 ); - + // std::cerr << "INFO: seqid: " << sline[opt.seqid] << " tellg: " << bline << std::endl; map::iterator it; @@ -373,7 +384,7 @@ cerr << "INFO: processing seqid : "<< (it->first) << endl; processSeqid(ifs, (it->first),(it->second), opt); } - + ifs.close(); cerr << "INFO: smoother has successfully finished" << endl; diff -Nru libvcflib-1.0.1+dfsg/src/split.cpp libvcflib-1.0.2+dfsg/src/split.cpp --- libvcflib-1.0.1+dfsg/src/split.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/split.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT 
License. See the LICENSE file. +*/ + #include "split.h" diff -Nru libvcflib-1.0.1+dfsg/src/split.h libvcflib-1.0.2+dfsg/src/split.h --- libvcflib-1.0.1+dfsg/src/split.h 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/split.h 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #ifndef __SPLIT_H #define __SPLIT_H diff -Nru libvcflib-1.0.1+dfsg/src/splitUniqStarts.cpp libvcflib-1.0.2+dfsg/src/splitUniqStarts.cpp --- libvcflib-1.0.1+dfsg/src/splitUniqStarts.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/splitUniqStarts.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "cdflib.hpp" diff -Nru libvcflib-1.0.1+dfsg/src/var.cpp libvcflib-1.0.2+dfsg/src/var.cpp --- libvcflib-1.0.1+dfsg/src/var.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/var.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,21 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + Copyright © 2014 Zev N. Kronenberg + + This software is published under the MIT License. See the LICENSE file. 
+*/ #include "var.hpp" diff -Nru libvcflib-1.0.1+dfsg/src/var.hpp libvcflib-1.0.2+dfsg/src/var.hpp --- libvcflib-1.0.1+dfsg/src/var.hpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/var.hpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + // not to complicate the issue but I need a different variant object to handle populations. #ifndef __VAR_H diff -Nru libvcflib-1.0.1+dfsg/src/Variant.cpp libvcflib-1.0.2+dfsg/src/Variant.cpp --- libvcflib-1.0.1+dfsg/src/Variant.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/Variant.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include diff -Nru libvcflib-1.0.1+dfsg/src/Variant.h libvcflib-1.0.2+dfsg/src/Variant.h --- libvcflib-1.0.1+dfsg/src/Variant.h 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/Variant.h 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #ifndef __VARIANT_H #define __VARIANT_H @@ -17,13 +26,12 @@ #include #include "split.h" #include "join.h" -#include "tabix.hpp" +#include #include "SmithWatermanGotoh.h" -#include "disorder.h" #include "ssw_cpp.hpp" #include "convert.h" #include "multichoose.h" -#include "Fasta.h" +#include extern "C" { #include "filevercmp.h" } @@ -194,7 +202,7 @@ class Variant { friend ostream& operator<<(ostream& out, Variant& var); - + public: string sequenceName; @@ -226,7 +234,7 @@ map extendedAlternates(long int newPosition, long int length); - /** + /** * Convert a structural variant to the canonical VCF4.3 format using a reference. * Meturns true if the variant is canonicalized, false otherwise. * May NOT be called twice on the same variant; it will fail an assert. @@ -249,39 +257,39 @@ * TODO: CURRENTLY: canonical requires there be only one alt allele **/ bool canonicalize(FastaReference& ref, - vector insertions, - bool place_seq = true, + vector insertions, + bool place_seq = true, int min_size_override = 0); - - /** + + /** * Returns true if the variant's ALT contains a symbolic allele like * instead of sequence, and the variant has an SVTYPE INFO tag. */ bool isSymbolicSV() const; - + /** * Returns true if the variant has an SVTYPE INFO tag and either an SVLEN or END INFO tag. */ bool hasSVTags() const; - + /** * This returns true if the variant appears able to be handled by * canonicalize(). It checks if it has fully specified sequence, or if it - * has a defined SV type and length/endpoint. + * has a defined SV type and length/endpoint. */ bool canonicalizable(); - + /** * This gets set to true after canonicalize() has been called on the variant, if it succeeded. */ bool canonical; - + /** * Get the maximum zero-based position of the reference affected by this variant. * Only works reliably for variants that are not SVs or for SVs that have been canonicalize()'d. 
*/ int getMaxReferencePos(); - + /** * Return the SV type of the given alt, or "" if there is no SV type set for that alt. * This is the One True Way to get the SVTYPE of a variant; we should not touch the SVTYPE tag anywhere else. @@ -384,7 +392,7 @@ // constructor RuleToken(string token, map& variables); - RuleToken(void) + RuleToken(void) : type(BOOLEAN_VARIABLE) , state(false) { } @@ -440,7 +448,7 @@ } inline bool isOperand(const RuleToken& token) { - return ( token.type == RuleToken::OPERAND || + return ( token.type == RuleToken::OPERAND || token.type == RuleToken::NUMBER || token.type == RuleToken::NUMERIC_VARIABLE || token.type == RuleToken::STRING_VARIABLE || @@ -630,14 +638,14 @@ */ vector header_columns; - /* + /* * the maps we're going to be using will be case-insensitive * so that "fileFormat" and "fileformat" hash to the same item. */ struct stringcasecmp : binary_function { struct charcasecmp : public std::binary_function { bool operator() (const unsigned char& c1, const unsigned char& c2) const { - return tolower (c1) < tolower (c2); + return tolower (c1) < tolower (c2); } }; bool operator() (const std::string & s1, const std::string & s2) const { @@ -646,10 +654,10 @@ }; // contains all the ##_types_ as keys, the value is either empty or a VCF file has set it - map header_lines; + map header_lines; // contains all the ##_types_ as keys, the value is a vector of ##_type_ (since there can be duplicate #INFO for example, duplicate ids are not allowed) - map, stringcasecmp> header_lists; + map, stringcasecmp> header_lists; }; diff -Nru libvcflib-1.0.1+dfsg/src/vcf2dag.cpp libvcflib-1.0.2+dfsg/src/vcf2dag.cpp --- libvcflib-1.0.1+dfsg/src/vcf2dag.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcf2dag.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT 
License. See the LICENSE file. +*/ + #include "Variant.h" #include "BedReader.h" #include "IntervalTree.h" @@ -14,12 +23,14 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [options] []" << endl << endl - << "options:" << endl + << "Modify VCF to be able to build a directed acyclic graph (DAG)" << endl + << "options:" << endl << " -r, --reference FILE FASTA reference file." << endl << endl << "Modify the VCF file so that homozygous regions are included as REF/. calls." << endl << "For each ref and alt allele, assign an index. These steps are sufficient to" << endl << "enable use of the VCF as a DAG (specifically a partially-ordered graph)." << endl; + cerr << endl << "Type: transformation" << endl << endl; exit(0); } @@ -93,7 +104,7 @@ } else { reference.open(fastaFileName); } - + string idname = "id"; long int uid = 0; @@ -165,4 +176,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcf2fasta.cpp libvcflib-1.0.2+dfsg/src/vcf2fasta.cpp --- libvcflib-1.0.1+dfsg/src/vcf2fasta.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcf2fasta.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "convert.h" #include "join.h" @@ -62,13 +71,14 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [options] [file]" << endl << endl + << "Generates sample_seq:N.fa for each sample, reference sequence, and chromosomal copy N in [0,1... ploidy]." << endl + << "Each sequence in the fasta file is named using the same pattern used for the file name, allowing them to be combined." << endl << "options:" << endl << " -f, --reference REF Use this reference when decomposing samples." 
<< endl << " -p, --prefix PREFIX Affix this output prefix to each file, none by default" << endl << " -P, --default-ploidy N Set a default ploidy for samples which do not have information in the first record (2)." << endl - << endl - << "Outputs sample_seq:N.fa for each sample, reference sequence, and chromosomal copy N in [0,1... ploidy]." << endl - << "Each sequence in the fasta file is named using the same pattern used for the file name, allowing them to be combined." << endl; + << endl ; + cerr << endl << "Type: transformation" << endl << endl; //<< "Impossible regions of haplotypes are noted with an error message. The corresponding" << endl //<< "regions of the output FASTA files will be marked as N." << endl exit(0); @@ -167,7 +177,7 @@ if( *g == NULL_ALLELE ){ if( nullAlleleString == "" ){ cerr << "empty genotype call for sample " << *s << " at " << var.sequenceName << ":" << var.position << endl; - cerr << "use -n option to set value to output for missing calls" << endl; + cerr << "use -n option to set value to output for missing calls" << endl; exit(1); }else{ outputs[sample].at(i)->write(nullAlleleString); @@ -280,4 +290,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcf2tsv.cpp libvcflib-1.0.2+dfsg/src/vcf2tsv.cpp --- libvcflib-1.0.1+dfsg/src/vcf2tsv.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcf2tsv.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include @@ -5,11 +14,11 @@ using namespace vcflib; void printSummary(char** argv) { - cerr << "usage: " << argv[0] << " [-n null_string] [-g]" << " [vcf file]" << endl - << "Converts stdin or given VCF file to tab-delimited format, using null string to replace empty values in the table." 
<< endl - << "Specifying -g will output one line per sample with genotype information." << endl - << "When there is more than one alt allele there will be multiple rows, one for each allele and, the info will match the 'A' index" << endl; - + cerr << "usage: vcf2tsv [-n null_string] [-g]" << " [vcf file]" << endl << endl + << "Converts VCF to per-allelle or per-genotype tab-delimited format, using null string to replace empty values in the table." << endl + << "Specifying -g will output one line per sample with genotype information." << endl + << "When there is more than one alt allele there will be multiple rows, one for each allele and, the info will match the 'A' index" << endl; + cerr << endl << "Type: transformation" << endl << endl; exit(1); } @@ -207,4 +216,3 @@ } return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfaddinfo.cpp libvcflib-1.0.2+dfsg/src/vcfaddinfo.cpp --- libvcflib-1.0.1+dfsg/src/vcfaddinfo.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfaddinfo.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include @@ -19,9 +28,10 @@ int main(int argc, char** argv) { if (argc != 3) { - cerr << "usage: " << argv[0] << " " << endl - << "Adds info fields from the second file which are not present in the first vcf file." << endl; - return 1; + cerr << "usage: " << argv[0] << " " << endl << endl + << "Adds info fields from the second file which are not present in the first vcf file." 
<< endl; + cerr << endl << "Type: transformation" << endl << endl; + return 1; } string filenameA = argv[1]; @@ -57,12 +67,12 @@ // step forward, annotating each genotype record with an empty genotype // when the two match, iterate through the genotypes from the first file // and get the genotypes reported in the second file - + variantFileA.getNextVariant(varA); variantFileB.getNextVariant(varB); - + variantFileA.header = unionInfoHeaderLines(variantFileA.header, variantFileB.header); - + cout << variantFileA.header << endl; do { @@ -95,7 +105,7 @@ variantFileA.getNextVariant(varA); variantFileB.getNextVariant(varB); } - + } while (!variantFileA.done() && !variantFileB.done()); if (!variantFileA.done()) { @@ -108,4 +118,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfafpath.cpp libvcflib-1.0.2+dfsg/src/vcfafpath.cpp --- libvcflib-1.0.1+dfsg/src/vcfafpath.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfafpath.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" #include #include @@ -7,6 +16,40 @@ using namespace vcflib; int main(int argc, char** argv) { + if (argc == 2) { + string h_flag = argv[1]; + + if (argc == 2 && (h_flag == "-h" || h_flag == "--help")) { + cerr << R"( +Display genotype paths + +Usage: vcfafpath + +Example: + + vcfafpath samples/scaffold612.vcf + +``` + +T -> A +A -> G +T -> C +C -> A +C -> T +A -> G +T -> C +G -> C +C -> CAGA +A -> G +``` + + +Type: transformation + + )"; + exit(1); + } + } VariantCallFile variantFile; @@ -49,4 +92,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfallelicprimitives.cpp libvcflib-1.0.2+dfsg/src/vcfallelicprimitives.cpp --- libvcflib-1.0.1+dfsg/src/vcfallelicprimitives.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfallelicprimitives.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "convert.h" #include "join.h" @@ -19,6 +28,10 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [options] [file]" << endl << endl + << "If multiple allelic primitives (gaps or mismatches) are specified in" << endl + << "a single VCF record, split the record into multiple lines, but drop all" << endl + << "INFO fields. Does not handle genotypes (yet). MNPs are split into" << endl + << "multiple SNPs unless the -m flag is provided. Records generated by splits have th" << endl << "options:" << endl << " -m, --use-mnps Retain MNPs as separate events (default: false)." << endl << " -t, --tag-parsed FLAG Tag records which are split apart of a complex allele with this flag." << endl @@ -29,12 +42,8 @@ << " be valid post-decomposition. For biallelic loci in single-sample" << endl << " VCFs, they should be usable with caution." 
<< endl << " -g, --keep-geno Maintain genotype-level annotations when decomposing. Similar" << endl - << " caution should be used for this as for --keep-info." << endl - << endl - << "If multiple alleleic primitives (gaps or mismatches) are specified in" << endl - << "a single VCF record, split the record into multiple lines, but drop all" << endl - << "INFO fields. Does not handle genotypes (yet). MNPs are split into" << endl - << "multiple SNPs unless the -m flag is provided. Records generated by splits have th" << endl; + << " caution should be used for this as for --keep-info." << endl; + cerr << endl << "Type: transformation" << endl << endl; exit(0); } @@ -132,7 +141,7 @@ // we can't decompose *1* bp events, these are already in simplest-form whether SNPs or indels // we also don't handle anything larger than maxLength bp - if (var.alt.size() == 1 + if (var.alt.size() == 1 && ( var.alt.front().size() == 1 || var.ref.size() == 1 || var.alt.front().size() > maxLength @@ -250,7 +259,7 @@ sampleit != var.samples.end(); ++sampleit) { string& sampleName = sampleit->first; map >& sampleValues = var.samples[sampleName]; - + } } */ @@ -411,4 +420,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfaltcount.cpp libvcflib-1.0.2+dfsg/src/vcfaltcount.cpp --- libvcflib-1.0.1+dfsg/src/vcfaltcount.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfaltcount.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" #include "split.h" #include @@ -10,9 +19,10 @@ int main(int argc, char** argv) { if (argc != 2) { - cerr << "usage: " << argv[0] << " " << endl - << "count the number of alternate alleles in all records in the vcf file" << endl; - return 1; + cerr << "usage: " << argv[0] << " " << endl << endl + << "count the number of alternate alleles in all records in the vcf file" << endl; + cerr << endl << "Type: statistics" << endl << endl; + return 1; } string filename = argv[1]; @@ -47,4 +57,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfannotate.cpp libvcflib-1.0.2+dfsg/src/vcfannotate.cpp --- libvcflib-1.0.1+dfsg/src/vcfannotate.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfannotate.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "BedReader.h" #include @@ -9,15 +18,15 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [options] []" << endl << endl - << "options:" << endl - << " -b, --bed use annotations provided by this BED file" << endl - << " -k, --key use this INFO field key for the annotations" << endl - << " -d, --default use this INFO field key for records without annotations" << endl - << endl << "Intersect the records in the VCF file with targets provided in a BED file." << endl << "Intersections are done on the reference sequences in the VCF file." << endl << "If no VCF filename is specified on the command line (last argument) the VCF" << endl - << "read from stdin." << endl; + << "read from stdin." 
<< endl << endl + << "options:" << endl + << " -b, --bed use annotations provided by this BED file" << endl + << " -k, --key use this INFO field key for the annotations" << endl + << " -d, --default use this INFO field key for records without annotations" << endl; + cerr << endl << "Type: transformation" << endl << endl; exit(0); } diff -Nru libvcflib-1.0.1+dfsg/src/vcfannotategenotypes.cpp libvcflib-1.0.2+dfsg/src/vcfannotategenotypes.cpp --- libvcflib-1.0.1+dfsg/src/vcfannotategenotypes.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfannotategenotypes.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include @@ -11,7 +20,7 @@ var.addFormatField(annotationTag); - map > >::iterator s = var.samples.begin(); + map > >::iterator s = var.samples.begin(); map > >::iterator sEnd = var.samples.end(); for (; s != sEnd; ++s) { @@ -25,7 +34,7 @@ varA.addFormatField(annotationTag); - map > >::iterator s = varA.samples.begin(); + map > >::iterator s = varA.samples.begin(); map > >::iterator sEnd = varA.samples.end(); map varAAlleleInts; @@ -55,7 +64,7 @@ map >& other = o->second; string& otherGenotype = other["GT"].front(); // XXX this must compare the genotypes in the two files - + if (otherGenotype.find("|") != string::npos) { vector gtB = decomposePhasedGenotype(otherGenotype); vector gtnew; @@ -90,12 +99,13 @@ int main(int argc, char** argv) { if (argc != 4) { - cerr << "usage: " << argv[0] << " " << endl - << "annotates genotypes in the first file with genotypes in the second" << endl - << "adding the genotype as another flag to each sample filed in the first file." << endl - << "annotation-tag is the name of the sample flag which is added to store the annotation." 
<< endl - << "also adds a 'has_variant' flag for sites where the second file has a variant." << endl; - return 1; + cerr << "usage: " << argv[0] << " " << endl << endl + << "Examine genotype correspondence. Annotate genotypes in the first file with genotypes in the second" << endl + << "adding the genotype as another flag to each sample filed in the first file." << endl + << "annotation-tag is the name of the sample flag which is added to store the annotation." << endl + << "also adds a 'has_variant' flag for sites where the second file has a variant." << endl; + cerr << endl << "Type: transformation" << endl << endl; + return 1; } string annotag = argv[1]; @@ -132,7 +142,7 @@ // step forward, annotating each genotype record with an empty genotype // when the two match, iterate through the genotypes from the first file // and get the genotypes reported in the second file - + variantFileA.getNextVariant(varA); variantFileB.getNextVariant(varB); @@ -157,8 +167,8 @@ variantFileB.getNextVariant(varB); } - // if A is not done- and A is less than B, read A. - // should also read if variant B is done. + // if A is not done- and A is less than B, read A. + // should also read if variant B is done. 
if (!variantFileA.done() && (varA.sequenceName != varB.sequenceName || (varA.sequenceName == varB.sequenceName && varA.position < varB.position) @@ -200,14 +210,14 @@ if (!hasMultipleAlts && (varsA.size() > 1 || varsB.size() > 1)) { map, Variant> varsAParsed; - map, Variant> varsBParsed; + map, Variant> varsBParsed; for (vector::iterator v = varsA.begin(); v != varsA.end(); ++v) { varsAParsed[make_pair(v->ref, v->alt.front())] = *v; } for (vector::iterator v = varsB.begin(); v != varsB.end(); ++v) { varsBParsed[make_pair(v->ref, v->alt.front())] = *v; } - + for (map, Variant>::iterator vs = varsAParsed.begin(); vs != varsAParsed.end(); ++vs) { Variant& varA = vs->second; annotateWithBlankGenotypes(varA, annotag); @@ -241,4 +251,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfbreakmulti.cpp libvcflib-1.0.2+dfsg/src/vcfbreakmulti.cpp --- libvcflib-1.0.1+dfsg/src/vcfbreakmulti.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfbreakmulti.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "convert.h" #include @@ -18,6 +27,7 @@ << endl << "If multiple alleles are specified in a single record, break the record into" << endl << "multiple lines, preserving allele-specific INFO fields." 
<< endl; + cerr << endl << "Type: transformation" << endl << endl; exit(0); } @@ -111,4 +121,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfcat.cpp libvcflib-1.0.2+dfsg/src/vcfcat.cpp --- libvcflib-1.0.1+dfsg/src/vcfcat.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfcat.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,13 +1,35 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" using namespace std; using namespace vcflib; int main(int argc, char** argv) { + if (argc == 2) { + string h_flag = argv[1]; + + if (h_flag == "-h" || h_flag == "--help") { + cerr << R"( +usage: vcfcat [file1] [file2] ... [fileN] + +Concatenates VCF files + +Type: transformation + + )"; + } + exit(1); + } if (argc == 1) { - cout << "usage: " << argv[0] << " [file1] [file2] ... [fileN]" << endl - << "Concatenates VCF files." << endl; return 0; } else { for (int i = 1; i < argc; ++i) { @@ -31,4 +53,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfcheck.cpp libvcflib-1.0.2+dfsg/src/vcfcheck.cpp --- libvcflib-1.0.1+dfsg/src/vcfcheck.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfcheck.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "Fasta.h" @@ -10,15 +19,15 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [options] " << endl << endl - << "options:" << endl + << "Validate integrity and identity of the VCF by verifying that the VCF record's REF matches a given reference file." 
<< endl + << "options:" << endl << " -f, --fasta-reference FASTA reference file to use to obtain primer sequences" << endl << " -x, --exclude-failures If a record fails, don't print it. Otherwise do." << endl << " -k, --keep-failures Print if the record fails, otherwise not." << endl << " -h, --help Print this message." << endl << " -v, --version Print version." << endl - << endl - << "Verifies that the VCF REF field matches the reference as described." << endl << endl; + cerr << endl << "Type: metrics" << endl << endl; exit(0); } @@ -55,7 +64,7 @@ /* Detect the end of the options. */ if (c == -1) break; - + switch (c) { case 0: @@ -150,4 +159,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfclassify.cpp libvcflib-1.0.2+dfsg/src/vcfclassify.cpp --- libvcflib-1.0.1+dfsg/src/vcfclassify.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfclassify.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" #include "split.h" #include @@ -84,11 +93,21 @@ int main(int argc, char** argv) { - if (argc != 2) { - cerr << "usage: " << argv[0] << " " << endl - << "outputs a VCF stream each variant is tagged by allele class: snp, ts/tv, indel, mnp" << endl; - return 1; + if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << R"( +usage: vcfclassify + +Creates a new VCF where each variant is tagged by allele class: snp, +ts/tv, indel, mnp + +Type: transformation + + )"; + exit(1); } + } string filename = argv[1]; @@ -125,7 +144,7 @@ // write the new header cout << variantFile.header << endl; - + while (variantFile.getNextVariant(var)) { @@ -159,4 +178,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfcleancomplex.cpp libvcflib-1.0.2+dfsg/src/vcfcleancomplex.cpp --- libvcflib-1.0.1+dfsg/src/vcfcleancomplex.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfcleancomplex.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include @@ -10,14 +19,18 @@ int main(int argc, char** argv) { - if (argc != 2) { - cerr << "usage: " << argv[0] << " " << endl - << "outputs a VCF stream in which 'long' non-complex" - << "alleles have their position corrected." << endl - << "assumes that VCF records can't overlap 5'->3'" << endl; - return 1; + if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << "usage: " << argv[0] << " " << endl << endl + << "Removes reference-matching sequence from complex alleles and adjusts records to reflect positional change." << endl << endl + << "Generate a VCF stream in which 'long' non-complex" + << "alleles have their position corrected." 
<< endl + << "assumes that VCF records can't overlap 5'->3'" << endl; + cerr << endl << "Type: transformation" << endl << endl; + return 1; } - + } string filename = argv[1]; VariantCallFile variantFile; @@ -36,7 +49,7 @@ // write the new header cout << variantFile.header << endl; - + // print the records, filtering is done via the setting of varA's output sample names while (variantFile.getNextVariant(var)) { // if we just have one parsed alternate (non-complex case) @@ -68,4 +81,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfcombine.cpp libvcflib-1.0.2+dfsg/src/vcfcombine.cpp --- libvcflib-1.0.1+dfsg/src/vcfcombine.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfcombine.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include #include @@ -9,7 +18,7 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [vcf file] [vcf file] ..." << endl << endl - << "Combines VCF files positionally, combining samples when sites and alleles are identical." << endl + << "Combine VCF files positionally, combining samples when sites and alleles are identical." << endl << "Any number of VCF files may be combined. The INFO field and other columns are taken from" << endl << "one of the files which are combined when records in multiple files match. Alleles must" << endl << "have identical ordering to be combined into one record. If they do not, multiple records" << endl @@ -19,6 +28,7 @@ << " -h --help This text." << endl << " -v --version Print version." 
<< endl << " -r --region REGION A region specifier of the form chrN:x-y to bound the merge" << endl; + cerr << endl << "Type: transformation" << endl << endl; exit(1); } @@ -217,4 +227,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfcommonsamples.cpp libvcflib-1.0.2+dfsg/src/vcfcommonsamples.cpp --- libvcflib-1.0.1+dfsg/src/vcfcommonsamples.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfcommonsamples.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include @@ -27,12 +36,15 @@ int main(int argc, char** argv) { - if (argc != 3) { - cerr << "usage: " << argv[0] << " " << endl - << "outputs each record in the first file, removing samples not present in the second" << endl; - return 1; + if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << "usage: " << argv[0] << " " << endl << endl + << "Generates each record in the first file, removing samples not present in the second" << endl; + cerr << endl << "Type: transformation" << endl << endl; + return 1; } - + } string filenameA = argv[1]; string filenameB = argv[2]; @@ -70,10 +82,10 @@ // and restrict the output sample names in the variant to those we are keeping varA.setOutputSampleNames(commonSamples); - + // write the new header cout << variantFileA.header << endl; - + // print the records, filtering is done via the setting of varA's output sample names while (variantFileA.getNextVariant(varA)) { cout << varA << endl; @@ -82,4 +94,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfcountalleles.cpp libvcflib-1.0.2+dfsg/src/vcfcountalleles.cpp --- libvcflib-1.0.1+dfsg/src/vcfcountalleles.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfcountalleles.cpp 
2021-01-28 07:04:12.000000000 +0000 @@ -1,9 +1,38 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" using namespace std; using namespace vcflib; int main(int argc, char** argv) { +if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << R"( +Count alleles + +Usage: vcfcountalleles + +Example: + +vcfcountalleles samples/scaffold612.vcf +42603 + +Type: statistics + + )"; + exit(1); + } + } + VariantCallFile variantFile; @@ -30,4 +59,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfcreatemulti.cpp libvcflib-1.0.2+dfsg/src/vcfcreatemulti.cpp --- libvcflib-1.0.1+dfsg/src/vcfcreatemulti.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfcreatemulti.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "convert.h" #include @@ -19,6 +28,7 @@ << endl << "If overlapping alleles are represented across multiple records, merge" << endl << "them into a single record. Currently only for indels." << endl; + cerr << endl << "Type: transformation" << endl << endl; exit(0); } @@ -194,4 +204,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfdistance.cpp libvcflib-1.0.2+dfsg/src/vcfdistance.cpp --- libvcflib-1.0.1+dfsg/src/vcfdistance.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfdistance.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. 
See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include @@ -12,11 +21,12 @@ if (argc > 1) { tag = argv[1]; if (tag == "--help" || tag == "-h") { - cerr << "usage: " << argv[0] << " [customtagname] < [vcf file]" << endl - << "adds a tag to each variant record which indicates the distance" << endl - << "to the nearest variant." << endl - << "(defaults to BasesToClosestVariant if no custom tag name is given." << endl; - return 1; + cerr << "usage: " << argv[0] << " [customtagname] < [vcf file]" << endl << endl + << "Adds a tag to each variant record which indicates the distance" << endl + << "to the nearest variant." << endl + << "(defaults to BasesToClosestVariant if no custom tag name is given." << endl; + cerr << endl << "Type: metrics" << endl << endl; + return 1; } } else { /* use default */ @@ -96,4 +106,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfecho.cpp libvcflib-1.0.2+dfsg/src/vcfecho.cpp --- libvcflib-1.0.1+dfsg/src/vcfecho.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfecho.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" using namespace std; @@ -5,6 +14,21 @@ int main(int argc, char** argv) { +if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << R"( +usage: vcfecho + +Echo VCF to stdout (simple demo) + +Type: transformation + + )"; + exit(1); + } + } + VariantCallFile variantFile; if (argc > 1) { @@ -28,4 +52,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfentropy.cpp libvcflib-1.0.2+dfsg/src/vcfentropy.cpp --- libvcflib-1.0.1+dfsg/src/vcfentropy.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfentropy.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,7 +1,17 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "Fasta.h" #include +#include "disorder.h" using namespace std; using namespace vcflib; @@ -9,14 +19,16 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [options] " << endl << endl - << "options:" << endl - << " -f, --fasta-reference FASTA reference file to use to obtain flanking sequences" << endl - << " -w, --window-size Size of the window over which to calculate entropy" << endl - << endl + << "Annotate VCF records with the Shannon entropy of flanking sequence." << endl << "Anotates the output VCF file with, for each record, EntropyLeft, EntropyRight," << endl << "EntropyCenter, which are the entropies of the sequence of the given window size to the" << endl << "left, right, and center of the record. Also adds EntropyRef and EntropyAlt for each alt." 
<< endl + << "options:" << endl + << " -f, --fasta-reference FASTA reference file to use to obtain flanking sequences" << endl + << " -w, --window-size Size of the window over which to calculate entropy" << endl + << endl << endl; + cerr << endl << "Type: metrics" << endl << endl; exit(0); } @@ -50,7 +62,7 @@ /* Detect the end of the options. */ if (c == -1) break; - + switch (c) { case 0: @@ -70,7 +82,7 @@ case 'w': windowSize = atoi(optarg); break; - + case 'h': printSummary(argv); exit(0); @@ -81,7 +93,7 @@ printSummary(argv); exit(1); break; - + default: abort (); } @@ -156,4 +168,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfevenregions.cpp libvcflib-1.0.2+dfsg/src/vcfevenregions.cpp --- libvcflib-1.0.1+dfsg/src/vcfevenregions.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfevenregions.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "Fasta.h" @@ -10,19 +19,19 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [options] " << endl << endl - << "options:" << endl + << "Generates a list of regions, e.g. chr20:10..30 using the variant" << endl + << "density information provided in the VCF file to ensure that the regions have" << endl + << "even numbers of variants. This can be use to reduce the variance in runtime" << endl + << "when dividing variant detection or genotyping by genomic coordinates." << endl + << "options:" << endl << " -f, --fasta-reference REF FASTA reference file to use to obtain primer sequences." << endl << " -n, --number-of-regions N The number of desired regions." << endl << " -p, --number-of-positions N The number of positions per region." 
<< endl << " -o, --offset N Add an offset to region positioning, to avoid boundary" << endl << " related artifacts in downstream processing." << endl << " -l, --overlap N The number of sites to overlap between regions. Default 0." << endl - << " -s, --separator SEQ Specify string to use to separate region output. Default '-'" << endl - << endl - << "Generates a list of regions, e.g. chr20:10..30 using the variant" << endl - << "density information provided in the VCF file to ensure that the regions have" << endl - << "even numbers of variants. This can be use to reduce the variance in runtime" << endl - << "when dividing variant detection or genotyping by genomic coordinates." << endl; + << " -s, --separator SEQ Specify string to use to separate region output. Default '-'" << endl; + cerr << endl << "Type: transformation" << endl << endl; exit(0); } @@ -76,7 +85,7 @@ /* Detect the end of the options. */ if (c == -1) break; - + switch (c) { case 0: @@ -112,7 +121,7 @@ case 's': regionSplitSeq = optarg; break; - + case 'h': printSummary(argv); exit(0); @@ -123,7 +132,7 @@ printSummary(argv); exit(1); break; - + default: abort (); } @@ -199,4 +208,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcffilter.cpp libvcflib-1.0.2+dfsg/src/vcffilter.cpp --- libvcflib-1.0.1+dfsg/src/vcffilter.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcffilter.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" #include "split.h" #include @@ -6,7 +15,8 @@ using namespace vcflib; void printSummary(char** argv) { - cerr << "usage: " << argv[0] << " [options] " << endl + cerr << "vcflib " << VCFLIB_VERSION << " filter the specified vcf file using the set of filters" << endl << endl + << "usage: " << argv[0] << " [options] " << endl << endl << "options:" << endl << " -f, --info-filter specifies a filter to apply to the info fields of records," << endl @@ -38,7 +48,7 @@ << endl << "For convenience, you can specify \"QUAL\" to refer to the quality of the site, even" << endl << "though it does not appear in the INFO fields." << endl - << endl; + << endl << "type: filter" << endl; exit(0); } @@ -419,4 +429,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcffixup.cpp libvcflib-1.0.2+dfsg/src/vcffixup.cpp --- libvcflib-1.0.1+dfsg/src/vcffixup.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcffixup.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include @@ -44,8 +53,23 @@ int main(int argc, char** argv) { if (argc == 1 || ((argc > 1) && strcmp(argv[1], "-h") == 0) || strcmp(argv[1], "--help") == 0) { - cerr << "usage: " << argv[0] << " " << endl - << "outputs a VCF stream where AC and NS have been generated for each record using sample genotypes" << endl; + cerr << "usage: " << argv[0] << " " << endl << endl + << "Generates a VCF stream where AC and NS have been generated for each record using sample genotypes" << endl << endl; + cerr << R"( + +Count the allele frequencies across alleles present in each record in the VCF file. (Similar to vcftools --freq.) 
+ +Uses genotypes from the VCF file to correct AC (alternate allele count), AF +(alternate allele frequency), NS (number of called), in the VCF records. For +example: + + % vcfkeepsamples file.vcf NA12878 | vcffixup - | vcffilter -f "AC > 0" + +Would downsample file.vcf to only NA12878, removing sites for which the sample +was not called as polymorphic. +)"; + + cerr << endl << "Type: transformation" << endl << endl; return 1; } @@ -116,4 +140,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfflatten.cpp libvcflib-1.0.2+dfsg/src/vcfflatten.cpp --- libvcflib-1.0.1+dfsg/src/vcfflatten.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfflatten.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "convert.h" @@ -26,6 +35,7 @@ << "allele frequency specification 'AF' and use of 'G' and 'A' to specify the" << endl << "fields which vary according to the Allele or Genotype. VCF file may be" << endl << "specified on the command line or piped as stdin." 
<< endl; + cerr << endl << "Type: transformation" << endl << endl; exit(1); } variantFile.open(filename); @@ -69,7 +79,7 @@ // now get the genotype indexes we want to keep vector alleleIndexes; - alleleIndexes.push_back(0); + alleleIndexes.push_back(0); alleleIndexes.push_back(bestaltGenotypeIndex); // add the reference allele index for generating genotype indexes @@ -175,4 +185,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfgeno2alleles.cpp libvcflib-1.0.2+dfsg/src/vcfgeno2alleles.cpp --- libvcflib-1.0.1+dfsg/src/vcfgeno2alleles.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfgeno2alleles.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include @@ -10,8 +19,9 @@ int main(int argc, char** argv) { if (argc > 1) { - cerr << "usage: " << argv[0] << " <[vcf file]" << endl + cerr << "usage: " << argv[0] << " <[vcf file]" << endl << endl << "modifies the genotypes field to provide the literal alleles rather than indexes" << endl; + cerr << endl << "Type: transformation" << endl << endl; return 1; } @@ -27,15 +37,15 @@ Variant var(variantFile); while (variantFile.getNextVariant(var)) { - map > >::iterator s = var.samples.begin(); + map > >::iterator s = var.samples.begin(); map > >::iterator sEnd = var.samples.end(); - + for (; s != sEnd; ++s) { map >& sample = s->second; vector& gtstrs = sample["GT"]; string& genotype = gtstrs.front(); vector gt = split(genotype, "|/"); - + // report the sample and it's genotype stringstream o; for (vector::iterator g = gt.begin(); g != gt.end(); ++g) { @@ -51,4 +61,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfgeno2haplo.cpp libvcflib-1.0.2+dfsg/src/vcfgeno2haplo.cpp --- libvcflib-1.0.1+dfsg/src/vcfgeno2haplo.cpp 2019-10-01 
07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfgeno2haplo.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include #include "Fasta.h" @@ -13,7 +22,9 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [options] []" << endl << endl - << "options:" << endl + << "Convert genotype-based phased alleles within --window-size into haplotype alleles." << endl + << "Will break haplotype construction when encountering non-phased genotypes on input." << endl + << "options:" << endl << " -h, --help Print this message" << endl << " -v, --version Print version" << endl << " -r, --reference FILE FASTA reference file" << endl @@ -21,9 +32,8 @@ << " -o, --only-variants Don't output the entire haplotype, just concatenate" << endl << " REF/ALT strings (delimited by \":\")" << endl << endl - << "Convert genotype-based phased alleles within --window-size into haplotype alleles." << endl - << "Will break haplotype construction when encountering non-phased genotypes on input." << endl << endl; + cerr << endl << "Type: transformation" << endl << endl; exit(0); } @@ -69,7 +79,7 @@ c = getopt_long (argc, argv, "hvow:r:", long_options, &option_index); - + if (c == -1){ break; } @@ -78,7 +88,7 @@ { printBasicVersion(); exit(0); - } + } case 'o': { onlyVariants = true; @@ -211,7 +221,7 @@ // only build haplotypes for samples with complete information string& sampleName = *s; vector >& haplotypes = sampleHaplotypes[sampleName]; - + bool completeCoverage = true; // ensure complete genotype coverage over the haplotype cluster for (vector::iterator v = cluster.begin(); v != cluster.end(); ++v) { @@ -224,7 +234,7 @@ if (!completeCoverage) { continue; // skip samples without complete coverage } - + // what's the ploidy? 
{ string& gt = cluster.front().samples[sampleName]["GT"].front(); @@ -234,7 +244,7 @@ haplotypes.push_back(haplotype); } } - + for (vector::iterator v = cluster.begin(); v != cluster.end(); ++v) { string& gt = v->samples[sampleName]["GT"].front(); vector gtspec = split(gt, "|"); @@ -257,12 +267,12 @@ hapToSamples[&*uniqueHaplotypes.find(*h)].push_back(hs->first); } } - + // write new haplotypes map, string> haplotypeSeqs; map, int> haplotypeIndexes; map alleles; - + int impossibleHaplotypes = 0; // always include the reference haplotype as 0 @@ -328,7 +338,7 @@ } } } - + if (impossibleHaplotype) { ++impossibleHaplotypes; haplotypeIndexes[*u] = -1; // indicates impossible haplotype @@ -365,7 +375,7 @@ for (int i = 1; i < alleleIndex; ++i) { outputVar.alt.push_back(alleles[i]); } - + outputVar.sequenceName = cluster.front().sequenceName; outputVar.position = cluster.front().position; outputVar.filter = "."; @@ -373,7 +383,7 @@ outputVar.info = cluster.front().info; outputVar.samples.clear(); outputVar.format = cluster.front().format; - + // now the genotypes for (vector::iterator s = var.sampleNames.begin(); s != var.sampleNames.end(); ++s) { string& sampleName = *s; @@ -412,4 +422,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfgenosamplenames.cpp libvcflib-1.0.2+dfsg/src/vcfgenosamplenames.cpp --- libvcflib-1.0.1+dfsg/src/vcfgenosamplenames.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfgenosamplenames.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,9 +1,43 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" using namespace std; using namespace vcflib; int main(int argc, char** argv) { + if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << R"( +Get samplenames + +Usage: vcfgenosamplenames + +Example: + +vcfsamplenames samples/sample.vcf + +NA00001 +NA00002 +NA00003 + + +Type: transformation + + )"; + exit(1); + } + } + + VariantCallFile variantFile; @@ -36,4 +70,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfgenosummarize.cpp libvcflib-1.0.2+dfsg/src/vcfgenosummarize.cpp --- libvcflib-1.0.1+dfsg/src/vcfgenosummarize.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfgenosummarize.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include @@ -11,14 +20,18 @@ int main(int argc, char** argv) { - if (argc > 1 && (argv[1] == "-h" || argv[1] == "--help")) { - cerr << "usage: " << argv[0] << " <[input file] >[output vcf]" << endl + if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << "usage: " << argv[0] << " <[input file] >[output vcf]" << endl << endl << "Adds summary statistics to each record summarizing qualities reported in" << endl << "called genotypes. 
Uses:" << endl << "RO (reference observation count), QR (quality sum reference observations)" << endl << "AO (alternate observation count), QA (quality sum alternate observations)" << endl; + cerr << endl << "Type: statistics" << endl << endl; return 1; } + } VariantCallFile variantFile; if (argc == 1) { @@ -47,7 +60,7 @@ // write the new header cout << variantFile.header << endl; - + // print the records, filtering is done via the setting of varA's output sample names while (variantFile.getNextVariant(var)) { int refobs = 0; @@ -94,7 +107,7 @@ if (refobs == 0 || refqual == 0) { var.info["RQA"].push_back(convert(1)); } else { - var.info["RQA"].push_back(convert(((double)altqual[i]/(double)altobs[i]) / + var.info["RQA"].push_back(convert(((double)altqual[i]/(double)altobs[i]) / ((double)refqual/(double)refobs))); } } @@ -104,4 +117,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfgenotypecompare.cpp libvcflib-1.0.2+dfsg/src/vcfgenotypecompare.cpp --- libvcflib-1.0.1+dfsg/src/vcfgenotypecompare.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfgenotypecompare.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include @@ -27,11 +36,12 @@ int main(int argc, char** argv) { if (argc != 3) { - cerr << "usage: " << argv[0] << " " << endl - << "adds statistics to the INFO field of the vcf file describing the" << endl - << "amount of discrepancy between the genotypes (GT) in the vcf file and the" << endl - << "genotypes reported in the . use this after" << endl - << "vcfannotategenotypes to get correspondence statistics for two vcfs." 
<< endl; + cerr << "usage: " << argv[0] << " " << endl << endl + << "adds statistics to the INFO field of the vcf file describing the" << endl + << "amount of discrepancy between the genotypes (GT) in the vcf file and the" << endl + << "genotypes reported in the . use this after" << endl + << "vcfannotategenotypes to get correspondence statistics for two vcfs." << endl; + cerr << endl << "Type: statistics" << endl << endl; return 1; } @@ -324,4 +334,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfgenotypes.cpp libvcflib-1.0.2+dfsg/src/vcfgenotypes.cpp --- libvcflib-1.0.1+dfsg/src/vcfgenotypes.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfgenotypes.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include @@ -10,8 +19,28 @@ int main(int argc, char** argv) { if (argc != 2) { - cerr << "usage: " << argv[0] << " " << endl - << "report the genotypes for each sample, for each variant in the vcf file" << endl; + cerr << "usage: " << argv[0] << " " << endl << endl; + cerr << R"( + +Report the genotypes for each sample, for each variant in the +VCF. Convert the numerical represenation of genotypes provided by the +GT field to a human-readable genotype format. + +Example: + + vcfgenotypes samples/sample.vcf + +19 111 A C A,C NA00001:A/A NA00002:A/A NA00003:A/C +19 112 A G A,G NA00001:A/A NA00002:A/A NA00003:A/G +20 14370 G A G,A NA00001:G/G NA00002:G/A NA00003:A/A +20 17330 T A T,A NA00001:T/T NA00002:T/A NA00003:T/T +20 1110696 A G,T A,G,T NA00001:G/T NA00002:G/T NA00003:T/T +20 1230237 T . T,. NA00001:T/T NA00002:T/T NA00003:T/T +20 1234567 G GA,GAC G,GA,GAC NA00001:G/GA NA00002:G/GAC NA00003:GA/GA +20 1235237 T . T,. NA00001:T/T NA00002:T/T NA00003:./. 
+X 10 AC A,ATG AC,A,ATG NA00001:AC NA00002:AC/A NA00003:AC/ATG +)"; + cerr << endl << "Type: statistics" << endl << endl; return 1; } @@ -31,15 +60,15 @@ Variant var(variantFile); while (variantFile.getNextVariant(var)) { - map > >::iterator s = var.samples.begin(); + map > >::iterator s = var.samples.begin(); map > >::iterator sEnd = var.samples.end(); - + cout << var.sequenceName << "\t" << var.position << "\t" << var.ref << "\t"; - var.printAlt(cout); cout << "\t"; - var.printAlleles(cout); cout << "\t"; - + var.printAlt(cout); cout << "\t"; + var.printAlleles(cout); cout << "\t"; + for (; s != sEnd; ++s) { map >& sample = s->second; string& genotype = sample["GT"].front(); // XXX assumes we can only have one GT value @@ -64,4 +93,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfglbound.cpp libvcflib-1.0.2+dfsg/src/vcfglbound.cpp --- libvcflib-1.0.1+dfsg/src/vcfglbound.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfglbound.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include @@ -8,12 +17,13 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [options] " << endl << endl - << "options:" << endl + << "Adjust GLs so that the maximum GL is 0 by dividing all GLs for each sample by the max." << endl << endl + << "Then cap (bound) at N (e.g. -10)." + << "options:" << endl << " -b, --bound N Bound GLs to this limit." << endl << " -x, --exclude-broken If GLs are > 0, remove site." << endl - << endl - << "Adjust GLs so that the maximum GL is 0 by dividing all GLs for each sample by the max." << endl - << "Then cap (bound) at N (e.g. -10)." 
<< endl; + << endl; + cerr << endl << "Type: transformation" << endl << endl; exit(0); } @@ -44,7 +54,7 @@ /* Detect the end of the options. */ if (c == -1) break; - + switch (c) { case 0: @@ -60,11 +70,11 @@ case 'b': glBound = atof(optarg); break; - + case 'x': excludeBroken = true; break; - + case 'h': printSummary(argv); exit(0); @@ -75,7 +85,7 @@ printSummary(argv); exit(1); break; - + default: abort (); } @@ -175,4 +185,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfglxgt.cpp libvcflib-1.0.2+dfsg/src/vcfglxgt.cpp --- libvcflib-1.0.1+dfsg/src/vcfglxgt.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfglxgt.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include @@ -8,11 +17,12 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [options] " << endl << endl - << "options:" << endl + << "Set genotypes using the maximum genotype likelihood for each sample." << endl + << "options:" << endl << " -n, --fix-null-genotypes only apply to null and partly-null genotypes" << endl << endl - << "Set genotypes using the maximum genotype likelihood for each sample." << endl << endl; + cerr << endl << "Type: transformation" << endl << endl; exit(0); } @@ -41,7 +51,7 @@ /* Detect the end of the options. 
*/ if (c == -1) break; - + switch (c) { case 0: @@ -57,7 +67,7 @@ case 'n': fixNull = true; break; - + case 'h': printSummary(argv); exit(0); @@ -68,7 +78,7 @@ printSummary(argv); exit(1); break; - + default: abort (); } @@ -142,7 +152,7 @@ maxindex = i; // prefers == gls in order of listing } } - + // determine which genotype it represents // modify, if the GT is part-null vector& gtv = g->second; @@ -168,4 +178,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfhetcount.cpp libvcflib-1.0.2+dfsg/src/vcfhetcount.cpp --- libvcflib-1.0.1+dfsg/src/vcfhetcount.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfhetcount.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include @@ -9,12 +18,17 @@ int main(int argc, char** argv) { - if (argc == 2 && (argv[1] == "-h" || argv[1] == "--help")) { - cerr << "usage: " << argv[0] << " " << endl - << "count the number of alternate alleles in heterozygous genotypes in all records in the vcf file" << endl - << "outputs a count for each individual in the file" << endl; + if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << "usage: " << argv[0] << " " << endl << endl + << "Calculate the heterozygosity rate: " << endl + << "count the number of alternate alleles in heterozygous genotypes in all records in the vcf file" << endl + << "outputs a count for each individual in the file" << endl; + cerr << endl << "Type: metrics" << endl << endl; return 1; } + } string inputFilename; @@ -69,4 +83,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfhethomratio.cpp libvcflib-1.0.2+dfsg/src/vcfhethomratio.cpp --- libvcflib-1.0.1+dfsg/src/vcfhethomratio.cpp 2019-10-01 07:06:02.000000000 +0000 +++ 
libvcflib-1.0.2+dfsg/src/vcfhethomratio.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include @@ -9,12 +18,15 @@ int main(int argc, char** argv) { - if (argc != 2) { - cerr << "usage: " << argv[0] << " " << endl - << "outputs the het/hom ratio for each individual in the file" << endl; - return 1; + if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << "usage: " << argv[0] << " " << endl << endl + << "Generates the het/hom ratio for each individual in the file" << endl; + cerr << endl << "Type: metrics" << endl << endl; + return 1; } - + } string filename = argv[1]; VariantCallFile variantFile; @@ -63,4 +75,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfindex.cpp libvcflib-1.0.2+dfsg/src/vcfindex.cpp --- libvcflib-1.0.1+dfsg/src/vcfindex.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfindex.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" #include "convert.h" #include @@ -6,6 +15,20 @@ using namespace vcflib; int main(int argc, char** argv) { +if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << R"( +Adds an index number to the INFO field (id=position) + +Usage: vcfindex + +Type: transformation + + )"; + exit(1); + } + } VariantCallFile variantFile; @@ -39,4 +62,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfinfo2qual.cpp libvcflib-1.0.2+dfsg/src/vcfinfo2qual.cpp --- libvcflib-1.0.1+dfsg/src/vcfinfo2qual.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfinfo2qual.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" using namespace std; @@ -8,10 +17,12 @@ VariantCallFile variantFile; if (argc == 1) { - cerr << "usage: " << argv[0] << " [key] [vcf_file]" << endl - << "Sets QUAL from info field tag keyed by [key]." << endl - << "The VCF file may be omitted and read from stdin." << endl - << "The average of the field is used if it contains multiple values." << endl; + cerr << "usage: " << argv[0] << " [key] [vcf_file]" << endl << endl + << "Sets QUAL from info field tag keyed by [key]." << endl + << "The VCF file may be omitted and read from stdin." << endl + << "The average of the field is used if it contains multiple values." 
<< endl; + cerr << endl << "Type: transformation" << endl << endl; + return 1; } @@ -47,4 +58,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfinfosummarize.cpp libvcflib-1.0.2+dfsg/src/vcfinfosummarize.cpp --- libvcflib-1.0.1+dfsg/src/vcfinfosummarize.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfinfosummarize.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "Fasta.h" @@ -12,7 +21,7 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [options] " << endl << endl - << "options:" << endl + << "options:" << endl << " -f, --field Summarize this field in the INFO column" << endl << " -i, --info Store the computed statistic in this info field" << endl << " -a, --average Take the mean for field (default)" << endl @@ -23,8 +32,9 @@ << " -v, --version Print version" << endl << endl << "Take annotations given in the per-sample fields and add the mean, median, min, or max" << endl - << "to the site-level INFO." << endl + << "to the site-level INFO." << endl; + cerr << endl << "Type: transformation" << endl << endl; exit(0); } @@ -48,7 +58,7 @@ int c; string sitewideField; string infoField; - StatType statType = MEAN; + StatType statType = MEAN; if (argc == 1) printSummary(argv); @@ -76,7 +86,7 @@ /* Detect the end of the options. 
*/ if (c == -1) break; - + switch (c) { case 0: @@ -88,7 +98,7 @@ printf (" with arg %s", optarg); printf ("\n"); break; - + case 'v': { printBasicVersion(); @@ -134,7 +144,7 @@ printSummary(argv); exit(1); break; - + default: abort (); } @@ -225,4 +235,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfintersect.cpp libvcflib-1.0.2+dfsg/src/vcfintersect.cpp --- libvcflib-1.0.1+dfsg/src/vcfintersect.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfintersect.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "BedReader.h" #include "IntervalTree.h" @@ -10,11 +19,11 @@ using namespace std; using namespace vcflib; - void printSummary(char** argv) { - cerr << "usage: " << argv[0] << " [options] []" << endl + cerr << "vcflib " << VCFLIB_VERSION << " set analysis" << endl << endl + << "usage: vcfintersect [options] []" << endl << endl - << "options:" << endl + << "options:" << endl << " -b, --bed FILE use intervals provided by this BED file" << endl << " -R, --region REGION use 1-based tabix-style region (e.g. chrZ:10-20), multiples allowed" << endl << " -S, --start-only don't use the reference length information in the record to determine" << endl @@ -36,8 +45,9 @@ << endl << "For bed-vcf intersection, alleles which fall into the targets are retained." << endl << endl - << "For vcf-vcf intersection and union, unify on equivalent alleles within window-size bp" << endl - << "as determined by haplotype comparison alleles." << endl; + << "Haplotype aware intersection, union and complement. Use for intersection and union of VCF files: unify on equivalent alleles within window-size bp" << endl + << "as determined by haplotype comparison alleles." 
<< endl << endl + << "type: transformation" << endl; //<< "Intersect the records in the VCF file with targets provided in a BED file." << endl //<< "Intersections are done on the reference sequences in the VCF file." << endl //<< "If no VCF filename is specified on the command line (last argument) the VCF" << endl @@ -208,7 +218,7 @@ if (!bedFileName.empty()) { usingBED = true; } - + if (usingBED || !regions.empty()) { variantFile.parseSamples = false; } @@ -262,7 +272,7 @@ VariantFieldType mergeFromType = f->second; stringstream s; s << mergeFromType; - + variantFile.addHeaderLine("##INFO="); } @@ -570,4 +580,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfkeepgeno.cpp libvcflib-1.0.2+dfsg/src/vcfkeepgeno.cpp --- libvcflib-1.0.1+dfsg/src/vcfkeepgeno.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfkeepgeno.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include @@ -10,13 +19,17 @@ int main(int argc, char** argv) { - if (argc < 3) { - cerr << "usage: " << argv[0] << " [FIELD1] [FIELD2] ..." << endl - << "outputs each record in the vcf file, removing FORMAT fields not listed " - << "on the command line from sample specifications in the output" - << endl; - return 1; + if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << "usage: " << argv[0] << " [FIELD1] [FIELD2] ..." 
<< endl << endl + << "Reduce file size by removing FORMAT fields not listed " + << "on the command line from sample specifications in the output" + << endl; + cerr << endl << "Type: transformation" << endl << endl; + return 1; } + } string filename = argv[1]; @@ -49,7 +62,7 @@ // write the header cout << variantFile.header << endl; - + // print the records, filtering is done via the setting of varA's output sample names while (variantFile.getNextVariant(var)) { var.format = newFormat; @@ -59,4 +72,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfkeepinfo.cpp libvcflib-1.0.2+dfsg/src/vcfkeepinfo.cpp --- libvcflib-1.0.1+dfsg/src/vcfkeepinfo.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfkeepinfo.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include @@ -10,8 +19,9 @@ int main(int argc, char** argv) { if (argc < 3) { - cerr << "usage: " << argv[0] << " [FIELD1] [FIELD2] ..." << endl - << "outputs each record in the vcf file, removing INFO fields not listed on the command line" << endl; + cerr << "usage: " << argv[0] << " [FIELD1] [FIELD2] ..." 
<< endl << endl + << "To decrease file size remove INFO fields not listed on the command line" << endl; + cerr << endl << "Type: transformation" << endl << endl; return 1; } @@ -46,17 +56,17 @@ // write the header cout << variantFile.header << endl; - + // print the records, filtering is done via the setting of varA's output sample names while (variantFile.getNextVariant(var)) { for (map >::iterator i = var.info.begin(); i != var.info.end(); ++i) { if (!fieldsToKeep.count(i->first)) { - var.info.erase(i->first); + i = var.info.erase(i); } } for (map::iterator i = var.infoFlags.begin(); i != var.infoFlags.end(); ++i) { if (!fieldsToKeep.count(i->first)) { - var.infoFlags.erase(i->first); + i = var.infoFlags.erase(i); } } cout << var << endl; @@ -65,4 +75,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfkeepsamples.cpp libvcflib-1.0.2+dfsg/src/vcfkeepsamples.cpp --- libvcflib-1.0.1+dfsg/src/vcfkeepsamples.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfkeepsamples.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include @@ -9,8 +18,9 @@ int main(int argc, char** argv) { if (argc < 3) { - cerr << "usage: " << argv[0] << " [SAMPLE1] [SAMPLE2] ..." << endl + cerr << "usage: " << argv[0] << " [SAMPLE1] [SAMPLE2] ..." 
<< endl << endl << "outputs each record in the vcf file, removing samples not listed on the command line" << endl; + cerr << endl << "Type: transformation" << endl << endl; return 1; } @@ -39,10 +49,10 @@ // and restrict the output sample names in the variant to those we are keeping var.setOutputSampleNames(samplesToKeep); - + // write the new header cout << variantFile.header << endl; - + // print the records, filtering is done via the setting of varA's output sample names while (variantFile.getNextVariant(var)) { cout << var << endl; @@ -51,4 +61,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfld.cpp libvcflib-1.0.2+dfsg/src/vcfld.cpp --- libvcflib-1.0.1+dfsg/src/vcfld.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfld.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,12 +1,22 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" #include "split.h" #include "cdflib.hpp" #include "pdflib.hpp" #include "var.hpp" +#include "makeUnique.h" #include #include -#include +#include #include #include #include @@ -23,18 +33,20 @@ cerr << "INFO: help" << endl; cerr << "INFO: description:" << endl; - cerr << "INFO: LD --target 0,1,2,3,4,5,6,7 --background 11,12,13,16,17,19,22 --file my.vcf -e -d -r " << endl; + cerr << "usage: vcfld --target 0,1,2,3,4,5,6,7 --background 11,12,13,16,17,19,22 --file my.vcf -e -d -r " << endl; cerr << endl; - - cerr << "INFO: required: t,target -- argument: a zero base comma separated list of target individuals corrisponding to VCF columns " << endl; - cerr << "INFO: required: b,background -- argument: a zero base comma separated list of background individuals corrisponding to VCF columns " << endl; + cerr << "Compute LD" << endl << endl; + cerr << "INFO: required: t,target -- argument: a zero base comma separated list of target individuals corresponding to VCF columns " << endl; + cerr << "INFO: required: b,background -- argument: a zero base comma separated list of background individuals corresponding to VCF columns " << endl; cerr << "INFO: required: f,file -- argument: a properly formatted phased VCF file " << endl; cerr << "INFO: required: y,type -- argument: type of genotype likelihood: PL, GL or GP " << endl; cerr << "INFO: optional: w,window -- argument: window size to average LD; default is 1000 " << endl; cerr << "INFO: optional: e,external -- switch: population to calculate LD expectation; default is target " << endl; cerr << "INFO: optional: d,derived -- switch: which haplotype to count \"00\" vs \"11\"; default \"00\", " << endl; cerr << endl; - + + cerr << endl << "Type: transformation" << endl << endl; + printVersion(); exit(1); @@ -48,10 +60,10 @@ } void loadIndices(map & index, string set){ - + vector indviduals = split(set, ","); vector::iterator it = indviduals.begin(); - + for(; it != indviduals.end(); it++){ index[ atoi( 
(*it).c_str() ) ] = 1; } @@ -65,33 +77,33 @@ double nLD = 0; for(int snpB = snpA; snpB < (snpA + 100); snpB++){ - + map targetHaplotypes; - + targetHaplotypes["11"] = 1; targetHaplotypes["01"] = 1; targetHaplotypes["10"] = 1; targetHaplotypes["00"] = 1; - + for(int targetIndex = 0; targetIndex < target.size(); targetIndex++ ){ - + string haplotypeA; string haplotypeB; - + haplotypeA += haplotypes[target[targetIndex]][0].substr(snpA, 1) += haplotypes[target[targetIndex]][0].substr(snpB, 1); - haplotypeB += haplotypes[target[targetIndex]][1].substr(snpA, 1) += haplotypes[target[targetIndex]][1].substr(snpB, 1); - + haplotypeB += haplotypes[target[targetIndex]][1].substr(snpA, 1) += haplotypes[target[targetIndex]][1].substr(snpB, 1); + targetHaplotypes[haplotypeA]++; targetHaplotypes[haplotypeB]++; - + } - + double sa = tafs[snpA]; double sb = tafs[snpA]; string hapID = "00"; - + if(external == 1){ sa = bafs[snpA]; sb = bafs[snpB]; @@ -104,17 +116,17 @@ sb = 1 - sb; } - double observation = targetHaplotypes[hapID] / ( target.size()*2 ) ; + double observation = targetHaplotypes[hapID] / ( target.size()*2 ) ; double expectation = sa*sb; - double d = observation - expectation; + double d = observation - expectation; //double r = d / sqrt((1 - afs[snpA])*(1-afs[snpB])*afs[snpA]*afs[snpB]); - + nLD += 1; sumLD += d; - + } - + cout << seqid << "\t" << pos[snpA] << "\t" << tafs[snpA] << "\t" << sumLD << "\t" << nLD << endl; } @@ -122,7 +134,7 @@ } void loadPhased(string **haplotypes, genotype * pop, int ntarget){ - + int indIndex = 0; for(vector::iterator ind = pop->gts.begin(); ind != pop->gts.end(); ind++){ @@ -146,27 +158,27 @@ // set region to scaffold - string region = "NA"; + string region = "NA"; - // using vcflib; thanks to Erik Garrison + // using vcflib; thanks to Erik Garrison VariantCallFile variantFile; - // zero based index for the target and background indivudals - + // zero based index for the target and background indivudals + map it, ib; - - // deltaaf is 
the difference of allele frequency we bother to look at + + // deltaaf is the difference of allele frequency we bother to look at // ancestral state is set to zero by default int counts = 0; - - // phased + + // phased int phased = 0; - + // use the background allele frequency int external = 0; @@ -179,7 +191,7 @@ string type = "NA"; - const struct option longopts[] = + const struct option longopts[] = { {"version" , 0, 0, 'v'}, {"help" , 0, 0, 'h'}, @@ -200,7 +212,7 @@ while(iarg != -1) { iarg = getopt_long(argc, argv, "w:y:r:t:b:f:edhv", longopts, &findex); - + switch (iarg) { case 'h': @@ -239,7 +251,7 @@ case 'r': { cerr << "INFO: set seqid region to : " << optarg << endl; - region = optarg; + region = optarg; break; } case 'e': @@ -254,7 +266,7 @@ cerr << "INFO: count haplotypes \"11\" rather than \"00\"" << endl; break; } - case 'w': + case 'w': { string win = optarg; windowSize = atol( win.c_str() ); @@ -270,7 +282,7 @@ okayGenotypeLikelihoods["GL"] = 1; okayGenotypeLikelihoods["GP"] = 1; okayGenotypeLikelihoods["GT"] = 1; - + if(type == "NA"){ cerr << "FATAL: failed to specify genotype likelihood format : PL or GL" << endl; @@ -291,18 +303,18 @@ variantFile.open(filename); - + if(region != "NA"){ if(! 
variantFile.setRegion(region)){ cerr <<"FATAL: unable to set region" << endl; return 1; } } - + if (!variantFile.is_open()) { return 1; } - + Variant var(variantFile); vector samples = variantFile.sampleNames; @@ -313,7 +325,7 @@ int index, indexi = 0; for(vector::iterator samp = samples.begin(); samp != samples.end(); samp++){ - + if(it.find(index) != it.end() ){ target_h.push_back(indexi); indexi++; @@ -324,7 +336,7 @@ } index++; } - + vector positions; vector targetAFS; vector backgroundAFS; @@ -333,9 +345,9 @@ for (int i = 0; i < nsamples; i++) { haplotypes[i] = new string[2]; } - + string currentSeqid = "NA"; - + while (variantFile.getNextVariant(var)) { if(!var.isPhased()){ @@ -358,53 +370,55 @@ targetAFS.clear(); backgroundAFS.clear(); } - + vector < map< string, vector > > target, background, total; - + int sindex = 0; - + for(int nsamp = 0; nsamp < nsamples; nsamp++){ map > sample = var.samples[ samples[nsamp]]; if(it.find(sindex) != it.end() ){ target.push_back(sample); - total.push_back(sample); + total.push_back(sample); } if(ib.find(sindex) != ib.end()){ background.push_back(sample); - total.push_back(sample); - } + total.push_back(sample); + } sindex += 1; } - - genotype * populationTarget ; - genotype * populationBackground; - genotype * populationTotal ; - + + using Detail::makeUnique; + + unique_ptr populationTarget ; + unique_ptr populationBackground; + unique_ptr populationTotal ; + if(type == "PL"){ - populationTarget = new pl(); - populationBackground = new pl(); - populationTotal = new pl(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); + populationTotal = makeUnique(); } if(type == "GL"){ - populationTarget = new gl(); - populationBackground = new gl(); - populationTotal = new gl(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); + populationTotal = makeUnique(); } if(type == "GP"){ - populationTarget = new gp(); - populationBackground = new gp(); - populationTotal = new gp(); + 
populationTarget = makeUnique(); + populationBackground = makeUnique(); + populationTotal = makeUnique(); } - + populationTarget->loadPop(target, var.sequenceName, var.position); - + populationBackground->loadPop(background, var.sequenceName, var.position); - + populationTotal->loadPop(total, var.sequenceName, var.position); - - + + if(populationTotal->af > 0.95 || populationTotal->af < 0.05){ continue; } @@ -412,11 +426,11 @@ targetAFS.push_back(populationTarget->af); backgroundAFS.push_back(populationBackground->af); positions.push_back(var.position); - loadPhased(haplotypes, populationTotal, nsamples); - + loadPhased(haplotypes, populationTotal.get(), nsamples); + } calc(haplotypes, nsamples, positions, targetAFS, backgroundAFS, external, derived, windowSize, target_h, background_h, currentSeqid); - - return 0; + + return 0; } diff -Nru libvcflib-1.0.1+dfsg/src/vcfleftalign.cpp libvcflib-1.0.2+dfsg/src/vcfleftalign.cpp --- libvcflib-1.0.1+dfsg/src/vcfleftalign.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfleftalign.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "convert.h" #include "join.h" @@ -140,7 +149,7 @@ } void getAlignment(Variant& var, FastaReference& reference, string& ref, vector& alignments, int window) { - + // default alignment params float matchScore = 10.0f; float mismatchScore = -9.0f; @@ -341,18 +350,18 @@ if (previous->sequence.at(0) == seq.at(0) && FBhomopolymer(seq) && FBhomopolymer(readseq)) { - VCFLEFTALIGN_DEBUG("moving " << *previous << " right to " + VCFLEFTALIGN_DEBUG("moving " << *previous << " right to " << (indel.insertion ? indel.position : indel.position - previous->length) << endl); previous->position = indel.insertion ? 
indel.position : indel.position - previous->length; } - } + } else { int pos = previous->position; while (pos < (int) referenceSequence.length() && ((previous->insertion && pos + previous->length <= indel.position) || (!previous->insertion && pos + previous->length < indel.position)) - && previous->sequence + && previous->sequence == referenceSequence.substr(pos + previous->length, previous->length)) { pos += previous->length; } @@ -371,7 +380,7 @@ } // for each indel - // if ( we're matched up to the previous insertion (or deletion) + // if ( we're matched up to the previous insertion (or deletion) // and it's also an insertion or deletion ) // merge the indels // @@ -411,7 +420,7 @@ last = *id; lastend = last.insertion ? last.position : (last.position + last.length); } - + if (lastend < alignedLength) { newCigar.push_back(make_pair(alignedLength - lastend, "M")); } @@ -456,7 +465,7 @@ } else { bool result = true; - while ((result = leftAlign(alternateSequence, cigar, referenceSequence, debug)) && --maxiterations > 0) { + while ((result = leftAlign(alternateSequence, cigar, referenceSequence, debug)) && --maxiterations > 0) { } if (maxiterations <= 0) { @@ -471,15 +480,32 @@ void printSummary(char** argv) { - cerr << "usage: " << argv[0] << " [options] [file]" << endl - << endl - << "options:" << endl - << " -r, --reference FILE Use this reference as a basis for realignment." << endl - << " -w, --window N Use a window of this many bp when left aligning (150)." << endl - << endl - << "Left-aligns variants in the specified input file or stdin. Window size is determined" << endl - << "dynamically according to the entropy of the regions flanking the indel. These must have" << endl - << "entropy > 1 bit/bp, or be shorter than ~5kb." 
<< endl; + cerr << R"( + +Left-align indels and complex variants in the input using a pairwise +ref/alt alignment followed by a heuristic, iterative left realignment +process that shifts indel representations to their absolute leftmost +(5') extent. + +This is the same procedure used in the internal left alignment in +freebayes, and can be used when preparing VCF files for input to +freebayes to decrease positional representation differences between +the input alleles and left-realigned alignments. + +usage: vcfleftalign [options] [file] + +options: + + -r, --reference FILE Use this reference as a basis for realignment. + -w, --window N Use a window of this many bp when left aligning (150). + +Left-aligns variants in the specified input file or stdin. Window +size is determined dynamically according to the entropy of the regions +flanking the indel. These must have entropy > 1 bit/bp, or be shorter +than ~5kb. + +)"; + cerr << endl << "Type: transformation" << endl << endl; exit(0); } @@ -770,7 +796,7 @@ // then handle genotypes; determine the mapping between alleleic primitives and convert to phased haplotypes // this means taking all the parsedAlternates and, for each one, generating a pattern of allele indecies corresponding to it - + //for (vector::iterator v = variants.begin(); v != variants.end(); ++v) { } @@ -778,4 +804,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcflength.cpp libvcflib-1.0.2+dfsg/src/vcflength.cpp --- libvcflib-1.0.1+dfsg/src/vcflength.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcflength.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" #include "convert.h" #include @@ -6,6 +15,37 @@ using namespace vcflib; int main(int argc, char** argv) { +if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << R"( +Add length info field + +Usage: vcflength + +Example: + +vcflength samples/sample.vcf +##fileformat=VCFv4.0 +(...) +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +19 111 . A C 9.6 . length=0;length.alt=1;length.ref=1 GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 +19 112 . A G 10 . length=0;length.alt=1;length.ref=1 GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 +20 14370 rs6054257 G A 29 PASS AF=0.5;DP=14;NS=3;length=0;length.alt=1;length.ref=1;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. +20 17330 . T A 3 q10 AF=0.017;DP=11;NS=3;length=0;length.alt=1;length.ref=1 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3:.,. +20 1110696 rs6040355 A G,T 67 PASS AA=T;AF=0.333,0.667;DP=10;NS=2;length=0,0;length.alt=1,1;length.ref=1;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4:.,. +20 1230237 . T . 47 PASS AA=T;DP=13;NS=3;length=0;length.alt=1;length.ref=1GT:GQ:DP:HQ 0|0:54:.:56,60 0|0:48:4:51,51 0/0:61:2:.,. +20 1234567 microsat1 G GA,GAC 50 PASS AA=G;AC=3,1;AN=6;DP=9;NS=3;length=1,2;length.alt=2,3;length.ref=1 GT:GQ:DP 0/1:.:4 0/2:17:2 1/1:40:3 +20 1235237 . T . 0 . length=0;length.alt=1;length.ref=1 GT 0/00|0 ./. 
+X 10 rsTest AC A,ATG 10 PASS length=-1,1;length.alt=1,3;length.ref=2 GT 0 0/1 0|2 + +Type: transformation + + )"; + exit(1); + } + } + VariantCallFile variantFile; @@ -46,4 +86,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfnormalizesvs.cpp libvcflib-1.0.2+dfsg/src/vcfnormalizesvs.cpp --- libvcflib-1.0.1+dfsg/src/vcfnormalizesvs.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfnormalizesvs.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include #include "Fasta.h" diff -Nru libvcflib-1.0.1+dfsg/src/vcfnull2ref.cpp libvcflib-1.0.2+dfsg/src/vcfnull2ref.cpp --- libvcflib-1.0.1+dfsg/src/vcfnull2ref.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfnull2ref.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include diff -Nru libvcflib-1.0.1+dfsg/src/vcfnumalt.cpp libvcflib-1.0.2+dfsg/src/vcfnumalt.cpp --- libvcflib-1.0.1+dfsg/src/vcfnumalt.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfnumalt.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" #include "split.h" #include @@ -9,12 +18,15 @@ int main(int argc, char** argv) { - if (argc != 2) { - cerr << "usage: " << argv[0] << " " << endl - << "outputs a VCF stream where NUMALT has been generated for each record using sample genotypes" << endl; - return 1; + if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << "usage: " << argv[0] << " " << endl << endl + << "outputs a VCF stream where NUMALT has been generated for each record using sample genotypes" << endl; + cerr << endl << "Type: transformation" << endl << endl; + return 1; } - + } string filename = argv[1]; VariantCallFile variantFile; @@ -39,7 +51,7 @@ // write the new header cout << variantFile.header << endl; - + // print the records, filtering is done via the setting of varA's output sample names while (variantFile.getNextVariant(var)) { stringstream na; @@ -52,4 +64,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfoverlay.cpp libvcflib-1.0.2+dfsg/src/vcfoverlay.cpp --- libvcflib-1.0.1+dfsg/src/vcfoverlay.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfoverlay.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include #include "gpatInfo.hpp" @@ -10,11 +19,12 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [options] [ ...]" << endl << endl - << "options:" << endl + << "options:" << endl << " -h, --help this dialog" << endl << " -v, --version prints version" << endl << endl - << "Overlays records in the input vcf files in the order in which they appear." << endl; + << "Overlay records in the input vcf files with order as precedence." 
<< endl; + cerr << endl << "Type: transformation" << endl << endl; exit(0); } @@ -32,10 +42,10 @@ {0, 0, 0, 0} }; int option_index = 0; - + c = getopt_long (argc, argv, "hv", long_options, &option_index); - + if (c == -1){ break; } @@ -49,7 +59,7 @@ { printBasicVersion(); exit(0); - } + } case '?': { printSummary(argv); @@ -93,7 +103,7 @@ cerr << "vcfoverlay encountered errors when opening " << inputFilename << endl; } } - + cout << variantFiles.begin()->second.first->header << endl; while (!linesByPrecedence.empty()) { @@ -107,7 +117,7 @@ cout << m->second.begin()->second << endl; } linesByPrecedence[lowestChrom].erase(lowestPosition); - + if (linesByPrecedence[lowestChrom].empty()) { linesByPrecedence.erase(lowestChrom); } @@ -117,4 +127,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfparsealts.cpp libvcflib-1.0.2+dfsg/src/vcfparsealts.cpp --- libvcflib-1.0.1+dfsg/src/vcfparsealts.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfparsealts.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,9 +1,61 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" using namespace std; using namespace vcflib; int main(int argc, char** argv) { + if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << R"( +Alternate allele parsing method. This method uses pairwise +alignment of REF and ALTs to determine component allelic primitives +for each alternate allele. + +Usage: vcfparsealts + +Example: + +vcfparsealts samples/sample.vcf +##fileformat=VCFv4.0 +(...) +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +19 111 . A C 9.6 . . GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 + ( A :: 111 A -> A; ) ( C :: 111 A -> C; ) +19 112 . A G 10 . . 
GT:HQ 0|0:10,10 0|0:10,10 0/1:3,3 + ( A :: 112 A -> A; ) ( G :: 112 A -> G; ) +20 14370 rs6054257 G A 29 PASS AF=0.5;DP=14;NS=3;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. + ( A :: 14370 G -> A; ) ( G :: 14370 G -> G; ) +20 17330 . T A 3 q10 AF=0.017;DP=11;NS=3 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3:.,. + ( A :: 17330 T -> A; ) ( T :: 17330 T -> T; ) +20 1110696 rs6040355 A G,T 67 PASS AA=T;AF=0.333,0.667;DP=10;NS=2;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4:.,. + ( A :: 1110696 A -> A; ) ( G :: 1110696 A -> G; ) ( T :: 1110696 A -> T; ) +20 1230237 . T . 47 PASS AA=T;DP=13;NS=3 GT:GQ:DP:HQ 0|0:54:.:56,60 0|0:48:4:51,51 0/0:61:2:.,. + ( . :: 1230237 T -> .; ) ( T :: 1230237 T -> T; ) +20 1234567 microsat1 G GA,GAC 50 PASS AA=G;AC=3,1;AN=6;DP=9;NS=3 GT:GQ:DP 0/1:.:4 0/2:17:2 1/1:40:3 + ( G :: 1234567 G -> G; ) ( GA :: 1234567 G -> G; 1234568 -> A; ) ( GAC :: 1234567 G -> G; 1234568 -> AC; ) +20 1235237 . T . 0 . . GT 0/0 0|0 ./. + ( . :: 1235237 T -> .; ) ( T :: 1235237 T -> T; ) +X 10 rsTest AC A,ATG 10 PASS . GT 0 0/1 0|2 + ( A :: 10 A -> A; 11 C -> ; ) ( AC :: 10 AC -> AC; ) ( ATG :: 10 A -> A; 11 -> T; 11 C -> G; ) + + +Type: statistics + + )"; + exit(1); + } + } + VariantCallFile variantFile; @@ -39,4 +91,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfprimers.cpp libvcflib-1.0.2+dfsg/src/vcfprimers.cpp --- libvcflib-1.0.1+dfsg/src/vcfprimers.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfprimers.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" #include "split.h" #include "Fasta.h" @@ -9,12 +18,13 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [options] " << endl << endl - << "options:" << endl + << "For each VCF record, extract the flanking sequences, and write them to stdout as FASTA" << endl + << "records suitable for alignment." << endl + << "options:" << endl << " -f, --fasta-reference FASTA reference file to use to obtain primer sequences" << endl << " -l, --primer-length The length of the primer sequences on each side of the variant" << endl << endl - << "For each VCF record, extract the flanking sequences, and write them to stdout as FASTA" << endl - << "records suitable for alignment. This tool is intended for use in designing validation" << endl + << "This tool is intended for use in designing validation" << endl << "experiments. Primers extracted which would flank all of the alleles at multi-allelic" << endl << "sites. The name of the FASTA \"reads\" indicates the VCF record which they apply to." << endl << "The form is >CHROM_POS_LEFT for the 3' primer and >CHROM_POS_RIGHT for the 5' primer," << endl @@ -25,6 +35,7 @@ << ">20_233255_RIGHT" << endl << "ACTCAGTTGATTCCATACCTTTGCCATCATGAATCATGTTGTAATAAACA" << endl << endl; + cerr << endl << "Type: transformation" << endl << endl; exit(0); } @@ -58,7 +69,7 @@ /* Detect the end of the options. 
*/ if (c == -1) break; - + switch (c) { case 0: @@ -78,7 +89,7 @@ case 'l': primerLength = atoi(optarg); break; - + case 'h': printSummary(argv); exit(0); @@ -89,7 +100,7 @@ printSummary(argv); exit(1); break; - + default: abort (); } @@ -137,4 +148,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfqual2info.cpp libvcflib-1.0.2+dfsg/src/vcfqual2info.cpp --- libvcflib-1.0.1+dfsg/src/vcfqual2info.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfqual2info.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" using namespace std; @@ -8,9 +17,11 @@ VariantCallFile variantFile; if (argc == 1) { - cerr << "usage: " << argv[0] << " [key] [vcf_file]" << endl + cerr << "usage: " << argv[0] << " [key] [vcf_file]" << endl << endl << "Puts QUAL into an info field tag keyed by [key]." << endl - << "The VCF file may be omitted and read from stdin." << endl; + << endl; + cerr << endl << "Type: transformation" << endl << endl; + return 1; } @@ -41,4 +52,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfrandom.cpp libvcflib-1.0.2+dfsg/src/vcfrandom.cpp --- libvcflib-1.0.1+dfsg/src/vcfrandom.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfrandom.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include #include #include @@ -9,6 +18,51 @@ int main(int argc, char** argv) { + if (argc == 2) { + string h_flag = argv[1]; + + if (argc == 2 && (h_flag == "-h" || h_flag == "--help")) { + cerr << R"( +Generate a random VCF file + +Usage: vcfrandom + +Example: + + vcfrandom + +##fileformat=VCFv4.0 +##source=vcfrandom +##reference=/d2/data/references/build_37/human_reference_v37.fa +##phasing=none +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT bill +one 1 . G G,A 100 . DP=83 GT:DP 0/1:1 +one 2 . G G,A 100 . DP=3 GT:DP 0/1:49 +one 3 . G C,T 100 . DP=5 GT:DP 0/1:12 +one 4 . C G,T 100 . DP=51 GT:DP 0/1:60 +one 5 . A T,A 100 . DP=31 GT:DP 0/1:89 +one 6 . T T,A 100 . DP=56 GT:DP 0/1:60 +one 7 . T A,C 100 . DP=78 GT:DP 0/1:75 +one 8 . T G,A 100 . DP=73 GT:DP 0/1:78 +one 9 . C C,G 100 . DP=42 GT:DP 0/1:67 + + +Type: statistics + + )"; + exit(1); + } + } + + VariantCallFile variantFile; stringstream headerss; diff -Nru libvcflib-1.0.1+dfsg/src/vcfrandomsample.cpp libvcflib-1.0.2+dfsg/src/vcfrandomsample.cpp --- libvcflib-1.0.1+dfsg/src/vcfrandomsample.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfrandomsample.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" #include "BedReader.h" #include @@ -13,7 +22,7 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [options] []" << endl << endl - << "options:" << endl + << "options:" << endl << " -r, --rate RATE base sampling probability per locus" << endl << " -s, --scale-by KEY scale sampling likelihood by this Float info field" << endl << " -p, --random-seed N use this random seed (by default read from /dev/random)" << endl @@ -22,6 +31,8 @@ << "Randomly sample sites from an input VCF file, which may be provided as stdin." << endl << "Scale the sampling probability by the field specified in KEY. This may be" << endl << "used to provide uniform sampling across allele frequencies, for instance." << endl; + cerr << endl << "Type: statistics" << endl << endl; + exit(0); } @@ -126,7 +137,7 @@ variantFile.addHeaderLine(liness.str()); cout << variantFile.header << endl; - + // check that we can use the scaling key if (!scaleByKey.empty()) { if (variantFile.infoTypes.find(scaleByKey) == variantFile.infoTypes.end()) { diff -Nru libvcflib-1.0.1+dfsg/src/vcfremap.cpp libvcflib-1.0.2+dfsg/src/vcfremap.cpp --- libvcflib-1.0.1+dfsg/src/vcfremap.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfremap.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" #include "BedReader.h" #include "IntervalTree.h" @@ -14,7 +23,7 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [options] []" << endl << endl - << "options:" << endl + << "options:" << endl << " -w, --ref-window-size N align using this many bases flanking each side of the reference allele" << endl << " -s, --alt-window-size N align using this many flanking bases from the reference around each alternate allele" << endl << " -r, --reference FILE FASTA reference file, required with -i and -u" << endl @@ -27,7 +36,9 @@ << " -a, --adjust-vcf TAG supply a new cigar as TAG in the output VCF" << endl << endl << "For each alternate allele, attempt to realign against the reference with lowered gap open penalty." << endl - << "If realignment is possible, adjust the cigar and reference/alternate alleles." << endl; + << "If realignment is possible, adjust the cigar and reference/alternate alleles." << endl + << "Observe how different alignment parameters, including context and entropy-dependent ones, influence variant classification and interpretation." 
<< endl; + cerr << endl << "Type: transformation" << endl << endl; exit(0); } @@ -163,7 +174,7 @@ } else { freference.open(fastaFileName); } - + if (adjustVcf) { vector commandline; for (int i = 0; i < argc; ++i) @@ -189,7 +200,7 @@ // try to remap locally string reference = freference.getSubSequence(var.sequenceName, var.position - 1 - windowsize, windowsize * 2 + var.ref.size()); - + // passed to sw align unsigned int referencePos; string cigar; @@ -202,7 +213,7 @@ //cout << "REF:\t" << reference << endl; //cout << "ALT:\t" << string(windowsize - altwindowsize, ' ') << alternateQuery << endl; - + CSmithWatermanGotoh sw(matchScore, mismatchScore, gapOpenPenalty, gapExtendPenalty); if (useEntropy) sw.EnableEntropyGapPenalty(1); if (useRepeatGapExtendPenalty) sw.EnableRepeatGapExtensionPenalty(repeatGapExtendPenalty); @@ -315,7 +326,7 @@ subend = c->back().first; } } - + // adjust the cigars and get the new reference length int reflen = 0; for (vector > >::iterator c = cigars.begin(); c != cigars.end(); ++c) { @@ -347,4 +358,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfremoveaberrantgenotypes.cpp libvcflib-1.0.2+dfsg/src/vcfremoveaberrantgenotypes.cpp --- libvcflib-1.0.1+dfsg/src/vcfremoveaberrantgenotypes.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfremoveaberrantgenotypes.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" #include "split.h" #include @@ -33,12 +42,16 @@ int main(int argc, char** argv) { - if (argc != 2) { - cerr << "usage: " << argv[0] << " " << endl - << "strips samples which are homozygous but have observations implying heterozygosity" << endl; - return 1; + if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << "usage: " << argv[0] << " " << endl << endl + << "strips samples which are homozygous but have observations implying heterozygosity." << endl + << "Remove samples for which the reported genotype (GT) and observation counts disagree (AO, RO)." << endl; + cerr << endl << "Type: transformation" << endl << endl; + return 1; } - + } string filename = argv[1]; VariantCallFile variantFile; @@ -62,7 +75,7 @@ // write the new header cout << variantFile.header << endl; - + // print the records, filtering is done via the setting of varA's output sample names while (variantFile.getNextVariant(var)) { stripAberrant(var); @@ -72,4 +85,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfremovesamples.cpp libvcflib-1.0.2+dfsg/src/vcfremovesamples.cpp --- libvcflib-1.0.1+dfsg/src/vcfremovesamples.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfremovesamples.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include @@ -28,12 +37,15 @@ int main(int argc, char** argv) { - if (argc < 3) { - cerr << "usage: " << argv[0] << " [SAMPLE1] [SAMPLE2] ..." << endl - << "outputs each record in the vcf file, removing samples listed on the command line" << endl; + if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << "usage: " << argv[0] << " [SAMPLE1] [SAMPLE2] ..." 
<< endl << endl + << "outputs each record in the vcf file, removing samples listed on the command line" << endl; + cerr << endl << "Type: transformation" << endl << endl; return 1; } - + } string filename = argv[1]; vector samplesToRemove; @@ -61,10 +73,10 @@ // and restrict the output sample names in the variant to those we are keeping var.setOutputSampleNames(samplesToKeep); - + // write the new header cout << variantFile.header << endl; - + // print the records, filtering is done via the setting of varA's output sample names while (variantFile.getNextVariant(var)) { cout << var << endl; @@ -73,4 +85,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfroc.cpp libvcflib-1.0.2+dfsg/src/vcfroc.cpp --- libvcflib-1.0.1+dfsg/src/vcfroc.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfroc.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "BedReader.h" #include "IntervalTree.h" @@ -14,7 +23,7 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [options] []" << endl << endl - << "options:" << endl + << "options:" << endl << " -t, --truth-vcf FILE use this VCF as ground truth for ROC generation" << endl << " -w, --window-size N compare records up to this many bp away (default 30)" << endl << " -c, --complex directly compare complex alleles, don't parse into primitives" << endl @@ -22,6 +31,7 @@ << endl << "Generates a pseudo-ROC curve using sensitivity and specificity estimated against" << endl << "a putative truth set. Thresholding is provided by successive QUAL cutoffs." 
<< endl; + cerr << endl << "Type: statistics" << endl << endl; exit(0); } @@ -38,7 +48,7 @@ Variant* v = &variants.back(); rawVariantIntervals[var.sequenceName].push_back(Interval(left, right, v)); } - + for (map > >::iterator j = rawVariantIntervals.begin(); j != rawVariantIntervals.end(); ++j) { variantIntervals[j->first] = IntervalTree((vector >&&)j->second); } @@ -323,7 +333,7 @@ int falseNegativeComplex = 0; // write header - + cout << "threshold" << "\t" << "num_snps" << "\t" << "false_positive_snps" << "\t" @@ -414,7 +424,7 @@ } } else { --totalComplex; - } + } } vector& falseNegatives = falseNegativeAllelesAtCutoff[threshold]; for (vector::iterator va = falseNegatives.begin(); va != falseNegatives.end(); ++va) { @@ -460,9 +470,8 @@ << falseNegativeComplex << endl; } - + exit(0); // why? return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfsample2info.cpp libvcflib-1.0.2+dfsg/src/vcfsample2info.cpp --- libvcflib-1.0.1+dfsg/src/vcfsample2info.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfsample2info.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "Fasta.h" @@ -11,7 +20,7 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [options] " << endl << endl - << "options:" << endl + << "options:" << endl << " -f, --field Add information about this field in samples to INFO column" << endl << " -i, --info Store the computed statistic in this info field" << endl << " -a, --average Take the mean of samples for field (default)" << endl @@ -22,6 +31,7 @@ << "Take annotations given in the per-sample fields and add the mean, median, min, or max" << endl << "to the site-level INFO." 
<< endl << endl; + cerr << endl << "Type: transformation" << endl << endl; exit(0); } @@ -45,7 +55,7 @@ int c; string sampleField; string infoField; - StatType statType = MEAN; + StatType statType = MEAN; if (argc == 1) printSummary(argv); @@ -72,7 +82,7 @@ /* Detect the end of the options. */ if (c == -1) break; - + switch (c) { case 0: @@ -92,7 +102,7 @@ case 'i': infoField = optarg; break; - + case 'a': statType = MEAN; break; @@ -118,7 +128,7 @@ printSummary(argv); exit(1); break; - + default: abort (); } @@ -215,4 +225,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfsamplediff.cpp libvcflib-1.0.2+dfsg/src/vcfsamplediff.cpp --- libvcflib-1.0.1+dfsg/src/vcfsamplediff.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfsamplediff.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include @@ -37,15 +46,17 @@ void printSummary(char** argv) { - cerr << "usage: " << argv[0] << " [options] [ ... ] " << endl - << "Tags each record where the listed sample genotypes differ with ." << endl - << "The first sample is assumed to be germline, the second somatic." << endl - << "Each record is tagged with ={germline,somatic,loh} to specify the type of" << endl - << "variant given the genotype difference between the two samples." << endl - << endl - << "options:" << endl - << " -s --strict Require that no observations in the germline support the somatic alternate." << endl - << endl; + cerr << "usage: " << argv[0] << " [options] [ ... ] " << endl << endl + << "Establish putative somatic variants using reported differences between germline and somatic samples." << endl + << "Tags each record where the listed sample genotypes differ with ." 
<< endl + << "The first sample is assumed to be germline, the second somatic." << endl + << "Each record is tagged with ={germline,somatic,loh} to specify the type of" << endl + << "variant given the genotype difference between the two samples." << endl + << endl + << "options:" << endl + << " -s --strict Require that no observations in the germline support the somatic alternate." << endl + << endl; + cerr << endl << "Type: transformation" << endl << endl; } @@ -73,7 +84,7 @@ /* Detect the end of the options. */ if (c == -1) break; - + switch (c) { case 0: @@ -89,7 +100,7 @@ case 's': strict = true; break; - + case 'h': printSummary(argv); exit(0); @@ -100,7 +111,7 @@ printSummary(argv); exit(1); break; - + default: abort (); } @@ -149,7 +160,7 @@ // write the new header cout << variantFile.header << endl; - + // print the records, filtering is done via the setting of varA's output sample names while (variantFile.getNextVariant(var)) { if (var.samples.find(samples.front()) != var.samples.end() @@ -197,4 +208,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfsamplenames.cpp libvcflib-1.0.2+dfsg/src/vcfsamplenames.cpp --- libvcflib-1.0.1+dfsg/src/vcfsamplenames.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfsamplenames.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,9 +1,32 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" using namespace std; using namespace vcflib; int main(int argc, char** argv) { +if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << R"( +List sample names + +Usage: vcfsamplenames + +Type: transformation + + )"; + exit(1); + } + } VariantCallFile variantFile; @@ -26,4 +49,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfsamplestats.cpp libvcflib-1.0.2+dfsg/src/vcfsamplestats.cpp --- libvcflib-1.0.1+dfsg/src/vcfsamplestats.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfsamplestats.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include diff -Nru libvcflib-1.0.1+dfsg/src/vcfsitesummarize.cpp libvcflib-1.0.2+dfsg/src/vcfsitesummarize.cpp --- libvcflib-1.0.1+dfsg/src/vcfsitesummarize.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfsitesummarize.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" using namespace std; @@ -5,6 +14,38 @@ int main(int argc, char** argv) { + if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << R"( +Summarize by site + +Usage: vcfsitesummarize + +Example: + +vcfsitesummarize samples/sample.vcf + +CHROM POS ID REF QUAL FILTER AA AC AF AN DP NS DB H2 +19 111 . A 9.6 . 0 0 +19 112 . A 10 . 0 0 +20 14370 rs6054257 G 29 PASS 0.5 14 3 1 1 +20 17330 . T 3 q10 0.017 11 3 0 0 +20 1110696 rs6040355 A 67 PASS T 10 2 1 0 +20 1230237 . 
T 47 PASS T 13 3 0 0 +20 1234567 microsat1 G 50 PASS G 6 9 3 0 0 +20 1235237 . T 0 . 0 0 +X 10 rsTest AC 10 PASS + + +Type: statistics + + )"; + exit(1); + } + } + + VariantCallFile variantFile; if (argc > 1) { @@ -36,7 +77,7 @@ // defaults cout << "CHROM\tPOS\tID\tREF\tQUAL\tFILTER"; - + // configurable info field for (vector::iterator i = infofields.begin(); i != infofields.end(); ++i) { cout << "\t" << *i; @@ -83,12 +124,11 @@ cout << 0; } } - + cout << endl; - + } - + return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfsom.cpp libvcflib-1.0.2+dfsg/src/vcfsom.cpp --- libvcflib-1.0.1+dfsg/src/vcfsom.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfsom.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,14 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. + + Note this file is obsolete, see 1b5f1b4be67226501357ec788e8cd37c0f0a9faa +*/ + #include "Variant.h" #include "split.h" #include "convert.h" @@ -5,7 +16,7 @@ #include #include #include -#include "fsom/fsom.h" +#include #include #include @@ -205,7 +216,8 @@ << " -T, --paint-true VCF use VCF file to annotate true variants (multiple)" << endl << " -F, --paint-false VCF use VCF file to annotate false variants (multiple)" << endl << " -R, --paint-tag TAG provide estimated FDR% in TAG in variant INFO" << endl - << " -N, --false-negative replace FDR% (false detection) with FNR% (false negative)" << endl; + << " -N, --false-negative replace FDR% (false detection) with FNR% (false negative)" << endl << endl; + cerr << "This code is deprecated!" << endl << endl; } @@ -239,7 +251,7 @@ while (true) { static struct option long_options[] = - { + { /* These options set a flag. 
*/ //{"verbose", no_argument, &verbose_flag, 1}, {"help", no_argument, 0, 'h'}, @@ -311,7 +323,7 @@ som_file = optarg; apply = true; break; - + case 's': som_file = optarg; train = true; @@ -378,7 +390,7 @@ } if (debug) start_timer(); - + vector variants; if (train) { map > normalizationLimits; @@ -460,7 +472,7 @@ } else { net = som_network_new(data[0].size(), height, width); - + if ( !net ) { printf( "ERROR: som_network_new failed.\n" ); return 1; diff -Nru libvcflib-1.0.1+dfsg/src/vcfstats.cpp libvcflib-1.0.2+dfsg/src/vcfstats.cpp --- libvcflib-1.0.1+dfsg/src/vcfstats.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfstats.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "convert.h" @@ -61,6 +70,8 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [options] " << endl << endl + << "Prints statistics about variants in the input VCF file." << endl << endl + << " -r, --region specify a region on which to target the stats, requires a BGZF" << endl << " compressed file which has been indexed with tabix. any number of" << endl << " regions may be specified." << endl @@ -72,8 +83,9 @@ << " -x, --mismatch-score N mismatch score for SW algorithm" << endl << " -o, --gap-open-penalty N gap open penalty for SW algorithm" << endl << " -e, --gap-extend-penalty N gap extension penalty for SW algorithm" << endl - << endl - << "Prints statistics about variants in the input VCF file." << endl; + << endl; + cerr << endl << "Type: statistics" << endl << endl; + exit(1); } @@ -119,7 +131,7 @@ /* Detect the end of the options. 
*/ if (c == -1) break; - + switch (c) { case 0: @@ -136,15 +148,15 @@ printSummary(argv); exit(0); break; - + case 'r': regions.push_back(optarg); break; - + case 'l': lengthFrequency = false; break; - + case 'a': addTags = true; break; @@ -168,7 +180,7 @@ case 'e': gapExtendPenalty = atof(optarg); break; - + default: abort (); } @@ -257,7 +269,7 @@ } else { ++biallelics; } - map > alternates + map > alternates = var.parsedAlternates(includePreviousBaseForIndels, useMNPs, useEntropy, @@ -267,9 +279,9 @@ gapExtendPenalty); map > uniqueVariants; - + vector cigars; - + for (vector::iterator a = var.alt.begin(); a != var.alt.end(); ++a) { string& alternate = *a; if (addTags) @@ -569,4 +581,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfstreamsort.cpp libvcflib-1.0.2+dfsg/src/vcfstreamsort.cpp --- libvcflib-1.0.1+dfsg/src/vcfstreamsort.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfstreamsort.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include #include @@ -16,13 +25,14 @@ void printSummary(char** argv) { cerr << "usage: " << argv[0] << " [options] [vcf file]" << endl << endl - << "Sorts the input (either stdin or file) using a streaming sort algorithm." + << "Sorts the input (either stdin or file) using a streaming sort algorithm. Guarantees that the positional order is correct provided out-of-order variants are no more than 100 positions in the VCF file apart." 
<< endl << "options:" << endl << endl << " -h, --help this dialog" << endl << " -w, --window N number of sites to sort (default 10000)" << endl << " -a, --all load all sites and then sort in memory" << endl; + cerr << endl << "Type: transformation" << endl << endl; } int main(int argc, char** argv) { @@ -35,7 +45,7 @@ while (true) { static struct option long_options[] = - { + { /* These options set a flag. */ //{"verbose", no_argument, &verbose_flag, 1}, {"help", no_argument, 0, 'h'}, @@ -63,7 +73,7 @@ exit(1); } break; - + case 'a': sortAll = true; break; @@ -72,7 +82,7 @@ printSummary(argv); exit(0); break; - + default: break; } @@ -140,4 +150,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfToHap.cpp libvcflib-1.0.2+dfsg/src/vcfToHap.cpp --- libvcflib-1.0.1+dfsg/src/vcfToHap.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfToHap.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,8 +1,18 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" #include "split.h" #include "cdflib.hpp" #include "pdflib.hpp" #include "var.hpp" +#include "makeUnique.h" #include #include @@ -385,33 +395,30 @@ sindex += 1; } - genotype * populationTarget ; + unique_ptr populationTarget ; if(globalOpts.type == "PL"){ - populationTarget = new pl(); + populationTarget makeUnique(); } if(globalOpts.type == "GL"){ - populationTarget = new gl(); + populationTarget makeUnique(); } if(globalOpts.type == "GP"){ - populationTarget = new gp(); + populationTarget makeUnique(); } if(globalOpts.type == "GT"){ - populationTarget = new gt(); + populationTarget makeUnique(); } populationTarget->loadPop(target, var.sequenceName, var.position); if(populationTarget->af <= globalOpts.af || populationTarget->af >= (1-globalOpts.af) ){ - delete populationTarget; + ; continue; } positions.push_back(var.position); afs.push_back(populationTarget->af); - loadPhased(haplotypes, populationTarget, populationTarget->gts.size()); - - populationTarget = NULL; - delete populationTarget; + loadPhased(haplotypes, populationTarget.get(), populationTarget->gts.size()); } if(!globalOpts.geneticMapFile.empty()){ diff -Nru libvcflib-1.0.1+dfsg/src/vcfuniqalleles.cpp libvcflib-1.0.2+dfsg/src/vcfuniqalleles.cpp --- libvcflib-1.0.1+dfsg/src/vcfuniqalleles.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfuniqalleles.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" #include @@ -5,6 +14,22 @@ using namespace vcflib; int main(int argc, char** argv) { +if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << R"( + +List unique alleles For each record, remove any duplicate alternate +alleles that may have resulted from merging separate VCF files. + +Usage: vcfuniqalleles + +Type: filter + + )"; + exit(1); + } + } VariantCallFile variantFile; @@ -51,4 +76,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfuniq.cpp libvcflib-1.0.2+dfsg/src/vcfuniq.cpp --- libvcflib-1.0.1+dfsg/src/vcfuniq.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfuniq.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" using namespace std; @@ -5,6 +14,25 @@ int main(int argc, char** argv) { + if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << R"( + +Usage: vcfuniq + +List unique genotypes. Like GNU uniq, but for VCF records. Remove +records which have the same positon, ref, and alt as the previous +record. + +Type: filter + + )"; + exit(1); + } + } + + VariantCallFile variantFile; if (argc > 1) { @@ -46,4 +74,3 @@ return 0; } - diff -Nru libvcflib-1.0.1+dfsg/src/vcfunphase.cpp libvcflib-1.0.2+dfsg/src/vcfunphase.cpp --- libvcflib-1.0.1+dfsg/src/vcfunphase.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vcfunphase.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" #include "split.h" #include diff -Nru libvcflib-1.0.1+dfsg/src/vec128int.h libvcflib-1.0.2+dfsg/src/vec128int.h --- libvcflib-1.0.1+dfsg/src/vec128int.h 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/vec128int.h 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + /******************************************************************************/ /* */ /* Licensed Materials - Property of IBM */ diff -Nru libvcflib-1.0.1+dfsg/src/veclib_types.h libvcflib-1.0.2+dfsg/src/veclib_types.h --- libvcflib-1.0.1+dfsg/src/veclib_types.h 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/veclib_types.h 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + /******************************************************************************/ /* */ /* Licensed Materials - Property of IBM */ diff -Nru libvcflib-1.0.1+dfsg/src/wcFst.cpp libvcflib-1.0.2+dfsg/src/wcFst.cpp --- libvcflib-1.0.1+dfsg/src/wcFst.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/wcFst.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. 
+*/ + #include "Variant.h" #include "split.h" #include "cdflib.hpp" @@ -6,13 +15,14 @@ #include #include -#include +#include #include #include #include #include #include #include "gpatInfo.hpp" +#include "makeUnique.h" using namespace std; using namespace vcflib; @@ -42,7 +52,7 @@ cerr << "INFO: required, y,type -- argument: genotype likelihood format; genotype : GT,GL,PL,GP " << endl; cerr << "INFO: optional: r,region -- argument: a tabix compliant genomic range: seqid or seqid:start-end " << endl; cerr << "INFO: optional: d,deltaaf -- argument: skip sites where the difference in allele frequencies is less than deltaaf, default is zero " << endl; - + cerr << endl << "Type: statistics" << endl << endl; printVersion(); } @@ -58,11 +68,11 @@ } void loadIndices(map & index, string set){ - + vector indviduals = split(set, ","); vector::iterator it = indviduals.begin(); - + for(; it != indviduals.end(); it++){ index[ atoi( (*it).c_str() ) ] = 1; } @@ -81,17 +91,17 @@ // set region to scaffold - string region = "NA"; + string region = "NA"; - // using vcflib; thanks to Erik Garrison + // using vcflib; thanks to Erik Garrison VariantCallFile variantFile; - // zero based index for the target and background indivudals - + // zero based index for the target and background indivudals + map it, ib; - - // deltaaf is the difference of allele frequency we bother to look at + + // deltaaf is the difference of allele frequency we bother to look at string deltaaf ; double daf = 0.00; @@ -101,7 +111,7 @@ string type = "NA"; - const struct option longopts[] = + const struct option longopts[] = { {"version" , 0, 0, 'v'}, {"help" , 0, 0, 'h'}, @@ -120,7 +130,7 @@ while(iarg != -1) { iarg = getopt_long(argc, argv, "y:r:d:t:b:f:chv", longopts, &index); - + switch (iarg) { case 'h': @@ -146,7 +156,7 @@ case 'd': cerr << "INFO: only scoring sites where the allele frequency difference is greater than: " << optarg << endl; deltaaf = optarg; - daf = atof(deltaaf.c_str()); + daf = 
atof(deltaaf.c_str()); break; case 'y': type = optarg; @@ -154,7 +164,7 @@ break; case 'r': cerr << "INFO: set seqid region to : " << optarg << endl; - region = optarg; + region = optarg; break; default: break; @@ -168,7 +178,7 @@ } variantFile.open(filename); - + if(region != "NA"){ if(! variantFile.setRegion(region)){ cerr << "FATAL: unable to set region" << endl; @@ -205,15 +215,15 @@ int nsamples = samples.size(); while (variantFile.getNextVariant(var)) { - - // biallelic sites naturally + + // biallelic sites naturally if(var.alt.size() > 1){ continue; } - + vector < map< string, vector > > target, background, total; - + int index = 0; for(int nsamp = 0; nsamp < nsamples; nsamp++){ @@ -227,7 +237,7 @@ if(ib.find(index) != ib.end()){ background.push_back(sample); } - } + } index += 1; } @@ -235,58 +245,52 @@ if(target.size() < 5 || background.size() < 5){ continue; } - - genotype * populationTarget ; - genotype * populationBackground ; + + using Detail::makeUnique; + + unique_ptr populationTarget ; + unique_ptr populationBackground ; if(type == "PL"){ - populationTarget = new pl(); - populationBackground = new pl(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); } if(type == "GL"){ - populationTarget = new gl(); - populationBackground = new gl(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); } if(type == "GP"){ - populationTarget = new gp(); - populationBackground = new gp(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); } if(type == "GT"){ - populationTarget = new gt(); - populationBackground = new gt(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); } - + populationTarget->loadPop(target, var.sequenceName, var.position); populationBackground->loadPop(background, var.sequenceName, var.position); if(populationTarget->af == -1 || populationBackground->af == -1){ - delete populationTarget; - delete populationBackground; continue; } if(populationTarget->af 
== 1 && populationBackground->af == 1){ - delete populationTarget; - delete populationBackground; continue; } if(populationTarget->af == 0 && populationBackground->af == 0){ - delete populationTarget; - delete populationBackground; continue; } double afdiff = abs(populationTarget->af - populationBackground->af); if(afdiff < daf){ - delete populationTarget; - delete populationBackground; continue; } - + // pg 1360 B.S Weir and C.C. Cockerham 1984 double nbar = ( populationTarget->ngeno / 2 ) + (populationBackground->ngeno / 2); double rn = 2*nbar; - + // special case of only two populations double nc = rn ; nc -= (pow(populationTarget->ngeno,2)/rn); @@ -294,16 +298,16 @@ // average sample frequency double pbar = (populationTarget->af + populationBackground->af) / 2; - // sample variance of allele A frequences over the population - + // sample variance of allele A frequences over the population + double s2 = 0; s2 += ((populationTarget->ngeno * pow(populationTarget->af - pbar, 2))/nbar); s2 += ((populationBackground->ngeno * pow(populationBackground->af - pbar, 2))/nbar); - - // average heterozygosity - + + // average heterozygosity + double hbar = (populationTarget->hfrq + populationBackground->hfrq) / 2; - + //global af var double pvar = pbar * (1 - pbar); @@ -313,20 +317,17 @@ double avar2 = 1 / (nbar -1) ; double avar3 = pvar - (0.5*s2) - (0.25*hbar); double avar = avar1 * (s2 - (avar2 * avar3)); - + double bvar1 = nbar / (nbar - 1); double bvar2 = pvar - (0.5*s2) - (((2*nbar -1)/(4*nbar))*hbar); double bvar = bvar1 * bvar2; double cvar = 0.5*hbar; - + double fst = avar / (avar+bvar+cvar); - - cout << var.sequenceName << "\t" << var.position << "\t" << populationTarget->af << "\t" << populationBackground->af << "\t" << fst << endl ; - delete populationTarget; - delete populationBackground; + cout << var.sequenceName << "\t" << var.position << "\t" << populationTarget->af << "\t" << populationBackground->af << "\t" << fst << endl ; } - return 0; + return 0; } diff 
-Nru libvcflib-1.0.1+dfsg/src/xpEHH.cpp libvcflib-1.0.2+dfsg/src/xpEHH.cpp --- libvcflib-1.0.1+dfsg/src/xpEHH.cpp 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/src/xpEHH.cpp 2021-01-28 07:04:12.000000000 +0000 @@ -1,3 +1,12 @@ +/* + vcflib C++ library for parsing and manipulating VCF files + + Copyright © 2010-2020 Erik Garrison + Copyright © 2020 Pjotr Prins + + This software is published under the MIT License. See the LICENSE file. +*/ + #include "Variant.h" #include "split.h" #include "cdflib.hpp" @@ -390,29 +399,29 @@ sindex += 1; } - genotype * populationTarget ; - genotype * populationBackground; - genotype * populationTotal ; + unique_ptr populationTarget ; + unique_ptr populationBackground; + unique_ptr populationTotal ; if(type == "PL"){ - populationTarget = new pl(); - populationBackground = new pl(); - populationTotal = new pl(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); + populationTotal = makeUnique(); } if(type == "GL"){ - populationTarget = new gl(); - populationBackground = new gl(); - populationTotal = new gl(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); + populationTotal = makeUnique(); } if(type == "GP"){ - populationTarget = new gp(); - populationBackground = new gp(); - populationTotal = new gp(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); + populationTotal = makeUnique(); } if(type == "GT"){ - populationTarget = new gt(); - populationBackground = new gt(); - populationTotal = new gt(); + populationTarget = makeUnique(); + populationBackground = makeUnique(); + populationTotal = makeUnique(); } populationTarget->loadPop(target, var.sequenceName, var.position); @@ -423,20 +432,14 @@ // if(populationTotal->af > 0.99 || populationTotal->af < 0.01){ +// // -// delete populationTarget; -// delete populationBackground; -// delete populationTotal; // continue; // } afs.push_back(populationTotal->af); positions.push_back(var.position); 
loadPhased(haplotypes, populationTotal, nsamples); - - delete populationTarget; - delete populationBackground; - delete populationTotal; } diff -Nru libvcflib-1.0.1+dfsg/test/data/regression/vcf2tsv_4.tsv libvcflib-1.0.2+dfsg/test/data/regression/vcf2tsv_4.tsv --- libvcflib-1.0.1+dfsg/test/data/regression/vcf2tsv_4.tsv 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/test/data/regression/vcf2tsv_4.tsv 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,13 @@ +#CHROM POS ID REF ALT QUAL FILTER AA AC AF AN DB DP H2 NS +19 111 . A C 9.6 . . . . . . . . . +19 112 . A G 10 . . . . . . . . . +20 14370 rs6054257 G A 29 PASS . . 0.5 . . 14 . 3 +20 17330 . T A 3 q10 . . 0.017 . . 11 . 3 +20 1110696 rs6040355 A G 67 PASS T . 0.333,0.667 . . 10 . 2 +20 1110696 rs6040355 A T 67 PASS T . 0.333,0.667 . . 10 . 2 +20 1230237 . T . 47 PASS T . . . . 13 . 3 +20 1234567 microsat1 G GA 50 PASS G 3,1 . 6 . 9 . 3 +20 1234567 microsat1 G GAC 50 PASS G 3,1 . 6 . 9 . 3 +20 1235237 . T . 0 . . . . . . . . . +X 10 rsTest AC A 10 PASS . . . . . . . . +X 10 rsTest AC ATG 10 PASS . . . . . . . . diff -Nru libvcflib-1.0.1+dfsg/test/data/regression/vcf2tsv_5.tsv libvcflib-1.0.2+dfsg/test/data/regression/vcf2tsv_5.tsv --- libvcflib-1.0.1+dfsg/test/data/regression/vcf2tsv_5.tsv 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/test/data/regression/vcf2tsv_5.tsv 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,37 @@ +#CHROM POS ID REF ALT QUAL FILTER AA AC AF AN DB DP H2 NS SAMPLE DP GQ GT HQ +19 111 . A C 9.6 . . . . . . . . . NA00001 . . 0|0 10,10 +19 111 . A C 9.6 . . . . . . . . . NA00002 . . 0|0 10,10 +19 111 . A C 9.6 . . . . . . . . . NA00003 . . 0/1 3,3 +19 112 . A G 10 . . . . . . . . . NA00001 . . 0|0 10,10 +19 112 . A G 10 . . . . . . . . . NA00002 . . 0|0 10,10 +19 112 . A G 10 . . . . . . . . . NA00003 . . 0/1 3,3 +20 14370 rs6054257 G A 29 PASS . . 0.5 . . 14 . 3 NA00001 1 48 0|0 51,51 +20 14370 rs6054257 G A 29 PASS . . 0.5 . . 14 . 
3 NA00002 8 48 1|0 51,51 +20 14370 rs6054257 G A 29 PASS . . 0.5 . . 14 . 3 NA00003 5 43 1/1 .,. +20 17330 . T A 3 q10 . . 0.017 . . 11 . 3 NA00001 3 49 0|0 58,50 +20 17330 . T A 3 q10 . . 0.017 . . 11 . 3 NA00002 5 3 0|1 65,3 +20 17330 . T A 3 q10 . . 0.017 . . 11 . 3 NA00003 3 41 0/0 .,. +20 1110696 rs6040355 A G 67 PASS T . 0.333,0.667 . . 10 . 2 NA00001 6 21 1|2 23,27 +20 1110696 rs6040355 A G 67 PASS T . 0.333,0.667 . . 10 . 2 NA00002 0 2 2|1 18,2 +20 1110696 rs6040355 A G 67 PASS T . 0.333,0.667 . . 10 . 2 NA00003 4 35 2/2 .,. +20 1110696 rs6040355 A T 67 PASS T . 0.333,0.667 . . 10 . 2 NA00001 6 21 1|2 23,27 +20 1110696 rs6040355 A T 67 PASS T . 0.333,0.667 . . 10 . 2 NA00002 0 2 2|1 18,2 +20 1110696 rs6040355 A T 67 PASS T . 0.333,0.667 . . 10 . 2 NA00003 4 35 2/2 .,. +20 1230237 . T . 47 PASS T . . . . 13 . 3 NA00001 . 54 0|0 56,60 +20 1230237 . T . 47 PASS T . . . . 13 . 3 NA00002 4 48 0|0 51,51 +20 1230237 . T . 47 PASS T . . . . 13 . 3 NA00003 2 61 0/0 .,. +20 1234567 microsat1 G GA 50 PASS G 3,1 . 6 . 9 . 3 NA00001 4 . 0/1 . +20 1234567 microsat1 G GA 50 PASS G 3,1 . 6 . 9 . 3 NA00002 2 17 0/2 . +20 1234567 microsat1 G GA 50 PASS G 3,1 . 6 . 9 . 3 NA00003 3 40 1/1 . +20 1234567 microsat1 G GAC 50 PASS G 3,1 . 6 . 9 . 3 NA00001 4 . 0/1 . +20 1234567 microsat1 G GAC 50 PASS G 3,1 . 6 . 9 . 3 NA00002 2 17 0/2 . +20 1234567 microsat1 G GAC 50 PASS G 3,1 . 6 . 9 . 3 NA00003 3 40 1/1 . +20 1235237 . T . 0 . . . . . . . . . NA00001 . . 0/0 . +20 1235237 . T . 0 . . . . . . . . . NA00002 . . 0|0 . +20 1235237 . T . 0 . . . . . . . . . NA00003 . . ./. . +X 10 rsTest AC A 10 PASS . . . . . . . . NA00001 . . 0 . +X 10 rsTest AC A 10 PASS . . . . . . . . NA00002 . . 0/1 . +X 10 rsTest AC A 10 PASS . . . . . . . . NA00003 . . 0|2 . +X 10 rsTest AC ATG 10 PASS . . . . . . . . NA00001 . . 0 . +X 10 rsTest AC ATG 10 PASS . . . . . . . . NA00002 . . 0/1 . +X 10 rsTest AC ATG 10 PASS . . . . . . . . NA00003 . . 0|2 . 
diff -Nru libvcflib-1.0.1+dfsg/test/data/regression/vcffilter_2.vcf libvcflib-1.0.2+dfsg/test/data/regression/vcffilter_2.vcf --- libvcflib-1.0.1+dfsg/test/data/regression/vcffilter_2.vcf 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/test/data/regression/vcffilter_2.vcf 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,26 @@ +##fileformat=VCFv4.0 +##fileDate=20090805 +##source=myImputationProgramV3.1 +##reference=1000GenomesPilot-NCBI36 +##phasing=partial +##filter="DP > 10" +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##ALT= +##ALT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +20 14370 rs6054257 G A 29 PASS AF=0.5;DP=14;NS=3;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,. +20 17330 . T A 3 q10 AF=0.017;DP=11;NS=3 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3:.,. +20 1230237 . T . 47 PASS AA=T;DP=13;NS=3 GT:GQ:DP:HQ 0|0:54:.:56,60 0|0:48:4:51,51 0/0:61:2:.,. 
diff -Nru libvcflib-1.0.1+dfsg/test/Makefile libvcflib-1.0.2+dfsg/test/Makefile --- libvcflib-1.0.1+dfsg/test/Makefile 2019-10-01 07:06:02.000000000 +0000 +++ libvcflib-1.0.2+dfsg/test/Makefile 2021-01-28 07:04:12.000000000 +0000 @@ -5,19 +5,24 @@ CC=gcc CXX=g++ -CFLAGS=-Wall -std=c++0x -LINKERS=-lm -lpthread -lhts -lvcflib -lz -INCLUDE=-I../src -I../googletest/googletest/include/ -I../tabixpp/ -I../tabixpp/htslib/ -I ../smithwaterman/ -I ../multichoose/ -I ../filevercmp/ -I ../googletest/googletest/make/ -LIB=-L../ -L../tabixpp/htslib -L../tabixpp/htslib/ -L../googletest/googletest/make/ +CFLAGS=-Wall -std=c++0x +INCLUDE=-I../src -I../googletest/googletest/include/ -I../fastahack -I../smithwaterman/ -I../multichoose/ -I../filevercmp/ -I../googletest/googletest/make/ +LIB=-L../build -L../googletest/googletest/make/ -lm -ltabixpp -lhts -lpthread LIBGTEST=../googletest/googletest/make/gtest_main.a -LIBVCF=../libvcflib.a +LIBVCF=../build/libvcflib.a + all: run ../googletest/googletest/make/gtest_main.a: - cd ../googletest/googletest/make/ && make + cd ../googletest/googletest/make && make + tests/main: ../googletest/googletest/make/gtest_main.a - $(CXX) $(CFLAGS) $(INCLUDE) $(LIB) $(LINKERS) tests/mainTest.cpp ../tabixpp/tabix.cpp ../tabixpp/htslib/libhts.a $(LIBVCF) $(LIBGTEST) -o tests/main + $(CXX) $(CFLAGS) $(INCLUDE) tests/mainTest.cpp -o tests/main $(LIBVCF) $(LIBGTEST) $(LIB) + run: tests/main ./tests/main + rm -v ../googletest/googletest/make/gtest_main.a + rm -v ../googletest/googletest/make/sample1_unittest + clean: rm -f tests/main diff -Nru libvcflib-1.0.1+dfsg/test/pytest/rtest.py libvcflib-1.0.2+dfsg/test/pytest/rtest.py --- libvcflib-1.0.1+dfsg/test/pytest/rtest.py 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/test/pytest/rtest.py 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,61 @@ +import os +import sys +import pprint +import difflib +import inspect +import re +from subprocess import Popen, PIPE + +bindir = "../build" + +regressiondir = 
"data/regression" +tmpdir = "tmp" + +VERSION = open("../VERSION").read().strip() + +def cat(cmd): + head(cmd, -1) + +def head(cmd, lines=4): + # out = subprocess.check_output(f"{bindir}/{cmd}",shell=True) + cmd2 = f"{bindir}/{cmd}".split() + # process = Popen(cmd2, stdout=PIPE, stderr=PIPE, shell=True) + # out = process.communicate()[0] + p = Popen(cmd2, stdout=PIPE, stderr=PIPE, close_fds=True) + output = p.communicate() + out = output[0] + if len(out) == 0: + # if stdout is empty fetch stderr + out = output[1] + # out=subprocess.check_output(cmd2, universal_newlines=True) + header = out.decode().expandtabs(tabsize=8).split("\n")[0:lines] + header = ['>' if l=='' else l for l in header] + header = [l.replace(VERSION+" ", "") for l in header] + header = [l.replace("../build/", "") for l in header] + print("\n".join(header)) + +def run_stdout(cmd, ext = "vcf"): + os.makedirs(tmpdir,exist_ok=True) + curframe = inspect.currentframe() + # pp = pprint.PrettyPrinter(indent=4) + # pp.pprint(inspect.getouterframes(curframe)) + # print("------------n") + calframe = inspect.getouterframes(curframe, 1) + p = re.compile('\[([0-9])\]') + index = p.findall(calframe[1].filename)[0] + + name = calframe[1].filename[0:-4] + if "doctest" in name: + name = name[9:-3] + else: + name = name[18:-1] + + name += "_"+index + if ext: + name += "." 
+ ext + + tmpfn = tmpdir + "/" + name + os.system(f"{bindir}/{cmd} > {tmpfn}") + cmpfn = regressiondir+"/"+name + sys.stdout.writelines(difflib.unified_diff(open(cmpfn).readlines(),open(tmpfn).readlines(),cmpfn,tmpfn,n=1)) + print(f"output in {name}") diff -Nru libvcflib-1.0.1+dfsg/test/pytest/vcf2tsv.md libvcflib-1.0.2+dfsg/test/pytest/vcf2tsv.md --- libvcflib-1.0.1+dfsg/test/pytest/vcf2tsv.md 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/test/pytest/vcf2tsv.md 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,99 @@ +% VCF2TSV(1) vcf2tsv | Convert VCF to TSV +% Erik Garrison and other vcflib contributors + +# NAME + +vcf2tsv - Converts stdin or given VCF file to tab-delimited format, +using null string to replace empty values in the table. + +# SYNOPSIS + +**vcf2tsv** \[-n null_string] \[-g] \[*file*] + +# DESCRIPTION + +**vcf2tsv** converts stdin or given VCF file to tab-delimited format, +using null string to replace empty values in the table. + +Specifying *-g* will output one line per sample with genotype +information. When there is more than one alt allele there will be +multiple rows, one for each allele and, the info will match the 'A' +index + +## Options + +-h, --help + +: shows help message and exits. + +-g + +: Output one line per sample with genotype information. + +# EXIT VALUES + +**0** +: Success + +**not 0** +: Failure + +# EXAMPLES + + + + +``` + +>>> head("vcf2tsv -h",1) +usage: vcf2tsv [-n null_string] [-g] [vcf file] + + +``` + +vcf2tsv converts a CSV to a tabulated test file, e.g. + +```python + +>>> head("vcf2tsv ../samples/sample.vcf") +#CHROM POS ID REF ALT QUAL FILTER AA AC AF AN DB DP H2 NS +19 111 . A C 9.6 . . . . . . . . . +19 112 . A G 10 . . . . . . . . . +20 14370 rs6054257 G A 29 PASS . . 0.5 . . 14 . 3 + +``` + +Use the `-g` switch to show genotypes + +```python + +>>> head("vcf2tsv -g ../samples/sample.vcf") +#CHROM POS ID REF ALT QUAL FILTER AA AC AF AN DB DP H2 NS SAMPLE DP GQ GT HQ +19 111 . A C 9.6 . . . . . . . . 
. NA00001 . . 0|0 10,10 +19 111 . A C 9.6 . . . . . . . . . NA00002 . . 0|0 10,10 +19 111 . A C 9.6 . . . . . . . . . NA00003 . . 0/1 3,3 + +``` + +## Source code + +[vcf2tsv.cpp](../../src/vcf2tsv.cpp) + +## Regression tests + +The following commands run full regression tests: + +>>> run_stdout("vcf2tsv ../samples/sample.vcf", ext="tsv") +output in vcf2tsv_4.tsv + +>>> run_stdout("vcf2tsv -g ../samples/sample.vcf", ext="tsv") +output in vcf2tsv_5.tsv + + +# LICENSE + +Copyright 2020 (C) Erik Garrison and vcflib contributors. MIT licensed. diff -Nru libvcflib-1.0.1+dfsg/test/pytest/vcf2tsv-test.py libvcflib-1.0.2+dfsg/test/pytest/vcf2tsv-test.py --- libvcflib-1.0.1+dfsg/test/pytest/vcf2tsv-test.py 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/test/pytest/vcf2tsv-test.py 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,30 @@ +import rtest + +datadir = "../samples" + +vcf = datadir+"/sample.vcf" + +def test01(n): + """vcf2tsv converts a CSV to a tabulated test file, e.g. + + vcf2tsv samples/sample.vcf + + outputs + +``` +#CHROM POS ID REF ALT QUAL FILTER AA AC AF AN DB DP H2NS +19 111 . A C 9.6 . . . . . . . . . +19 112 . A G 10 . . . . . . . . . +20 14370 rs6054257 G A 29 PASS . . 0.5 . . 
14.3 +``` + + >>> rtest.run_stdout(f"vcf2tsv {vcf}", ext="tsv") + + """ + pass + + +if __name__ == "__main__": + import doctest + rtest.setup() + doctest.testmod() diff -Nru libvcflib-1.0.1+dfsg/test/scripts/bin2mdidx-template.erb libvcflib-1.0.2+dfsg/test/scripts/bin2mdidx-template.erb --- libvcflib-1.0.1+dfsg/test/scripts/bin2mdidx-template.erb 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/test/scripts/bin2mdidx-template.erb 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,26 @@ + + +## <%= cmd %> + +<%= descr %> + +```sh + +>>> cat("<%= help_cmd %>") +<%= pydoc_full %> + +``` + +### Source code + +[<%= cmd %>.cpp](https://github.com/vcflib/vcflib/blob/master/src/<%= cmd %>.cpp) + +--- diff -Nru libvcflib-1.0.1+dfsg/test/scripts/bin2md.rb libvcflib-1.0.2+dfsg/test/scripts/bin2md.rb --- libvcflib-1.0.1+dfsg/test/scripts/bin2md.rb 1970-01-01 00:00:00.000000000 +0000 +++ libvcflib-1.0.2+dfsg/test/scripts/bin2md.rb 2021-01-28 07:04:12.000000000 +0000 @@ -0,0 +1,270 @@ +#!/usr/bin/env ruby +# +# Internal helper script to create a markdown file from binaries +# +# The --index option creates the index page +# +# bin2md [--index] erbtemplate [binary] +# +# by Pjotr Prins (C) 2020 +# +# The rules are simple USAGE: +# +# usage: can be multiline block anywhere in the output +# +# After removing that the next block is the DESCRIPTION. +# Onle line create a TYPE (see TYPES). +# The rest are the OPTIONS. 
+ +require 'erb' +require 'date' +require 'open3' + +# cerr << endl << "Type: statistics" << endl << endl; +# cerr << endl << "Type: transformation" << endl << endl; +TYPES = ["filter","metrics","phenotype","genotype","transformation","statistics"] + +=begin + +if (argc == 2) { + string h_flag = argv[1]; + if (h_flag == "-h" || h_flag == "--help") { + cerr << R"( +Generate a random VCF file + +Usage: vcfrandom + +Example: + + vcfrandom + + +Type: statistics + + )"; + exit(1); + } + } + +=end + + +VERSION=`cat ./VERSION`.strip +template = ARGV.shift +is_man = false # creating man pages? +if template == "--man" + is_man = true + template = ARGV.shift +end +create_index = false +if template == "--index" + create_index = true + template = ARGV.shift + index = [] +end +search = ARGV.shift + +bindir = './build' +$stderr.print("--- Parsing the bin files in #{bindir} for #{VERSION}\n") + +d = DateTime.now +year = d.year + +Dir.glob(bindir+'/*').each do|bin| + if !File.directory?(bin) and File.executable?(bin) + if search and bin !~ /#{search}/ + next + end + cmd = File.basename(bin) + help_cmd = cmd + " -h" + $stderr.print(" "+bin+"\n") + stdout, stderr, status = Open3.capture3(bin+" -h") + out = stderr + stdout + if out == "" + help_cmd = cmd + stdout, stderr, status = Open3.capture3(bin) + out = stderr + stdout + end + # out = ,:encoding => 'UTF-8' + out = out.encode('UTF-8') + # $stderr.print(out) + lines = (out).split("\n") + lines = lines.map{|l| l.gsub(/#{Regexp.escape(cmd)}/,"**#{cmd}**")} + lines = lines.map{|l| l.gsub(/\.+\/build\//,"")} + lines = lines.map{|l| l.gsub(/INFO: help:?/,"")} + lines = lines.map{|l| l.gsub(/INFO: description:/,"")} + lines = lines.map{|l| l.gsub(/INFO:\s+/,"")} + pydoc_full = lines.map{|l| l=="" ? 
'>' : l }.join("\n") + in_usage = false + has_options = nil + has_example = false + usage = [] + other = [] + example = [] + lines.shift while lines[0] == "" + lines.each do | l | + break if l == "------------------------------------------------------" + if l =~ /usage/i + in_usage = true + end + if in_usage + if l == "" or l =~ /^Output/i + in_usage = false + next + end + usage << l + else + if l =~ /^Example:/ + has_example = true + end + if has_example + example << l + else + other << l + end + end + end + descr = [] + rest = other + type = "unknown" + (other+example).each do | l | + if l =~ /type: (\S+)/i + type = $1 + raise "Unknown type #{type} for #{cmd}" if !TYPES.include?(type) + break + end + end + + other.each do | l | + break if l == "" or l =~ /^Output/i or l =~ /Options/i + descr << l + end + + if descr == [] + lineno = 0 + rr = rest.reverse + rr.each_with_index do | l,i | + if l != "" and l !~ /^Type/i + lineno = i + break + end + end + rr = rr[lineno..-1] + rr.each do | l | + if descr.length and l == "" or l =~ /^\s/ + descr = descr.reverse + break + end + descr << l + end + rest = rr.drop(descr.size).reverse + else + rest = rest.drop(descr.size) + end + + body = rest.join("\n") + has_options = true if body != "" + usage = usage.join(" ").gsub(/#{VERSION}\s+/,"") + usage = usage.gsub(/usage:\s+/i,"") + usage = usage.gsub(/\s+/," ").strip + pydoc_full = pydoc_full.gsub(/#{VERSION}\s+/,"") + pydoc_full = pydoc_full.gsub(/\.\.\/build\//,"") + # pydoc_full = pydoc_full.gsub(/vcflib/,"VCF") + descr = descr.join(" ").gsub(/#{VERSION}\s+/,"") + descr = descr.sub(/vcflib/,"VCF") + descr = descr.gsub(/\s+/," ").strip + example = example.join("\n") + # print("HELP:",help_cmd,"\n") + # print("DESCRIPTION:",descr,"\n") + # print("USAGE:",usage,"\n") + # print("TYPE:",type,"\n") + # print("BODY:",body,"\n") + + if create_index + rec = { + cmd: cmd, + type: type, + descr: descr + } + index << rec + else + b = binding + renderer = ERB.new(File.read(template)) + + 
File.open("./doc/#{cmd}.md","w") { |f| + f.print renderer.result(b) + } + end + end +end +if create_index + require 'ostruct' + + renderer = ERB.new(File.read("./test/scripts/index-item.erb")) + File.open("./doc/vcflib.md","w") { |f| + f.print <
+ +HEADER + TYPES.each do | type | + f.print %{ +## #{type} + +| #{type} command | description | +| :-------------- | :---------- | +} + + index.each do | rec | + rec = OpenStruct.new(rec) + if rec.type == type + b = binding + f.print renderer.result(b) + end + end + end + github = "https://github.com/vcflib/vcflib" + f.print <