diff -Nru fast5-0~20150918/debian/changelog fast5-0.5.6/debian/changelog --- fast5-0~20150918/debian/changelog 2016-01-20 10:11:22.000000000 +0000 +++ fast5-0.5.6/debian/changelog 2016-08-12 06:10:21.000000000 +0000 @@ -1,3 +1,18 @@ +fast5 (0.5.6-1) unstable; urgency=medium + + * Activate d/watch + * Imported Upstream version 0.5.6 (Closes: #832035) + * Update copyright years and email address + * Change section to science + * Bump Standards-Version to 3.9.8 + * Use encrypted protocols for VCS URLs + * Remove obsolete autopkgtest + * Update patches + * Update name of README + * Remove obsolete examples + + -- Afif Elghraoui Thu, 11 Aug 2016 23:09:17 -0700 + fast5 (0~20150918-1) unstable; urgency=low * Initial release - upstream git revision 0639cc5 (Closes: #811460) diff -Nru fast5-0~20150918/debian/control fast5-0.5.6/debian/control --- fast5-0~20150918/debian/control 2016-01-20 10:11:22.000000000 +0000 +++ fast5-0.5.6/debian/control 2016-08-12 02:21:38.000000000 +0000 @@ -1,15 +1,14 @@ Source: fast5 -Section: libs +Section: science Priority: optional Maintainer: Debian Med Packaging Team -Uploaders: - Afif Elghraoui , +Uploaders: Afif Elghraoui Build-Depends: debhelper (>= 9), -Standards-Version: 3.9.6 +Standards-Version: 3.9.8 Homepage: https://github.com/mateidavid/fast5 -Vcs-Git: git://anonscm.debian.org/debian-med/fast5.git -Vcs-Browser: http://anonscm.debian.org/cgit/debian-med/fast5.git +Vcs-Git: https://anonscm.debian.org/git/debian-med/fast5.git +Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/fast5.git Package: libfast5-dev Section: libdevel diff -Nru fast5-0~20150918/debian/copyright fast5-0.5.6/debian/copyright --- fast5-0~20150918/debian/copyright 2016-01-20 10:11:22.000000000 +0000 +++ fast5-0.5.6/debian/copyright 2016-08-12 02:09:16.000000000 +0000 @@ -4,15 +4,12 @@ Files: * Copyright: - 2015 Matei David, Ontario Institute for Cancer Research + 2015-2016 Matei David, Ontario Institute for Cancer Research 2015 Jared Simpson, Ontario Institute for Cancer Research License: MIT -Comment: - Licensing terms are mentioned on https://github.com/mateidavid/fast5/issues/7#issuecomment-172662247 - and are noted in the source distribution in the current upstream revision. Files: debian/* -Copyright: 2016 Afif Elghraoui +Copyright: 2016 Afif Elghraoui License: MIT License: MIT diff -Nru fast5-0~20150918/debian/docs fast5-0.5.6/debian/docs --- fast5-0~20150918/debian/docs 2016-01-20 10:11:22.000000000 +0000 +++ fast5-0.5.6/debian/docs 2016-08-12 02:55:13.000000000 +0000 @@ -1 +1 @@ -README.md +README.org diff -Nru fast5-0~20150918/debian/examples fast5-0.5.6/debian/examples --- fast5-0~20150918/debian/examples 2016-01-20 10:11:22.000000000 +0000 +++ fast5-0.5.6/debian/examples 1970-01-01 00:00:00.000000000 +0000 @@ -1,2 +0,0 @@ -src/a.cpp -src/Makefile diff -Nru fast5-0~20150918/debian/patches/relative-paths.patch fast5-0.5.6/debian/patches/relative-paths.patch --- fast5-0~20150918/debian/patches/relative-paths.patch 2016-01-20 10:11:22.000000000 +0000 +++ fast5-0.5.6/debian/patches/relative-paths.patch 2016-08-12 01:58:31.000000000 +0000 @@ -6,12 +6,12 @@ Last-Update: 2016-01-19 --- fast5.orig/src/fast5.hpp +++ fast5/src/fast5.hpp -@@ -9,7 +9,7 @@ - #include - #include +@@ -14,7 +14,7 @@ + #include + #include -#include "hdf5_tools.hpp" +#include #define MAX_K_LEN 8 - namespace fast5 + namespace diff -Nru fast5-0~20150918/debian/patches/revise-example.patch fast5-0.5.6/debian/patches/revise-example.patch --- fast5-0~20150918/debian/patches/revise-example.patch 2016-01-20 10:11:22.000000000 +0000 +++ fast5-0.5.6/debian/patches/revise-example.patch 1970-01-01 00:00:00.000000000 +0000 @@ -1,31 +0,0 @@ -Description: Revise example as applicable for Debian. - The main changes are to find the hdf5 libraries as they are - installed by the Debian package. -Author: Afif Elghraoui -Forwarded: not-needed -Last-Update: 2016-01-19 ---- fast5.orig/src/Makefile -+++ fast5/src/Makefile -@@ -1,2 +1,9 @@ --a: a.cpp fast5.hpp hdf5_tools.hpp -- g++ -std=c++11 -O0 -g3 -ggdb -fno-eliminate-unused-debug-types -Wall -Wextra -pedantic -Wno-unused-parameter -o $@ $^ -L /usr/local/lib -lhdf5 -+ -+a: a.cpp -+ h5c++ -std=c++11 -O0 -g3 -ggdb -fno-eliminate-unused-debug-types -Wall -Wextra -pedantic -Wno-unused-parameter -o $@ -lhdf5 -+ -+ -+clean: -+ $(RM) a -+ -+.PHONY: clean ---- fast5.orig/src/a.cpp -+++ fast5/src/a.cpp -@@ -2,7 +2,7 @@ - #include - #include - --#include "fast5.hpp" -+#include - - using namespace std; - diff -Nru fast5-0~20150918/debian/patches/series fast5-0.5.6/debian/patches/series --- fast5-0~20150918/debian/patches/series 2016-01-20 10:11:22.000000000 +0000 +++ fast5-0.5.6/debian/patches/series 2016-08-12 02:08:35.000000000 +0000 @@ -1,2 +1 @@ relative-paths.patch -revise-example.patch diff -Nru fast5-0~20150918/debian/tests/control fast5-0.5.6/debian/tests/control --- fast5-0~20150918/debian/tests/control 2016-01-20 10:11:22.000000000 +0000 +++ fast5-0.5.6/debian/tests/control 1970-01-01 00:00:00.000000000 +0000 @@ -1,6 +0,0 @@ -Test-Command: cd src && make; make clean -Depends: - @, - make, -Restrictions: - rw-build-tree, diff -Nru fast5-0~20150918/debian/watch fast5-0.5.6/debian/watch --- fast5-0~20150918/debian/watch 2016-01-20 10:11:22.000000000 +0000 +++ fast5-0.5.6/debian/watch 2016-08-12 01:52:50.000000000 +0000 @@ -1,4 +1,5 @@ version=3 -# Upstream has no formal releases for this project. We update this -# as needed for nanopolish (where it's bundled) +opts="filenamemangle=s/(?:.*?)?v?(\d[\d.]*)\.tar\.gz/fast5-$1.tar.gz/" \ + https://github.com/mateidavid/fast5/tags \ + (?:.*?/)?v?(\d[\d.]*)\.tar\.gz diff -Nru fast5-0~20150918/.gitignore fast5-0.5.6/.gitignore --- fast5-0~20150918/.gitignore 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/.gitignore 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1,2 @@ +/build* +/local* diff -Nru fast5-0~20150918/LICENSE fast5-0.5.6/LICENSE --- fast5-0~20150918/LICENSE 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/LICENSE 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2015 Matei David, Ontario Institute for Cancer Research +Copyright (c) 2015 Jared Simpson, Ontario Institute for Cancer Research + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff -Nru fast5-0~20150918/python/fast5/__init__.py fast5-0.5.6/python/fast5/__init__.py --- fast5-0~20150918/python/fast5/__init__.py 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/python/fast5/__init__.py 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1,10 @@ +""" +fast5.__init__.py +(c) 2016: Matei David, Ontario Institute for Cancer Research +MIT License +""" + +from .version import __version__ +from fast5 import * + +__version_info__ = tuple([int(num) for num in __version__.split('.')]) diff -Nru fast5-0~20150918/python/fast5/source/fast5.cpp fast5-0.5.6/python/fast5/source/fast5.cpp --- fast5-0~20150918/python/fast5/source/fast5.cpp 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/python/fast5/source/fast5.cpp 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1,182 @@ +#include +#include +#include +#include + +#include "fast5.hpp" + +namespace bp = boost::python; + +// member functions with default arguments +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_raw_samples_params_overloads, get_raw_samples_params, 0, 1) +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_raw_samples_overloads, get_raw_samples, 0, 1) +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_eventdetection_read_name_list_overloads, get_eventdetection_read_name_list, 0, 1) +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(have_eventdetection_events_overloads, have_eventdetection_events, 0, 1) +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_eventdetection_params_overloads, get_eventdetection_params, 0, 1) +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_eventdetection_event_params_overloads, get_eventdetection_event_params, 0, 2) +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_eventdetection_events_overloads, get_eventdetection_events, 0, 2) +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(have_basecall_fastq_overlords, have_basecall_fastq, 1, 2) +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_fastq_overlords, get_basecall_fastq, 1, 2) +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(have_basecall_seq_overlords, have_basecall_seq, 1, 2) +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_seq_overlords, get_basecall_seq, 1, 2) +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(have_basecall_model_overlords, have_basecall_model, 1, 2) +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_model_file_overlords, get_basecall_model_file, 1, 2) +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_model_params_overlords, get_basecall_model_params, 1, 2) +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_model_overlords, get_basecall_model, 1, 2) +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(have_basecall_events_overlords, have_basecall_events, 1, 2) +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_events_overlords, get_basecall_events, 1, 2) +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(have_basecall_event_alignment_overlords, have_basecall_event_alignment, 0, 1) +BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_event_alignment_overlords, get_basecall_event_alignment, 0, 1) + +BOOST_PYTHON_MODULE(fast5) +{ + bp::class_("Channel_Id_Parameters") + .def_readwrite("channel_number", &fast5::Channel_Id_Parameters::channel_number) + .def_readwrite("digitisation", &fast5::Channel_Id_Parameters::digitisation) + .def_readwrite("offset", &fast5::Channel_Id_Parameters::offset) + .def_readwrite("range", &fast5::Channel_Id_Parameters::range) + .def_readwrite("sampling_rate", &fast5::Channel_Id_Parameters::sampling_rate) + ; + bp::class_("Raw_Samples_Parameters") + .def_readwrite("read_id", &fast5::Raw_Samples_Parameters::read_id) + .def_readwrite("read_number", &fast5::Raw_Samples_Parameters::read_number) + .def_readwrite("start_mux", &fast5::Raw_Samples_Parameters::start_mux) + .def_readwrite("start_time", &fast5::Raw_Samples_Parameters::start_time) + .def_readwrite("duration", &fast5::Raw_Samples_Parameters::duration) + ;; + bp::class_("EventDetection_Event_Parameters") + .def_readwrite("read_id", &fast5::EventDetection_Event_Parameters::read_id) + .def_readwrite("read_number", &fast5::EventDetection_Event_Parameters::read_number) + .def_readwrite("scaling_used", &fast5::EventDetection_Event_Parameters::scaling_used) + .def_readwrite("start_mux", &fast5::EventDetection_Event_Parameters::start_mux) + .def_readwrite("start_time", &fast5::EventDetection_Event_Parameters::start_time) + .def_readwrite("duration", &fast5::EventDetection_Event_Parameters::duration) + .def_readwrite("median_before", &fast5::EventDetection_Event_Parameters::median_before) + .def_readwrite("abasic_found", &fast5::EventDetection_Event_Parameters::abasic_found) + ; + bp::class_("EventDetection_Event_Entry") + .def_readwrite("mean", &fast5::EventDetection_Event_Entry::mean) + .def_readwrite("stdv", &fast5::EventDetection_Event_Entry::stdv) + .def_readwrite("start", &fast5::EventDetection_Event_Entry::start) + .def_readwrite("length", &fast5::EventDetection_Event_Entry::length) + ; + bp::class_("Model_Entry") + .def_readwrite("variant", &fast5::Model_Entry::variant) + .def_readwrite("level_mean", &fast5::Model_Entry::level_mean) + .def_readwrite("level_stdv", &fast5::Model_Entry::level_stdv) + .def_readwrite("sd_mean", &fast5::Model_Entry::sd_mean) + .def_readwrite("sd_stdv", &fast5::Model_Entry::sd_stdv) + .def_readwrite("weight", &fast5::Model_Entry::weight) + .def_readwrite("kmer", &fast5::Model_Entry::kmer) + ; + bp::class_("Model_Parameters") + .def_readwrite("scale", &fast5::Model_Parameters::scale) + .def_readwrite("shift", &fast5::Model_Parameters::shift) + .def_readwrite("drift", &fast5::Model_Parameters::drift) + .def_readwrite("var", &fast5::Model_Parameters::var) + .def_readwrite("scale_sd", &fast5::Model_Parameters::scale_sd) + .def_readwrite("var_sd", &fast5::Model_Parameters::var_sd) + ; + bp::class_("Event_Entry") + .def_readwrite("mean", &fast5::Event_Entry::mean) + .def_readwrite("stdv", &fast5::Event_Entry::stdv) + .def_readwrite("start", &fast5::Event_Entry::start) + .def_readwrite("length", &fast5::Event_Entry::length) + .def_readwrite("p_model_state", &fast5::Event_Entry::p_model_state) + .def_readwrite("p_mp_state", &fast5::Event_Entry::p_mp_state) + .def_readwrite("p_A", &fast5::Event_Entry::p_A) + .def_readwrite("p_C", &fast5::Event_Entry::p_C) + .def_readwrite("p_G", &fast5::Event_Entry::p_G) + .def_readwrite("p_T", &fast5::Event_Entry::p_T) + .def_readwrite("move", &fast5::Event_Entry::move) + .def_readwrite("model_state", &fast5::Event_Entry::model_state) + .def_readwrite("mp_state", &fast5::Event_Entry::mp_state) + ;; + bp::class_("Event_Alignment_Entry") + .def_readwrite("template_index", &fast5::Event_Alignment_Entry::template_index) + .def_readwrite("complement_index", &fast5::Event_Alignment_Entry::complement_index) + .def("get_kmer", &fast5::Event_Alignment_Entry::get_kmer) + ;; + + bp::class_>("Map_Str_Str") + .def(bp::map_indexing_suite>()) + ; + bp::class_>("Vec_Str") + .def(bp::vector_indexing_suite>()) + ; + bp::class_>("Vec_Raw_Samples_Entry") + .def(bp::vector_indexing_suite>()) + ; + bp::class_>("Vec_EventDetection_Event_Entry") + .def(bp::vector_indexing_suite>()) + ; + bp::class_>("Vec_Model_Entry") + .def(bp::vector_indexing_suite>()) + ; + bp::class_>("Vec_Event_Entry") + .def(bp::vector_indexing_suite>()) + ; + bp::class_>("Vec_Event_Alignment_Entry") + .def(bp::vector_indexing_suite>()) + ; + + bp::class_("File") + .def(bp::init>()) + .def("is_open", &fast5::File::is_open) + .def("is_rw", &fast5::File::is_rw) + .def("file_name", &fast5::File::file_name, bp::return_value_policy()) + .def("open", &fast5::File::open) + .def("create", &fast5::File::create) + .def("close", &fast5::File::close) + .def("is_valid_file", &hdf5_tools::File::is_valid_file).staticmethod("is_valid_file") + .def("get_object_count", &hdf5_tools::File::get_object_count).staticmethod("get_object_count") + // + .def("file_version", &fast5::File::file_version) + // + .def("have_channel_id_params", &fast5::File::have_channel_id_params) + .def("get_channel_id_params", &fast5::File::get_channel_id_params) + // + .def("have_sampling_rate", &fast5::File::have_sampling_rate) + .def("get_sampling_rate", &fast5::File::get_sampling_rate) + // + .def("have_tracking_id_params", &fast5::File::have_tracking_id_params) + .def("get_tracking_id_params", &fast5::File::get_tracking_id_params) + // + .def("have_sequences_params", &fast5::File::have_sequences_params) + .def("get_sequences_params", &fast5::File::get_sequences_params) + // + .def("get_raw_samples_read_name_list", &fast5::File::get_raw_samples_read_name_list, bp::return_value_policy()) + .def("have_raw_samples", &fast5::File::have_raw_samples) + .def("get_raw_samples_params", &fast5::File::get_raw_samples_params, get_raw_samples_params_overloads()) + .def("get_raw_samples", &fast5::File::get_raw_samples, get_raw_samples_overloads()) + // + .def("get_eventdetection_group_list", &fast5::File::get_eventdetection_group_list, bp::return_value_policy()) + .def("have_eventdetection_groups", &fast5::File::have_eventdetection_groups) + .def("get_eventdetection_read_name_list", &fast5::File::get_eventdetection_read_name_list, get_eventdetection_read_name_list_overloads()) + .def("have_eventdetection_events", &fast5::File::have_eventdetection_events, have_eventdetection_events_overloads()) + .def("get_eventdetection_params", &fast5::File::get_eventdetection_params, get_eventdetection_params_overloads()) + .def("get_eventdetection_event_params", &fast5::File::get_eventdetection_event_params, get_eventdetection_event_params_overloads()) + .def("get_eventdetection_events", &fast5::File::get_eventdetection_events, get_eventdetection_events_overloads()) + // + .def("get_basecall_group_list", &fast5::File::get_basecall_group_list, bp::return_value_policy()) + .def("have_basecall_groups", &fast5::File::have_basecall_groups) + .def("get_basecall_strand_group_list", &fast5::File::get_basecall_strand_group_list, bp::return_value_policy()) + .def("have_basecall_strand_groups", &fast5::File::have_basecall_strand_groups) + .def("have_basecall_log", &fast5::File::have_basecall_log) + .def("get_basecall_log", &fast5::File::get_basecall_log) + .def("have_basecall_fastq", &fast5::File::have_basecall_fastq, have_basecall_fastq_overlords()) + .def("get_basecall_fastq", &fast5::File::get_basecall_fastq, get_basecall_fastq_overlords()) + .def("add_basecall_fastq", &fast5::File::add_basecall_fastq) + .def("have_basecall_seq", &fast5::File::have_basecall_seq, have_basecall_seq_overlords()) + .def("get_basecall_seq", &fast5::File::get_basecall_seq, get_basecall_seq_overlords()) + .def("add_basecall_seq", &fast5::File::add_basecall_seq) + .def("have_basecall_model", &fast5::File::have_basecall_model, have_basecall_model_overlords()) + .def("get_basecall_model_file", &fast5::File::get_basecall_model_file, get_basecall_model_file_overlords()) + .def("get_basecall_model_params", &fast5::File::get_basecall_model_params, get_basecall_model_params_overlords()) + .def("get_basecall_model", &fast5::File::get_basecall_model, get_basecall_model_overlords()) + .def("have_basecall_events", &fast5::File::have_basecall_events, have_basecall_events_overlords()) + .def("get_basecall_events", &fast5::File::get_basecall_events, get_basecall_events_overlords()) + .def("have_basecall_event_alignment", &fast5::File::have_basecall_event_alignment, have_basecall_event_alignment_overlords()) + .def("get_basecall_event_alignment", &fast5::File::get_basecall_event_alignment, get_basecall_event_alignment_overlords()) + ; +} diff -Nru fast5-0~20150918/python/fast5/version.py fast5-0.5.6/python/fast5/version.py --- fast5-0~20150918/python/fast5/version.py 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/python/fast5/version.py 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1 @@ +__version__ = '0.5.6' diff -Nru fast5-0~20150918/python/fast5/.version.py.in fast5-0.5.6/python/fast5/.version.py.in --- fast5-0~20150918/python/fast5/.version.py.in 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/python/fast5/.version.py.in 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1 @@ +__version__ = '${VERSION}' diff -Nru fast5-0~20150918/python/.gitignore fast5-0.5.6/python/.gitignore --- fast5-0~20150918/python/.gitignore 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/python/.gitignore 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1,5 @@ +build/ +dist/ +*.egg-info/ +*.pyc +*.so diff -Nru fast5-0~20150918/python/Makefile fast5-0.5.6/python/Makefile --- fast5-0~20150918/python/Makefile 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/python/Makefile 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1,35 @@ +.SUFFIXES: +MAKEFLAGS += -r +SHELL := /bin/bash +.DELETE_ON_ERROR: +.PHONY: all help clean check_virtualenv develop develop-user develop-uninstall develop-uninstall-user + +PYTHON = $(shell which python) + +all: help + +print-%: + @echo '$*=$($*)' + +help: ## This help. + @awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST) + +clean: ## Remove build products + ${PYTHON} setup.py clean + rm -rf fast5.egg-info build dist + find fast5/ \( -name '*.pyc' -o -name '*.so' \) -delete + +check_virtualenv: + @[ "$$VIRTUAL_ENV" ] || { echo "not in a virtualenv" >&2; exit 1; } + +develop: check_virtualenv clean ## Install in develop mode to current virtualenv + ${PYTHON} setup.py develop + +develop-user: clean ## Install in develop mode to current user + ${PYTHON} setup.py develop --user + +develop-uninstall: check_virtualenv clean ## Uninstall from current virtualenv + ${PYTHON} setup.py develop --uninstall + +develop-uninstall-user: clean ## Uninstall from current user + ${PYTHON} setup.py develop --uninstall --user diff -Nru fast5-0~20150918/python/setup.py fast5-0.5.6/python/setup.py --- fast5-0~20150918/python/setup.py 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/python/setup.py 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1,94 @@ +""" +fast5.setup.py +(c) 2016: Matei David, Ontario Institute for Cancer Research +MIT License +""" + +import os +import re +import pkg_resources +import sys +from setuptools import setup, Extension + +exec(open('fast5/version.py').read()) + +# check HDF5 include and lib dirs +hdf5_dir = os.environ.get('HDF5_DIR', '/usr') +hdf5_include_dir = os.environ.get('HDF5_INCLUDE_DIR', os.path.join(hdf5_dir, 'include')) +hdf5_lib_dir = os.environ.get('HDF5_LIB_DIR', os.path.join(hdf5_dir, 'lib')) +hdf5_lib = os.environ.get('HDF_LIB', 'hdf5') +if not os.path.isfile(os.path.join(hdf5_include_dir, 'H5pubconf.h')): + sys.exit(hdf5_include_dir + ': could not find HDF5 header files; use HDF5_DIR or HDF5_INCLUDE_DIR') +if (not os.path.isfile(os.path.join(hdf5_lib_dir, 'lib' + hdf5_lib + '.so')) + and not os.path.isfile(os.path.join(hdf5_lib_dir, 'lib' + hdf5_lib + '.a'))): + sys.exit(hdf5_lib_dir + ': could not find HDF5 library file; use HDF5_DIR or HDF5_LIB_DIR/HDF5_LIB') + +# check Boost.Python include and lib dirs +boost_dir = os.environ.get('BOOST_DIR', '/usr') +boost_include_dir = os.environ.get('BOOST_INCLUDE_DIR', os.path.join(boost_dir, 'include')) +boost_lib_dir = os.environ.get('BOOST_LIB_DIR', os.path.join(boost_dir, 'lib')) +boost_python_lib = os.environ.get('BOOST_PYTHON_LIB', 'boost_python') +if not os.path.isfile(os.path.join(boost_include_dir, 'boost', 'python.hpp')): + sys.exit(boost_include_dir + ': could not find Boost Python header files; use BOOST_DIR or BOOST_INCLUDE_DIR') +if (not os.path.isfile(os.path.join(boost_lib_dir, 'lib' + boost_python_lib + '.so')) + and not os.path.isfile(os.path.join(boost_lib_dir, 'lib' + boost_python_lib + '.a'))): + sys.exit(boost_lib_dir + ': could not find Boost Python library file; use BOOST_DIR or BOOST_LIB_DIR/BOOST_PYTHON_LIB') + +fast5_dir = os.environ.get('FAST5_DIR', os.path.join('..', 'src')) + +extra_compile_args = [ + '-std=c++11', + '-Wall', '-Wextra', '-Wpedantic', + '-isystem', hdf5_include_dir, + '-isystem', boost_include_dir, +] +#extra_compile_args += ['-O0', '-g3', '-ggdb', '-fno-eliminate-unused-debug-types', '-v'] +extra_link_args = [] +#extra_link_args += ['-v'] + +extensions = [ + Extension( + 'fast5.fast5', + include_dirs=[ + fast5_dir, + ], + sources=[ + os.path.join('fast5', 'source', 'fast5.cpp'), + ], + depends=[ + os.path.join(fast5_dir, fn) + for fn in ['fast5.hpp', 'hdf5_tools.hpp'] + ], + extra_compile_args=extra_compile_args, + extra_link_args=extra_link_args, + library_dirs=[ + hdf5_lib_dir, + boost_lib_dir, + ], + runtime_library_dirs=[ + hdf5_lib_dir, + boost_lib_dir, + ], + libraries=[ + hdf5_lib, + boost_python_lib, + ], + ), +] + +setup( + name='fast5', + description='Fast5 file interface.', + version=__version__, + #long_description=open('README').read(), + author='Matei David, Ontario Institute for Cancer Research', + author_email='matei.david at oicr.on.ca', + license='MIT', + url='https://github.com/mateidavid/fast5', + packages=['fast5'], + exclude_package_data={ + '': ['*.c', '*.cpp', '*.h', '*.hpp'], + }, + ext_modules=extensions, + scripts=[], +) diff -Nru fast5-0~20150918/README.md fast5-0.5.6/README.md --- fast5-0~20150918/README.md 2015-09-18 17:12:49.000000000 +0000 +++ fast5-0.5.6/README.md 1970-01-01 00:00:00.000000000 +0000 @@ -1,11 +0,0 @@ -# FAST5 - -A lightweight C++11 library to read raw signal data from Oxford Nanopore's FAST5 files. - -## Installation instructions - -This library is provided as header files only, so you only need to copy ```fast5.hpp``` and ```hdf5_tools.hpp``` into your project. - -## Usage instructions - -See ```a.cpp``` for an example. diff -Nru fast5-0~20150918/README.org fast5-0.5.6/README.org --- fast5-0~20150918/README.org 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/README.org 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1,48 @@ +# -*- mode:org; mode:visual-line; coding:utf-8; -*- + +** Fast5 Library + +[[http://travis-ci.org/mateidavid/fast5][http://travis-ci.org/mateidavid/fast5.svg?branch=master]] + +A lightweight C++11 library to read raw signal data from Oxford Nanopore's Fast5 files. + +*** C++ + +**** Installation + +This is a header-only library. You only need to copy [[file:src/fast5.hpp][src/fast5.hpp]] and [[file:src/hdf5_tools.hpp][src/hdf5_tools.hpp]] into your C++ project. + +**** Usage + +See [[file:src/f5dump.cpp][src/f5dump.cpp]] for an example. + +*** Python Wrapper + +An optional python wrapper for this library is available through Boost.Python. The wrapper currently implements only read-only access. + +**** Installation + +#+BEGIN_EXAMPLE +cd python +HDF5_DIR=/usr/local BOOST_DIR=/usr/local make develop-user +#+END_EXAMPLE + +Notes: + +- HDF5 and Boost.Python must be available, and their locations can be passed on to the Python setup process using the environment variables =HDF5_DIR= and =BOOST_DIR=. Alternatively, the respective include directories, library directories, and library names may be specified explicitly with: =HDF5_INCLUDE_DIR=, =HDF5_LIB_DIR=, =HDF5_LIB=, =BOOST_INCLUDE_DIR=, =BOOST_LIB_DIR=, =BOOST_PYTHON_LIB=. For details, see [[file:python/setup.py][python/setup.py]] and [[file:.travis.yml][.travis.yml]]. + +- To install =fast5= as a package in a virtualenv, use the target =develop=. To install as a user package, use the target =develop-user=. For details, see [[file:python/Makefile][python/Makefile]]. + +**** Usage + +#+BEGIN_EXAMPLE +import fast5 +f = fast5.File("file.000.fast5") +print(f.file_version()) +print(f.have_eventdetection_events()) +#+END_EXAMPLE + +*** License + +[[file:LICENSE][MIT License]]. + diff -Nru fast5-0~20150918/src/a.cpp fast5-0.5.6/src/a.cpp --- fast5-0~20150918/src/a.cpp 2015-09-18 17:12:49.000000000 +0000 +++ fast5-0.5.6/src/a.cpp 1970-01-01 00:00:00.000000000 +0000 @@ -1,86 +0,0 @@ -#include -#include -#include - -#include "fast5.hpp" - -using namespace std; - - -int main(int argc, char* argv[]) -{ - assert(argc == 2); - string file_name(argv[1]); - //string ds_name(argv[2]); - - // Open the FAST5 file for reading - fast5::File* f_p; - f_p = new fast5::File(file_name); - - // Check that it opened successfully - assert(f_p->is_open()); - - // Extract version information for the ONT software used to generate this dataset - cout << "file_version=" << f_p->file_version() << endl; - cout << "basecall_version=" << f_p->basecall_version() << endl; - cout << "eventdetection_version=" << f_p->eventdetection_version() << endl; - cout << "sequences_version=" << f_p->sequences_version() << endl; - - // This function checks to see if 2D basecalls are available - if(f_p->have_basecalled_2D()) - { - cout << "basecalled_2D=" << f_p->basecalled_2D() << endl; - - // Extract the alignment between template and complement events - // which were generated by the 2D basecaller - auto v = f_p->get_event_alignments(); - cout << "event_alignment().size()=" << v.size() << endl; - for (const auto& e : v) - { - cout << "(template=" << e.template_index << ", complement=" << e.complement_index << ", kmer=" << e.kmer << ")" << endl; - } - } - - // Iterate over the template/complement strands - for (size_t i = 0; i < 2; ++i) - { - // Check if a pore model for this strand exists - if (f_p->have_model(i)) - { - // Print the name of ONT's reference model used to basecall - cout << "Model file: " << f_p->get_model_file(i) << endl; - - // Extract the global scaling parameters for the pore model - auto params = f_p->get_model_parameters(i); - cout << "model drift=" << params.drift << - ", scale=" << params.scale << - ", scale_sd=" << params.scale_sd << - ", shift=" << params.shift << - ", var=" << params.var << - ", var_sd=" << params.var_sd << endl; - - // Extract the expected current levels for each k-mer - auto v = f_p->get_model(i); - cout << "model(" << i << ").size()=" << v.size() << endl; - for (const auto& e : v) - { - cout << "(kmer=" << e.kmer << ", level_mean=" << e.level_mean << ", level_stdv=" << e.level_stdv << ")" << endl; - } - } - - // Check if this strand has event observations - if (f_p->have_events(i)) - { - // Extract each event - auto v = f_p->get_events(i); - cout << "events(" << i << ").size()=" << v.size() << endl; - for (const auto& e : v) - { - cout << "(mean=" << e.mean << ", start=" << e.start << ", stdv=" << e.stdv << ", length=" << e.length << ")" << endl; - } - } - } - - // Cleanup the file pointer, which closes the file - delete f_p; -} diff -Nru fast5-0~20150918/src/f5dump.cpp fast5-0.5.6/src/f5dump.cpp --- fast5-0~20150918/src/f5dump.cpp 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/src/f5dump.cpp 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1,197 @@ +#include +#include +#include + +#include "fast5.hpp" + +using namespace std; + +template < typename T > +void print_vector(ostream& os, const vector< T >& v, const string& delim) +{ + for (auto it = v.begin(); it != v.end(); ++it) + { + if (it != v.begin()) os << delim; + os << *it; + } +} +template < typename U, typename V > +void print_map(ostream& os, const map< U, V >& m, const string& prefix) +{ + for (const auto& p : m) + { + os << prefix << p.first << "=" << p.second << endl; + } +} + +int main(int argc, char* argv[]) +{ + if (argc != 2) + { + cerr << "use: " << argv[0] << " " << endl; + return EXIT_FAILURE; + } + string file_name(argv[1]); + // + // open the FAST5 file for reading + // + if (not fast5::File::is_valid_file(file_name)) + { + cout << "not a fast5 file [" << file_name << "]" << endl; + return EXIT_SUCCESS; + } + { + fast5::File f; + // + // All fast5 operations are performed inside a try-catch block. This should + // resist various hdf5 errors without leaking memory. + // + try + { + // + // open file + // + f.open(file_name); + assert(f.is_open()); + // + // extract version information for the ONT software used to generate this dataset + // + cout << "file_version=" << f.file_version() << endl; + // + // inspect channel_id params + // + if (f.have_channel_id_params()) + { + auto channel_id_params = f.get_channel_id_params(); + cout << "channel_id/channel_number=" << channel_id_params.channel_number << endl + << "channel_id/digitisation=" << channel_id_params.digitisation << endl + << "channel_id/offset=" << channel_id_params.offset << endl + << "channel_id/range=" << channel_id_params.range << endl + << "channel_id/sampling_rate=" << channel_id_params.sampling_rate << endl; + } + // + // inspect tracking_id params + // + if (f.have_tracking_id_params()) + { + auto tracking_id_params = f.get_tracking_id_params(); + print_map(cout, tracking_id_params, "tracking_id/"); + } + // + // inspect sequences params + // + if (f.have_sequences_params()) + { + auto sequences_params = f.get_sequences_params(); + print_map(cout, sequences_params, "sequences/"); + } + // + // inspect raw samples + // + if (f.have_raw_samples()) + { + auto rs_params = f.get_raw_samples_params(); + auto rs = f.get_raw_samples(); + cout << "raw_samples/read_id=" << rs_params.read_id << endl + << "raw_samples/read_number=" << rs_params.read_number << endl + << "raw_samples/start_mux=" << rs_params.start_mux << endl + << "raw_samples/start_time=" << rs_params.start_time << endl + << "raw_samples/duration=" << rs_params.duration << endl + << "raw_samples/size=" << rs.size() << endl; + const auto& e = rs.front(); + cout << " (" << e << ")" << endl; + } + // + // inspect eventdetection events + // + cout << "eventdetection_group_list="; + print_vector(cout, f.get_eventdetection_group_list(), ","); + cout << endl; + if (f.have_eventdetection_events()) + { + auto ed_params = f.get_eventdetection_params(); + print_map(cout, ed_params, "eventdetection/"); + auto ed_ev_params = f.get_eventdetection_event_params(); + auto ed_ev = f.get_eventdetection_events(); + cout << "eventdetection/events/abasic_found=" << ed_ev_params.abasic_found << endl + << "eventdetection/events/duration=" << ed_ev_params.duration << endl + << "eventdetection/events/median_before=" << ed_ev_params.median_before << endl + << "eventdetection/events/read_id=" << ed_ev_params.read_id << endl + << "eventdetection/events/read_number=" << ed_ev_params.read_number << endl + << "eventdetection/events/scaling_used=" << ed_ev_params.scaling_used << endl + << "eventdetection/events/start_mux=" << ed_ev_params.start_mux << endl + << "eventdetection/events/start_time=" << ed_ev_params.start_time << endl + << "eventdetection/events/size=" << ed_ev.size() << endl; + const auto& e = ed_ev.front(); + cout << " (mean=" << e.mean + << ", stdv=" << e.stdv + << ", start=" << e.start + << ", length=" << e.length << ")" << endl; + } // if have_eventdetection_events + // + // inspect basecall groups + // + for (unsigned st = 0; st < 3; ++st) + { + cout << "basecall(" << st << ")/group_list="; + print_vector(cout, f.get_basecall_strand_group_list(st), ","); + cout << endl; + // basecall sequence + if (f.have_basecall_seq(st)) + { + cout << "basecall(" << st << ")/seq_size=" << f.get_basecall_seq(st).size() << endl; + } + // basecall model + if (f.have_basecall_model(st)) + { + cout << "basecall(" << st << ")/model_file=" << f.get_basecall_model_file(st) << endl; + auto m_params = f.get_basecall_model_params(st); + auto m = f.get_basecall_model(st); + cout << "basecall(" << st << ")/model/scale=" << m_params.scale << endl + << "basecall(" << st << ")/model/shift=" << m_params.shift << endl + << "basecall(" << st << ")/model/drift=" << m_params.drift << endl + << "basecall(" << st << ")/model/var=" << m_params.var << endl + << "basecall(" << st << ")/model/scale_sd=" << m_params.scale_sd << endl + << "basecall(" << st << ")/model/var_sd=" << m_params.var_sd << endl + << "basecall(" << st << ")/model/size=" << m.size() << endl; + const auto& e = m.front(); + cout << " (kmer=" << e.get_kmer() + << ", level_mean=" << e.level_mean + << ", level_stdv=" << e.level_stdv << ")" << endl; + } + // basecall events + if (f.have_basecall_events(st)) + { + auto ev = f.get_basecall_events(st); + cout << "basecall(" << st << ")/events/size=" << ev.size() << endl; + const auto& e = ev.front(); + cout << " (mean=" << e.mean + << ", stdv=" << e.stdv + << ", start=" << e.start + << ", length=" << e.length + << ", model_state=" << e.get_model_state() + << ", p_model_state=" << e.p_model_state + << ", move=" << e.move << ")" << endl; + } + // basecall event alignment + if (st == 2 and f.have_basecall_event_alignment()) + { + auto al = f.get_basecall_event_alignment(); + cout << "basecall(2)/event_alignment/size=" << al.size() << endl; + const auto& e = al.front(); + cout << " (template_index=" << e.template_index + << ", complement_index=" << e.complement_index + << ", kmer=" << e.get_kmer() << ")" << endl; + } + } // for st + } + catch (hdf5_tools::Exception& e) + { + cout << "hdf5 error: " << e.what() << endl; + } + // + // fast5 file is closed by its destructor at the end of this scope + // + } + assert(fast5::File::get_object_count() == 0); +} diff -Nru fast5-0~20150918/src/f5dump-full.cpp fast5-0.5.6/src/f5dump-full.cpp --- fast5-0~20150918/src/f5dump-full.cpp 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/src/f5dump-full.cpp 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1,273 @@ +#include +#include +#include + +#include "fast5.hpp" + +using namespace std; + +template < typename T > +void print_vector(ostream& os, const vector< T >& v, const string& delim) +{ + for (auto it = v.begin(); it != v.end(); ++it) + { + if (it != v.begin()) os << delim; + os << *it; + } +} +template < typename U, typename V > +void print_map(ostream& os, const map< U, V >& m, const string& prefix) +{ + for (const auto& p : m) + { + os << prefix << p.first << "=" << p.second << endl; + } +} + +int main(int argc, char* argv[]) +{ + if (argc != 2) + { + cerr << "use: " << argv[0] << " " << endl; + return EXIT_FAILURE; + } + string file_name(argv[1]); + // + // open the FAST5 file for reading + // + if (not fast5::File::is_valid_file(file_name)) + { + cout << "not a fast5 file [" << file_name << "]" << endl; + return EXIT_SUCCESS; + } + { + fast5::File f; + // + // All fast5 operations are performed inside a try-catch block. This should + // resist various hdf5 errors without leaking memory. + // + try + { + // + // open file + // + f.open(file_name); + assert(f.is_open()); + // + // extract version information for the ONT software used to generate this dataset + // + cout << "file_version=" << f.file_version() << endl; + // + // inspect channel_id params + // + bool have_channel_id_params = f.have_channel_id_params(); + cout << "have_channel_id_params=" << have_channel_id_params << endl; + if (have_channel_id_params) + { + auto channel_id_params = f.get_channel_id_params(); + cout << "channel_id/channel_number=" << channel_id_params.channel_number << endl + << "channel_id/digitisation=" << channel_id_params.digitisation << endl + << "channel_id/offset=" << channel_id_params.offset << endl + << "channel_id/range=" << channel_id_params.range << endl + << "channel_id/sampling_rate=" << channel_id_params.sampling_rate << endl; + } + // + // inspect tracking_id params + // + bool have_tracking_id_params = f.have_tracking_id_params(); + cout << "have_tracking_id_params=" << have_tracking_id_params << endl; + if (have_tracking_id_params) + { + auto tracking_id_params = f.get_tracking_id_params(); + print_map(cout, tracking_id_params, "tracking_id/"); + } + // + // inspect sequences params + // + bool have_sequences_params = f.have_sequences_params(); + cout << "have_sequences_params=" << have_sequences_params << endl; + if (have_sequences_params) + { + auto sequences_params = f.get_sequences_params(); + print_map(cout, sequences_params, "sequences/"); + } + // + // inspect raw samples + // + bool have_raw_samples = f.have_raw_samples(); + cout << "have_raw_samples=" << have_raw_samples << endl; + if (have_raw_samples) + { + auto rs_rn_list = f.get_raw_samples_read_name_list(); + cout << "raw_samples_read_name_list="; + print_vector(cout, rs_rn_list, ","); + cout << endl; + for (const auto& rn : rs_rn_list) + { + auto rs_params = f.get_raw_samples_params(); + auto rs = f.get_raw_samples(); + cout << "raw_samples/" << rn << "/read_id=" << rs_params.read_id << endl + << "raw_samples/" << rn << "/read_number=" << rs_params.read_number << endl + << "raw_samples/" << rn << "/start_mux=" << rs_params.start_mux << endl + << "raw_samples/" << rn << "/start_time=" << rs_params.start_time << endl + << "raw_samples/" << rn << "/duration=" << rs_params.duration << endl + << "raw_samples/" << rn << "/size=" << rs.size() << endl; + const auto& e = rs.front(); + cout << " (" << e << ")" << endl; + } + } + // + // inspect eventdetection groups + // + bool have_eventdetection_events = f.have_eventdetection_events(); + cout << "have_eventdetection_events=" << have_eventdetection_events << endl; + bool have_eventdetection_groups = f.have_eventdetection_groups(); + cout << "have_eventdetection_groups=" << have_eventdetection_groups << endl; + if (have_eventdetection_groups) + { + auto ed_gr_list = f.get_eventdetection_group_list(); + cout << "eventdetection_group_list="; + print_vector(cout, ed_gr_list, ","); + cout << endl; + for (const auto& ed_gr : ed_gr_list) + { + auto ed_params = f.get_eventdetection_params(ed_gr); + print_map(cout, ed_params, "eventdetection/"); + auto rn_list = f.get_eventdetection_read_name_list(ed_gr); + cout << "eventdetection/" << ed_gr << "/read_name_list="; + print_vector(cout, rn_list, ","); + cout << endl; + have_eventdetection_events = f.have_eventdetection_events(ed_gr); + cout << "eventdetection/" << ed_gr << "/have_eventdetection_events=" << have_eventdetection_events << endl; + for (const auto& rn : rn_list) + { + std::ostringstream tmp; + tmp << "eventdetection/" << ed_gr << "/" << rn; + auto ed_ev_params = f.get_eventdetection_event_params(ed_gr, rn); + auto ed_ev = f.get_eventdetection_events(ed_gr, rn); + cout << tmp.str() << "/abasic_found=" << ed_ev_params.abasic_found << endl + << tmp.str() << "/duration=" << ed_ev_params.duration << endl + << tmp.str() << "/median_before=" << ed_ev_params.median_before << endl + << tmp.str() << "/read_id=" << ed_ev_params.read_id << endl + << tmp.str() << "/read_number=" << ed_ev_params.read_number << endl + << tmp.str() << "/scaling_used=" << ed_ev_params.scaling_used << endl + << tmp.str() << "/start_mux=" << ed_ev_params.start_mux << endl + << tmp.str() << "/start_time=" << ed_ev_params.start_time << endl + << tmp.str() << "/size=" << ed_ev.size() << endl; + for (const auto& e : ed_ev) + { + cout << " (mean=" << e.mean + << ", stdv=" << e.stdv + << ", start=" << e.start + << ", length=" << e.length << ")" << endl; + break; + } + } // for rn : rn_list + } // for ed_gr : ed_gr_list + } // if have_eventdetection_groups + // + // inspect basecall groups + // + bool have_basecall_groups = f.have_basecall_groups(); + cout << "have_basecall_groups=" << have_basecall_groups << endl; + if (have_basecall_groups) + { + auto bc_gr_list = f.get_basecall_group_list(); + cout << "basecall_group_list="; + print_vector(cout, bc_gr_list, ","); + cout << endl; + for (unsigned st = 0; st < 3; ++st) + { + auto bc_st_gr_list = f.get_basecall_strand_group_list(st); + cout << "basecall_strand_group_list(" << st << ")="; + print_vector(cout, bc_st_gr_list, ","); + cout << endl; + } + for (const auto& bc_gr : bc_gr_list) + { + // dump basecall params + auto bc_params = f.get_basecall_params(bc_gr); + std::ostringstream tmp; + tmp << "basecall/" << bc_gr << "/"; + print_map(cout, bc_params, tmp.str()); + // check if basecall log exists + cout << "basecall/" << bc_gr << "/have_log=" << f.have_basecall_log(bc_gr) << endl; + } + for (unsigned st = 0; st < 3; ++st) + { + bool have_seq = f.have_basecall_seq(st); + cout << "basecall(" << st << ")/have_seq=" << have_seq << endl; + if (have_seq) + { + cout << "basecall(" << st << ")/seq=" << f.get_basecall_seq(st).substr(0, 10) << "..." << endl; + } + bool have_model = f.have_basecall_model(st); + cout << "basecall(" << st << ")/have_model=" << have_model << endl; + if (have_model) + { + cout << "basecall(" << st << ")/model_file=" << f.get_basecall_model_file(st) << endl; + auto m_params = f.get_basecall_model_params(st); + auto m = f.get_basecall_model(st); + cout << "basecall(" << st << ")/model/scale=" << m_params.scale << endl + << "basecall(" << st << ")/model/shift=" << m_params.shift << endl + << "basecall(" << st << ")/model/drift=" << m_params.drift << endl + << "basecall(" << st << ")/model/var=" << m_params.var << endl + << "basecall(" << st << ")/model/scale_sd=" << m_params.scale_sd << endl + << "basecall(" << st << ")/model/var_sd=" << m_params.var_sd << endl + << "basecall(" << st << ")/model/size=" << m.size() << endl; + for (const auto& e : m) + { + cout << " (kmer=" << e.get_kmer() + << ", level_mean=" << e.level_mean + << ", level_stdv=" << e.level_stdv << ")" << endl; + break; + } + } + bool have_events = f.have_basecall_events(st); + cout << "basecall(" << st << ")/have_events=" << have_events << endl; + if (have_events) + { + auto ev = f.get_basecall_events(st); + cout << "basecall(" << st << ")/events/size=" << ev.size() << endl; + for (const auto& e : ev) + { + cout << " (mean=" << e.mean + << ", stdv=" << e.stdv + << ", start=" << e.start + << ", length=" << e.length + << ", model_state=" << e.get_model_state() + << ", p_model_state=" << e.p_model_state + << ", move=" << e.move << ")" << endl; + break; + } + } + if (st == 2) + { + bool have_event_alignment = f.have_basecall_event_alignment(); + cout << "basecall(2)/have_event_alignment=" << have_event_alignment << endl; + if (have_event_alignment) + { + auto al = f.get_basecall_event_alignment(); + cout << "basecall(2)/event_alignment/size=" << al.size() << endl; + for (const auto& e : al) + { + cout << " (template_index=" << e.template_index + << ", complement_index=" << e.complement_index + << ", kmer=" << e.get_kmer() << ")" << endl; + break; + } + } + } + } + } // have_basecall_groups + } + catch (hdf5_tools::Exception& e) + { + cout << "hdf5 error: " << e.what() << endl; + } + // + // fast5 file is closed by its destructor at the end of this scope + // + } + assert(fast5::File::get_object_count() == 0); +} diff -Nru fast5-0~20150918/src/f5-mod.cpp fast5-0.5.6/src/f5-mod.cpp --- fast5-0~20150918/src/f5-mod.cpp 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/src/f5-mod.cpp 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1,92 @@ +#include +#include +#include + +#include "fast5.hpp" + +using namespace std; + +int main(int argc, char* argv[]) +{ + if (argc != 2) + { + cerr << "use: " << argv[0] << " " << endl; + return EXIT_FAILURE; + } + string file_name(argv[1]); + { + fast5::File f; + // + // All fast5 operations are performed inside a try-catch block. This should + // resist various hdf5 errors without leaking memory. + // + try + { + // + // open file in rw mode + // + f.open(file_name, true); + assert(f.is_open()); + assert(f.is_rw()); + // + // find next available basecall group with given prefix + // + string test_bc_grp_prefix = "Test_"; + auto bc_grp_l = f.get_basecall_group_list(); + set< string > test_bc_grp_suffix_s; + for (const auto& bc_grp : bc_grp_l) + { + if (bc_grp.compare(0, test_bc_grp_prefix.size(), test_bc_grp_prefix) == 0) + { + cerr << "found group: " << test_bc_grp_prefix + bc_grp.substr(test_bc_grp_prefix.size()) << endl; + } + test_bc_grp_suffix_s.insert(bc_grp.substr(test_bc_grp_prefix.size())); + } + string test_bc_grp_suffix; + for (unsigned i = 0; i < 1000; ++i) + { + ostringstream os; + os << setw(3) << setfill('0') << i; + if (test_bc_grp_suffix_s.count(os.str()) == 0) + { + test_bc_grp_suffix = os.str(); + break; + } + } + assert(not test_bc_grp_suffix.empty()); + clog << "using group: " << test_bc_grp_prefix + test_bc_grp_suffix << endl; + // + // add basecall seq + // + f.add_basecall_seq(0, test_bc_grp_prefix + test_bc_grp_suffix, "test_name", "ACGT"); + // + // add basecall events + // + vector< fast5::Event_Entry > ev(3, {55.0, 1.0, 0.05, 0.01, .5, .5, .7, .1, .1, .1, 0, + array< char, 8 >{"ACGTA"}, array< char, 8 >{"CGTAC"}}); + f.add_basecall_events(0, test_bc_grp_prefix + test_bc_grp_suffix, ev); + // + // add basecall pore model + // + vector< fast5::Model_Entry > mod(3, {0, 56.0, 1.0, 42.0, 1.0, 5.0, array< char, 8 >{"ACGTA"}}); + f.add_basecall_model(0, test_bc_grp_prefix + test_bc_grp_suffix, mod); + // + // add basecall pore model params + // + fast5::Model_Parameters params{1.0, 0.0, 0.0, 1.0, .9, .9}; + f.add_basecall_model_params(0, test_bc_grp_prefix + test_bc_grp_suffix, params); + // + // add basecall model file + // + f.add_basecall_model_file(0, test_bc_grp_prefix + test_bc_grp_suffix, "/dev/null"); + } + catch (hdf5_tools::Exception& e) + { + cout << "hdf5 error: " << e.what() << endl; + } + // + // fast5 file is closed by its destructor at the end of this scope + // + } + assert(hdf5_tools::File::get_object_count() == 0); +} diff -Nru fast5-0~20150918/src/fast5.hpp fast5-0.5.6/src/fast5.hpp --- fast5-0~20150918/src/fast5.hpp 2015-09-18 17:12:49.000000000 +0000 +++ fast5-0.5.6/src/fast5.hpp 2016-07-28 20:14:13.000000000 +0000 @@ -1,20 +1,84 @@ #ifndef __FAST5_HPP #define __FAST5_HPP +#include #include +#include #include #include #include #include #include #include +#include +#include +#include #include "hdf5_tools.hpp" #define MAX_K_LEN 8 +namespace +{ + inline static std::string array_to_string(const std::array< char, MAX_K_LEN >& a) + { + return std::string(a.begin(), std::find(a.begin(), a.end(), '\0')); + } +} + namespace fast5 { +struct Channel_Id_Parameters +{ + std::string channel_number; + double digitisation; + double offset; + double range; + double sampling_rate; +}; // struct Channel_Id_Parameters + +typedef std::map< std::string, std::string > Tracking_Id_Parameters; + +typedef std::map< std::string, std::string > Sequences_Parameters; + +typedef float Raw_Samples_Entry; + +struct Raw_Samples_Parameters +{ + std::string read_id; + long long read_number; + long long start_mux; + long long start_time; + long long duration; +}; // struct Raw_Samples_Parameters + +struct EventDetection_Event_Entry +{ + double mean; + double stdv; + long long start; + long long length; + friend bool operator == (const EventDetection_Event_Entry& lhs, const EventDetection_Event_Entry& rhs) + { + return lhs.mean == rhs.mean + and lhs.stdv == rhs.stdv + and lhs.start == rhs.start + and lhs.length == rhs.length; + } +}; // struct EventDetection_Event + +struct EventDetection_Event_Parameters +{ + std::string read_id; + long long read_number; + long long scaling_used; + long long start_mux; + long long start_time; + long long duration; + double median_before; + unsigned abasic_found; +}; // struct EventDetection_Event_Parameters + // // This struct represents the expected signal measured // given the kmer sequence that is in the pore when the @@ -24,13 +88,24 @@ // struct Model_Entry { - char kmer[MAX_K_LEN]; long long variant; double level_mean; double level_stdv; double sd_mean; double sd_stdv; double weight; + std::array< char, MAX_K_LEN > kmer; + std::string get_kmer() const { return array_to_string(kmer); } + friend bool operator == (const Model_Entry& lhs, const Model_Entry& rhs) + { + return lhs.variant == rhs.variant + and lhs.level_mean == rhs.level_mean + and lhs.level_stdv == rhs.level_stdv + and lhs.sd_mean == rhs.sd_mean + and lhs.sd_stdv == rhs.sd_stdv + and lhs.weight == rhs.weight + and lhs.kmer == rhs.kmer; + } }; // struct Model_Entry // @@ -39,35 +114,52 @@ // struct Model_Parameters { - double drift; double scale; - double scale_sd; double shift; + double drift; double var; + double scale_sd; double var_sd; }; // struct Model_Parameters // // This struct represents an observed event. -// The members of the struct are the same as +// The members of the struct are the same as // the fields encoded in the FAST5 file. // struct Event_Entry { double mean; - double start; double stdv; + double start; double length; - char model_state[MAX_K_LEN]; - double model_level; - long long move; double p_model_state; - char mp_state[MAX_K_LEN]; double p_mp_state; double p_A; double p_C; double p_G; double p_T; + long long move; + std::array< char, MAX_K_LEN > model_state; + std::array< char, MAX_K_LEN > mp_state; + std::string get_model_state() const { return array_to_string(model_state); } + std::string get_mp_state() const { return array_to_string(mp_state); } + friend bool operator == (const Event_Entry& lhs, const Event_Entry& rhs) + { + return lhs.mean == rhs.mean + and lhs.stdv == rhs.stdv + and lhs.start == rhs.start + and lhs.length == rhs.length + and lhs.p_model_state == rhs.p_model_state + and lhs.p_mp_state == rhs.p_mp_state + and lhs.p_A == rhs.p_A + and lhs.p_C == rhs.p_C + and lhs.p_G == rhs.p_G + and lhs.p_T == rhs.p_T + and lhs.move == rhs.move + and lhs.model_state == rhs.model_state + and lhs.mp_state == rhs.mp_state; + } }; // struct Event_Entry // @@ -78,234 +170,772 @@ { long long template_index; long long complement_index; - char kmer[MAX_K_LEN]; + std::array< char, MAX_K_LEN > kmer; + std::string get_kmer() const { return array_to_string(kmer); } + friend bool operator == (const Event_Alignment_Entry& lhs, const Event_Alignment_Entry& rhs) + { + return lhs.template_index == rhs.template_index + and lhs.complement_index == rhs.complement_index + and lhs.kmer == rhs.kmer; + } }; // struct Event_Alignment_Entry + class File - : private hdf5_tools::File_Reader + : private hdf5_tools::File { private: - typedef hdf5_tools::File_Reader Base; + typedef hdf5_tools::File Base; public: - using Base::Base; - - using Base::is_open; - using Base::file_name; - using Base::open; - using Base::close; + //using Base::is_open; + //using Base::is_rw; + //using Base::file_name; + //using Base::create; + //using Base::close; + using Base::get_object_count; + using Base::is_valid_file; + //using Base::write; + + File() = default; + File(const std::string& file_name, bool rw = false) { open(file_name, rw); } + + bool is_open() const { return static_cast< const Base* >(this)->is_open(); } + bool is_rw() const { return static_cast< const Base* >(this)->is_rw(); } + const std::string& file_name() const { return static_cast< const Base* >(this)->file_name(); } + void create(const std::string& file_name, bool truncate = false) { static_cast< Base* >(this)->create(file_name, truncate); } + void close() { static_cast< Base* >(this)->close(); } - std::string file_version() const + void open(const std::string& file_name, bool rw = false) { - double v; - assert(Base::exists("/file_version")); - Base::read< double >("/file_version", v); - // convert it to string - std::ostringstream os; - os << v; - return os.str(); + Base::open(file_name, rw); + if (is_open()) + { + // detect raw samples read name + detect_raw_samples_read_name_list(); + // detect eventdetection groups + detect_eventdetection_group_list(); + // detect basecall groups + detect_basecall_group_list(); + } } - std::string basecall_version() const + /** + * Extract "/file_version" attribute. This must exist. + */ + std::string file_version() const { std::string res; - std::string path = get_bc_2d_root() + "/version"; - assert(Base::exists(path)); - Base::read< std::string >(path, res); + assert(Base::exists(file_version_path())); + Base::read(file_version_path(), res); return res; } - std::string eventdetection_version() const - { - std::string res; - // only support eventdetection group 000 for now - std::string path = "/Analyses/EventDetection_000/version"; - assert(Base::exists(path)); - Base::read< std::string >(path, res); + /** + * Check if "/UniqueGlobalKey/channel_id" attributes exist. + */ + bool have_channel_id_params() const + { + return Base::group_exists(channel_id_path()); + } + /** + * Extract "/UniqueGlobalKey/channel_id" attributes. + */ + Channel_Id_Parameters get_channel_id_params() const + { + Channel_Id_Parameters res; + Base::read(channel_id_path() + "/channel_number", res.channel_number); + Base::read(channel_id_path() + "/digitisation", res.digitisation); + Base::read(channel_id_path() + "/offset", res.offset); + Base::read(channel_id_path() + "/range", res.range); + Base::read(channel_id_path() + "/sampling_rate", res.sampling_rate); return res; } - - std::string get_log() const + /** + * Check if sampling rate exists. + */ + bool have_sampling_rate() const { - std::string res; - std::string path = get_bc_2d_root() + "/Log"; - assert(Base::exists(path)); - Base::read< std::string >(path, res); - return res; + return have_channel_id_params(); } - + /** + * Get sampling rate. + */ double get_sampling_rate() const { - assert(have_sampling_rate()); - - auto lg = get_log(); - auto idx = lg.find("Sampling rate is"); - - std::string line; - std::stringstream ss1(lg.substr(idx)); - std::getline(ss1,line,'\n'); - - std::stringstream ss2(line); - - std::string token; - std::getline(ss2,token,' '); //Sampling - std::getline(ss2,token,' '); //rate - std::getline(ss2,token,' '); //is - std::getline(ss2,token,' '); //Hz value + auto channel_id_params = get_channel_id_params(); + return channel_id_params.sampling_rate; + } - return std::atof(token.c_str()); + /** + * Check if "/UniqueGlobalKey/tracking_id" attributes exist. + */ + bool have_tracking_id_params() const + { + return Base::group_exists(tracking_id_path()); + } + /** + * Extract "/UniqueGlobalKey/tracking_id" attributes. + */ + Tracking_Id_Parameters get_tracking_id_params() const + { + return get_attr_map(tracking_id_path()); } - bool have_sampling_rate() const + /** + * Check if sequences attributes exists. + */ + bool have_sequences_params() const + { + return Base::group_exists(sequences_path()); + } + /** + * Get sequences attributes. + */ + Sequences_Parameters get_sequences_params() const { - auto lg = get_log(); - auto idx = lg.find("Sampling rate is"); - return idx != std::string::npos; + return get_attr_map(sequences_path()); } - std::string get_model_file(size_t i) const + /** + * Get list of raw samples read names. + */ + const std::vector< std::string >& get_raw_samples_read_name_list() const { - std::string res; - assert(Base::exists(model_file_path(i))); - Base::read< std::string >(model_file_path(i), res); + return _raw_samples_read_name_list; + } + /** + * Check if raw samples exist. + */ + bool have_raw_samples() const + { + return have_channel_id_params() and not get_raw_samples_read_name_list().empty(); + } + /** + * Get raw samples attributes for given read name (default: first read name). + */ + Raw_Samples_Parameters get_raw_samples_params(const std::string& _rn = std::string()) const + { + Raw_Samples_Parameters res; + const std::string& rn = not _rn.empty()? _rn : get_raw_samples_read_name_list().front(); + std::string p = raw_samples_params_path(rn); + Base::read(p + "/read_id", res.read_id); + Base::read(p + "/read_number", res.read_number); + Base::read(p + "/start_mux", res.start_mux); + Base::read(p + "/start_time", res.start_time); + Base::read(p + "/duration", res.duration); + return res; + } + /** + * Get raw samples for given read name (default: first read name). + */ + std::vector< Raw_Samples_Entry > get_raw_samples(const std::string& _rn = std::string()) const + { + // get raw samples + std::vector< uint16_t > raw_samples; + const std::string& rn = not _rn.empty()? _rn : get_raw_samples_read_name_list().front(); + Base::read(raw_samples_path(rn), raw_samples); + // get scaling parameters + auto channel_id_params = get_channel_id_params(); + // decode levels + std::vector< Raw_Samples_Entry > res; + res.reserve(raw_samples.size()); + for (auto int_level : raw_samples) + { + res.push_back((static_cast< float >(int_level) + channel_id_params.offset) + * channel_id_params.range / channel_id_params.digitisation); + } return res; } - std::string sequences_version() const + /** + * Get list of EventDetection groups. + */ + const std::vector< std::string >& get_eventdetection_group_list() const + { + return _eventdetection_group_list; + } + /** + * Check if any EventDetection groups exist. + */ + bool have_eventdetection_groups() const + { + return not get_eventdetection_group_list().empty(); + } + /** + * Get list of reads for given EventDetection group (default: first EventDetection group). + */ + std::vector< std::string > get_eventdetection_read_name_list(const std::string& _ed_gr = std::string()) const + { + const std::string& ed_gr = not _ed_gr.empty()? _ed_gr : get_eventdetection_group_list().front(); + return detect_eventdetection_read_name_list(ed_gr); + } + /** + * Check if EventDetection events exist for given EventDetection group (default: first EventDetection group). + */ + bool have_eventdetection_events(const std::string& _ed_gr = std::string()) const { - std::vector< std::string > tmp; - assert(Base::exists("/Sequences/Meta/version")); - Base::read< std::string >("/Sequences/Meta/version", tmp); - std::string res; - for (const auto& s: tmp) + std::string ed_gr; + if (_ed_gr.empty()) { - res += s; + auto ed_gr_l = get_eventdetection_group_list(); + if (ed_gr_l.empty()) return false; + ed_gr = ed_gr_l.front(); + } + else + { + ed_gr = _ed_gr; + } + return not get_eventdetection_read_name_list(ed_gr).empty(); + } + /** + * Get EventDetection params for given EventDetection group (default: first EventDetection group). + */ + std::map< std::string, std::string > get_eventdetection_params(const std::string& _ed_gr = std::string()) const + { + const std::string& ed_gr = not _ed_gr.empty()? _ed_gr : get_eventdetection_group_list().front(); + return get_attr_map(eventdetection_params_path(ed_gr)); + } + /** + * Get EventDetection event params for given EventDetection group, and given read name + * (default: first EventDetection group, and first read name in it). + */ + EventDetection_Event_Parameters get_eventdetection_event_params( + const std::string& _ed_gr = std::string(), const std::string& _rn = std::string()) const + { + EventDetection_Event_Parameters res; + const std::string& ed_gr = not _ed_gr.empty()? _ed_gr : get_eventdetection_group_list().front(); + const std::string rn = not _rn.empty()? _rn : get_eventdetection_read_name_list(ed_gr).front(); + auto p = eventdetection_event_params_path(ed_gr, rn); + auto a_v = Base::get_attr_list(p); + std::set< std::string > a_s(a_v.begin(), a_v.end()); + Base::read(p + "/read_number", res.read_number); + Base::read(p + "/scaling_used", res.scaling_used); + Base::read(p + "/start_mux", res.start_mux); + Base::read(p + "/start_time", res.start_time); + Base::read(p + "/duration", res.duration); + // optional fields + if (a_s.count("read_id")) + { + Base::read(p + "/read_id", res.read_id); + } + if (a_s.count("median_before")) + { + Base::read(p + "/median_before", res.median_before); + } + else + { + res.median_before = -1; + } + if (a_s.count("abasic_found")) + { + Base::read(p + "/abasic_found", res.abasic_found); + } + else + { + res.abasic_found = 0; } return res; } + /** + * Get EventDetection events for given EventDetection group, and given read name. + */ + std::vector< EventDetection_Event_Entry > get_eventdetection_events( + const std::string& _ed_gr = std::string(), const std::string& _rn = std::string()) const + { + std::vector< EventDetection_Event_Entry > res; + const std::string& ed_gr = not _ed_gr.empty()? _ed_gr : get_eventdetection_group_list().front(); + const std::string rn = not _rn.empty()? _rn : get_eventdetection_read_name_list(ed_gr).front(); + auto p = eventdetection_events_path(ed_gr, rn); + auto struct_member_names = Base::get_struct_members(p); + assert(struct_member_names.size() >= 4); + bool have_stdv = false; + bool have_variance = false; + for (const auto& s : struct_member_names) + { + if (s == "stdv") have_stdv = true; + else if (s == "variance") have_variance = true; + } + hdf5_tools::Compound_Map m; + m.add_member("mean", &EventDetection_Event_Entry::mean); + m.add_member("start", &EventDetection_Event_Entry::start); + m.add_member("length", &EventDetection_Event_Entry::length); + if (have_stdv) + { + m.add_member("stdv", &EventDetection_Event_Entry::stdv); + } + else if (have_variance) + { + m.add_member("variance", &EventDetection_Event_Entry::stdv); + } + else + { + // must have stdv or variance + abort(); + } + Base::read(p, res, m); + if (not have_stdv) + { + // have read variances + for (auto& e : res) + { + e.stdv = std::sqrt(e.stdv); + } + } + return res; + } // get_eventdetection_events() - bool have_basecalled_2D() const + /** + * Get list of all Basecall groups. + */ + const std::vector< std::string >& get_basecall_group_list() const { - return Base::exists(get_bc_2d_root() + "/BaseCalled_2D/Fastq"); + return _basecall_group_list; } - - std::string basecalled_2D() const + /** + * Check if any Basecall groups exist. + */ + bool have_basecall_groups() const + { + return not get_basecall_group_list().empty(); + } + /** + * Get list of Basecall groups for given strand. + */ + const std::vector< std::string >& get_basecall_strand_group_list(unsigned st) const + { + return _basecall_strand_group_list[st]; + } + /** + * Check if any Basecall groups exist for given strand. + */ + bool have_basecall_strand_groups(unsigned st) const + { + return not get_basecall_strand_group_list(st).empty(); + } + /** + * Get Basecall group params for given Basecall group. + */ + std::map< std::string, std::string > get_basecall_params(const std::string& bc_gr) const + { + return get_attr_map(basecall_root_path() + "/" + basecall_group_prefix() + bc_gr); + } + /** + * Check if Basecall log exists for given Basecall group. + */ + bool have_basecall_log(const std::string& bc_gr) const + { + std::string path = basecall_root_path() + "/" + basecall_group_prefix() + bc_gr + "/Log"; + return Base::exists(path); + } + /** + * Get Basecall log for given Basecall group. + */ + std::string get_basecall_log(const std::string& bc_gr) const { std::string res; - Base::read< std::string >(get_bc_2d_root() + "/BaseCalled_2D/Fastq", res); - - // Split the FASTQ record on newlines - size_t nl1 = res.find_first_of('\n'); - size_t nl2 = res.find_first_of('\n', nl1 + 1); - - if(nl1 == std::string::npos || nl2 == std::string::npos) - return ""; - else - return res.substr(nl1 + 1, nl2 - nl1 - 1); + std::string path = basecall_root_path() + "/" + basecall_group_prefix() + bc_gr + "/Log"; + Base::read(path, res); + return res; } - - std::vector< Event_Alignment_Entry > get_event_alignments() const + /** + * Check if Basecall fastq exists for given Basecall group and given strand. + */ + bool have_basecall_fastq(unsigned st, const std::string& _bc_gr = std::string()) const + { + if (_bc_gr.empty() and get_basecall_strand_group_list(st).empty()) return false; + const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(st).front(); + return Base::dataset_exists(basecall_fastq_path(bc_gr, st)); + } + /** + * Get Basecall fastq for given Basecall group and given strand. + */ + std::string get_basecall_fastq(unsigned st, const std::string& _bc_gr = std::string()) const { - std::vector< Event_Alignment_Entry > res; - hdf5_tools::Compound_Map m; - m.add_member("template", &Event_Alignment_Entry::template_index); - m.add_member("complement", &Event_Alignment_Entry::complement_index); - m.add_member("kmer", &Event_Alignment_Entry::kmer); - Base::read< Event_Alignment_Entry >(get_bc_2d_root() + "/BaseCalled_2D/Alignment", res, &m); + std::string res; + const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(st).front(); + Base::read(basecall_fastq_path(bc_gr, st), res); return res; } - - bool have_model(size_t i) const + /** + * Add Basecall fastq + */ + void add_basecall_fastq(unsigned st, const std::string& bc_gr, const std::string& fq) const + { + Base::write(basecall_fastq_path(bc_gr, st), true, fq); + } + /** + * Check if Basecall seq exists for given Basecall group and given strand. + */ + bool have_basecall_seq(unsigned st, const std::string& _bc_gr = std::string()) const + { + return have_basecall_fastq(st, _bc_gr); + } + /** + * Get Basecall sequence for given Basecall group and given strand. + */ + std::string get_basecall_seq(unsigned st, const std::string& _bc_gr = std::string()) const + { + return fq2seq(get_basecall_fastq(st, _bc_gr)); + } + /** + * Add Basecall seq + */ + void add_basecall_seq(unsigned st, const std::string& bc_gr, + const std::string& name, const std::string& seq, int default_qual = 33) const + { + std::ostringstream oss; + oss << '@' << name << std::endl + << seq << std::endl + << '+' << std::endl + << std::string(seq.size(), static_cast< char >(default_qual)); + add_basecall_fastq(st, bc_gr, oss.str()); + } + /** + * Check if Basecall model exist for given Basecall group and given strand. + */ + bool have_basecall_model(unsigned st, const std::string& _bc_gr = std::string()) const + { + if (_bc_gr.empty() and get_basecall_strand_group_list(st).empty()) return false; + const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(st).front(); + return Base::dataset_exists(basecall_model_path(bc_gr, st)); + } + /** + * Get Basecall model file name for given Basecall group and given strand. + */ + std::string get_basecall_model_file(unsigned st, const std::string& _bc_gr = std::string()) const { - return Base::exists(model_path(i)); + std::string res; + const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(st).front(); + assert(Base::exists(basecall_model_file_path(bc_gr, st))); + Base::read(basecall_model_file_path(bc_gr, st), res); + return res; } - bool have_events(size_t i) const + void add_basecall_model_file(unsigned st, const std::string& bc_gr, const std::string& file_name) const { - return Base::exists(events_path(i)); + std::string path = basecall_model_file_path(bc_gr, st); + Base::write(path, false, file_name); } - - std::vector< Model_Entry > get_model(size_t i) const + /** + * Get Basecall model parameters for given Basecall group and given strand. + */ + Model_Parameters get_basecall_model_params(unsigned st, const std::string& _bc_gr = std::string()) const + { + Model_Parameters res; + const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(st).front(); + std::string path = basecall_model_path(bc_gr, st); + Base::read(path + "/scale", res.scale); + Base::read(path + "/shift", res.shift); + Base::read(path + "/drift", res.drift); + Base::read(path + "/var", res.var); + Base::read(path + "/scale_sd", res.scale_sd); + Base::read(path + "/var_sd", res.var_sd); + return res; + } + template < typename T > + void add_basecall_model_params(unsigned st, const std::string& bc_gr, const T& params) const + { + std::string path = basecall_model_path(bc_gr, st); + Base::write(path + "/scale", false, params.scale); + Base::write(path + "/shift", false, params.shift); + Base::write(path + "/drift", false, params.drift); + Base::write(path + "/var", false, params.var); + Base::write(path + "/scale_sd", false, params.scale_sd); + Base::write(path + "/var_sd", false, params.var_sd); + } + /** + * Get Basecall model for given Basecall group and given strand. + */ + std::vector< Model_Entry > get_basecall_model(unsigned st, const std::string& _bc_gr = std::string()) const { std::vector< Model_Entry > res; + const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(st).front(); hdf5_tools::Compound_Map m; m.add_member("kmer", &Model_Entry::kmer); m.add_member("level_mean", &Model_Entry::level_mean); m.add_member("level_stdv", &Model_Entry::level_stdv); m.add_member("sd_mean", &Model_Entry::sd_mean); m.add_member("sd_stdv", &Model_Entry::sd_stdv); - Base::read< Model_Entry >(model_path(i), res, &m); + Base::read(basecall_model_path(bc_gr, st), res, m); return res; } - - Model_Parameters get_model_parameters(size_t i) const - { - Model_Parameters res; - std::string path = model_path(i); - Base::read< double >(path + "/drift", res.drift); - Base::read< double >(path + "/scale", res.scale); - Base::read< double >(path + "/scale_sd", res.scale_sd); - Base::read< double >(path + "/shift", res.shift); - Base::read< double >(path + "/var", res.var); - Base::read< double >(path + "/var_sd", res.var_sd); - return res; - } - - std::vector< Event_Entry > get_events(size_t i) const + /** + * Add Basecall model + */ + template < typename T > + void add_basecall_model(unsigned st, const std::string& bc_gr, const std::vector< T >& m) const + { + hdf5_tools::Compound_Map cm; + cm.add_member("kmer", &T::kmer); + cm.add_member("level_mean", &T::level_mean); + cm.add_member("level_stdv", &T::level_stdv); + cm.add_member("sd_mean", &T::sd_mean); + cm.add_member("sd_stdv", &T::sd_stdv); + Base::write(basecall_model_path(bc_gr, st), true, m, cm); + } + /** + * Check if Basecall events exist for given Basecall group and given strand. + */ + bool have_basecall_events(unsigned st, const std::string& _bc_gr = std::string()) const + { + if (_bc_gr.empty() and get_basecall_strand_group_list(st).empty()) return false; + const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(st).front(); + return Base::dataset_exists(basecall_events_path(bc_gr, st)); + } + /** + * Get Basecall events for given Basecall group and given strand. + */ + std::vector< Event_Entry > get_basecall_events(unsigned st, const std::string& _bc_gr = std::string()) const { std::vector< Event_Entry > res; + const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(st).front(); hdf5_tools::Compound_Map m; m.add_member("mean", &Event_Entry::mean); m.add_member("start", &Event_Entry::start); m.add_member("stdv", &Event_Entry::stdv); m.add_member("length", &Event_Entry::length); - Base::read< Event_Entry >(events_path(i), res, &m); + m.add_member("p_model_state", &Event_Entry::p_model_state); + m.add_member("model_state", &Event_Entry::model_state); + m.add_member("move", &Event_Entry::move); + Base::read(basecall_events_path(bc_gr, st), res, m); return res; } - - void set_basecalled_group_id(size_t i) + /** + * Add Basecall events + */ + template < typename T > + void add_basecall_events(unsigned st, const std::string& bc_gr, const std::vector< T >& ev) const + { + hdf5_tools::Compound_Map cm; + cm.add_member("mean", &T::mean); + cm.add_member("start", &T::start); + cm.add_member("stdv", &T::stdv); + cm.add_member("length", &T::length); + cm.add_member("p_model_state", &T::p_model_state); + cm.add_member("model_state", &T::model_state); + cm.add_member("move", &T::move); + Base::write(basecall_events_path(bc_gr, st), true, ev, cm); + } + /** + * Check if Basecall event alignment exist for given Basecall group. + */ + bool have_basecall_event_alignment(const std::string& _bc_gr = std::string()) const + { + if (_bc_gr.empty() and get_basecall_strand_group_list(2).empty()) return false; + const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(2).front(); + return Base::dataset_exists(basecall_event_alignment_path(bc_gr)); + } + /** + * Get Basecall events for given Basecall group. + */ + std::vector< Event_Alignment_Entry > get_basecall_event_alignment(const std::string& _bc_gr = std::string()) const { - assert(i <= 999); - std::stringstream ss; - ss << std::setfill('0') << std::setw(3) << i; - _basecalled_group_id = ss.str(); + std::vector< Event_Alignment_Entry > res; + const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(2).front(); + hdf5_tools::Compound_Map m; + m.add_member("template", &Event_Alignment_Entry::template_index); + m.add_member("complement", &Event_Alignment_Entry::complement_index); + m.add_member("kmer", &Event_Alignment_Entry::kmer); + Base::read(basecall_event_alignment_path(bc_gr), res, m); + return res; } + static std::string fq2seq(const std::string& fq) + { + size_t nl1_pos = fq.find_first_of('\n'); + if (nl1_pos == std::string::npos) return std::string(); + size_t nl2_pos = fq.find_first_of('\n', nl1_pos + 1); + if (nl2_pos == std::string::npos) return std::string(); + return fq.substr(nl1_pos + 1, nl2_pos - nl1_pos - 1); + } private: - - // Returns the root path of the form: - // Analyses/Basecall_2D_ddd/ where ddd is the group - std::string get_bc_2d_root() const + void detect_raw_samples_read_name_list() + { + if (not Base::group_exists(raw_samples_root_path())) return; + auto rn_list = Base::list_group(raw_samples_root_path()); + for (const auto& rn : rn_list) + { + if (not Base::dataset_exists(raw_samples_path(rn))) continue; + _raw_samples_read_name_list.push_back(rn); + } + } + + void detect_eventdetection_group_list() { - return "/Analyses/Basecall_2D_" + _basecalled_group_id; + if (not Base::group_exists(eventdetection_root_path())) return; + auto g_list = Base::list_group(eventdetection_root_path()); + for (const auto& g : g_list) + { + if (g.size() <= eventdetection_group_prefix().size()) continue; + auto p = std::mismatch(eventdetection_group_prefix().begin(), + eventdetection_group_prefix().end(), + g.begin()); + if (p.first != eventdetection_group_prefix().end()) continue; + _eventdetection_group_list.emplace_back(p.second, g.end()); + } + } + + std::vector< std::string > detect_eventdetection_read_name_list(const std::string& ed_gr) const + { + std::vector< std::string > res; + std::string p = eventdetection_root_path() + "/" + eventdetection_group_prefix() + ed_gr + "/Reads"; + if (not Base::group_exists(p)) return res; + auto rn_list = Base::list_group(p); + for (const auto& rn : rn_list) + { + if (not Base::dataset_exists(p + "/" + rn + "/Events")) continue; + res.push_back(rn); + } + return res; } - std::string model_path(size_t i) const + void detect_basecall_group_list() { - static std::vector< std::string > _model_path = - { "/BaseCalled_template/Model", - "/BaseCalled_complement/Model" }; - return get_bc_2d_root() + _model_path.at(i); + if (not Base::group_exists(basecall_root_path())) return; + auto g_list = Base::list_group(basecall_root_path()); + for (const auto& g : g_list) + { + if (g.size() <= basecall_group_prefix().size()) continue; + auto p = std::mismatch(basecall_group_prefix().begin(), + basecall_group_prefix().end(), + g.begin()); + if (p.first != basecall_group_prefix().end()) continue; + _basecall_group_list.emplace_back(p.second, g.end()); + for (unsigned st = 0; st < 3; ++st) + { + if (Base::group_exists(basecall_root_path() + "/" + g + "/" + basecall_strand_subgroup(st))) + { + _basecall_strand_group_list[st].emplace_back(p.second, g.end()); + } + } + } } - std::string events_path(size_t i) const + std::map< std::string, std::string > get_attr_map(const std::string& path) const { - static std::vector< std::string > _events_path = - { "/BaseCalled_template/Events", - "/BaseCalled_complement/Events" }; - return get_bc_2d_root() + _events_path.at(i); + std::map< std::string, std::string > res; + auto a_list = Base::get_attr_list(path); + for (const auto& a : a_list) + { + std::string tmp; + Base::read(path + "/" + a, tmp); + res[a] = tmp; + } + return res; } - std::string model_file_path(size_t i) const + // list of read names for which we have raw samples + std::vector< std::string > _raw_samples_read_name_list; + + // list of EventDetection groups + std::vector< std::string > _eventdetection_group_list; + + // list of Basecall groups + std::vector< std::string > _basecall_group_list; + + // list of per-strand Basecall groups; 0/1/2 = template/complement/2d + std::array< std::vector< std::string >, 3 > _basecall_strand_group_list; + + // static paths + static const std::string& file_version_path() { - static std::vector< std::string > _model_file_path = - { "/Summary/basecall_1d_template/model_file", - "/Summary/basecall_1d_complement/model_file" }; - return get_bc_2d_root() + _model_file_path.at(i); + static const std::string _file_version_path = "/file_version"; + return _file_version_path; } - // default to using the 000 analysis group - std::string _basecalled_group_id = "000"; + static const std::string& channel_id_path() + { + static const std::string _channel_id_path = "/UniqueGlobalKey/channel_id"; + return _channel_id_path; + } + static const std::string& tracking_id_path() + { + static const std::string _tracking_id_path = "/UniqueGlobalKey/tracking_id"; + return _tracking_id_path; + } + static const std::string& raw_samples_root_path() + { + static const std::string _raw_samples_root_path = "/Raw/Reads"; + return _raw_samples_root_path; + } + static std::string raw_samples_params_path(const std::string& rn) + { + return raw_samples_root_path() + "/" + rn; + } + static std::string raw_samples_path(const std::string& rn) + { + return raw_samples_root_path() + "/" + rn + "/Signal"; + } + static const std::string& sequences_path() + { + static const std::string _sequences_path = "/Sequences/Meta"; + return _sequences_path; + } + static const std::string& eventdetection_root_path() + { + static const std::string _eventdetection_root_path = "/Analyses"; + return _eventdetection_root_path; + } + static const std::string& eventdetection_group_prefix() + { + static const std::string _eventdetection_group_prefix = "EventDetection_"; + return _eventdetection_group_prefix; + } + static std::string eventdetection_params_path(const std::string& ed_gr) + { + return eventdetection_root_path() + "/" + eventdetection_group_prefix() + ed_gr; + } + static std::string eventdetection_event_params_path(const std::string& ed_gr, const std::string& rn) + { + return eventdetection_root_path() + "/" + eventdetection_group_prefix() + ed_gr + "/Reads/" + rn; + } + static std::string eventdetection_events_path(const std::string& ed_gr, const std::string& rn) + { + return eventdetection_root_path() + "/" + eventdetection_group_prefix() + ed_gr + "/Reads/" + rn + "/Events"; + } + static const std::string& basecall_root_path() + { + static const std::string _basecall_root_path = "/Analyses"; + return _basecall_root_path; + } + static const std::string& basecall_group_prefix() + { + static const std::string _basecall_group_prefix = "Basecall_"; + return _basecall_group_prefix; + } + static const std::string& basecall_strand_subgroup(unsigned st) + { + static const std::array< std::string, 3 > _basecall_strand_subgroup = + {{ "BaseCalled_template", "BaseCalled_complement", "BaseCalled_2D" }}; + return _basecall_strand_subgroup[st]; + } + static std::string basecall_fastq_path(const std::string& bc_gr, unsigned st) + { + return basecall_root_path() + "/" + basecall_group_prefix() + bc_gr + "/" + + basecall_strand_subgroup(st) + "/Fastq"; + } + static std::string basecall_model_path(const std::string& bc_gr, unsigned st) + { + return basecall_root_path() + "/" + basecall_group_prefix() + bc_gr + "/" + + basecall_strand_subgroup(st) + "/Model"; + } + static std::string basecall_model_file_path(const std::string& bc_gr, unsigned st) + { + assert(st < 2); + return basecall_root_path() + "/" + basecall_group_prefix() + bc_gr + + "/Summary/basecall_1d_" + (st == 0? "template" : "complement") + "/model_file"; + } + static std::string basecall_events_path(const std::string& bc_gr, unsigned st) + { + return basecall_root_path() + "/" + basecall_group_prefix() + bc_gr + "/" + + basecall_strand_subgroup(st) + "/Events"; + } + static std::string basecall_event_alignment_path(const std::string& bc_gr) + { + return basecall_root_path() + "/" + basecall_group_prefix() + bc_gr + "/" + + basecall_strand_subgroup(2) + "/Alignment"; + } }; // class File } // namespace fast5 diff -Nru fast5-0~20150918/src/.gitignore fast5-0.5.6/src/.gitignore --- fast5-0~20150918/src/.gitignore 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/src/.gitignore 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1,4 @@ +f5dump +f5dump-full +hdf5-mod +f5-mod diff -Nru fast5-0~20150918/src/hdf5-mod.cpp fast5-0.5.6/src/hdf5-mod.cpp --- fast5-0~20150918/src/hdf5-mod.cpp 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/src/hdf5-mod.cpp 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1,308 @@ +#include +#include +#include + +#include "hdf5_tools.hpp" + +using namespace std; +using namespace hdf5; + +struct B +{ + int val_1; + array< char, 6 > val_2; + string val_3; + friend ostream & operator << (ostream & os, const B & b) + { + os << "(val_1=" << b.val_1 + << ",val_2=\"" << string(b.val_2.begin(), b.val_2.end()) + << "\",val_3=\"" << b.val_3 << "\")"; + return os; + } +}; + +struct A +{ + int val_1; + int val_1a; + float val_2; + char val_3[6]; + array< char, 6 > val_4; + string val_5; + B val_6; + friend ostream & operator << (ostream & os, const A & a) + { + os << "(val_1=" << a.val_1 + << ",val_1a=" << a.val_1a + << ",val_2=" << a.val_2 + << ",val_3=\"" << a.val_3 + << "\",val_4=\"" << string(a.val_4.begin(), a.val_4.end()) + << "\",val_5=\"" << a.val_5 + << "\",val_6=" << a.val_6 << ")"; + return os; + } +}; + +struct B_string +{ + string val_1; + string val_2; + string val_3; + friend ostream & operator << (ostream & os, const B_string & b) + { + os << "(val_1=\"" << b.val_1 + << "\",val_2=\"" << b.val_2 + << "\",val_3=\"" << b.val_3 << "\")"; + return os; + } +}; +struct A_string +{ + string val_1; + string val_1a; + string val_2; + string val_3; + string val_4; + string val_5; + B_string val_6; + friend ostream & operator << (ostream & os, const A_string & a) + { + os << "(val_1=\"" << a.val_1 + << "\",val_1a=\"" << a.val_1a + << "\",val_2=\"" << a.val_2 + << "\",val_3=\"" << a.val_3 + << "\",val_4=\"" << a.val_4 + << "\",val_5=\"" << a.val_5 + << "\",val_6=" << a.val_6 << ")"; + return os; + } +}; + +struct B_char_array +{ + array< char, 6 > val_2; + array< char, 6 > val_3; + friend ostream & operator << (ostream & os, const B_char_array & b) + { + os << "(val_2=\"" << string(b.val_2.begin(), b.val_2.end()) + << "\",val_3=\"" << string(b.val_3.begin(), b.val_3.end()) << ")"; + return os; + } +}; + +struct A_char_array +{ + array< char, 6 > val_3; + array< char, 6 > val_4; + array< char, 6 > val_5; + B_char_array val_6; + friend ostream & operator << (ostream & os, const A_char_array & a) + { + os + << "(val_3=\"" << string(a.val_3.begin(), a.val_3.end()) + << "\",val_4=\"" << string(a.val_4.begin(), a.val_4.end()) + << "\",val_5=\"" << string(a.val_5.begin(), a.val_5.end()) + << "\",val_6=" << a.val_6 << ")"; + return os; + } +}; + +int main(int argc, char* argv[]) +{ + if (argc != 2 and argc != 3) + { + cerr << "use: " << argv[0] << " [-f] " << endl; + return EXIT_FAILURE; + } + bool force = string(argv[1]) == "-f"; + string file_name(argv[force? 2 : 1]); + { + hdf5_tools::File f; + // + // All fast5 operations are performed inside a try-catch block. This should + // resist various hdf5 errors without leaking memory. + // + try + { + // + // create file; without -f, fail if it exist + // + f.create(file_name, force); + assert(f.is_open()); + assert(f.is_rw()); + // + // write a /file_version to allow f5dump to work + // + string file_version("42"); + f.write("/file_version", false, file_version); + int val_1 = 42; + float val_2 = 3.14; + char val_3[6] = "ACGTA"; + array< char, 6 > val_4 = { "AACCG" }; + string val_5("CCCGG"); + static_assert(hdf5_tools::detail::mem_type_class< void >::value == 0, ""); + static_assert(hdf5_tools::detail::mem_type_class< decltype(val_1) >::value == 1, ""); + static_assert(hdf5_tools::detail::mem_type_class< decltype(val_2) >::value == 1, ""); + static_assert(hdf5_tools::detail::mem_type_class< decltype(val_3) >::value == 2, ""); + static_assert(hdf5_tools::detail::mem_type_class< decltype(val_4) >::value == 2, ""); + static_assert(hdf5_tools::detail::mem_type_class< decltype(val_5) >::value == 3, ""); + static_assert(hdf5_tools::detail::mem_type_class< std::true_type >::value == 4, ""); + // + // write integer + // + f.write("/val_1", false, val_1); + f.write("/val_1_as_64", false, val_1, H5T_STD_I64LE); + f.write("/val_1_v", false, vector< int >(3, val_1)); + // + // write float + // + f.write("/val_2", false, val_2); + f.write("/val_2_as_64", false, val_2, H5T_IEEE_F64LE); + f.write("/val_2_v", false, vector< float >(3, val_2)); + // + // write fixlen string: char[] + // + f.write("/val_3", false, val_3); + f.write("/val_3_as_len_3", false, val_3, 3); + f.write("/val_3_as_varlen", false, val_3, -1); + // + // write fixlen string: std::array< char > + // + f.write("/val_4", false, val_4); + f.write("/val_4_as_len_3", false, val_4, 3); + f.write("/val_4_as_varlen", false, val_4, -1); + f.write("/val_4_v", false, vector < decltype(val_4) >(3, val_4)); + f.write("/val_4_v_as_len_3", false, vector < decltype(val_4) >(3, val_4), 3); + f.write("/val_4_v_as_varlen", false, vector < decltype(val_4) >(3, val_4), -1); + // + // write varlen string + // + f.write("/val_5", false, val_5); + f.write("/val_5_as_len_3", false, val_5, 3); + f.write("/val_5_as_fixlen", false, val_5, 0); + f.write("/val_5_v", false, vector< decltype(val_5) >(3, val_5)); + f.write("/val_5_v_as_len_3", false, vector< decltype(val_5) >(3, val_5), 3); + f.write("/val_5_v_as_fixlen", false, vector< decltype(val_5) >(1, val_5), 0); // only size 1 + // + // write compound + // + A val_6{ 1, 2, 3.14, "ACGTA", "CGTAC", "CCGGT", { 42, "GTTAC", "TTATT" } }; + hdf5_tools::Compound_Map cm_A; + hdf5_tools::Compound_Map cm_B; + cm_B.add_member("val_1", &B::val_1); + cm_B.add_member("val_2", &B::val_2); + cm_B.add_member("val_3", &B::val_3); + for (const auto& e : cm_B.members()) + { + clog << "cm_B: (" << (void*)&e << ")" << e << endl; + } + cm_A.add_member("val_1", &A::val_1); + cm_A.add_member("val_2", &A::val_2); + cm_A.add_member("val_3", &A::val_3); + cm_A.add_member("val_4", &A::val_4); + cm_A.add_member("val_5", &A::val_5); + cm_A.add_member("val_6", &A::val_6, &cm_B); + for (const auto& e : cm_A.members()) + { + clog << "cm_A: (" << (void*)&e << ")" << e << endl; + } + auto l = cm_A.get_member_ptr_list(); + for (const auto& p : l) + { + clog << "member:"; + for (const auto& e_ptr : p.first) + { + clog << " " << *e_ptr; + } + clog << "; total_offset=" << p.second << endl; + } + //f.write("/val_6a", false, val_6, cm_A); + f.write("/val_6d", true, val_6, cm_A); + vector< A > src(3, val_6); + f.write("/val_6d_v", true, src, cm_A); + clog << "wrote val_6d_v:" << endl; + for (const auto& a : src) + { + clog << a << endl; + } + + // + // test reading compound + // + // using original map + { + std::vector< A > dest; + f.read("/val_6d_v", dest, cm_A); + clog << "read val_6d_v:" << endl; + for (const auto& a : dest) + { + clog << a << endl; + } + } + // using all strings + { + hdf5_tools::Compound_Map cm_A_string; + hdf5_tools::Compound_Map cm_B_string; + cm_B_string.add_member("val_1", &B_string::val_1); + cm_B_string.add_member("val_2", &B_string::val_2); + cm_B_string.add_member("val_3", &B_string::val_3); + for (const auto& e : cm_B_string.members()) + { + clog << "cm_B_string: (" << (void*)&e << ")" << e << endl; + } + cm_A_string.add_member("val_1", &A_string::val_1); + cm_A_string.add_member("val_2", &A_string::val_2); + cm_A_string.add_member("val_3", &A_string::val_3); + cm_A_string.add_member("val_4", &A_string::val_4); + cm_A_string.add_member("val_5", &A_string::val_5); + cm_A_string.add_member("val_6", &A_string::val_6, &cm_B_string); + for (const auto& e : cm_A_string.members()) + { + clog << "cm_A_string: (" << (void*)&e << ")" << e << endl; + } + std::vector< A_string > dest; + f.read("/val_6d_v", dest, cm_A_string); + clog << "read val_6d_v using all-strings:" << endl; + for (const auto& a : dest) + { + clog << a << endl; + } + } + // using char arrays + { + hdf5_tools::Compound_Map cm_A_char_array; + hdf5_tools::Compound_Map cm_B_char_array; + cm_B_char_array.add_member("val_2", &B_char_array::val_2); + cm_B_char_array.add_member("val_3", &B_char_array::val_3); + for (const auto& e : cm_B_char_array.members()) + { + clog << "cm_B_char_array: (" << (void*)&e << ")" << e << endl; + } + cm_A_char_array.add_member("val_3", &A_char_array::val_3); + cm_A_char_array.add_member("val_4", &A_char_array::val_4); + cm_A_char_array.add_member("val_5", &A_char_array::val_5); + cm_A_char_array.add_member("val_6", &A_char_array::val_6, &cm_B_char_array); + for (const auto& e : cm_A_char_array.members()) + { + clog << "cm_A_char_array: (" << (void*)&e << ")" << e << endl; + } + std::vector< A_char_array > dest; + f.read("/val_6d_v", dest, cm_A_char_array); + clog << "read val_6d_v using char arrays:" << endl; + for (const auto& a : dest) + { + clog << a << endl; + } + + } + } + catch (hdf5_tools::Exception& e) + { + cout << "hdf5 error: " << e.what() << endl; + } + // + // fast5 file is closed by its destructor at the end of this scope + // + } + assert(hdf5_tools::File::get_object_count() == 0); +} diff -Nru fast5-0~20150918/src/hdf5_tools.hpp fast5-0.5.6/src/hdf5_tools.hpp --- fast5-0~20150918/src/hdf5_tools.hpp 2015-09-18 17:12:49.000000000 +0000 +++ fast5-0.5.6/src/hdf5_tools.hpp 2016-07-28 20:14:13.000000000 +0000 @@ -1,14 +1,27 @@ +// +// The MIT License (MIT) +// +// Copyright (c) 2015 Matei David, Ontario Institute for Cancer Research +// + #ifndef __HDF5_TOOLS_HPP #define __HDF5_TOOLS_HPP #include +#include #include #include +#include #include #include #include #include #include +#include +#include +#include +#include +#include namespace hdf5 { @@ -24,8 +37,13 @@ : public std::exception { public: - Exception(const std::string& msg) : _msg(msg) {} + Exception(const std::string& msg) : _msg(active_path() + ": " + msg) {} const char* what() const noexcept { return _msg.c_str(); } + static std::string& active_path() + { + static thread_local std::string _active_path; + return _active_path; + } private: std::string _msg; }; // class Exception @@ -36,6 +54,13 @@ namespace detail { +/// Compute offset of a struct member from a member pointer (runtime version). +template < typename T, typename U > +std::size_t offset_of(U T::* mem_ptr) +{ + return reinterpret_cast< std::size_t >(&(((T*)0)->*mem_ptr)); +} + /// TempMetaFunc: Given destination type, deduce memory type to be used in hdf5 read operation. /// Only useful for numeric types. /// HDF5 idiosyncracy: @@ -58,77 +83,473 @@ template <> struct get_mem_type< double > { static hid_t id() { return H5T_NATIVE_DOUBLE; } }; template <> struct get_mem_type< long double > { static hid_t id() { return H5T_NATIVE_LDOUBLE; } }; -/// TempMetaFunc: Given destination type, can we read it -template < typename Out_Data_Type > -struct can_read -{ - static const bool value = - std::is_integral< Out_Data_Type >::value - or std::is_floating_point< Out_Data_Type >::value - or std::is_same< typename std::remove_extent< Out_Data_Type >::type, char >::value - or std::is_same< Out_Data_Type, std::string >::value - or std::is_class< Out_Data_Type >:: value; -}; - -/// TempMetaFunc: Given a destination type, does it need a compound map -template < typename Out_Data_Type > -struct read_as_atomic -{ - static const bool value = - std::is_integral< Out_Data_Type >::value - or std::is_floating_point< Out_Data_Type >::value - or std::is_same< typename std::remove_extent< Out_Data_Type >::type, char >::value - or std::is_same< Out_Data_Type, std::string >::value; +/** + * Class of memory type: + * 0 - unknown + * 1 - numeric (signed/unsigned integer or float) + * 2 - fixed length string (char array) + * 3 - variable length string (std::string) + * 4 - class + */ +template < typename T > +struct mem_type_class +{ + static const int value = + std::conditional< std::is_integral< T >::value or std::is_floating_point< T >::value, + std::integral_constant< int, 1 >, + typename std::conditional< std::is_class< T >::value, + std::integral_constant< int, 4 >, + std::integral_constant< int, 0 > >::type >::type::value; }; - -/// Compute offset of a struct member from a member pointer (runtime version). -template < typename T, typename U > -std::size_t offset_of(U T::* mem_ptr) +template < size_t Size > +struct mem_type_class< char[Size] > { - return reinterpret_cast< std::size_t >(&(((T*)0)->*mem_ptr)); -} + static const int value = 2; +}; +template < size_t Size > +struct mem_type_class< const char[Size] > +{ + static const int value = 2; +}; +template < size_t Size > +struct mem_type_class< std::array< char, Size > > +{ + static const int value = 2; +}; +template < size_t Size > +struct mem_type_class< std::array< const char, Size > > +{ + static const int value = 2; +}; +template <> +struct mem_type_class< std::string > +{ + static const int value = 3; +}; -/// Description of a member inside a compound -/// Only works with numeric, string, and struct types. -struct Compound_Member_Description +// Struct whose purpuse is to destroy the HDF object during destruction +struct HDF_Object_Holder { -public: - Compound_Member_Description(const std::string& _name, size_t _offset, hid_t _numeric_type_id) - : name(_name), offset(_offset), numeric_type_id(_numeric_type_id) + hid_t id; + std::function< herr_t(hid_t) > dtor; + HDF_Object_Holder() + : id(0) {} + HDF_Object_Holder(const HDF_Object_Holder&) = delete; + HDF_Object_Holder(HDF_Object_Holder&& other) + : id(0) { - type = numeric; + load(std::move(other)); } - Compound_Member_Description(const std::string& _name, size_t _offset, size_t _char_array_size) - : name(_name), offset(_offset), char_array_size(_char_array_size) + HDF_Object_Holder(hid_t _id, std::function< herr_t(hid_t) > _dtor) { - type = char_array; + load(_id, _dtor); } - Compound_Member_Description(const std::string& _name, size_t _offset) - : name(_name), offset(_offset) + ~HDF_Object_Holder() noexcept(false) + { + if (id > 0) + { + if (dtor) + { + dtor(id); + } + id = 0; + } + } + HDF_Object_Holder& operator = (const HDF_Object_Holder&) = delete; + HDF_Object_Holder& operator = (HDF_Object_Holder&& other) { - type = string; + if (&other != this) + { + std::swap(id, other.id); + std::swap(dtor, other.dtor); + } + return *this; + } + void load(hid_t _id, std::function< herr_t(hid_t) > _dtor) + { + id = _id; + dtor = _dtor; } - Compound_Member_Description(const std::string& _name, size_t _offset, const Compound_Map* _compound_map_ptr) - : name(_name), offset(_offset), compound_map_ptr(_compound_map_ptr) + void load(HDF_Object_Holder&& other) { - type = compound; + *this = std::move(other); } +}; // struct HDF_Object_Holder - bool is_numeric() const { return type == numeric; } +struct Util +{ + /** + * Make hdf5 string type. + * @param sz If negative, make varlen string; else make fixlen string of size sz. + */ + static HDF_Object_Holder make_str_type(long sz) + { + assert(sz != 0); + HDF_Object_Holder res( + wrap(H5Tcopy, H5T_C_S1), + wrapped_closer(H5Tclose)); + size_t tmp = sz < 0? H5T_VARIABLE : sz; + wrap(H5Tset_size, res.id, tmp); + return res; + } // make_str_type + + /** + * Get name and return value checker for hdf5 function. + */ + static const std::pair< const char *, std::function< bool(void *) > >& + get_fcn_info(void (*fcn_ptr)()) + { + static const std::map< void (*)(), std::pair< const char *, std::function< bool(void *) > > > fcn_info_m = + { + { (void(*)())&H5Aclose, + { "H5Aclose", + [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; } + } + }, + { (void(*)())&H5Acreate2, + { "H5Acreate2", + [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; } + } + }, + { (void(*)())&H5Aexists_by_name, + { "H5Aexists_by_name", + [] (void * vp) { return *reinterpret_cast< htri_t * >(vp) >= 0; } + } + }, + { (void(*)())&H5Aget_name_by_idx, + { "H5Aget_name_by_idx", + [] (void * vp) { return *reinterpret_cast< ssize_t * >(vp) >= 0; } + } + }, + { (void(*)())&H5Aget_space, + { "H5Aget_space", + [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; } + } + }, + { (void(*)())&H5Aget_type, + { "H5Aget_type", + [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; } + } + }, + { (void(*)())&H5Aopen, + { "H5Aopen", + [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; } + } + }, + { (void(*)())&H5Aopen_by_name, + { "H5Aopen_by_name", + [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; } + } + }, + { (void(*)())&H5Aread, + { "H5Aread", + [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; } + } + }, + { (void(*)())&H5Awrite, + { "H5Awrite", + [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; } + } + }, + + { (void(*)())&H5Dclose, + { "H5Dclose", + [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; } + } + }, + { (void(*)())&H5Dcreate2, + { "H5Dcreate2", + [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; } + } + }, + { (void(*)())&H5Dget_space, + { "H5Dget_space", + [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; } + } + }, + { (void(*)())&H5Dget_type, + { "H5Dget_type", + [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; } + } + }, + { (void(*)())&H5Dopen, + { "H5Dopen", + [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; } + } + }, + { (void(*)())&H5Dread, + { "H5Dread", + [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; } + } + }, + { (void(*)())&H5Dvlen_reclaim, + { "H5Dvlen_reclaim", + [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; } + } + }, + { (void(*)())&H5Dwrite, + { "H5Dwrite", + [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; } + } + }, + + { (void(*)())&H5Gclose, + { "H5Gclose", + [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; } + } + }, + { (void(*)())&H5Gcreate2, + { "H5Gcreate2", + [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; } + } + }, + { (void(*)())&H5Gget_info, + { "H5Gget_info", + [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; } + } + }, + { (void(*)())&H5Gopen2, + { "H5Gopen2", + [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; } + } + }, + + { (void(*)())&H5Lexists, + { "H5Lexists", + [] (void * vp) { return *reinterpret_cast< htri_t * >(vp) >= 0; } + } + }, + { (void(*)())&H5Lget_name_by_idx, + { "H5Lget_name_by_idx", + [] (void * vp) { return *reinterpret_cast< ssize_t * >(vp) >= 0; } + } + }, + + { (void(*)())&H5Oclose, + { "H5Oclose", + [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; } + } + }, + { (void(*)())&H5Oexists_by_name, + { "H5Oexists_by_name", + [] (void * vp) { return *reinterpret_cast< htri_t * >(vp) >= 0; } + } + }, + { (void(*)())&H5Oget_info, + { "H5Oget_info", + [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; } + } + }, + { (void(*)())&H5Oopen, + { "H5Oopen", + [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; } + } + }, + + { (void(*)())&H5Pclose, + { "H5Pclose", + [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; } + } + }, + { (void(*)())&H5Pcreate, + { "H5Pcreate", + [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; } + } + }, + { (void(*)())&H5Pset_create_intermediate_group, + { "H5Pset_create_intermediate_group", + [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; } + } + }, + + { (void(*)())&H5Sclose, + { "H5Sclose", + [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; } + } + }, + { (void(*)())&H5Screate, + { "H5Screate", + [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; } + } + }, + { (void(*)())&H5Screate_simple, + { "H5Screate_simple", + [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; } + } + }, + { (void(*)())&H5Sget_simple_extent_dims, + { "H5Sget_simple_extent_dims", + [] (void * vp) { return *reinterpret_cast< int * >(vp) >= 0; } + } + }, + { (void(*)())&H5Sget_simple_extent_ndims, + { "H5Sget_simple_extent_ndims", + [] (void * vp) { return *reinterpret_cast< int * >(vp) >= 0; } + } + }, + { (void(*)())&H5Sget_simple_extent_type, + { "H5Sget_simple_extent_type", + [] (void * vp) { return *reinterpret_cast< H5S_class_t * >(vp) != H5S_NO_CLASS; } + } + }, + + { (void(*)())&H5Tclose, + { "H5Tclose", + [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; } + } + }, + { (void(*)())&H5Tcopy, + { "H5Tcopy", + [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; } + } + }, + { (void(*)())&H5Tcreate, + { "H5Tcreate", + [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; } + } + }, + { (void(*)())&H5Tget_class, + { "H5Tget_class", + [] (void * vp) { return *reinterpret_cast< H5T_class_t * >(vp) != H5T_NO_CLASS; } + } + }, + { (void(*)())&H5Tget_member_index, + { "H5Tget_member_index", + [] (void * vp) { return *reinterpret_cast< int * >(vp) >= 0; } + } + }, + { (void(*)())&H5Tget_member_name, + { "H5Tget_member_name", + [] (void * vp) { return *reinterpret_cast< char* * >(vp) != nullptr; } + } + }, + { (void(*)())&H5Tget_member_type, + { "H5Tget_member_type", + [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; } + } + }, + { (void(*)())&H5Tget_nmembers, + { "H5Tget_nmembers", + [] (void * vp) { return *reinterpret_cast< int * >(vp) >= 0; } + } + }, + { (void(*)())&H5Tget_sign, + { "H5Tget_sign", + [] (void * vp) { return *reinterpret_cast< H5T_sign_t * >(vp) != H5T_SGN_ERROR; } + } + }, + { (void(*)())&H5Tget_size, + { "H5Tget_size", + [] (void * vp) { return *reinterpret_cast< size_t * >(vp) > 0; } + } + }, + { (void(*)())&H5Tinsert, + { "H5Tinsert", + [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; } + } + }, + { (void(*)())&H5Tis_variable_str, + { "H5Tis_variable_str", + [] (void * vp) { return *reinterpret_cast< htri_t * >(vp) >= 0; } + } + }, + { (void(*)())&H5Tset_size, + { "H5Tset_size", + [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; } + } + }, + }; + return fcn_info_m.at(fcn_ptr); + } + + /** + * General-purpose wrapper of hdf5 calls that checks return value for validity. + */ + template < typename Function, typename... Args > + static typename std::result_of< Function(Args...) >::type + wrap(Function&& f, Args&& ...args) + { + auto res = f(args...); + const auto& f_info = get_fcn_info((void(*)())&f); + if (not f_info.second((void*)&res)) throw Exception(std::string("error in ") + f_info.first); + return res; + } + + /** + * Wrap closer function. + */ + template < typename Function > + static std::function< herr_t(hid_t) > wrapped_closer(Function&& f) + { + return [&] (hid_t id) { return wrap(f, id); }; + } +}; // struct Util + +/// Description of a member inside a compound +/// Only works with numeric, string, and struct types. +struct Compound_Member_Description +{ +public: + Compound_Member_Description(const std::string& _name, size_t _offset, hid_t _numeric_type_id) + : type(numeric), + name(_name), + offset(_offset), + numeric_type_id(_numeric_type_id) {} + Compound_Member_Description(const std::string& _name, size_t _offset, size_t _char_array_size) + : type(char_array), + name(_name), + offset(_offset), + char_array_size(_char_array_size) {} + Compound_Member_Description(const std::string& _name, size_t _offset) + : type(string), + name(_name), + offset(_offset) {} + Compound_Member_Description(const std::string& _name, size_t _offset, + const Compound_Map* _compound_map_ptr, size_t _compound_size) + : type(compound), + name(_name), + offset(_offset), + compound_map_ptr(_compound_map_ptr), + compound_size(_compound_size) {} + + bool is_numeric() const { return type == numeric; } bool is_char_array() const { return type == char_array; } - bool is_string() const { return type == string; } - bool is_compound() const { return type == compound; } + bool is_string() const { return type == string; } + bool is_compound() const { return type == compound; } - std::string name; - size_t offset; - union + HDF_Object_Holder get_type() const { - hid_t numeric_type_id; - size_t char_array_size; - const Compound_Map* compound_map_ptr; - }; + assert(not is_compound()); + HDF_Object_Holder res; + if (is_numeric()) + { + res.load(numeric_type_id, nullptr); + } + else if (is_char_array()) + { + res.load(Util::make_str_type(char_array_size)); + } + else if (is_string()) + { + res.load(Util::make_str_type(-1)); + } + return res; + } + + friend std::ostream& operator << (std::ostream& os, const Compound_Member_Description& e) + { + os << "(&=" << (void*)&e + << ",name=\"" << e.name + << "\",type=" << (e.is_numeric() + ? "numeric" + : (e.is_char_array() + ? "char_array" + : (e.is_string() + ? "string" : "compound"))) + << ",offset=" << e.offset << ")"; + return os; + } -private: enum member_type { numeric, @@ -137,6 +558,15 @@ compound }; member_type type; + std::string name; + size_t offset; + union + { + hid_t numeric_type_id; + size_t char_array_size; + const Compound_Map* compound_map_ptr; + }; + size_t compound_size; }; // Compound_Member_Description } // namespace detail @@ -155,20 +585,19 @@ template < typename T, typename U > void add_member(const std::string& name, U T::* mem_ptr) { - static_assert(std::is_integral< U >::value - or std::is_floating_point< U >::value - or std::is_same< typename std::remove_extent< U >::type, char >::value - or std::is_same< U, std::string >::value, - "add_member(name, mem_ptr) overload expects numerical or string types only "); - if (std::is_integral< U >::value or std::is_floating_point< U >::value) + static_assert(detail::mem_type_class< U >::value == 1 + or detail::mem_type_class< U >::value == 2 + or detail::mem_type_class< U >::value == 3, + "add_member(name, mem_ptr) overload expects numerical or string types only"); + if (detail::mem_type_class< U >::value == 1) { _members.emplace_back(name, detail::offset_of(mem_ptr), detail::get_mem_type< U >::id()); } - else if (std::is_same< typename std::remove_extent< U >::type, char >::value) + else if (detail::mem_type_class< U >::value == 2) { _members.emplace_back(name, detail::offset_of(mem_ptr), sizeof(U)); } - else if (std::is_same< U, std::string >::value) + else if (detail::mem_type_class< U >::value == 3) { _members.emplace_back(name, detail::offset_of(mem_ptr)); } @@ -177,14 +606,159 @@ template < typename T, typename U > void add_member(const std::string& name, U T::* mem_ptr, const Compound_Map* compound_map_ptr) { - assert(false); // not currently implemented - static_assert(std::is_class< U >::value, - "add_member(name, mem_ptr, compound_map_ptr) overload expects class types only "); - _members.emplace_back(name, detail::offset_of(mem_ptr), compound_map_ptr); + static_assert(detail::mem_type_class< U >::value == 4, + "add_member(name, mem_ptr, compound_map_ptr) overload expects class types only"); + _members.emplace_back(name, detail::offset_of(mem_ptr), compound_map_ptr, sizeof(U)); } const std::vector< detail::Compound_Member_Description >& members() const { return _members; } + /** + * Get list of non-compound member types. + * @return A list of pairs; first: list of member ptrs followed; second: absolute offset. + */ + typedef std::deque< std::pair< std::deque< const detail::Compound_Member_Description* >, + size_t > > member_ptr_list_type; + member_ptr_list_type get_member_ptr_list() const + { + member_ptr_list_type res; + for (const auto& e : members()) + { + member_ptr_list_type::value_type p; + if (not e.is_compound()) + { + member_ptr_list_type::value_type p; + p.first = { &e }; + p.second = e.offset; + res.emplace_back(std::move(p)); + } + else + { + auto tmp = e.compound_map_ptr->get_member_ptr_list(); + for (auto& tmp_p : tmp) + { + member_ptr_list_type::value_type p; + p.first = std::move(tmp_p.first); + p.first.push_front(&e); + p.second = tmp_p.second + e.offset; + res.emplace_back(std::move(p)); + } + } + } + return res; + } + + /** + * Produce hdf5 compound datatype for this map. + * @param compound_size Extrenally-tracked compound size + * @param selector If empty, use all elements; if not empty, use only elements that pass selection. + * @fill If true, type offsets follow compound map offsets, allowing for gaps; + * if false: type offsets are minimal values required to fit members. + */ + detail::HDF_Object_Holder build_type( + size_t compound_size, + std::function< bool(const detail::Compound_Member_Description&) > selector = nullptr, + bool fill = true) const + { + //std::clog << "===== build_type (" << (void*)this << ") start" << std::endl; + std::deque< std::tuple< std::string, detail::HDF_Object_Holder, size_t > > stype_id_holder_l; + size_t compressed_size = 0; + for (const auto& e : members()) + { + detail::HDF_Object_Holder stype_id_holder; + if (selector and not e.is_compound() and not selector(e)) continue; + if (not e.is_compound()) + { + stype_id_holder = e.get_type(); + } + else + { + stype_id_holder = e.compound_map_ptr->build_type(e.compound_size, selector, fill); + } + if (stype_id_holder.id > 0) + { + stype_id_holder_l.emplace_back( + std::string(e.name), + std::move(stype_id_holder), + fill? e.offset : compressed_size); + compressed_size += H5Tget_size(std::get<1>(stype_id_holder_l.back()).id); + } + } + if (stype_id_holder_l.empty()) + { + //std::clog << "===== build_type (" << (void*)this << ") empty" << std::endl; + return detail::HDF_Object_Holder(); + } + //std::clog << "===== build_type (" << (void*)this << ") compound size: " << (fill? compound_size : compressed_size) << std::endl; + detail::HDF_Object_Holder res( + detail::Util::wrap(H5Tcreate, H5T_COMPOUND, fill? compound_size : compressed_size), + detail::Util::wrapped_closer(H5Tclose)); + for (const auto& t : stype_id_holder_l) + { + //std::clog << "===== build_type (" << (void*)this << ") adding name=\"" << std::get<0>(t) << "\", offset=" << std::get<2>(t) << std::endl; + detail::Util::wrap(H5Tinsert, res.id, std::get<0>(t).c_str(), std::get<2>(t), std::get<1>(t).id); + } + //std::clog << "===== build_type (" << (void*)this << ") end" << std::endl; + return res; + } + + static detail::HDF_Object_Holder build_flat_type( + const member_ptr_list_type::value_type::first_type& l, hid_t id = 0) + { + detail::HDF_Object_Holder res; + size_t sz = 0; + for (auto it = l.rbegin(); it != l.rend(); ++it) + { + const detail::Compound_Member_Description& e = **it; + assert((it == l.rbegin()) == (not e.is_compound())); + assert((it == l.rbegin()) == (res.id == 0)); + assert((it == l.rbegin()) == (sz == 0)); + if (it == l.rbegin()) + { + if (id == 0) + { + res.load(e.get_type()); + } + else + { + res.load( + detail::Util::wrap(H5Tcopy, id), + detail::Util::wrapped_closer(H5Tclose)); + } + sz = detail::Util::wrap(H5Tget_size, res.id); + } + detail::HDF_Object_Holder tmp( + detail::Util::wrap(H5Tcreate, H5T_COMPOUND, sz), + detail::Util::wrapped_closer(H5Tclose)); + detail::Util::wrap(H5Tinsert, tmp.id, e.name.c_str(), 0, res.id); + std::swap(res, tmp); + } + return res; + } + + /** + * Get compound member from an existing compound type. + */ + static detail::HDF_Object_Holder get_compound_member( + hid_t id, const member_ptr_list_type::value_type::first_type& l) + { + detail::HDF_Object_Holder res( + detail::Util::wrap(H5Tcopy, id), + detail::Util::wrapped_closer(H5Tclose)); + for (auto it = l.begin(); it != l.end(); ++it) + { + const detail::Compound_Member_Description& e = **it; + assert(detail::Util::wrap(H5Tget_class, res.id) == H5T_COMPOUND); + unsigned idx = detail::Util::wrap(H5Tget_member_index, res.id, e.name.c_str()); + detail::HDF_Object_Holder tmp( + detail::Util::wrap(H5Tget_member_type, res.id, idx), + detail::Util::wrapped_closer(H5Tclose)); + std::swap(res, tmp); + } + assert(detail::Util::wrap(H5Tget_class, res.id) != H5T_COMPOUND); + return res; + } + private: std::vector< detail::Compound_Member_Description > _members; }; // Compound_Map @@ -192,377 +766,673 @@ namespace detail { -// TempSpec: reading numerics -template < typename Out_Data_Type, typename Out_Data_Storage > -struct Extent_Atomic_Reader -{ - void operator () (const std::string& loc_full_name, Out_Data_Storage& dest, - const Compound_Map*, hid_t obj_id, hid_t, - const std::string&, std::function< hid_t(hid_t) >, - const std::string& read_fcn_name, std::function< herr_t(hid_t, hid_t, void*) > read_fcn) - { - hid_t mem_type_id = get_mem_type< Out_Data_Type >::id(); - assert(mem_type_id != -1); - int status = read_fcn(obj_id, mem_type_id, static_cast< void* >(dest.data())); - if (status < 0) throw Exception(loc_full_name + ": error in " + read_fcn_name); - } -}; // struct Extent_Atomic_Reader - -// TempSpec: for reading strings -template < typename Out_Data_Storage > -struct Extent_Atomic_Reader< std::string, Out_Data_Storage > -{ - void operator () (const std::string& loc_full_name, Out_Data_Storage& dest, - const Compound_Map*, hid_t obj_id, hid_t obj_space_id, - const std::string& get_type_fcn_name, std::function< hid_t(hid_t) > get_type_fcn, - const std::string& read_fcn_name, std::function< herr_t(hid_t, hid_t, void*) > read_fcn) +// open object to be read, return dspace_id, file_dtype_id, reader fcn, reader fcn name, and is_ds +struct Reader_Base +{ + Reader_Base(hid_t grp_id, const std::string& name) { - int status; - int file_type_id = get_type_fcn(obj_id); - if (file_type_id < 0) throw Exception(loc_full_name + ": error in " + get_type_fcn_name); - int is_vlen_str = H5Tis_variable_str(file_type_id); - if (is_vlen_str < 0) throw Exception(loc_full_name + ": error in H5Tis_variable_str"); - hid_t mem_type_id = H5Tcopy(H5T_C_S1); - if (mem_type_id < 0) throw Exception(loc_full_name + ": error in H5Tcopy"); - if (is_vlen_str) // stored as variable-length string + int status = Util::wrap(H5Aexists_by_name, grp_id, ".", name.c_str(), H5P_DEFAULT); + is_ds = status == 0; + if (is_ds) { - // compute mem_type - status = H5Tset_size(mem_type_id, H5T_VARIABLE); - if (status < 0) throw Exception(loc_full_name + ": error in H5Tset_size(variable)"); - // prepare buffer to receive data - std::vector< char* > char_p_buff(dest.size(), nullptr); - // perform the read - status = read_fcn(obj_id, mem_type_id, static_cast< void* >(char_p_buff.data())); - if (status < 0) throw Exception(loc_full_name + ": error in " + read_fcn_name); - // transfer strings to destination - for (size_t i = 0; i < dest.size(); ++i) - { - if (not char_p_buff[i]) throw Exception(loc_full_name + ": " + read_fcn_name + " did not fill buffer"); - dest[i] = char_p_buff[i]; - } - // reclaim memory allocated by libhdf5 - status = H5Dvlen_reclaim(mem_type_id, obj_space_id, H5P_DEFAULT, char_p_buff.data()); - if (status < 0) throw Exception(loc_full_name + ": error in H5Dvlen_reclaim"); + obj_id_holder.load( + Util::wrap(H5Dopen, grp_id, name.c_str(), H5P_DEFAULT), + Util::wrapped_closer(H5Dclose)); + dspace_id_holder.load( + Util::wrap(H5Dget_space, obj_id_holder.id), + Util::wrapped_closer(H5Sclose)); + file_dtype_id_holder.load( + Util::wrap(H5Dget_type, obj_id_holder.id), + Util::wrapped_closer(H5Tclose)); + reader = [&] (hid_t mem_dtype_id, void* dest) { + return Util::wrap(H5Dread, obj_id_holder.id, mem_dtype_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, dest); + }; + } + else + { + obj_id_holder.load( + Util::wrap(H5Aopen, grp_id, name.c_str(), H5P_DEFAULT), + Util::wrapped_closer(H5Aclose)); + dspace_id_holder.load( + Util::wrap(H5Aget_space, obj_id_holder.id), + Util::wrapped_closer(H5Sclose)); + file_dtype_id_holder.load( + Util::wrap(H5Aget_type, obj_id_holder.id), + Util::wrapped_closer(H5Tclose)); + reader = [&] (hid_t mem_dtype_id, void* dest) { + return Util::wrap(H5Aread, obj_id_holder.id, mem_dtype_id, dest); + }; + } + // dataspace class and size + dspace_class = Util::wrap(H5Sget_simple_extent_type, dspace_id_holder.id); + if (dspace_class == H5S_SCALAR) + { + dspace_size = 1; + } + else if (dspace_class == H5S_SIMPLE) + { + auto ndims = Util::wrap(H5Sget_simple_extent_ndims, dspace_id_holder.id); + if (ndims != 1) throw Exception("reading multi-dimensional extents is not supported"); + hsize_t tmp; + Util::wrap(H5Sget_simple_extent_dims, dspace_id_holder.id, &tmp, nullptr); + dspace_size = tmp; + } + else + { + throw Exception("reading dataspaces other than SCALAR and SIMPLE is not supported"); } - else // stored as fixed-length string + // datatype class + file_dtype_class = Util::wrap(H5Tget_class, file_dtype_id_holder.id); + if (file_dtype_class == H5T_STRING) + { + file_dtype_is_vlen_str = Util::wrap(H5Tis_variable_str, file_dtype_id_holder.id); + } + else + { + file_dtype_is_vlen_str = false; + } + file_dtype_size = Util::wrap(H5Tget_size, file_dtype_id_holder.id); + } + HDF_Object_Holder obj_id_holder; + HDF_Object_Holder dspace_id_holder; + HDF_Object_Holder file_dtype_id_holder; + std::function< void(hid_t, void*) > reader; + H5S_class_t dspace_class; + size_t dspace_size; + H5T_class_t file_dtype_class; + htri_t file_dtype_is_vlen_str; + size_t file_dtype_size; + bool is_ds; +}; // struct Reader_Base + +struct String_reader +{ + std::vector< std::string > operator () ( + Reader_Base& reader_base, + const Compound_Map::member_ptr_list_type::value_type::first_type* mptr_l_ptr = nullptr) const + { + std::vector< std::string > res(reader_base.dspace_size); + assert((mptr_l_ptr != nullptr) == (reader_base.file_dtype_class == H5T_COMPOUND)); + HDF_Object_Holder file_stype_id_holder; + hid_t file_stype_id = 0; + if (reader_base.file_dtype_class == H5T_COMPOUND) + { + file_stype_id_holder = Compound_Map::get_compound_member( + reader_base.file_dtype_id_holder.id, + *mptr_l_ptr); + file_stype_id = file_stype_id_holder.id; + } + else + { + file_stype_id = reader_base.file_dtype_id_holder.id; + } + auto mem_type_wrapper = [&] (HDF_Object_Holder&& id_holder) { + HDF_Object_Holder tmp(std::move(id_holder)); + return (mptr_l_ptr != nullptr + ? Compound_Map::build_flat_type(*mptr_l_ptr, tmp.id) + : std::move(tmp)); + }; + assert(Util::wrap(H5Tget_class, file_stype_id) != H5T_COMPOUND); + auto file_stype_class = Util::wrap(H5Tget_class, file_stype_id); + HDF_Object_Holder mem_dtype_id_holder; + if (file_stype_class == H5T_STRING) // stored as a string + { + if (Util::wrap(H5Tis_variable_str, file_stype_id)) // stored as a varlen string + { + // compute mem_type + mem_dtype_id_holder = mem_type_wrapper(Util::make_str_type(-1)); + // prepare buffer to receive data + std::vector< char * > charptr_buff(res.size(), nullptr); + // perform the read + reader_base.reader(mem_dtype_id_holder.id, charptr_buff.data()); + // transfer strings to destination + for (size_t i = 0; i < res.size(); ++i) + { + if (not charptr_buff[i]) throw Exception("read did not fill buffer"); + res[i] = charptr_buff[i]; + } + // reclaim memory allocated by libhdf5 + Util::wrap(H5Dvlen_reclaim, mem_dtype_id_holder.id, reader_base.dspace_id_holder.id, + H5P_DEFAULT, charptr_buff.data()); + } + else // stored as a fixlen string + { + // compute mem_type + size_t file_stype_size = Util::wrap(H5Tget_size, file_stype_id); + mem_dtype_id_holder = mem_type_wrapper(Util::make_str_type(file_stype_size + 1)); + // prepare buffer to receieve data + std::vector< char > char_buff(res.size() * (file_stype_size + 1), '\0'); + // perform the read + reader_base.reader(mem_dtype_id_holder.id, char_buff.data()); + // transfer strings to destination + for (size_t i = 0; i < res.size(); ++i) + { + res[i] = std::string(&char_buff[i * (file_stype_size + 1)], file_stype_size); + // trim trailing '\0'-s + while (not res[i].empty() and res[i].back() == '\0') + { + res[i].resize(res[i].size() - 1); + } + } + } + } + else if (file_stype_class == H5T_INTEGER) // stored as an integer + { + if (Util::wrap(H5Tget_sign, file_stype_id) == H5T_SGN_NONE) // stored as an unsigned integer + { + // compute mem_type + mem_dtype_id_holder = mem_type_wrapper( + HDF_Object_Holder(get_mem_type< unsigned long long >::id(), nullptr)); + // prepare buffer to read data + std::vector< unsigned long long > ull_buff(res.size()); + // perform the read + reader_base.reader(mem_dtype_id_holder.id, ull_buff.data()); + // transfer to destination + for (size_t i = 0; i < res.size(); ++i) + { + std::ostringstream oss; + oss << ull_buff[i]; + res[i] = oss.str(); + } + } + else // stored as a signed integer + { + // compute mem_type + mem_dtype_id_holder = mem_type_wrapper( + HDF_Object_Holder(get_mem_type< long long >::id(), nullptr)); + // prepare buffer to read data + std::vector< long long > ll_buff(res.size()); + // perform the read + reader_base.reader(mem_dtype_id_holder.id, ll_buff.data()); + // transfer to destination + for (size_t i = 0; i < res.size(); ++i) + { + std::ostringstream oss; + oss << ll_buff[i]; + res[i] = oss.str(); + } + } + } + else if (file_stype_class == H5T_FLOAT) // stored as a float { // compute mem_type - size_t sz = H5Tget_size(file_type_id); - if (sz == 0) throw Exception(loc_full_name + ": H5Tget_size returned 0; is this an error?!"); - status = H5Tset_size(mem_type_id, sz + 1); - if (status < 0) throw Exception(loc_full_name + ": error in H5Tset_size(fixed)"); - // prepare buffer to receieve data - std::vector< char > char_buff(dest.size() * (sz + 1)); + mem_dtype_id_holder = mem_type_wrapper( + HDF_Object_Holder(get_mem_type< double >::id(), nullptr)); + // prepare buffer to read data + std::vector< double > d_buff(res.size()); // perform the read - status = read_fcn(obj_id, mem_type_id, static_cast< void* >(char_buff.data())); - if (status < 0) throw Exception(loc_full_name + ": error in " + read_fcn_name); - // transfer strings to destination - for (size_t i = 0; i < dest.size(); ++i) + reader_base.reader(mem_dtype_id_holder.id, d_buff.data()); + // transfer to destination + for (size_t i = 0; i < res.size(); ++i) { - dest[i] = std::string(&char_buff[i * (sz + 1)], sz); + std::ostringstream oss; + oss << d_buff[i]; + res[i] = oss.str(); } } - status = H5Tclose(mem_type_id); - if (status < 0) throw Exception(loc_full_name + ": error in H5Tclose(mem_type_id)"); - status = H5Tclose(file_type_id); - if (status < 0) throw Exception(loc_full_name + ": error in H5Tclose(file_type_id)"); + return res; } -}; // struct Extent_Atomic_Reader< std::string > +}; -template < typename Out_Data_Type, typename Out_Data_Storage > -struct Extent_Compound_Reader +// Reader_helper +// Branch on memory type classes +template < int, typename > +struct Reader_helper; +// numeric +template < typename Data_Type > +struct Reader_helper< 1, Data_Type > { - void operator () (const std::string& loc_full_name, Out_Data_Storage& dest, - const Compound_Map* compound_map_ptr, hid_t obj_id, hid_t, - const std::string& get_type_fcn_name, std::function< hid_t(hid_t) > get_type_fcn, - const std::string& read_fcn_name, std::function< herr_t(hid_t, hid_t, void*) > read_fcn) + void operator () (Reader_Base& reader_base, Data_Type * out) const { - int status; - assert(compound_map_ptr); - hid_t file_type_id = get_type_fcn(obj_id); - if (file_type_id < 0) throw Exception(loc_full_name + ": error in " + get_type_fcn_name); - H5T_class_t file_type_class = H5Tget_class(file_type_id); - if (file_type_class == H5T_NO_CLASS) throw Exception(loc_full_name + ": error in H5Tget_class(file_type)"); - if (file_type_class != H5T_COMPOUND) throw Exception(loc_full_name + ": expected H5T_COMPOUND datatype"); - - // pass 1 - // read numeric and char_array members only - hid_t mem_type_id = H5Tcreate(H5T_COMPOUND, sizeof(Out_Data_Type)); - std::vector< hid_t > mem_stype_id_v; - for (const auto& e : compound_map_ptr->members()) - { - assert(not e.is_compound()); // not implemented yet - if (e.is_string()) continue; - int file_stype_idx = H5Tget_member_index(file_type_id, e.name.c_str()); - if (file_stype_idx < 0) throw Exception(loc_full_name + ": missing member \"" + e.name + "\""); - hid_t file_stype_id = H5Tget_member_type(file_type_id, file_stype_idx); - if (file_stype_id < 0) throw Exception(loc_full_name + ": error in H5Tget_member_type"); - H5T_class_t file_stype_class = H5Tget_class(file_stype_id); - if (file_stype_class == H5T_NO_CLASS) throw Exception(loc_full_name + ": error in H5Tget_class(file_stype)"); - if (e.is_numeric()) - { - if (file_stype_class != H5T_INTEGER and file_stype_class != H5T_FLOAT) - throw Exception(loc_full_name + ": member \"" + e.name + "\" is numeric, but file_stype is not numeric"); - status = H5Tinsert(mem_type_id, e.name.c_str(), e.offset, e.numeric_type_id); - if (status < 0) throw Exception(loc_full_name + ": error in H5Tinsert(\"" + e.name + "\")"); + assert(std::is_integral< Data_Type >::value or std::is_floating_point< Data_Type >::value); + hid_t mem_dtype_id = get_mem_type< Data_Type >::id(); + reader_base.reader(mem_dtype_id, out); + } +}; +// char array +template < typename Data_Type > +struct Reader_helper< 2, Data_Type > +{ + void operator () (Reader_Base& reader_base, Data_Type * out) const + { + if (reader_base.file_dtype_class == H5T_STRING + and not reader_base.file_dtype_is_vlen_str) + { + HDF_Object_Holder mem_dtype_id_holder(Util::make_str_type(sizeof(Data_Type))); + reader_base.reader(mem_dtype_id_holder.id, out); + } + else // conversion needed + { + auto tmp = String_reader()(reader_base); + for (size_t i = 0; i < tmp.size(); ++i) + { + std::memset(&out[i][0], '\0', sizeof(Data_Type)); + std::memcpy(&out[i][0], tmp[i].data(), std::min(tmp[i].size(), sizeof(Data_Type) - 1)); } + } + } +}; +// string +template < typename Data_Type > +struct Reader_helper< 3, Data_Type > +{ + void operator () (Reader_Base& reader_base, Data_Type * out) const + { + static_assert(std::is_same< Data_Type, std::string >::value, "Data_Type not std::string"); + auto tmp = String_reader()(reader_base); + for (size_t i = 0; i < tmp.size(); ++i) + { + std::swap(out[i], tmp[i]); + } + } +}; +// compound +template < typename Data_Type > +struct Reader_helper< 4, Data_Type > +{ + void operator () (Reader_Base& reader_base, Data_Type * out, const Compound_Map & cm) const + { + // get member list + auto mptr_l = cm.get_member_ptr_list(); + // go through members, check they exist, decide if they need conversion + std::set< const detail::Compound_Member_Description * > conversion_needed_s; + for (const auto& p : mptr_l) + { + HDF_Object_Holder file_stype_id_holder( + Compound_Map::get_compound_member(reader_base.file_dtype_id_holder.id, p.first)); + if (p.first.back()->is_string() + or (p.first.back()->is_char_array() + and Util::wrap(H5Tget_class, file_stype_id_holder.id) == H5T_STRING + and Util::wrap(H5Tis_variable_str, file_stype_id_holder.id))) + { + conversion_needed_s.insert(p.first.back()); + } + } + // read all members that do not need conversion all-at-once + auto implicit_conversion = [&] (const detail::Compound_Member_Description& e) { + return conversion_needed_s.count(&e) == 0; + }; + HDF_Object_Holder mem_dtype_id_holder(cm.build_type(sizeof(Data_Type), implicit_conversion, true)); + if (mem_dtype_id_holder.id > 0) + { + reader_base.reader(mem_dtype_id_holder.id, out); + } + // read members that need conversion one-by-one + for (const auto& p : mptr_l) + { + const detail::Compound_Member_Description& e = *p.first.back(); + if (implicit_conversion(e)) continue; + // read member into vector of strings + auto tmp = String_reader()(reader_base, &p.first); + assert(tmp.size() == reader_base.dspace_size); + // write it to destination + assert(e.is_char_array() or e.is_string()); if (e.is_char_array()) { - if (file_stype_class != H5T_STRING) - throw Exception(loc_full_name + ": member \"" + e.name + "\" is char_array, but file_stype is not H5T_STRING"); - status = H5Tis_variable_str(file_stype_id); - if (status < 0) throw Exception(loc_full_name + ": error in H5Tis_variable_str(\"" + e.name + "\")"); - if (status) throw Exception(loc_full_name + ": member \"" + e.name + "\" is a char_array, but file_stype is a variable len string"); - //size_t file_stype_size = H5Tget_size(file_stype_id); - //if (file_stype_size == 0) throw Exception(loc_full_name + ": H5Tget_size(\"" + e.name + "\") returned 0"); - hid_t mem_stype_id = H5Tcopy(H5T_C_S1); - if (mem_stype_id < 0) throw Exception(loc_full_name + ": member \"" + e.name + "\": error in H5Tcopy"); - status = H5Tset_size(mem_stype_id, e.char_array_size); - if (status < 0) throw Exception(loc_full_name + ": error in H5Tset_size(\"" + e.name + "\")"); - status = H5Tinsert(mem_type_id, e.name.c_str(), e.offset, mem_stype_id); - if (status < 0) throw Exception(loc_full_name + ": error in H5Tinsert(\"" + e.name + "\")"); - mem_stype_id_v.push_back(mem_stype_id); - } - status = H5Tclose(file_stype_id); - if (status < 0) throw Exception(loc_full_name + ": member \"" + e.name + "\": error in H5Tclose(file_stype)"); - } - // perform the actual read - status = read_fcn(obj_id, mem_type_id, static_cast< void* >(dest.data())); - if (status < 0) throw Exception(loc_full_name + ": pass 1: error in " + read_fcn_name); - // release the memory types - for (const auto& mem_stype_id : mem_stype_id_v) - { - status = H5Tclose(mem_stype_id); - if (status < 0) throw Exception(loc_full_name + ": error in H5Tclose(mem_stype)"); - } - mem_stype_id_v.clear(); - status = H5Tclose(mem_type_id); - if (status < 0) throw Exception(loc_full_name + ": error in H5Tclose(mem_type)"); - - // pass 2 - // read strings - for (const auto& e : compound_map_ptr->members()) - { - assert(not e.is_compound()); // not implemented yet - if (e.is_numeric() or e.is_char_array()) continue; - //TODO - assert(false); - } - - status = H5Tclose(file_type_id); - if (status < 0) throw Exception(loc_full_name + ": error in H5Tclose(file_type_id)"); - } -}; //struct Extent_Compound_Reader - -// TempSpec: read extent of atomic types -template < typename Out_Data_Type, typename Out_Data_Storage, bool = true > -struct Extent_Reader_as_atomic - : Extent_Atomic_Reader< Out_Data_Type, Out_Data_Storage > -{}; - -// TempSpec: read extent of compound types -template < typename Out_Data_Type, typename Out_Data_Storage > -struct Extent_Reader_as_atomic< Out_Data_Type, Out_Data_Storage, false > - : Extent_Compound_Reader< Out_Data_Type, Out_Data_Storage > -{}; - -// branch on atomic/compound destination -template < typename Out_Data_Type, typename Out_Data_Storage > -struct Extent_Reader - : public Extent_Reader_as_atomic< Out_Data_Type, Out_Data_Storage, read_as_atomic< Out_Data_Type >::value > -{}; - -template < typename, typename, bool > -struct Object_Reader_impl; - -// TempSpec: reading scalars -template < typename Out_Data_Type > -struct Object_Reader_impl< Out_Data_Type, Out_Data_Type, true > -{ - void operator () (const std::string& loc_full_name, Out_Data_Type& dest, - const Compound_Map* compound_map_ptr, hid_t obj_id, hid_t obj_space_id, - const std::string& get_type_fcn_name, std::function< hid_t(hid_t) > get_type_fcn, - const std::string& read_fcn_name, std::function< herr_t(hid_t, hid_t, void*) > read_fcn) - { - H5S_class_t obj_class_t = H5Sget_simple_extent_type(obj_space_id); - if (obj_class_t == H5S_NO_CLASS) throw Exception(loc_full_name + ": error in H5Sget_simple_extent_type"); - if (obj_class_t != H5S_SCALAR) - throw Exception(loc_full_name + ": reading as scalar, but dataspace not H5S_SCALAR"); - std::vector< Out_Data_Type > tmp(1); - Extent_Reader< Out_Data_Type, std::vector< Out_Data_Type > >()( - loc_full_name, tmp, compound_map_ptr, obj_id, obj_space_id, - get_type_fcn_name, get_type_fcn, - read_fcn_name, read_fcn); - dest = std::move(tmp[0]); - } -}; - -// TempSpec: reading vectors -template < typename Out_Data_Type, typename Out_Data_Storage > -struct Object_Reader_impl< Out_Data_Type, Out_Data_Storage, false > -{ - void operator () (const std::string& loc_full_name, Out_Data_Storage& dest, - const Compound_Map* compound_map_ptr, hid_t obj_id, hid_t obj_space_id, - const std::string& get_type_fcn_name, std::function< hid_t(hid_t) > get_type_fcn, - const std::string& read_fcn_name, std::function< herr_t(hid_t, hid_t, void*) > read_fcn) - { - H5S_class_t obj_class_t = H5Sget_simple_extent_type(obj_space_id); - if (obj_class_t == H5S_NO_CLASS) throw Exception(loc_full_name + ": error in H5Sget_simple_extent_type"); - if (obj_class_t != H5S_SIMPLE) - throw Exception(loc_full_name + ": reading as vector, but dataspace not H5S_SIMPLE"); - int status = H5Sget_simple_extent_dims(obj_space_id, nullptr, nullptr); - if (status < 0) throw Exception(loc_full_name + ": error in H5Sget_simple_extent_dims"); - if (status != 1) throw Exception(loc_full_name + ": expected extent of dimension 1"); - hsize_t sz; - H5Sget_simple_extent_dims(obj_space_id, &sz, nullptr); - dest.clear(); - dest.resize(sz); - Extent_Reader< Out_Data_Type, Out_Data_Storage >()( - loc_full_name, dest, compound_map_ptr, obj_id, obj_space_id, - get_type_fcn_name, get_type_fcn, - read_fcn_name, read_fcn); - } -}; - -// TempMetaFunc: split scalar & vector reading branches -template < typename Out_Data_Type, typename Out_Data_Storage > -struct Object_Reader - : public Object_Reader_impl< Out_Data_Type, Out_Data_Storage, std::is_same< Out_Data_Type, Out_Data_Storage >::value > {}; - -// open object and object space, then delegate -template < typename Out_Data_Type, typename Out_Data_Storage > -void read_obj_helper(const std::string& loc_full_name, Out_Data_Storage& dest, const Compound_Map* compound_map_ptr, - const std::string& open_fcn_name, std::function< hid_t(void) > open_fcn, - const std::string& close_fcn_name, std::function< herr_t(hid_t) > close_fcn, - const std::string& get_space_fcn_name, std::function< hid_t(hid_t) > get_space_fcn, - const std::string& get_type_fcn_name, std::function< hid_t(hid_t) > get_type_fcn, - const std::string& read_fcn_name, std::function< herr_t(hid_t, hid_t, void*) > read_fcn) -{ - int status; - // open object - hid_t obj_id = open_fcn(); - if (obj_id < 0) throw Exception(loc_full_name + ": error in " + open_fcn_name); - // open object space, check reading ode matches storage mode (scalar/vector) - hid_t obj_space_id = get_space_fcn(obj_id); - if (obj_space_id < 0) throw Exception(loc_full_name + ": error in " + get_space_fcn_name); - // read object - Object_Reader< Out_Data_Type, Out_Data_Storage >()( - loc_full_name, dest, compound_map_ptr, obj_id, obj_space_id, - get_type_fcn_name, get_type_fcn, - read_fcn_name, read_fcn); - // close object space & object - status = H5Sclose(obj_space_id); - if (status < 0) throw Exception(loc_full_name + ": error in H5Sclose"); - status = close_fcn(obj_id); - if (status < 0) throw Exception(loc_full_name + ": error in " + close_fcn_name); -} + for (size_t i = 0; i < tmp.size(); ++i) + { + std::memset(reinterpret_cast< char * >(&out[i]) + p.second, '\0', e.char_array_size); + std::memcpy(reinterpret_cast< char * >(&out[i]) + p.second, + tmp[i].data(), + std::min(tmp[i].size(), e.char_array_size - 1)); + } + } + else if (e.is_string()) + { + for (size_t i = 0; i < tmp.size(); ++i) + { + std::swap( + *reinterpret_cast< std::string * >(reinterpret_cast< char * >(&out[i]) + p.second), + tmp[i]); + } + } + } + } +}; -// determine if address is attribute or dataset, then delegate -template < typename Out_Data_Type, typename Out_Data_Storage > -void read_addr(hid_t root_id, const std::string& loc_path, const std::string& loc_name, - Out_Data_Storage& dest, const Compound_Map* compound_map_ptr) -{ - assert(root_id > 0); - std::string loc_full_name = loc_path + loc_name; - // determine if object is an attribute; otherwise, assume it's a dataset - int status; - status = H5Aexists_by_name(root_id, loc_path.c_str(), loc_name.c_str(), H5P_DEFAULT); - if (status < 0) throw Exception(loc_full_name + ": error in H5Aexists_by_name"); - bool is_attr = status > 0; - if (is_attr) - { - read_obj_helper< Out_Data_Type, Out_Data_Storage >( - loc_full_name, dest, compound_map_ptr, - "H5Aopen_by_name", - [&root_id, &loc_path, &loc_name] () - { - return H5Aopen_by_name(root_id, loc_path.c_str(), loc_name.c_str(), H5P_DEFAULT, H5P_DEFAULT); - }, - "H5Aclose", &H5Aclose, - "H5Aget_space", &H5Aget_space, - "H5Aget_type", &H5Aget_type, - "H5Aread", - [] (hid_t id, hid_t mem_type_id, void* dest_p) - { - return H5Aread(id, mem_type_id, dest_p); - }); - } - else - { - read_obj_helper< Out_Data_Type, Out_Data_Storage >( - loc_full_name, dest, compound_map_ptr, - "H5Dopen", - [&root_id, &loc_full_name] () - { - return H5Dopen(root_id, loc_full_name.c_str(), H5P_DEFAULT); - }, - "H5Dclose", &H5Dclose, - "H5Dget_space", &H5Dget_space, - "H5Dget_type", &H5Dget_type, - "H5Dread", - [] (hid_t id, hid_t mem_type_id, void* dest_p) - { - return H5Dread(id, mem_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, dest_p); - }); - } -} // read_addr - -// TempSpec: for atomic types -template < typename Out_Data_Type, bool = true > -struct Reader_as_atomic -{ - template < typename Out_Data_Storage > - void operator () (hid_t root_id, const std::string& loc_path, const std::string& loc_name, - Out_Data_Storage& dest) - { - static_assert(can_read< Out_Data_Type >::value, - "Reader_impl: expected a readable destination"); - static_assert(read_as_atomic< Out_Data_Type >::value, - "Reader_impl: expected a type readable as atomic"); - read_addr< Out_Data_Type, Out_Data_Storage >(root_id, loc_path, loc_name, dest, nullptr); - } -}; - -// TempSpec: for compound types -template < typename Out_Data_Type > -struct Reader_as_atomic< Out_Data_Type, false > -{ - template < typename Out_Data_Storage > - void operator () (hid_t root_id, const std::string& loc_path, const std::string& loc_name, - Out_Data_Storage& dest, const Compound_Map* compound_map_ptr) - { - static_assert(can_read< Out_Data_Type >::value, - "Reader_impl: expected a readable destination"); - static_assert(not read_as_atomic< Out_Data_Type >::value, - "Reader_impl: expected a type readable as compound"); - read_addr< Out_Data_Type, Out_Data_Storage >(root_id, loc_path, loc_name, dest, compound_map_ptr); - } -}; - -template < typename Out_Data_Type > -struct Reader : public Reader_as_atomic< Out_Data_Type, read_as_atomic< Out_Data_Type >::value > -{}; +template < typename Data_Type > +struct Reader +{ + template < typename ...Args > + void operator () (hid_t grp_id, const std::string& name, + Data_Type & out, Args&& ...args) const + { + Reader_Base reader_base(grp_id, name); + if (reader_base.dspace_size == 1) + { + Reader_helper< mem_type_class< Data_Type >::value, Data_Type >()( + reader_base, &out, std::forward< Args >(args)...); + } + else if (std::is_same< Data_Type, std::string >::value + and reader_base.file_dtype_class == H5T_STRING + and not reader_base.file_dtype_is_vlen_str + and reader_base.file_dtype_size == 1) + { + std::vector< std::array< char, 1 > > char_buff(reader_base.dspace_size); + Reader_helper< 2, std::array< char, 1 > >()( + reader_base, char_buff.data(), std::forward< Args >(args)...); + reinterpret_cast< std::string& >(out).assign(&char_buff[0][0], reader_base.dspace_size); + } + else + { + throw Exception("reading scalar, but dataspace size is not 1"); + } + } +}; +template < typename Data_Type > +struct Reader< std::vector< Data_Type > > +{ + template < typename ...Args > + void operator () (hid_t grp_id, const std::string& name, + std::vector< Data_Type> & out, Args&& ...args) const + { + Reader_Base reader_base(grp_id, name); + out.clear(); + out.resize(reader_base.dspace_size); + Reader_helper< mem_type_class< Data_Type >::value, Data_Type >()( + reader_base, out.data(), std::forward< Args >(args)...); + } +}; + +// Writer_helper_base +// Common base for Write_helper atomic/compound +struct Writer_helper_base +{ + static HDF_Object_Holder create(hid_t grp_id, const std::string& loc_name, bool as_ds, + hid_t dspace_id, hid_t file_dtype_id) + { + HDF_Object_Holder obj_id_holder; + if (as_ds) + { + obj_id_holder.load( + Util::wrap(H5Dcreate2, grp_id, loc_name.c_str(), file_dtype_id, dspace_id, + H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT), + Util::wrapped_closer(H5Dclose)); + } + else + { + obj_id_holder.load( + Util::wrap(H5Acreate2, grp_id, loc_name.c_str(), file_dtype_id, dspace_id, + H5P_DEFAULT, H5P_DEFAULT), + Util::wrapped_closer(H5Aclose)); + } + return obj_id_holder; + } + static void write(hid_t obj_id, bool as_ds, hid_t mem_dtype_id, const void* in) + { + if (as_ds) + { + Util::wrap(H5Dwrite, obj_id, mem_dtype_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, in); + } + else + { + Util::wrap(H5Awrite, obj_id, mem_dtype_id, in); + } + } + static void create_and_write(hid_t grp_id, const std::string& loc_name, bool as_ds, + hid_t dspace_id, hid_t mem_dtype_id, hid_t file_dtype_id, + const void* in) + { + HDF_Object_Holder obj_id_holder(create(grp_id, loc_name, as_ds, dspace_id, file_dtype_id)); + write(obj_id_holder.id, as_ds, mem_dtype_id, in); + } +}; // struct Writer_helper_base + +// Writer_helper +// Branch on memory type classes +template < int, typename > +struct Writer_helper; + +// numeric +template < typename In_Data_Type > +struct Writer_helper< 1, In_Data_Type > + : public Writer_helper_base +{ + void operator () (hid_t grp_id, const std::string& loc_name, bool as_ds, + hid_t dspace_id, size_t, + const In_Data_Type * in, hid_t file_dtype_id = 0) const + { + assert(std::is_integral< In_Data_Type >::value or std::is_floating_point< In_Data_Type >::value); + hid_t mem_dtype_id = get_mem_type< In_Data_Type >::id(); + if (file_dtype_id == 0) + { + file_dtype_id = mem_dtype_id; + } + Writer_helper_base::create_and_write( + grp_id, loc_name, as_ds, + dspace_id, mem_dtype_id, file_dtype_id, + in); + } +}; + +// fixed-length string +template < typename In_Data_Type > +struct Writer_helper< 2, In_Data_Type > + : public Writer_helper_base +{ + void operator () (hid_t grp_id, const std::string& loc_name, bool as_ds, + hid_t dspace_id, size_t sz, + const In_Data_Type * in, hid_t file_dtype_id = 0) const + { + HDF_Object_Holder mem_dtype_id_holder; + HDF_Object_Holder file_dtype_id_holder; + std::vector< const char * > charptr_buff; + const void * vptr_in = in; + if (file_dtype_id >= 0) + { + mem_dtype_id_holder = Util::make_str_type(sizeof(In_Data_Type)); + if (file_dtype_id == 0) + { + file_dtype_id = mem_dtype_id_holder.id; + } + else // file_dtype_id > 0 + { + file_dtype_id_holder = Util::make_str_type(file_dtype_id); + file_dtype_id = file_dtype_id_holder.id; + } + } + else // file_dtype_id < 0: write as varlen strings + { + mem_dtype_id_holder = Util::make_str_type(-1); + file_dtype_id = mem_dtype_id_holder.id; + // prepare array of pointers + charptr_buff.resize(sz); + for (hsize_t i = 0; i < sz; ++i) + { + charptr_buff[i] = &in[i][0]; + } + vptr_in = charptr_buff.data(); + } + Writer_helper_base::create_and_write( + grp_id, loc_name, as_ds, + dspace_id, mem_dtype_id_holder.id, file_dtype_id, + vptr_in); + } +}; + +// variable-length string +template <> +struct Writer_helper< 3, std::string > + : public Writer_helper_base +{ + void operator () (hid_t grp_id, const std::string& loc_name, bool as_ds, + hid_t dspace_id, size_t sz, + const std::string * in, hid_t file_dtype_id = -1) const + { + HDF_Object_Holder mem_dtype_id_holder; + std::vector< const char * > charptr_buff; + std::vector< char > char_buff; + const void * vptr_in; + if (file_dtype_id == -1) // varlen to varlen + { + mem_dtype_id_holder = Util::make_str_type(-1); + charptr_buff.resize(sz); + for (hsize_t i = 0; i < sz; ++i) + { + charptr_buff[i] = in[i].data(); + } + vptr_in = charptr_buff.data(); + } + else // varlen to fixlen + { + assert(file_dtype_id > 0 or sz == 1); // file_dtype_id == 0 only allowed for single strings + size_t slen = file_dtype_id > 0 ? file_dtype_id : in[0].size() + 1; + assert(slen <= std::numeric_limits< long >::max()); + mem_dtype_id_holder = Util::make_str_type(slen); + char_buff.resize(sz * slen); + for (hsize_t i = 0; i < sz; ++i) + { + for (size_t j = 0; j < slen - 1; ++j) + { + char_buff[i * slen + j] = j < in[i].size()? in[i][j] : '\0'; + } + char_buff[i * slen + slen - 1] = '\0'; + } + vptr_in = char_buff.data(); + } + Writer_helper_base::create_and_write( + grp_id, loc_name, as_ds, + dspace_id, mem_dtype_id_holder.id, mem_dtype_id_holder.id, + vptr_in); + } +}; + +// compound +template < typename In_Data_Type > +struct Writer_helper< 4, In_Data_Type > + : public Writer_helper_base +{ + void operator () (hid_t grp_id, const std::string& loc_name, bool as_ds, + hid_t dspace_id, size_t sz, + const In_Data_Type * in, const Compound_Map& cm) const + { + HDF_Object_Holder obj_id_holder; + // create object + { + // create the file type + HDF_Object_Holder file_dtype_id_holder( + cm.build_type(sizeof(In_Data_Type), nullptr, false)); + obj_id_holder = Writer_helper_base::create( + grp_id, loc_name, as_ds, + dspace_id, file_dtype_id_holder.id); + } + // define functor that selects members which can be written with implicit conversion + auto implicit_conversion = [] (const detail::Compound_Member_Description& e) { + return (e.is_numeric() + or e.is_char_array()); + }; + // write fields which do not need conversion, all-in-one + { + HDF_Object_Holder mem_dtype_id_holder( + cm.build_type(sizeof(In_Data_Type), implicit_conversion, true)); + Writer_helper_base::write(obj_id_holder.id, as_ds, mem_dtype_id_holder.id, in); + } + // write fields which need conversion, one-by-one + { + auto mptr_l = cm.get_member_ptr_list(); + for (const auto& p : mptr_l) + { + const detail::Compound_Member_Description& e = *p.first.back(); + if (implicit_conversion(e)) continue; + if (not as_ds) throw Exception("string in compound is supported in datasets, but not attributes"); + size_t mem_offset = p.second; + if (e.is_string()) + { + // prepare memory vector of char* + std::vector< const char * > charptr_buff(sz); + for (size_t i = 0; i < sz; ++i) + { + charptr_buff[i] = reinterpret_cast< const std::string * >( + reinterpret_cast< const char * >(&in[i]) + mem_offset)->data(); + } + // create flat hdf5 type + //HDF_Object_Holder mem_dtype_id_holder(Compound_Map::build_flat_type(p.first)); + HDF_Object_Holder mem_dtype_id_holder( + cm.build_type(sizeof(In_Data_Type), + [&e] (const detail::Compound_Member_Description& _e) { + return &_e == &e; + }, + false)); + Writer_helper_base::write(obj_id_holder.id, as_ds, mem_dtype_id_holder.id, charptr_buff.data()); + } + } + } + } +}; + +// Writer +// Struct branches on data argument type: +// if std::vector, it writes a simple extent; +// if not std::vector, it writes a scalar. +template < typename In_Data_Type > +struct Writer +{ + template < typename ...Args > + void operator () (hid_t grp_id, const std::string& loc_name, bool as_ds, + const In_Data_Type & in, Args&& ...args) const + { + // create dataspace + HDF_Object_Holder dspace_id_holder( + Util::wrap(H5Screate, H5S_SCALAR), + Util::wrapped_closer(H5Sclose)); + Writer_helper< mem_type_class< In_Data_Type >::value, In_Data_Type >()( + grp_id, loc_name, as_ds, + dspace_id_holder.id, 1, + &in, std::forward< Args >(args)...); + } +}; + + +template < typename In_Data_Type > +struct Writer< std::vector< In_Data_Type > > +{ + template < typename ...Args > + void operator () (hid_t grp_id, const std::string& loc_name, bool as_ds, + const std::vector< In_Data_Type > & in, Args&& ...args) const + { + assert(not in.empty()); + // create dataspace + hsize_t sz = in.size(); + HDF_Object_Holder dspace_id_holder( + Util::wrap(H5Screate_simple, 1, &sz, nullptr), + Util::wrapped_closer(H5Sclose)); + Writer_helper< mem_type_class< In_Data_Type >::value, In_Data_Type >()( + grp_id, loc_name, as_ds, + dspace_id_holder.id, sz, + in.data(), std::forward< Args >(args)...); + } +}; } // namespace detail /// An HDF5 file reader -class File_Reader +class File { public: - File_Reader() : _file_id(0) {} - File_Reader(const std::string& file_name) : _file_id(0) { open(file_name); } - File_Reader(const File_Reader&) = delete; - File_Reader& operator = (const File_Reader&) = delete; - ~File_Reader() { if (is_open()) close(); } + File() : _file_id(0) {} + File(const std::string& file_name, bool rw = false) : _file_id(0) { open(file_name, rw); } + File(const File&) = delete; + File& operator = (const File&) = delete; + ~File() { if (is_open()) close(); } bool is_open() const { return _file_id > 0; } + bool is_rw() const { return _rw; } const std::string& file_name() const { return _file_name; } - void open(const std::string& file_name) + void create(const std::string& file_name, bool truncate = false) + { + assert(not is_open()); + _file_name = file_name; + _rw = true; + _file_id = H5Fcreate(file_name.c_str(), truncate? H5F_ACC_TRUNC : H5F_ACC_EXCL, H5P_DEFAULT, H5P_DEFAULT); + if (not is_open()) throw Exception(_file_name + ": error in H5Fcreate"); + } + void open(const std::string& file_name, bool rw = false) { assert(not is_open()); _file_name = file_name; - _file_id = H5Fopen(file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); + _rw = rw; + _file_id = H5Fopen(file_name.c_str(), not rw? H5F_ACC_RDONLY : H5F_ACC_RDWR, H5P_DEFAULT); if (not is_open()) throw Exception(_file_name + ": error in H5Fopen"); } void close() @@ -574,9 +1444,55 @@ _file_id = 0; _file_name.clear(); } + static bool is_valid_file(const std::string& file_name) + { + std::ifstream ifs(file_name); + if (not ifs) return false; + (void)ifs.peek(); + if (not ifs) return false; + ifs.close(); + auto status = H5Fis_hdf5(file_name.c_str()); + if (status <= 0) return 0; + auto file_id = H5Fopen(file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); // error if file is truncated + if (file_id < 0) return 0; + status = H5Fclose(file_id); + if (status < 0) throw Exception(file_name + ": error in H5Fclose"); + return 1; + } - /// Determine if address is an attribute or dataset - bool exists(const std::string& loc_full_name) const + static int get_object_count() + { + return H5Fget_obj_count(H5F_OBJ_ALL, H5F_OBJ_ALL); + } + + /// Check if a group exists + bool group_exists(const std::string& loc_full_name) const + { + assert(is_open()); + assert(not loc_full_name.empty() and loc_full_name[0] == '/'); + std::string loc_path; + std::string loc_name; + std::tie(loc_path, loc_name) = split_full_name(loc_full_name); + // check all path elements exist, except for what is to the right of the last '/' + // sets active path + if (not path_exists(loc_path)) return false; + return check_object_type(loc_full_name, H5O_TYPE_GROUP); + } + /// Check if a dataset exists + bool dataset_exists(const std::string& loc_full_name) const + { + assert(is_open()); + assert(not loc_full_name.empty() and loc_full_name[0] == '/'); + std::string loc_path; + std::string loc_name; + std::tie(loc_path, loc_name) = split_full_name(loc_full_name); + // check all path elements exist, except for what is to the right of the last '/' + // sets active path + if (not path_exists(loc_path)) return false; + return check_object_type(loc_full_name, H5O_TYPE_DATASET); + } + /// Check if attribute exists + bool attribute_exists(const std::string& loc_full_name) const { assert(is_open()); assert(not loc_full_name.empty() and loc_full_name[0] == '/'); @@ -585,45 +1501,173 @@ std::tie(loc_path, loc_name) = split_full_name(loc_full_name); int status; // check all path elements exist, except for what is to the right of the last '/' - size_t pos = 0; - while (true) - { - ++pos; - pos = loc_full_name.find('/', pos); - if (pos == std::string::npos) break; - std::string tmp = loc_full_name.substr(0, pos); - status = H5Lexists(_file_id, tmp.c_str(), H5P_DEFAULT); - if (status < 0) throw Exception(loc_full_name + ": error in H5Lexists"); - if (not status) return false; - status = H5Oexists_by_name(_file_id, tmp.c_str(), H5P_DEFAULT); - if (status < 0) throw Exception(loc_full_name + ": error in H5Oexists_by_name"); - if (not status) return false; - } + // sets active path + if (not path_exists(loc_path)) return false; + // check if target is an attribute status = H5Aexists_by_name(_file_id, loc_path.c_str(), loc_name.c_str(), H5P_DEFAULT); - if (status < 0) throw Exception(loc_full_name + ": error in H5Aexists_by_name"); - if (status) return true; - // not an attribute: try to open as a dataset - hid_t ds_id = H5Dopen(_file_id, loc_full_name.c_str(), H5P_DEFAULT); - if (ds_id < 0) return false; - status = H5Dclose(ds_id); - if (status < 0) throw Exception(loc_full_name + ": error in H5Dclose"); - return true; + if (status < 0) throw Exception("error in H5Aexists_by_name"); + return status > 0; + } + bool exists(const std::string& loc_full_name) const + { + return attribute_exists(loc_full_name) or dataset_exists(loc_full_name); } + /// Read attribute or dataset at address - template < typename Out_Data_Type, typename ...Args > - void read(const std::string& loc_full_name, Args&& ...args) const + template < typename Data_Storage, typename ...Args > + void read(const std::string& loc_full_name, Data_Storage& out, Args&& ...args) const { assert(is_open()); assert(not loc_full_name.empty() and loc_full_name[0] == '/'); std::string loc_path; std::string loc_name; std::tie(loc_path, loc_name) = split_full_name(loc_full_name); - detail::Reader< Out_Data_Type >()(_file_id, loc_path, loc_name, std::forward< Args >(args)...); + Exception::active_path() = loc_full_name; + detail::HDF_Object_Holder grp_id_holder( + detail::Util::wrap(H5Oopen, _file_id, loc_path.c_str(), H5P_DEFAULT), + detail::Util::wrapped_closer(H5Oclose)); + detail::Reader< Data_Storage >()(grp_id_holder.id, loc_name, + out, std::forward< Args >(args)...); + } + /// Write attribute or dataset + template < typename In_Data_Storage, typename ...Args > + void write(const std::string& loc_full_name, bool as_ds, const In_Data_Storage& in, Args&& ...args) const + { + assert(is_open()); + assert(is_rw()); + assert(not loc_full_name.empty() and loc_full_name[0] == '/'); + assert(not exists(loc_full_name)); + std::string loc_path; + std::string loc_name; + std::tie(loc_path, loc_name) = split_full_name(loc_full_name); + Exception::active_path() = loc_full_name; + detail::HDF_Object_Holder grp_id_holder; + std::string grp_path = loc_path != "/"? loc_path.substr(0, loc_path.size() - 1) : "/"; + if (group_exists(grp_path) or dataset_exists(grp_path)) + { + grp_id_holder.load( + detail::Util::wrap(H5Oopen, _file_id, grp_path.c_str(), H5P_DEFAULT), + detail::Util::wrapped_closer(H5Oclose)); + } + else + { + detail::HDF_Object_Holder lcpl_id_holder( + detail::Util::wrap(H5Pcreate, H5P_LINK_CREATE), + detail::Util::wrapped_closer(H5Pclose)); + detail::Util::wrap(H5Pset_create_intermediate_group, lcpl_id_holder.id, 1); + grp_id_holder.load( + detail::Util::wrap(H5Gcreate2, _file_id, grp_path.c_str(), lcpl_id_holder.id, H5P_DEFAULT, H5P_DEFAULT), + detail::Util::wrapped_closer(H5Gclose)); + } + detail::Writer< In_Data_Storage >()(grp_id_holder.id, loc_name, as_ds, in, std::forward< Args >(args)...); + } + template < typename In_Data_Storage, typename ...Args > + void write_dataset(const std::string& loc_full_name, const In_Data_Storage& in, Args&& ...args) const + { + write(loc_full_name, true, in, std::forward< Args >(args)...); } + template < typename In_Data_Storage, typename ...Args > + void write_attribute(const std::string& loc_full_name, const In_Data_Storage& in, Args&& ...args) const + { + write(loc_full_name, false, in, std::forward< Args >(args)...); + } + + /// Return a list of names (groups/datasets) in the given group + std::vector< std::string > list_group(const std::string& group_full_name) const + { + std::vector< std::string > res; + Exception::active_path() = group_full_name; + assert(group_exists(group_full_name)); + detail::HDF_Object_Holder g_id_holder( + detail::Util::wrap(H5Gopen2, _file_id, group_full_name.c_str(), H5P_DEFAULT), + detail::Util::wrapped_closer(H5Gclose)); + H5G_info_t g_info; + detail::Util::wrap(H5Gget_info, g_id_holder.id, &g_info); + res.resize(g_info.nlinks); + for (unsigned i = 0; i < res.size(); ++i) + { + // find size first + long sz1 = detail::Util::wrap(H5Lget_name_by_idx, _file_id, group_full_name.c_str(), + H5_INDEX_NAME, H5_ITER_NATIVE, i, nullptr, 0, H5P_DEFAULT); + res[i].resize(sz1); + long sz2 = detail::Util::wrap(H5Lget_name_by_idx, _file_id, group_full_name.c_str(), + H5_INDEX_NAME, H5_ITER_NATIVE, i, &res[i][0], sz1+1, H5P_DEFAULT); + if (sz1 != sz2) throw Exception("error in H5Lget_name_by_idx: sz1!=sz2"); + } + return res; + } // list_group + /// Return a list of attributes of the given object + std::vector< std::string > get_attr_list(const std::string& loc_full_name) const + { + std::vector< std::string > res; + Exception::active_path() = loc_full_name; + assert(group_exists(loc_full_name) or dataset_exists(loc_full_name)); + detail::HDF_Object_Holder id_holder( + detail::Util::wrap(H5Oopen, _file_id, loc_full_name.c_str(), H5P_DEFAULT), + detail::Util::wrapped_closer(H5Oclose)); + H5O_info_t info; + detail::Util::wrap(H5Oget_info, id_holder.id, &info); + // num_attrs in info.num_attrs + for (unsigned i = 0; i < (unsigned)info.num_attrs; ++i) + { + int name_sz = detail::Util::wrap(H5Aget_name_by_idx, id_holder.id, ".", + H5_INDEX_NAME, H5_ITER_NATIVE, i, nullptr, 0, H5P_DEFAULT); + std::string tmp(name_sz, '\0'); + detail::Util::wrap(H5Aget_name_by_idx, id_holder.id, ".", + H5_INDEX_NAME, H5_ITER_NATIVE, i, &tmp[0], name_sz + 1, H5P_DEFAULT); + res.emplace_back(std::move(tmp)); + } + return res; + } // get_attr_list + /// Return a list of struct field names in the given dataset/attribute + std::vector< std::string > get_struct_members(const std::string& loc_full_name) const + { + std::vector< std::string > res; + Exception::active_path() = loc_full_name; + assert(attribute_exists(loc_full_name) or dataset_exists(loc_full_name)); + detail::HDF_Object_Holder attr_id_holder; + detail::HDF_Object_Holder ds_id_holder; + detail::HDF_Object_Holder type_id_holder; + if (attribute_exists(loc_full_name)) + { + std::string loc_path; + std::string loc_name; + std::tie(loc_path, loc_name) = split_full_name(loc_full_name); + attr_id_holder.load( + detail::Util::wrap(H5Aopen_by_name, _file_id, loc_path.c_str(), loc_name.c_str(), + H5P_DEFAULT, H5P_DEFAULT), + detail::Util::wrapped_closer(H5Aclose)); + type_id_holder.load( + detail::Util::wrap(H5Aget_type, attr_id_holder.id), + detail::Util::wrapped_closer(H5Tclose)); + } + else + { + ds_id_holder.load( + detail::Util::wrap(H5Oopen, _file_id, loc_full_name.c_str(), H5P_DEFAULT), + detail::Util::wrapped_closer(H5Oclose)); + type_id_holder.load( + detail::Util::wrap(H5Dget_type, ds_id_holder.id), + detail::Util::wrapped_closer(H5Tclose)); + } + if (detail::Util::wrap(H5Tget_class, type_id_holder.id) == H5T_COMPOUND) + { + // type is indeed a struct + int nmem = detail::Util::wrap(H5Tget_nmembers, type_id_holder.id); + for (int i = 0; i < nmem; ++i) + { + char* s = detail::Util::wrap(H5Tget_member_name, type_id_holder.id, i); + res.emplace_back(s); + free(s); + } + } + return res; + } // get_struct_members private: std::string _file_name; hid_t _file_id; + bool _rw; /// Split a full name into path and name static std::pair< std::string, std::string > split_full_name(const std::string& full_name) @@ -633,7 +1677,57 @@ std::string name = last_slash_pos != std::string::npos? full_name.substr(last_slash_pos + 1) : full_name; return std::make_pair(path, name); } // split_full_name -}; // class File_Reader + + /// Determine if a path to an element exists + bool path_exists(const std::string& full_path_name) const + { + assert(is_open()); + assert(not full_path_name.empty() + and full_path_name[0] == '/' + and full_path_name[full_path_name.size() - 1] == '/'); + Exception::active_path() = full_path_name; + // check all path elements exist, except for what is to the right of the last '/' + size_t pos = 0; + while (true) + { + ++pos; + pos = full_path_name.find('/', pos); + if (pos == std::string::npos) break; + std::string tmp = full_path_name.substr(0, pos); + // check link exists + if (not detail::Util::wrap(H5Lexists, _file_id, tmp.c_str(), H5P_DEFAULT)) return false; + // check object exists + if (not detail::Util::wrap(H5Oexists_by_name, _file_id, tmp.c_str(), H5P_DEFAULT)) return false; + // open object in order to check type + detail::HDF_Object_Holder o_id_holder( + detail::Util::wrap(H5Oopen, _file_id, tmp.c_str(), H5P_DEFAULT), + detail::Util::wrapped_closer(H5Oclose)); + // check object is a group + H5O_info_t o_info; + detail::Util::wrap(H5Oget_info, o_id_holder.id, &o_info); + if (o_info.type != H5O_TYPE_GROUP) return false; + } + return true; + } // path_exists() + + /// Check if a group exists + bool check_object_type(const std::string& loc_full_name, H5O_type_t type_id) const + { + // check link exists + if (loc_full_name != "/" + and not detail::Util::wrap(H5Lexists, _file_id, loc_full_name.c_str(), H5P_DEFAULT)) return false; + // check object exists + if (not detail::Util::wrap(H5Oexists_by_name, _file_id, loc_full_name.c_str(), H5P_DEFAULT)) return false; + // open object in order to check type + detail::HDF_Object_Holder o_id_holder( + detail::Util::wrap(H5Oopen, _file_id, loc_full_name.c_str(), H5P_DEFAULT), + detail::Util::wrapped_closer(H5Oclose)); + // check object is a group + H5O_info_t o_info; + detail::Util::wrap(H5Oget_info, o_id_holder.id, &o_info); + return o_info.type == type_id; + } +}; // class File } // namespace hdf5_tools diff -Nru fast5-0~20150918/src/Makefile fast5-0.5.6/src/Makefile --- fast5-0~20150918/src/Makefile 2015-09-18 17:12:49.000000000 +0000 +++ fast5-0.5.6/src/Makefile 2016-07-28 20:14:13.000000000 +0000 @@ -1,2 +1,33 @@ -a: a.cpp fast5.hpp hdf5_tools.hpp - g++ -std=c++11 -O0 -g3 -ggdb -fno-eliminate-unused-debug-types -Wall -Wextra -pedantic -Wno-unused-parameter -o $@ $^ -L /usr/local/lib -lhdf5 +.SUFFIXES: +MAKEFLAGS += -r +SHELL := /bin/bash +.DELETE_ON_ERROR: +.PHONY: all help list clean check_hdf5 + +HDF5_DIR = /usr/local +HDF5_INCLUDE_DIR = ${HDF5_DIR}/include +HDF5_LIB_DIR = ${HDF5_DIR}/lib +HDF5_LIB = hdf5 + +TARGETS = f5dump f5dump-full hdf5-mod f5-mod + +all: ${TARGETS} + +print-%: + @echo '$*=$($*)' + +help: ## This help. + @awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST) + +list: + @echo "TARGETS=${TARGETS}" + +clean: + rm -rf ${TARGETS} + +check_hdf5: + @[ -f "${HDF5_INCLUDE_DIR}/H5pubconf.h" ] || { echo "HDF5 headers not found" >&2; exit 1; } + @[ -f "${HDF5_LIB_DIR}/lib${HDF5_LIB}.so" ] || [ -f "${HDF5_LIB_DIR}/lib${HDF5_LIB}.a" ] || { echo "HDF5 library not found" >&2; exit 1; } + +%: %.cpp fast5.hpp hdf5_tools.hpp | check_hdf5 + ${CXX} -std=c++11 -O0 -g3 -ggdb -fno-eliminate-unused-debug-types -Wall -Wextra -Wpedantic -isystem ${HDF5_INCLUDE_DIR} -o $@ $< -L${HDF5_LIB_DIR} -Wl,--rpath=${HDF5_LIB_DIR} -l${HDF5_LIB} -lpthread -lz -ldl diff -Nru fast5-0~20150918/src/tmp.cpp fast5-0.5.6/src/tmp.cpp --- fast5-0~20150918/src/tmp.cpp 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/src/tmp.cpp 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1,207 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +using namespace std; + +template < typename T, typename U > +std::size_t offset_of(U T::* mem_ptr) +{ + return reinterpret_cast< std::size_t >(&(((T*)0)->*mem_ptr)); +} + +struct A +{ + int val_1; + unsigned val_2; + float val_3; + int val_4; + string val_5; +}; + +int main(int argc, char * argv[]) +{ + if (argc != 2) + { + cerr << "use: " << argv[0] << " " << endl; + exit(EXIT_FAILURE); + } + // + // create file, fail if existing + // + auto file_id = H5Fcreate(argv[1], H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT); + assert(file_id > 0); + auto scalar_space_id = H5Screate(H5S_SCALAR); + assert(scalar_space_id > 0); + auto lcpl_id = H5Pcreate(H5P_LINK_CREATE); + H5Pset_create_intermediate_group(lcpl_id, 1); + + // + // write numeric scalar attribute + // + // create group + auto grp_id = H5Gcreate2(file_id, "/Group_1/Subgroup_1_1", lcpl_id, H5P_DEFAULT, H5P_DEFAULT); + assert(grp_id > 0); + auto attr1_id = H5Acreate2(grp_id, "Attribute_1_1_1", H5T_NATIVE_INT, scalar_space_id, + H5P_DEFAULT, H5P_DEFAULT); + assert(attr1_id > 0); + int i = 42; + auto status = H5Awrite(attr1_id, H5T_NATIVE_INT, &i); + assert(status >= 0); + H5Gclose(grp_id); + H5Aclose(attr1_id); + + // + // write numeric vector dataset + // + { + vector< float > v = { 1.0, 2.0, 3.0 }; + hsize_t v_size = v.size(); + auto v_space_id = H5Screate_simple(1, &v_size, nullptr); + auto ds1_id = H5Dcreate2(file_id, "/Group_2/Subgroup_2_1/Dataset_2_1_1", H5T_NATIVE_FLOAT, v_space_id, + lcpl_id, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(ds1_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, v.data()); + assert(status >= 0); + H5Dclose(ds1_id); + H5Sclose(v_space_id); + } + + // + // write compound scalar dataset + // + { + //A a{ 1, 2, 3.2, 4, true }; + A a{ 1, 2, 3.2, 4, "xoxo" }; + auto a_type_id = H5Tcreate(H5T_COMPOUND, sizeof(A)); + vector< hid_t > a_stype_id; + status = H5Tinsert(a_type_id, "val_2", offset_of(&A::val_2), H5T_NATIVE_UINT); + assert(status >= 0); + status = H5Tinsert(a_type_id, "val_3", offset_of(&A::val_3), H5T_NATIVE_FLOAT); + assert(status >= 0); + status = H5Tinsert(a_type_id, "val_1", offset_of(&A::val_1), H5T_NATIVE_INT); + assert(status >= 0); + auto ds2_id = H5Dcreate2(file_id, "/Group_2/Subgroup_2_1/Dataset_2_1_2", a_type_id, scalar_space_id, + lcpl_id, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(ds2_id, a_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, &a); + assert(status >= 0); + H5Dclose(ds2_id); + H5Tclose(a_type_id); + } + + // + // write compound scalar dataset + // + { + //vector< A > a_v{{ 1, 2, 3.1, 4, true }, { 11, 12, 13.1, 14, false }, { 21, 22, 23.1, 24, true }}; + vector< A > a_v{{ 1, 2, 3.1, 4, "xoxo" }, { 11, 12, 13.1, 14, "xexe" }, { 21, 22, 23.1, 24, "xixi" }}; + auto a_type_id = H5Tcreate(H5T_COMPOUND, sizeof(A)); + status = H5Tinsert(a_type_id, "val_1", offset_of(&A::val_1), H5T_NATIVE_INT); + assert(status >= 0); + //status = H5Tinsert(a_type_id, "val_2", offset_of(&A::val_2), H5T_NATIVE_UINT); + //assert(status >= 0); + status = H5Tinsert(a_type_id, "val_3", offset_of(&A::val_3), H5T_NATIVE_FLOAT); + assert(status >= 0); + hsize_t a_v_size = a_v.size(); + auto a_v_space_id = H5Screate_simple(1, &a_v_size, nullptr); + auto ds3_id = H5Dcreate2(file_id, "/Group_2/Subgroup_2_1/Dataset_2_1_3", a_type_id, a_v_space_id, + lcpl_id, H5P_DEFAULT, H5P_DEFAULT); + status = H5Dwrite(ds3_id, a_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, a_v.data()); + assert(status >= 0); + H5Dclose(ds3_id); + H5Sclose(a_v_space_id); + H5Tclose(a_type_id); + } + + // + // write compound scalar dataset in 2 steps + // + { + //vector< A > a_v{{ 1, 2, 3.1, 4, true }, { 11, 12, 13.1, 14, false }, { 21, 22, 23.1, 24, true }}; + vector< A > a_v{{ 100, 2, 3.1, 4, "xoxo" }, { 111, 12, 13.1, 14, "xexe" }, { 121, 22, 23.1, 24, "xixi" }}; + hid_t ds4_id; + // create dataset + { + auto a_type_id = H5Tcreate(H5T_COMPOUND, sizeof(A)); + status = H5Tinsert(a_type_id, "val_1", offset_of(&A::val_1), H5T_NATIVE_INT); + assert(status >= 0); + status = H5Tinsert(a_type_id, "val_2", offset_of(&A::val_2), H5T_NATIVE_UINT); + assert(status >= 0); + status = H5Tinsert(a_type_id, "val_3", offset_of(&A::val_3), H5T_NATIVE_FLOAT); + assert(status >= 0); + hid_t val_5_type_id = H5Tcopy(H5T_C_S1); + status = H5Tset_size(val_5_type_id, H5T_VARIABLE); + assert(status >= 0); + status = H5Tinsert(a_type_id, "val_5", offset_of(&A::val_5), val_5_type_id); + assert(status >= 0); + hsize_t a_v_size = a_v.size(); + auto a_v_space_id = H5Screate_simple(1, &a_v_size, nullptr); + ds4_id = H5Dcreate2(file_id, "/Group_2/Subgroup_2_1/Dataset_2_1_4", a_type_id, a_v_space_id, + lcpl_id, H5P_DEFAULT, H5P_DEFAULT); + H5Sclose(a_v_space_id); + H5Tclose(val_5_type_id); + H5Tclose(a_type_id); + } + // write val_1 + { + auto a_type_id = H5Tcreate(H5T_COMPOUND, sizeof(A)); + status = H5Tinsert(a_type_id, "val_1", offset_of(&A::val_1), H5T_NATIVE_INT); + assert(status >= 0); + status = H5Dwrite(ds4_id, a_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, a_v.data()); + assert(status >= 0); + H5Tclose(a_type_id); + } + // write val_2 + { + auto a_type_id = H5Tcreate(H5T_COMPOUND, sizeof(A)); + status = H5Tinsert(a_type_id, "val_2", offset_of(&A::val_2), H5T_NATIVE_UINT); + assert(status >= 0); + status = H5Dwrite(ds4_id, a_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, a_v.data()); + assert(status >= 0); + H5Tclose(a_type_id); + } + // write val_3 + { + auto a_type_id = H5Tcreate(H5T_COMPOUND, sizeof(A)); + status = H5Tinsert(a_type_id, "val_3", offset_of(&A::val_3), H5T_NATIVE_FLOAT); + assert(status >= 0); + //status = H5Dwrite(ds4_id, a_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, a_v.data()); + assert(status >= 0); + H5Tclose(a_type_id); + } + // write val_5 + { + auto a_type_id = H5Tcreate(H5T_COMPOUND, sizeof(const char *)); + hid_t val_5_type_id = H5Tcopy(H5T_C_S1); + status = H5Tset_size(val_5_type_id, H5T_VARIABLE); + assert(status >= 0); + status = H5Tinsert(a_type_id, "val_5", 0, val_5_type_id); + assert(status >= 0); + H5Tclose(val_5_type_id); + vector< const char * > charptr_buff(a_v.size()); + for (size_t i = 0; i < a_v.size(); ++i) + { + charptr_buff[i] = a_v[i].val_5.data(); + } + status = H5Dwrite(ds4_id, a_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, charptr_buff.data()); + assert(status >= 0); + H5Tclose(a_type_id); + } + H5Dclose(ds4_id); + } + + // + // clean up + // + H5Sclose(scalar_space_id); + H5Pclose(lcpl_id); + H5Fclose(file_id); +} diff -Nru fast5-0~20150918/.travis.Dockerfile.in fast5-0.5.6/.travis.Dockerfile.in --- fast5-0~20150918/.travis.Dockerfile.in 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/.travis.Dockerfile.in 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1,37 @@ +FROM debian:unstable +MAINTAINER Matei David +ARG DEBIAN_FRONTEND=noninteractive + +# use host timezone +ENV TZ=${TZ} +RUN ln -snf /usr/share/zoneinfo/${TZ} /etc/localtime && echo ${TZ} > /etc/timezone + +# install prerequisites +RUN apt-get update && \ + apt-get install -y \ + build-essential \ + libhdf5-dev \ + libboost-dev \ + libboost-python-dev \ + python2.7-minimal \ + python-setuptools \ + python-virtualenv + +# expose prerequisites settings +ENV HDF5_INCLUDE_DIR=/usr/include/hdf5/serial +ENV HDF5_LIB_DIR=/usr/lib/x86_64-linux-gnu/hdf5/serial +ENV BOOST_INCLUDE_DIR=/usr/include +ENV BOOST_LIB_DIR=/usr/lib/x86_64-linux-gnu + +# if necessary, specify compiler +#RUN apt-get install -y g++-4.9 g++-5 g++-6 +#ENV CC=gcc-4.9 +#ENV CXX=g++-4.9 + +# use host id +RUN groupadd --gid ${GROUP_ID} ${GROUP_NAME} +RUN useradd --create-home --uid ${USER_ID} --gid ${GROUP_ID} ${USER_NAME} +USER ${USER_NAME} + +VOLUME /data +WORKDIR /data diff -Nru fast5-0~20150918/.travis.yml fast5-0.5.6/.travis.yml --- fast5-0~20150918/.travis.yml 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/.travis.yml 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1,19 @@ +# travis.yml for github.com/mateidavid/fast5 + +sudo: required + +services: + - docker + +before_install: + - sudo apt-get update -y + - sudo apt-get install -y -o Dpkg::Options::="--force-confnew" docker-engine + - TZ=$(cat /etc/timezone) USER_ID=$(id -u) USER_NAME=$(id -un) GROUP_ID=$(id -g) GROUP_NAME=$(id -gn) envsubst <.travis.Dockerfile.in | docker build -t fast5 - + +install: + - docker run --rm -v $PWD:/data fast5 make -C src -e + - docker run --rm -v $PWD:/data fast5 bash -c 'virtualenv build-venv && source build-venv/bin/activate && make -C python -e develop' + +script: + - docker run --rm -v $PWD:/data fast5 bash -c 'src/hdf5-mod -f file.000.fast5 && src/f5-mod file.000.fast5 && src/f5dump file.000.fast5 && src/f5dump-full file.000.fast5' + - docker run --rm -v $PWD:/data fast5 bash -c 'source build-venv/bin/activate && python -c "import fast5; f = fast5.File(\"file.000.fast5\"); print(f.file_version()); print(f.have_eventdetection_events())"' diff -Nru fast5-0~20150918/VERSION fast5-0.5.6/VERSION --- fast5-0~20150918/VERSION 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/VERSION 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1 @@ +0.5.6 diff -Nru fast5-0~20150918/.version_files fast5-0.5.6/.version_files --- fast5-0~20150918/.version_files 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/.version_files 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1,2 @@ +VERSION +python/fast5/version.py diff -Nru fast5-0~20150918/.VERSION.in fast5-0.5.6/.VERSION.in --- fast5-0~20150918/.VERSION.in 1970-01-01 00:00:00.000000000 +0000 +++ fast5-0.5.6/.VERSION.in 2016-07-28 20:14:13.000000000 +0000 @@ -0,0 +1 @@ +${VERSION}