diff -Nru apache-parquet-cpp-1.1.1.20170524/ci/before_script_travis.sh apache-parquet-cpp-1.1.1.20170612/ci/before_script_travis.sh --- apache-parquet-cpp-1.1.1.20170524/ci/before_script_travis.sh 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/ci/before_script_travis.sh 2017-06-11 18:28:45.000000000 +0000 @@ -30,13 +30,11 @@ cmake -DPARQUET_CXXFLAGS=-Werror \ -DPARQUET_TEST_MEMCHECK=ON \ -DPARQUET_BUILD_BENCHMARKS=ON \ - -DPARQUET_ARROW=ON \ -DPARQUET_ARROW_LINKAGE=static \ -DPARQUET_GENERATE_COVERAGE=1 \ $TRAVIS_BUILD_DIR else cmake -DPARQUET_CXXFLAGS=-Werror \ - -DPARQUET_ARROW=ON \ -DPARQUET_ARROW_LINKAGE=static \ $TRAVIS_BUILD_DIR fi diff -Nru apache-parquet-cpp-1.1.1.20170524/ci/msvc-build.bat apache-parquet-cpp-1.1.1.20170612/ci/msvc-build.bat --- apache-parquet-cpp-1.1.1.20170524/ci/msvc-build.bat 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/ci/msvc-build.bat 2017-06-11 18:28:45.000000000 +0000 @@ -21,6 +21,11 @@ cd build SET PARQUET_TEST_DATA=%APPVEYOR_BUILD_FOLDER%\data +set PARQUET_CXXFLAGS=/MP + +if NOT "%CONFIGURATION%" == "Debug" ( + set PARQUET_CXXFLAGS="%PARQUET_CXXFLAGS% /WX" +) if "%CONFIGURATION%" == "Toolchain" ( conda install -y boost-cpp=1.63 brotli=0.6.0 zlib=1.2.11 snappy=1.1.4 thrift-cpp=0.10.0 -c conda-forge @@ -30,7 +35,7 @@ cmake -G "%GENERATOR%" ^ -DCMAKE_BUILD_TYPE=Release ^ -DPARQUET_BOOST_USE_SHARED=OFF ^ - -DPARQUET_CXXFLAGS="/MP" ^ + -DPARQUET_CXXFLAGS=%PARQUET_CXXFLAGS% ^ -DPARQUET_ZLIB_VENDORED=OFF ^ .. || exit /B @@ -42,7 +47,7 @@ cmake -G "%GENERATOR%" ^ -DCMAKE_BUILD_TYPE=%CONFIGURATION% ^ -DPARQUET_BOOST_USE_SHARED=OFF ^ - -DPARQUET_CXXFLAGS="/MP" ^ + -DPARQUET_CXXFLAGS=%PARQUET_CXXFLAGS% ^ .. || exit /B cmake --build . 
--config %CONFIGURATION% || exit /B diff -Nru apache-parquet-cpp-1.1.1.20170524/ci/travis_script_toolchain.sh apache-parquet-cpp-1.1.1.20170612/ci/travis_script_toolchain.sh --- apache-parquet-cpp-1.1.1.20170524/ci/travis_script_toolchain.sh 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/ci/travis_script_toolchain.sh 2017-06-11 18:28:45.000000000 +0000 @@ -54,7 +54,6 @@ cmake -DPARQUET_CXXFLAGS=-Werror \ -DPARQUET_TEST_MEMCHECK=ON \ -DPARQUET_ZLIB_VENDORED=off \ - -DPARQUET_ARROW=ON \ -DPARQUET_GENERATE_COVERAGE=1 \ $TRAVIS_BUILD_DIR diff -Nru apache-parquet-cpp-1.1.1.20170524/CMakeLists.txt apache-parquet-cpp-1.1.1.20170612/CMakeLists.txt --- apache-parquet-cpp-1.1.1.20170524/CMakeLists.txt 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/CMakeLists.txt 2017-06-11 18:28:45.000000000 +0000 @@ -110,9 +110,6 @@ option(PARQUET_BUILD_EXECUTABLES "Build the libparquet executable CLI tools" ON) - option(PARQUET_ARROW - "Build the Arrow support" - ON) option(PARQUET_ZLIB_VENDORED "Build our own zlib (some libz.a aren't configured for static linking)" ON) @@ -229,8 +226,12 @@ set(NO_COLOR "") endif() - add_test(${BENCHMARK_NAME} - ${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} benchmark ${BENCHMARK_PATH} ${NO_COLOR}) + if(WIN32) + add_test(${BENCHMARK_NAME} ${BENCHMARK_PATH} ${NO_COLOR}) + else() + add_test(${BENCHMARK_NAME} + ${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} benchmark ${BENCHMARK_PATH} ${NO_COLOR}) + endif() set_tests_properties(${BENCHMARK_NAME} PROPERTIES LABELS "benchmark") if(ARGN) set_tests_properties(${BENCHMARK_NAME} PROPERTIES ${ARGN}) @@ -367,7 +368,7 @@ # Thrift requires these definitions for some types that we use add_definitions(-DHAVE_INTTYPES_H -DHAVE_NETDB_H) if (MSVC) - add_definitions(-DNOMINMAX) + add_definitions(-DNOMINMAX -D_CRT_SECURE_NO_WARNINGS) else() add_definitions(-DHAVE_NETINET_IN_H -fPIC) endif() @@ -609,6 +610,10 @@ src/parquet/exception.cc src/parquet/types.cc + 
src/parquet/arrow/reader.cc + src/parquet/arrow/schema.cc + src/parquet/arrow/writer.cc + src/parquet/column/levels.cc src/parquet/column/reader.cc src/parquet/column/writer.cc @@ -686,6 +691,7 @@ add_subdirectory(src/parquet) add_subdirectory(src/parquet/api) +add_subdirectory(src/parquet/arrow) add_subdirectory(src/parquet/column) add_subdirectory(src/parquet/file) add_subdirectory(src/parquet/util) @@ -696,11 +702,6 @@ add_subdirectory(examples) add_subdirectory(tools) -# Arrow adapter -if (PARQUET_ARROW) - add_subdirectory(src/parquet/arrow) -endif() - add_custom_target(clean-all COMMAND ${CMAKE_BUILD_TOOL} clean COMMAND ${CMAKE_COMMAND} -P cmake_modules/clean-all.cmake diff -Nru apache-parquet-cpp-1.1.1.20170524/cmake_modules/FindBrotli.cmake apache-parquet-cpp-1.1.1.20170612/cmake_modules/FindBrotli.cmake --- apache-parquet-cpp-1.1.1.20170524/cmake_modules/FindBrotli.cmake 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/cmake_modules/FindBrotli.cmake 2017-06-11 18:28:45.000000000 +0000 @@ -72,6 +72,9 @@ ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}enc${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX} ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}dec${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX} ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}common${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(BROTLI_STATIC_LIBRARY_ENC ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}enc${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(BROTLI_STATIC_LIBRARY_DEC ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}dec${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) + set(BROTLI_STATIC_LIBRARY_COMMON ${BROTLI_LIBS}/${CMAKE_STATIC_LIBRARY_PREFIX}${BROTLI_LIB_NAME}common${BROTLI_MSVC_STATIC_LIB_SUFFIX}${CMAKE_STATIC_LIBRARY_SUFFIX}) set(BROTLI_SHARED_LIB 
${BROTLI_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${BROTLI_LIB_NAME}enc${CMAKE_SHARED_LIBRARY_SUFFIX} ${BROTLI_LIBS}/${CMAKE_SHARED_LIBRARY_PREFIX}${BROTLI_LIB_NAME}dec${CMAKE_SHARED_LIBRARY_SUFFIX} diff -Nru apache-parquet-cpp-1.1.1.20170524/cmake_modules/ThirdpartyToolchain.cmake apache-parquet-cpp-1.1.1.20170612/cmake_modules/ThirdpartyToolchain.cmake --- apache-parquet-cpp-1.1.1.20170524/cmake_modules/ThirdpartyToolchain.cmake 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/cmake_modules/ThirdpartyToolchain.cmake 2017-06-11 18:28:45.000000000 +0000 @@ -133,7 +133,7 @@ if (NOT ZLIB_FOUND) set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install") set(ZLIB_HOME "${ZLIB_PREFIX}") - set(ZLIB_INCLUDE_DIRS "${ZLIB_PREFIX}/include") + set(ZLIB_INCLUDE_DIR "${ZLIB_PREFIX}/include") if (MSVC) if (${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG") set(ZLIB_STATIC_LIB_NAME zlibstaticd.lib) @@ -161,7 +161,7 @@ set(ZLIB_VENDORED 0) endif() -include_directories(SYSTEM ${ZLIB_INCLUDE_DIRS}) +include_directories(SYSTEM ${ZLIB_INCLUDE_DIR}) add_library(zlibstatic STATIC IMPORTED) set_target_properties(zlibstatic PROPERTIES IMPORTED_LOCATION ${ZLIB_STATIC_LIB}) @@ -217,7 +217,7 @@ set(THRIFT_CMAKE_ARGS "-DFLEX_EXECUTABLE=${WINFLEXBISON_PREFIX}/win_flex.exe" "-DBISON_EXECUTABLE=${WINFLEXBISON_PREFIX}/win_bison.exe" - "-DZLIB_INCLUDE_DIR=${ZLIB_INCLUDE_DIRS}" + "-DZLIB_INCLUDE_DIR=${ZLIB_INCLUDE_DIR}" "-DZLIB_LIBRARY=${ZLIB_STATIC_LIB}" "-DWITH_SHARED_LIB=OFF" "-DWITH_PLUGIN=OFF" @@ -335,9 +335,9 @@ else() set(BROTLI_LIB_DIR lib) endif() - set(BROTLI_LIBRARY_ENC "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_LIBRARY_ARCHITECTURE}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(BROTLI_LIBRARY_DEC "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_LIBRARY_ARCHITECTURE}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(BROTLI_LIBRARY_COMMON 
"${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_LIBRARY_ARCHITECTURE}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(BROTLI_STATIC_LIBRARY_ENC "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_LIBRARY_ARCHITECTURE}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(BROTLI_STATIC_LIBRARY_DEC "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_LIBRARY_ARCHITECTURE}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(BROTLI_STATIC_LIBRARY_COMMON "${BROTLI_PREFIX}/${BROTLI_LIB_DIR}/${CMAKE_LIBRARY_ARCHITECTURE}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon${CMAKE_STATIC_LIBRARY_SUFFIX}") set(BROTLI_CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}" "-DCMAKE_C_FLAGS=${EX_C_FLAGS}" @@ -346,7 +346,7 @@ -DBUILD_SHARED_LIBS=OFF) if (CMAKE_VERSION VERSION_GREATER "3.2") - set(BROTLI_BUILD_BYPRODUCTS BUILD_BYPRODUCTS "${BROTLI_LIBRARY_ENC}" "${BROTLI_LIBRARY_DEC}" "${BROTLI_LIBRARY_COMMON}") + set(BROTLI_BUILD_BYPRODUCTS BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}" "${BROTLI_STATIC_LIBRARY_DEC}" "${BROTLI_STATIC_LIBRARY_COMMON}") endif() ExternalProject_Add(brotli_ep @@ -369,11 +369,11 @@ include_directories(SYSTEM ${BROTLI_INCLUDE_DIR}) add_library(brotlistatic_enc STATIC IMPORTED) -set_target_properties(brotlistatic_enc PROPERTIES IMPORTED_LOCATION ${BROTLI_LIBRARY_ENC}) +set_target_properties(brotlistatic_enc PROPERTIES IMPORTED_LOCATION ${BROTLI_STATIC_LIBRARY_ENC}) add_library(brotlistatic_dec STATIC IMPORTED) -set_target_properties(brotlistatic_dec PROPERTIES IMPORTED_LOCATION ${BROTLI_LIBRARY_DEC}) +set_target_properties(brotlistatic_dec PROPERTIES IMPORTED_LOCATION ${BROTLI_STATIC_LIBRARY_DEC}) add_library(brotlistatic_common STATIC IMPORTED) -set_target_properties(brotlistatic_common PROPERTIES IMPORTED_LOCATION ${BROTLI_LIBRARY_COMMON}) +set_target_properties(brotlistatic_common PROPERTIES IMPORTED_LOCATION ${BROTLI_STATIC_LIBRARY_COMMON}) if (BROTLI_VENDORED) 
add_dependencies(brotlistatic_enc brotli_ep) @@ -448,10 +448,14 @@ set(GBENCHMARK_INCLUDE_DIR "${GBENCHMARK_PREFIX}/include") set(GBENCHMARK_STATIC_LIB "${GBENCHMARK_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}benchmark${CMAKE_STATIC_LIBRARY_SUFFIX}") set(GBENCHMARK_CMAKE_ARGS - "-DCMAKE_BUILD_TYPE=Release" "-DCMAKE_INSTALL_PREFIX:PATH=${GBENCHMARK_PREFIX}" "-DBENCHMARK_ENABLE_TESTING=OFF" "-DCMAKE_CXX_FLAGS=${EP_CXX_FLAGS}") + if (MSVC) + set(GBENCHMARK_CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS} "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}") + else() + set(GBENCHMARK_CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS} "-DCMAKE_BUILD_TYPE=Release") + endif() if (APPLE) set(GBENCHMARK_CMAKE_ARGS ${GBENCHMARK_CMAKE_ARGS} "-DBENCHMARK_USE_LIBCXX=ON") endif() @@ -515,7 +519,7 @@ endif() if ("$ENV{PARQUET_ARROW_VERSION}" STREQUAL "") - set(ARROW_VERSION "d8db8f8a11abbbb6c45645b2d7370610311731bd") + set(ARROW_VERSION "a8f8ba0cbcf5f596f042e90b7a208e7a0c3925b7") else() set(ARROW_VERSION "$ENV{PARQUET_ARROW_VERSION}") endif() diff -Nru apache-parquet-cpp-1.1.1.20170524/debian/changelog apache-parquet-cpp-1.1.1.20170612/debian/changelog --- apache-parquet-cpp-1.1.1.20170524/debian/changelog 2017-05-25 07:00:52.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/debian/changelog 2017-06-12 02:50:35.000000000 +0000 @@ -1,8 +1,14 @@ -apache-parquet-cpp (1.1.1.20170524-2~xenial1) xenial; urgency=medium +apache-parquet-cpp (1.1.1.20170612-2~xenial1) xenial; urgency=medium * Build for xenial. - -- Kouhei Sutou Thu, 25 May 2017 16:00:52 +0900 + -- Kouhei Sutou Mon, 12 Jun 2017 11:50:35 +0900 + +apache-parquet-cpp (1.1.1.20170612-1) unstable; urgency=low + + * New upstream release. 
+ + -- Kouhei Sutou Mon, 12 Jun 2017 00:00:00 +0900 apache-parquet-cpp (1.1.1.20170524-1) unstable; urgency=low diff -Nru apache-parquet-cpp-1.1.1.20170524/debian/control apache-parquet-cpp-1.1.1.20170612/debian/control --- apache-parquet-cpp-1.1.1.20170524/debian/control 2017-05-25 07:00:51.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/debian/control 2017-06-12 02:50:34.000000000 +0000 @@ -26,24 +26,11 @@ Pre-Depends: ${misc:Pre-Depends} Depends: ${misc:Depends}, - ${shlibs:Depends} -Description: Apache Parquet is a columner storage format - . - This package provides library files to process Apache Parquet format. - -Package: libparquet-arrow1 -Section: libs -Architecture: any -Multi-Arch: same -Pre-Depends: ${misc:Pre-Depends} -Depends: - ${misc:Depends}, ${shlibs:Depends}, - libparquet1 (= ${binary:Version}), libarrow0 Description: Apache Parquet is a columner storage format . - This package provides library files to integrate Apache Arrrow. + This package provides library files to process Apache Parquet format. Package: libparquet-dev Section: libdevel @@ -52,8 +39,7 @@ Depends: ${misc:Depends}, zlib1g-dev, - libparquet1 (= ${binary:Version}), - libparquet-arrow1 (= ${binary:Version}) + libparquet1 (= ${binary:Version}) Description: Apache Parquet is a columner storage format . This package provides header files. 
diff -Nru apache-parquet-cpp-1.1.1.20170524/debian/libparquet-arrow1.install apache-parquet-cpp-1.1.1.20170612/debian/libparquet-arrow1.install --- apache-parquet-cpp-1.1.1.20170524/debian/libparquet-arrow1.install 2017-05-25 07:00:51.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/debian/libparquet-arrow1.install 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ -usr/lib/*/libparquet_arrow.so.* diff -Nru apache-parquet-cpp-1.1.1.20170524/dev/release/verify-release-candidate apache-parquet-cpp-1.1.1.20170612/dev/release/verify-release-candidate --- apache-parquet-cpp-1.1.1.20170524/dev/release/verify-release-candidate 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/dev/release/verify-release-candidate 2017-06-11 18:28:45.000000000 +0000 @@ -50,7 +50,7 @@ # Build mkdir -p build cd build - cmake -DPARQUET_ARROW=ON -DPARQUET_ARROW_LINKAGE=static .. + cmake -DPARQUET_ARROW_LINKAGE=static .. make -j5 # Test diff -Nru apache-parquet-cpp-1.1.1.20170524/docs/Windows.md apache-parquet-cpp-1.1.1.20170612/docs/Windows.md --- apache-parquet-cpp-1.1.1.20170524/docs/Windows.md 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/docs/Windows.md 2017-06-11 18:28:45.000000000 +0000 @@ -54,6 +54,32 @@ `BROTLI_HOME` variable with path to `brotli` installation `ARROW_HOME` variable with path to `arrow` installation +### Customize static libraries names lookup of 3rd party dependencies + +If you decided to use pre-built 3rd party dependencies libs, it's possible to +configure parquet-cpp cmake build script to search for customized names of 3rd +party static libs. + +`zlib`. Pass `-DPARQUET_ZLIB_VENDORED=OFF` to enable lookup of custom zlib +build. Set `ZLIB_HOME` environment variable. Pass +`-DZLIB_MSVC_STATIC_LIB_SUFFIX=%ZLIB_SUFFIX%` to link with z%ZLIB_SUFFIX%.lib + +`arrow`. Set `ARROW_HOME` environment variable. Pass +`-DARROW_MSVC_STATIC_LIB_SUFFIX=%ARROW_SUFFIX%` to link with +arrow%ARROW_SUFFIX%.lib + +`brotli`. 
Set `BROTLI_HOME` environment variable. Pass +`-DBROTLI_MSVC_STATIC_LIB_SUFFIX=%BROTLI_SUFFIX%` to link with +brotli*%BROTLI_SUFFIX%.lib. + +`snappy`. Set `SNAPPY_HOME` environment variable. Pass +`-DSNAPPY_MSVC_STATIC_LIB_SUFFIX=%SNAPPY_SUFFIX%` to link with +snappy%SNAPPY_SUFFIX%.lib. + +`thrift`. Set `THRIFT_HOME` environment variable. Pass +`-DTHRIFT_MSVC_STATIC_LIB_SUFFIX=%THRIFT_SUFFIX%` to link with +thrift*%THRIFT_SUFFIX%.lib. + ### Visual Studio Microsoft provides the free Visual Studio Community edition. Once you have diff -Nru apache-parquet-cpp-1.1.1.20170524/examples/reader-writer.cc apache-parquet-cpp-1.1.1.20170612/examples/reader-writer.cc --- apache-parquet-cpp-1.1.1.20170524/examples/reader-writer.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/examples/reader-writer.cc 2017-06-11 18:28:45.000000000 +0000 @@ -30,7 +30,7 @@ * This example describes writing and reading Parquet Files in C++ and serves as a * reference to the API. * The file contains all the physical data types supported by Parquet.
-**/ + **/ /* Parquet is a structured columnar file format * Parquet File = "Parquet data" + "Parquet Metadata" @@ -42,7 +42,7 @@ * complex (nested) type (internal nodes) * For specific details, please refer the format here: * https://github.com/apache/parquet-format/blob/master/LogicalTypes.md -**/ + **/ constexpr int NUM_ROWS_PER_ROW_GROUP = 500; constexpr int FIXED_LENGTH = 10; @@ -168,7 +168,7 @@ parquet::FloatWriter* float_writer = static_cast(rg_writer->NextColumn()); for (int i = 0; i < NUM_ROWS_PER_ROW_GROUP; i++) { - float value = i * 1.1; + float value = i * 1.1f; float_writer->WriteBatch(1, nullptr, nullptr, &value); } @@ -367,7 +367,7 @@ // There are no NULL values in the rows written assert(values_read == 1); // Verify the value written - float expected_value = i * 1.1; + float expected_value = i * 1.1f; assert(value == expected_value); i++; } diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/arrow/arrow-reader-writer-benchmark.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/arrow/arrow-reader-writer-benchmark.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/arrow/arrow-reader-writer-benchmark.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/arrow/arrow-reader-writer-benchmark.cc 2017-06-11 18:28:45.000000000 +0000 @@ -114,14 +114,14 @@ SetBytesProcessed(state); } -BENCHMARK_TEMPLATE(BM_WriteColumn, false, Int32Type); -BENCHMARK_TEMPLATE(BM_WriteColumn, true, Int32Type); +BENCHMARK_TEMPLATE2(BM_WriteColumn, false, Int32Type); +BENCHMARK_TEMPLATE2(BM_WriteColumn, true, Int32Type); -BENCHMARK_TEMPLATE(BM_WriteColumn, false, Int64Type); -BENCHMARK_TEMPLATE(BM_WriteColumn, true, Int64Type); +BENCHMARK_TEMPLATE2(BM_WriteColumn, false, Int64Type); +BENCHMARK_TEMPLATE2(BM_WriteColumn, true, Int64Type); -BENCHMARK_TEMPLATE(BM_WriteColumn, false, DoubleType); -BENCHMARK_TEMPLATE(BM_WriteColumn, true, DoubleType); +BENCHMARK_TEMPLATE2(BM_WriteColumn, false, DoubleType); +BENCHMARK_TEMPLATE2(BM_WriteColumn, true, 
DoubleType); template static void BM_ReadColumn(::benchmark::State& state) { @@ -141,14 +141,14 @@ SetBytesProcessed(state); } -BENCHMARK_TEMPLATE(BM_ReadColumn, false, Int32Type); -BENCHMARK_TEMPLATE(BM_ReadColumn, true, Int32Type); +BENCHMARK_TEMPLATE2(BM_ReadColumn, false, Int32Type); +BENCHMARK_TEMPLATE2(BM_ReadColumn, true, Int32Type); -BENCHMARK_TEMPLATE(BM_ReadColumn, false, Int64Type); -BENCHMARK_TEMPLATE(BM_ReadColumn, true, Int64Type); +BENCHMARK_TEMPLATE2(BM_ReadColumn, false, Int64Type); +BENCHMARK_TEMPLATE2(BM_ReadColumn, true, Int64Type); -BENCHMARK_TEMPLATE(BM_ReadColumn, false, DoubleType); -BENCHMARK_TEMPLATE(BM_ReadColumn, true, DoubleType); +BENCHMARK_TEMPLATE2(BM_ReadColumn, false, DoubleType); +BENCHMARK_TEMPLATE2(BM_ReadColumn, true, DoubleType); } // namespace benchmark diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/arrow/arrow-reader-writer-test.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/arrow/arrow-reader-writer-test.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/arrow/arrow-reader-writer-test.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/arrow/arrow-reader-writer-test.cc 2017-06-11 18:28:45.000000000 +0000 @@ -15,6 +15,12 @@ // specific language governing permissions and limitations // under the License. 
+#ifdef _MSC_VER +#pragma warning(push) +// Disable forcing value to bool warnings +#pragma warning(disable : 4800) +#endif + #include "gtest/gtest.h" #include @@ -1172,13 +1178,11 @@ std::unique_ptr arrow_reader; ASSERT_NO_THROW( - arrow_reader.reset(new FileReader(pool, - ParquetFileReader::OpenFile(path, false)))); + arrow_reader.reset(new FileReader(pool, ParquetFileReader::OpenFile(path, false)))); std::shared_ptr<::arrow::Table> table; ASSERT_OK_NO_THROW(arrow_reader->ReadTable(&table)); } - } // namespace arrow } // namespace parquet diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/arrow/CMakeLists.txt apache-parquet-cpp-1.1.1.20170612/src/parquet/arrow/CMakeLists.txt --- apache-parquet-cpp-1.1.1.20170524/src/parquet/arrow/CMakeLists.txt 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/arrow/CMakeLists.txt 2017-06-11 18:28:45.000000000 +0000 @@ -15,51 +15,11 @@ # specific language governing permissions and limitations # under the License. 
-# ---------------------------------------------------------------------- -# parquet_arrow : Arrow <-> Parquet adapter - -set(PARQUET_ARROW_SRCS - reader.cc - schema.cc - writer.cc -) - -# Set dependencies so ExternalProjects are built beforehand -set(PARQUET_ARROW_DEPENDENCIES ${ARROW_LINK_LIBS} parquet_static) -SET(PARQUET_ARROW_SHARED_LINK_LIBS ${ARROW_LINK_LIBS} parquet_shared) -SET(PARQUET_ARROW_STATIC_LINK_LIBS ${ARROW_LINK_LIBS} parquet_static) - -ADD_LIB(parquet_arrow - SOURCES ${PARQUET_ARROW_SRCS} - LIB_BUILD_SHARED ${PARQUET_BUILD_SHARED} - LIB_BUILD_STATIC ${PARQUET_BUILD_STATIC} - DEPENDENCIES ${PARQUET_ARROW_DEPENDENCIES} - SHARED_LINK_FLAGS ${SHARED_LINK_FLAGS} - SHARED_LINK_LIBS ${PARQUET_ARROW_SHARED_LINK_LIBS} - STATIC_LINK_LIBS ${PARQUET_ARROW_STATIC_LINK_LIBS} - ABI_VERSION ${PARQUET_ABI_VERSION} - SO_VERSION ${PARQUET_SO_VERSION} - LIB_RPATH_ORIGIN ${PARQUET_RPATH_ORIGIN} -) - ADD_PARQUET_TEST(arrow-schema-test) ADD_PARQUET_TEST(arrow-reader-writer-test) -if (PARQUET_BUILD_STATIC) - ADD_PARQUET_LINK_LIBRARIES(arrow-schema-test parquet_arrow_static) - ADD_PARQUET_LINK_LIBRARIES(arrow-reader-writer-test parquet_arrow_static) -else() - ADD_PARQUET_LINK_LIBRARIES(arrow-schema-test parquet_arrow_shared) - ADD_PARQUET_LINK_LIBRARIES(arrow-reader-writer-test parquet_arrow_shared) -endif() - if(PARQUET_BUILD_BENCHMARKS) ADD_PARQUET_BENCHMARK(arrow-reader-writer-benchmark) - if (PARQUET_BUILD_STATIC) - ADD_PARQUET_LINK_LIBRARIES(arrow-reader-writer-benchmark parquet_arrow_static) - else() - ADD_PARQUET_LINK_LIBRARIES(arrow-reader-writer-benchmark parquet_arrow_shared) - endif() endif() # Headers: top level @@ -68,11 +28,3 @@ schema.h writer.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/parquet/arrow") - -# pkg-config support -configure_file(parquet-arrow.pc.in - "${CMAKE_CURRENT_BINARY_DIR}/parquet-arrow.pc" - @ONLY) -install( - FILES "${CMAKE_CURRENT_BINARY_DIR}/parquet-arrow.pc" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/") diff -Nru 
apache-parquet-cpp-1.1.1.20170524/src/parquet/arrow/parquet-arrow.pc.in apache-parquet-cpp-1.1.1.20170612/src/parquet/arrow/parquet-arrow.pc.in --- apache-parquet-cpp-1.1.1.20170524/src/parquet/arrow/parquet-arrow.pc.in 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/arrow/parquet-arrow.pc.in 1970-01-01 00:00:00.000000000 +0000 @@ -1,30 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -prefix=@CMAKE_INSTALL_PREFIX@ -libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ -includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ - -so_version=@PARQUET_SO_VERSION@ -abi_version=@PARQUET_ABI_VERSION@ - -Name: Apache Parquet Apache Arrow adapter -Description: Apache Parquet Apache arrow adapter provides Arrow IPC modules for reading and writing Parquet format. 
-Version: @PARQUET_VERSION@ -Libs: -L${libdir} -lparquet_arrow -Cflags: -I${includedir} -Requires: parquet arrow diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/arrow/reader.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/arrow/reader.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/arrow/reader.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/arrow/reader.cc 2017-06-11 18:28:45.000000000 +0000 @@ -295,7 +295,7 @@ batch_size += reader_->metadata()->RowGroup(j)->ColumnChunk(i)->num_values(); } - return flat_column_reader->NextBatch(batch_size, out); + return flat_column_reader->NextBatch(static_cast(batch_size), out); } Status FileReader::Impl::GetSchema( @@ -319,7 +319,7 @@ // TODO(wesm): Refactor to share more code with ReadTable auto ReadColumnFunc = [&indices, &row_group_index, &schema, &columns, &rg_metadata, - this](int i) { + this](int i) { int column_index = indices[i]; int64_t batch_size = rg_metadata->ColumnChunk(column_index)->num_values(); @@ -331,7 +331,7 @@ ColumnReader flat_column_reader(std::move(impl)); std::shared_ptr array; - RETURN_NOT_OK(flat_column_reader.NextBatch(batch_size, &array)); + RETURN_NOT_OK(flat_column_reader.NextBatch(static_cast(batch_size), &array)); columns[i] = std::make_shared(schema->field(i), array); return Status::OK(); }; @@ -380,7 +380,7 @@ std::vector indices(reader_->metadata()->num_columns()); for (size_t i = 0; i < indices.size(); ++i) { - indices[i] = i; + indices[i] = static_cast(i); } return ReadTable(indices, table); } @@ -389,7 +389,7 @@ std::vector indices(reader_->metadata()->num_columns()); for (size_t i = 0; i < indices.size(); ++i) { - indices[i] = i; + indices[i] = static_cast(i); } return ReadRowGroup(i, indices, table); } @@ -479,8 +479,8 @@ RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(ParquetCType), false)); auto values = reinterpret_cast(values_buffer_.mutable_data()); int64_t values_read; - PARQUET_CATCH_NOT_OK(*levels_read = 
reader->ReadBatch( - values_to_read, nullptr, nullptr, values, &values_read)); + PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(static_cast(values_to_read), + nullptr, nullptr, values, &values_read)); ArrowCType* out_ptr = reinterpret_cast(data_buffer_ptr_); std::copy(values, values + values_read, out_ptr + valid_bits_idx_); @@ -489,19 +489,20 @@ return Status::OK(); } -#define NONNULLABLE_BATCH_FAST_PATH(ArrowType, ParquetType, CType) \ - template <> \ - Status ColumnReader::Impl::ReadNonNullableBatch( \ - TypedColumnReader * reader, int64_t values_to_read, \ - int64_t * levels_read) { \ - int64_t values_read; \ - CType* out_ptr = reinterpret_cast(data_buffer_ptr_); \ - PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(values_to_read, nullptr, \ - nullptr, out_ptr + valid_bits_idx_, &values_read)); \ - \ - valid_bits_idx_ += values_read; \ - \ - return Status::OK(); \ +#define NONNULLABLE_BATCH_FAST_PATH(ArrowType, ParquetType, CType) \ + template <> \ + Status ColumnReader::Impl::ReadNonNullableBatch( \ + TypedColumnReader * reader, int64_t values_to_read, \ + int64_t * levels_read) { \ + int64_t values_read; \ + CType* out_ptr = reinterpret_cast(data_buffer_ptr_); \ + PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch( \ + static_cast(values_to_read), nullptr, nullptr, \ + out_ptr + valid_bits_idx_, &values_read)); \ + \ + valid_bits_idx_ += values_read; \ + \ + return Status::OK(); \ } NONNULLABLE_BATCH_FAST_PATH(::arrow::Int32Type, Int32Type, int32_t) @@ -519,8 +520,8 @@ RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(Int96), false)); auto values = reinterpret_cast(values_buffer_.mutable_data()); int64_t values_read; - PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch( - values_to_read, nullptr, nullptr, values, &values_read)); + PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(static_cast(values_to_read), + nullptr, nullptr, values, &values_read)); int64_t* out_ptr = reinterpret_cast(data_buffer_ptr_) + valid_bits_idx_; for 
(int64_t i = 0; i < values_read; i++) { @@ -537,8 +538,8 @@ RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(int32_t), false)); auto values = reinterpret_cast(values_buffer_.mutable_data()); int64_t values_read; - PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch( - values_to_read, nullptr, nullptr, values, &values_read)); + PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(static_cast(values_to_read), + nullptr, nullptr, values, &values_read)); int64_t* out_ptr = reinterpret_cast(data_buffer_ptr_) + valid_bits_idx_; for (int64_t i = 0; i < values_read; i++) { @@ -556,8 +557,8 @@ RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(bool), false)); auto values = reinterpret_cast(values_buffer_.mutable_data()); int64_t values_read; - PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch( - values_to_read, nullptr, nullptr, values, &values_read)); + PARQUET_CATCH_NOT_OK(*levels_read = reader->ReadBatch(static_cast(values_to_read), + nullptr, nullptr, values, &values_read)); for (int64_t i = 0; i < values_read; i++) { if (values[i]) { ::arrow::BitUtil::SetBit(data_buffer_ptr_, valid_bits_idx_); } @@ -577,11 +578,12 @@ RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(ParquetCType), false)); auto values = reinterpret_cast(values_buffer_.mutable_data()); int64_t null_count; - PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(values_to_read, def_levels, rep_levels, - values, valid_bits_ptr_, valid_bits_idx_, levels_read, values_read, &null_count)); + PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast(values_to_read), + def_levels, rep_levels, values, valid_bits_ptr_, valid_bits_idx_, levels_read, + values_read, &null_count)); auto data_ptr = reinterpret_cast(data_buffer_ptr_); - INIT_BITSET(valid_bits_ptr_, valid_bits_idx_); + INIT_BITSET(valid_bits_ptr_, static_cast(valid_bits_idx_)); for (int64_t i = 0; i < *values_read; i++) { if (bitset_valid_bits_ptr_ & (1 << bit_offset_valid_bits_ptr_)) { @@ -595,22 +597,22 @@ return 
Status::OK(); } -#define NULLABLE_BATCH_FAST_PATH(ArrowType, ParquetType, CType) \ - template <> \ - Status ColumnReader::Impl::ReadNullableBatch( \ - TypedColumnReader * reader, int16_t * def_levels, \ - int16_t * rep_levels, int64_t values_to_read, int64_t * levels_read, \ - int64_t * values_read) { \ - auto data_ptr = reinterpret_cast(data_buffer_ptr_); \ - int64_t null_count; \ - PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(values_to_read, def_levels, rep_levels, \ - data_ptr + valid_bits_idx_, valid_bits_ptr_, valid_bits_idx_, levels_read, \ - values_read, &null_count)); \ - \ - valid_bits_idx_ += *values_read; \ - null_count_ += null_count; \ - \ - return Status::OK(); \ +#define NULLABLE_BATCH_FAST_PATH(ArrowType, ParquetType, CType) \ + template <> \ + Status ColumnReader::Impl::ReadNullableBatch( \ + TypedColumnReader * reader, int16_t * def_levels, \ + int16_t * rep_levels, int64_t values_to_read, int64_t * levels_read, \ + int64_t * values_read) { \ + auto data_ptr = reinterpret_cast(data_buffer_ptr_); \ + int64_t null_count; \ + PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast(values_to_read), \ + def_levels, rep_levels, data_ptr + valid_bits_idx_, valid_bits_ptr_, \ + valid_bits_idx_, levels_read, values_read, &null_count)); \ + \ + valid_bits_idx_ += *values_read; \ + null_count_ += null_count; \ + \ + return Status::OK(); \ } NULLABLE_BATCH_FAST_PATH(::arrow::Int32Type, Int32Type, int32_t) @@ -629,11 +631,12 @@ RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(Int96), false)); auto values = reinterpret_cast(values_buffer_.mutable_data()); int64_t null_count; - PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(values_to_read, def_levels, rep_levels, - values, valid_bits_ptr_, valid_bits_idx_, levels_read, values_read, &null_count)); + PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast(values_to_read), + def_levels, rep_levels, values, valid_bits_ptr_, valid_bits_idx_, levels_read, + values_read, &null_count)); auto data_ptr = 
reinterpret_cast(data_buffer_ptr_); - INIT_BITSET(valid_bits_ptr_, valid_bits_idx_); + INIT_BITSET(valid_bits_ptr_, static_cast(valid_bits_idx_)); for (int64_t i = 0; i < *values_read; i++) { if (bitset_valid_bits_ptr_ & (1 << bit_offset_valid_bits_ptr_)) { data_ptr[valid_bits_idx_ + i] = impala_timestamp_to_nanoseconds(values[i]); @@ -653,11 +656,12 @@ RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(int32_t), false)); auto values = reinterpret_cast(values_buffer_.mutable_data()); int64_t null_count; - PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(values_to_read, def_levels, rep_levels, - values, valid_bits_ptr_, valid_bits_idx_, levels_read, values_read, &null_count)); + PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast(values_to_read), + def_levels, rep_levels, values, valid_bits_ptr_, valid_bits_idx_, levels_read, + values_read, &null_count)); auto data_ptr = reinterpret_cast(data_buffer_ptr_); - INIT_BITSET(valid_bits_ptr_, valid_bits_idx_); + INIT_BITSET(valid_bits_ptr_, static_cast(valid_bits_idx_)); for (int64_t i = 0; i < *values_read; i++) { if (bitset_valid_bits_ptr_ & (1 << bit_offset_valid_bits_ptr_)) { data_ptr[valid_bits_idx_ + i] = static_cast(values[i]) * 86400000; @@ -677,10 +681,11 @@ RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(bool), false)); auto values = reinterpret_cast(values_buffer_.mutable_data()); int64_t null_count; - PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(values_to_read, def_levels, rep_levels, - values, valid_bits_ptr_, valid_bits_idx_, levels_read, values_read, &null_count)); + PARQUET_CATCH_NOT_OK(reader->ReadBatchSpaced(static_cast(values_to_read), + def_levels, rep_levels, values, valid_bits_ptr_, valid_bits_idx_, levels_read, + values_read, &null_count)); - INIT_BITSET(valid_bits_ptr_, valid_bits_idx_); + INIT_BITSET(valid_bits_ptr_, static_cast(valid_bits_idx_)); for (int64_t i = 0; i < *values_read; i++) { if (bitset_valid_bits_ptr_ & (1 << bit_offset_valid_bits_ptr_)) { if (values[i]) { 
::arrow::BitUtil::SetBit(data_buffer_ptr_, valid_bits_idx_ + i); } @@ -716,7 +721,8 @@ Status ColumnReader::Impl::InitValidBits(int batch_size) { valid_bits_idx_ = 0; if (descr_->max_definition_level() > 0) { - int valid_bits_size = ::arrow::BitUtil::CeilByte(batch_size + 1) / 8; + int valid_bits_size = + static_cast(::arrow::BitUtil::CeilByte(batch_size + 1)) / 8; valid_bits_buffer_ = std::make_shared(pool_); RETURN_NOT_OK(valid_bits_buffer_->Resize(valid_bits_size, false)); valid_bits_ptr_ = valid_bits_buffer_->mutable_data(); @@ -786,7 +792,8 @@ if (j == (list_depth - 1)) { RETURN_NOT_OK(offset_builders[j]->Append(values_offset)); } else { - RETURN_NOT_OK(offset_builders[j]->Append(offset_builders[j + 1]->length())); + RETURN_NOT_OK(offset_builders[j]->Append( + static_cast(offset_builders[j + 1]->length()))); } if (((empty_def_level[j] - 1) == def_levels[i]) && (nullable[j])) { @@ -806,7 +813,8 @@ if (j == (list_depth - 1)) { RETURN_NOT_OK(offset_builders[j]->Append(values_offset)); } else { - RETURN_NOT_OK(offset_builders[j]->Append(offset_builders[j + 1]->length())); + RETURN_NOT_OK(offset_builders[j]->Append( + static_cast(offset_builders[j + 1]->length()))); } } @@ -864,9 +872,9 @@ RETURN_NOT_OK((ReadNullableBatch(reader, def_levels + total_levels_read, rep_levels + total_levels_read, values_to_read, &levels_read, &values_read))); - total_levels_read += levels_read; + total_levels_read += static_cast(levels_read); } - values_to_read -= values_read; + values_to_read -= static_cast(values_read); if (!column_reader_->HasNext()) { NextRowGroup(); } } @@ -925,9 +933,9 @@ RETURN_NOT_OK((ReadNullableBatch<::arrow::BooleanType, BooleanType>(reader, def_levels + total_levels_read, rep_levels + total_levels_read, values_to_read, &levels_read, &values_read))); - total_levels_read += levels_read; + total_levels_read += static_cast(levels_read); } - values_to_read -= values_read; + values_to_read -= static_cast(values_read); if (!column_reader_->HasNext()) { 
NextRowGroup(); } } @@ -991,7 +999,7 @@ PARQUET_CATCH_NOT_OK( levels_read = reader->ReadBatch(values_to_read, def_levels + total_levels_read, rep_levels + total_levels_read, values, &values_read)); - values_to_read -= levels_read; + values_to_read -= static_cast(levels_read); if (descr_->max_definition_level() == 0) { for (int64_t i = 0; i < levels_read; i++) { RETURN_NOT_OK( @@ -1012,7 +1020,7 @@ values_idx++; } } - total_levels_read += levels_read; + total_levels_read += static_cast(levels_read); } if (!column_reader_->HasNext()) { NextRowGroup(); } } @@ -1047,7 +1055,7 @@ PARQUET_CATCH_NOT_OK( levels_read = reader->ReadBatch(values_to_read, def_levels + total_levels_read, rep_levels + total_levels_read, values, &values_read)); - values_to_read -= levels_read; + values_to_read -= static_cast(levels_read); if (descr_->max_definition_level() == 0) { for (int64_t i = 0; i < levels_read; i++) { RETURN_NOT_OK(builder.Append(values[i].ptr)); @@ -1064,7 +1072,7 @@ values_idx++; } } - total_levels_read += levels_read; + total_levels_read += static_cast(levels_read); } if (!column_reader_->HasNext()) { NextRowGroup(); } } diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/arrow/test-util.h apache-parquet-cpp-1.1.1.20170612/src/parquet/arrow/test-util.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/arrow/test-util.h 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/arrow/test-util.h 2017-06-11 18:28:45.000000000 +0000 @@ -210,7 +210,7 @@ if (!valid_bytes[i]) { builder.AppendNull(); } else { - ::arrow::test::random_bytes(kBufferSize, seed + i, buffer); + ::arrow::test::random_bytes(kBufferSize, seed + static_cast(i), buffer); builder.Append(buffer, kBufferSize); } } @@ -240,7 +240,7 @@ if (!valid_bytes[i]) { builder.AppendNull(); } else { - ::arrow::test::random_bytes(kBufferSize, seed + i, buffer); + ::arrow::test::random_bytes(kBufferSize, seed + static_cast(i), buffer); builder.Append(buffer); } } @@ -294,10 +294,10 @@ if 
(!(((i % 2) == 0) && ((i / 2) < null_count))) { // Non-null list (list with index 1 is always empty). ::arrow::BitUtil::SetBit(null_bitmap_ptr, i); - if (i != 1) { current_offset += length_per_entry; } + if (i != 1) { current_offset += static_cast(length_per_entry); } } } - offsets_ptr[size] = values->length(); + offsets_ptr[size] = static_cast(values->length()); auto value_field = std::make_shared<::arrow::Field>("item", values->type(), nullable_values); diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/arrow/writer.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/arrow/writer.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/arrow/writer.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/arrow/writer.cc 2017-06-11 18:28:45.000000000 +0000 @@ -63,7 +63,7 @@ Status VisitInline(const Array& array); Status Visit(const ::arrow::PrimitiveArray& array) { - array_offsets_.push_back(array.offset()); + array_offsets_.push_back(static_cast(array.offset())); valid_bitmaps_.push_back(array.null_bitmap_data()); null_counts_.push_back(array.null_count()); values_type_ = array.type_id(); @@ -72,7 +72,7 @@ } Status Visit(const ::arrow::BinaryArray& array) { - array_offsets_.push_back(array.offset()); + array_offsets_.push_back(static_cast(array.offset())); valid_bitmaps_.push_back(array.null_bitmap_data()); null_counts_.push_back(array.null_count()); values_type_ = array.type_id(); @@ -81,7 +81,7 @@ } Status Visit(const ListArray& array) { - array_offsets_.push_back(array.offset()); + array_offsets_.push_back(static_cast(array.offset())); valid_bitmaps_.push_back(array.null_bitmap_data()); null_counts_.push_back(array.null_count()); offsets_.push_back(array.raw_value_offsets()); @@ -111,7 +111,7 @@ std::shared_ptr* rep_levels, const Array** values_array) { // Work downwards to extract bitmaps and offsets min_offset_idx_ = 0; - max_offset_idx_ = array.length(); + max_offset_idx_ = static_cast(array.length()); 
RETURN_NOT_OK(VisitInline(array)); *num_values = max_offset_idx_ - min_offset_idx_; *values_offset = min_offset_idx_; @@ -143,7 +143,7 @@ std::fill(def_levels_ptr, def_levels_ptr + array.length(), 1); } else { const uint8_t* valid_bits = array.null_bitmap_data(); - INIT_BITSET(valid_bits, array.offset()); + INIT_BITSET(valid_bits, static_cast(array.offset())); for (int i = 0; i < array.length(); i++) { if (bitset_valid_bits & (1 << bit_offset_valid_bits)) { def_levels_ptr[i] = 1; @@ -396,7 +396,7 @@ RETURN_NOT_OK(data_buffer_.Resize(num_values * sizeof(ParquetCType))); auto buffer_ptr = reinterpret_cast(data_buffer_.mutable_data()); - INIT_BITSET(valid_bits, valid_bits_offset); + INIT_BITSET(valid_bits, static_cast(valid_bits_offset)); for (int i = 0; i < num_values; i++) { if (bitset_valid_bits & (1 << bit_offset_valid_bits)) { buffer_ptr[i] = static_cast(data_ptr[i]); @@ -417,7 +417,7 @@ const int64_t* data_ptr) { RETURN_NOT_OK(data_buffer_.Resize(num_values * sizeof(int32_t))); auto buffer_ptr = reinterpret_cast(data_buffer_.mutable_data()); - INIT_BITSET(valid_bits, valid_bits_offset); + INIT_BITSET(valid_bits, static_cast(valid_bits_offset)); for (int i = 0; i < num_values; i++) { if (bitset_valid_bits & (1 << bit_offset_valid_bits)) { // Convert from milliseconds into days since the epoch @@ -439,7 +439,7 @@ const int32_t* data_ptr) { RETURN_NOT_OK(data_buffer_.Resize(num_values * sizeof(int32_t))); auto buffer_ptr = reinterpret_cast(data_buffer_.mutable_data()); - INIT_BITSET(valid_bits, valid_bits_offset); + INIT_BITSET(valid_bits, static_cast(valid_bits_offset)); if (type.unit() == TimeUnit::SECOND) { for (int i = 0; i < num_values; i++) { @@ -497,7 +497,7 @@ auto writer = reinterpret_cast*>(column_writer); int buffer_idx = 0; - int32_t offset = array->offset(); + int64_t offset = array->offset(); for (int i = 0; i < data->length(); i++) { if (!data->IsNull(i)) { buffer_ptr[buffer_idx++] = BitUtil::GetBit(data_ptr, offset + i); diff -Nru 
apache-parquet-cpp-1.1.1.20170524/src/parquet/CMakeLists.txt apache-parquet-cpp-1.1.1.20170612/src/parquet/CMakeLists.txt --- apache-parquet-cpp-1.1.1.20170524/src/parquet/CMakeLists.txt 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/CMakeLists.txt 2017-06-11 18:28:45.000000000 +0000 @@ -24,12 +24,21 @@ types.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/parquet") +configure_file(parquet_version.h.in + "${CMAKE_CURRENT_BINARY_DIR}/parquet_version.h" + @ONLY) + +install(FILES + "${CMAKE_CURRENT_BINARY_DIR}/parquet_version.h" + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/parquet") + # pkg-config support configure_file(parquet.pc.in "${CMAKE_CURRENT_BINARY_DIR}/parquet.pc" @ONLY) -install( - FILES "${CMAKE_CURRENT_BINARY_DIR}/parquet.pc" + +install(FILES + "${CMAKE_CURRENT_BINARY_DIR}/parquet.pc" DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/") ADD_PARQUET_TEST(compression-test) diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/column/column-reader-test.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/column/column-reader-test.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/column/column-reader-test.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/column/column-reader-test.cc 2017-06-11 18:28:45.000000000 +0000 @@ -89,9 +89,9 @@ // 1) batch_size < page_size (multiple ReadBatch from a single page) // 2) batch_size > page_size (BatchRead limits to a single page) do { - batch = reader->ReadBatch(batch_size, &dresult[0] + batch_actual, - &rresult[0] + batch_actual, &vresult[0] + total_values_read, &values_read); - total_values_read += values_read; + batch = static_cast(reader->ReadBatch(batch_size, &dresult[0] + batch_actual, + &rresult[0] + batch_actual, &vresult[0] + total_values_read, &values_read)); + total_values_read += static_cast(values_read); batch_actual += batch; batch_size = std::max(batch_size * 2, 4096); } while (batch > 0); @@ -102,7 +102,8 @@ if (max_def_level_ > 0) { 
ASSERT_TRUE(vector_equal(def_levels_, dresult)); } if (max_rep_level_ > 0) { ASSERT_TRUE(vector_equal(rep_levels_, rresult)); } // catch improper writes at EOS - batch_actual = reader->ReadBatch(5, nullptr, nullptr, nullptr, &values_read); + batch_actual = + static_cast(reader->ReadBatch(5, nullptr, nullptr, nullptr, &values_read)); ASSERT_EQ(0, batch_actual); ASSERT_EQ(0, values_read); } @@ -126,12 +127,13 @@ // 1) batch_size < page_size (multiple ReadBatch from a single page) // 2) batch_size > page_size (BatchRead limits to a single page) do { - batch = reader->ReadBatchSpaced(batch_size, dresult.data() + levels_actual, - rresult.data() + levels_actual, vresult.data() + batch_actual, - valid_bits.data() + batch_actual, 0, &levels_read, &values_read, &null_count); - total_values_read += batch - null_count; + batch = static_cast(reader->ReadBatchSpaced(batch_size, + dresult.data() + levels_actual, rresult.data() + levels_actual, + vresult.data() + batch_actual, valid_bits.data() + batch_actual, 0, + &levels_read, &values_read, &null_count)); + total_values_read += batch - static_cast(null_count); batch_actual += batch; - levels_actual += levels_read; + levels_actual += static_cast(levels_read); batch_size = std::max(batch_size * 2, 4096); } while ((batch > 0) || (levels_read > 0)); @@ -146,8 +148,8 @@ } if (max_rep_level_ > 0) { ASSERT_TRUE(vector_equal(rep_levels_, rresult)); } // catch improper writes at EOS - batch_actual = reader->ReadBatchSpaced(5, nullptr, nullptr, nullptr, - valid_bits.data(), 0, &levels_read, &values_read, &null_count); + batch_actual = static_cast(reader->ReadBatchSpaced(5, nullptr, nullptr, nullptr, + valid_bits.data(), 0, &levels_read, &values_read, &null_count)); ASSERT_EQ(0, batch_actual); ASSERT_EQ(0, null_count); } @@ -262,8 +264,8 @@ // Read half a page reader->ReadBatch( levels_per_page / 2, dresult.data(), rresult.data(), vresult.data(), &values_read); - vector sub_values( - values_.begin() + 2 * levels_per_page, values_.begin() 
+ 2.5 * levels_per_page); + vector sub_values(values_.begin() + 2 * levels_per_page, + values_.begin() + static_cast(2.5 * static_cast(levels_per_page))); ASSERT_TRUE(vector_equal(sub_values, vresult)); // 2) skip_size == page_size (skip across two pages) @@ -273,7 +275,8 @@ reader->ReadBatch( levels_per_page / 2, dresult.data(), rresult.data(), vresult.data(), &values_read); sub_values.clear(); - sub_values.insert(sub_values.end(), values_.begin() + 3.5 * levels_per_page, + sub_values.insert(sub_values.end(), + values_.begin() + static_cast(3.5 * static_cast(levels_per_page)), values_.begin() + 4 * levels_per_page); ASSERT_TRUE(vector_equal(sub_values, vresult)); @@ -285,8 +288,9 @@ reader->ReadBatch( levels_per_page / 2, dresult.data(), rresult.data(), vresult.data(), &values_read); sub_values.clear(); - sub_values.insert( - sub_values.end(), values_.begin() + 4.5 * levels_per_page, values_.end()); + sub_values.insert(sub_values.end(), + values_.begin() + static_cast(4.5 * static_cast(levels_per_page)), + values_.end()); ASSERT_TRUE(vector_equal(sub_values, vresult)); values_.clear(); diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/column/column-writer-test.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/column/column-writer-test.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/column/column-writer-test.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/column/column-writer-test.cc 2017-06-11 18:28:45.000000000 +0000 @@ -93,8 +93,9 @@ void ReadColumn(Compression::type compression = Compression::UNCOMPRESSED) { BuildReader(static_cast(this->values_out_.size()), compression); - reader_->ReadBatch(this->values_out_.size(), definition_levels_out_.data(), - repetition_levels_out_.data(), this->values_out_ptr_, &values_read_); + reader_->ReadBatch(static_cast(this->values_out_.size()), + definition_levels_out_.data(), repetition_levels_out_.data(), + this->values_out_ptr_, &values_read_); this->SyncValuesOut(); } @@ 
-133,7 +134,7 @@ Compression::type compression, bool enable_dictionary, bool enable_statistics, int64_t num_rows) { std::vector valid_bits( - BitUtil::RoundUpNumBytes(this->values_.size()) + 1, 255); + BitUtil::RoundUpNumBytes(static_cast(this->values_.size())) + 1, 255); ColumnProperties column_properties( encoding, compression, enable_dictionary, enable_statistics); std::shared_ptr> writer = @@ -204,7 +205,8 @@ values_read_ = 0; while (values_read_ < total_values) { int64_t values_read_recently = 0; - reader_->ReadBatch(this->values_out_.size() - values_read_, + reader_->ReadBatch( + static_cast(this->values_out_.size()) - static_cast(values_read_), definition_levels_out_.data() + values_read_, repetition_levels_out_.data() + values_read_, this->values_out_ptr_ + values_read_, &values_read_recently); @@ -222,7 +224,8 @@ values_read_ = 0; while (values_read_ < total_values) { int64_t values_read_recently = 0; - reader_->ReadBatch(this->values_out_.size() - values_read_, + reader_->ReadBatch( + static_cast(this->values_out_.size()) - static_cast(values_read_), definition_levels_out_.data() + values_read_, repetition_levels_out_.data() + values_read_, this->values_out_ptr_ + values_read_, &values_read_recently); diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/column/levels.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/column/levels.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/column/levels.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/column/levels.cc 2017-06-11 18:28:45.000000000 +0000 @@ -36,7 +36,8 @@ break; } case Encoding::BIT_PACKED: { - int num_bytes = BitUtil::Ceil(num_buffered_values * bit_width_, 8); + int num_bytes = + static_cast(BitUtil::Ceil(num_buffered_values * bit_width_, 8)); bit_packed_encoder_.reset(new BitWriter(data, num_bytes)); break; } @@ -58,7 +59,7 @@ break; } case Encoding::BIT_PACKED: { - num_bytes = BitUtil::Ceil(num_buffered_values * bit_width, 8); + num_bytes = 
static_cast(BitUtil::Ceil(num_buffered_values * bit_width, 8)); break; } default: @@ -112,7 +113,8 @@ return sizeof(int32_t) + num_bytes; } case Encoding::BIT_PACKED: { - num_bytes = BitUtil::Ceil(num_buffered_values * bit_width_, 8); + num_bytes = + static_cast(BitUtil::Ceil(num_buffered_values * bit_width_, 8)); if (!bit_packed_decoder_) { bit_packed_decoder_.reset(new BitReader(data, num_bytes)); } else { diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/column/levels-test.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/column/levels-test.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/column/levels-test.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/column/levels-test.cc 2017-06-11 18:28:45.000000000 +0000 @@ -52,17 +52,18 @@ LevelEncoder encoder; int levels_count = 0; bytes.resize(2 * num_levels); - ASSERT_EQ(2 * num_levels, bytes.size()); + ASSERT_EQ(2 * num_levels, static_cast(bytes.size())); // encode levels if (encoding == Encoding::RLE) { // leave space to write the rle length value - encoder.Init( - encoding, max_level, num_levels, bytes.data() + sizeof(int32_t), bytes.size()); + encoder.Init(encoding, max_level, num_levels, bytes.data() + sizeof(int32_t), + static_cast(bytes.size())); levels_count = encoder.Encode(num_levels, input_levels); (reinterpret_cast(bytes.data()))[0] = encoder.len(); } else { - encoder.Init(encoding, max_level, num_levels, bytes.data(), bytes.size()); + encoder.Init( + encoding, max_level, num_levels, bytes.data(), static_cast(bytes.size())); levels_count = encoder.Encode(num_levels, input_levels); } ASSERT_EQ(num_levels, levels_count); @@ -73,10 +74,10 @@ LevelDecoder decoder; int levels_count = 0; std::vector output_levels; - int num_levels = input_levels.size(); + int num_levels = static_cast(input_levels.size()); output_levels.resize(num_levels); - ASSERT_EQ(num_levels, output_levels.size()); + ASSERT_EQ(num_levels, static_cast(output_levels.size())); // Decode levels 
and test with multiple decode calls decoder.SetData(encoding, max_level, num_levels, bytes.data()); @@ -112,13 +113,13 @@ std::vector output_levels; // Decode levels and test with multiple SetData calls - int setdata_count = bytes.size(); - int num_levels = input_levels.size() / setdata_count; + int setdata_count = static_cast(bytes.size()); + int num_levels = static_cast(input_levels.size()) / setdata_count; output_levels.resize(num_levels); // Try multiple SetData for (int ct = 0; ct < setdata_count; ct++) { int offset = ct * num_levels; - ASSERT_EQ(num_levels, output_levels.size()); + ASSERT_EQ(num_levels, static_cast(output_levels.size())); decoder.SetData(encoding, max_level, num_levels, bytes[ct].data()); levels_count = decoder.Decode(num_levels, output_levels.data()); ASSERT_EQ(num_levels, levels_count); @@ -149,7 +150,8 @@ int max_level = (1 << bit_width) - 1; // Generate levels GenerateLevels(min_repeat_factor, max_repeat_factor, max_level, input_levels); - EncodeLevels(encoding, max_level, input_levels.size(), input_levels.data(), bytes); + EncodeLevels(encoding, max_level, static_cast(input_levels.size()), + input_levels.data(), bytes); VerifyDecodingLevels(encoding, max_level, input_levels, bytes); input_levels.clear(); } @@ -166,7 +168,7 @@ std::vector> bytes; Encoding::type encodings[2] = {Encoding::RLE, Encoding::BIT_PACKED}; GenerateLevels(min_repeat_factor, max_repeat_factor, max_level, input_levels); - int num_levels = input_levels.size(); + int num_levels = static_cast(input_levels.size()); int setdata_factor = 8; int split_level_size = num_levels / setdata_factor; bytes.resize(setdata_factor); @@ -200,7 +202,8 @@ LevelEncoder::MaxBufferSize(Encoding::RLE, 1, kNumToEncode)); LevelEncoder encoder; - encoder.Init(Encoding::RLE, 1, kNumToEncode, output.data(), output.size()); + encoder.Init( + Encoding::RLE, 1, kNumToEncode, output.data(), static_cast(output.size())); int encode_count = encoder.Encode(kNumToEncode, levels.data()); 
ASSERT_EQ(kNumToEncode, encode_count); @@ -231,7 +234,8 @@ LevelEncoder::MaxBufferSize(Encoding::RLE, bit_width, kNumToEncode)); LevelEncoder encoder; - encoder.Init(Encoding::RLE, bit_width, kNumToEncode, output.data(), output.size()); + encoder.Init(Encoding::RLE, bit_width, kNumToEncode, output.data(), + static_cast(output.size())); int encode_count = encoder.Encode(kNumToEncode, levels.data()); ASSERT_EQ(kNumToEncode, encode_count); diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/column/page.h apache-parquet-cpp-1.1.1.20170612/src/parquet/column/page.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/column/page.h 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/column/page.h 2017-06-11 18:28:45.000000000 +0000 @@ -52,7 +52,7 @@ const uint8_t* data() const { return buffer_->data(); } // @returns: the total size in bytes of the page's data buffer - int32_t size() const { return buffer_->size(); } + int32_t size() const { return static_cast(buffer_->size()); } private: std::shared_ptr buffer_; diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/column/properties.h apache-parquet-cpp-1.1.1.20170612/src/parquet/column/properties.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/column/properties.h 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/column/properties.h 2017-06-11 18:28:45.000000000 +0000 @@ -23,6 +23,7 @@ #include #include "parquet/exception.h" +#include "parquet/parquet_version.h" #include "parquet/schema.h" #include "parquet/types.h" #include "parquet/util/memory.h" @@ -85,7 +86,7 @@ static constexpr Encoding::type DEFAULT_ENCODING = Encoding::PLAIN; static constexpr ParquetVersion::type DEFAULT_WRITER_VERSION = ParquetVersion::PARQUET_1_0; -static const char DEFAULT_CREATED_BY[] = "parquet-cpp version 1.0.0"; +static const char DEFAULT_CREATED_BY[] = CREATED_BY_VERSION; static constexpr Compression::type DEFAULT_COMPRESSION_TYPE = Compression::UNCOMPRESSED; 
class PARQUET_EXPORT ColumnProperties { diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/column/reader.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/column/reader.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/column/reader.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/column/reader.cc 2017-06-11 18:28:45.000000000 +0000 @@ -119,9 +119,9 @@ // Levels are encoded as rle or bit-packed. // Init repetition levels if (descr_->max_repetition_level() > 0) { - int64_t rep_levels_bytes = - repetition_level_decoder_.SetData(page->repetition_level_encoding(), - descr_->max_repetition_level(), num_buffered_values_, buffer); + int64_t rep_levels_bytes = repetition_level_decoder_.SetData( + page->repetition_level_encoding(), descr_->max_repetition_level(), + static_cast(num_buffered_values_), buffer); buffer += rep_levels_bytes; data_size -= rep_levels_bytes; } @@ -130,9 +130,9 @@ // Init definition levels if (descr_->max_definition_level() > 0) { - int64_t def_levels_bytes = - definition_level_decoder_.SetData(page->definition_level_encoding(), - descr_->max_definition_level(), num_buffered_values_, buffer); + int64_t def_levels_bytes = definition_level_decoder_.SetData( + page->definition_level_encoding(), descr_->max_definition_level(), + static_cast(num_buffered_values_), buffer); buffer += def_levels_bytes; data_size -= def_levels_bytes; } @@ -169,7 +169,8 @@ throw ParquetException("Unknown encoding type."); } } - current_decoder_->SetData(num_buffered_values_, buffer, data_size); + current_decoder_->SetData( + static_cast(num_buffered_values_), buffer, static_cast(data_size)); return true; } else { // We don't know what this page type is. 
We're allowed to skip non-data @@ -185,12 +186,12 @@ int64_t ColumnReader::ReadDefinitionLevels(int64_t batch_size, int16_t* levels) { if (descr_->max_definition_level() == 0) { return 0; } - return definition_level_decoder_.Decode(batch_size, levels); + return definition_level_decoder_.Decode(static_cast(batch_size), levels); } int64_t ColumnReader::ReadRepetitionLevels(int64_t batch_size, int16_t* levels) { if (descr_->max_repetition_level() == 0) { return 0; } - return repetition_level_decoder_.Decode(batch_size, levels); + return repetition_level_decoder_.Decode(static_cast(batch_size), levels); } // ---------------------------------------------------------------------- @@ -225,13 +226,13 @@ // ---------------------------------------------------------------------- // Instantiate templated classes -template class TypedColumnReader; -template class TypedColumnReader; -template class TypedColumnReader; -template class TypedColumnReader; -template class TypedColumnReader; -template class TypedColumnReader; -template class TypedColumnReader; -template class TypedColumnReader; +template class PARQUET_TEMPLATE_EXPORT TypedColumnReader; +template class PARQUET_TEMPLATE_EXPORT TypedColumnReader; +template class PARQUET_TEMPLATE_EXPORT TypedColumnReader; +template class PARQUET_TEMPLATE_EXPORT TypedColumnReader; +template class PARQUET_TEMPLATE_EXPORT TypedColumnReader; +template class PARQUET_TEMPLATE_EXPORT TypedColumnReader; +template class PARQUET_TEMPLATE_EXPORT TypedColumnReader; +template class PARQUET_TEMPLATE_EXPORT TypedColumnReader; } // namespace parquet diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/column/reader.h apache-parquet-cpp-1.1.1.20170612/src/parquet/column/reader.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/column/reader.h 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/column/reader.h 2017-06-11 18:28:45.000000000 +0000 @@ -91,11 +91,11 @@ // values. 
For repeated or optional values, there may be fewer data values // than levels, and this tells you how many encoded levels there are in that // case. - int num_buffered_values_; + int64_t num_buffered_values_; // The number of values from the current data page that have been decoded // into memory - int num_decoded_values_; + int64_t num_decoded_values_; ::arrow::MemoryPool* pool_; }; @@ -128,8 +128,8 @@ // This API is the same for both V1 and V2 of the DataPage // // @returns: actual number of levels read (see values_read for number of values read) - int64_t ReadBatch(int batch_size, int16_t* def_levels, int16_t* rep_levels, T* values, - int64_t* values_read); + int64_t ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, + T* values, int64_t* values_read); /// Read a batch of repetition levels, definition levels, and values from the /// column and leave spaces for null entries on the lowest level in the values @@ -165,7 +165,7 @@ /// (i.e. definition_level == max_definition_level - 1) /// @param[out] null_count The number of nulls on the lowest levels. /// (i.e. 
(values_read - null_count) is total number of non-null entries) - int64_t ReadBatchSpaced(int batch_size, int16_t* def_levels, int16_t* rep_levels, + int64_t ReadBatchSpaced(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, T* values, uint8_t* valid_bits, int64_t valid_bits_offset, int64_t* levels_read, int64_t* values_read, int64_t* null_count); @@ -205,7 +205,7 @@ template inline int64_t TypedColumnReader::ReadValues(int64_t batch_size, T* out) { - int64_t num_decoded = current_decoder_->Decode(out, batch_size); + int64_t num_decoded = current_decoder_->Decode(out, static_cast(batch_size)); return num_decoded; } @@ -213,12 +213,12 @@ inline int64_t TypedColumnReader::ReadValuesSpaced(int64_t batch_size, T* out, int null_count, uint8_t* valid_bits, int64_t valid_bits_offset) { return current_decoder_->DecodeSpaced( - out, batch_size, null_count, valid_bits, valid_bits_offset); + out, static_cast(batch_size), null_count, valid_bits, valid_bits_offset); } template -inline int64_t TypedColumnReader::ReadBatch(int batch_size, int16_t* def_levels, - int16_t* rep_levels, T* values, int64_t* values_read) { +inline int64_t TypedColumnReader::ReadBatch(int64_t batch_size, + int16_t* def_levels, int16_t* rep_levels, T* values, int64_t* values_read) { // HasNext invokes ReadNewPage if (!HasNext()) { *values_read = 0; @@ -265,8 +265,8 @@ inline void DefinitionLevelsToBitmap(const int16_t* def_levels, int64_t num_def_levels, int16_t max_definition_level, int64_t* values_read, int64_t* null_count, uint8_t* valid_bits, int64_t valid_bits_offset) { - int byte_offset = valid_bits_offset / 8; - int bit_offset = valid_bits_offset % 8; + int byte_offset = static_cast(valid_bits_offset) / 8; + int bit_offset = static_cast(valid_bits_offset) % 8; uint8_t bitset = valid_bits[byte_offset]; for (int i = 0; i < num_def_levels; ++i) { @@ -293,7 +293,7 @@ } template -inline int64_t TypedColumnReader::ReadBatchSpaced(int batch_size, +inline int64_t 
TypedColumnReader::ReadBatchSpaced(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, T* values, uint8_t* valid_bits, int64_t valid_bits_offset, int64_t* levels_read, int64_t* values_read, int64_t* null_count_out) { @@ -338,8 +338,8 @@ int16_t max_definition_level = descr_->max_definition_level(); DefinitionLevelsToBitmap(def_levels, num_def_levels, max_definition_level, values_read, &null_count, valid_bits, valid_bits_offset); - total_values = ReadValuesSpaced( - *values_read, values, null_count, valid_bits, valid_bits_offset); + total_values = ReadValuesSpaced(*values_read, values, static_cast(null_count), + valid_bits, valid_bits_offset); } *levels_read = num_def_levels; *null_count_out = null_count; @@ -383,10 +383,10 @@ do { batch_size = std::min(batch_size, rows_to_skip); - values_read = - ReadBatch(batch_size, reinterpret_cast(def_levels->mutable_data()), - reinterpret_cast(rep_levels->mutable_data()), - reinterpret_cast(vals->mutable_data()), &values_read); + values_read = ReadBatch(static_cast(batch_size), + reinterpret_cast(def_levels->mutable_data()), + reinterpret_cast(rep_levels->mutable_data()), + reinterpret_cast(vals->mutable_data()), &values_read); rows_to_skip -= values_read; } while (values_read > 0 && rows_to_skip > 0); } @@ -403,14 +403,14 @@ typedef TypedColumnReader ByteArrayReader; typedef TypedColumnReader FixedLenByteArrayReader; -extern template class PARQUET_EXPORT TypedColumnReader; -extern template class PARQUET_EXPORT TypedColumnReader; -extern template class PARQUET_EXPORT TypedColumnReader; -extern template class PARQUET_EXPORT TypedColumnReader; -extern template class PARQUET_EXPORT TypedColumnReader; -extern template class PARQUET_EXPORT TypedColumnReader; -extern template class PARQUET_EXPORT TypedColumnReader; -extern template class PARQUET_EXPORT TypedColumnReader; +PARQUET_EXTERN_TEMPLATE TypedColumnReader; +PARQUET_EXTERN_TEMPLATE TypedColumnReader; +PARQUET_EXTERN_TEMPLATE TypedColumnReader; 
+PARQUET_EXTERN_TEMPLATE TypedColumnReader; +PARQUET_EXTERN_TEMPLATE TypedColumnReader; +PARQUET_EXTERN_TEMPLATE TypedColumnReader; +PARQUET_EXTERN_TEMPLATE TypedColumnReader; +PARQUET_EXTERN_TEMPLATE TypedColumnReader; } // namespace parquet diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/column/scanner.h apache-parquet-cpp-1.1.1.20170612/src/parquet/column/scanner.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/column/scanner.h 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/column/scanner.h 2017-06-11 18:28:45.000000000 +0000 @@ -103,8 +103,9 @@ bool NextLevels(int16_t* def_level, int16_t* rep_level) { if (level_offset_ == levels_buffered_) { - levels_buffered_ = typed_reader_->ReadBatch(batch_size_, def_levels_.data(), - rep_levels_.data(), values_, &values_buffered_); + levels_buffered_ = + static_cast(typed_reader_->ReadBatch(static_cast(batch_size_), + def_levels_.data(), rep_levels_.data(), values_, &values_buffered_)); value_offset_ = 0; level_offset_ = 0; diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/column/statistics.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/column/statistics.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/column/statistics.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/column/statistics.cc 2017-06-11 18:28:45.000000000 +0000 @@ -120,7 +120,7 @@ if (num_not_null == 0) return; Compare compare(descr_); - INIT_BITSET(valid_bits, valid_bits_offset); + INIT_BITSET(valid_bits, static_cast(valid_bits_offset)); // Find first valid entry and use that for min/max // As (num_not_null != 0) there must be one int64_t length = num_null + num_not_null; @@ -216,7 +216,8 @@ template void TypedRowGroupStatistics::PlainDecode(const std::string& src, T* dst) { PlainDecoder decoder(descr()); - decoder.SetData(1, reinterpret_cast(src.c_str()), src.size()); + decoder.SetData( + 1, reinterpret_cast(src.c_str()), static_cast(src.size())); 
decoder.Decode(dst, 1); } @@ -227,17 +228,17 @@ template <> void TypedRowGroupStatistics::PlainDecode(const std::string& src, T* dst) { - dst->len = src.size(); + dst->len = static_cast(src.size()); dst->ptr = reinterpret_cast(src.c_str()); } -template class TypedRowGroupStatistics; -template class TypedRowGroupStatistics; -template class TypedRowGroupStatistics; -template class TypedRowGroupStatistics; -template class TypedRowGroupStatistics; -template class TypedRowGroupStatistics; -template class TypedRowGroupStatistics; -template class TypedRowGroupStatistics; +template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics; +template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics; +template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics; +template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics; +template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics; +template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics; +template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics; +template class PARQUET_TEMPLATE_EXPORT TypedRowGroupStatistics; } // namespace parquet diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/column/statistics.h apache-parquet-cpp-1.1.1.20170612/src/parquet/column/statistics.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/column/statistics.h 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/column/statistics.h 2017-06-11 18:28:45.000000000 +0000 @@ -216,14 +216,14 @@ #pragma GCC diagnostic ignored "-Wattributes" #endif -extern template class PARQUET_EXPORT TypedRowGroupStatistics; -extern template class PARQUET_EXPORT TypedRowGroupStatistics; -extern template class PARQUET_EXPORT TypedRowGroupStatistics; -extern template class PARQUET_EXPORT TypedRowGroupStatistics; -extern template class PARQUET_EXPORT TypedRowGroupStatistics; -extern template class PARQUET_EXPORT TypedRowGroupStatistics; -extern template class PARQUET_EXPORT TypedRowGroupStatistics; -extern 
template class PARQUET_EXPORT TypedRowGroupStatistics; +PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics; +PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics; +PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics; +PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics; +PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics; +PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics; +PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics; +PARQUET_EXTERN_TEMPLATE TypedRowGroupStatistics; #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic pop diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/column/statistics-test.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/column/statistics-test.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/column/statistics-test.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/column/statistics-test.cc 2017-06-11 18:28:45.000000000 +0000 @@ -72,7 +72,7 @@ TypedStats statistics3(this->schema_.Column(0)); std::vector valid_bits( - BitUtil::RoundUpNumBytes(this->values_.size()) + 1, 255); + BitUtil::RoundUpNumBytes(static_cast(this->values_.size())) + 1, 255); statistics3.UpdateSpaced( this->values_ptr_, valid_bits.data(), 0, this->values_.size(), 0); std::string encoded_min_spaced = statistics3.EncodeMin(); @@ -145,8 +145,8 @@ // simulate the case when data comes from multiple buffers, // in which case special care is necessary for FLBA/ByteArray types for (int i = 0; i < 2; i++) { - int batch_num_values = i ? num_values - num_values / 2 : num_values / 2; - int batch_null_count = i ? null_count : 0; + int64_t batch_num_values = i ? num_values - num_values / 2 : num_values / 2; + int64_t batch_null_count = i ? 
null_count : 0; DCHECK(null_count <= num_values); // avoid too much headache std::vector definition_levels(batch_null_count, 0); definition_levels.insert( diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/column/test-specialization.h apache-parquet-cpp-1.1.1.20170612/src/parquet/column/test-specialization.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/column/test-specialization.h 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/column/test-specialization.h 2017-06-11 18:28:45.000000000 +0000 @@ -115,7 +115,11 @@ template <> void PrimitiveTypedTest::SyncValuesOut() { - std::copy(bool_buffer_out_.begin(), bool_buffer_out_.end(), values_out_.begin()); + std::vector::const_iterator source_iterator = bool_buffer_out_.begin(); + std::vector::iterator destination_iterator = values_out_.begin(); + while (source_iterator != bool_buffer_out_.end()) { + *destination_iterator++ = *source_iterator++ != 0; + } } template @@ -143,7 +147,7 @@ def_levels_.resize(num_values); values_.resize(num_values); - InitValues(num_values, values_, buffer_); + InitValues(static_cast(num_values), values_, buffer_); values_ptr_ = values_.data(); std::fill(def_levels_.begin(), def_levels_.end(), 1); @@ -154,7 +158,7 @@ def_levels_.resize(num_values); values_.resize(num_values); - InitValues(num_values, values_, buffer_); + InitValues(static_cast(num_values), values_, buffer_); bool_buffer_.resize(num_values); std::copy(values_.begin(), values_.end(), bool_buffer_.begin()); values_ptr_ = reinterpret_cast(bool_buffer_.data()); diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/column/test-util.h apache-parquet-cpp-1.1.1.20170612/src/parquet/column/test-util.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/column/test-util.h 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/column/test-util.h 2017-06-11 18:28:45.000000000 +0000 @@ -131,7 +131,7 @@ void AppendValues(const ColumnDescriptor* d, const vector& 
values, Encoding::type encoding = Encoding::PLAIN) { PlainEncoder encoder(d); - encoder.Put(&values[0], values.size()); + encoder.Put(&values[0], static_cast(values.size())); std::shared_ptr values_sink = encoder.FlushValues(); sink_->Write(values_sink->data(), values_sink->size()); @@ -174,10 +174,10 @@ // RLE-encoded bytes have to be preceded in the stream by their absolute // size. LevelEncoder encoder; - encoder.Init( - encoding, max_level, levels.size(), encode_buffer.data(), encode_buffer.size()); + encoder.Init(encoding, max_level, static_cast(levels.size()), + encode_buffer.data(), static_cast(encode_buffer.size())); - encoder.Encode(levels.size(), levels.data()); + encoder.Encode(static_cast(levels.size()), levels.data()); int32_t rle_bytes = encoder.len(); sink_->Write(reinterpret_cast(&rle_bytes), sizeof(int32_t)); @@ -192,7 +192,7 @@ ParquetException::NYI("only plain encoding currently implemented"); } PlainEncoder encoder(d); - encoder.Put(values, values.size()); + encoder.Put(values, static_cast(values.size())); std::shared_ptr buffer = encoder.FlushValues(); sink_->Write(buffer->data(), buffer->size()); @@ -243,7 +243,7 @@ ~DictionaryPageBuilder() { pool_.FreeAll(); } shared_ptr AppendValues(const vector& values) { - int num_values = values.size(); + int num_values = static_cast(values.size()); // Dictionary encoding encoder_->Put(values.data(), num_values); num_dict_values_ = encoder_->num_entries(); @@ -291,7 +291,7 @@ Encoding::type encoding, vector>& rle_indices) { InMemoryOutputStream page_stream; test::DictionaryPageBuilder page_builder(d); - int num_pages = values_per_page.size(); + int num_pages = static_cast(values_per_page.size()); int value_start = 0; for (int i = 0; i < num_pages; i++) { @@ -313,7 +313,7 @@ int16_t max_def_level, const vector& rep_levels, int16_t max_rep_level, int num_levels_per_page, const vector& values_per_page, vector>& pages, Encoding::type encoding = Encoding::RLE_DICTIONARY) { - int num_pages = 
values_per_page.size(); + int num_pages = static_cast(values_per_page.size()); vector> rle_indices; shared_ptr dict_page = MakeDictPage(d, values, values_per_page, encoding, rle_indices); @@ -332,7 +332,7 @@ rep_level_end = (i + 1) * num_levels_per_page; } shared_ptr data_page = MakeDataPage(d, {}, values_per_page[i], - encoding, rle_indices[i]->data(), rle_indices[i]->size(), + encoding, rle_indices[i]->data(), static_cast(rle_indices[i]->size()), slice(def_levels, def_level_start, def_level_end), max_def_level, slice(rep_levels, rep_level_start, rep_level_end), max_rep_level); pages.push_back(data_page); @@ -346,7 +346,7 @@ int16_t max_def_level, const vector& rep_levels, int16_t max_rep_level, int num_levels_per_page, const vector& values_per_page, vector>& pages, Encoding::type encoding = Encoding::PLAIN) { - int num_pages = values_per_page.size(); + int num_pages = static_cast(values_per_page.size()); int def_level_start = 0; int def_level_end = 0; int rep_level_start = 0; diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/column/writer.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/column/writer.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/column/writer.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/column/writer.cc 2017-06-11 18:28:45.000000000 +0000 @@ -87,19 +87,19 @@ int64_t ColumnWriter::RleEncodeLevels( const Buffer& src_buffer, ResizableBuffer* dest_buffer, int16_t max_level) { // TODO: This only works with due to some RLE specifics - int64_t rle_size = - LevelEncoder::MaxBufferSize(Encoding::RLE, max_level, num_buffered_values_) + - sizeof(int32_t); + int64_t rle_size = LevelEncoder::MaxBufferSize(Encoding::RLE, max_level, + static_cast(num_buffered_values_)) + + sizeof(int32_t); // Use Arrow::Buffer::shrink_to_fit = false // underlying buffer only keeps growing. Resize to a smaller size does not reallocate. 
PARQUET_THROW_NOT_OK(dest_buffer->Resize(rle_size, false)); - level_encoder_.Init(Encoding::RLE, max_level, num_buffered_values_, + level_encoder_.Init(Encoding::RLE, max_level, static_cast(num_buffered_values_), dest_buffer->mutable_data() + sizeof(int32_t), - dest_buffer->size() - sizeof(int32_t)); - int encoded = level_encoder_.Encode( - num_buffered_values_, reinterpret_cast(src_buffer.data())); + static_cast(dest_buffer->size()) - sizeof(int32_t)); + int encoded = level_encoder_.Encode(static_cast(num_buffered_values_), + reinterpret_cast(src_buffer.data())); DCHECK_EQ(encoded, num_buffered_values_); reinterpret_cast(dest_buffer->mutable_data())[0] = level_encoder_.len(); int64_t encoded_size = level_encoder_.len() + sizeof(int32_t); @@ -154,12 +154,13 @@ std::shared_ptr compressed_data_copy; PARQUET_THROW_NOT_OK(compressed_data->Copy( 0, compressed_data->size(), allocator_, &compressed_data_copy)); - CompressedDataPage page(compressed_data_copy, num_buffered_values_, encoding_, - Encoding::RLE, Encoding::RLE, uncompressed_size, page_stats); + CompressedDataPage page(compressed_data_copy, + static_cast(num_buffered_values_), encoding_, Encoding::RLE, + Encoding::RLE, uncompressed_size, page_stats); data_pages_.push_back(std::move(page)); } else { // Eagerly write pages - CompressedDataPage page(compressed_data, num_buffered_values_, encoding_, - Encoding::RLE, Encoding::RLE, uncompressed_size, page_stats); + CompressedDataPage page(compressed_data, static_cast(num_buffered_values_), + encoding_, Encoding::RLE, Encoding::RLE, uncompressed_size, page_stats); WriteDataPage(page); } @@ -170,8 +171,7 @@ } void ColumnWriter::WriteDataPage(const CompressedDataPage& page) { - int64_t bytes_written = pager_->WriteDataPage(page); - total_bytes_written_ += bytes_written; + total_bytes_written_ += pager_->WriteDataPage(page); } int64_t ColumnWriter::Close() { @@ -361,7 +361,7 @@ WriteRepetitionLevels(num_values, rep_levels); } else { // Each value is exactly one row - 
num_rows_ += num_values; + num_rows_ += static_cast(num_values); } if (num_rows_ > expected_rows_) { @@ -422,7 +422,7 @@ WriteRepetitionLevels(num_values, rep_levels); } else { // Each value is exactly one row - num_rows_ += num_values; + num_rows_ += static_cast(num_values); } if (num_rows_ > expected_rows_) { @@ -461,7 +461,7 @@ // of values, the chunking will ensure the AddDataPage() is called at a reasonable // pagesize limit int64_t write_batch_size = properties_->write_batch_size(); - int num_batches = num_values / write_batch_size; + int num_batches = static_cast(num_values / write_batch_size); int64_t num_remaining = num_values % write_batch_size; int64_t value_offset = 0; for (int round = 0; round < num_batches; round++) { @@ -486,7 +486,7 @@ // of values, the chunking will ensure the AddDataPage() is called at a reasonable // pagesize limit int64_t write_batch_size = properties_->write_batch_size(); - int num_batches = num_values / write_batch_size; + int num_batches = static_cast(num_values / write_batch_size); int64_t num_remaining = num_values % write_batch_size; int64_t num_spaced_written = 0; int64_t values_offset = 0; @@ -506,22 +506,23 @@ template void TypedColumnWriter::WriteValues(int64_t num_values, const T* values) { - current_encoder_->Put(values, num_values); + current_encoder_->Put(values, static_cast(num_values)); } template void TypedColumnWriter::WriteValuesSpaced(int64_t num_values, const uint8_t* valid_bits, int64_t valid_bits_offset, const T* values) { - current_encoder_->PutSpaced(values, num_values, valid_bits, valid_bits_offset); + current_encoder_->PutSpaced( + values, static_cast(num_values), valid_bits, valid_bits_offset); } -template class TypedColumnWriter; -template class TypedColumnWriter; -template class TypedColumnWriter; -template class TypedColumnWriter; -template class TypedColumnWriter; -template class TypedColumnWriter; -template class TypedColumnWriter; -template class TypedColumnWriter; +template class 
PARQUET_TEMPLATE_EXPORT TypedColumnWriter; +template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter; +template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter; +template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter; +template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter; +template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter; +template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter; +template class PARQUET_TEMPLATE_EXPORT TypedColumnWriter; } // namespace parquet diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/column/writer.h apache-parquet-cpp-1.1.1.20170612/src/parquet/column/writer.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/column/writer.h 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/column/writer.h 2017-06-11 18:28:45.000000000 +0000 @@ -118,17 +118,17 @@ // values. For repeated or optional values, there may be fewer data values // than levels, and this tells you how many encoded levels there are in that // case. - int num_buffered_values_; + int64_t num_buffered_values_; // The total number of stored values. For repeated or optional values, this // number may be lower than num_buffered_values_. 
- int num_buffered_encoded_values_; + int64_t num_buffered_encoded_values_; // Total number of rows written with this ColumnWriter int num_rows_; // Records the total number of bytes written by the serializer - int total_bytes_written_; + int64_t total_bytes_written_; // Flag to check if the Writer has been closed bool closed_; @@ -212,14 +212,14 @@ typedef TypedColumnWriter ByteArrayWriter; typedef TypedColumnWriter FixedLenByteArrayWriter; -extern template class PARQUET_EXPORT TypedColumnWriter; -extern template class PARQUET_EXPORT TypedColumnWriter; -extern template class PARQUET_EXPORT TypedColumnWriter; -extern template class PARQUET_EXPORT TypedColumnWriter; -extern template class PARQUET_EXPORT TypedColumnWriter; -extern template class PARQUET_EXPORT TypedColumnWriter; -extern template class PARQUET_EXPORT TypedColumnWriter; -extern template class PARQUET_EXPORT TypedColumnWriter; +PARQUET_EXTERN_TEMPLATE TypedColumnWriter; +PARQUET_EXTERN_TEMPLATE TypedColumnWriter; +PARQUET_EXTERN_TEMPLATE TypedColumnWriter; +PARQUET_EXTERN_TEMPLATE TypedColumnWriter; +PARQUET_EXTERN_TEMPLATE TypedColumnWriter; +PARQUET_EXTERN_TEMPLATE TypedColumnWriter; +PARQUET_EXTERN_TEMPLATE TypedColumnWriter; +PARQUET_EXTERN_TEMPLATE TypedColumnWriter; } // namespace parquet diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/compression.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/compression.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/compression.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/compression.cc 2017-06-11 18:28:45.000000000 +0000 @@ -152,9 +152,9 @@ // from the beginning again. while (ret != Z_STREAM_END) { stream_.next_in = const_cast(reinterpret_cast(input)); - stream_.avail_in = input_length; + stream_.avail_in = static_cast(input_length); stream_.next_out = reinterpret_cast(output); - stream_.avail_out = output_length; + stream_.avail_out = static_cast(output_length); // We know the output size. 
In this case, we can use Z_FINISH // which is more efficient. @@ -188,9 +188,9 @@ uint8_t* output) { if (!compressor_initialized_) { InitCompressor(); } stream_.next_in = const_cast(reinterpret_cast(input)); - stream_.avail_in = input_length; + stream_.avail_in = static_cast(input_length); stream_.next_out = reinterpret_cast(output); - stream_.avail_out = output_length; + stream_.avail_out = static_cast(output_length); int64_t ret = 0; if ((ret = deflate(&stream_, Z_FINISH)) != Z_STREAM_END) { diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/compression-test.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/compression-test.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/compression-test.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/compression-test.cc 2017-06-11 18:28:45.000000000 +0000 @@ -34,13 +34,13 @@ T c1; T c2; - int max_compressed_len = c1.MaxCompressedLen(data.size(), &data[0]); + int max_compressed_len = static_cast(c1.MaxCompressedLen(data.size(), &data[0])); std::vector compressed(max_compressed_len); std::vector decompressed(data.size()); // compress with c1 - int actual_size = - c1.Compress(data.size(), &data[0], max_compressed_len, &compressed[0]); + int actual_size = static_cast( + c1.Compress(data.size(), &data[0], max_compressed_len, &compressed[0])); compressed.resize(actual_size); // decompress with c2 @@ -49,8 +49,8 @@ ASSERT_TRUE(test::vector_equal(data, decompressed)); // compress with c2 - int actual_size2 = - c2.Compress(data.size(), &data[0], max_compressed_len, &compressed[0]); + int actual_size2 = static_cast( + c2.Compress(data.size(), &data[0], max_compressed_len, &compressed[0])); ASSERT_EQ(actual_size2, actual_size); // decompress with c1 diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/encoding.h apache-parquet-cpp-1.1.1.20170612/src/parquet/encoding.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/encoding.h 2017-05-23 18:29:32.000000000 +0000 +++ 
apache-parquet-cpp-1.1.1.20170612/src/parquet/encoding.h 2017-06-11 18:28:45.000000000 +0000 @@ -52,7 +52,7 @@ PoolBuffer buffer(pool_); buffer.Resize(num_values * sizeof(T)); int32_t num_valid_values = 0; - INIT_BITSET(valid_bits, valid_bits_offset); + INIT_BITSET(valid_bits, static_cast(valid_bits_offset)); T* data = reinterpret_cast(buffer.mutable_data()); for (int32_t i = 0; i < num_values; i++) { if (bitset_valid_bits & (1 << bit_offset_valid_bits)) { diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/encoding-internal.h apache-parquet-cpp-1.1.1.20170612/src/parquet/encoding-internal.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/encoding-internal.h 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/encoding-internal.h 2017-06-11 18:28:45.000000000 +0000 @@ -196,7 +196,8 @@ bits_available_(kInMemoryDefaultCapacity * 8), bits_buffer_(AllocateBuffer(pool, kInMemoryDefaultCapacity)), values_sink_(new InMemoryOutputStream(pool)) { - bit_writer_.reset(new BitWriter(bits_buffer_->mutable_data(), bits_buffer_->size())); + bit_writer_.reset(new BitWriter( + bits_buffer_->mutable_data(), static_cast(bits_buffer_->size()))); } int64_t EstimatedDataEncodedSize() override { @@ -208,7 +209,7 @@ bit_writer_->Flush(); values_sink_->Write(bit_writer_->buffer(), bit_writer_->bytes_written()); bit_writer_->Clear(); - bits_available_ = bits_buffer_->size() * 8; + bits_available_ = static_cast(bits_buffer_->size()) * 8; } std::shared_ptr buffer = values_sink_->GetBuffer(); @@ -236,7 +237,7 @@ \ int bits_remaining = num_values - bit_offset; \ while (bit_offset < num_values) { \ - bits_available_ = bits_buffer_->size() * 8; \ + bits_available_ = static_cast(bits_buffer_->size()) * 8; \ \ int bits_to_write = std::min(bits_available_, bits_remaining); \ for (int i = bit_offset; i < bit_offset + bits_to_write; i++) { \ @@ -463,7 +464,9 @@ // reserve // an extra "RleEncoder::MinBufferSize" bytes. 
These extra bytes won't be used // but not reserving them would cause the encoder to fail. - return 1 + RleEncoder::MaxBufferSize(bit_width(), buffered_indices_.size()) + + return 1 + + RleEncoder::MaxBufferSize( + bit_width(), static_cast(buffered_indices_.size())) + RleEncoder::MinBufferSize(bit_width()); } @@ -493,7 +496,8 @@ std::shared_ptr FlushValues() override { std::shared_ptr buffer = AllocateBuffer(this->allocator_, EstimatedDataEncodedSize()); - int result_size = WriteIndices(buffer->mutable_data(), EstimatedDataEncodedSize()); + int result_size = WriteIndices( + buffer->mutable_data(), static_cast(EstimatedDataEncodedSize())); ClearIndices(); PARQUET_THROW_NOT_OK(buffer->Resize(result_size, false)); return buffer; @@ -507,7 +511,7 @@ void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits, int64_t valid_bits_offset) override { - INIT_BITSET(valid_bits, valid_bits_offset); + INIT_BITSET(valid_bits, static_cast(valid_bits_offset)); for (int32_t i = 0; i < num_values; i++) { if (bitset_valid_bits & (1 << bit_offset_valid_bits)) { Put(src[i]); } READ_NEXT_BITSET(valid_bits); @@ -521,7 +525,7 @@ ChunkedAllocator* mem_pool() { return pool_; } /// The number of entries in the dictionary. 
- int num_entries() const { return uniques_.size(); } + int num_entries() const { return static_cast(uniques_.size()); } private: ::arrow::MemoryPool* allocator_; @@ -607,7 +611,7 @@ if (index == HASH_SLOT_EMPTY) { // Not in the hash table, so we insert it now - index = uniques_.size(); + index = static_cast(uniques_.size()); hash_slots_[j] = index; AddDictKey(v); @@ -808,7 +812,7 @@ int64_t delta; if (!decoder_.GetValue(delta_bit_width_, &delta)) ParquetException::EofException(); delta += min_delta_; - last_value_ += delta; + last_value_ += static_cast(delta); buffer[i] = last_value_; --values_current_mini_block_; } diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/encoding-test.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/encoding-test.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/encoding-test.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/encoding-test.cc 2017-06-11 18:28:45.000000000 +0000 @@ -42,7 +42,7 @@ TEST(VectorBooleanTest, TestEncodeDecode) { // PARQUET-454 int nvalues = 10000; - int nbytes = BitUtil::Ceil(nvalues, 8); + int nbytes = static_cast(BitUtil::Ceil(nvalues, 8)); // seed the prng so failure is deterministic vector draws = flip_coins_seed(nvalues, 0.5, 0); @@ -58,7 +58,8 @@ vector decode_buffer(nbytes); const uint8_t* decode_data = &decode_buffer[0]; - decoder.SetData(nvalues, encode_buffer->data(), encode_buffer->size()); + decoder.SetData( + nvalues, encode_buffer->data(), static_cast(encode_buffer->size())); int values_decoded = decoder.Decode(&decode_buffer[0], nvalues); ASSERT_EQ(nvalues, values_decoded); @@ -218,7 +219,8 @@ encoder.Put(draws_, num_values_); encode_buffer_ = encoder.FlushValues(); - decoder.SetData(num_values_, encode_buffer_->data(), encode_buffer_->size()); + decoder.SetData( + num_values_, encode_buffer_->data(), static_cast(encode_buffer_->size())); int values_decoded = decoder.Decode(decode_buf_, num_values_); ASSERT_EQ(num_values_, values_decoded); 
VerifyResults(decode_buf_, draws_, num_values_); @@ -263,13 +265,13 @@ ASSERT_TRUE(indices_from_spaced->Equals(*indices)); PlainDecoder dict_decoder(descr_.get()); - dict_decoder.SetData( - encoder.num_entries(), dict_buffer_->data(), dict_buffer_->size()); + dict_decoder.SetData(encoder.num_entries(), dict_buffer_->data(), + static_cast(dict_buffer_->size())); DictionaryDecoder decoder(descr_.get()); decoder.SetDict(&dict_decoder); - decoder.SetData(num_values_, indices->data(), indices->size()); + decoder.SetData(num_values_, indices->data(), static_cast(indices->size())); int values_decoded = decoder.Decode(decode_buf_, num_values_); ASSERT_EQ(num_values_, values_decoded); @@ -279,7 +281,7 @@ VerifyResults(decode_buf_, draws_, num_values_); // Also test spaced decoding - decoder.SetData(num_values_, indices->data(), indices->size()); + decoder.SetData(num_values_, indices->data(), static_cast(indices->size())); values_decoded = decoder.DecodeSpaced(decode_buf_, num_values_, 0, valid_bits.data(), 0); ASSERT_EQ(num_values_, values_decoded); diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/file/file-deserialize-test.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/file/file-deserialize-test.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/file/file-deserialize-test.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/file/file-deserialize-test.cc 2017-06-11 18:28:45.000000000 +0000 @@ -192,7 +192,7 @@ std::vector buffer; for (int i = 0; i < num_pages; ++i) { const uint8_t* data = faux_data[i].data(); - int data_size = faux_data[i].size(); + int data_size = static_cast(faux_data[i].size()); int64_t max_compressed_size = codec->MaxCompressedLen(data_size, data); buffer.resize(max_compressed_size); @@ -200,7 +200,7 @@ int64_t actual_size = codec->Compress(data_size, data, max_compressed_size, &buffer[0]); - WriteDataPageHeader(1024, data_size, actual_size); + WriteDataPageHeader(1024, data_size, static_cast(actual_size)); 
out_stream_->Write(buffer.data(), actual_size); } @@ -209,7 +209,7 @@ std::shared_ptr page; const DataPage* data_page; for (int i = 0; i < num_pages; ++i) { - int data_size = faux_data[i].size(); + int data_size = static_cast(faux_data[i].size()); page = page_reader_->NextPage(); data_page = static_cast(page.get()); ASSERT_EQ(data_size, data_page->size()); diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/file/file-metadata-test.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/file/file-metadata-test.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/file/file-metadata-test.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/file/file-metadata-test.cc 2017-06-11 18:28:45.000000000 +0000 @@ -48,7 +48,7 @@ .set_min(std::string(reinterpret_cast(&int_min), 4)) .set_max(std::string(reinterpret_cast(&int_max), 4)); EncodedStatistics stats_float; - float float_min = 100.100, float_max = 200.200; + float float_min = 100.100f, float_max = 200.200f; stats_float.set_null_count(0) .set_distinct_count(nrows) .set_min(std::string(reinterpret_cast(&float_min), 4)) @@ -84,7 +84,7 @@ // file metadata ASSERT_EQ(nrows, f_accessor->num_rows()); - ASSERT_LE(0, f_accessor->size()); + ASSERT_LE(0, static_cast(f_accessor->size())); ASSERT_EQ(2, f_accessor->num_row_groups()); ASSERT_EQ(ParquetVersion::PARQUET_2_0, f_accessor->version()); ASSERT_EQ(DEFAULT_CREATED_BY, f_accessor->created_by()); diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/file/metadata.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/file/metadata.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/file/metadata.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/file/metadata.cc 2017-06-11 18:28:45.000000000 +0000 @@ -287,7 +287,7 @@ : row_group_(row_group), schema_(schema), writer_version_(writer_version) {} ~RowGroupMetaDataImpl() {} - inline int num_columns() const { return row_group_->columns.size(); } + inline int 
num_columns() const { return static_cast(row_group_->columns.size()); } inline int64_t num_rows() const { return row_group_->num_rows; } @@ -371,10 +371,14 @@ inline uint32_t size() const { return metadata_len_; } inline int num_columns() const { return schema_.num_columns(); } inline int64_t num_rows() const { return metadata_->num_rows; } - inline int num_row_groups() const { return metadata_->row_groups.size(); } + inline int num_row_groups() const { + return static_cast(metadata_->row_groups.size()); + } inline int32_t version() const { return metadata_->version; } inline const std::string& created_by() const { return metadata_->created_by; } - inline int num_schema_elements() const { return metadata_->schema.size(); } + inline int num_schema_elements() const { + return static_cast(metadata_->schema.size()); + } const ApplicationVersion& writer_version() const { return writer_version_; } @@ -404,7 +408,7 @@ std::unique_ptr metadata_; void InitSchema() { schema::FlatSchemaConverter converter( - &metadata_->schema[0], metadata_->schema.size()); + &metadata_->schema[0], static_cast(metadata_->schema.size())); schema_.Init(converter.Convert()); } SchemaDescriptor schema_; @@ -743,7 +747,7 @@ row_group_->__set_total_byte_size(total_byte_size); } - int num_columns() { return row_group_->columns.size(); } + int num_columns() { return static_cast(row_group_->columns.size()); } private: void InitializeColumns(int ncols) { row_group_->columns.resize(ncols); } diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/file/reader-internal.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/file/reader-internal.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/file/reader-internal.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/file/reader-internal.cc 2017-06-11 18:28:45.000000000 +0000 @@ -68,7 +68,7 @@ if (bytes_available == 0) { return std::shared_ptr(nullptr); } // This gets used, then set by DeserializeThriftMsg - header_size = 
bytes_available; + header_size = static_cast(bytes_available); try { DeserializeThriftMsg(buffer, &header_size, ¤t_page_header_); break; @@ -202,6 +202,8 @@ // ---------------------------------------------------------------------- // SerializedFile: Parquet on-disk layout +// PARQUET-978: Minimize footer reads by reading 64 KB from the end of the file +static constexpr int64_t DEFAULT_FOOTER_READ_SIZE = 64 * 1024; static constexpr uint32_t FOOTER_SIZE = 8; static constexpr uint8_t PARQUET_MAGIC[4] = {'P', 'A', 'R', '1'}; @@ -255,15 +257,19 @@ throw ParquetException("Corrupted file, smaller than file footer"); } - uint8_t footer_buffer[FOOTER_SIZE]; + uint8_t footer_buffer[DEFAULT_FOOTER_READ_SIZE]; + int64_t footer_read_size = std::min(file_size, DEFAULT_FOOTER_READ_SIZE); int64_t bytes_read = - source_->ReadAt(file_size - FOOTER_SIZE, FOOTER_SIZE, footer_buffer); + source_->ReadAt(file_size - footer_read_size, footer_read_size, footer_buffer); - if (bytes_read != FOOTER_SIZE || memcmp(footer_buffer + 4, PARQUET_MAGIC, 4) != 0) { + // Check if all bytes are read. Check if last 4 bytes read have the magic bits + if (bytes_read != footer_read_size || + memcmp(footer_buffer + footer_read_size - 4, PARQUET_MAGIC, 4) != 0) { throw ParquetException("Invalid parquet file. Corrupt footer."); } - uint32_t metadata_len = *reinterpret_cast(footer_buffer); + uint32_t metadata_len = + *reinterpret_cast(footer_buffer + footer_read_size - FOOTER_SIZE); int64_t metadata_start = file_size - FOOTER_SIZE - metadata_len; if (FOOTER_SIZE + metadata_len > file_size) { throw ParquetException( @@ -273,10 +279,17 @@ std::shared_ptr metadata_buffer = AllocateBuffer(properties_.memory_pool(), metadata_len); - bytes_read = - source_->ReadAt(metadata_start, metadata_len, metadata_buffer->mutable_data()); - if (bytes_read != metadata_len) { - throw ParquetException("Invalid parquet file. 
Could not read metadata bytes."); + + // Check if the footer_buffer contains the entire metadata + if (footer_read_size >= (metadata_len + FOOTER_SIZE)) { + memcpy(metadata_buffer->mutable_data(), + footer_buffer + (footer_read_size - metadata_len - FOOTER_SIZE), metadata_len); + } else { + bytes_read = + source_->ReadAt(metadata_start, metadata_len, metadata_buffer->mutable_data()); + if (bytes_read != metadata_len) { + throw ParquetException("Invalid parquet file. Could not read metadata bytes."); + } } file_metadata_ = FileMetaData::Make(metadata_buffer->data(), &metadata_len); diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/file/writer-internal.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/file/writer-internal.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/file/writer-internal.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/file/writer-internal.cc 2017-06-11 18:28:45.000000000 +0000 @@ -62,7 +62,6 @@ void SerializedPageWriter::Close(bool has_dictionary, bool fallback) { // index_page_offset = 0 since they are not supported - // TODO: Remove default fallback = 'false' when implemented metadata_->Finish(num_values_, dictionary_page_offset_, 0, data_page_offset_, total_compressed_size_, total_uncompressed_size_, has_dictionary, fallback); @@ -102,8 +101,8 @@ format::PageHeader page_header; page_header.__set_type(format::PageType::DATA_PAGE); - page_header.__set_uncompressed_page_size(uncompressed_size); - page_header.__set_compressed_page_size(compressed_data->size()); + page_header.__set_uncompressed_page_size(static_cast(uncompressed_size)); + page_header.__set_compressed_page_size(static_cast(compressed_data->size())); page_header.__set_data_page_header(data_page_header); // TODO(PARQUET-594) crc checksum @@ -140,8 +139,8 @@ format::PageHeader page_header; page_header.__set_type(format::PageType::DICTIONARY_PAGE); - page_header.__set_uncompressed_page_size(uncompressed_size); - 
page_header.__set_compressed_page_size(compressed_data->size()); + page_header.__set_uncompressed_page_size(static_cast(uncompressed_size)); + page_header.__set_compressed_page_size(static_cast(compressed_data->size())); page_header.__set_dictionary_page_header(dict_page_header); // TODO(PARQUET-594) crc checksum @@ -261,12 +260,12 @@ void FileSerializer::WriteMetaData() { // Write MetaData - uint32_t metadata_len = sink_->Tell(); + uint32_t metadata_len = static_cast(sink_->Tell()); // Get a FileMetaData auto metadata = metadata_->Finish(); metadata->WriteTo(sink_.get()); - metadata_len = sink_->Tell() - metadata_len; + metadata_len = static_cast(sink_->Tell()) - metadata_len; // Write Footer sink_->Write(reinterpret_cast(&metadata_len), 4); diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/parquet_version.h apache-parquet-cpp-1.1.1.20170612/src/parquet/parquet_version.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/parquet_version.h 1970-01-01 00:00:00.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/parquet_version.h 2017-06-11 18:28:45.000000000 +0000 @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef PARQUET_VERSION_H +#define PARQUET_VERSION_H + +// define the parquet created by version +#define CREATED_BY_VERSION "parquet-cpp version 1.1.1-SNAPSHOT" + +#endif // PARQUET_VERSION_H diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/parquet_version.h.in apache-parquet-cpp-1.1.1.20170612/src/parquet/parquet_version.h.in --- apache-parquet-cpp-1.1.1.20170524/src/parquet/parquet_version.h.in 1970-01-01 00:00:00.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/parquet_version.h.in 2017-06-11 18:28:45.000000000 +0000 @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef PARQUET_VERSION_H +#define PARQUET_VERSION_H + +// define the parquet created by version +#define CREATED_BY_VERSION "parquet-cpp version @PARQUET_VERSION@" + +#endif // PARQUET_VERSION_H diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/reader-test.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/reader-test.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/reader-test.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/reader-test.cc 2017-06-11 18:28:45.000000000 +0000 @@ -83,12 +83,14 @@ ASSERT_EQ(8, reader_->metadata()->num_rows()); // This file only has 1 row group ASSERT_EQ(1, reader_->metadata()->num_row_groups()); + // Size of the metadata is 730 bytes + ASSERT_EQ(730, reader_->metadata()->size()); // This row group must have 8 rows ASSERT_EQ(8, group->metadata()->num_rows()); ASSERT_TRUE(col->HasNext()); int64_t values_read; - int levels_read = col->ReadBatch(4, def_levels, rep_levels, values, &values_read); + auto levels_read = col->ReadBatch(4, def_levels, rep_levels, values, &values_read); ASSERT_EQ(4, levels_read); ASSERT_EQ(4, values_read); diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/schema.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/schema.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/schema.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/schema.cc 2017-06-11 18:28:45.000000000 +0000 @@ -361,7 +361,7 @@ } std::shared_ptr FromParquet(const std::vector& schema) { - FlatSchemaConverter converter(&schema[0], schema.size()); + FlatSchemaConverter converter(&schema[0], static_cast(schema.size())); std::unique_ptr root = converter.Convert(); std::shared_ptr descr = std::make_shared(); @@ -594,7 +594,7 @@ } else { // Primitive node, append to leaves leaves_.push_back(ColumnDescriptor(node, max_def_level, max_rep_level, this)); - leaf_to_base_.emplace(leaves_.size() - 1, base); + leaf_to_base_.emplace(static_cast(leaves_.size()) - 1, 
base); } } diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/schema.h apache-parquet-cpp-1.1.1.20170612/src/parquet/schema.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/schema.h 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/schema.h 2017-06-11 18:28:45.000000000 +0000 @@ -250,7 +250,7 @@ const NodePtr& field(int i) const { return fields_[i]; } - int field_count() const { return fields_.size(); } + int field_count() const { return static_cast(fields_.size()); } void ToParquet(void* opaque_element) const override; void Visit(Visitor* visitor) override; @@ -365,7 +365,7 @@ bool Equals(const SchemaDescriptor& other) const; // The number of physical columns appearing in the file - int num_columns() const { return leaves_.size(); } + int num_columns() const { return static_cast(leaves_.size()); } const schema::NodePtr& schema_root() const { return schema_; } diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/schema-test.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/schema-test.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/schema-test.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/schema-test.cc 2017-06-11 18:28:45.000000000 +0000 @@ -385,7 +385,7 @@ elements.push_back( NewPrimitive("item", FieldRepetitionType::OPTIONAL, format::Type::INT64, 4)); - Convert(&elements[0], elements.size()); + Convert(&elements[0], static_cast(elements.size())); // Construct the expected schema NodeVector fields; diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/types-test.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/types-test.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/types-test.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/types-test.cc 2017-06-11 18:28:45.000000000 +0000 @@ -78,8 +78,8 @@ ASSERT_STREQ("10240000000000", FormatStatValue(Type::INT64, smin.c_str()).c_str()); ASSERT_STREQ("20480000000000", 
FormatStatValue(Type::INT64, smax.c_str()).c_str()); - float float_min = 1.024; - float float_max = 2.048; + float float_min = 1.024f; + float float_max = 2.048f; smin = std::string(reinterpret_cast(&float_min), sizeof(float)); smax = std::string(reinterpret_cast(&float_max), sizeof(float)); ASSERT_STREQ("1.024", FormatStatValue(Type::FLOAT, smin.c_str()).c_str()); diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/util/bit-stream-utils.h apache-parquet-cpp-1.1.1.20170612/src/parquet/util/bit-stream-utils.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/util/bit-stream-utils.h 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/util/bit-stream-utils.h 2017-06-11 18:28:45.000000000 +0000 @@ -49,7 +49,9 @@ /// The number of current bytes written, including the current byte (i.e. may include a /// fraction of a byte). Includes buffered values. - int bytes_written() const { return byte_offset_ + BitUtil::Ceil(bit_offset_, 8); } + int bytes_written() const { + return byte_offset_ + static_cast(BitUtil::Ceil(bit_offset_, 8)); + } uint8_t* buffer() const { return buffer_; } int buffer_len() const { return max_bytes_; } @@ -144,7 +146,9 @@ /// Returns the number of bytes left in the stream, not including the current /// byte (i.e., there may be an additional fraction of a byte). 
- int bytes_left() { return max_bytes_ - (byte_offset_ + BitUtil::Ceil(bit_offset_, 8)); } + int bytes_left() { + return max_bytes_ - (byte_offset_ + static_cast(BitUtil::Ceil(bit_offset_, 8))); + } /// Maximum byte length of a vlq encoded int static const int MAX_VLQ_BYTE_LEN = 5; diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/util/bit-stream-utils.inline.h apache-parquet-cpp-1.1.1.20170612/src/parquet/util/bit-stream-utils.inline.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/util/bit-stream-utils.inline.h 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/util/bit-stream-utils.inline.h 2017-06-11 18:28:45.000000000 +0000 @@ -50,7 +50,7 @@ } inline void BitWriter::Flush(bool align) { - int num_bytes = BitUtil::Ceil(bit_offset_, 8); + int num_bytes = static_cast(BitUtil::Ceil(bit_offset_, 8)); DCHECK_LE(byte_offset_ + num_bytes, max_bytes_); memcpy(buffer_ + byte_offset_, &buffered_values_, num_bytes); @@ -91,8 +91,15 @@ template inline void GetValue_(int num_bits, T* v, int max_bytes, const uint8_t* buffer, int* bit_offset, int* byte_offset, uint64_t* buffered_values) { - *v = BitUtil::TrailingBits(*buffered_values, *bit_offset + num_bits) >> *bit_offset; - +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4800) +#endif + *v = static_cast( + BitUtil::TrailingBits(*buffered_values, *bit_offset + num_bits) >> *bit_offset); +#ifdef _MSC_VER +#pragma warning(pop) +#endif *bit_offset += num_bits; if (*bit_offset >= 64) { *byte_offset += 8; @@ -104,10 +111,16 @@ } else { memcpy(buffered_values, buffer + *byte_offset, bytes_remaining); } - +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4800 4805) +#endif // Read bits of v that crossed into new buffered_values_ *v |= BitUtil::TrailingBits(*buffered_values, *bit_offset) << (num_bits - *bit_offset); +#ifdef _MSC_VER +#pragma warning(pop) +#endif DCHECK_LE(*bit_offset, 64); } } @@ -132,7 +145,9 @@ uint64_t needed_bits = num_bits * 
batch_size; uint64_t remaining_bits = (max_bytes - byte_offset) * 8 - bit_offset; - if (remaining_bits < needed_bits) { batch_size = remaining_bits / num_bits; } + if (remaining_bits < needed_bits) { + batch_size = static_cast(remaining_bits) / num_bits; + } int i = 0; if (UNLIKELY(bit_offset != 0)) { @@ -156,7 +171,14 @@ unpack_buffer, unpack_size, num_bits); if (num_unpacked == 0) { break; } for (int k = 0; k < num_unpacked; ++k) { +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4800) +#endif v[i + k] = unpack_buffer[k]; +#ifdef _MSC_VER +#pragma warning(pop) +#endif } i += num_unpacked; byte_offset += num_unpacked * num_bits / 8; @@ -185,7 +207,7 @@ template inline bool BitReader::GetAligned(int num_bytes, T* v) { DCHECK_LE(num_bytes, static_cast(sizeof(T))); - int bytes_read = BitUtil::Ceil(bit_offset_, 8); + int bytes_read = static_cast(BitUtil::Ceil(bit_offset_, 8)); if (UNLIKELY(byte_offset_ + bytes_read + num_bytes > max_bytes_)) return false; // Advance byte_offset to next unread byte and read num_bytes @@ -227,7 +249,7 @@ int32_t u_signed; if (!GetVlqInt(&u_signed)) return false; uint32_t u = static_cast(u_signed); - *reinterpret_cast(v) = (u >> 1) ^ -(u & 1); + *reinterpret_cast(v) = (u >> 1) ^ -(static_cast(u & 1)); return true; } diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/util/bit-util.h apache-parquet-cpp-1.1.1.20170612/src/parquet/util/bit-util.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/util/bit-util.h 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/util/bit-util.h 2017-06-11 18:28:45.000000000 +0000 @@ -312,7 +312,7 @@ } static inline bool GetArrayBit(const uint8_t* bits, int i) { - return bits[i / 8] & (1 << (i % 8)); + return (bits[i / 8] & (1 << (i % 8))) != 0; } static inline void SetArrayBit(uint8_t* bits, int i, bool is_set) { diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/util/CMakeLists.txt apache-parquet-cpp-1.1.1.20170612/src/parquet/util/CMakeLists.txt 
--- apache-parquet-cpp-1.1.1.20170524/src/parquet/util/CMakeLists.txt 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/util/CMakeLists.txt 2017-06-11 18:28:45.000000000 +0000 @@ -39,6 +39,11 @@ target_link_libraries(parquet_benchmark_main gbenchmark ) + elseif(WIN32) + target_link_libraries(parquet_benchmark_main + gbenchmark + shlwapi.lib # workaround for bug(?) in gbenchmark: unresolved external symbol __imp_SHGetValueA + ) else() target_link_libraries(parquet_benchmark_main gbenchmark diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/util/comparison-test.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/util/comparison-test.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/util/comparison-test.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/util/comparison-test.cc 2017-06-11 18:28:45.000000000 +0000 @@ -34,7 +34,7 @@ static ByteArray ByteArrayFromString(const std::string& s) { auto ptr = reinterpret_cast(s.data()); - return ByteArray(s.size(), ptr); + return ByteArray(static_cast(s.size()), ptr); } static FLBA FLBAFromString(const std::string& s) { @@ -68,7 +68,7 @@ auto arr2 = FLBAFromString(b); NodePtr node = PrimitiveNode::Make("FLBA", Repetition::REQUIRED, - Type::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE, a.size()); + Type::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE, static_cast(a.size())); ColumnDescriptor descr(node, 0, 0); Compare less(&descr); ASSERT_TRUE(less(arr1, arr2)); diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/util/cpu-info.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/util/cpu-info.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/util/cpu-info.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/util/cpu-info.cc 2017-06-11 18:28:45.000000000 +0000 @@ -112,7 +112,7 @@ // that when impala is running, the core will not be in a lower power state. 
// TODO: is there a more robust way to do this, such as // Window's QueryPerformanceFrequency() - float mhz = atof(value.c_str()); + float mhz = static_cast(atof(value.c_str())); max_mhz = max(mhz, max_mhz); } else if (name.compare("processor") == 0) { ++num_cores; @@ -148,7 +148,7 @@ #endif if (max_mhz != 0) { - cycles_per_ms_ = max_mhz * 1000; + cycles_per_ms_ = static_cast(max_mhz) * 1000; } else { cycles_per_ms_ = 1000000; } diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/util/hash-util.h apache-parquet-cpp-1.1.1.20170612/src/parquet/util/hash-util.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/util/hash-util.h 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/util/hash-util.h 2017-06-11 18:28:45.000000000 +0000 @@ -216,7 +216,7 @@ return MurmurHash2_64(data, bytes, seed); } #else - return MurmurHash2_64(data, bytes, seed); + return static_cast(MurmurHash2_64(data, bytes, seed)); #endif } diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/util/memory.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/util/memory.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/util/memory.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/util/memory.cc 2017-06-11 18:28:45.000000000 +0000 @@ -275,7 +275,7 @@ char str[16]; out << "ChunkedAllocator(#chunks=" << chunks_.size() << " ["; for (size_t i = 0; i < chunks_.size(); ++i) { - sprintf(str, "0x%lx=", reinterpret_cast(chunks_[i].data)); // NOLINT + sprintf(str, "0x%zx=", reinterpret_cast(chunks_[i].data)); // NOLINT out << (i > 0 ? 
" " : "") << str << chunks_[i].size << "/" << chunks_[i].allocated_bytes; } diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/util/memory.h apache-parquet-cpp-1.1.1.20170612/src/parquet/util/memory.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/util/memory.h 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/util/memory.h 2017-06-11 18:28:45.000000000 +0000 @@ -46,7 +46,7 @@ #define PARQUET_IGNORE_NOT_OK(s) \ try { \ (s); \ - } catch (const ::parquet::ParquetException& e) {} + } catch (const ::parquet::ParquetException& e) { UNUSED(e); } #define PARQUET_THROW_NOT_OK(s) \ do { \ @@ -222,7 +222,7 @@ /// Return offset to unoccpied space in current chunk. int GetFreeOffset() const { if (current_chunk_idx_ == -1) return 0; - return chunks_[current_chunk_idx_].allocated_bytes; + return static_cast(chunks_[current_chunk_idx_].allocated_bytes); } template diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/util/rle-encoding.h apache-parquet-cpp-1.1.1.20170612/src/parquet/util/rle-encoding.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/util/rle-encoding.h 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/util/rle-encoding.h 2017-06-11 18:28:45.000000000 +0000 @@ -168,9 +168,10 @@ static int MinBufferSize(int bit_width) { /// 1 indicator byte and MAX_VALUES_PER_LITERAL_RUN 'bit_width' values. int max_literal_run_size = - 1 + BitUtil::Ceil(MAX_VALUES_PER_LITERAL_RUN * bit_width, 8); + 1 + static_cast(BitUtil::Ceil(MAX_VALUES_PER_LITERAL_RUN * bit_width, 8)); /// Up to MAX_VLQ_BYTE_LEN indicator and a single 'bit_width' value. 
- int max_repeated_run_size = BitReader::MAX_VLQ_BYTE_LEN + BitUtil::Ceil(bit_width, 8); + int max_repeated_run_size = + BitReader::MAX_VLQ_BYTE_LEN + static_cast(BitUtil::Ceil(bit_width, 8)); return std::max(max_literal_run_size, max_repeated_run_size); } @@ -181,14 +182,15 @@ // 8 values per smallest run, 8 bits per byte // int bytes_per_run = BitUtil::Ceil(bit_width * 8, 8); int bytes_per_run = bit_width; - int num_runs = BitUtil::Ceil(num_values, 8); + int num_runs = static_cast(BitUtil::Ceil(num_values, 8)); int literal_max_size = num_runs + num_runs * bytes_per_run; // In the very worst case scenario, the data is a concatenation of repeated // runs of 8 values. Repeated run has a 1 byte varint followed by the // bit-packed repeated value - int min_repeated_run_size = 1 + BitUtil::Ceil(bit_width, 8); - int repeated_max_size = BitUtil::Ceil(num_values, 8) * min_repeated_run_size; + int min_repeated_run_size = 1 + static_cast(BitUtil::Ceil(bit_width, 8)); + int repeated_max_size = + static_cast(BitUtil::Ceil(num_values, 8)) * min_repeated_run_size; return std::max(literal_max_size, repeated_max_size); } @@ -286,8 +288,8 @@ if (repeat_count_ > 0) { int repeat_batch = std::min(batch_size - values_read, static_cast(repeat_count_)); - std::fill( - values + values_read, values + values_read + repeat_batch, current_value_); + std::fill(values + values_read, values + values_read + repeat_batch, + static_cast(current_value_)); repeat_count_ -= repeat_batch; values_read += repeat_batch; } else if (literal_count_ > 0) { @@ -349,10 +351,10 @@ DCHECK_GE(bit_width_, 0); int values_read = 0; int remaining_nulls = null_count; - INIT_BITSET(valid_bits, valid_bits_offset); + INIT_BITSET(valid_bits, static_cast(valid_bits_offset)); while (values_read < batch_size) { - bool is_valid = (bitset_valid_bits & (1 << bit_offset_valid_bits)); + bool is_valid = (bitset_valid_bits & (1 << bit_offset_valid_bits)) != 0; READ_NEXT_BITSET(valid_bits); if (is_valid) { @@ -431,8 +433,9 @@ 
literal_count_ = (indicator_value >> 1) * 8; } else { repeat_count_ = indicator_value >> 1; - bool result = bit_reader_.GetAligned( - BitUtil::Ceil(bit_width_, 8), reinterpret_cast(¤t_value_)); + bool result = + bit_reader_.GetAligned(static_cast(BitUtil::Ceil(bit_width_, 8)), + reinterpret_cast(¤t_value_)); DCHECK(result); } return true; @@ -507,7 +510,8 @@ // The lsb of 0 indicates this is a repeated run int32_t indicator_value = repeat_count_ << 1 | 0; result &= bit_writer_.PutVlqInt(indicator_value); - result &= bit_writer_.PutAligned(current_value_, BitUtil::Ceil(bit_width_, 8)); + result &= bit_writer_.PutAligned( + current_value_, static_cast(BitUtil::Ceil(bit_width_, 8))); DCHECK(result); num_buffered_values_ = 0; repeat_count_ = 0; diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/util/rle-test.cc apache-parquet-cpp-1.1.1.20170612/src/parquet/util/rle-test.cc --- apache-parquet-cpp-1.1.1.20170524/src/parquet/util/rle-test.cc 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/util/rle-test.cc 2017-06-11 18:28:45.000000000 +0000 @@ -80,7 +80,7 @@ bool val = false; bool result = reader.GetValue(1, &val); EXPECT_TRUE(result); - EXPECT_EQ(val, i % 2); + EXPECT_EQ(val, (i % 2) != 0); } for (int i = 0; i < 8; ++i) { @@ -103,7 +103,7 @@ // Writes 'num_vals' values with width 'bit_width' and reads them back. void TestBitArrayValues(int bit_width, int num_vals) { - int len = BitUtil::Ceil(bit_width * num_vals, 8); + int len = static_cast(BitUtil::Ceil(bit_width * num_vals, 8)); EXPECT_TRUE(len > 0); const uint64_t mod = bit_width == 64 ? 
1 : 1LL << bit_width; @@ -210,7 +210,8 @@ { RleDecoder decoder(buffer, len, bit_width); vector values_read(values.size()); - ASSERT_EQ(values.size(), decoder.GetBatch(values_read.data(), values.size())); + ASSERT_EQ(values.size(), + decoder.GetBatch(values_read.data(), static_cast(values.size()))); EXPECT_EQ(values, values_read); } } @@ -241,7 +242,7 @@ RleDecoder decoder(buffer, len, bit_width); vector values_read(values.size()); if (static_cast(values.size()) != - decoder.GetBatch(values_read.data(), values.size())) { + decoder.GetBatch(values_read.data(), static_cast(values.size()))) { return false; } if (values != values_read) { return false; } @@ -274,14 +275,14 @@ } for (int width = 9; width <= MAX_WIDTH; ++width) { - ValidateRle(values, width, NULL, 2 * (1 + BitUtil::Ceil(width, 8))); + ValidateRle(values, width, NULL, 2 * (1 + static_cast(BitUtil::Ceil(width, 8)))); } // Test 100 0's and 1's alternating for (int i = 0; i < 100; ++i) { values[i] = i % 2; } - int num_groups = BitUtil::Ceil(100, 8); + int num_groups = static_cast(BitUtil::Ceil(100, 8)); expected_buffer[0] = (num_groups << 1) | 1; for (int i = 1; i <= 100 / 8; ++i) { expected_buffer[i] = BOOST_BINARY(1 0 1 0 1 0 1 0); @@ -292,8 +293,9 @@ // num_groups and expected_buffer only valid for bit width = 1 ValidateRle(values, 1, expected_buffer, 1 + num_groups); for (int width = 2; width <= MAX_WIDTH; ++width) { - int num_values = BitUtil::Ceil(100, 8) * 8; - ValidateRle(values, width, NULL, 1 + BitUtil::Ceil(width * num_values, 8)); + int num_values = static_cast(BitUtil::Ceil(100, 8)) * 8; + ValidateRle( + values, width, NULL, 1 + static_cast(BitUtil::Ceil(width * num_values, 8))); } } @@ -446,7 +448,7 @@ for (int i = 0; i < num_added; ++i) { bool result = decoder.Get(&v); EXPECT_TRUE(result); - EXPECT_EQ(v, parity); + EXPECT_EQ(v != 0, parity); parity = !parity; } // Make sure we get false when reading past end a couple times. 
diff -Nru apache-parquet-cpp-1.1.1.20170524/src/parquet/util/visibility.h apache-parquet-cpp-1.1.1.20170612/src/parquet/util/visibility.h --- apache-parquet-cpp-1.1.1.20170524/src/parquet/util/visibility.h 2017-05-23 18:29:32.000000000 +0000 +++ apache-parquet-cpp-1.1.1.20170612/src/parquet/util/visibility.h 2017-06-11 18:28:45.000000000 +0000 @@ -19,6 +19,17 @@ #define PARQUET_UTIL_VISIBILITY_H #if defined(_WIN32) || defined(__CYGWIN__) +#ifdef _MSC_VER +#pragma warning(push) +// Disable warning for STL types usage in DLL interface https://web.archive.org/web/20130317015847/http://connect.microsoft.com/VisualStudio/feedback/details/696593/vc-10-vs-2010-basic-string-exports +#pragma warning(disable : 4275 4251) +// Disable diamond inheritance warnings +#pragma warning(disable : 4250) +// Disable macro redefinition warnings +#pragma warning(disable : 4005) +// Disable extern before exported template warnings +#pragma warning(disable : 4910) +#endif #define PARQUET_EXPORT __declspec(dllexport) #define PARQUET_NO_EXPORT #else // Not Windows @@ -30,4 +41,21 @@ #endif #endif // Non-Windows +// gcc and clang disagree about how to handle template visibility when you have +// explicit specializations https://llvm.org/bugs/show_bug.cgi?id=24815 + +#if defined(__clang__) +#define PARQUET_EXTERN_TEMPLATE extern template class PARQUET_EXPORT +#else +#define PARQUET_EXTERN_TEMPLATE extern template class +#endif + +// This is a complicated topic, some reading on it: +// http://www.codesynthesis.com/~boris/blog/2010/01/18/dll-export-cxx-templates/ +#ifdef _MSC_VER +#define PARQUET_TEMPLATE_EXPORT PARQUET_EXPORT +#else +#define PARQUET_TEMPLATE_EXPORT +#endif + #endif // PARQUET_UTIL_VISIBILITY_H