diff -Nru gmap-2016-11-07/ChangeLog gmap-2017-01-14/ChangeLog --- gmap-2016-11-07/ChangeLog 2016-11-08 01:14:55.000000000 +0000 +++ gmap-2017-01-14/ChangeLog 2017-01-14 01:46:30.000000000 +0000 @@ -1,3 +1,186 @@ +2017-01-14 twu + + * stage3hr.c: Fixed a memory leak in resolving inner splices + +2017-01-13 twu + + * dynprog_end.c: Fixed conditional jump based on finalscore, by not checking + when endalign is QUERYEND_NOGAPS + + * stage1hr.c: Fixed uninitialized value for successp. Using FREE_ALIGN + macro + + * spanningelt.c: Using MALLOC_ALIGN instead of MALLOC when needed + + * indexdb_hr.c: Using MALLOC_ALIGN instead of MALLOC when needed + + * oligoindex_hr.c: Including atoi.h + + * samprint.c, substring.c, substring.h: Fixed coordinates reported in XT + field, which depend on the donor and acceptor strands + + * merge.c: Using macros FREE_ALIGN and CHECK_ALIGN + + * mem.h: Defined macros FREE_ALIGN and CHECK_ALIGN + +2017-01-10 twu + + * genome128_hr.c: Fixed incorrect AVX macro + + * oligoindex_hr.c: Changed _mm_bsrli_si128 to _mm_srli_si128. Added atoi + and ttoc modes to all code. + +2017-01-09 twu + + * gsnap.c: Removed option --microexon-spliceprob + +2017-01-06 twu + + * stage1hr.c: Using alignments with most matches, even if they are + translocations compared with other hitpairs + +2017-01-02 twu + + * genome128_hr.c: For handling middle rows, using <= and >= to endptr and + startptr, instead of < and > + +2017-01-01 twu + + * stage3.c: Using new interface to Dynprog_end5_gap and Dynprog_end3_gap + + * stage1hr.c: In identify_all_segments, filtering out diagonals < + querylength from the merged array + + * dynprog_single.c: Using use8p_size + + * dynprog_simd.h: Removing fixed definition for SIMD_MAXLENGTH_EPI8 + + * dynprog_simd.c: Added assertions for traceback procedures for vertical and + horizontal jumps not to go past the main diagonal. Put macros around + memory fences in debugging print procedures. 
+ + * dynprog_end.c, dynprog_end.h: Using use8p_size and introduced parameter + require_pos_score_p + + * dynprog_cdna.c, dynprog_genome.c: Using use8p_size + + * dynprog.c, dynprog.h: Introducing an array for use8p_size that depends on + the mismatch type + +2016-12-30 twu + + * stage3hr.c: Not converting splices when resolving insides of + paired-end-reads + +2016-12-29 twu + + * dynprog_genome.c, gsnap.c, pair.c, pair.h, sarray-read.c, smooth.c, src, + stage1hr.c, stage1hr.h, stage2.c, stage3.c, stage3.h, stage3hr.c, + stage3hr.h, substring.c, substring.h, trunk, uniqscan.c: Merged revisions + 201789 through 202030 from branches/2016-12-18-stage2-soa to make various + improvements to alignments + + * stage1hr.c: Added debugging statements + + * indexdb_hr.c: Checking for nmerged being 0 + +2016-12-16 twu + + * ax_ext.m4: Not adding -mno options to an Intel compiler + + * indexdb_hr.c: Returning an array created by malloc, rather than + _mm_malloc, from the merge version of Indexdb_merge_compoundpos + + * sarray-read.c: Using qsort instead of Sedgesort, because of seg faults + observed on Intel compiler + + * Makefile.gsnaptoo.am: Including merge.c, merge.h, merge-heap.c, and + merge-heap.h where needed + + * stage1hr.c: Providing a version of identify_all_segments for LARGE_GENOMES + + * indexdb_hr.c: Cleaned up code so there are three versions of + Indexdb_merge_compoundpos. Fixed the merge version. 
+ + * oligoindex_hr.c: Fixed faulty svn merge + + * genome128_hr.c: Fixed faulty svn merge, and hid shift_lo and shift_hi + procedures + + * Makefile.gsnaptoo.am, indexdb_hr.c, mem.h, merge-heap.c, merge-heap.h, + merge.c, merge.h, src, stage1hr.c, trunk: Merged revisions 200992 through + 201743 from branches/2016-11-28-simd-merging to revise SIMD merge code + + * spanningelt.c, spanningelt.h: Merged revisions 200992 through 201743 from + branches/2016-11-28-simd-merging to change a calloc to a malloc + + * Makefile.gsnaptoo.am, ax_cpuid_intel.m4, ax_cpuid_non_intel.m4, ax_ext.m4, + configure.ac, cpuid.c, src, trunk: Merged revisions 200476 through 201735 + from branches/2016-11-14-avx512 to make provisions for AVX-512 + + * gmap.c: Merged revisions 200476 through 201735 from + branches/2016-11-14-avx512 to change Genome_hr_user_setup to + Genome_hr_setup + + * gmap_select.c, gmapl_select.c, gsnap_select.c, gsnapl_select.c: Merged + revisions 200476 through 201735 from branches/2016-11-14-avx512 to add + provisions for AVX-512 + + * genome128_hr.c, genome128_hr.h: Merged revisions 200476 through 201735 + from branches/2016-11-14-avx512 to add shift and wrap procedures + + * oligoindex_hr.c, oligoindex_hr.h: Merged revisions 200476 through 201735 + from branches/2016-11-14-avx512 to revise algorithms substantially + + * oligoindex_old.c, oligoindex_old.h: Merged revisions 200476 through 201735 + from branches/2016-11-14-avx512 to make checking code work with current + code + + * stage2.c: Merged revisions 200476 through 201735 from + branches/2016-11-14-avx512 to fix debugging comment + + * sarray-read.c: Merged revisions 200476 through 201735 from + branches/2016-11-14-avx512 to add AVX-512 code + + * stage1hr.c: Fixed uninitialized variable + +2016-12-13 twu + + * VERSION, config.site.rescomp.prd, config.site.rescomp.tst, genome128_hr.c, + src, trunk: Merged revisions 201421 through 201532 from + branches/2016-12-09-genomebits-serial-simd to change structure of 
SIMD + code in genome128_hr.c + + * index.html: Updated for version 2016-11-07 + + * configure.ac: Allowing sse4.1 and sse4.2 as responses to --with-simd-level + + * samprint.c: Added missing pair of braces + + * gsnap.c, stage1hr.c, stage1hr.h: Removed references to indel_knownsplice + mode for gmap + +2016-11-18 twu + + * oligoindex_hr.c: Fixed debugging statements to use SIMD commands in count + procedures + +2016-11-16 twu + + * ax_ext.m4: Removed -mno... flags for compilers + + * configure.ac: Restricting response to --with-simd-level + + * ax_cpuid_intel.m4: Fixed configure issue for AVX2 support using Intel + compiler + +2016-11-14 twu + + * pair.c: Removed initialization of static variables + + * gsnap.c, outbuffer.c, outbuffer.h, output.c, output.h: Separate output + files for single-end and paired-end results + 2016-11-08 twu * sam_sort.c: Added printing at monitor intervals diff -Nru gmap-2016-11-07/config/ax_cpuid_intel.m4 gmap-2017-01-14/config/ax_cpuid_intel.m4 --- gmap-2016-11-07/config/ax_cpuid_intel.m4 2016-05-06 23:05:47.000000000 +0000 +++ gmap-2017-01-14/config/ax_cpuid_intel.m4 2016-12-16 16:31:07.000000000 +0000 @@ -43,7 +43,16 @@ [AC_LANG_PROGRAM([[#include ]], [[return _may_i_use_cpu_feature(_FEATURE_AVX2 | _FEATURE_FMA | _FEATURE_BMI | _FEATURE_LZCNT | _FEATURE_MOVBE) ? 0 : 9;]])], [AC_MSG_RESULT(yes) - ax_cv_cpu_has_sse42_ext=yes], + ax_cv_cpu_has_avx2_ext=yes], + [AC_MSG_RESULT(no)]) + +# Test for AVX512 support + AC_MSG_CHECKING(for avx512 support) + AC_RUN_IFELSE( + [AC_LANG_PROGRAM([[#include ]], + [[return _may_i_use_cpu_feature(_FEATURE_AVX512F | _FEATURE_AVX512CD) ? 
0 : 9;]])], + [AC_MSG_RESULT(yes) + ax_cv_cpu_has_avx512_ext=yes], [AC_MSG_RESULT(no)]) AC_LANG_POP([C]) diff -Nru gmap-2016-11-07/config/ax_cpuid_non_intel.m4 gmap-2017-01-14/config/ax_cpuid_non_intel.m4 --- gmap-2016-11-07/config/ax_cpuid_non_intel.m4 2016-05-06 23:05:39.000000000 +0000 +++ gmap-2017-01-14/config/ax_cpuid_non_intel.m4 2016-12-16 16:31:07.000000000 +0000 @@ -118,15 +118,15 @@ if ((abcd[/*ECX*/2] & fma_movbe_osxsave_mask) != fma_movbe_osxsave_mask) { return 9; } else if (!check_xcr0_ymm()) { - return 9; + return 8; } else { run_cpuid(7, 0, abcd); if ((abcd[/*EBX*/1] & avx2_bmi12_mask) != avx2_bmi12_mask) { - return 9; + return 7; } else { run_cpuid(0x80000001, 0, abcd); if ((abcd[/*ECX*/2] & lzcnt_mask) != lzcnt_mask) { - return 9; + return 6; } else { return 0; } @@ -152,5 +152,39 @@ ax_cv_cpu_has_bmi2_ext=yes], [AC_MSG_RESULT(no)]) + +# Test for AVX512 support + AC_MSG_CHECKING(for avx512 support) + AC_RUN_IFELSE( + [AC_LANG_PROGRAM([[#include <stdint.h> +static void run_cpuid (uint32_t eax, uint32_t ecx, uint32_t *abcd) { + uint32_t ebx, edx; + __asm__ ("cpuid" : "+b" (ebx), "+a" (eax), "+c" (ecx), "=d" (edx)); + abcd[0] = eax; abcd[1] = ebx; abcd[2] = ecx; abcd[3] = edx;} +static int check_xcr0_zmm () { + uint32_t xcr0; + uint32_t zmm_ymm_xmm = ((7 << 5) | (1 << 2) | (1 << 1)); + __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx"); + return ((xcr0 & zmm_ymm_xmm) == zmm_ymm_xmm);}]], +[[uint32_t abcd[4]; + uint32_t osxsave_mask = (1 << 27); + uint32_t avx512_mask = (/*512F*/(1 << 16) | /*512CD*/(1 << 28)); + run_cpuid(1, 0, abcd); + if ((abcd[/*ECX*/2] & osxsave_mask) != osxsave_mask) { + return 9; + } else if (!check_xcr0_zmm()) { + return 8; + } else { + run_cpuid(7, 0, abcd); /* AVX512F and AVX512CD flags are in CPUID leaf 7 subleaf 0 EBX */ + if ((abcd[/*EBX*/1] & avx512_mask) != avx512_mask) { + return 7; + } else { + return 0; + } + }]])], + [AC_MSG_RESULT(yes) + ax_cv_cpu_has_avx512_ext=yes], + [AC_MSG_RESULT(no)]) + AC_LANG_POP([C]) ]) diff -Nru gmap-2016-11-07/config/ax_ext.m4 
gmap-2017-01-14/config/ax_ext.m4 --- gmap-2016-11-07/config/ax_ext.m4 2016-06-06 19:19:17.000000000 +0000 +++ gmap-2017-01-14/config/ax_ext.m4 2016-12-16 21:39:38.000000000 +0000 @@ -93,7 +93,6 @@ TEST_CFLAGS="-mssse3" else TEST_CFLAGS="$SIMD_SSE2_CFLAGS -mssse3" - SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-ssse3" fi AX_CHECK_COMPILE_FLAG([$TEST_CFLAGS], [ax_cv_compile_ssse3_ext=yes], [ax_cv_ext_compile_problem=yes]) if test x"$ax_cv_compile_ssse3_ext" != xyes; then @@ -111,6 +110,9 @@ AC_MSG_RESULT([yes]) ax_make_ssse3=yes SIMD_SSSE3_CFLAGS=$CFLAGS + if test x"$ax_cv_c_compiler_vendor" != xintel; then + SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-ssse3" + fi # AC_DEFINE(HAVE_SSSE3,1,[Define to 1 if you support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions]) -- Defines run-type fi fi @@ -122,8 +124,6 @@ TEST_CFLAGS="-msse4.1" else TEST_CFLAGS="$SIMD_SSSE3_CFLAGS -msse4.1" - SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-sse4.1" - SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-sse4.1" fi AX_CHECK_COMPILE_FLAG([$TEST_CFLAGS], [ax_cv_compile_sse41_ext=yes], [ax_cv_ext_compile_problem=yes]) if test x"$ax_cv_compile_sse41_ext" != xyes; then @@ -141,6 +141,10 @@ AC_MSG_RESULT([yes]) ax_make_sse41=yes SIMD_SSE4_1_CFLAGS=$CFLAGS + if test x"$ax_cv_c_compiler_vendor" != xintel; then + SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-sse4.1" + SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-sse4.1" + fi # AC_DEFINE(HAVE_SSE4_1,1,[Define to 1 if you support SSE4.1 (Streaming SIMD Extensions 4.1) instructions]) -- Not used fi fi @@ -152,9 +156,6 @@ TEST_CFLAGS="-march=corei7" else TEST_CFLAGS="$SIMD_SSE4_1_CFLAGS -msse4.2" - SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-sse4.2" - SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-sse4.2" - SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-sse4.2" fi AX_CHECK_COMPILE_FLAG([$TEST_CFLAGS], [ax_cv_compile_sse42_ext=yes], [ax_cv_ext_compile_problem=yes]) if test x"$ax_cv_compile_sse42_ext" != xyes; then @@ -172,6 +173,11 @@ AC_MSG_RESULT([yes]) 
ax_make_sse42=yes SIMD_SSE4_2_CFLAGS=$CFLAGS + if test x"$ax_cv_c_compiler_vendor" != xintel; then + SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-sse4.2" + SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-sse4.2" + SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-sse4.2" + fi fi fi fi @@ -267,10 +273,6 @@ TEST_CFLAGS="-march=core-avx2" else TEST_CFLAGS="$SIMD_SSE4_2_CFLAGS -mavx2 -mbmi2" - SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-avx2 -mno-bmi2" - SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-avx2 -mno-bmi2" - SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-avx2 -mno-bmi2" - SIMD_SSE4_2_CFLAGS="$SIMD_SSE4_2_CFLAGS -mno-avx2 -mno-bmi2" fi AX_CHECK_COMPILE_FLAG([$TEST_CFLAGS], [ax_cv_compile_avx2_ext=yes], [ax_cv_ext_compile_problem=yes]) if test x"$ax_cv_compile_avx2_ext" != xyes; then @@ -288,6 +290,12 @@ AC_MSG_RESULT([yes]) ax_make_avx2=yes SIMD_AVX2_CFLAGS=$CFLAGS + if test x"$ax_cv_c_compiler_vendor" != xintel; then + SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-avx2" + SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-avx2" + SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-avx2" + SIMD_SSE4_2_CFLAGS="$SIMD_SSE4_2_CFLAGS -mno-avx2" + fi # AC_DEFINE(HAVE_AVX2,1,[Define to 1 if you support AVX2 (Advanced Vector Extensions 2) instructions]) -- Defines run-type fi fi @@ -312,6 +320,12 @@ AC_MSG_WARN([Your compiler supports -mbmi2 but not your linker. 
Can you try another linker or update yours?]) else SIMD_AVX2_CFLAGS=$CFLAGS + if test x"$ax_cv_c_compiler_vendor" != xintel; then + SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-bmi2" + SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-bmi2" + SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-bmi2" + SIMD_SSE4_2_CFLAGS="$SIMD_SSE4_2_CFLAGS -mno-bmi2" + fi AC_MSG_CHECKING(for _pext support) AC_RUN_IFELSE( [AC_LANG_PROGRAM([[#include ]], @@ -323,6 +337,41 @@ fi fi + + if test x"$ax_cv_cpu_has_avx512_ext" = xyes; then + CFLAGS= + if test x"$ax_cv_c_compiler_vendor" = xintel; then + TEST_CFLAGS="-xCOMMON-AVX512" + else + TEST_CFLAGS="$SIMD_AVX2_CFLAGS -mavx512f -mavx512cd" + fi + AX_CHECK_COMPILE_FLAG([$TEST_CFLAGS], [ax_cv_compile_avx512_ext=yes], [ax_cv_ext_compile_problem=yes]) + if test x"$ax_cv_compile_avx512_ext" != xyes; then + AC_MSG_WARN([Your CPU supports AVX512 instructions but not your compiler. Can you try another compiler or update yours?]) + else + CFLAGS=$TEST_CFLAGS + AC_MSG_CHECKING(for nmmintrin.h header file) + AC_LINK_IFELSE([AC_LANG_PROGRAM([#include ])], + [ax_cv_link_nmmintrin_h=yes], + [ax_cv_ext_linker_problem=yes]) + if test x"$ax_cv_link_nmmintrin_h" != xyes; then + AC_MSG_RESULT([no]) + AC_MSG_WARN([Your compiler supports AVX512 instructions but not your linker.]) + else + AC_MSG_RESULT([yes]) + ax_make_avx512=yes + SIMD_AVX512_CFLAGS=$CFLAGS + if test x"$ax_cv_c_compiler_vendor" != xintel; then + SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-avx512f -mno-avx512cd" + SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-avx512f -mno-avx512cd" + SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-avx512f -mno-avx512cd" + SIMD_SSE4_2_CFLAGS="$SIMD_SSE4_2_CFLAGS -mno-avx512f -mno-avx512cd" + SIMD_AVX2_CFLAGS="$SIMD_AVX2_CFLAGS -mno-avx512f -mno-avx512cd" + fi + fi + fi + fi + ;; esac @@ -333,6 +382,7 @@ AC_SUBST(SIMD_SSE4_1_CFLAGS) AC_SUBST(SIMD_SSE4_2_CFLAGS) AC_SUBST(SIMD_AVX2_CFLAGS) + AC_SUBST(SIMD_AVX512_CFLAGS) ]) diff -Nru gmap-2016-11-07/configure gmap-2017-01-14/configure 
--- gmap-2016-11-07/configure 2016-11-08 01:15:24.000000000 +0000 +++ gmap-2017-01-14/configure 2017-01-13 23:46:35.000000000 +0000 @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for gmap 2016-11-07. +# Generated by GNU Autoconf 2.69 for gmap 2017-01-14. # # Report bugs to >. # @@ -590,8 +590,8 @@ # Identity of this package. PACKAGE_NAME='gmap' PACKAGE_TARNAME='gmap' -PACKAGE_VERSION='2016-11-07' -PACKAGE_STRING='gmap 2016-11-07' +PACKAGE_VERSION='2017-01-14' +PACKAGE_STRING='gmap 2017-01-14' PACKAGE_BUGREPORT='Thomas Wu ' PACKAGE_URL='' @@ -650,6 +650,9 @@ MAKE_SSE4_2_TRUE MAKE_AVX2_FALSE MAKE_AVX2_TRUE +MAKE_AVX512_FALSE +MAKE_AVX512_TRUE +SIMD_AVX512_CFLAGS SIMD_AVX2_CFLAGS SIMD_SSE4_2_CFLAGS SIMD_SSE4_1_CFLAGS @@ -1372,7 +1375,7 @@ # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures gmap 2016-11-07 to adapt to many kinds of systems. +\`configure' configures gmap 2017-01-14 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1443,7 +1446,7 @@ if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of gmap 2016-11-07:";; + short | recursive ) echo "Configuration of gmap 2017-01-14:";; esac cat <<\_ACEOF @@ -1495,8 +1498,8 @@ --with-sysroot[=DIR] Search for dependent libraries within DIR (or the compiler's sysroot if not specified). 
--with-simd-level=STRING - User-selected SIMD level (sse2, ssse3, sse41, sse42, - avx2) + User-selected SIMD level (none, sse2, ssse3, + sse41/sse4.1, sse42/sse4.2, avx2, avx512) --with-gmapdb=DIR Default GMAP database directory Some influential environment variables: @@ -1582,7 +1585,7 @@ test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -gmap configure 2016-11-07 +gmap configure 2017-01-14 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2188,7 +2191,7 @@ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by gmap $as_me 2016-11-07, which was +It was created by gmap $as_me 2017-01-14, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2538,8 +2541,8 @@ { $as_echo "$as_me:${as_lineno-$LINENO}: checking package version" >&5 $as_echo_n "checking package version... " >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2016-11-07" >&5 -$as_echo "2016-11-07" >&6; } +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2017-01-14" >&5 +$as_echo "2017-01-14" >&6; } ### Read defaults @@ -4404,7 +4407,7 @@ # Define the identity of the package. 
PACKAGE='gmap' - VERSION='2016-11-07' + VERSION='2017-01-14' cat >>confdefs.h <<_ACEOF @@ -17151,30 +17154,6 @@ -#AC_MSG_CHECKING(whether sse2 is enabled) -#AC_ARG_ENABLE([sse2], -# AC_HELP_STRING([--enable-sse2], -# [Enable sse2 simd commands if they compile and run (default=yes).]), -# [answer="$enableval"], -# [answer=""]) -#case x"$answer" in -# xyes) -# AC_MSG_RESULT(enabled) -# ax_cv_want_sse2_ext=yes -# ;; -# -# xno) -# AC_MSG_RESULT(disabled by user) -# ax_cv_want_sse2_ext=no -# ;; -# -# x) -# AC_MSG_RESULT([not specified so enabled by default]) -# ax_cv_want_sse2_ext=yes -# ;; -#esac - - @@ -17226,125 +17205,6 @@ - -#AC_MSG_CHECKING(whether ssse3 is enabled) -#AC_ARG_ENABLE([ssse3], -# AC_HELP_STRING([--enable-ssse3], -# [Enable ssse3 simd commands if they compile and run (default=yes). Requires that sse2 be enabled.]), -# [answer="$enableval"], -# [answer=""]) -#if test "$ax_cv_want_sse2_ext" = no; then -# AC_MSG_RESULT([disabled because the user disabled sse2]) -# ax_cv_want_ssse3_ext=no -#else -# case x"$answer" in -# xyes) -# AC_MSG_RESULT(enabled) -# ax_cv_want_ssse3_ext=yes -# ;; -# -# xno) -# AC_MSG_RESULT(disabled by user) -# ax_cv_want_ssse3_ext=no -# ;; -# -# x) -# AC_MSG_RESULT([not specified so enabled by default]) -# ax_cv_want_ssse3_ext=yes -# ;; -# esac -#fi - - - -#AC_MSG_CHECKING(whether sse4.1 is enabled) -#AC_ARG_ENABLE([sse4.1], -# AC_HELP_STRING([--enable-sse4.1], -# [Enable sse4.1 simd commands if they compile and run (default=yes). 
Requires that ssse3 be enabled.]), -# [answer="$enableval"], -# [answer=""]) -#if test "$ax_cv_want_ssse3_ext" = no; then -# AC_MSG_RESULT([disabled because the user disabled ssse3]) -# ax_cv_want_sse41_ext=no -#else -# case x"$answer" in -# xyes) -# AC_MSG_RESULT(enabled) -# ax_cv_want_sse41_ext=yes -# ;; -# -# xno) -# AC_MSG_RESULT(disabled by user) -# ax_cv_want_sse41_ext=no -# ;; -# -# x) -# AC_MSG_RESULT([not specified so enabled by default]) -# ax_cv_want_sse41_ext=yes -# ;; -# esac -#fi - - -#AC_MSG_CHECKING(whether sse4.2 is enabled) -#AC_ARG_ENABLE([sse4.2], -# AC_HELP_STRING([--enable-sse4.2], -# [Enable sse4.2 simd commands if they compile and run (default=yes). Requires that sse4.1 be enabled.]), -# [answer="$enableval"], -# [answer=""]) -#if test "$ax_cv_want_sse41_ext" = no; then -# AC_MSG_RESULT([disabled because the user disabled sse4.1]) -# ax_cv_want_sse42_ext=no -#else -# case x"$answer" in -# xyes) -# AC_MSG_RESULT(enabled) -# ax_cv_want_sse42_ext=yes -# ;; -# -# xno) -# AC_MSG_RESULT(disabled by user) -# ax_cv_want_sse42_ext=no -# ;; -# -# x) -# AC_MSG_RESULT([not specified so enabled by default]) -# ax_cv_want_sse42_ext=yes -# ;; -# esac -#fi - - -#AC_MSG_CHECKING(whether avx2 is enabled) -#AC_ARG_ENABLE([avx2], -# AC_HELP_STRING([--enable-avx2], -# [Enable avx2 simd commands if they compile and run (default=yes). Requires that avx be enabled.]), -# [answer="$enableval"], -# [answer=""]) -#if test "$ax_cv_want_avx_ext" = no; then -# AC_MSG_RESULT([disabled because the user disabled avx]) -# ax_cv_want_avx2_ext=no -#else -# case x"$answer" in -# xyes) -# AC_MSG_RESULT(enabled) -# ax_cv_want_avx2_ext=yes -# ;; -# -# xno) -# AC_MSG_RESULT(disabled by user) -# ax_cv_want_avx2_ext=no -# ;; -# -# x) -# AC_MSG_RESULT([not specified so enabled by default]) -# ax_cv_want_avx2_ext=yes -# ;; -# esac -#fi - - - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for whether simd is enabled" >&5 $as_echo_n "checking for whether simd is enabled... 
" >&6; } # Check whether --enable-simd was given. @@ -17538,7 +17398,40 @@ if ac_fn_c_try_run "$LINENO"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } - ax_cv_cpu_has_sse42_ext=yes + ax_cv_cpu_has_avx2_ext=yes +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + +# Test for AVX512 support + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for avx512 support" >&5 +$as_echo_n "checking for avx512 support... " >&6; } + if test "$cross_compiling" = yes; then : + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run test program while cross compiling +See \`config.log' for more details" "$LINENO" 5; } +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ +return _may_i_use_cpu_feature(_FEATURE_AVX512F | _FEATURE_AVX512CD) ? 0 : 9; + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + ax_cv_cpu_has_avx512_ext=yes else { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 $as_echo "no" >&6; } @@ -17834,15 +17727,15 @@ if ((abcd[/*ECX*/2] & fma_movbe_osxsave_mask) != fma_movbe_osxsave_mask) { return 9; } else if (!check_xcr0_ymm()) { - return 9; + return 8; } else { run_cpuid(7, 0, abcd); if ((abcd[/*EBX*/1] & avx2_bmi12_mask) != avx2_bmi12_mask) { - return 9; + return 7; } else { run_cpuid(0x80000001, 0, abcd); if ((abcd[/*ECX*/2] & lzcnt_mask) != lzcnt_mask) { - return 9; + return 6; } else { return 0; } @@ -17905,6 +17798,61 @@ fi + +# Test for AVX512 support + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for avx512 support" >&5 +$as_echo_n "checking for avx512 support... 
" >&6; } + if test "$cross_compiling" = yes; then : + { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error $? "cannot run test program while cross compiling +See \`config.log' for more details" "$LINENO" 5; } +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +static void run_cpuid (uint32_t eax, uint32_t ecx, uint32_t *abcd) { + uint32_t ebx, edx; + __asm__ ("cpuid" : "+b" (ebx), "+a" (eax), "+c" (ecx), "=d" (edx)); + abcd[0] = eax; abcd[1] = ebx; abcd[2] = ecx; abcd[3] = edx;} +static int check_xcr0_zmm () { + uint32_t xcr0; + uint32_t zmm_ymm_xmm = ((7 << 5) | (1 << 2) | (1 << 1)); + __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx"); + return ((xcr0 & zmm_ymm_xmm) == zmm_ymm_xmm);} +int +main () +{ +uint32_t abcd[4]; + uint32_t osxsave_mask = (1 << 27); + uint32_t avx512_mask = (/*512F*/(1 << 16) | /*512CD*/(1 << 28)); + run_cpuid(1, 0, abcd); + if ((abcd[/*ECX*/2] & osxsave_mask) != osxsave_mask) { + return 9; + } else if (!check_xcr0_zmm()) { + return 8; + } else if ((abcd[/*EBX*/1] & avx512_mask) != avx512_mask) { + return 0; /* Should fail here, but book/Web examples skip */ + } else { + return 0; + } + ; + return 0; +} +_ACEOF +if ac_fn_c_try_run "$LINENO"; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + ax_cv_cpu_has_avx512_ext=yes +else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ + conftest.$ac_objext conftest.beam conftest.$ac_ext +fi + + ac_ext=c ac_cpp='$CPP $CPPFLAGS' ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' @@ -18068,7 +18016,6 @@ TEST_CFLAGS="-mssse3" else TEST_CFLAGS="$SIMD_SSE2_CFLAGS -mssse3" - SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-ssse3" fi as_CACHEVAR=`$as_echo "ax_cv_check_cflags__$TEST_CFLAGS" | $as_tr_sh` { $as_echo "$as_me:${as_lineno-$LINENO}: checking 
whether C compiler accepts $TEST_CFLAGS" >&5 @@ -18142,6 +18089,9 @@ $as_echo "yes" >&6; } ax_make_ssse3=yes SIMD_SSSE3_CFLAGS=$CFLAGS + if test x"$ax_cv_c_compiler_vendor" != xintel; then + SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-ssse3" + fi # AC_DEFINE(HAVE_SSSE3,1,[Define to 1 if you support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions]) -- Defines run-type fi fi @@ -18153,8 +18103,6 @@ TEST_CFLAGS="-msse4.1" else TEST_CFLAGS="$SIMD_SSSE3_CFLAGS -msse4.1" - SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-sse4.1" - SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-sse4.1" fi as_CACHEVAR=`$as_echo "ax_cv_check_cflags__$TEST_CFLAGS" | $as_tr_sh` { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $TEST_CFLAGS" >&5 @@ -18228,6 +18176,10 @@ $as_echo "yes" >&6; } ax_make_sse41=yes SIMD_SSE4_1_CFLAGS=$CFLAGS + if test x"$ax_cv_c_compiler_vendor" != xintel; then + SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-sse4.1" + SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-sse4.1" + fi # AC_DEFINE(HAVE_SSE4_1,1,[Define to 1 if you support SSE4.1 (Streaming SIMD Extensions 4.1) instructions]) -- Not used fi fi @@ -18239,9 +18191,6 @@ TEST_CFLAGS="-march=corei7" else TEST_CFLAGS="$SIMD_SSE4_1_CFLAGS -msse4.2" - SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-sse4.2" - SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-sse4.2" - SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-sse4.2" fi as_CACHEVAR=`$as_echo "ax_cv_check_cflags__$TEST_CFLAGS" | $as_tr_sh` { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $TEST_CFLAGS" >&5 @@ -18315,6 +18264,11 @@ $as_echo "yes" >&6; } ax_make_sse42=yes SIMD_SSE4_2_CFLAGS=$CFLAGS + if test x"$ax_cv_c_compiler_vendor" != xintel; then + SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-sse4.2" + SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-sse4.2" + SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-sse4.2" + fi fi fi fi @@ -18621,10 +18575,6 @@ TEST_CFLAGS="-march=core-avx2" else TEST_CFLAGS="$SIMD_SSE4_2_CFLAGS -mavx2 -mbmi2" - 
SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-avx2 -mno-bmi2" - SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-avx2 -mno-bmi2" - SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-avx2 -mno-bmi2" - SIMD_SSE4_2_CFLAGS="$SIMD_SSE4_2_CFLAGS -mno-avx2 -mno-bmi2" fi as_CACHEVAR=`$as_echo "ax_cv_check_cflags__$TEST_CFLAGS" | $as_tr_sh` { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $TEST_CFLAGS" >&5 @@ -18698,6 +18648,12 @@ $as_echo "yes" >&6; } ax_make_avx2=yes SIMD_AVX2_CFLAGS=$CFLAGS + if test x"$ax_cv_c_compiler_vendor" != xintel; then + SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-avx2" + SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-avx2" + SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-avx2" + SIMD_SSE4_2_CFLAGS="$SIMD_SSE4_2_CFLAGS -mno-avx2" + fi # AC_DEFINE(HAVE_AVX2,1,[Define to 1 if you support AVX2 (Advanced Vector Extensions 2) instructions]) -- Defines run-type fi fi @@ -18775,6 +18731,12 @@ $as_echo "$as_me: WARNING: Your compiler supports -mbmi2 but not your linker. Can you try another linker or update yours?" >&2;} else SIMD_AVX2_CFLAGS=$CFLAGS + if test x"$ax_cv_c_compiler_vendor" != xintel; then + SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-bmi2" + SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-bmi2" + SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-bmi2" + SIMD_SSE4_2_CFLAGS="$SIMD_SSE4_2_CFLAGS -mno-bmi2" + fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _pext support" >&5 $as_echo_n "checking for _pext support... " >&6; } if test "$cross_compiling" = yes; then : @@ -18812,6 +18774,97 @@ fi fi + + if test x"$ax_cv_cpu_has_avx512_ext" = xyes; then + CFLAGS= + if test x"$ax_cv_c_compiler_vendor" = xintel; then + TEST_CFLAGS="-xCOMMON-AVX512" + else + TEST_CFLAGS="$SIMD_AVX2_CFLAGS -mavx512f -mavx512cd" + fi + as_CACHEVAR=`$as_echo "ax_cv_check_cflags__$TEST_CFLAGS" | $as_tr_sh` +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts $TEST_CFLAGS" >&5 +$as_echo_n "checking whether C compiler accepts $TEST_CFLAGS... 
" >&6; } +if eval \${$as_CACHEVAR+:} false; then : + $as_echo_n "(cached) " >&6 +else + + ax_check_save_flags=$CFLAGS + CFLAGS="$CFLAGS $TEST_CFLAGS" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_compile "$LINENO"; then : + eval "$as_CACHEVAR=yes" +else + eval "$as_CACHEVAR=no" +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CFLAGS=$ax_check_save_flags +fi +eval ac_res=\$$as_CACHEVAR + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5 +$as_echo "$ac_res" >&6; } +if test x"`eval 'as_val=${'$as_CACHEVAR'};$as_echo "$as_val"'`" = xyes; then : + ax_cv_compile_avx512_ext=yes +else + ax_cv_ext_compile_problem=yes +fi + + if test x"$ax_cv_compile_avx512_ext" != xyes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your CPU supports AVX512 instructions but not your compiler. Can you try another compiler or update yours?" >&5 +$as_echo "$as_me: WARNING: Your CPU supports AVX512 instructions but not your compiler. Can you try another compiler or update yours?" >&2;} + else + CFLAGS=$TEST_CFLAGS + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for nmmintrin.h header file" >&5 +$as_echo_n "checking for nmmintrin.h header file... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + ax_cv_link_nmmintrin_h=yes +else + ax_cv_ext_linker_problem=yes +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + if test x"$ax_cv_link_nmmintrin_h" != xyes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: Your compiler supports AVX512 instructions but not your linker." >&5 +$as_echo "$as_me: WARNING: Your compiler supports AVX512 instructions but not your linker." 
>&2;} + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + ax_make_avx512=yes + SIMD_AVX512_CFLAGS=$CFLAGS + if test x"$ax_cv_c_compiler_vendor" != xintel; then + SIMD_SSE2_CFLAGS="$SIMD_SSE2_CFLAGS -mno-avx512f -mno-avx512cd" + SIMD_SSSE3_CFLAGS="$SIMD_SSSE3_CFLAGS -mno-avx512f -mno-avx512cd" + SIMD_SSE4_1_CFLAGS="$SIMD_SSE4_1_CFLAGS -mno-avx512f -mno-avx512cd" + SIMD_SSE4_2_CFLAGS="$SIMD_SSE4_2_CFLAGS -mno-avx512f -mno-avx512cd" + SIMD_AVX2_CFLAGS="$SIMD_AVX2_CFLAGS -mno-avx512f -mno-avx512cd" + fi + fi + fi + fi + ;; esac @@ -18824,8 +18877,11 @@ + if test "x$ax_cv_want_simd" = xno; then compile_level=none +elif test "x$ax_make_avx512" = xyes; then + compile_level=avx512 elif test "x$ax_make_avx2" = xyes; then compile_level=avx2 elif test "x$ax_make_sse42" = xyes; then @@ -18856,10 +18912,43 @@ compile_level=$answer { $as_echo "$as_me:${as_lineno-$LINENO}: result: $compile_level" >&5 $as_echo "$compile_level" >&6; } + case $compile_level in + avx512) + ;; + avx2) + ;; + sse42) + ;; + sse4.2) + compile_level=sse42 # normalize the dotted alias so the MAKE_SSE4_2 test matches + ;; + sse41) + ;; + sse4.1) + compile_level=sse41 # normalize the dotted alias so the MAKE_SSE4_1 test matches + ;; + ssse3) + ;; + sse2) + ;; + none) + ;; + *) + as_fn_error $? "Compiler level $compile_level not recognized. Allowed values: none, sse2, ssse3, sse4.1 (or sse41), sse4.2 (or sse42), avx2, avx512" "$LINENO" 5 + ;; + esac fi + if test "$compile_level" = avx512; then + MAKE_AVX512_TRUE= + MAKE_AVX512_FALSE='#' +else + MAKE_AVX512_TRUE='#' + MAKE_AVX512_FALSE= +fi + if test "$compile_level" = avx2; then MAKE_AVX2_TRUE= MAKE_AVX2_FALSE='#' @@ -19692,6 +19781,10 @@ Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${MAKE_AVX512_TRUE}" && test -z "${MAKE_AVX512_FALSE}"; then + as_fn_error $? "conditional \"MAKE_AVX512\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${MAKE_AVX2_TRUE}" && test -z "${MAKE_AVX2_FALSE}"; then as_fn_error $? 
"conditional \"MAKE_AVX2\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 @@ -20109,7 +20202,7 @@ # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by gmap $as_me 2016-11-07, which was +This file was extended by gmap $as_me 2017-01-14, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -20175,7 +20268,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -gmap config.status 2016-11-07 +gmap config.status 2017-01-14 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" @@ -22049,6 +22142,13 @@ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $compile_level" >&5 $as_echo "$compile_level" >&6; } +if test "$compile_level" = avx512; then + { $as_echo "$as_me:${as_lineno-$LINENO}: checking AVX512 compiler flags to be used" >&5 +$as_echo_n "checking AVX512 compiler flags to be used... " >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $SIMD_AVX512_CFLAGS" >&5 +$as_echo "$SIMD_AVX512_CFLAGS" >&6; } +fi + if test "$compile_level" = avx2; then { $as_echo "$as_me:${as_lineno-$LINENO}: checking AVX2 compiler flags to be used" >&5 $as_echo_n "checking AVX2 compiler flags to be used... 
" >&6; } diff -Nru gmap-2016-11-07/configure.ac gmap-2017-01-14/configure.ac --- gmap-2016-11-07/configure.ac 2016-08-16 20:22:27.000000000 +0000 +++ gmap-2017-01-14/configure.ac 2016-12-16 16:32:26.000000000 +0000 @@ -365,151 +365,8 @@ ACX_ASM_BSR -#AC_MSG_CHECKING(whether sse2 is enabled) -#AC_ARG_ENABLE([sse2], -# AC_HELP_STRING([--enable-sse2], -# [Enable sse2 simd commands if they compile and run (default=yes).]), -# [answer="$enableval"], -# [answer=""]) -#case x"$answer" in -# xyes) -# AC_MSG_RESULT(enabled) -# ax_cv_want_sse2_ext=yes -# ;; -# -# xno) -# AC_MSG_RESULT(disabled by user) -# ax_cv_want_sse2_ext=no -# ;; -# -# x) -# AC_MSG_RESULT([not specified so enabled by default]) -# ax_cv_want_sse2_ext=yes -# ;; -#esac - - ACX_SSE2_SHIFT_DEFECT - -#AC_MSG_CHECKING(whether ssse3 is enabled) -#AC_ARG_ENABLE([ssse3], -# AC_HELP_STRING([--enable-ssse3], -# [Enable ssse3 simd commands if they compile and run (default=yes). Requires that sse2 be enabled.]), -# [answer="$enableval"], -# [answer=""]) -#if test "$ax_cv_want_sse2_ext" = no; then -# AC_MSG_RESULT([disabled because the user disabled sse2]) -# ax_cv_want_ssse3_ext=no -#else -# case x"$answer" in -# xyes) -# AC_MSG_RESULT(enabled) -# ax_cv_want_ssse3_ext=yes -# ;; -# -# xno) -# AC_MSG_RESULT(disabled by user) -# ax_cv_want_ssse3_ext=no -# ;; -# -# x) -# AC_MSG_RESULT([not specified so enabled by default]) -# ax_cv_want_ssse3_ext=yes -# ;; -# esac -#fi - - - -#AC_MSG_CHECKING(whether sse4.1 is enabled) -#AC_ARG_ENABLE([sse4.1], -# AC_HELP_STRING([--enable-sse4.1], -# [Enable sse4.1 simd commands if they compile and run (default=yes). 
Requires that ssse3 be enabled.]), -# [answer="$enableval"], -# [answer=""]) -#if test "$ax_cv_want_ssse3_ext" = no; then -# AC_MSG_RESULT([disabled because the user disabled ssse3]) -# ax_cv_want_sse41_ext=no -#else -# case x"$answer" in -# xyes) -# AC_MSG_RESULT(enabled) -# ax_cv_want_sse41_ext=yes -# ;; -# -# xno) -# AC_MSG_RESULT(disabled by user) -# ax_cv_want_sse41_ext=no -# ;; -# -# x) -# AC_MSG_RESULT([not specified so enabled by default]) -# ax_cv_want_sse41_ext=yes -# ;; -# esac -#fi - - -#AC_MSG_CHECKING(whether sse4.2 is enabled) -#AC_ARG_ENABLE([sse4.2], -# AC_HELP_STRING([--enable-sse4.2], -# [Enable sse4.2 simd commands if they compile and run (default=yes). Requires that sse4.1 be enabled.]), -# [answer="$enableval"], -# [answer=""]) -#if test "$ax_cv_want_sse41_ext" = no; then -# AC_MSG_RESULT([disabled because the user disabled sse4.1]) -# ax_cv_want_sse42_ext=no -#else -# case x"$answer" in -# xyes) -# AC_MSG_RESULT(enabled) -# ax_cv_want_sse42_ext=yes -# ;; -# -# xno) -# AC_MSG_RESULT(disabled by user) -# ax_cv_want_sse42_ext=no -# ;; -# -# x) -# AC_MSG_RESULT([not specified so enabled by default]) -# ax_cv_want_sse42_ext=yes -# ;; -# esac -#fi - - -#AC_MSG_CHECKING(whether avx2 is enabled) -#AC_ARG_ENABLE([avx2], -# AC_HELP_STRING([--enable-avx2], -# [Enable avx2 simd commands if they compile and run (default=yes). 
Requires that avx be enabled.]), -# [answer="$enableval"], -# [answer=""]) -#if test "$ax_cv_want_avx_ext" = no; then -# AC_MSG_RESULT([disabled because the user disabled avx]) -# ax_cv_want_avx2_ext=no -#else -# case x"$answer" in -# xyes) -# AC_MSG_RESULT(enabled) -# ax_cv_want_avx2_ext=yes -# ;; -# -# xno) -# AC_MSG_RESULT(disabled by user) -# ax_cv_want_avx2_ext=no -# ;; -# -# x) -# AC_MSG_RESULT([not specified so enabled by default]) -# ax_cv_want_avx2_ext=yes -# ;; -# esac -#fi - - - AC_MSG_CHECKING(for whether simd is enabled) AC_ARG_ENABLE([simd], AC_HELP_STRING([--enable-simd], @@ -543,6 +400,8 @@ AX_EXT if test "x$ax_cv_want_simd" = xno; then compile_level=none +elif test "x$ax_make_avx512" = xyes; then + compile_level=avx512 elif test "x$ax_make_avx2" = xyes; then compile_level=avx2 elif test "x$ax_make_sse42" = xyes; then @@ -561,17 +420,43 @@ # User-selected compile level AC_ARG_WITH([simd-level], AC_HELP_STRING([--with-simd-level=STRING], - [User-selected SIMD level (sse2, ssse3, sse41, sse42, avx2)]), + [User-selected SIMD level (none, sse2, ssse3, sse41/sse4.1, sse42/sse4.2, avx2, avx512)]), [answer="$withval"], [answer=""]) if test x"$answer" != x; then AC_MSG_CHECKING(for user-selected SIMD level) compile_level=$answer AC_MSG_RESULT($compile_level) + case $compile_level in + avx512) + ;; + avx2) + ;; + sse42) + ;; + sse4.2) + compile_level = sse42; + ;; + sse41) + ;; + sse4.1) + compile_level = sse41; + ;; + ssse3) + ;; + sse2) + ;; + none) + ;; + *) + AC_MSG_ERROR([Compiler level $compile_level not recognized. 
Allowed values: none, sse2, ssse3, sse4.1 (or sse41), sse4.2 (or sse42), avx2, avx512]) + ;; + esac fi +AM_CONDITIONAL(MAKE_AVX512,[test "$compile_level" = avx512]) AM_CONDITIONAL(MAKE_AVX2,[test "$compile_level" = avx2]) AM_CONDITIONAL(MAKE_SSE4_2,[test "$compile_level" = sse42]) AM_CONDITIONAL(MAKE_SSE4_1,[test "$compile_level" = sse41]) @@ -820,6 +705,11 @@ AC_MSG_CHECKING(compile level) AC_MSG_RESULT($compile_level) +if test "$compile_level" = avx512; then + AC_MSG_CHECKING(AVX512 compiler flags to be used) + AC_MSG_RESULT($SIMD_AVX512_CFLAGS) +fi + if test "$compile_level" = avx2; then AC_MSG_CHECKING(AVX2 compiler flags to be used) AC_MSG_RESULT($SIMD_AVX2_CFLAGS) diff -Nru gmap-2016-11-07/debian/changelog gmap-2017-01-14/debian/changelog --- gmap-2016-11-07/debian/changelog 2016-11-09 15:15:45.000000000 +0000 +++ gmap-2017-01-14/debian/changelog 2017-01-16 14:18:19.000000000 +0000 @@ -1,3 +1,11 @@ +gmap (2017-01-14-1) unstable; urgency=medium + + * New upstream version 2017-01-14 + * Refresh install-data-local patch + * Update copyright year + + -- Alexandre Mestiashvili Mon, 16 Jan 2017 15:18:19 +0100 + gmap (2016-11-07-1) unstable; urgency=medium * New upstream version 2016-11-07 diff -Nru gmap-2016-11-07/debian/copyright gmap-2017-01-14/debian/copyright --- gmap-2016-11-07/debian/copyright 2016-11-09 15:15:45.000000000 +0000 +++ gmap-2017-01-14/debian/copyright 2017-01-16 14:18:19.000000000 +0000 @@ -165,7 +165,7 @@ Files: debian/* Copyright: 2011 Shaun Jackman 2012-2014 Andreas Tille - 2016 Alex Mestiashvili + 2017 Alex Mestiashvili License: ISC Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above diff -Nru gmap-2016-11-07/debian/patches/install-data-local gmap-2017-01-14/debian/patches/install-data-local --- gmap-2016-11-07/debian/patches/install-data-local 2016-11-09 15:15:45.000000000 +0000 +++ gmap-2017-01-14/debian/patches/install-data-local 2017-01-16 
14:18:19.000000000 +0000 @@ -2,7 +2,7 @@ --- gmap.orig/Makefile.in +++ gmap/Makefile.in -@@ -847,7 +847,7 @@ +@@ -848,7 +848,7 @@ install-data-local: diff -Nru gmap-2016-11-07/Makefile.in gmap-2017-01-14/Makefile.in --- gmap-2016-11-07/Makefile.in 2016-11-08 01:15:28.000000000 +0000 +++ gmap-2017-01-14/Makefile.in 2017-01-13 23:46:40.000000000 +0000 @@ -305,6 +305,7 @@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMD_AVX2_CFLAGS = @SIMD_AVX2_CFLAGS@ +SIMD_AVX512_CFLAGS = @SIMD_AVX512_CFLAGS@ SIMD_SSE2_CFLAGS = @SIMD_SSE2_CFLAGS@ SIMD_SSE4_1_CFLAGS = @SIMD_SSE4_1_CFLAGS@ SIMD_SSE4_2_CFLAGS = @SIMD_SSE4_2_CFLAGS@ diff -Nru gmap-2016-11-07/src/cpuid.c gmap-2017-01-14/src/cpuid.c --- gmap-2016-11-07/src/cpuid.c 2016-11-08 00:55:18.000000000 +0000 +++ gmap-2017-01-14/src/cpuid.c 2016-12-16 16:41:13.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: cpuid.c 200231 2016-11-08 00:55:17Z twu $"; +static char rcsid[] = "$Id: cpuid.c 201743 2016-12-16 16:41:11Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -35,7 +35,7 @@ *sse4_1_support_p = _may_i_use_cpu_feature(_FEATURE_SSE4_1); *sse4_2_support_p = _may_i_use_cpu_feature(_FEATURE_SSE4_2); *avx2_support_p = _may_i_use_cpu_feature(_FEATURE_AVX2 | _FEATURE_FMA | _FEATURE_BMI | _FEATURE_LZCNT | _FEATURE_MOVBE); - *avx512_support_p = _may_i_use_cpu_feature(_FEATURE_512F); + *avx512_support_p = _may_i_use_cpu_feature(_FEATURE_AVX512F | _FEATURE_AVX512CD); return; } @@ -73,14 +73,28 @@ static int check_xcr0_ymm () { uint32_t xcr0; + uint32_t ymm_xmm = ((1 << 2) | (1 << 1)); #if defined(_MSC_VER) xcr0 = (uint32_t)_xgetbv(0); /* min VS2010 SP1 compiler is required */ #else __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx"); #endif - return ((xcr0 & 6) == 6); /* checking if xmm and ymm state are enabled in XCR0 */ + return ((xcr0 & ymm_xmm) == ymm_xmm); /* checking if xmm and ymm state are enabled in XCR0 */ } +static int +check_xcr0_zmm () { + uint32_t xcr0; + uint32_t zmm_ymm_xmm = ((7 << 5) | (1 << 2) | (1 << 1)); +#if 
defined(_MSC_VER) + xcr0 = (uint32_t)_xgetbv(0); /* min VS2010 SP1 compiler is required */ +#else + __asm__ ("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx"); +#endif + return ((xcr0 & zmm_ymm_xmm) == zmm_ymm_xmm); +} + + void CPUID_support (bool *sse2_support_p, bool *ssse3_support_p, bool *sse4_1_support_p, bool *sse4_2_support_p, bool *avx2_support_p, bool *avx512_support_p) { @@ -95,12 +109,14 @@ uint32_t fma_movbe_osxsave_mask = ((1 << 12) | (1 << 22) | (1 << 27)); /* ecx */ uint32_t avx2_bmi12_mask = ((1 << 5) | (1 << 3) | (1 << 8)); /* ebx */ uint32_t lzcnt_mask = (1 << 5); /* ecx */ - uint32_t avx512_mask = (1 << 16); + + uint32_t osxsave_mask = (1 << 27); /* ecx */ + uint32_t avx512_mask = ((1 << 16) | (1 << 28)); /* ebx */ run_cpuid(1, 0, abcd); #ifdef MAIN - printf("CPUID 1, 0 returns %08X %08X %08X %08X\n",abcd[EAX],abcd[EBX],abcd[ECX],abcd[EDX]); + printf("CPUID 1, 0 returns EAX %08X EBX %08X ECX %08X EDX %08X\n",abcd[EAX],abcd[EBX],abcd[ECX],abcd[EDX]); #endif *sse2_support_p = ((abcd[EDX] & sse2_mask) == sse2_mask) ? 
true : false; @@ -111,36 +127,40 @@ if ((abcd[ECX] & fma_movbe_osxsave_mask) != fma_movbe_osxsave_mask) { *avx2_support_p = false; - *avx512_support_p = false; } else if (!check_xcr0_ymm()) { *avx2_support_p = false; - *avx512_support_p = false; } else { run_cpuid(7, 0, abcd); #ifdef MAIN - printf("CPUID 7, 0 returns %08X %08X %08X %08X\n",abcd[EAX],abcd[EBX],abcd[ECX],abcd[EDX]); + printf("CPUID 7, 0 returns EAX %08X EBX %08X ECX %08X EDX %08X\n",abcd[EAX],abcd[EBX],abcd[ECX],abcd[EDX]); #endif if ((abcd[EBX] & avx2_bmi12_mask) != avx2_bmi12_mask) { *avx2_support_p = false; - *avx2_support_p = false; } else { run_cpuid(0x80000001, 0, abcd); #ifdef MAIN - printf("CPUID 0x80000001, 0 returns %08X %08X %08X %08X\n",abcd[EAX],abcd[EBX],abcd[ECX],abcd[EDX]); + printf("CPUID 0x80000001, 0 returns EAX %08X EBX %08X ECX %08X EDX %08X\n",abcd[EAX],abcd[EBX],abcd[ECX],abcd[EDX]); #endif if ((abcd[ECX] & lzcnt_mask) != lzcnt_mask) { *avx2_support_p = false; - *avx512_support_p = false; } else { *avx2_support_p = true; - *avx512_support_p = ((abcd[ECX] & avx512_mask) == avx512_mask) ? 
true : false; } } } - + run_cpuid(1, 0, abcd); + if ((abcd[ECX] & osxsave_mask) != osxsave_mask) { + *avx512_support_p = false; + } else if (!check_xcr0_zmm()) { + *avx512_support_p = false; + } else if ((abcd[EBX] & avx512_mask) != avx512_mask) { + *avx512_support_p = true; /* Should fail, but book/Web examples skip this check */ + } else { + *avx512_support_p = true; + } return; } diff -Nru gmap-2016-11-07/src/dynprog.c gmap-2017-01-14/src/dynprog.c --- gmap-2016-11-07/src/dynprog.c 2016-05-01 17:28:23.000000000 +0000 +++ gmap-2017-01-14/src/dynprog.c 2017-01-01 15:40:09.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: dynprog.c 188752 2016-05-01 17:28:22Z twu $"; +static char rcsid[] = "$Id: dynprog.c 202041 2017-01-01 15:40:08Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -809,6 +809,7 @@ /************************************************************************/ /* These are extern arrays, used by all dynprog procedures */ +int use8p_size[NMISMATCHTYPES]; Pairdistance_T **pairdistance_array[NMISMATCHTYPES]; #ifndef HAVE_SSE4_1 Pairdistance_T **pairdistance_array_plus_128[NMISMATCHTYPES]; @@ -915,6 +916,12 @@ nt_to_int_array['T'] = nt_to_int_array['t'] = 3; + use8p_size[HIGHQ] = NEG_INFINITY_8 / MISMATCH_HIGHQ - 1; + use8p_size[MEDQ] = NEG_INFINITY_8 / MISMATCH_MEDQ - 1; + use8p_size[LOWQ] = NEG_INFINITY_8 / MISMATCH_LOWQ - 1; + use8p_size[ENDQ] = NEG_INFINITY_8 / MISMATCH_ENDQ - 1; + /* printf("use8p_sizes: %d %d %d %d\n",use8p_size[HIGHQ],use8p_size[MEDQ],use8p_size[LOWQ],use8p_size[ENDQ]); */ + consistent_array = (bool **) CALLOC(128,sizeof(bool *)); consistent_array[0] = (bool *) CALLOC(128*128,sizeof(bool)); ptr = 0; diff -Nru gmap-2016-11-07/src/dynprog_cdna.c gmap-2017-01-14/src/dynprog_cdna.c --- gmap-2016-11-07/src/dynprog_cdna.c 2016-08-01 18:07:06.000000000 +0000 +++ gmap-2017-01-14/src/dynprog_cdna.c 2017-01-01 15:40:38.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: dynprog_cdna.c 184458 2016-02-18 00:06:33Z twu $"; 
+static char rcsid[] = "$Id: dynprog_cdna.c 202042 2017-01-01 15:40:38Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -923,7 +923,7 @@ #if defined(HAVE_SSE2) /* Use || because we want the minimum length (which determines the diagonal length) to achieve a score less than 128 */ - if (glength <= SIMD_MAXLENGTH_EPI8 || (rlengthL <= SIMD_MAXLENGTH_EPI8 && rlengthR <= SIMD_MAXLENGTH_EPI8)) { + if (glength < use8p_size[mismatchtype] || (rlengthL < use8p_size[mismatchtype] && rlengthR <= use8p_size[mismatchtype])) { use8p = true; } else { use8p = false; diff -Nru gmap-2016-11-07/src/dynprog_end.c gmap-2017-01-14/src/dynprog_end.c --- gmap-2016-11-07/src/dynprog_end.c 2016-02-18 00:06:34.000000000 +0000 +++ gmap-2017-01-14/src/dynprog_end.c 2017-01-13 23:34:14.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: dynprog_end.c 184458 2016-02-18 00:06:33Z twu $"; +static char rcsid[] = "$Id: dynprog_end.c 202595 2017-01-13 23:34:14Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -1272,7 +1272,8 @@ int rlength, int glength, int rev_roffset, int rev_goffset, Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp, bool jump_late_p, Pairpool_T pairpool, - int extraband_end, double defect_rate, Endalign_T endalign) { + int extraband_end, double defect_rate, Endalign_T endalign, + bool require_pos_score_p) { List_T pairs = NULL; char *rev_gsequence, *rev_gsequence_alt; Pair_T pair; @@ -1369,7 +1370,7 @@ Dynprog_compute_bands(&lband,&uband,rlength,glength,extraband_end,/*widebandp*/true); #if defined(HAVE_SSE2) /* Use || because we want the minimum length (which determines the diagonal length) to achieve a score less than 128 */ - if (rlength <= SIMD_MAXLENGTH_EPI8 || glength <= SIMD_MAXLENGTH_EPI8) { + if (rlength < use8p_size[mismatchtype] || glength < use8p_size[mismatchtype]) { use8p = true; matrix8_upper = Dynprog_simd_8_upper(&directions8_upper_nogap,&directions8_upper_Egap,dynprog, rev_rsequence,&(rev_gsequence[glength-1]),&(rev_gsequence_alt[glength-1]), @@ 
-1429,7 +1430,7 @@ Dynprog_compute_bands(&lband,&uband,rlength,glength,extraband_end,/*widebandp*/true); #if defined(HAVE_SSE2) /* Use || because we want the minimum length (which determines the diagonal length) to achive a score less than 128 */ - if (rlength <= SIMD_MAXLENGTH_EPI8 || glength <= SIMD_MAXLENGTH_EPI8) { + if (rlength < use8p_size[mismatchtype] || glength < use8p_size[mismatchtype]) { use8p = true; @@ -1492,7 +1493,7 @@ } else if (endalign == QUERYEND_NOGAPS) { find_best_endpoint_to_queryend_nogaps(&bestr,&bestc,rlength,glength); - /* *finalscore = 0; -- Splicetrie procedures need to know finalscore */ + /* *finalscore = 0; -- Splicetrie procedures need to know finalscore */ } else { fprintf(stderr,"Unexpected endalign value %d\n",endalign); @@ -1524,6 +1525,10 @@ /*revp*/true,*dynprogindex); *finalscore = (*nmatches)*FULLMATCH + (*nmismatches)*MISMATCH_ENDQ; + } else if (require_pos_score_p == true && *finalscore <= 0) { + /* Can skip traceback */ + pairs = (List_T) NULL; + #if defined(HAVE_SSE2) } else if (use8p == true) { if (bestc >= bestr) { @@ -1574,7 +1579,7 @@ if ((endalign == QUERYEND_GAP || endalign == BEST_LOCAL) && (*nmatches + 1) < *nmismatches) { *finalscore = 0; /* No need to free pairs */ - pairs = NULL; + pairs = (List_T) NULL; } else { /* Add 1 to count the match already in the alignment */ pairs = List_reverse(pairs); /* Look at 5' end to remove excess gaps */ @@ -1685,7 +1690,7 @@ Dynprog_compute_bands(&lband,&uband,rlength,glength,extraband_end,/*widebandp*/true); #if defined(HAVE_SSE2) /* Use || because we want the minimum length (which determines the diagonal length) to achieve a score less than 128 */ - if (rlength <= SIMD_MAXLENGTH_EPI8 || glength <= SIMD_MAXLENGTH_EPI8) { + if (rlength < use8p_size[mismatchtype] || glength < use8p_size[mismatchtype]) { use8p = true; matrix8_upper = Dynprog_simd_8_upper(&directions8_upper_nogap,&directions8_upper_Egap,dynprog, rev_rsequence,rev_gsequence,rev_gsequence_alt, @@ -1866,7 +1871,8 
@@ int rlength, int glength, int roffset, int goffset, Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp, bool jump_late_p, Pairpool_T pairpool, - int extraband_end, double defect_rate, Endalign_T endalign) { + int extraband_end, double defect_rate, Endalign_T endalign, + bool require_pos_score_p) { List_T pairs = NULL; char *gsequence, *gsequence_alt; Pair_T pair; @@ -1959,7 +1965,7 @@ Dynprog_compute_bands(&lband,&uband,rlength,glength,extraband_end,/*widebandp*/true); #if defined(HAVE_SSE2) /* Use || because we want the minimum length (which determines the diagonal length) to achieve a score less than 128 */ - if (rlength <= SIMD_MAXLENGTH_EPI8 || glength <= SIMD_MAXLENGTH_EPI8) { + if (rlength < use8p_size[mismatchtype] || glength < use8p_size[mismatchtype]) { use8p = true; matrix8_upper = Dynprog_simd_8_upper(&directions8_upper_nogap,&directions8_upper_Egap,dynprog, rsequenceuc,gsequence,gsequence_alt,rlength,glength, @@ -2012,7 +2018,7 @@ Dynprog_compute_bands(&lband,&uband,rlength,glength,extraband_end,/*widebandp*/true); #if defined(HAVE_SSE2) /* Use || because we want the minimum length (which determines the diagonal length) to achieve a score less than 128 */ - if (rlength <= SIMD_MAXLENGTH_EPI8 || glength <= SIMD_MAXLENGTH_EPI8) { + if (rlength < use8p_size[mismatchtype] || glength < use8p_size[mismatchtype]) { use8p = true; matrix8_upper = Dynprog_simd_8_upper(&directions8_upper_nogap,&directions8_upper_Egap,dynprog, rsequenceuc,gsequence,gsequence_alt,rlength,glength, @@ -2096,6 +2102,10 @@ /*revp*/false,*dynprogindex); *finalscore = (*nmatches)*FULLMATCH + (*nmismatches)*MISMATCH_ENDQ; + } else if (require_pos_score_p == true && *finalscore <= 0) { + /* Can skip traceback */ + pairs = (List_T) NULL; + #if defined(HAVE_SSE2) } else if (use8p == true) { if (bestc >= bestr) { @@ -2255,7 +2265,7 @@ Dynprog_compute_bands(&lband,&uband,rlength,glength,extraband_end,/*widebandp*/true); #if defined(HAVE_SSE2) /* Use || because we want the minimum 
length (which determines the diagonal length) to achieve a score less than 128 */ - if (rlength <= SIMD_MAXLENGTH_EPI8 || glength <= SIMD_MAXLENGTH_EPI8) { + if (rlength < use8p_size[mismatchtype] || glength < use8p_size[mismatchtype]) { use8p = true; matrix8_upper = Dynprog_simd_8_upper(&directions8_upper_nogap,&directions8_upper_Egap,dynprog, rsequenceuc,gsequence,gsequence_alt,rlength,glength, @@ -2718,8 +2728,9 @@ &(*nopens),&(*nindels),dynprog,rev_rsequence,rev_rsequenceuc, rlength,glength,rev_roffset,rev_goffset,chroffset,chrhigh, watsonp,jump_late_p,pairpool, - extraband_end,defect_rate,/*endalign*/QUERYEND_NOGAPS); - if (*finalscore < 0) { + extraband_end,defect_rate,/*endalign*/QUERYEND_NOGAPS, + /*require_pos_score_p*/true); + if (*finalscore <= 0) { orig_score = 0; orig_pairs = best_pairs = (List_T) NULL; } else { @@ -2878,7 +2889,7 @@ &(*nopens),&(*nindels),dynprog,rev_rsequence,rev_rsequenceuc, rlength,glength,rev_roffset,rev_goffset,chroffset,chrhigh, watsonp,jump_late_p,pairpool, - extraband_end,defect_rate,/*endalign*/BEST_LOCAL); + extraband_end,defect_rate,/*endalign*/BEST_LOCAL,/*require_pos_score_p*/false); debug7(Pair_dump_list(orig_pairs,/*zerobasedp*/true)); debug7(printf("End of dynprog end5 known\n")); *knownsplicep = false; @@ -2975,8 +2986,9 @@ &(*nopens),&(*nindels),dynprog,rsequence,rsequenceuc, rlength,glength,roffset,goffset,chroffset,chrhigh, watsonp,jump_late_p,pairpool, - extraband_end,defect_rate,/*endalign*/QUERYEND_NOGAPS); - if (*finalscore < 0) { + extraband_end,defect_rate,/*endalign*/QUERYEND_NOGAPS, + /*require_pos_score_p*/true); + if (*finalscore <= 0) { orig_score = 0; orig_pairs = best_pairs = (List_T) NULL; } else { @@ -3135,7 +3147,8 @@ &(*nopens),&(*nindels),dynprog,rsequence,rsequenceuc, rlength,glength,roffset,goffset,chroffset,chrhigh, watsonp,jump_late_p,pairpool, - extraband_end,defect_rate,/*endalign*/BEST_LOCAL); + extraband_end,defect_rate,/*endalign*/BEST_LOCAL, + /*require_pos_score_p*/false); 
debug7(Pair_dump_list(orig_pairs,/*zerobasedp*/true)); *knownsplicep = false; debug7(printf("End of dynprog end5 known\n")); diff -Nru gmap-2016-11-07/src/dynprog_end.h gmap-2017-01-14/src/dynprog_end.h --- gmap-2016-11-07/src/dynprog_end.h 2016-02-18 00:06:35.000000000 +0000 +++ gmap-2017-01-14/src/dynprog_end.h 2017-01-01 15:41:52.000000000 +0000 @@ -1,4 +1,4 @@ -/* $Id: dynprog_end.h 184458 2016-02-18 00:06:33Z twu $ */ +/* $Id: dynprog_end.h 202043 2017-01-01 15:41:51Z twu $ */ #ifndef DYNPROG_END_INCLUDED #define DYNPROG_END_INCLUDED @@ -27,7 +27,8 @@ int length1, int length2, int revoffset1, int revoffset2, Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp, bool jump_late_p, Pairpool_T pairpool, - int extraband_end, double defect_rate, Endalign_T endalign); + int extraband_end, double defect_rate, Endalign_T endalign, + bool require_pos_score_p); extern List_T Dynprog_end5_splicejunction (int *dynprogindex, int *finalscore, int *missscore, @@ -46,7 +47,8 @@ int length1, int length2, int offset1, int offset2, Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp, bool jump_late_p, Pairpool_T pairpool, - int extraband_end, double defect_rate, Endalign_T endalign); + int extraband_end, double defect_rate, Endalign_T endalign, + bool require_pos_score_p); extern List_T Dynprog_end3_splicejunction (int *dynprogindex, int *finalscore, int *missscore, diff -Nru gmap-2016-11-07/src/dynprog_genome.c gmap-2017-01-14/src/dynprog_genome.c --- gmap-2016-11-07/src/dynprog_genome.c 2016-09-24 00:47:16.000000000 +0000 +++ gmap-2017-01-14/src/dynprog_genome.c 2017-01-01 15:40:38.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: dynprog_genome.c 198278 2016-09-24 00:47:16Z twu $"; +static char rcsid[] = "$Id: dynprog_genome.c 202042 2017-01-01 15:40:38Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -77,7 +77,7 @@ #define USE_SCOREI 1 -#define USE_WEAK_SCOREI 1 +/* #define USE_WEAK_SCOREI 1 */ #define PROB_CEILING 0.85 #define PROB_FLOOR 0.50 @@ -92,24 
+92,28 @@ #define FINAL_GCAG_INTRON 4 /* Amount above regular should approximately match FINAL_CANONICAL_INTRON - CANONICAL_INTRON */ #define FINAL_ATAC_INTRON 2 + #else -#define GCAG_INTRON 15 -#define ATAC_INTRON 12 -#define FINAL_GCAG_INTRON 20 /* Amount above regular should approximately +/* Values were 15, 12, 20, and 12 */ +#define GCAG_INTRON 8 +#define ATAC_INTRON 4 +#define FINAL_GCAG_INTRON 10 /* Amount above regular should approximately match FINAL_CANONICAL_INTRON - CANONICAL_INTRON */ -#define FINAL_ATAC_INTRON 12 +#define FINAL_ATAC_INTRON 8 #endif /* Don't want to make too high, otherwise we will harm evaluation of dual introns vs. single intron */ +/* Values were 10, 16, 22 */ #define CANONICAL_INTRON_HIGHQ 10 /* GT-AG */ -#define CANONICAL_INTRON_MEDQ 16 -#define CANONICAL_INTRON_LOWQ 22 +#define CANONICAL_INTRON_MEDQ 12 +#define CANONICAL_INTRON_LOWQ 14 -#define FINAL_CANONICAL_INTRON_HIGHQ 30 /* GT-AG */ -#define FINAL_CANONICAL_INTRON_MEDQ 36 -#define FINAL_CANONICAL_INTRON_LOWQ 42 +/* Values were 30, 36, 42 */ +#define FINAL_CANONICAL_INTRON_HIGHQ 12 /* GT-AG */ +#define FINAL_CANONICAL_INTRON_MEDQ 15 +#define FINAL_CANONICAL_INTRON_LOWQ 18 #define KNOWN_SPLICESITE_REWARD 20 @@ -562,10 +566,11 @@ int rL, rR, cL, cR; int cloL, chighL; int cloR, chighR; - int bestscore = NEG_INFINITY_8, score, scoreL, scoreR; + int bestscore = NEG_INFINITY_8, score, scoreL, scoreI, scoreR; Univcoord_T splicesitepos1, splicesitepos2; bool bestp; + scoreI = 0; /* Because we constrain splices to given introns */ for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) { debug3(printf("\nGenomic insert: At row %d on left and %d on right\n",rL,rR)); @@ -607,7 +612,7 @@ } #endif - if ((score = scoreL + scoreR) > bestscore || + if ((score = scoreL + scoreI + scoreR) > bestscore || (score >= bestscore && jump_late_p)) { /* Use >= for jump late */ bestp = false; if (watsonp == true) { @@ -651,7 +656,7 @@ } #endif - if ((score = scoreL + scoreR) > bestscore || + if ((score = 
scoreL + scoreI + scoreR) > bestscore || (score >= bestscore && jump_late_p)) { /* Use >= for jump late */ bestp = false; if (watsonp == true) { @@ -710,7 +715,7 @@ } #endif - if ((score = scoreL + scoreR) > bestscore || + if ((score = scoreL + scoreI + scoreR) > bestscore || (score >= bestscore && jump_late_p)) { /* Use >= for jump late */ bestp = false; if (watsonp == true) { @@ -754,7 +759,7 @@ } #endif - if ((score = scoreL + scoreR) > bestscore || + if ((score = scoreL + scoreI + scoreR) > bestscore || (score >= bestscore && jump_late_p)) { /* Use >= for jump late */ bestp = false; if (watsonp == true) { @@ -1004,7 +1009,7 @@ scoreI = 0; #endif - if ((score = scoreL + scoreR) > bestscore) { + if ((score = scoreL + scoreI + scoreR) > bestscore) { debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n", cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR)); debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR])); @@ -1033,7 +1038,7 @@ debug3a(printf("At %d left to %d right, scoreI is %d and prob is %f + %f = %f\n", cL,cR,scoreI,probL,probR,probL+probR)); if (probL + probR > bestprob_with_dinucl) { - bestscore_with_dinucl = scoreL + scoreR; + bestscore_with_dinucl = scoreL + scoreI + scoreR; bestcL_with_dinucl = cL; bestcR_with_dinucl = cR; bestrL_with_dinucl = rL; @@ -1068,7 +1073,7 @@ scoreI = 0; #endif - if ((score = scoreL + scoreR) > bestscore) { + if ((score = scoreL + scoreI + scoreR) > bestscore) { debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n", cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR)); debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR])); @@ -1108,7 +1113,7 @@ scoreI = 0; #endif - if ((score = scoreL + scoreR) > bestscore) { + if ((score = scoreL + scoreI + scoreR) > bestscore) { debug3(printf("Best score: At %d left to %d right, score is 
(%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n", cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR)); debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR])); @@ -1133,7 +1138,7 @@ } } - debug3(printf("C. Test indel on left\n")); + debug3(printf("C. Test indel on left (1)\n")); /* Test indel on left */ cR = rR; probR = right_probabilities[cR]; @@ -1158,7 +1163,7 @@ scoreI = 0; #endif - if ((score = scoreL + scoreR) > bestscore) { + if ((score = scoreL + scoreI + scoreR) > bestscore) { debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n", cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR)); debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR])); @@ -1198,7 +1203,7 @@ scoreI = 0; #endif - if ((score = scoreL + scoreR) > bestscore) { + if ((score = scoreL + scoreI + scoreR) > bestscore) { debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n", cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR)); debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR])); @@ -1386,10 +1391,11 @@ int rL, rR, cL, cR; int cloL, chighL; int cloR, chighR; - int bestscore = NEG_INFINITY_16, score, scoreL, scoreR; + int bestscore = NEG_INFINITY_16, score, scoreL, scoreI, scoreR; Univcoord_T splicesitepos1, splicesitepos2; bool bestp; + scoreI = 0; /* Because we constrain splices to given introns */ for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) { debug3(printf("\nGenomic insert: At row %d on left and %d on right\n",rL,rR)); @@ -1431,7 +1437,7 @@ } #endif - if ((score = scoreL + scoreR) > bestscore || + if ((score = scoreL + scoreI + scoreR) > bestscore || (score >= bestscore && jump_late_p)) { /* Use >= for jump late */ bestp = false; if (watsonp == true) { @@ -1475,7 +1481,7 @@ } #endif - if ((score = scoreL + scoreR) > bestscore || + if ((score = scoreL + 
scoreI + scoreR) > bestscore || (score >= bestscore && jump_late_p)) { /* Use >= for jump late */ bestp = false; if (watsonp == true) { @@ -1534,7 +1540,7 @@ } #endif - if ((score = scoreL + scoreR) > bestscore || + if ((score = scoreL + scoreI + scoreR) > bestscore || (score >= bestscore && jump_late_p)) { /* Use >= for jump late */ bestp = false; if (watsonp == true) { @@ -1578,7 +1584,7 @@ } #endif - if ((score = scoreL + scoreR) > bestscore || + if ((score = scoreL + scoreI + scoreR) > bestscore || (score >= bestscore && jump_late_p)) { /* Use >= for jump late */ bestp = false; if (watsonp == true) { @@ -1828,7 +1834,7 @@ scoreI = 0; #endif - if ((score = scoreL + scoreR) > bestscore) { + if ((score = scoreL + scoreI + scoreR) > bestscore) { debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n", cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR)); debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR])); @@ -1857,7 +1863,7 @@ debug3(printf("At %d left to %d right, scoreI is %d and prob is %f + %f = %f\n", cL,cR,scoreI,probL,probR,probL+probR)); if (probL + probR > bestprob_with_dinucl) { - bestscore_with_dinucl = scoreL + scoreR; + bestscore_with_dinucl = scoreL + scoreI + scoreR; bestcL_with_dinucl = cL; bestcR_with_dinucl = cR; bestrL_with_dinucl = rL; @@ -1892,7 +1898,7 @@ scoreI = 0; #endif - if ((score = scoreL + scoreR) > bestscore) { + if ((score = scoreL + scoreI + scoreR) > bestscore) { debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n", cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR)); debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR])); @@ -1932,7 +1938,7 @@ scoreI = 0; #endif - if ((score = scoreL + scoreR) > bestscore) { + if ((score = scoreL + scoreI + scoreR) > bestscore) { debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = 
%d (bestscore, prob %f + %f)\n", cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR)); debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR])); @@ -1958,7 +1964,7 @@ } - debug3(printf("C. Test indel on left\n")); + debug3(printf("C. Test indel on left (2)\n")); /* Test indel on left */ cR = rR; probR = right_probabilities[cR]; @@ -1983,7 +1989,7 @@ scoreI = 0; #endif - if ((score = scoreL + scoreR) > bestscore) { + if ((score = scoreL + scoreI + scoreR) > bestscore) { debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n", cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR)); debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR])); @@ -2023,7 +2029,7 @@ scoreI = 0; #endif - if ((score = scoreL + scoreR) > bestscore) { + if ((score = scoreL + scoreI + scoreR) > bestscore) { debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n", cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR)); debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR])); @@ -2212,10 +2218,11 @@ int rL, rR, cL, cR; int cloL, chighL; int cloR, chighR; - int bestscore = NEG_INFINITY_32, score, scoreL, scoreR; + int bestscore = NEG_INFINITY_32, score, scoreL, scoreI, scoreR; Univcoord_T splicesitepos1, splicesitepos2; bool bestp; + scoreI = 0; /* Because we constrain splices to given introns */ for (rL = 1, rR = rlength-1; rL < rlength; rL++, rR--) { debug3(printf("\nGenomic insert: At row %d on left and %d on right\n",rL,rR)); @@ -2257,7 +2264,7 @@ } #endif - if ((score = scoreL + scoreR) > bestscore || + if ((score = scoreL + scoreI + scoreR) > bestscore || (score >= bestscore && jump_late_p)) { /* Use >= for jump late */ bestp = false; if (watsonp == true) { @@ -2505,7 +2512,7 @@ scoreI = 0; #endif - if ((score = scoreL + scoreR) > bestscore) { + if ((score = scoreL + scoreI + 
scoreR) > bestscore) { debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n", cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR)); debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR])); @@ -2534,7 +2541,7 @@ debug3a(printf("At %d left to %d right, scoreI is %d and prob is %f + %f = %f\n", cL,cR,scoreI,probL,probR,probL+probR)); if (probL + probR > bestprob_with_dinucl) { - bestscore_with_dinucl = scoreL + scoreR; + bestscore_with_dinucl = scoreL + scoreI + scoreR; bestcL_with_dinucl = cL; bestcR_with_dinucl = cR; bestrL_with_dinucl = rL; @@ -2569,7 +2576,7 @@ scoreI = 0; #endif - if ((score = scoreL + scoreR) > bestscore) { + if ((score = scoreL + scoreI + scoreR) > bestscore) { debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n", cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR)); debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR])); @@ -2594,7 +2601,7 @@ } } - debug3(printf("C. Test indel on left\n")); + debug3(printf("C. 
Test indel on left (3)\n")); /* Test indel on left */ cR = rR; probR = right_probabilities[cR]; @@ -2619,7 +2626,7 @@ scoreI = 0; #endif - if ((score = scoreL + scoreR) > bestscore) { + if ((score = scoreL + scoreI + scoreR) > bestscore) { debug3(printf("Best score: At %d left to %d right, score is (%d)+(%d)+(%d) = %d (bestscore, prob %f + %f)\n", cL,cR,scoreL,scoreI,scoreR,scoreL+scoreI+scoreR,probL,probR)); debug3(printf("probL %f, probR %f\n",left_probabilities[cL],right_probabilities[cR])); @@ -3277,7 +3284,7 @@ #if defined(HAVE_SSE2) /* Use || because we want the minimum length (which determines the diagonal length) to achieve a score less than 128 */ - if (rlength <= SIMD_MAXLENGTH_EPI8 || (glengthL <= SIMD_MAXLENGTH_EPI8 && glengthR <= SIMD_MAXLENGTH_EPI8)) { + if (rlength < use8p_size[mismatchtype] || (glengthL < use8p_size[mismatchtype] && glengthR < use8p_size[mismatchtype])) { use8p = true; } else { use8p = false; diff -Nru gmap-2016-11-07/src/dynprog.h gmap-2017-01-14/src/dynprog.h --- gmap-2016-11-07/src/dynprog.h 2016-05-01 17:28:23.000000000 +0000 +++ gmap-2017-01-14/src/dynprog.h 2017-01-01 15:40:09.000000000 +0000 @@ -1,4 +1,4 @@ -/* $Id: dynprog.h 188752 2016-05-01 17:28:22Z twu $ */ +/* $Id: dynprog.h 202041 2017-01-01 15:40:08Z twu $ */ #ifndef DYNPROG_INCLUDED #define DYNPROG_INCLUDED #ifdef HAVE_CONFIG_H @@ -150,6 +150,7 @@ might be lower-case */ #define PREUC 1 +extern int use8p_size[NMISMATCHTYPES]; extern Pairdistance_T **pairdistance_array[NMISMATCHTYPES]; #ifndef HAVE_SSE4_1 extern Pairdistance_T **pairdistance_array_plus_128[NMISMATCHTYPES]; diff -Nru gmap-2016-11-07/src/dynprog_simd.c gmap-2017-01-14/src/dynprog_simd.c --- gmap-2016-11-07/src/dynprog_simd.c 2016-09-14 18:57:36.000000000 +0000 +++ gmap-2017-01-14/src/dynprog_simd.c 2017-01-01 15:43:25.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: dynprog_simd.c 189207 2016-05-06 23:16:32Z twu $"; +static char rcsid[] = "$Id: dynprog_simd.c 202044 2017-01-01 15:43:24Z twu 
$"; #ifdef HAVE_CONFIG_H #include #endif @@ -216,7 +216,9 @@ int i, j; char g, g_alt; +#ifdef HAVE_SSE2 _mm_lfence(); +#endif /* j */ printf(" "); /* For i */ @@ -291,7 +293,9 @@ int i, j; char g, g_alt; +#ifdef HAVE_SSE2 _mm_lfence(); +#endif /* j */ printf(" "); /* For i */ @@ -380,7 +384,9 @@ int i, j; char g, g_alt; +#ifdef HAVE_SSE2 _mm_lfence(); +#endif /* j */ if (rlength >= 100) { @@ -516,7 +522,9 @@ int i, j; char g, g_alt; +#ifdef HAVE_SSE2 _mm_lfence(); +#endif /* j */ printf(" "); /* For i */ @@ -606,7 +614,9 @@ int i, j; char g, g_alt; +#ifdef HAVE_SSE2 _mm_lfence(); +#endif /* j */ printf(" "); /* For i */ @@ -700,7 +710,9 @@ int i, j; char g, g_alt; +#ifdef HAVE_SSE2 _mm_lfence(); +#endif /* j */ printf(" "); /* For i */ @@ -809,7 +821,9 @@ int i, j; char g, g_alt; +#ifdef HAVE_SSE2 _mm_lfence(); +#endif /* j */ printf(" "); /* For i */ @@ -903,7 +917,9 @@ int i, j; char g, g_alt; +#ifdef HAVE_SSE2 _mm_lfence(); +#endif /* j */ printf(" "); /* For i */ @@ -9086,11 +9102,11 @@ if ((dir = directions_nogap[c][r]) != DIAG) { /* Must be HORIZ */ dist = 1; - /* Should not need to check for c > 0 if the main diagonal is populated with DIAG */ - while (/* c > 0 && */ directions_Egap[c--][r] != DIAG) { + /* Should not need to check for c > r if the Egap diagonal above the main is populated with DIAG */ + while (/* c > r && */ directions_Egap[c--][r] != DIAG) { dist++; } - /* assert(c != 0); */ + assert(c >= r); debug(printf("H%d: ",dist)); pairs = Pairpool_add_genomeskip(&add_dashes_p,pairs,r,c+dist,dist,/*genomesequence*/NULL, @@ -9197,11 +9213,11 @@ if ((dir = directions_nogap[r][c]) != DIAG) { /* Must be VERT */ dist = 1; - /* Should not need to check for r > 0 if the main diagonal is populated with DIAG */ - while (/* r > 0 && */ directions_Egap[r--][c] != DIAG) { + /* Should not need to check for r > c if the Egap diagonal below the main is populated with DIAG */ + while (/* r > c && */ directions_Egap[r--][c] != DIAG) { dist++; } - /* assert(r != 0); */ 
+ assert(r >= c); debug(printf("V%d: ",dist)); pairs = Pairpool_add_queryskip(pairs,r+dist,c,dist,rsequence, @@ -9236,8 +9252,8 @@ if (c2 == '*') { /* Don't push pairs past end of chromosome */ - debug(printf("Don't push pairs past end of chromosome: genomeoffset %u, genomecoord %u, chroffset %u, chrhigh %u, watsonp %d\n", - genomeoffset,genomecoord,chroffset,chrhigh,watsonp)); + debug(printf("Don't push pairs past end of chromosome: genomeoffset %u, genomecoord %u\n", + genomeoffset,genomecoord)); } else if (/*querysequenceuc[querycoord]*/c1_uc == c2 || c1_uc == c2_alt) { debug(printf("Pushing %d,%d [%d,%d] (%c,%c) - match\n", @@ -9461,11 +9477,11 @@ if ((dir = directions_nogap[c][r]) != DIAG) { /* Must be HORIZ */ dist = 1; - /* Should not need to check for c > 0 if the main diagonal is populated with DIAG */ - while (/* c > 0 && */ directions_Egap[c--][r] != DIAG) { + /* Should not need to check for c > r if the Egap diagonal above the main is populated with DIAG */ + while (/* c > r && */ directions_Egap[c--][r] != DIAG) { dist++; } - /* assert(c != 0); */ + assert(c >= r); debug(printf("H%d: ",dist)); pairs = Pairpool_add_genomeskip(&add_dashes_p,pairs,r,c+dist,dist,/*genomesequence*/NULL, @@ -9572,11 +9588,11 @@ if ((dir = directions_nogap[r][c]) != DIAG) { /* Must be VERT */ dist = 1; - /* Should not need to check for r > 0 if the main diagonal is populated with DIAG */ - while (/* r > 0 && */ directions_Egap[r--][c] != DIAG) { + /* Should not need to check for r > c if the Egap diagonal below the main is populated with DIAG */ + while (/* r > c && */ directions_Egap[r--][c] != DIAG) { dist++; } - /* assert(r != 0); */ + assert(r >= c); debug(printf("V%d: ",dist)); pairs = Pairpool_add_queryskip(pairs,r+dist,c,dist,rsequence, @@ -9611,8 +9627,8 @@ if (c2 == '*') { /* Don't push pairs past end of chromosome */ - debug(printf("Don't push pairs past end of chromosome: genomeoffset %u, genomecoord %u, chroffset %u, chrhigh %u, watsonp %d\n", - 
genomeoffset,genomecoord,chroffset,chrhigh,watsonp)); + debug(printf("Don't push pairs past end of chromosome: genomeoffset %u, genomecoord %u\n", + genomeoffset,genomecoord)); } else if (/*querysequenceuc[querycoord]*/c1_uc == c2 || c1_uc == c2_alt) { debug(printf("Pushing %d,%d [%d,%d] (%c,%c) - match\n", diff -Nru gmap-2016-11-07/src/dynprog_simd.h gmap-2017-01-14/src/dynprog_simd.h --- gmap-2016-11-07/src/dynprog_simd.h 2016-02-17 20:27:51.000000000 +0000 +++ gmap-2017-01-14/src/dynprog_simd.h 2017-01-01 15:32:14.000000000 +0000 @@ -3,7 +3,11 @@ #include "dynprog.h" +#if 0 +/* Now determined by mismatchtype: highq 41, medq 63, lowq 127, endq 24 */ #define SIMD_MAXLENGTH_EPI8 30 /* Previously had 40 = 128/3, but have seen 7-bit overflow empirically at matrices of size 30 */ +#endif + /* Define DEBUG_SIMD and DEBUG_AVX2 in dynprog.h */ diff -Nru gmap-2016-11-07/src/dynprog_single.c gmap-2017-01-14/src/dynprog_single.c --- gmap-2016-11-07/src/dynprog_single.c 2016-02-18 00:06:35.000000000 +0000 +++ gmap-2017-01-14/src/dynprog_single.c 2017-01-01 15:44:17.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: dynprog_single.c 184458 2016-02-18 00:06:33Z twu $"; +static char rcsid[] = "$Id: dynprog_single.c 202046 2017-01-01 15:44:17Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -586,7 +586,7 @@ #if defined(HAVE_SSE2) /* Use || because we want the minimum length (which determines the diagonal length) to achieve a score less than 128 */ /* Use && because we don't want to overflow in either direction */ - if (rlength <= SIMD_MAXLENGTH_EPI8 && glength <= SIMD_MAXLENGTH_EPI8) { + if (rlength < use8p_size[mismatchtype] && glength < use8p_size[mismatchtype]) { matrix8 = Dynprog_simd_8(&directions8_nogap,&directions8_Egap,&directions8_Fgap,dynprog, rsequence,gsequence,gsequence_alt,rlength,glength, #if defined(DEBUG_AVX2) || defined(DEBUG_SIMD) diff -Nru gmap-2016-11-07/src/genome128_hr.c gmap-2017-01-14/src/genome128_hr.c --- gmap-2016-11-07/src/genome128_hr.c 
2016-05-05 16:22:49.000000000 +0000 +++ gmap-2017-01-14/src/genome128_hr.c 2017-01-10 22:12:31.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: genome128_hr.c 184459 2016-02-18 00:06:56Z twu $"; +static char rcsid[] = "$Id: genome128_hr.c 202263 2017-01-10 22:12:31Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -30,6 +30,44 @@ #include "littleendian.h" #endif + +/* Consecutive_matches_rightward and leftward */ +/* Slower with shift and wrap, perhaps because we need to extract integers from the SIMD object */ +/* #define USE_SHIFT_FIRST_MISMATCH 1 */ +/* #define USE_WRAP_FIRST_MISMATCH 1 */ + +/* Genome_mismatches_right and left */ +/* Slower with shift and wrap, probably because we need to loop over the SIMD object */ +/* #define USE_SHIFT_MISMATCH_POSITIONS 1 */ +/* #define USE_WRAP_MISMATCH_POSITIONS 1 */ + +/* Genome_count_mismatches_substring */ +/* Faster with shift and wrap. Does not involve any loops. */ +#define USE_SHIFT_POPCOUNT 1 +#define USE_WRAP_POPCOUNT 1 + +/* Genome_mismatches_right_trim and left_trim */ +/* Slower with shift and wrap */ +/* #define USE_SHIFT_TRIM 1 */ +/* #define USE_WRAP_TRIM 1 */ + + +/* Faster to use a straight shift, and _mm_bsrli_si128 is not defined in gcc 4.7 */ +/* #define USE_SHIFT_HILO 1 */ + + +#ifdef HAVE_SSE2 +#define QUERY_NEXTCOL 1 /* high0, high1, high2, high3 */ +#define QUERY_NEXTROW 8 +#else +#define QUERY_NEXTCOL 3 /* high, low, flags */ +/* #define QUERY_NEXTROW 0 */ +#endif + +#define GENOME_NEXTCOL 1 +#define GENOME_NEXTROW 8 + + #ifdef WORDS_BIGENDIAN /* Do not use SIMD */ #elif defined(HAVE_SSE2) @@ -38,6 +76,13 @@ #ifdef HAVE_SSE4_1 #include #endif +#ifdef HAVE_AVX2 +#include +#endif +#ifdef HAVE_AVX512 +#include +#endif + #if !defined(HAVE_SSE4_2) /* Skip popcnt, which comes after SSE4.2 */ #elif defined(HAVE_POPCNT) @@ -16512,7 +16557,8 @@ _mm_extract_epi32(x,0),_mm_extract_epi32(x,1),_mm_extract_epi32(x,2),_mm_extract_epi32(x,3)); return; } -#else + +#elif defined(HAVE_SSE2) static void 
print_vector_hex (__m128i x) { printf("%08X %08X %08X %08X\n", @@ -16533,6 +16579,49 @@ return; } #endif + +#ifdef HAVE_AVX2 +static void +print_vector_256_hex (__m256i x) { + printf("%08X %08X %08X %08X %08X %08X %08X %08X\n", + _mm256_extract_epi32(x,0),_mm256_extract_epi32(x,1),_mm256_extract_epi32(x,2),_mm256_extract_epi32(x,3), + _mm256_extract_epi32(x,4),_mm256_extract_epi32(x,5),_mm256_extract_epi32(x,6),_mm256_extract_epi32(x,7)); + return; +} + +static void +print_vector_256_dec (__m256i x) { + printf("%u %u %u %u %u %u %u %u\n", + _mm256_extract_epi32(x,0),_mm256_extract_epi32(x,1),_mm256_extract_epi32(x,2),_mm256_extract_epi32(x,3), + _mm256_extract_epi32(x,4),_mm256_extract_epi32(x,5),_mm256_extract_epi32(x,6),_mm256_extract_epi32(x,7)); + return; +} +#endif + +#ifdef HAVE_AVX512 +static void +print_vector_512_hex (__m512i x) { + unsigned int array[16]; + + _mm512_store_si512((__m512i *) array,x); + printf("%08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X\n", + array[0],array[1],array[2],array[3],array[4],array[5],array[6],array[7], + array[8],array[9],array[10],array[11],array[12],array[13],array[14],array[15]); + return; +} + +static void +print_vector_512_dec (__m512i x) { + unsigned int array[16]; + + _mm512_store_si512((__m512i *) array,x); + printf("%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u\n", + array[0],array[1],array[2],array[3],array[4],array[5],array[6],array[7], + array[8],array[9],array[10],array[11],array[12],array[13],array[14],array[15]); + return; +} +#endif + #endif @@ -16636,14 +16725,14 @@ write_chars(high,low,flags); printf("\n"); - ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;} + ptr += GENOME_NEXTCOL; if (++startcolumni == 4) {ptr += GENOME_NEXTROW; startcolumni = 0;} #elif !defined(HAVE_SSE2) high = ptr[0]; low = ptr[4]; flags = ptr[8]; printf("high: %08X low: %08X flags: %08X\t",high,low,flags); write_chars(high,low,flags); printf("\n"); - ptr += 1; if (++startcolumni == 4) 
{ptr += 8; startcolumni = 0;} + ptr += GENOME_NEXTCOL; if (++startcolumni == 4) {ptr += GENOME_NEXTROW; startcolumni = 0;} #else if (startcolumni == 0) { @@ -16808,7 +16897,7 @@ #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) high = ref_ptr[0]; low = ref_ptr[4]; flags = ref_ptr[8]; snpmask = snp_ptr[8]; printf("high: %08X low: %08X flags: %08X snpmask: %08X\n",high,low,flags,snpmask); - ref_ptr += 1; snp_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; snp_ptr += 8; startcolumni = 0;} + ref_ptr += GENOME_NEXTCOL; snp_ptr += GENOME_NEXTCOL; if (++startcolumni == 4) {ref_ptr += GENOME_NEXTROW; snp_ptr += GENOME_NEXTROW; startcolumni = 0;} #else high = ref_ptr[0]; low = ref_ptr[4]; flags = ref_ptr[8]; snpmask = snp_ptr[8]; printf("high: %08X low: %08X flags: %08X snpmask: %08X\n",high,low,flags,snpmask); @@ -16834,6 +16923,201 @@ static Genomecomp_T *ref_blocks; static Genomecomp_T *snp_blocks; +#if defined(USE_SHIFT_HILO) && defined(HAVE_SSE2) +static inline void +read_128_shift_lo (__m128i *__restrict__ high, __m128i *__restrict__ low, __m128i *__restrict__ flags, UINT4 *__restrict__ ptr, + int startcolumni) { + __m128i a, b, c; + + ptr -= startcolumni; + a = _mm_load_si128((__m128i *) ptr); ptr += 4; + b = _mm_load_si128((__m128i *) ptr); ptr += 4; + c = _mm_load_si128((__m128i *) ptr); ptr += 4; + + switch (startcolumni) { + case 0: + *high = _mm_bsrli_si128(a, 0); + *low = _mm_bsrli_si128(b, 0); + *flags = _mm_bsrli_si128(c, 0); + break; + case 1: + *high = _mm_bsrli_si128(a, 4); + *low = _mm_bsrli_si128(b, 4); + *flags = _mm_bsrli_si128(c, 4); + break; + case 2: + *high = _mm_bsrli_si128(a, 8); + *low = _mm_bsrli_si128(b, 8); + *flags = _mm_bsrli_si128(c, 8); + break; + default: + *high = _mm_bsrli_si128(a, 12); + *low = _mm_bsrli_si128(b, 12); + *flags = _mm_bsrli_si128(c, 12); + break; + } + + return; +} + +static inline void +read_128_shift_hi (__m128i *__restrict__ high, __m128i *__restrict__ low, __m128i *__restrict__ flags, UINT4 *__restrict__ ptr, + 
int endcolumni) { + __m128i a, b, c; + + ptr -= endcolumni; + a = _mm_load_si128((__m128i *) ptr); ptr += 4; + b = _mm_load_si128((__m128i *) ptr); ptr += 4; + c = _mm_load_si128((__m128i *) ptr); ptr += 4; + + switch (endcolumni) { + case 0: + *high = _mm_bslli_si128(a, 12); + *low = _mm_bslli_si128(b, 12); + *flags = _mm_bslli_si128(c, 12); + break; + case 1: + *high = _mm_bslli_si128(a, 8); + *low = _mm_bslli_si128(b, 8); + *flags = _mm_bslli_si128(c, 8); + break; + case 2: + *high = _mm_bslli_si128(a, 4); + *low = _mm_bslli_si128(b, 4); + *flags = _mm_bslli_si128(c, 4); + break; + default: + *high = _mm_bslli_si128(a, 0); + *low = _mm_bslli_si128(b, 0); + *flags = _mm_bslli_si128(c, 0); + break; + } + + return; +} +#endif + + +#ifdef HAVE_SSSE3 +static inline void +read_128_wrap_lo (__m128i *__restrict__ high, __m128i *__restrict__ low, __m128i *__restrict__ flags, UINT4 *__restrict__ ptr, + int startcolumni) { + __m128i a, b, c, d, e, f; + + ptr -= startcolumni; + a = _mm_load_si128((__m128i *) ptr); ptr += 4; + b = _mm_load_si128((__m128i *) ptr); ptr += 4; + c = _mm_load_si128((__m128i *) ptr); ptr += 4; + d = _mm_load_si128((__m128i *) ptr); ptr += 4; + e = _mm_load_si128((__m128i *) ptr); ptr += 4; + f = _mm_load_si128((__m128i *) ptr); + + switch (startcolumni) { + case 0: + *high = _mm_alignr_epi8(d, a, 0); + *low = _mm_alignr_epi8(e, b, 0); + *flags = _mm_alignr_epi8(f, c, 0); + break; + case 1: + *high = _mm_alignr_epi8(d, a, 4); + *low = _mm_alignr_epi8(e, b, 4); + *flags = _mm_alignr_epi8(f, c, 4); + break; + case 2: + *high = _mm_alignr_epi8(d, a, 8); + *low = _mm_alignr_epi8(e, b, 8); + *flags = _mm_alignr_epi8(f, c, 8); + break; + default: + *high = _mm_alignr_epi8(d, a, 12); + *low = _mm_alignr_epi8(e, b, 12); + *flags = _mm_alignr_epi8(f, c, 12); + break; + } + + return; +} + +static inline void +read_128_wrap_hi (__m128i *__restrict__ high, __m128i *__restrict__ low, __m128i *__restrict__ flags, UINT4 *__restrict__ ptr, + int endcolumni) { + 
__m128i a, b, c, d, e, f; + + ptr -= endcolumni; + ptr -= 12; + a = _mm_load_si128((__m128i *) ptr); ptr += 4; + b = _mm_load_si128((__m128i *) ptr); ptr += 4; + c = _mm_load_si128((__m128i *) ptr); ptr += 4; + d = _mm_load_si128((__m128i *) ptr); ptr += 4; + e = _mm_load_si128((__m128i *) ptr); ptr += 4; + f = _mm_load_si128((__m128i *) ptr); + + switch (endcolumni) { + case 0: + *high = _mm_alignr_epi8(d, a, 4); + *low = _mm_alignr_epi8(e, b, 4); + *flags = _mm_alignr_epi8(f, c, 4); + break; + case 1: + *high = _mm_alignr_epi8(d, a, 8); + *low = _mm_alignr_epi8(e, b, 8); + *flags = _mm_alignr_epi8(f, c, 8); + break; + case 2: + *high = _mm_alignr_epi8(d, a, 12); + *low = _mm_alignr_epi8(e, b, 12); + *flags = _mm_alignr_epi8(f, c, 12); + break; + default: + *high = _mm_alignr_epi8(d, a, 16); + *low = _mm_alignr_epi8(e, b, 16); + *flags = _mm_alignr_epi8(f, c, 16); + break; + } + + return; +} +#endif + + +#ifdef HAVE_AVX2 +static inline void +read_256 (__m256i *__restrict__ high, __m256i *__restrict__ low, __m256i *__restrict__ flags, UINT4 *__restrict__ ptr) { + __m256i a, b, c; + a = _mm256_loadu_si256((__m256i *) ptr); /* query0_high, query0_low */ + b = _mm256_loadu_si256((__m256i *) &(ptr[8])); /* query0_flags, query1_high */ + c = _mm256_loadu_si256((__m256i *) &(ptr[16])); /* query1_low, query1_flags */ + + *high = _mm256_permute2x128_si256(a, b, 0x30); + *low = _mm256_permute2x128_si256(a, c, 0x21); + *flags = _mm256_permute2x128_si256(b, c, 0x30); + + return; +} +#endif + +#ifdef HAVE_AVX512 +static inline void +read_512 (__m512i *__restrict__ high, __m512i *__restrict__ low, __m512i *__restrict__ flags, UINT4 *__restrict__ ptr) { + __m512i a, b, c, d, e, f; + a = _mm512_loadu_si512((__m512i *) ptr); /* query0_high, query0_low, query0_flags, query1_high */ + b = _mm512_loadu_si512((__m512i *) &(ptr[16])); /* query1_low, query1_flags, query2_high, query2_low */ + c = _mm512_loadu_si512((__m512i *) &(ptr[32])); /* query2_flags, query3_high, query3_low, 
query3_flags */ + + d = _mm512_permutex2var_epi32(a, _mm512_setr_epi32(0, 1, 2, 3, 12, 13, 14, 15, + 4, 5, 6, 7, 16+0, 16+1, 16+2, 16+3), b); + e = _mm512_permutex2var_epi32(b, _mm512_setr_epi32(8, 9, 10, 11, 16+4, 16+5, 16+6, 16+7, + 12, 13, 14, 15, 16+8, 16+9, 16+10, 16+11), c); + f = _mm512_permutex2var_epi32(a, _mm512_setr_epi32(8, 9, 10, 11, 16+4, 16+5, 16+6, 16+7, + 12, 13, 14, 15, 16+8, 16+9, 16+10, 16+11), b); + + *high = _mm512_permutex2var_epi64(d, _mm512_setr_epi64(0, 1, 2, 3, 8+0, 8+1, 8+2, 8+3), e); + *low = _mm512_permutex2var_epi64(d, _mm512_setr_epi64(4, 5, 6, 7, 8+4, 8+5, 8+6, 8+7), e); + *flags = _mm512_permutex2var_epi64(f, _mm512_setr_epi64(0, 1, 2, 3, 8+0, 8+1, 8+6, 8+7), c); + + return; +} +#endif /* These are global values, used for alignment. Previously for @@ -16844,10 +17128,9 @@ static bool genome_unk_mismatch_p = true; #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -typedef UINT4 Genomediff_T; #define STEP_SIZE 32 #else -typedef __m128i Genomediff_T; +/* Holds for SSE2, AVX2, and AVX512 */ #define STEP_SIZE 128 #endif @@ -16913,69 +17196,135 @@ } -static Genomediff_T -block_diff_standard (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, - bool plusp, int genestrand, bool query_unk_mismatch_local_p) { -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - UINT4 diff; +#ifdef HAVE_SSE2 +static __m128i +block_diff_standard_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; - debug(printf("Comparing high: query %08X with genome %08X ",query_shifted[0],ref_ptr[0])); - debug(printf("Comparing low: query %08X with genome %08X ",query_shifted[1],ref_ptr[4])); + _query_high = _mm_load_si128((__m128i *) query_shifted); + _query_low = _mm_load_si128((__m128i *) &(query_shifted[4])); + _ref_high = _mm_load_si128((__m128i *) ref_ptr); + _ref_low = _mm_load_si128((__m128i *) 
&(ref_ptr[4])); -#ifdef WORDS_BIGENDIAN - diff = (query_shifted[0] ^ Bigendian_convert_uint(ref_ptr[0])) | (query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4])); -#else - diff = (query_shifted[0] ^ ref_ptr[0]) | (query_shifted[1] ^ ref_ptr[4]); -#endif + _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low)); - /* Query Ns */ + _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8])); if (query_unk_mismatch_local_p) { - /* Query: Considering N as a mismatch */ - diff |= query_shifted[2]; + _diff = _mm_or_si128(_query_flags, _diff); } else { - /* Query: Considering N as a wildcard */ - diff &= ~(query_shifted[2]); + _diff = _mm_andnot_si128(_query_flags, _diff); } - /* Genome Ns */ + _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8])); if (genome_unk_mismatch_p) { - /* Genome: Considering N as a mismatch */ -#ifdef WORDS_BIGENDIAN - diff |= Bigendian_convert_uint(ref_ptr[8]); -#else - diff |= ref_ptr[8]; -#endif + _diff = _mm_or_si128(_ref_flags, _diff); } else { - /* Genome: Considering N as a wildcard */ -#ifdef WORDS_BIGENDIAN - diff &= ~(Bigendian_convert_uint(ref_ptr[8])); -#else - diff &= ~(ref_ptr[8]); + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + + return _diff; +} + +#ifdef USE_SHIFT_HILO +static __m128i +block_diff_standard_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int startcolumni) { + __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_128_shift_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni); + read_128_shift_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni); + + _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm_or_si128(_query_flags, _diff); + } else { + _diff = _mm_andnot_si128(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) 
{ + _diff = _mm_or_si128(_ref_flags, _diff); + } else { + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + + return _diff; +} + +static __m128i +block_diff_standard_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { + __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_128_shift_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni); + read_128_shift_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni); + + _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm_or_si128(_query_flags, _diff); + } else { + _diff = _mm_andnot_si128(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm_or_si128(_ref_flags, _diff); + } else { + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + + return _diff; +} +#endif #endif + +#ifdef HAVE_SSSE3 +static __m128i +block_diff_standard_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int startcolumni) { + __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_128_wrap_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni); + read_128_wrap_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni); + + _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm_or_si128(_query_flags, _diff); + } else { + _diff = _mm_andnot_si128(_query_flags, _diff); } - debug(printf(" => diff %08X\n",diff)); + if (genome_unk_mismatch_p) { + _diff = _mm_or_si128(_ref_flags, _diff); + } else { + _diff = _mm_andnot_si128(_ref_flags, _diff); + } - return diff; + return _diff; +} -#else +static __m128i +block_diff_standard_128_wrap_hi (Genomecomp_T *query_shifted, 
Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; - _query_high = _mm_load_si128((__m128i *) query_shifted); - _ref_high = _mm_load_si128((__m128i *) ref_ptr); - _query_low = _mm_load_si128((__m128i *) &(query_shifted[4])); - _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4])); + read_128_wrap_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni); + read_128_wrap_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni); _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low)); - _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8])); if (query_unk_mismatch_local_p) { _diff = _mm_or_si128(_query_flags, _diff); } else { _diff = _mm_andnot_si128(_query_flags, _diff); } - _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8])); if (genome_unk_mismatch_p) { _diff = _mm_or_si128(_ref_flags, _diff); } else { @@ -16983,9 +17332,62 @@ } return _diff; +} #endif + +#ifdef HAVE_AVX2 +static __m256i +block_diff_standard_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + __m256i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_256(&_query_high,&_query_low,&_query_flags,query_shifted); + read_256(&_ref_high,&_ref_low,&_ref_flags,ref_ptr); + + _diff = _mm256_or_si256(_mm256_xor_si256(_query_high, _ref_high), _mm256_xor_si256(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm256_or_si256(_query_flags, _diff); + } else { + _diff = _mm256_andnot_si256(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm256_or_si256(_ref_flags, _diff); + } else { + _diff = _mm256_andnot_si256(_ref_flags, _diff); + } + + return _diff; } +#endif + +#ifdef HAVE_AVX512 +static __m512i +block_diff_standard_512 (Genomecomp_T *query_shifted, Genomecomp_T 
*ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + __m512i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_512(&_query_high,&_query_low,&_query_flags,query_shifted); + read_512(&_ref_high,&_ref_low,&_ref_flags,ref_ptr); + + _diff = _mm512_or_si512(_mm512_xor_si512(_query_high, _ref_high), _mm512_xor_si512(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm512_or_si512(_query_flags, _diff); + } else { + _diff = _mm512_andnot_si512(_query_flags, _diff); + } + if (genome_unk_mismatch_p) { + _diff = _mm512_or_si512(_ref_flags, _diff); + } else { + _diff = _mm512_andnot_si512(_ref_flags, _diff); + } + + return _diff; +} +#endif static UINT4 @@ -17082,101 +17484,190 @@ /* not wildcard if ref != alt || ref_flag == 1 || alt_flag == 0 */ /* diffs are (query ^ ref) & (query ^ alt) & ~wildcard */ /* snp_ptr here is alt_ptr */ -static Genomediff_T -block_diff_standard_wildcard (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, - bool plusp, int genestrand, bool query_unk_mismatch_local_p) { +#ifdef HAVE_SSE2 +static __m128i +block_diff_standard_wildcard_128 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + __m128i _diff, _wildcard, _query_high, _query_low, _query_flags, + _ref_high, _ref_low, _ref_flags, _snp_high, _snp_low, _snp_flags; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - UINT4 diff, non_wildcard; + _query_high = _mm_load_si128((__m128i *) query_shifted); + _query_low = _mm_load_si128((__m128i *) &(query_shifted[4])); + _ref_high = _mm_load_si128((__m128i *) ref_ptr); + _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4])); - /* Taken from block_diff_standard */ -#ifdef WORDS_BIGENDIAN - diff = (query_shifted[0] ^ Bigendian_convert_uint(ref_ptr[0])) | (query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4])); -#else - diff = (query_shifted[0] ^ ref_ptr[0]) 
| (query_shifted[1] ^ ref_ptr[4]); -#endif + _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low)); - /* Query Ns */ + _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8])); if (query_unk_mismatch_local_p) { - /* Query: Considering N as a mismatch */ - diff |= query_shifted[2]; + _diff = _mm_or_si128(_query_flags, _diff); } else { - /* Query: Considering N as a wildcard */ - diff &= ~(query_shifted[2]); + _diff = _mm_andnot_si128(_query_flags, _diff); } - /* Genome Ns */ + _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8])); if (genome_unk_mismatch_p) { - /* Genome: Considering N as a mismatch */ -#ifdef WORDS_BIGENDIAN - diff |= Bigendian_convert_uint(ref_ptr[8]); -#else - diff |= ref_ptr[8]; -#endif + _diff = _mm_or_si128(_ref_flags, _diff); } else { - /* Genome: Considering N as a wildcard */ -#ifdef WORDS_BIGENDIAN - diff &= ~(Bigendian_convert_uint(ref_ptr[8])); -#else - diff &= ~(ref_ptr[8]); -#endif + _diff = _mm_andnot_si128(_ref_flags, _diff); } + /* End of (query ^ ref) */ + + + /* Add (query ^ snp). Don't need to recompute query flags or use SNP flags. 
*/ + _snp_high = _mm_load_si128((__m128i *) snp_ptr); + _snp_low = _mm_load_si128((__m128i *) &(snp_ptr[4])); + + _diff = _mm_and_si128(_diff, _mm_or_si128(_mm_xor_si128(_query_high, _snp_high), _mm_xor_si128(_query_low, _snp_low))); - /* Add difference relative to SNP */ -#ifdef WORDS_BIGENDIAN - diff &= (query_shifted[0] ^ Bigendian_convert_uint(snp_ptr[0])) | (query_shifted[1] ^ Bigendian_convert_uint(snp_ptr[4])); -#else - diff &= (query_shifted[0] ^ snp_ptr[0]) | (query_shifted[1] ^ snp_ptr[4]); -#endif /* Test for equality of ref and alt */ - debug(printf("Equality high: ref genome %08X with alt genome %08X ",ref_ptr[0],snp_ptr[0])); -#ifdef WORDS_BIGENDIAN - non_wildcard = (Bigendian_convert_uint(ref_ptr[0]) ^ Bigendian_convert_uint(snp_ptr[0])) | - (Bigendian_convert_uint(ref_ptr[4]) ^ Bigendian_convert_uint(snp_ptr[4])); -#else - non_wildcard = (ref_ptr[0] ^ snp_ptr[0]) | (ref_ptr[4] ^ snp_ptr[4]); + _snp_flags = _mm_load_si128((__m128i *) &(snp_ptr[8])); + _wildcard = _mm_andnot_si128(_ref_flags, _snp_flags); + _wildcard = _mm_andnot_si128(_mm_or_si128(_mm_xor_si128(_ref_high, _snp_high), _mm_xor_si128(_ref_low, _snp_low)), _wildcard); + + _diff = _mm_andnot_si128(_wildcard, _diff); + + return _diff; +} + +#ifdef USE_SHIFT_HILO +static __m128i +block_diff_standard_wildcard_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int startcolumni) { + __m128i _diff, _wildcard, _query_high, _query_low, _query_flags, + _ref_high, _ref_low, _ref_flags, _snp_high, _snp_low, _snp_flags; + + read_128_shift_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni); + read_128_shift_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni); + read_128_shift_lo(&_snp_high,&_snp_low,&_snp_flags,snp_ptr,startcolumni); + + _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + 
_diff = _mm_or_si128(_query_flags, _diff); + } else { + _diff = _mm_andnot_si128(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm_or_si128(_ref_flags, _diff); + } else { + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + /* End of (query ^ ref) */ + + /* Add (query ^ snp). Don't need to recompute query flags or use SNP flags. */ + _diff = _mm_and_si128(_diff, _mm_or_si128(_mm_xor_si128(_query_high, _snp_high), _mm_xor_si128(_query_low, _snp_low))); + + /* Test for equality of ref and alt */ + _wildcard = _mm_andnot_si128(_ref_flags, _snp_flags); + _wildcard = _mm_andnot_si128(_mm_or_si128(_mm_xor_si128(_ref_high, _snp_high), _mm_xor_si128(_ref_low, _snp_low)), _wildcard); + + _diff = _mm_andnot_si128(_wildcard, _diff); + + return _diff; +} + +static __m128i +block_diff_standard_wildcard_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { + __m128i _diff, _wildcard, _query_high, _query_low, _query_flags, + _ref_high, _ref_low, _ref_flags, _snp_high, _snp_low, _snp_flags; + + read_128_shift_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni); + read_128_shift_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni); + read_128_shift_hi(&_snp_high,&_snp_low,&_snp_flags,snp_ptr,endcolumni); + + _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm_or_si128(_query_flags, _diff); + } else { + _diff = _mm_andnot_si128(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm_or_si128(_ref_flags, _diff); + } else { + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + /* End of (query ^ ref) */ + + /* Add (query ^ snp). Don't need to recompute query flags or use SNP flags. 
*/ + _diff = _mm_and_si128(_diff, _mm_or_si128(_mm_xor_si128(_query_high, _snp_high), _mm_xor_si128(_query_low, _snp_low))); + + /* Test for equality of ref and alt */ + _wildcard = _mm_andnot_si128(_ref_flags, _snp_flags); + _wildcard = _mm_andnot_si128(_mm_or_si128(_mm_xor_si128(_ref_high, _snp_high), _mm_xor_si128(_ref_low, _snp_low)), _wildcard); + + _diff = _mm_andnot_si128(_wildcard, _diff); + + return _diff; +} #endif - debug(printf(" => diff %08X\n",non_wildcard)); - - /* Ref flags */ - debug(printf("Wildcard add ref flags: ref genome %08X and alt genome %08X ",ref_ptr[8],snp_ptr[8])); -#ifdef WORDS_BIGENDIAN - non_wildcard |= Bigendian_convert_uint(ref_ptr[8]); -#else - non_wildcard |= ref_ptr[8]; #endif - /* Alt flags */ - debug(printf("Wildcard add alt flags: ref genome %08X and alt genome %08X ",ref_ptr[8],snp_ptr[8])); -#ifdef WORDS_BIGENDIAN - non_wildcard |= ~(Bigendian_convert_uint(snp_ptr[8])); -#else - non_wildcard |= ~(snp_ptr[8]); -#endif - debug(printf(" => non_wildcard %08X\n",non_wildcard)); +#ifdef HAVE_SSSE3 +static __m128i +block_diff_standard_wildcard_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, int startcolumni) { + __m128i _diff, _wildcard, _query_high, _query_low, _query_flags, + _ref_high, _ref_low, _ref_flags, _snp_high, _snp_low, _snp_flags; - return diff & non_wildcard; + read_128_wrap_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni); + read_128_wrap_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni); + read_128_wrap_lo(&_snp_high,&_snp_low,&_snp_flags,snp_ptr,startcolumni); -#else + _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm_or_si128(_query_flags, _diff); + } else { + _diff = _mm_andnot_si128(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm_or_si128(_ref_flags, _diff); + 
} else { + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + /* End of (query ^ ref) */ + + + /* Add (query ^ snp). Don't need to recompute query flags or use SNP flags. */ + _diff = _mm_and_si128(_diff, _mm_or_si128(_mm_xor_si128(_query_high, _snp_high), _mm_xor_si128(_query_low, _snp_low))); + + + /* Test for equality of ref and alt */ + _wildcard = _mm_andnot_si128(_ref_flags, _snp_flags); + _wildcard = _mm_andnot_si128(_mm_or_si128(_mm_xor_si128(_ref_high, _snp_high), _mm_xor_si128(_ref_low, _snp_low)), _wildcard); + + _diff = _mm_andnot_si128(_wildcard, _diff); + + return _diff; +} + +static __m128i +block_diff_standard_wildcard_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, int endcolumni) { __m128i _diff, _wildcard, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags, _snp_high, _snp_low, _snp_flags; - _query_high = _mm_load_si128((__m128i *) query_shifted); - _query_low = _mm_load_si128((__m128i *) &(query_shifted[4])); - _ref_high = _mm_load_si128((__m128i *) ref_ptr); - _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4])); + read_128_wrap_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni); + read_128_wrap_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni); + read_128_wrap_hi(&_snp_high,&_snp_low,&_snp_flags,snp_ptr,endcolumni); _diff = _mm_or_si128(_mm_xor_si128(_query_high, _ref_high), _mm_xor_si128(_query_low, _ref_low)); - _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8])); if (query_unk_mismatch_local_p) { _diff = _mm_or_si128(_query_flags, _diff); } else { _diff = _mm_andnot_si128(_query_flags, _diff); } - _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8])); if (genome_unk_mismatch_p) { _diff = _mm_or_si128(_ref_flags, _diff); } else { @@ -17186,23 +17677,106 @@ /* Add (query ^ snp). Don't need to recompute query flags or use SNP flags. 
*/ - _snp_high = _mm_load_si128((__m128i *) snp_ptr); - _snp_low = _mm_load_si128((__m128i *) &(snp_ptr[4])); - _diff = _mm_and_si128(_diff, _mm_or_si128(_mm_xor_si128(_query_high, _snp_high), _mm_xor_si128(_query_low, _snp_low))); /* Test for equality of ref and alt */ - _snp_flags = _mm_load_si128((__m128i *) &(snp_ptr[8])); _wildcard = _mm_andnot_si128(_ref_flags, _snp_flags); _wildcard = _mm_andnot_si128(_mm_or_si128(_mm_xor_si128(_ref_high, _snp_high), _mm_xor_si128(_ref_low, _snp_low)), _wildcard); _diff = _mm_andnot_si128(_wildcard, _diff); return _diff; +} #endif + +/* wildcard if ref == alt && ref_flag == 0 && alt_flag == 1 */ +/* not wildcard if ref != alt || ref_flag == 1 || alt_flag == 0 */ +/* diffs are (query ^ ref) & (query ^ alt) & ~wildcard */ +/* snp_ptr here is alt_ptr */ +#ifdef HAVE_AVX2 +static __m256i +block_diff_standard_wildcard_256 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + __m256i _diff, _wildcard, _query_high, _query_low, _query_flags, + _ref_high, _ref_low, _ref_flags, _snp_high, _snp_low, _snp_flags; + + read_256(&_query_high,&_query_low,&_query_flags,query_shifted); + read_256(&_ref_high,&_ref_low,&_ref_flags,ref_ptr); + + _diff = _mm256_or_si256(_mm256_xor_si256(_query_high, _ref_high), _mm256_xor_si256(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm256_or_si256(_query_flags, _diff); + } else { + _diff = _mm256_andnot_si256(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm256_or_si256(_ref_flags, _diff); + } else { + _diff = _mm256_andnot_si256(_ref_flags, _diff); + } + /* End of (query ^ ref) */ + + /* Add (query ^ snp). Don't need to recompute query flags or use SNP flags. 
*/ + read_256(&_snp_high,&_snp_low,&_snp_flags,snp_ptr); + + _diff = _mm256_and_si256(_diff, _mm256_or_si256(_mm256_xor_si256(_query_high, _snp_high), _mm256_xor_si256(_query_low, _snp_low))); + + /* Test for equality of ref and alt */ + _wildcard = _mm256_andnot_si256(_ref_flags, _snp_flags); + _wildcard = _mm256_andnot_si256(_mm256_or_si256(_mm256_xor_si256(_ref_high, _snp_high), _mm256_xor_si256(_ref_low, _snp_low)), _wildcard); + + _diff = _mm256_andnot_si256(_wildcard, _diff); + + return _diff; } +#endif + +/* wildcard if ref == alt && ref_flag == 0 && alt_flag == 1 */ +/* not wildcard if ref != alt || ref_flag == 1 || alt_flag == 0 */ +/* diffs are (query ^ ref) & (query ^ alt) & ~wildcard */ +/* snp_ptr here is alt_ptr */ +#ifdef HAVE_AVX512 +static __m512i +block_diff_standard_wildcard_512 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + __m512i _diff, _wildcard, _query_high, _query_low, _query_flags, + _ref_high, _ref_low, _ref_flags, _snp_high, _snp_low, _snp_flags; + + read_512(&_query_high,&_query_low,&_query_flags,query_shifted); + read_512(&_ref_high,&_ref_low,&_ref_flags,ref_ptr); + + _diff = _mm512_or_si512(_mm512_xor_si512(_query_high, _ref_high), _mm512_xor_si512(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm512_or_si512(_query_flags, _diff); + } else { + _diff = _mm512_andnot_si512(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm512_or_si512(_ref_flags, _diff); + } else { + _diff = _mm512_andnot_si512(_ref_flags, _diff); + } + /* End of (query ^ ref) */ + + /* Add (query ^ snp). Don't need to recompute query flags or use SNP flags. 
*/ + read_512(&_snp_high,&_snp_low,&_snp_flags,snp_ptr); + + _diff = _mm512_and_si512(_diff, _mm512_or_si512(_mm512_xor_si512(_query_high, _snp_high), _mm512_xor_si512(_query_low, _snp_low))); + + /* Test for equality of ref and alt */ + _wildcard = _mm512_andnot_si512(_ref_flags, _snp_flags); + _wildcard = _mm512_andnot_si512(_mm512_or_si512(_mm512_xor_si512(_ref_high, _snp_high), _mm512_xor_si512(_ref_low, _snp_low)), _wildcard); + + _diff = _mm512_andnot_si512(_wildcard, _diff); + return _diff; +} +#endif /************************************************************************ @@ -17282,74 +17856,168 @@ } +#ifdef HAVE_SSE2 /* Convert C to T: high/low (A) 0 0 => new high 0; (C) 0 1 => 1; (G) 1 0 => 1; (T) 1 0 => 1 */ /* new high = high | low */ -static Genomediff_T -block_diff_metct (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, - bool query_unk_mismatch_local_p, bool sarrayp) { +static __m128i +block_diff_metct_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp) { + __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - UINT4 diff; + _query_high = _mm_load_si128((__m128i *) query_shifted); + _query_low = _mm_load_si128((__m128i *) &(query_shifted[4])); + _ref_high = _mm_load_si128((__m128i *) ref_ptr); + _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4])); if (sarrayp == true) { - /* Convert everything to 3-nucleotide space */ - diff = 0U; + /* Ignore genome-T to query-C mismatches. 
Convert everything to 3-nucleotide space */ + _diff = _mm_setzero_si128(); } else { /* Mark genome-T to query-C mismatches */ -#ifdef WORDS_BIGENDIAN - diff = (~(query_shifted[0]) & query_shifted[1]) & - (Bigendian_convert_uint(ref_ptr[0]) & Bigendian_convert_uint(ref_ptr[4])); -#else - diff = (~(query_shifted[0]) & query_shifted[1]) & (ref_ptr[0] & ref_ptr[4]); -#endif - debug(printf(" => diff %08X\n",diff)); + _diff = _mm_and_si128(_mm_andnot_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low)); } /* Compare reduced C->T nts */ -#ifdef WORDS_BIGENDIAN - diff |= ((query_shifted[0] | query_shifted[1]) ^ (Bigendian_convert_uint(ref_ptr[0]) | Bigendian_convert_uint(ref_ptr[4]))) | - (query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4])); -#else - diff |= ((query_shifted[0] | query_shifted[1]) ^ (ref_ptr[0] | ref_ptr[4])) | (query_shifted[1] ^ ref_ptr[4]); -#endif - debug(printf(" => diff %08X\n",diff)); + _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_or_si128(_query_high, _query_low), _mm_or_si128(_ref_high, _ref_low))); + _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low)); + + _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8])); + if (query_unk_mismatch_local_p) { + _diff = _mm_or_si128(_query_flags, _diff); + } else { + _diff = _mm_andnot_si128(_query_flags, _diff); + } + _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8])); + if (genome_unk_mismatch_p) { + _diff = _mm_or_si128(_ref_flags, _diff); + } else { + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + + return _diff; +} + +#ifdef USE_SHIFT_HILO +static __m128i +block_diff_metct_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp, int startcolumni) { + __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_128_shift_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni); + read_128_shift_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni); + + 
if (sarrayp == true) { + /* Ignore genome-T to query-C mismatches. Convert everything to 3-nucleotide space */ + _diff = _mm_setzero_si128(); + } else { + /* Mark genome-T to query-C mismatches */ + _diff = _mm_and_si128(_mm_andnot_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low)); + } + + /* Compare reduced C->T nts */ + _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_or_si128(_query_high, _query_low), _mm_or_si128(_ref_high, _ref_low))); + _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low)); - /* Flags: Considering N as a mismatch */ if (query_unk_mismatch_local_p) { - debug(printf("Marking query flags: query %08X ",query_shifted[2])); - diff |= query_shifted[2]; + _diff = _mm_or_si128(_query_flags, _diff); } else { - debug(printf("Clearing query flags: query %08X ",query_shifted[2])); - diff &= ~(query_shifted[2]); + _diff = _mm_andnot_si128(_query_flags, _diff); } if (genome_unk_mismatch_p) { - debug(printf("Marking genome flags: genome %08X ",ref_ptr[8])); -#ifdef WORDS_BIGENDIAN - diff |= Bigendian_convert_uint(ref_ptr[8]); -#else - diff |= (ref_ptr[8]); -#endif + _diff = _mm_or_si128(_ref_flags, _diff); } else { - debug(printf("Clearing genome flags: genome %08X ",ref_ptr[8])); -#ifdef WORDS_BIGENDIAN - diff &= ~(Bigendian_convert_uint(ref_ptr[8])); -#else - diff &= ~(ref_ptr[8]); + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + + return _diff; +} + +static __m128i +block_diff_metct_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp, int endcolumni) { + __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_128_shift_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni); + read_128_shift_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni); + + if (sarrayp == true) { + /* Ignore genome-T to query-C mismatches. 
Convert everything to 3-nucleotide space */ + _diff = _mm_setzero_si128(); + } else { + /* Mark genome-T to query-C mismatches */ + _diff = _mm_and_si128(_mm_andnot_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low)); + } + + /* Compare reduced C->T nts */ + _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_or_si128(_query_high, _query_low), _mm_or_si128(_ref_high, _ref_low))); + _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm_or_si128(_query_flags, _diff); + } else { + _diff = _mm_andnot_si128(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm_or_si128(_ref_flags, _diff); + } else { + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + + return _diff; +} +#endif #endif + +#ifdef HAVE_SSSE3 +/* Convert C to T: high/low (A) 0 0 => new high 0; (C) 0 1 => 1; (G) 1 0 => 1; (T) 1 0 => 1 */ +/* new high = high | low */ +static __m128i +block_diff_metct_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp, + int startcolumni) { + __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_128_wrap_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni); + read_128_wrap_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni); + + if (sarrayp == true) { + /* Ignore genome-T to query-C mismatches. 
Convert everything to 3-nucleotide space */ + _diff = _mm_setzero_si128(); + } else { + /* Mark genome-T to query-C mismatches */ + _diff = _mm_and_si128(_mm_andnot_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low)); } - debug(printf(" => diff %08X\n",diff)); - return diff; + /* Compare reduced C->T nts */ + _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_or_si128(_query_high, _query_low), _mm_or_si128(_ref_high, _ref_low))); + _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low)); -#else + if (query_unk_mismatch_local_p) { + _diff = _mm_or_si128(_query_flags, _diff); + } else { + _diff = _mm_andnot_si128(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm_or_si128(_ref_flags, _diff); + } else { + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + + return _diff; +} + +static __m128i +block_diff_metct_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp, + int endcolumni) { __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; - _query_high = _mm_load_si128((__m128i *) query_shifted); - _ref_high = _mm_load_si128((__m128i *) ref_ptr); - _query_low = _mm_load_si128((__m128i *) &(query_shifted[4])); - _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4])); + read_128_wrap_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni); + read_128_wrap_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni); if (sarrayp == true) { /* Ignore genome-T to query-C mismatches. 
Convert everything to 3-nucleotide space */ @@ -17363,14 +18031,12 @@ _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_or_si128(_query_high, _query_low), _mm_or_si128(_ref_high, _ref_low))); _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low)); - _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8])); if (query_unk_mismatch_local_p) { _diff = _mm_or_si128(_query_flags, _diff); } else { _diff = _mm_andnot_si128(_query_flags, _diff); } - _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8])); if (genome_unk_mismatch_p) { _diff = _mm_or_si128(_ref_flags, _diff); } else { @@ -17378,8 +18044,87 @@ } return _diff; +} +#endif + + +#ifdef HAVE_AVX2 +/* Convert C to T: high/low (A) 0 0 => new high 0; (C) 0 1 => 1; (G) 1 0 => 1; (T) 1 0 => 1 */ +/* new high = high | low */ +static __m256i +block_diff_metct_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp) { + __m256i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_256(&_query_high,&_query_low,&_query_flags,query_shifted); + read_256(&_ref_high,&_ref_low,&_ref_flags,ref_ptr); + + if (sarrayp == true) { + /* Ignore genome-T to query-C mismatches. 
Convert everything to 3-nucleotide space */ + _diff = _mm256_setzero_si256(); + } else { + /* Mark genome-T to query-C mismatches */ + _diff = _mm256_and_si256(_mm256_andnot_si256(_query_high, _query_low), _mm256_and_si256(_ref_high, _ref_low)); + } + + /* Compare reduced C->T nts */ + _diff = _mm256_or_si256(_diff, _mm256_xor_si256(_mm256_or_si256(_query_high, _query_low), _mm256_or_si256(_ref_high, _ref_low))); + _diff = _mm256_or_si256(_diff, _mm256_xor_si256(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm256_or_si256(_query_flags, _diff); + } else { + _diff = _mm256_andnot_si256(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm256_or_si256(_ref_flags, _diff); + } else { + _diff = _mm256_andnot_si256(_ref_flags, _diff); + } + + return _diff; +} #endif + +#ifdef HAVE_AVX512 +/* Convert C to T: high/low (A) 0 0 => new high 0; (C) 0 1 => 1; (G) 1 0 => 1; (T) 1 0 => 1 */ +/* new high = high | low */ +static __m512i +block_diff_metct_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp) { + __m512i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_512(&_query_high,&_query_low,&_query_flags,query_shifted); + read_512(&_ref_high,&_ref_low,&_ref_flags,ref_ptr); + + if (sarrayp == true) { + /* Ignore genome-T to query-C mismatches. 
Convert everything to 3-nucleotide space */ + _diff = _mm512_setzero_si512(); + } else { + /* Mark genome-T to query-C mismatches */ + _diff = _mm512_and_si512(_mm512_andnot_si512(_query_high, _query_low), _mm512_and_si512(_ref_high, _ref_low)); + } + + /* Compare reduced C->T nts */ + _diff = _mm512_or_si512(_diff, _mm512_xor_si512(_mm512_or_si512(_query_high, _query_low), _mm512_or_si512(_ref_high, _ref_low))); + _diff = _mm512_or_si512(_diff, _mm512_xor_si512(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm512_or_si512(_query_flags, _diff); + } else { + _diff = _mm512_andnot_si512(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm512_or_si512(_ref_flags, _diff); + } else { + _diff = _mm512_andnot_si512(_ref_flags, _diff); + } + + return _diff; } +#endif static UINT4 @@ -17455,74 +18200,58 @@ } +#ifdef HAVE_SSE2 /* Convert G to A: high/low (A) 0 0 => new high 0; (C) 0 1 => 0; (G) 1 0 => 0; (T) 1 0 => 1 */ /* new high = high & low */ -static Genomediff_T -block_diff_metga (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, - bool query_unk_mismatch_local_p, bool sarrayp) { +static __m128i +block_diff_metga_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp) { + __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - UINT4 diff; + _query_high = _mm_load_si128((__m128i *) query_shifted); + _query_low = _mm_load_si128((__m128i *) &(query_shifted[4])); + _ref_high = _mm_load_si128((__m128i *) ref_ptr); + _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4])); if (sarrayp == true) { /* Ignore genome-A to query-G mismatches. Convert everything to 3-nucleotide space. 
*/ - diff = 0U; + _diff = _mm_setzero_si128(); } else { /* Mark genome-A to query-G mismatches */ -#ifdef WORDS_BIGENDIAN - diff = (query_shifted[0] & ~(query_shifted[1])) & - ~(Bigendian_convert_uint(ref_ptr[0]) | Bigendian_convert_uint(ref_ptr[4])); -#else - diff = (query_shifted[0] & ~(query_shifted[1])) & ~(ref_ptr[0] | ref_ptr[4]); -#endif - debug(printf(" => diff %08X\n",diff)); + _diff = _mm_andnot_si128(_query_low, _query_high); + _diff = _mm_andnot_si128(_ref_high, _diff); + _diff = _mm_andnot_si128(_ref_low, _diff); } /* Compare reduced G->A nts */ -#ifdef WORDS_BIGENDIAN - diff |= ((query_shifted[0] & query_shifted[1]) ^ (Bigendian_convert_uint(ref_ptr[0]) & Bigendian_convert_uint(ref_ptr[4]))) | - (query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4])); -#else - diff |= ((query_shifted[0] & query_shifted[1]) ^ (ref_ptr[0] & ref_ptr[4])) | (query_shifted[1] ^ ref_ptr[4]); -#endif - debug(printf(" => diff %08X\n",diff)); - + _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_and_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low))); + _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low)); - /* Flags: Considering N as a mismatch */ + _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8])); if (query_unk_mismatch_local_p) { - debug(printf("Marking query flags: query %08X ",query_shifted[2])); - diff |= query_shifted[2]; + _diff = _mm_or_si128(_query_flags, _diff); } else { - debug(printf("Clearing query flags: query %08X ",query_shifted[2])); - diff &= ~(query_shifted[2]); + _diff = _mm_andnot_si128(_query_flags, _diff); } + _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8])); if (genome_unk_mismatch_p) { - debug(printf("Marking genome flags: genome %08X ",ref_ptr[8])); -#ifdef WORDS_BIGENDIAN - diff |= Bigendian_convert_uint(ref_ptr[8]); -#else - diff |= (ref_ptr[8]); -#endif + _diff = _mm_or_si128(_ref_flags, _diff); } else { - debug(printf("Clearing genome flags: genome %08X ",ref_ptr[8])); -#ifdef WORDS_BIGENDIAN - diff &= 
~(Bigendian_convert_uint(ref_ptr[8])); -#else - diff &= ~(ref_ptr[8]); -#endif + _diff = _mm_andnot_si128(_ref_flags, _diff); } - debug(printf(" => diff %08X\n",diff)); - return diff; + return _diff; +} -#else +#ifdef USE_SHIFT_HILO +static __m128i +block_diff_metga_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp, int startcolumni) { __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; - _query_high = _mm_load_si128((__m128i *) query_shifted); - _ref_high = _mm_load_si128((__m128i *) ref_ptr); - _query_low = _mm_load_si128((__m128i *) &(query_shifted[4])); - _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4])); + read_128_shift_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni); + read_128_shift_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni); if (sarrayp == true) { /* Ignore genome-A to query-G mismatches. Convert everything to 3-nucleotide space. */ @@ -17538,14 +18267,12 @@ _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_and_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low))); _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low)); - _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8])); if (query_unk_mismatch_local_p) { _diff = _mm_or_si128(_query_flags, _diff); } else { _diff = _mm_andnot_si128(_query_flags, _diff); } - _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8])); if (genome_unk_mismatch_p) { _diff = _mm_or_si128(_ref_flags, _diff); } else { @@ -17553,19 +18280,220 @@ } return _diff; -#endif } -static UINT4 -block_diff_cmet_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, - bool plusp, int genestrand, bool query_unk_mismatch_local_p) { - if (genestrand == +2) { - if (plusp) { - return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); - } else { - return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); - } - 
} else { +static __m128i +block_diff_metga_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp, int endcolumni) { + __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_128_shift_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni); + read_128_shift_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni); + + if (sarrayp == true) { + /* Ignore genome-A to query-G mismatches. Convert everything to 3-nucleotide space. */ + _diff = _mm_setzero_si128(); + } else { + /* Mark genome-A to query-G mismatches */ + _diff = _mm_andnot_si128(_query_low, _query_high); + _diff = _mm_andnot_si128(_ref_high, _diff); + _diff = _mm_andnot_si128(_ref_low, _diff); + } + + /* Compare reduced G->A nts */ + _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_and_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low))); + _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm_or_si128(_query_flags, _diff); + } else { + _diff = _mm_andnot_si128(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm_or_si128(_ref_flags, _diff); + } else { + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + + return _diff; +} +#endif +#endif + +#ifdef HAVE_SSSE3 +/* Convert G to A: high/low (A) 0 0 => new high 0; (C) 0 1 => 0; (G) 1 0 => 0; (T) 1 0 => 1 */ +/* new high = high & low */ +static __m128i +block_diff_metga_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp, + int startcolumni) { + __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_128_wrap_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni); + read_128_wrap_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni); + + if (sarrayp == true) { + /* Ignore genome-A to query-G mismatches. 
Convert everything to 3-nucleotide space. */ + _diff = _mm_setzero_si128(); + } else { + /* Mark genome-A to query-G mismatches */ + _diff = _mm_andnot_si128(_query_low, _query_high); + _diff = _mm_andnot_si128(_ref_high, _diff); + _diff = _mm_andnot_si128(_ref_low, _diff); + } + + /* Compare reduced G->A nts */ + _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_and_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low))); + _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm_or_si128(_query_flags, _diff); + } else { + _diff = _mm_andnot_si128(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm_or_si128(_ref_flags, _diff); + } else { + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + + return _diff; +} + +static __m128i +block_diff_metga_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp, + int endcolumni) { + __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_128_wrap_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni); + read_128_wrap_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni); + + if (sarrayp == true) { + /* Ignore genome-A to query-G mismatches. Convert everything to 3-nucleotide space. 
*/ + _diff = _mm_setzero_si128(); + } else { + /* Mark genome-A to query-G mismatches */ + _diff = _mm_andnot_si128(_query_low, _query_high); + _diff = _mm_andnot_si128(_ref_high, _diff); + _diff = _mm_andnot_si128(_ref_low, _diff); + } + + /* Compare reduced G->A nts */ + _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_and_si128(_query_high, _query_low), _mm_and_si128(_ref_high, _ref_low))); + _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm_or_si128(_query_flags, _diff); + } else { + _diff = _mm_andnot_si128(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm_or_si128(_ref_flags, _diff); + } else { + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + + return _diff; +} +#endif + +#ifdef HAVE_AVX2 +/* Convert G to A: high/low (A) 0 0 => new high 0; (C) 0 1 => 0; (G) 1 0 => 0; (T) 1 0 => 1 */ +/* new high = high & low */ +static __m256i +block_diff_metga_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp) { + __m256i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_256(&_query_high,&_query_low,&_query_flags,query_shifted); + read_256(&_ref_high,&_ref_low,&_ref_flags,ref_ptr); + + if (sarrayp == true) { + /* Ignore genome-A to query-G mismatches. Convert everything to 3-nucleotide space. 
*/ + _diff = _mm256_setzero_si256(); + } else { + /* Mark genome-A to query-G mismatches */ + _diff = _mm256_andnot_si256(_query_low, _query_high); + _diff = _mm256_andnot_si256(_ref_high, _diff); + _diff = _mm256_andnot_si256(_ref_low, _diff); + } + + /* Compare reduced G->A nts */ + _diff = _mm256_or_si256(_diff, _mm256_xor_si256(_mm256_and_si256(_query_high, _query_low), _mm256_and_si256(_ref_high, _ref_low))); + _diff = _mm256_or_si256(_diff, _mm256_xor_si256(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm256_or_si256(_query_flags, _diff); + } else { + _diff = _mm256_andnot_si256(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm256_or_si256(_ref_flags, _diff); + } else { + _diff = _mm256_andnot_si256(_ref_flags, _diff); + } + + return _diff; +} +#endif + +#ifdef HAVE_AVX512 +/* Convert G to A: high/low (A) 0 0 => new high 0; (C) 0 1 => 0; (G) 1 0 => 0; (T) 1 0 => 1 */ +/* new high = high & low */ +static __m512i +block_diff_metga_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp) { + __m512i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_512(&_query_high,&_query_low,&_query_flags,query_shifted); + read_512(&_ref_high,&_ref_low,&_ref_flags,ref_ptr); + + if (sarrayp == true) { + /* Ignore genome-A to query-G mismatches. Convert everything to 3-nucleotide space. 
*/ + _diff = _mm512_setzero_si512(); + } else { + /* Mark genome-A to query-G mismatches */ + _diff = _mm512_andnot_si512(_query_low, _query_high); + _diff = _mm512_andnot_si512(_ref_high, _diff); + _diff = _mm512_andnot_si512(_ref_low, _diff); + } + + /* Compare reduced G->A nts */ + _diff = _mm512_or_si512(_diff, _mm512_xor_si512(_mm512_and_si512(_query_high, _query_low), _mm512_and_si512(_ref_high, _ref_low))); + _diff = _mm512_or_si512(_diff, _mm512_xor_si512(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm512_or_si512(_query_flags, _diff); + } else { + _diff = _mm512_andnot_si512(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm512_or_si512(_ref_flags, _diff); + } else { + _diff = _mm512_andnot_si512(_ref_flags, _diff); + } + + return _diff; +} +#endif + + +static UINT4 +block_diff_cmet_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } else { if (plusp) { return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } else { @@ -17574,189 +18502,527 @@ } } -static Genomediff_T -block_diff_cmet (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, - bool plusp, int genestrand, bool query_unk_mismatch_local_p) { +#ifdef HAVE_SSE2 +static __m128i +block_diff_cmet_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { if (genestrand == +2) { if (plusp) { - return block_diff_metga(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_metga_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } else { - return 
block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_metct_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } } else { if (plusp) { - return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_metct_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } else { - return block_diff_metga(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_metga_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } } } -static UINT4 -block_diff_cmet_sarray_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, - bool plusp, int genestrand, bool query_unk_mismatch_local_p) { +#ifdef USE_SHIFT_HILO +static __m128i +block_diff_cmet_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int startcolumni) { if (genestrand == +2) { if (plusp) { - return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_metga_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); } else { - return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_metct_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); } } else { if (plusp) { - return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_metct_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); } else { - return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_metga_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); } } } -static Genomediff_T -block_diff_cmet_sarray (Genomecomp_T 
*query_shifted, Genomecomp_T *ref_ptr, - bool plusp, int genestrand, bool query_unk_mismatch_local_p) { +static __m128i +block_diff_cmet_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { if (genestrand == +2) { if (plusp) { - return block_diff_metga(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_metga_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); } else { - return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_metct_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); } } else { if (plusp) { - return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_metct_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); } else { - return block_diff_metga(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_metga_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); } } } +#endif +#endif -#ifdef GSNAP -/* Ignores snp_ptr */ -static UINT4 -block_diff_cmet_snp_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, - bool plusp, int genestrand, bool query_unk_mismatch_local_p) { +#ifdef HAVE_SSSE3 +static __m128i +block_diff_cmet_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int startcolumni) { if (genestrand == +2) { if (plusp) { - return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_metga_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); } else { - return 
block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_metct_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); } } else { if (plusp) { - return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_metct_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); } else { - return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_metga_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); } } } -#endif +static __m128i +block_diff_cmet_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_metga_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } else { + return block_diff_metct_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } + } else { + if (plusp) { + return block_diff_metct_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } else { + return block_diff_metga_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } + } +} +#endif -#ifdef GSNAP -/* Ignores snp_ptr */ -static Genomediff_T -block_diff_cmet_snp (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, +#ifdef HAVE_AVX2 +static __m256i +block_diff_cmet_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, bool plusp, int genestrand, bool query_unk_mismatch_local_p) { if (genestrand == +2) { if (plusp) { - return block_diff_metga(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return 
block_diff_metga_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } else { - return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_metct_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } } else { if (plusp) { - return block_diff_metct(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_metct_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } else { - return block_diff_metga(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_metga_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } } } #endif - -/************************************************************************ - * ATOI - ************************************************************************/ +#ifdef HAVE_AVX512 +static __m512i +block_diff_cmet_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_metga_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_metct_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } else { + if (plusp) { + return block_diff_metct_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_metga_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } +} +#endif static UINT4 -block_diff_a2iag_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, - bool query_unk_mismatch_local_p, bool sarrayp) { - UINT4 diff; - - if (sarrayp == true) { - /* Ignore genome-G to query-A mismatches. Convert everything to 3-nucleotide space. 
*/ - diff = 0U; +block_diff_cmet_sarray_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } } else { - /* Mark genome-G to query-A mismatches */ -#ifdef WORDS_BIGENDIAN - diff = ~(query_shifted[0] | query_shifted[1]) & - (Bigendian_convert_uint(ref_ptr[0]) & ~Bigendian_convert_uint(ref_ptr[4])); -#elif !defined(HAVE_SSE2) - diff = ~(query_shifted[0] | query_shifted[1]) & (ref_ptr[0] & ~(ref_ptr[4])); -#else - diff = ~(query_shifted[0] | query_shifted[4]) & (ref_ptr[0] & ~(ref_ptr[4])); -#endif - debug(printf(" => diff %08X\n",diff)); + if (plusp) { + return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } } +} - /* Compare reduced A->G nts */ -#ifdef WORDS_BIGENDIAN - diff |= ((query_shifted[0] | ~(query_shifted[1])) ^ (Bigendian_convert_uint(ref_ptr[0]) | ~(Bigendian_convert_uint(ref_ptr[4])))) | - (query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4])); -#elif !defined(HAVE_SSE2) - diff |= ((query_shifted[0] | ~(query_shifted[1])) ^ (ref_ptr[0] | ~(ref_ptr[4]))) | (query_shifted[1] ^ ref_ptr[4]); - /* Because (a ^ b) = (~a ^ ~b), this is equivalent to - diff |= ((~query_shifted[0] & query_shifted[1]) ^ (~ref_ptr[0] & ref_ptr[4])) | (query_shifted[1] ^ ref_ptr[4]); - */ -#else - diff |= ((query_shifted[0] | ~(query_shifted[4])) ^ (ref_ptr[0] | ~(ref_ptr[4]))) | (query_shifted[4] ^ ref_ptr[4]); +#ifdef HAVE_SSE2 +static __m128i +block_diff_cmet_sarray_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if 
(plusp) { + return block_diff_metga_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_metct_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } + } else { + if (plusp) { + return block_diff_metct_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_metga_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } + } +} + +#ifdef USE_SHIFT_HILO +static __m128i +block_diff_cmet_sarray_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int startcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_metga_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); + } else { + return block_diff_metct_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); + } + } else { + if (plusp) { + return block_diff_metct_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); + } else { + return block_diff_metga_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); + } + } +} + +static __m128i +block_diff_cmet_sarray_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_metga_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } else { + return block_diff_metct_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } + } else { + if (plusp) { + return block_diff_metct_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } else { + return 
block_diff_metga_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } + } +} +#endif #endif - debug(printf(" => diff %08X\n",diff)); - /* Flags: Considering N as a mismatch */ -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - if (query_unk_mismatch_local_p) { - debug(printf("Marking query flags: query %08X ",query_shifted[2])); - diff |= query_shifted[2]; +#ifdef HAVE_SSSE3 +static __m128i +block_diff_cmet_sarray_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int startcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_metga_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); + } else { + return block_diff_metct_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); + } } else { - debug(printf("Clearing query flags: query %08X ",query_shifted[2])); - diff &= ~(query_shifted[2]); + if (plusp) { + return block_diff_metct_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); + } else { + return block_diff_metga_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); + } } -#else - if (query_unk_mismatch_local_p) { - debug(printf("Marking query flags: query %08X ",query_shifted[8])); - diff |= query_shifted[8]; +} + +static __m128i +block_diff_cmet_sarray_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_metga_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } else { + return block_diff_metct_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } } else { - debug(printf("Clearing query flags: query %08X 
",query_shifted[8])); - diff &= ~(query_shifted[8]); + if (plusp) { + return block_diff_metct_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } else { + return block_diff_metga_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } } +} #endif - if (genome_unk_mismatch_p) { - debug(printf("Marking genome flags: genome %08X ",ref_ptr[8])); -#ifdef WORDS_BIGENDIAN - diff |= Bigendian_convert_uint(ref_ptr[8]); -#else - diff |= (ref_ptr[8]); +#ifdef HAVE_AVX2 +static __m256i +block_diff_cmet_sarray_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_metga_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_metct_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } + } else { + if (plusp) { + return block_diff_metct_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_metga_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } + } +} #endif + +#ifdef HAVE_AVX512 +static __m512i +block_diff_cmet_sarray_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_metga_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_metct_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } } else { - debug(printf("Clearing genome flags: genome %08X ",ref_ptr[8])); -#ifdef WORDS_BIGENDIAN - diff &= ~(Bigendian_convert_uint(ref_ptr[8])); -#else - diff &= ~(ref_ptr[8]); + if (plusp) { + return block_diff_metct_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return 
block_diff_metga_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } + } +} #endif + + +#ifdef GSNAP +/* Ignores snp_ptr */ +static UINT4 +block_diff_cmet_snp_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } else { + if (plusp) { + return block_diff_metct_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_metga_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } } - debug(printf(" => diff %08X\n",diff)); +} +#endif - return diff; + +#if defined(GSNAP) && defined(HAVE_SSE2) +/* Ignores snp_ptr */ +static __m128i +block_diff_cmet_snp_128 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_metga_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_metct_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } else { + if (plusp) { + return block_diff_metct_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_metga_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } } +#ifdef USE_SHIFT_HILO +static __m128i +block_diff_cmet_snp_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int startcolumni) { + if (genestrand == +2) { + if (plusp) { + return 
block_diff_metga_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } else { + return block_diff_metct_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } + } else { + if (plusp) { + return block_diff_metct_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } else { + return block_diff_metga_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } + } +} -/* Convert A->G: new high = high | ~low */ -static Genomediff_T -block_diff_a2iag (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, - bool query_unk_mismatch_local_p, bool sarrayp) { +static __m128i +block_diff_cmet_snp_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_metga_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } else { + return block_diff_metct_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } + } else { + if (plusp) { + return block_diff_metct_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } else { + return block_diff_metga_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } + } +} +#endif +#endif -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) +#if defined(GSNAP) && defined(HAVE_SSSE3) +/* Ignores snp_ptr */ +static __m128i +block_diff_cmet_snp_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int startcolumni) { + if (genestrand == +2) { + if (plusp) { + return 
block_diff_metga_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } else { + return block_diff_metct_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } + } else { + if (plusp) { + return block_diff_metct_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } else { + return block_diff_metga_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } + } +} + +static __m128i +block_diff_cmet_snp_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_metga_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } else { + return block_diff_metct_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } + } else { + if (plusp) { + return block_diff_metct_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } else { + return block_diff_metga_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } + } +} +#endif + +#if defined(GSNAP) && defined(HAVE_AVX2) +/* Ignores snp_ptr */ +static __m256i +block_diff_cmet_snp_256 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_metga_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_metct_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } else { + if (plusp) { + return block_diff_metct_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return 
block_diff_metga_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } +} +#endif + +#if defined(GSNAP) && defined(HAVE_AVX512) +/* Ignores snp_ptr */ +static __m512i +block_diff_cmet_snp_512 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_metga_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_metct_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } else { + if (plusp) { + return block_diff_metct_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_metga_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } +} +#endif + + +/************************************************************************ + * ATOI + ************************************************************************/ + +static UINT4 +block_diff_a2iag_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp) { UINT4 diff; if (sarrayp == true) { @@ -17767,8 +19033,10 @@ #ifdef WORDS_BIGENDIAN diff = ~(query_shifted[0] | query_shifted[1]) & (Bigendian_convert_uint(ref_ptr[0]) & ~Bigendian_convert_uint(ref_ptr[4])); -#else +#elif !defined(HAVE_SSE2) diff = ~(query_shifted[0] | query_shifted[1]) & (ref_ptr[0] & ~(ref_ptr[4])); +#else + diff = ~(query_shifted[0] | query_shifted[4]) & (ref_ptr[0] & ~(ref_ptr[4])); #endif debug(printf(" => diff %08X\n",diff)); } @@ -17777,15 +19045,18 @@ #ifdef WORDS_BIGENDIAN diff |= ((query_shifted[0] | ~(query_shifted[1])) ^ (Bigendian_convert_uint(ref_ptr[0]) | ~(Bigendian_convert_uint(ref_ptr[4])))) | (query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4])); -#else +#elif !defined(HAVE_SSE2) diff |= ((query_shifted[0] | ~(query_shifted[1])) ^ (ref_ptr[0] | ~(ref_ptr[4]))) | (query_shifted[1] ^ 
ref_ptr[4]); /* Because (a ^ b) = (~a ^ ~b), this is equivalent to diff |= ((~query_shifted[0] & query_shifted[1]) ^ (~ref_ptr[0] & ref_ptr[4])) | (query_shifted[1] ^ ref_ptr[4]); */ +#else + diff |= ((query_shifted[0] | ~(query_shifted[4])) ^ (ref_ptr[0] | ~(ref_ptr[4]))) | (query_shifted[4] ^ ref_ptr[4]); #endif debug(printf(" => diff %08X\n",diff)); /* Flags: Considering N as a mismatch */ +#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) if (query_unk_mismatch_local_p) { debug(printf("Marking query flags: query %08X ",query_shifted[2])); diff |= query_shifted[2]; @@ -17793,6 +19064,15 @@ debug(printf("Clearing query flags: query %08X ",query_shifted[2])); diff &= ~(query_shifted[2]); } +#else + if (query_unk_mismatch_local_p) { + debug(printf("Marking query flags: query %08X ",query_shifted[8])); + diff |= query_shifted[8]; + } else { + debug(printf("Clearing query flags: query %08X ",query_shifted[8])); + diff &= ~(query_shifted[8]); + } +#endif if (genome_unk_mismatch_p) { debug(printf("Marking genome flags: genome %08X ",ref_ptr[8])); @@ -17812,13 +19092,19 @@ debug(printf(" => diff %08X\n",diff)); return diff; +} -#else + +#ifdef HAVE_SSE2 +/* Convert A->G: new high = high | ~low */ +static __m128i +block_diff_a2iag_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp) { __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; _query_high = _mm_load_si128((__m128i *) query_shifted); - _ref_high = _mm_load_si128((__m128i *) ref_ptr); _query_low = _mm_load_si128((__m128i *) &(query_shifted[4])); + _ref_high = _mm_load_si128((__m128i *) ref_ptr); _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4])); if (sarrayp == true) { @@ -17848,14 +19134,237 @@ } return _diff; -#endif } +#ifdef USE_SHIFT_HILO +static __m128i +block_diff_a2iag_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp, int startcolumni) { + __m128i 
_diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; -static UINT4 -block_diff_a2itc_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, - bool query_unk_mismatch_local_p, bool sarrayp) { - UINT4 diff; + read_128_shift_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni); + read_128_shift_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni); + + if (sarrayp == true) { + /* Ignore genome-G to query-A mismatches. Convert everything to 3-nucleotide space. */ + _diff = _mm_setzero_si128(); + } else { + /* Mark genome-G to query-A mismatches */ + _diff = _mm_andnot_si128(_mm_or_si128(_query_high, _query_low), _mm_andnot_si128(_ref_low, _ref_high)); + } + + /* Compare reduced A->G nts */ + _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_andnot_si128(_query_high, _query_low), _mm_andnot_si128(_ref_high, _ref_low))); + _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm_or_si128(_query_flags, _diff); + } else { + _diff = _mm_andnot_si128(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm_or_si128(_ref_flags, _diff); + } else { + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + + return _diff; +} + +static __m128i +block_diff_a2iag_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp, int endcolumni) { + __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_128_shift_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni); + read_128_shift_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni); + + if (sarrayp == true) { + /* Ignore genome-G to query-A mismatches. Convert everything to 3-nucleotide space. 
*/ + _diff = _mm_setzero_si128(); + } else { + /* Mark genome-G to query-A mismatches */ + _diff = _mm_andnot_si128(_mm_or_si128(_query_high, _query_low), _mm_andnot_si128(_ref_low, _ref_high)); + } + + /* Compare reduced A->G nts */ + _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_andnot_si128(_query_high, _query_low), _mm_andnot_si128(_ref_high, _ref_low))); + _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm_or_si128(_query_flags, _diff); + } else { + _diff = _mm_andnot_si128(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm_or_si128(_ref_flags, _diff); + } else { + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + + return _diff; +} +#endif +#endif + +#ifdef HAVE_SSSE3 +/* Convert A->G: new high = high | ~low */ +static __m128i +block_diff_a2iag_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp, + int startcolumni) { + __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_128_wrap_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni); + read_128_wrap_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni); + + if (sarrayp == true) { + /* Ignore genome-G to query-A mismatches. Convert everything to 3-nucleotide space. 
*/ + _diff = _mm_setzero_si128(); + } else { + /* Mark genome-G to query-A mismatches */ + _diff = _mm_andnot_si128(_mm_or_si128(_query_high, _query_low), _mm_andnot_si128(_ref_low, _ref_high)); + } + + /* Compare reduced A->G nts */ + _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_andnot_si128(_query_high, _query_low), _mm_andnot_si128(_ref_high, _ref_low))); + _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm_or_si128(_query_flags, _diff); + } else { + _diff = _mm_andnot_si128(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm_or_si128(_ref_flags, _diff); + } else { + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + + return _diff; +} + +static __m128i +block_diff_a2iag_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp, + int endcolumni) { + __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_128_wrap_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni); + read_128_wrap_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni); + + if (sarrayp == true) { + /* Ignore genome-G to query-A mismatches. Convert everything to 3-nucleotide space. 
*/ + _diff = _mm_setzero_si128(); + } else { + /* Mark genome-G to query-A mismatches */ + _diff = _mm_andnot_si128(_mm_or_si128(_query_high, _query_low), _mm_andnot_si128(_ref_low, _ref_high)); + } + + /* Compare reduced A->G nts */ + _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_andnot_si128(_query_high, _query_low), _mm_andnot_si128(_ref_high, _ref_low))); + _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm_or_si128(_query_flags, _diff); + } else { + _diff = _mm_andnot_si128(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm_or_si128(_ref_flags, _diff); + } else { + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + + return _diff; +} +#endif + +#ifdef HAVE_AVX2 +/* Convert A->G: new high = high | ~low */ +static __m256i +block_diff_a2iag_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp) { + __m256i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_256(&_query_high,&_query_low,&_query_flags,query_shifted); + read_256(&_ref_high,&_ref_low,&_ref_flags,ref_ptr); + + if (sarrayp == true) { + /* Ignore genome-G to query-A mismatches. Convert everything to 3-nucleotide space. 
*/ + _diff = _mm256_setzero_si256(); + } else { + /* Mark genome-G to query-A mismatches */ + _diff = _mm256_andnot_si256(_mm256_or_si256(_query_high, _query_low), _mm256_andnot_si256(_ref_low, _ref_high)); + } + + /* Compare reduced A->G nts */ + _diff = _mm256_or_si256(_diff, _mm256_xor_si256(_mm256_andnot_si256(_query_high, _query_low), _mm256_andnot_si256(_ref_high, _ref_low))); + _diff = _mm256_or_si256(_diff, _mm256_xor_si256(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm256_or_si256(_query_flags, _diff); + } else { + _diff = _mm256_andnot_si256(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm256_or_si256(_ref_flags, _diff); + } else { + _diff = _mm256_andnot_si256(_ref_flags, _diff); + } + + return _diff; +} +#endif + +#ifdef HAVE_AVX512 +/* Convert A->G: new high = high | ~low */ +static __m512i +block_diff_a2iag_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp) { + __m512i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_512(&_query_high,&_query_low,&_query_flags,query_shifted); + read_512(&_ref_high,&_ref_low,&_ref_flags,ref_ptr); + + if (sarrayp == true) { + /* Ignore genome-G to query-A mismatches. Convert everything to 3-nucleotide space. 
*/ + _diff = _mm512_setzero_si512(); + } else { + /* Mark genome-G to query-A mismatches */ + _diff = _mm512_andnot_si512(_mm512_or_si512(_query_high, _query_low), _mm512_andnot_si512(_ref_low, _ref_high)); + } + + /* Compare reduced A->G nts */ + _diff = _mm512_or_si512(_diff, _mm512_xor_si512(_mm512_andnot_si512(_query_high, _query_low), _mm512_andnot_si512(_ref_high, _ref_low))); + _diff = _mm512_or_si512(_diff, _mm512_xor_si512(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm512_or_si512(_query_flags, _diff); + } else { + _diff = _mm512_andnot_si512(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm512_or_si512(_ref_flags, _diff); + } else { + _diff = _mm512_andnot_si512(_ref_flags, _diff); + } + + return _diff; +} +#endif + + +static UINT4 +block_diff_a2itc_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp) { + UINT4 diff; if (sarrayp == true) { /* Ignore genome-C to query-T mismatches */ @@ -17924,72 +19433,55 @@ } +#ifdef HAVE_SSE2 /* Convert T->C: new high = high & ~low */ -static Genomediff_T -block_diff_a2itc (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, - bool query_unk_mismatch_local_p, bool sarrayp) { +static __m128i +block_diff_a2itc_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp) { + __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - UINT4 diff; + _query_high = _mm_load_si128((__m128i *) query_shifted); + _query_low = _mm_load_si128((__m128i *) &(query_shifted[4])); + _ref_high = _mm_load_si128((__m128i *) ref_ptr); + _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4])); if (sarrayp == true) { /* Ignore genome-C to query-T mismatches */ - diff = 0U; + _diff = _mm_setzero_si128(); } else { /* Mark genome-C to query-T mismatches */ -#ifdef WORDS_BIGENDIAN - diff = (query_shifted[0] & 
query_shifted[1]) & - (~(Bigendian_convert_uint(ref_ptr[0])) & Bigendian_convert_uint(ref_ptr[4])); -#else - diff = (query_shifted[0] & query_shifted[1]) & (~(ref_ptr[0]) & ref_ptr[4]); -#endif - debug(printf(" => diff %08X\n",diff)); + _diff = _mm_and_si128(_mm_and_si128(_query_high, _query_low), _mm_andnot_si128(_ref_high, _ref_low)); } /* Compare reduced T->C nts */ -#ifdef WORDS_BIGENDIAN - diff |= ((query_shifted[0] & ~(query_shifted[1])) ^ (Bigendian_convert_uint(ref_ptr[0]) & ~(Bigendian_convert_uint(ref_ptr[4])))) | - (query_shifted[1] ^ Bigendian_convert_uint(ref_ptr[4])); -#else - diff |= ((query_shifted[0] & ~(query_shifted[1])) ^ (ref_ptr[0] & ~(ref_ptr[4]))) | (query_shifted[1] ^ ref_ptr[4]); -#endif - debug(printf(" => diff %08X\n",diff)); + _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_andnot_si128(_query_low, _query_high), _mm_andnot_si128(_ref_low, _ref_high))); + _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low)); - /* Flags: Considering N as a mismatch */ + _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8])); if (query_unk_mismatch_local_p) { - debug(printf("Marking query flags: query %08X ",query_shifted[2])); - diff |= query_shifted[2]; + _diff = _mm_or_si128(_query_flags, _diff); } else { - debug(printf("Clearing query flags: query %08X ",query_shifted[2])); - diff &= ~(query_shifted[2]); + _diff = _mm_andnot_si128(_query_flags, _diff); } + _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8])); if (genome_unk_mismatch_p) { - debug(printf("Marking genome flags: genome %08X ",ref_ptr[8])); -#ifdef WORDS_BIGENDIAN - diff |= Bigendian_convert_uint(ref_ptr[8]); -#else - diff |= (ref_ptr[8]); -#endif + _diff = _mm_or_si128(_ref_flags, _diff); } else { - debug(printf("Clearing genome flags: genome %08X ",ref_ptr[8])); -#ifdef WORDS_BIGENDIAN - diff &= ~(Bigendian_convert_uint(ref_ptr[8])); -#else - diff &= ~(ref_ptr[8]); -#endif + _diff = _mm_andnot_si128(_ref_flags, _diff); } - debug(printf(" => diff %08X\n",diff)); - 
return diff; + return _diff; +} -#else +#ifdef USE_SHIFT_HILO +static __m128i +block_diff_a2itc_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp, int startcolumni) { __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; - _query_high = _mm_load_si128((__m128i *) query_shifted); - _ref_high = _mm_load_si128((__m128i *) ref_ptr); - _query_low = _mm_load_si128((__m128i *) &(query_shifted[4])); - _ref_low = _mm_load_si128((__m128i *) &(ref_ptr[4])); + read_128_shift_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni); + read_128_shift_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni); if (sarrayp == true) { /* Ignore genome-C to query-T mismatches */ @@ -18003,14 +19495,12 @@ _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_andnot_si128(_query_low, _query_high), _mm_andnot_si128(_ref_low, _ref_high))); _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low)); - _query_flags = _mm_load_si128((__m128i *) &(query_shifted[8])); if (query_unk_mismatch_local_p) { _diff = _mm_or_si128(_query_flags, _diff); } else { _diff = _mm_andnot_si128(_query_flags, _diff); } - _ref_flags = _mm_load_si128((__m128i *) &(ref_ptr[8])); if (genome_unk_mismatch_p) { _diff = _mm_or_si128(_ref_flags, _diff); } else { @@ -18018,22 +19508,209 @@ } return _diff; -#endif } +static __m128i +block_diff_a2itc_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp, int endcolumni) { + __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_128_shift_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni); + read_128_shift_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni); -static UINT4 -block_diff_atoi_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, - bool plusp, int genestrand, bool query_unk_mismatch_local_p) { - if (genestrand == +2) { - if (plusp) { 
- return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); - } else { - return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); - } + if (sarrayp == true) { + /* Ignore genome-C to query-T mismatches */ + _diff = _mm_setzero_si128(); } else { - if (plusp) { - return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + /* Mark genome-C to query-T mismatches */ + _diff = _mm_and_si128(_mm_and_si128(_query_high, _query_low), _mm_andnot_si128(_ref_high, _ref_low)); + } + + /* Compare reduced T->C nts */ + _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_andnot_si128(_query_low, _query_high), _mm_andnot_si128(_ref_low, _ref_high))); + _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm_or_si128(_query_flags, _diff); + } else { + _diff = _mm_andnot_si128(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm_or_si128(_ref_flags, _diff); + } else { + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + + return _diff; +} +#endif +#endif + +#ifdef HAVE_SSSE3 +/* Convert T->C: new high = high & ~low */ +static __m128i +block_diff_a2itc_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp, + int startcolumni) { + __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_128_wrap_lo(&_query_high,&_query_low,&_query_flags,query_shifted,startcolumni); + read_128_wrap_lo(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,startcolumni); + + if (sarrayp == true) { + /* Ignore genome-C to query-T mismatches */ + _diff = _mm_setzero_si128(); + } else { + /* Mark genome-C to query-T mismatches */ + _diff = _mm_and_si128(_mm_and_si128(_query_high, _query_low), _mm_andnot_si128(_ref_high, _ref_low)); + } + + /* Compare reduced T->C nts */ + _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_andnot_si128(_query_low, 
_query_high), _mm_andnot_si128(_ref_low, _ref_high))); + _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm_or_si128(_query_flags, _diff); + } else { + _diff = _mm_andnot_si128(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm_or_si128(_ref_flags, _diff); + } else { + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + + return _diff; +} + +static __m128i +block_diff_a2itc_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp, + int endcolumni) { + __m128i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_128_wrap_hi(&_query_high,&_query_low,&_query_flags,query_shifted,endcolumni); + read_128_wrap_hi(&_ref_high,&_ref_low,&_ref_flags,ref_ptr,endcolumni); + + if (sarrayp == true) { + /* Ignore genome-C to query-T mismatches */ + _diff = _mm_setzero_si128(); + } else { + /* Mark genome-C to query-T mismatches */ + _diff = _mm_and_si128(_mm_and_si128(_query_high, _query_low), _mm_andnot_si128(_ref_high, _ref_low)); + } + + /* Compare reduced T->C nts */ + _diff = _mm_or_si128(_diff, _mm_xor_si128(_mm_andnot_si128(_query_low, _query_high), _mm_andnot_si128(_ref_low, _ref_high))); + _diff = _mm_or_si128(_diff, _mm_xor_si128(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm_or_si128(_query_flags, _diff); + } else { + _diff = _mm_andnot_si128(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm_or_si128(_ref_flags, _diff); + } else { + _diff = _mm_andnot_si128(_ref_flags, _diff); + } + + return _diff; +} +#endif + +#ifdef HAVE_AVX2 +/* Convert T->C: new high = high & ~low */ +static __m256i +block_diff_a2itc_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp) { + __m256i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + 
read_256(&_query_high,&_query_low,&_query_flags,query_shifted); + read_256(&_ref_high,&_ref_low,&_ref_flags,ref_ptr); + + if (sarrayp == true) { + /* Ignore genome-C to query-T mismatches */ + _diff = _mm256_setzero_si256(); + } else { + /* Mark genome-C to query-T mismatches */ + _diff = _mm256_and_si256(_mm256_and_si256(_query_high, _query_low), _mm256_andnot_si256(_ref_high, _ref_low)); + } + + /* Compare reduced T->C nts */ + _diff = _mm256_or_si256(_diff, _mm256_xor_si256(_mm256_andnot_si256(_query_low, _query_high), _mm256_andnot_si256(_ref_low, _ref_high))); + _diff = _mm256_or_si256(_diff, _mm256_xor_si256(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm256_or_si256(_query_flags, _diff); + } else { + _diff = _mm256_andnot_si256(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm256_or_si256(_ref_flags, _diff); + } else { + _diff = _mm256_andnot_si256(_ref_flags, _diff); + } + + return _diff; +} +#endif + +#ifdef HAVE_AVX512 +/* Convert T->C: new high = high & ~low */ +static __m512i +block_diff_a2itc_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool query_unk_mismatch_local_p, bool sarrayp) { + __m512i _diff, _query_high, _query_low, _query_flags, _ref_high, _ref_low, _ref_flags; + + read_512(&_query_high,&_query_low,&_query_flags,query_shifted); + read_512(&_ref_high,&_ref_low,&_ref_flags,ref_ptr); + + if (sarrayp == true) { + /* Ignore genome-C to query-T mismatches */ + _diff = _mm512_setzero_si512(); + } else { + /* Mark genome-C to query-T mismatches */ + _diff = _mm512_and_si512(_mm512_and_si512(_query_high, _query_low), _mm512_andnot_si512(_ref_high, _ref_low)); + } + + /* Compare reduced T->C nts */ + _diff = _mm512_or_si512(_diff, _mm512_xor_si512(_mm512_andnot_si512(_query_low, _query_high), _mm512_andnot_si512(_ref_low, _ref_high))); + _diff = _mm512_or_si512(_diff, _mm512_xor_si512(_query_low, _ref_low)); + + if (query_unk_mismatch_local_p) { + _diff = _mm512_or_si512(_query_flags, 
_diff); + } else { + _diff = _mm512_andnot_si512(_query_flags, _diff); + } + + if (genome_unk_mismatch_p) { + _diff = _mm512_or_si512(_ref_flags, _diff); + } else { + _diff = _mm512_andnot_si512(_ref_flags, _diff); + } + + return _diff; +} +#endif + + +static UINT4 +block_diff_atoi_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } else { + if (plusp) { + return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } else { return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } @@ -18041,242 +19718,1128 @@ } -static Genomediff_T -block_diff_atoi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, - bool plusp, int genestrand, bool query_unk_mismatch_local_p) { +#ifdef HAVE_SSE2 +static __m128i +block_diff_atoi_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { if (genestrand == +2) { if (plusp) { - return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } else { - return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } } else { if (plusp) { - return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } else { - return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + 
return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } } } -static UINT4 -block_diff_atoi_sarray_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, - bool plusp, int genestrand, bool query_unk_mismatch_local_p) { +#ifdef USE_SHIFT_HILO +static __m128i +block_diff_atoi_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int startcolumni) { if (genestrand == +2) { if (plusp) { - return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); } else { - return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); } } else { if (plusp) { - return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); } else { - return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); } } } -static Genomediff_T -block_diff_atoi_sarray (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, - bool plusp, int genestrand, bool query_unk_mismatch_local_p) { +static __m128i +block_diff_atoi_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { /* Selects the a2itc or a2iag *_128_shift_hi comparator from genestrand and plusp; sarrayp is always false here */ if (genestrand == +2) { if (plusp) { - return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return 
block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); } else { - return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); } } else { if (plusp) { - return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); } else { - return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); } } } +#endif +#endif -#ifdef GSNAP -/* Ignores snp_ptr */ -static UINT4 -block_diff_atoi_snp_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, - bool plusp, int genestrand, bool query_unk_mismatch_local_p) { +#ifdef HAVE_SSSE3 +static __m128i +block_diff_atoi_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int startcolumni) { if (genestrand == +2) { if (plusp) { - return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); } else { - return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); } } else { if (plusp) { - return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); } else { - return 
block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); } } } -#endif -#ifdef GSNAP -/* Ignores snp_ptr */ -static Genomediff_T -block_diff_atoi_snp (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, - bool plusp, int genestrand, bool query_unk_mismatch_local_p) { +static __m128i +block_diff_atoi_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { if (genestrand == +2) { if (plusp) { - return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); } else { - return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); } } else { if (plusp) { - return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); } else { - return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); } } } #endif - -/************************************************************************ - * TTOC - ************************************************************************/ - -static UINT4 -block_diff_ttoc_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, - bool plusp, int genestrand, bool query_unk_mismatch_local_p) { +#ifdef HAVE_AVX2 +static __m256i +block_diff_atoi_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + 
bool plusp, int genestrand, bool query_unk_mismatch_local_p) { if (genestrand == +2) { if (plusp) { - return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } else { - return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } } else { if (plusp) { - return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } else { - return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } } } +#endif - -static Genomediff_T -block_diff_ttoc (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, - bool plusp, int genestrand, bool query_unk_mismatch_local_p) { +#ifdef HAVE_AVX512 +static __m512i +block_diff_atoi_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { if (genestrand == +2) { if (plusp) { - return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } else { - return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } } else { if (plusp) { - return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } else { - return 
block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); } } } +#endif static UINT4 -block_diff_ttoc_sarray_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, +block_diff_atoi_sarray_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, bool plusp, int genestrand, bool query_unk_mismatch_local_p) { if (genestrand == +2) { if (plusp) { + return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } + } else { + if (plusp) { return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); } else { return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); } + } +} + +#ifdef HAVE_SSE2 +static __m128i +block_diff_atoi_sarray_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } } else { if (plusp) { - return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); } else { - return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); } } } -static Genomediff_T -block_diff_ttoc_sarray (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, - bool plusp, int genestrand, bool query_unk_mismatch_local_p) { +#ifdef USE_SHIFT_HILO +static __m128i 
+block_diff_atoi_sarray_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int startcolumni) { if (genestrand == +2) { if (plusp) { - return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); } else { - return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); } } else { if (plusp) { - return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); } else { - return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); } } } -#ifdef GSNAP -/* Ignores snp_ptr */ -static UINT4 -block_diff_ttoc_snp_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, - bool plusp, int genestrand, bool query_unk_mismatch_local_p) { +static __m128i +block_diff_atoi_sarray_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { if (genestrand == +2) { if (plusp) { - return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); } else { - return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + 
endcolumni); } } else { if (plusp) { - return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); } else { - return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); } } } #endif +#endif -#ifdef GSNAP -/* Ignores snp_ptr */ -static Genomediff_T -block_diff_ttoc_snp (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, - bool plusp, int genestrand, bool query_unk_mismatch_local_p) { +#ifdef HAVE_SSSE3 +static __m128i +block_diff_atoi_sarray_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int startcolumni) { if (genestrand == +2) { if (plusp) { - return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); } else { - return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); } } else { if (plusp) { - return block_diff_a2itc(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); } else { - return block_diff_a2iag(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); } } } -#endif +static __m128i +block_diff_atoi_sarray_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool 
plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } else { + return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } + } else { + if (plusp) { + return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } else { + return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } + } +} +#endif -/* query_shifted, (snp_ptr,) ref_ptr, plusp, genestrand, query_unk_mismatch_local_p */ -typedef Genomediff_T (*Diffproc_T) (Genomecomp_T *, Genomecomp_T *, bool, int, bool); -typedef Genomediff_T (*Diffproc_snp_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool); -typedef UINT4 (*Diffproc_32_T) (Genomecomp_T *, Genomecomp_T *, bool, int, bool); -typedef UINT4 (*Diffproc_snp_32_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool); +#ifdef HAVE_AVX2 +static __m256i +block_diff_atoi_sarray_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } + } else { + if (plusp) { + return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } + } +} +#endif + +#ifdef HAVE_AVX512 +static __m512i +block_diff_atoi_sarray_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + 
if (genestrand == +2) { + if (plusp) { + return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } + } else { + if (plusp) { + return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } + } +} +#endif + + +#ifdef GSNAP +/* Ignores snp_ptr */ +static UINT4 +block_diff_atoi_snp_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } else { + if (plusp) { + return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } +} +#endif + +#if defined(GSNAP) && defined(HAVE_SSE2) +/* Ignores snp_ptr */ +static __m128i +block_diff_atoi_snp_128 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } else { + if (plusp) { + return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } +} + +#ifdef USE_SHIFT_HILO +static 
__m128i +block_diff_atoi_snp_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, int startcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } else { + return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } + } else { + if (plusp) { + return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } else { + return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } + } +} + +static __m128i +block_diff_atoi_snp_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, int endcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } else { + return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } + } else { + if (plusp) { + return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } else { + return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } + } +} +#endif +#endif + +#if defined(GSNAP) && defined(HAVE_SSSE3) +/* Ignores snp_ptr */ +static __m128i +block_diff_atoi_snp_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int startcolumni) { + if (genestrand == +2) { + if (plusp) { + return 
block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } else { + return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } + } else { + if (plusp) { + return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } else { + return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } + } +} + +static __m128i +block_diff_atoi_snp_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } else { + return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } + } else { + if (plusp) { + return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } else { + return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } + } +} +#endif + +#if defined(GSNAP) && defined(HAVE_AVX2) +/* Ignores snp_ptr */ +static __m256i +block_diff_atoi_snp_256 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } else { + if (plusp) { + return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return 
block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } +} +#endif + +#if defined(GSNAP) && defined(HAVE_AVX512) +/* Ignores snp_ptr */ +static __m512i +block_diff_atoi_snp_512 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } else { + if (plusp) { + return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } +} +#endif + + +/************************************************************************ + * TTOC + ************************************************************************/ + +static UINT4 +block_diff_ttoc_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } else { + if (plusp) { + return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } +} + + +#ifdef HAVE_SSE2 +static __m128i +block_diff_ttoc_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { 
+ return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } else { + if (plusp) { + return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } +} + +#ifdef USE_SHIFT_HILO +static __m128i +block_diff_ttoc_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int startcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } else { + return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } + } else { + if (plusp) { + return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } else { + return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } + } +} + +static __m128i +block_diff_ttoc_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } else { + return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } + } else { + if (plusp) { + return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } else { + return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } + } +} +#endif +#endif + +#ifdef HAVE_SSSE3 +static __m128i 
+block_diff_ttoc_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int startcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } else { + return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } + } else { + if (plusp) { + return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } else { + return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } + } +} + +static __m128i +block_diff_ttoc_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } else { + return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } + } else { + if (plusp) { + return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } else { + return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } + } +} +#endif + +#ifdef HAVE_AVX2 +static __m256i +block_diff_ttoc_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } else { + if (plusp) { + return 
block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } +} +#endif + +#ifdef HAVE_AVX512 +static __m512i +block_diff_ttoc_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } else { + if (plusp) { + return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } +} +#endif + +static UINT4 +block_diff_ttoc_sarray_32 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } + } else { + if (plusp) { + return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } + } +} + +#ifdef HAVE_SSE2 +static __m128i +block_diff_ttoc_sarray_128 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } + } else { + if 
(plusp) { + return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } + } +} + +#ifdef USE_SHIFT_HILO +static __m128i +block_diff_ttoc_sarray_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int startcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); + } else { + return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); + } + } else { + if (plusp) { + return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); + } else { + return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); + } + } +} + +static __m128i +block_diff_ttoc_sarray_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } else { + return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } + } else { + if (plusp) { + return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } else { + return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } + } +} +#endif +#endif + +#ifdef HAVE_SSSE3 +static __m128i +block_diff_ttoc_sarray_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool 
query_unk_mismatch_local_p, + int startcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); + } else { + return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); + } + } else { + if (plusp) { + return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); + } else { + return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + startcolumni); + } + } +} + +static __m128i +block_diff_ttoc_sarray_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } else { + return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } + } else { + if (plusp) { + return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } else { + return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true, + endcolumni); + } + } +} +#endif + +#ifdef HAVE_AVX2 +static __m256i +block_diff_ttoc_sarray_256 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } + } else { + if (plusp) { + return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return 
block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } + } +} +#endif + +#ifdef HAVE_AVX512 +static __m512i +block_diff_ttoc_sarray_512 (Genomecomp_T *query_shifted, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } + } else { + if (plusp) { + return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } else { + return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/true); + } + } +} +#endif + +#ifdef GSNAP +/* Ignores snp_ptr */ +static UINT4 +block_diff_ttoc_snp_32 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } else { + if (plusp) { + return block_diff_a2itc_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2iag_32(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } +} +#endif + +#if defined(GSNAP) && defined(HAVE_SSE2) +/* Ignores snp_ptr */ +static __m128i +block_diff_ttoc_snp_128 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return 
block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } else { + if (plusp) { + return block_diff_a2itc_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2iag_128(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } +} + +#ifdef USE_SHIFT_HILO +static __m128i +block_diff_ttoc_snp_128_shift_lo (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int startcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } else { + return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } + } else { + if (plusp) { + return block_diff_a2itc_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } else { + return block_diff_a2iag_128_shift_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } + } +} + +static __m128i +block_diff_ttoc_snp_128_shift_hi (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } else { + return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } + } else { + if (plusp) { + return block_diff_a2itc_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } else { + return block_diff_a2iag_128_shift_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } + } +} +#endif +#endif + +#if 
defined(GSNAP) && defined(HAVE_SSSE3) +/* Ignores snp_ptr */ +static __m128i +block_diff_ttoc_snp_128_wrap_lo (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int startcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } else { + return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } + } else { + if (plusp) { + return block_diff_a2itc_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } else { + return block_diff_a2iag_128_wrap_lo(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + startcolumni); + } + } +} + +static __m128i +block_diff_ttoc_snp_128_wrap_hi (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p, + int endcolumni) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } else { + return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } + } else { + if (plusp) { + return block_diff_a2itc_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } else { + return block_diff_a2iag_128_wrap_hi(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false, + endcolumni); + } + } +} +#endif + +#if defined(GSNAP) && defined(HAVE_AVX2) +/* Ignores snp_ptr */ +static __m256i +block_diff_ttoc_snp_256 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return 
block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } else { + if (plusp) { + return block_diff_a2itc_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2iag_256(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } +} +#endif + +#if defined(GSNAP) && defined(HAVE_AVX512) +/* Ignores snp_ptr */ +static __m512i +block_diff_ttoc_snp_512 (Genomecomp_T *query_shifted, Genomecomp_T *snp_ptr, Genomecomp_T *ref_ptr, + bool plusp, int genestrand, bool query_unk_mismatch_local_p) { + if (genestrand == +2) { + if (plusp) { + return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } else { + if (plusp) { + return block_diff_a2itc_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } else { + return block_diff_a2iag_512(query_shifted,ref_ptr,query_unk_mismatch_local_p,/*sarrayp*/false); + } + } +} +#endif + + +/* query_shifted, (snp_ptr,) ref_ptr, plusp, genestrand, query_unk_mismatch_local_p */ +#ifdef HAVE_AVX512 +typedef __m512i (*Diffproc_512_T) (Genomecomp_T *, Genomecomp_T *, bool, int, bool); +typedef __m512i (*Diffproc_snp_512_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool); +static Diffproc_512_T block_diff_512; +static Diffproc_snp_512_T block_diff_snp_512; +#endif + +#ifdef HAVE_AVX2 +typedef __m256i (*Diffproc_256_T) (Genomecomp_T *, Genomecomp_T *, bool, int, bool); +typedef __m256i (*Diffproc_snp_256_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool); +static Diffproc_256_T block_diff_256; +static Diffproc_snp_256_T block_diff_snp_256; +#endif + +#ifdef HAVE_SSSE3 +typedef __m128i (*Diffproc_128_wrap_T) (Genomecomp_T *, 
Genomecomp_T *, bool, int, bool, int); +typedef __m128i (*Diffproc_snp_128_wrap_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool, int); +static Diffproc_128_wrap_T block_diff_128_wrap_lo; +static Diffproc_snp_128_wrap_T block_diff_snp_128_wrap_lo; +static Diffproc_128_wrap_T block_diff_128_wrap_hi; +static Diffproc_snp_128_wrap_T block_diff_snp_128_wrap_hi; +#endif + +#ifdef HAVE_SSE2 +#ifdef USE_SHIFT_HILO +typedef __m128i (*Diffproc_128_shift_T) (Genomecomp_T *, Genomecomp_T *, bool, int, bool, int); +typedef __m128i (*Diffproc_snp_128_shift_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool, int); +static Diffproc_128_shift_T block_diff_128_shift_lo; +static Diffproc_snp_128_shift_T block_diff_snp_128_shift_lo; +static Diffproc_128_shift_T block_diff_128_shift_hi; +static Diffproc_snp_128_shift_T block_diff_snp_128_shift_hi; +#endif + +typedef __m128i (*Diffproc_128_T) (Genomecomp_T *, Genomecomp_T *, bool, int, bool); +typedef __m128i (*Diffproc_snp_128_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool); +static Diffproc_128_T block_diff_128; +static Diffproc_snp_128_T block_diff_snp_128; +#endif + +typedef UINT4 (*Diffproc_32_T) (Genomecomp_T *, Genomecomp_T *, bool, int, bool); +typedef UINT4 (*Diffproc_snp_32_T) (Genomecomp_T *, Genomecomp_T *, Genomecomp_T *, bool, int, bool); -static Diffproc_T block_diff; -static Diffproc_snp_T block_diff_snp; static Diffproc_32_T block_diff_32; static Diffproc_snp_32_T block_diff_snp_32; /* For CMET and ATOI, ignores genome-to-query mismatches. 
Used by Genome_consecutive procedures, called only by sarray-read.c */ -static Diffproc_T block_diff_sarray; +#ifdef HAVE_AVX512 +static Diffproc_512_T block_diff_sarray_512; +#endif +#ifdef HAVE_AVX2 +static Diffproc_256_T block_diff_sarray_256; +#endif +#ifdef HAVE_SSSE3 +static Diffproc_128_wrap_T block_diff_sarray_128_wrap_lo; +static Diffproc_128_wrap_T block_diff_sarray_128_wrap_hi; +#endif +#ifdef HAVE_SSE2 +static Diffproc_128_T block_diff_sarray_128; +#ifdef USE_SHIFT_HILO +static Diffproc_128_shift_T block_diff_sarray_128_shift_lo; +static Diffproc_128_shift_T block_diff_sarray_128_shift_hi; +#endif +#endif static Diffproc_32_T block_diff_sarray_32; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -/* Skip */ -#else +#ifdef HAVE_AVX512 +static __m512i _BOUND_HIGH_512; +static __m512i _BOUND_LOW_512; +#endif +#ifdef HAVE_AVX2 +static __m256i _BOUND_HIGH_256; +static __m256i _BOUND_LOW_256; +#endif +#ifdef HAVE_SSE2 static __m128i _BOUND_HIGH; static __m128i _BOUND_LOW; #endif @@ -18285,9 +20848,16 @@ Genome_hr_setup (Genomecomp_T *ref_blocks_in, Genomecomp_T *snp_blocks_in, bool query_unk_mismatch_p_in, bool genome_unk_mismatch_p_in, Mode_T mode) { -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -/* Skip */ -#else + +#ifdef HAVE_AVX512 + _BOUND_HIGH_512 = _mm512_set_epi32(512,480,448,416, 384,352,320,288, 256,224,192,160, 128,96,64,32); + _BOUND_LOW_512 = _mm512_set_epi32(480,448,416,384, 352,320,288,256, 224,192,160,128, 96,64,32,0); +#endif +#ifdef HAVE_AVX2 + _BOUND_HIGH_256 = _mm256_set_epi32(256,224,192,160,128,96,64,32); + _BOUND_LOW_256 = _mm256_set_epi32(224,192,160,128,96,64,32,0); +#endif +#ifdef HAVE_SSE2 _BOUND_HIGH = _mm_set_epi32(128,96,64,32); _BOUND_LOW = _mm_set_epi32(96,64,32,0); #endif @@ -18299,26 +20869,117 @@ switch (mode) { case STANDARD: - block_diff = block_diff_standard; - block_diff_sarray = block_diff_standard; +#ifdef HAVE_AVX512 + block_diff_512 = block_diff_standard_512; + block_diff_sarray_512 = 
block_diff_standard_512; +#endif +#ifdef HAVE_AVX2 + block_diff_256 = block_diff_standard_256; + block_diff_sarray_256 = block_diff_standard_256; +#endif +#ifdef HAVE_SSSE3 + block_diff_128_wrap_lo = block_diff_standard_128_wrap_lo; + block_diff_sarray_128_wrap_lo = block_diff_standard_128_wrap_lo; + block_diff_128_wrap_hi = block_diff_standard_128_wrap_hi; + block_diff_sarray_128_wrap_hi = block_diff_standard_128_wrap_hi; +#endif +#ifdef HAVE_SSE2 + block_diff_128 = block_diff_standard_128; + block_diff_sarray_128 = block_diff_standard_128; +#ifdef USE_SHIFT_HILO + block_diff_128_shift_lo = block_diff_standard_128_shift_lo; + block_diff_sarray_128_shift_lo = block_diff_standard_128_shift_lo; + block_diff_128_shift_hi = block_diff_standard_128_shift_hi; + block_diff_sarray_128_shift_hi = block_diff_standard_128_shift_hi; +#endif +#endif block_diff_32 = block_diff_standard_32; block_diff_sarray_32 = block_diff_standard_32; break; + case CMET_STRANDED: case CMET_NONSTRANDED: - block_diff = block_diff_cmet; - block_diff_sarray = block_diff_cmet_sarray; +#ifdef HAVE_AVX512 + block_diff_512 = block_diff_cmet_512; + block_diff_sarray_512 = block_diff_cmet_sarray_512; +#endif +#ifdef HAVE_AVX2 + block_diff_256 = block_diff_cmet_256; + block_diff_sarray_256 = block_diff_cmet_sarray_256; +#endif +#ifdef HAVE_SSSE3 + block_diff_128_wrap_lo = block_diff_cmet_128_wrap_lo; + block_diff_sarray_128_wrap_lo = block_diff_cmet_sarray_128_wrap_lo; + block_diff_128_wrap_hi = block_diff_cmet_128_wrap_hi; + block_diff_sarray_128_wrap_hi = block_diff_cmet_sarray_128_wrap_hi; +#endif +#ifdef HAVE_SSE2 + block_diff_128 = block_diff_cmet_128; + block_diff_sarray_128 = block_diff_cmet_sarray_128; +#ifdef USE_SHIFT_HILO + block_diff_128_shift_lo = block_diff_cmet_128_shift_lo; + block_diff_sarray_128_shift_lo = block_diff_cmet_128_shift_lo; + block_diff_128_shift_hi = block_diff_cmet_128_shift_hi; + block_diff_sarray_128_shift_hi = block_diff_cmet_128_shift_hi; +#endif +#endif block_diff_32 = 
block_diff_cmet_32; block_diff_sarray_32 = block_diff_cmet_sarray_32; break; + case ATOI_STRANDED: case ATOI_NONSTRANDED: - block_diff = block_diff_atoi; - block_diff_sarray = block_diff_atoi_sarray; +#ifdef HAVE_AVX512 + block_diff_512 = block_diff_atoi_512; + block_diff_sarray_512 = block_diff_atoi_sarray_512; +#endif +#ifdef HAVE_AVX2 + block_diff_256 = block_diff_atoi_256; + block_diff_sarray_256 = block_diff_atoi_sarray_256; +#endif +#ifdef HAVE_SSSE3 + block_diff_128_wrap_lo = block_diff_atoi_128_wrap_lo; + block_diff_sarray_128_wrap_lo = block_diff_atoi_sarray_128_wrap_lo; + block_diff_128_wrap_hi = block_diff_atoi_128_wrap_hi; + block_diff_sarray_128_wrap_hi = block_diff_atoi_sarray_128_wrap_hi; +#endif +#ifdef HAVE_SSE2 + block_diff_128 = block_diff_atoi_128; + block_diff_sarray_128 = block_diff_atoi_sarray_128; +#ifdef USE_SHIFT_HILO + block_diff_128_shift_lo = block_diff_atoi_128_shift_lo; + block_diff_sarray_128_shift_lo = block_diff_atoi_128_shift_lo; + block_diff_128_shift_hi = block_diff_atoi_128_shift_hi; + block_diff_sarray_128_shift_hi = block_diff_atoi_128_shift_hi; +#endif +#endif block_diff_32 = block_diff_atoi_32; block_diff_sarray_32 = block_diff_atoi_sarray_32; break; + case TTOC_STRANDED: case TTOC_NONSTRANDED: - block_diff = block_diff_ttoc; - block_diff_sarray = block_diff_ttoc_sarray; +#ifdef HAVE_AVX512 + block_diff_512 = block_diff_ttoc_512; + block_diff_sarray_512 = block_diff_ttoc_sarray_512; +#endif +#ifdef HAVE_AVX2 + block_diff_256 = block_diff_ttoc_256; + block_diff_sarray_256 = block_diff_ttoc_sarray_256; +#endif +#ifdef HAVE_SSSE3 + block_diff_128_wrap_lo = block_diff_ttoc_128_wrap_lo; + block_diff_sarray_128_wrap_lo = block_diff_ttoc_sarray_128_wrap_lo; + block_diff_128_wrap_hi = block_diff_ttoc_128_wrap_hi; + block_diff_sarray_128_wrap_hi = block_diff_ttoc_sarray_128_wrap_hi; +#endif +#ifdef HAVE_SSE2 + block_diff_128 = block_diff_ttoc_128; + block_diff_sarray_128 = block_diff_ttoc_sarray_128; +#ifdef USE_SHIFT_HILO + 
block_diff_128_shift_lo = block_diff_ttoc_128_shift_lo; + block_diff_sarray_128_shift_lo = block_diff_ttoc_128_shift_lo; + block_diff_128_shift_hi = block_diff_ttoc_128_shift_hi; + block_diff_sarray_128_shift_hi = block_diff_ttoc_128_shift_hi; +#endif +#endif block_diff_32 = block_diff_ttoc_32; block_diff_sarray_32 = block_diff_ttoc_sarray_32; break; @@ -18326,24 +20987,100 @@ } #ifndef GSNAP - block_diff_snp = block_diff_standard_wildcard; +#ifdef HAVE_AVX512 + block_diff_snp_512 = block_diff_standard_wildcard_512; +#endif +#ifdef HAVE_AVX2 + block_diff_snp_256 = block_diff_standard_wildcard_256; +#endif +#ifdef HAVE_SSE2 + block_diff_snp_128 = block_diff_standard_wildcard_128; +#endif block_diff_snp_32 = block_diff_standard_wildcard_32; + #else switch (mode) { case STANDARD: - block_diff_snp = block_diff_standard_wildcard; +#ifdef HAVE_AVX512 + block_diff_snp_512 = block_diff_standard_wildcard_512; +#endif +#ifdef HAVE_AVX2 + block_diff_snp_256 = block_diff_standard_wildcard_256; +#endif +#ifdef HAVE_SSSE3 + block_diff_snp_128_wrap_lo = block_diff_standard_wildcard_128_wrap_lo; + block_diff_snp_128_wrap_hi = block_diff_standard_wildcard_128_wrap_hi; +#endif +#ifdef HAVE_SSE2 + block_diff_snp_128 = block_diff_standard_wildcard_128; +#ifdef USE_SHIFT_HILO + block_diff_snp_128_shift_lo = block_diff_standard_wildcard_128_shift_lo; + block_diff_snp_128_shift_hi = block_diff_standard_wildcard_128_shift_hi; +#endif +#endif block_diff_snp_32 = block_diff_standard_wildcard_32; break; + case CMET_STRANDED: case CMET_NONSTRANDED: - block_diff_snp = block_diff_cmet_snp; +#ifdef HAVE_AVX512 + block_diff_snp_512 = block_diff_cmet_snp_512; +#endif +#ifdef HAVE_AVX2 + block_diff_snp_256 = block_diff_cmet_snp_256; +#endif +#ifdef HAVE_SSSE3 + block_diff_snp_128_wrap_lo = block_diff_cmet_snp_128_wrap_lo; + block_diff_snp_128_wrap_hi = block_diff_cmet_snp_128_wrap_hi; +#endif +#ifdef HAVE_SSE2 + block_diff_snp_128 = block_diff_cmet_snp_128; +#ifdef USE_SHIFT_HILO + 
block_diff_snp_128_shift_lo = block_diff_cmet_snp_128_shift_lo; + block_diff_snp_128_shift_hi = block_diff_cmet_snp_128_shift_hi; +#endif +#endif block_diff_snp_32 = block_diff_cmet_snp_32; break; + case ATOI_STRANDED: case ATOI_NONSTRANDED: - block_diff_snp = block_diff_atoi_snp; +#ifdef HAVE_AVX512 + block_diff_snp_512 = block_diff_atoi_snp_512; +#endif +#ifdef HAVE_AVX2 + block_diff_snp_256 = block_diff_atoi_snp_256; +#endif +#ifdef HAVE_SSSE3 + block_diff_snp_128_wrap_lo = block_diff_atoi_snp_128_wrap_lo; + block_diff_snp_128_wrap_hi = block_diff_atoi_snp_128_wrap_hi; +#endif +#ifdef HAVE_SSE2 + block_diff_snp_128 = block_diff_atoi_snp_128; +#ifdef USE_SHIFT_HILO + block_diff_snp_128_shift_lo = block_diff_atoi_snp_128_shift_lo; + block_diff_snp_128_shift_hi = block_diff_atoi_snp_128_shift_hi; +#endif +#endif block_diff_snp_32 = block_diff_atoi_snp_32; break; + case TTOC_STRANDED: case TTOC_NONSTRANDED: - block_diff_snp = block_diff_ttoc_snp; +#ifdef HAVE_AVX512 + block_diff_snp_512 = block_diff_ttoc_snp_512; +#endif +#ifdef HAVE_AVX2 + block_diff_snp_256 = block_diff_ttoc_snp_256; +#endif +#ifdef HAVE_SSSE3 + block_diff_snp_128_wrap_lo = block_diff_ttoc_snp_128_wrap_lo; + block_diff_snp_128_wrap_hi = block_diff_ttoc_snp_128_wrap_hi; +#endif +#ifdef HAVE_SSE2 + block_diff_snp_128 = block_diff_ttoc_snp_128; +#ifdef USE_SHIFT_HILO + block_diff_snp_128_shift_lo = block_diff_ttoc_snp_128_shift_lo; + block_diff_snp_128_shift_hi = block_diff_ttoc_snp_128_shift_hi; +#endif +#endif block_diff_snp_32 = block_diff_ttoc_snp_32; break; default: fprintf(stderr,"Mode %d not recognized\n",mode); abort(); @@ -18353,167 +21090,153 @@ return; } -/* genomebits available */ -void -Genome_hr_user_setup (UINT4 *ref_blocks_in, - bool query_unk_mismatch_p_in, bool genome_unk_mismatch_p_in, - Mode_T mode) { - ref_blocks = ref_blocks_in; - snp_blocks = (UINT4 *) NULL; - query_unk_mismatch_p = query_unk_mismatch_p_in; - genome_unk_mismatch_p = genome_unk_mismatch_p_in; - switch (mode) { - 
case STANDARD: - block_diff = block_diff_standard; - block_diff_32 = block_diff_standard_32; - break; - case CMET_STRANDED: case CMET_NONSTRANDED: - block_diff = block_diff_cmet; - block_diff_32 = block_diff_cmet_32; - break; - case ATOI_STRANDED: case ATOI_NONSTRANDED: - block_diff = block_diff_atoi; - block_diff_32 = block_diff_atoi_32; - break; - case TTOC_STRANDED: case TTOC_NONSTRANDED: - block_diff = block_diff_ttoc; - block_diff_32 = block_diff_ttoc_32; - break; - default: fprintf(stderr,"Mode %d not recognized\n",mode); abort(); - } +/************************************************************************/ -#ifndef GSNAP - block_diff_snp = block_diff_standard_wildcard; - block_diff_snp_32 = block_diff_standard_wildcard_32; -#else - switch (mode) { - case STANDARD: - block_diff_snp = block_diff_standard_wildcard; - block_diff_snp_32 = block_diff_standard_wildcard_32; - break; - case CMET_STRANDED: case CMET_NONSTRANDED: - block_diff_snp = block_diff_cmet_snp; - block_diff_snp_32 = block_diff_cmet_snp_32; - break; - case ATOI_STRANDED: case ATOI_NONSTRANDED: - block_diff_snp = block_diff_atoi_snp; - block_diff_snp_32 = block_diff_atoi_snp_32; - break; - case TTOC_STRANDED: case TTOC_NONSTRANDED: - block_diff_snp = block_diff_ttoc_snp; - block_diff_snp_32 = block_diff_ttoc_snp_32; - break; - default: fprintf(stderr,"Mode %d not recognized\n",mode); abort(); - } +#ifdef HAVE_AVX512 +/* Need to implement. Extract procedures not available. 
*/ #endif - return; -} - +#ifdef HAVE_AVX2 +#define nonzero_p_256(diff) !_mm256_testz_si256(diff,diff) -/************************************************************************/ -/* 76543210 */ -#define HIGH_BIT 0x80000000 +#if defined(HAVE_POPCNT) +#define popcount_ones_256(_diff) (_popcnt64(_mm256_extract_epi64(_diff,0)) + _popcnt64(_mm256_extract_epi64(_diff,1)) + _popcnt64(_mm256_extract_epi64(_diff,2)) + _popcnt64(_mm256_extract_epi64(_diff,3))) +#elif defined(HAVE_MM_POPCNT) +#define popcount_ones_256(_diff) (_mm_popcnt_u64(_mm256_extract_epi64(_diff,0)) + _mm_popcnt_u64(_mm256_extract_epi64(_diff,1)) + _mm_popcnt_u64(_mm256_extract_epi64(_diff,2)) + _mm_popcnt_u64(_mm256_extract_epi64(_diff,3))) +#else +#define popcount_ones_256(_diff) (__builtin_popcountll(_mm256_extract_epi64(_diff,0)) + __builtin_popcountll(_mm256_extract_epi64(_diff,1)) + __builtin_popcountll(_mm256_extract_epi64(_diff,2)) + __builtin_popcountll(_mm256_extract_epi64(_diff,3))) +#endif -#define clear_start_32(diff,startdiscard) (diff & (~0U << (startdiscard))) -#define clear_end_32(diff,enddiscard) (diff & ~(~0U << (enddiscard))) +static int +count_leading_zeroes_256 (__m256i _diff) { + debug4(printf("Entered count_leading_zeroes with ")); + debug4(print_vector_256_hex(_diff)); + UINT8 x; -/* Needed only for debugging */ -#define clear_start_mask(startdiscard) (~0U << (startdiscard)) -#define clear_end_mask(enddiscard) (~(~0U << (enddiscard))) +#ifdef HAVE_LZCNT + if ((x = _mm256_extract_epi64(_diff,3)) != 0) { + return (int) _lzcnt_u64(x); + } else if ((x = _mm256_extract_epi64(_diff,2)) != 0) { + return 64 + (int) _lzcnt_u64(x); + } else if ((x = _mm256_extract_epi64(_diff,1)) != 0) { + return 128 + (int) _lzcnt_u64(x); + } else { + return 192 + (int) _lzcnt_u64(_mm256_extract_epi64(_diff,0)); + } -/* Needed only for debugging */ -#define set_start_mask(startdiscard) (~(~0U << startdiscard)) -#define set_end_mask(enddiscard) (~0U << enddiscard) +#elif defined(HAVE_BUILTIN_CLZ) + if ((x 
= _mm256_extract_epi64(_diff,3)) != 0) { + return (int) __builtin_clzll(x); + } else if ((x = _mm256_extract_epi64(_diff,2)) != 0) { + return 64 + (int) __builtin_clzll(x); + } else if ((x = _mm256_extract_epi64(_diff,1)) != 0) { + return 128 + (int) __builtin_clzll(x); + } else { + return 192 + (int) __builtin_clzll(_mm256_extract_epi64(_diff,0)); + } +#else + abort(); +#endif +} -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) +static int +count_trailing_zeroes_256 (__m256i _diff) { + debug4(printf("Entered count_trailing_zeroes with ")); + debug4(print_vector_256_hex(_diff)); + UINT8 x; -#define nonzero_p(diff) diff +#ifdef HAVE_TZCNT + if ((x = _mm256_extract_epi64(_diff,0)) != 0) { + return (int) _tzcnt_u64(x); + } else if ((x = _mm256_extract_epi64(_diff,1)) != 0) { + return 64 + (int) _tzcnt_u64(x); + } else if ((x = _mm256_extract_epi64(_diff,2)) != 0) { + return 128 + (int) _tzcnt_u64(x); + } else { + return 192 + (int) _tzcnt_u64(_mm256_extract_epi64(_diff,3)); + } -#define clear_start(diff,startdiscard) (diff & (~0U << (startdiscard))) -#define clear_end(diff,enddiscard) (diff & ~(~0U << (enddiscard))) +#elif defined(HAVE_BUILTIN_CTZ) + if ((x = _mm256_extract_epi64(_diff,0)) != 0) { + return (int) __builtin_ctzll(x); + } else if ((x = _mm256_extract_epi64(_diff,1)) != 0) { + return 64 + (int) __builtin_ctzll(x); + } else if ((x = _mm256_extract_epi64(_diff,2)) != 0) { + return 128 + (int) __builtin_ctzll(x); + } else { + return 192 + (int) __builtin_ctzll(_mm256_extract_epi64(_diff,3)); + } -/* Same speed: clear_highbit(diff,relpos) (diff - (HIGH_BIT >> relpos)) */ -/* Note: xor assumes that bit at relpos was on */ -#define clear_highbit(diff,relpos) (diff ^ (HIGH_BIT >> relpos)) +#else + abort(); +#endif -/* Slower: clear_lowbit(diff,relpos) diff -= (1 << relpos) */ -#define clear_lowbit(diff,relpos) (diff & (diff - 1)); +} +static __m256i +clear_highbit_256 (__m256i _diff, int leading_zeroes) { + __m256i _subtract, _relpos; + int relpos; -#if 
!defined(HAVE_SSE4_2) -#define popcount_ones(diff) (count_bits[diff & 0x0000FFFF] + count_bits[diff >> 16]) -#elif defined(HAVE_POPCNT) -#define popcount_ones(diff) (_popcnt32(diff)) -#elif defined(HAVE_MM_POPCNT) -#define popcount_ones(diff) (_mm_popcnt_u32(diff)) -#elif defined(HAVE_BUILTIN_POPCOUNT) -#define popcount_ones(diff) (__builtin_popcount(diff)) -#else -#define popcount_ones(diff) (count_bits[diff & 0x0000FFFF] + count_bits[diff >> 16]) -#endif + relpos = 255 - leading_zeroes; + debug3(printf("Clearing high bit at relpos %d\n",relpos)); + _subtract = _mm256_slli_epi32(_mm256_set1_epi32(1), relpos % 32); + _relpos = _mm256_set1_epi32(relpos); + _subtract = _mm256_and_si256(_mm256_cmpgt_epi32(_BOUND_HIGH_256, _relpos), _subtract); + _subtract = _mm256_andnot_si256(_mm256_cmpgt_epi32(_BOUND_LOW_256, _relpos), _subtract); -#if !defined(HAVE_SSE4_2) -#define count_leading_zeroes(diff) ((diff >> 16) ? clz_table[diff >> 16] : 16 + clz_table[diff]) -#elif defined(HAVE_LZCNT) -#define count_leading_zeroes(diff) _lzcnt_u32(diff) -#elif defined(HAVE_BUILTIN_CLZ) -#define count_leading_zeroes(diff) __builtin_clz(diff) + debug3(printf("Subtract: ")); + debug3(print_vector_256_hex(_subtract)); +#if 0 + /* latency 1, throughput: 0.5 */ + return _mm256_sub_epi32(_diff, _subtract); #else -#define count_leading_zeroes(diff) ((diff >> 16) ? clz_table[diff >> 16] : 16 + clz_table[diff]) + /* _mm256_xor_si256 also works if all other bits are 0. 
latency 1, throughput: 0.33 */ + return _mm256_xor_si256(_diff, _subtract); #endif +} -#if !defined(HAVE_SSE4_2) -#define count_trailing_zeroes(diff) mod_37_bit_position[(-diff & diff) % 37] -#elif defined(HAVE_TZCNT) -#define count_trailing_zeroes(diff) _tzcnt_u32(diff) -#elif defined(HAVE_BUILTIN_CTZ) -#define count_trailing_zeroes(diff) __builtin_ctz(diff) -#else -/* lowbit = -diff & diff */ -#define count_trailing_zeroes(diff) mod_37_bit_position[(-diff & diff) % 37] -#endif +/* relpos is equal to trailing_zeroes */ +static __m256i +clear_lowbit_256 (__m256i _diff, int relpos) { + __m256i _subtract, _relpos; -/* For trimming */ -#define set_start(diff,startdiscard) (diff | ~(~0U << startdiscard)) -#define set_end(diff,enddiscard) (diff | (~0U << enddiscard)) + debug3(printf("Clearing low bit at relpos %d\n",relpos)); -#if defined(DEBUG) || defined(DEBUG5) -static void -print_diff_popcount (UINT4 diff) { - printf("diff: %08X => nmismatches %d\n",diff,popcount_ones(diff)); - return; -} + _subtract = _mm256_slli_epi32(_mm256_set1_epi32(1), relpos % 32); + _relpos = _mm256_set1_epi32(relpos); + _subtract = _mm256_and_si256(_mm256_cmpgt_epi32(_BOUND_HIGH_256, _relpos), _subtract); + _subtract = _mm256_andnot_si256(_mm256_cmpgt_epi32(_BOUND_LOW_256, _relpos), _subtract); -static void -print_diff_trailing_zeroes (UINT4 diff, int offset) { - printf("diff: %08X => offset %d + trailing zeroes %d\n",diff,offset,count_trailing_zeroes(diff)); - return; + debug3(printf("Subtract: ")); + debug3(print_vector_256_hex(_subtract)); +#if 0 + /* latency 1, throughput: 0.5 */ + return _mm256_sub_epi32(_diff, _subtract); +#else + /* _mm256_xor_si256 also works if all other bits are 0. 
latency 1, throughput: 0.33 */ + return _mm256_xor_si256(_diff, _subtract); +#endif } -static void -print_diff_leading_zeroes (UINT4 diff, int offset) { - printf("diff: %08X => offset %d - leading zeroes %d\n",diff,offset,count_leading_zeroes(diff)); - return; -} #endif -#else /* littleendian and SSE2 */ + +#ifdef HAVE_SSE2 #ifdef HAVE_SSE4_1 -#define nonzero_p(diff) !_mm_testz_si128(diff,diff) +#define nonzero_p_128(diff) !_mm_testz_si128(diff,diff) #else -#define nonzero_p(diff) _mm_movemask_epi8(_mm_cmpeq_epi8(diff,_mm_setzero_si128())) != 0xFFFF +#define nonzero_p_128(diff) _mm_movemask_epi8(_mm_cmpeq_epi8(diff,_mm_setzero_si128())) != 0xFFFF #endif - static __m128i -clear_start (__m128i _diff, int startdiscard) { +clear_start_128 (__m128i _diff, int startdiscard) { __m128i _mask, _startdiscard; #ifdef DEBUG __m128i _result; @@ -18542,7 +21265,7 @@ } static __m128i -clear_end (__m128i _diff, int enddiscard) { +clear_end_128 (__m128i _diff, int enddiscard) { __m128i _mask, _enddiscard; #ifdef DEBUG __m128i _result; @@ -18569,14 +21292,54 @@ return _mm_andnot_si128(_mask, _diff); } - + +/* Based on clear_end */ +static __m128i +set_start_128 (__m128i _diff, int startdiscard) { + __m128i _mask, _startdiscard; + + debug(printf("Setting start at startdiscard %d\n",startdiscard)); + +#ifdef DEFECTIVE_SSE2_COMPILER + _mask = _mm_sll_epi32(_mm_set1_epi32(~0U), _mm_setr_epi32(startdiscard % 32,0,0,0)); +#else + _mask = _mm_slli_epi32(_mm_set1_epi32(~0U), startdiscard % 32); +#endif + _startdiscard = _mm_set1_epi32(startdiscard); + _mask = _mm_or_si128(_mm_cmplt_epi32(_startdiscard, _BOUND_LOW), _mask); + _mask = _mm_and_si128(_mm_cmplt_epi32(_startdiscard, _BOUND_HIGH), _mask); + + _mask = _mm_xor_si128(_mask, _mm_set1_epi32(~0U)); /* Take complement of _mask */ + + return _mm_or_si128(_mask, _diff); +} + +/* Based on clear_start */ +static __m128i +set_end_128 (__m128i _diff, int enddiscard) { + __m128i _mask, _enddiscard; + + debug(printf("Setting end at enddiscard 
%d\n",enddiscard)); + +#ifdef DEFECTIVE_SSE2_COMPILER + _mask = _mm_sll_epi32(_mm_set1_epi32(~0U), _mm_setr_epi32(enddiscard % 32,0,0,0)); +#else + _mask = _mm_slli_epi32(_mm_set1_epi32(~0U), enddiscard % 32); +#endif + _enddiscard = _mm_set1_epi32(enddiscard); + _mask = _mm_or_si128(_mask, _mm_cmplt_epi32(_enddiscard, _BOUND_LOW)); + _mask = _mm_and_si128(_mask, _mm_cmplt_epi32(_enddiscard, _BOUND_HIGH)); + + return _mm_or_si128(_mask, _diff); +} + #if !defined(HAVE_SSE4_2) #if 0 /* Naive method for pre-SSE4.2. Requires four popcount operations. */ static int -popcount_ones (__m128i _diff) { +popcount_ones_128 (__m128i _diff) { UINT4 diff[4]; _mm_store_si128((__m128i *) diff,_diff); @@ -18600,7 +21363,7 @@ #define CSA(h,l, a,b,c, u,v) u = a ^ b; v = c; h = (a & b) | (u & v); l = u ^ v; static int -popcount_ones (__m128i _diff) { +popcount_ones_128 (__m128i _diff) { UINT4 ones, twos, u, v; UINT4 diff[4]; @@ -18613,17 +21376,17 @@ #elif defined(HAVE_POPCNT) -#define popcount_ones(_diff) (_popcnt64(_mm_extract_epi64(_diff,0)) + _popcnt64(_mm_extract_epi64(_diff,1))) +#define popcount_ones_128(_diff) (_popcnt64(_mm_extract_epi64(_diff,0)) + _popcnt64(_mm_extract_epi64(_diff,1))) #elif defined(HAVE_MM_POPCNT) -#define popcount_ones(_diff) (_mm_popcnt_u64(_mm_extract_epi64(_diff,0)) + _mm_popcnt_u64(_mm_extract_epi64(_diff,1))) +#define popcount_ones_128(_diff) (_mm_popcnt_u64(_mm_extract_epi64(_diff,0)) + _mm_popcnt_u64(_mm_extract_epi64(_diff,1))) #else -#define popcount_ones(_diff) (__builtin_popcountll(_mm_extract_epi64(_diff,0)) + __builtin_popcountll(_mm_extract_epi64(_diff,1))) +#define popcount_ones_128(_diff) (__builtin_popcountll(_mm_extract_epi64(_diff,0)) + __builtin_popcountll(_mm_extract_epi64(_diff,1))) #endif static int -count_leading_zeroes (__m128i _diff) { - debug4(printf("Entered count_leading_zeroes with ")); +count_leading_zeroes_128 (__m128i _diff) { + debug4(printf("Entered count_leading_zeroes_128 with ")); debug4(print_vector_hex(_diff)); #if 
defined(HAVE_SSE4_2) && defined(HAVE_LZCNT) @@ -18665,8 +21428,8 @@ } static int -count_trailing_zeroes (__m128i _diff) { - debug4(printf("Entered count_trailing_zeroes with ")); +count_trailing_zeroes_128 (__m128i _diff) { + debug4(printf("Entered count_trailing_zeroes_128 with ")); debug4(print_vector_hex(_diff)); #if defined(HAVE_SSE4_2) && defined(HAVE_TZCNT) @@ -18708,7 +21471,7 @@ } static __m128i -clear_highbit (__m128i _diff, int leading_zeroes) { +clear_highbit_128 (__m128i _diff, int leading_zeroes) { __m128i _subtract, _relpos; int relpos; @@ -18737,7 +21500,7 @@ /* relpos is equal to trailing_zeroes */ static __m128i -clear_lowbit (__m128i _diff, int relpos) { +clear_lowbit_128 (__m128i _diff, int relpos) { __m128i _subtract, _relpos; debug3(printf("Clearing low bit at relpos %d\n",relpos)); @@ -18762,80 +21525,26 @@ #endif } -/* Based on clear_end */ -static __m128i -set_start (__m128i _diff, int startdiscard) { - __m128i _mask, _startdiscard; - - debug(printf("Setting start at startdiscard %d\n",startdiscard)); - -#ifdef DEFECTIVE_SSE2_COMPILER - _mask = _mm_sll_epi32(_mm_set1_epi32(~0U), _mm_setr_epi32(startdiscard % 32,0,0,0)); -#else - _mask = _mm_slli_epi32(_mm_set1_epi32(~0U), startdiscard % 32); -#endif - _startdiscard = _mm_set1_epi32(startdiscard); - _mask = _mm_or_si128(_mm_cmplt_epi32(_startdiscard, _BOUND_LOW), _mask); - _mask = _mm_and_si128(_mm_cmplt_epi32(_startdiscard, _BOUND_HIGH), _mask); - - _mask = _mm_xor_si128(_mask, _mm_set1_epi32(~0U)); /* Take complement of _mask */ - - return _mm_or_si128(_mask, _diff); -} - -/* Based on clear_start */ -static __m128i -set_end (__m128i _diff, int enddiscard) { - __m128i _mask, _enddiscard; - - debug(printf("Setting end at enddiscard %d\n",enddiscard)); - -#ifdef DEFECTIVE_SSE2_COMPILER - _mask = _mm_sll_epi32(_mm_set1_epi32(~0U), _mm_setr_epi32(enddiscard % 32,0,0,0)); -#else - _mask = _mm_slli_epi32(_mm_set1_epi32(~0U), enddiscard % 32); -#endif - _enddiscard = _mm_set1_epi32(enddiscard); - 
_mask = _mm_or_si128(_mask, _mm_cmplt_epi32(_enddiscard, _BOUND_LOW)); - _mask = _mm_and_si128(_mask, _mm_cmplt_epi32(_enddiscard, _BOUND_HIGH)); - - return _mm_or_si128(_mask, _diff); -} - -#if defined(DEBUG) || defined(DEBUG5) -static void -print_diff_popcount (__m128i _diff) { - printf("diff: "); - print_vector_hex(_diff); - printf("nmismatches %d\n",popcount_ones(_diff)); - return; -} - -static void -print_diff_trailing_zeroes (__m128i _diff, int offset) { - printf("diff: "); - print_vector_hex(_diff); - printf("offset %d + trailing zeroes %d\n",offset,count_trailing_zeroes(_diff)); - return; -} - -static void -print_diff_leading_zeroes (__m128i _diff, int offset) { - printf("diff: "); - print_vector_hex(_diff); - printf("offset %d - leading zeroes %d\n",offset,count_leading_zeroes(_diff)); - return; -} #endif -#endif /* littleendian and SSE2 */ +/* 76543210 */ +#define HIGH_BIT 0x80000000 #define nonzero_p_32(diff) diff #define clear_start_32(diff,startdiscard) (diff & (~0U << (startdiscard))) #define clear_end_32(diff,enddiscard) (diff & ~(~0U << (enddiscard))) +/* For trimming */ +#define set_start_32(diff,startdiscard) (diff | ~(~0U << startdiscard)) +#define set_end_32(diff,enddiscard) (diff | (~0U << enddiscard)) + +/* For fragment functions that evaluate only the end 16-mer */ +#define clear_start_mask(startdiscard) (~0U << (startdiscard)) +#define clear_end_mask(enddiscard) (~(~0U << (enddiscard))) + + /* Same speed: clear_highbit(diff,relpos) (diff - (HIGH_BIT >> relpos)) */ /* Note: xor assumes that bit at relpos was on */ #define clear_highbit_32(diff,relpos) (diff ^ (HIGH_BIT >> relpos)) @@ -18877,27 +21586,24 @@ #define count_trailing_zeroes_32(diff) mod_37_bit_position[(-diff & diff) % 37] #endif -/* For trimming */ -#define set_start_32(diff,startdiscard) (diff | ~(~0U << startdiscard)) -#define set_end_32(diff,enddiscard) (diff | (~0U << enddiscard)) - - /* Counts matches from pos5 to pos3 up to first mismatch. 
Modified from mismatches_left */ int Genome_consecutive_matches_rightward (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand) { -#ifdef DEBUG14 - int answer; -#endif int mismatch_position, offset, nshift; int startdiscard, enddiscard; Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32; - Genomecomp_T *query_shifted, *ptr, *end; - UINT4 diff_32; - Genomediff_T diff; + Genomecomp_T *query_shifted, *ptr, *endptr; int relpos; int startcolumni, endcolumni; + UINT4 diff_32; +#ifdef HAVE_SSE2 + __m128i diff_128; +#endif +#ifdef HAVE_AVX2 + __m256i diff_256; +#endif debug( printf("\n\n"); @@ -18923,163 +21629,247 @@ debug(printf("Query shifted %d:\n",nshift)); debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE; +#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) +#else + query_shifted += startcolumni; +#endif + + startdiscard = (left+pos5) % 32; + enddiscard = (left+pos3) % 32; + offset = -startdiscard + pos5; + ptr = &(ref_blocks[startblocki_32]); + endptr = &(ref_blocks[endblocki_32]); if (endblocki_32 == startblocki_32) { - startdiscard = (left+pos5) % 32; - enddiscard = (left+pos3) % 32; - offset = -startdiscard + pos5; + /* Single block */ debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - diff_32 = (block_diff_sarray_32)(query_shifted,&(ref_blocks[startblocki_32]), - plusp,genestrand,/*query_unk_mismatch_local_p*/true); -#else - diff_32 = (block_diff_sarray_32)(query_shifted + startcolumni,&(ref_blocks[startblocki_32]), - plusp,genestrand,/*query_unk_mismatch_local_p*/true); -#endif + diff_32 = (block_diff_sarray_32)(query_shifted,ptr, + plusp,genestrand,/*query_unk_mismatch_local_p*/true); diff_32 = clear_start_32(diff_32,startdiscard); diff_32 = clear_end_32(diff_32,enddiscard); if (nonzero_p_32(diff_32)) { mismatch_position = offset + 
(relpos = count_trailing_zeroes_32(diff_32)); debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5)); -#ifdef DEBUG14 - answer = (mismatch_position - pos5); -#else return (mismatch_position - pos5); -#endif } else { debug(printf("Would return %d - %d consecutive matches\n",pos3,pos5)); -#ifdef DEBUG14 - answer = (pos3 - pos5); -#else return (pos3 - pos5); -#endif } - } -#ifndef DEBUG14 - else { -#endif + } else if (endblocki == startblocki) { +#if defined(USE_SHIFT_FIRST_MISMATCH) && defined(HAVE_SSE2) + /* Shift */ + enddiscard += (endcolumni - startcolumni)*32; + assert(startdiscard == ((left+pos5) % 128) - startcolumni*32); + assert(enddiscard == ((left+pos3) % 128) - startcolumni*32); + + diff_128 = (block_diff_sarray_128_shift_lo)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - startblocki = startblocki_32; - endblocki = endblocki_32; -#endif + if (nonzero_p_128(diff_128)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_128(diff_128)); + debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5)); + return (mismatch_position - pos5); + } else { + return (pos3 - pos5); + } + +#else + /* Start block */ + diff_32 = (block_diff_sarray_32)(query_shifted,ptr, + plusp,genestrand,/*query_unk_mismatch_local_p*/true); + diff_32 = clear_start_32(diff_32,startdiscard); - startdiscard = (left+pos5) % STEP_SIZE; - enddiscard = (left+pos3) % STEP_SIZE; - offset = -startdiscard + pos5; + if (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5)); + return (mismatch_position - pos5); + } + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + offset += 32; - debug(printf("nshift = %d, 
startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); + /* Single row */ + while (++startcolumni < endcolumni) { + diff_32 = (block_diff_sarray_32)(query_shifted,ptr, + plusp,genestrand,/*query_unk_mismatch_local_p*/true); + + if (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5)); + return (mismatch_position - pos5); + } + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + offset += 32; + } -#ifndef DEBUG14 - } + /* End block */ + diff_32 = (block_diff_sarray_32)(query_shifted,ptr, + plusp,genestrand,/*query_unk_mismatch_local_p*/true); + diff_32 = clear_end_32(diff_32,enddiscard); + + if (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5)); + return (mismatch_position - pos5); + } else { + return (pos3 - pos5); + } #endif +#if defined(USE_WRAP_FIRST_MISMATCH) && defined(HAVE_SSSE3) + } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) { + /* Wrap */ + enddiscard += (4 + endcolumni - startcolumni)*32; + assert(startdiscard == ((left+pos5) % 128) - startcolumni*32); + assert(enddiscard == ((left+pos3) % 128) + (4 - startcolumni)*32); + + diff_128 = (block_diff_sarray_128_wrap_lo)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else - if (endblocki == startblocki) { - diff = (block_diff_sarray)(query_shifted,&(ref_blocks[startblocki]), - plusp,genestrand,/*query_unk_mismatch_local_p*/true); - diff = clear_start(diff,startdiscard); - diff = clear_end(diff,enddiscard); - - if (nonzero_p(diff)) { - mismatch_position = offset + (relpos = count_trailing_zeroes(diff)); - 
debug(printf("returning %d - %d consecutive matches\n",mismatch_position,pos5)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == (mismatch_position - pos5))); + if (nonzero_p_128(diff_128)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_128(diff_128)); + debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5)); return (mismatch_position - pos5); } else { - debug(printf("returning %d - %d consecutive matches\n",pos3,pos5)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == (pos3 - pos5))); return (pos3 - pos5); } - } else { #endif - /* Startblock */ - diff = (block_diff_sarray)(query_shifted,&(ref_blocks[startblocki]), - plusp,genestrand,/*query_unk_mismatch_local_p*/true); - diff = clear_start(diff,startdiscard); - - if (nonzero_p(diff)) { - mismatch_position = offset + (relpos = count_trailing_zeroes(diff)); - debug(printf("returning %d - %d consecutive matches\n",mismatch_position,pos5)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == (mismatch_position - pos5))); + } else { + /* Start block */ + diff_32 = (block_diff_sarray_32)(query_shifted,ptr, + plusp,genestrand,/*query_unk_mismatch_local_p*/true); + diff_32 = clear_start_32(diff_32,startdiscard); + + if (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5)); return (mismatch_position - pos5); } + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + offset += 32; - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ptr = &(ref_blocks[startblocki]); - ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;} -#else - ptr = &(ref_blocks[startblocki+12]); + /* Start row */ + while (++startcolumni < 4) { + diff_32 = (block_diff_sarray_32)(query_shifted,ptr, + plusp,genestrand,/*query_unk_mismatch_local_p*/true); + if (nonzero_p_32(diff_32)) { + 
mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5)); + return (mismatch_position - pos5); + } + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + offset += 32; + } +#ifdef HAVE_SSE2 + query_shifted += QUERY_NEXTROW; #endif - end = &(ref_blocks[endblocki]); - offset += STEP_SIZE; /* 128 or 32 */ - while (ptr < end) { - diff = (block_diff_sarray)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true); + ptr += GENOME_NEXTROW; + + + /* Middle rows */ +#ifdef HAVE_AVX2 + while (ptr + 24 <= endptr) { + diff_256 = (block_diff_sarray_256)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true); - if (nonzero_p(diff) /* != 0*/) { - mismatch_position = offset + (relpos = count_trailing_zeroes(diff)); + if (nonzero_p_256(diff_256)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_256(diff_256)); debug(printf("returning %d - %d consecutive matches\n",mismatch_position,pos5)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == (mismatch_position - pos5))); return (mismatch_position - pos5); } + query_shifted += 24; ptr += 24; + offset += 256; + } +#endif - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;} +#ifdef HAVE_SSE2 + while (ptr + 12 <= endptr) { + diff_128 = (block_diff_sarray_128)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true); + + if (nonzero_p_128(diff_128)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_128(diff_128)); + debug(printf("returning %d - %d consecutive matches\n",mismatch_position,pos5)); + return (mismatch_position - pos5); + } + query_shifted += 12; ptr += 12; + offset += 128; + } #else - ptr += 12; + while (ptr + 12 <= endptr) { + for (startcolumni = 0; startcolumni < 4; startcolumni++) { + diff_32 = 
(block_diff_sarray_32)(query_shifted,ptr, + plusp,genestrand,/*query_unk_mismatch_local_p*/true); + if (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5)); + return (mismatch_position - pos5); + } + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + offset += 32; + } + /* query_shifted += QUERY_NEXTROW; */ ptr += GENOME_NEXTROW; + } #endif - offset += STEP_SIZE; /* 128 or 32 */ + + /* End row */ + while (ptr < endptr) { + diff_32 = (block_diff_sarray_32)(query_shifted,ptr, + plusp,genestrand,/*query_unk_mismatch_local_p*/true); + + if (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5)); + return (mismatch_position - pos5); + } + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + offset += 32; } - /* Endblock */ - diff = (block_diff_sarray)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true); - diff = clear_end(diff,enddiscard); + /* End block */ + diff_32 = (block_diff_sarray_32)(query_shifted,ptr, + plusp,genestrand,/*query_unk_mismatch_local_p*/true); + diff_32 = clear_end_32(diff_32,enddiscard); - if (nonzero_p(diff)) { - mismatch_position = offset + (relpos = count_trailing_zeroes(diff)); - debug(printf("returning %d - %d consecutive matches\n",mismatch_position,pos5)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == (mismatch_position - pos5))); + if (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + debug(printf("Would return %d - %d consecutive matches\n",mismatch_position,pos5)); return (mismatch_position - pos5); } else { - debug(printf("returning %d - %d consecutive matches\n",pos3,pos5)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == (pos3 - pos5))); return (pos3 - pos5); 
} - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else } -#endif } /* Counts matches from pos3 to pos5 up to first mismatch. Modified from mismatches_right */ int Genome_consecutive_matches_leftward (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, - bool plusp, int genestrand) { -#ifdef DEBUG14 - int answer; -#endif + bool plusp, int genestrand) { int mismatch_position, offset, relpos, nshift; int startdiscard, enddiscard; Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32; - Genomecomp_T *query_shifted, *start, *ptr; - UINT4 diff_32; - Genomediff_T diff; + Genomecomp_T *query_shifted, *ptr, *startptr; #ifndef HAVE_BUILTIN_CLZ Genomecomp_T top; #endif int startcolumni, endcolumni; + static int ncalls = 0; + UINT4 diff_32; +#ifdef HAVE_SSE2 + __m128i diff_128; +#endif +#ifdef HAVE_AVX2 + __m256i diff_256; +#endif + + /* printf("Number of calls to leftward: %d\n",++ncalls); */ debug( printf("\n\n"); @@ -19105,142 +21895,226 @@ debug(printf("Query shifted %d:\n",nshift)); debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); query_shifted += (nshift+pos3)/STEP_SIZE*COMPRESS_BLOCKSIZE; +#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) +#else + query_shifted += endcolumni; +#endif + + startdiscard = (left+pos5) % 32; + enddiscard = (left+pos3) % 32; + offset = (pos3 - 1) - enddiscard + 32; + ptr = &(ref_blocks[endblocki_32]); + startptr = &(ref_blocks[startblocki_32]); if (startblocki_32 == endblocki_32) { - startdiscard = (left+pos5) % 32; - enddiscard = (left+pos3) % 32; - offset = (pos3 - 1) - enddiscard + 32; + /* Single block */ debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - diff_32 = (block_diff_sarray_32)(query_shifted,&(ref_blocks[endblocki_32]), - plusp,genestrand,/*query_unk_mismatch_local_p*/true); -#else - diff_32 = (block_diff_sarray_32)(query_shifted + endcolumni,&(ref_blocks[endblocki_32]), - 
plusp,genestrand,/*query_unk_mismatch_local_p*/true); -#endif + diff_32 = (block_diff_sarray_32)(query_shifted,ptr, + plusp,genestrand,/*query_unk_mismatch_local_p*/true); + diff_32 = clear_start_32(diff_32,startdiscard); + diff_32 = clear_end_32(diff_32,enddiscard); + + if (nonzero_p_32(diff_32)) { + mismatch_position = offset - (relpos = count_leading_zeroes_32(diff_32)); + debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position)); + return (pos3 - mismatch_position - 1); + } else { + debug(printf("Would return %d - %d consecutive matches\n",pos3,pos5)); + return (pos3 - pos5); + } + + } else if (startblocki == endblocki) { +#if defined(USE_SHIFT_FIRST_MISMATCH) && defined(HAVE_SSE2) + /* Shift */ + startdiscard += 96 - (endcolumni - startcolumni)*32; + enddiscard += 96; + assert(startdiscard == ((left+pos5) % 128) + (3 - endcolumni)*32); + assert(enddiscard == ((left+pos3) % 128) + (3 - endcolumni)*32); + + diff_128 = (block_diff_sarray_128_shift_hi)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true, + endcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); + + if (nonzero_p_128(diff_128)) { + mismatch_position = offset - (relpos = count_leading_zeroes_128(diff_128)); + debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position)); + return (pos3 - mismatch_position - 1); + } else { + debug(printf("Would return %d - %d consecutive matches\n",pos3,pos5)); + return (pos3 - pos5); + } + +#else + /* End block */ + diff_32 = (block_diff_sarray_32)(query_shifted,ptr, + plusp,genestrand,/*query_unk_mismatch_local_p*/true); + diff_32 = clear_end_32(diff_32,enddiscard); + + if (nonzero_p_32(diff_32)) { + mismatch_position = offset - (relpos = count_leading_zeroes_32(diff_32)); + debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position)); + return (pos3 - mismatch_position - 1); + } + query_shifted -= QUERY_NEXTCOL; ptr -= 
GENOME_NEXTCOL; + offset -= 32; + + /* Single row */ + while (--endcolumni > startcolumni) { + diff_32 = (block_diff_sarray_32)(query_shifted,ptr, + plusp,genestrand,/*query_unk_mismatch_local_p*/true); + + if (nonzero_p_32(diff_32)) { + mismatch_position = offset - (relpos = count_leading_zeroes_32(diff_32)); + debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position)); + return (pos3 - mismatch_position - 1); + } + query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL; + offset -= 32; + } + + /* Start block */ + diff_32 = (block_diff_sarray_32)(query_shifted,ptr, + plusp,genestrand,/*query_unk_mismatch_local_p*/true); diff_32 = clear_start_32(diff_32,startdiscard); - diff_32 = clear_end_32(diff_32,enddiscard); if (nonzero_p_32(diff_32)) { mismatch_position = offset - (relpos = count_leading_zeroes_32(diff_32)); debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position)); -#ifdef DEBUG14 - answer = (pos3 - mismatch_position - 1); -#else return (pos3 - mismatch_position - 1); -#endif } else { - debug(printf("returning %d - %d consecutive matches\n",pos3,pos5)); -#ifdef DEBUG14 - answer = (pos3 - pos5); -#else return (pos3 - pos5); -#endif } - } - -#ifndef DEBUG14 - else { -#endif - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - startblocki = startblocki_32; - endblocki = endblocki_32; -#endif - - startdiscard = (left+pos5) % STEP_SIZE; - enddiscard = (left+pos3) % STEP_SIZE; - offset = (pos3 - 1) - enddiscard + STEP_SIZE; - - debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u, offset = %d\n", - nshift,startdiscard,enddiscard,offset)); -#ifndef DEBUG14 - } #endif +#if defined(USE_WRAP_FIRST_MISMATCH) && defined(HAVE_SSSE3) + } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) { + /* Wrap */ + startdiscard += (startcolumni - endcolumni - 1)*32; + enddiscard += 96; + assert(startdiscard == ((left+pos5) % 128) - (endcolumni + 1)*32); + assert(enddiscard == ((left+pos3) % 128) + (3 - 
endcolumni)*32); + + diff_128 = (block_diff_sarray_128_wrap_hi)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true, + endcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else - if (startblocki == endblocki) { - diff = (block_diff_sarray)(query_shifted,&(ref_blocks[endblocki]), - plusp,genestrand,/*query_unk_mismatch_local_p*/true); - diff = clear_start(diff,startdiscard); - diff = clear_end(diff,enddiscard); - - if (nonzero_p(diff)) { - mismatch_position = offset - (relpos = count_leading_zeroes(diff)); + if (nonzero_p_128(diff_128)) { + mismatch_position = offset - (relpos = count_leading_zeroes_128(diff_128)); debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position)); - debug14(if (startblocki_32 == endblocki_32) assert(answer == (pos3 - mismatch_position - 1))); return (pos3 - mismatch_position - 1); } else { - debug(printf("returning %d - %d consecutive matches\n",pos3,pos5)); - debug14(if (startblocki_32 == endblocki_32) assert(answer == (pos3 - pos5))); + debug(printf("Would return %d - %d consecutive matches\n",pos3,pos5)); return (pos3 - pos5); } +#endif } else { -#endif + /* End block */ + diff_32 = (block_diff_sarray_32)(query_shifted,ptr, + plusp,genestrand,/*query_unk_mismatch_local_p*/true); + diff_32 = clear_end_32(diff_32,enddiscard); - /* Endblock */ - diff = (block_diff_sarray)(query_shifted,&(ref_blocks[endblocki]), - plusp,genestrand,/*query_unk_mismatch_local_p*/true); - diff = clear_end(diff,enddiscard); - - if (nonzero_p(diff)) { - mismatch_position = offset - (relpos = count_leading_zeroes(diff)); - debug(printf("returning %d - %d - 1 consecutive matches",pos3,mismatch_position)); - debug14(if (startblocki_32 == endblocki_32) assert(answer == (pos3 - mismatch_position - 1))); + if (nonzero_p_32(diff_32)) { + mismatch_position = offset - (relpos = count_leading_zeroes_32(diff_32)); + 
debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position)); return (pos3 - mismatch_position - 1); } + query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL; + offset -= 32; - query_shifted -= COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ptr = &(ref_blocks[endblocki]); - ptr -= 1; if (endcolumni-- == 0) {ptr -= 8; endcolumni = 3;} -#else - ptr = &(ref_blocks[endblocki-12]); + /* End row */ + while (--endcolumni >= 0) { + diff_32 = (block_diff_sarray_32)(query_shifted,ptr, + plusp,genestrand,/*query_unk_mismatch_local_p*/true); + + if (nonzero_p_32(diff_32)) { + mismatch_position = offset - (relpos = count_leading_zeroes_32(diff_32)); + debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position)); + return (pos3 - mismatch_position - 1); + } + query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL; + offset -= 32; + } +#ifdef HAVE_SSE2 + query_shifted -= QUERY_NEXTROW; #endif - start = &(ref_blocks[startblocki]); - offset -= STEP_SIZE; /* 128 or 32 */ - while (ptr > start) { - diff = (block_diff_sarray)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true); + ptr -= GENOME_NEXTROW; + - if (nonzero_p(diff)) { - mismatch_position = offset - (relpos = count_leading_zeroes(diff)); + /* Middle rows */ +#ifdef HAVE_AVX2 + while (ptr >= startptr + 24) { + diff_256 = (block_diff_sarray_256)(&(query_shifted[-15]),&(ptr[-15]),plusp,genestrand,/*query_unk_mismatch_local_p*/true); + + if (nonzero_p_256(diff_256)) { + mismatch_position = offset - (relpos = count_leading_zeroes_256(diff_256)); debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position)); - debug14(if (startblocki_32 == endblocki_32) assert(answer == (pos3 - mismatch_position - 1))); return (pos3 - mismatch_position - 1); } + query_shifted -= 24; ptr -= 24; + offset -= 256; + } +#endif - query_shifted -= COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ptr -= 1; if 
(endcolumni-- == 0) {ptr -= 8; endcolumni = 3;} +#ifdef HAVE_SSE2 + while (ptr >= startptr + 12) { + diff_128 = (block_diff_sarray_128)(&(query_shifted[-3]),&(ptr[-3]),plusp,genestrand,/*query_unk_mismatch_local_p*/true); + + if (nonzero_p_128(diff_128)) { + mismatch_position = offset - (relpos = count_leading_zeroes_128(diff_128)); + debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position)); + return (pos3 - mismatch_position - 1); + } + query_shifted -= 12; ptr -= 12; + offset -= 128; + } #else - ptr -= 12; + while (ptr >= startptr + 12) { + for (endcolumni = 3; endcolumni >= 0; --endcolumni) { + diff_32 = (block_diff_sarray_32)(query_shifted,ptr, + plusp,genestrand,/*query_unk_mismatch_local_p*/true); + + if (nonzero_p_32(diff_32)) { + mismatch_position = offset - (relpos = count_leading_zeroes_32(diff_32)); + debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position)); + return (pos3 - mismatch_position - 1); + } + query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL; + offset -= 32; + } + /* query_shifted -= QUERY_NEXTROW; */ ptr -= GENOME_NEXTROW; + } #endif - offset -= STEP_SIZE; /* 128 or 32 */ + + /* Start row */ + while (ptr > startptr) { + diff_32 = (block_diff_sarray_32)(query_shifted,ptr, + plusp,genestrand,/*query_unk_mismatch_local_p*/true); + if (nonzero_p_32(diff_32)) { + mismatch_position = offset - (relpos = count_leading_zeroes_32(diff_32)); + debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position)); + return (pos3 - mismatch_position - 1); + } + query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL; + offset -= 32; } - /* Startblock */ - diff = (block_diff_sarray)(query_shifted,ptr,plusp,genestrand,/*query_unk_mismatch_local_p*/true); - diff = clear_start(diff,startdiscard); + /* Start block */ + diff_32 = (block_diff_sarray_32)(query_shifted,ptr, + plusp,genestrand,/*query_unk_mismatch_local_p*/true); + diff_32 = clear_start_32(diff_32,startdiscard); - if 
(nonzero_p(diff)) { - mismatch_position = offset - (relpos = count_leading_zeroes(diff)); + if (nonzero_p_32(diff_32)) { + mismatch_position = offset - (relpos = count_leading_zeroes_32(diff_32)); debug(printf("returning %d - %d - 1 consecutive matches\n",pos3,mismatch_position)); - debug14(if (startblocki_32 == endblocki_32) assert(answer == (pos3 - mismatch_position - 1))); return (pos3 - mismatch_position - 1); } else { - debug(printf("returning %d - %d consecutive matches\n",pos3,pos5)); - debug14(if (startblocki_32 == endblocki_32) assert(answer == (pos3 - pos5))); return (pos3 - pos5); } - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else } -#endif } @@ -19403,237 +22277,48 @@ #else shifted1[0] = (ptr1[0] << nshift) | (ptr1_prev[0] >> rightshift); shifted1[1] = (ptr1[4] << nshift) | (ptr1_prev[4] >> rightshift); - shifted1[2] = (ptr1[8] << nshift) | (ptr1_prev[8] >> rightshift); -#endif - } - debug2(Compress_print_one_block(ptr1)); - debug2(Compress_print_one_block(ptr2)); - debug2(Compress_print_one_block(shifted1)); - -#ifdef WORDS_BIGENDIAN - diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8])); -#else - diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]); -#endif - diff = clear_end_32(diff,enddiscard); - - if (diff /* != 0U */) { -#ifdef HAVE_BUILTIN_CTZ - mismatch_position = offset + (relpos = __builtin_ctz(diff)); -#else - mismatch_position = offset + mod_37_bit_position[(-diff & diff) % 37]; -#endif - debug2(printf("case 4: returning %d matches\n",mismatch_position)); - return mismatch_position; - } else { - debug2(printf("case 5: returning offset %d + enddiscard %d matches\n",offset,enddiscard)); - return offset + enddiscard; - } - } - - } else if (ptr2 == end) { - /* Single block */ - enddiscard = genomelength % 32; /* Not STEP_SIZE */ - - ptr1 = &(ref_blocks[startblocki_1]); - ptr2 = 
&(ref_blocks[startblocki_2]); -#ifdef WORDS_BIGENDIAN - shifted1[0] = Bigendian_convert_uint(ptr1[0]) << nshift; - shifted1[1] = Bigendian_convert_uint(ptr1[4]) << nshift; - shifted1[2] = Bigendian_convert_uint(ptr1[8]) << nshift; -#else - shifted1[0] = ptr1[0] << nshift; - shifted1[1] = ptr1[4] << nshift; - shifted1[2] = ptr1[8] << nshift; -#endif - debug2(Compress_print_one_block(ptr1)); - debug2(Compress_print_one_block(ptr2)); - debug2(Compress_print_one_block(shifted1)); - -#ifdef WORDS_BIGENDIAN - diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8])); -#else - diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]); -#endif - diff = clear_start_32(diff,startdiscard); - diff = clear_end_32(diff,enddiscard); - - if (diff /* != 0U */) { -#ifdef HAVE_BUILTIN_CTZ - mismatch_position = offset + (relpos = __builtin_ctz(diff)); -#else - mismatch_position = offset + mod_37_bit_position[(-diff & diff) % 37]; -#endif - debug2(printf("case 6: returning %d matches\n",mismatch_position)); - return mismatch_position; - } else { - debug2(printf("case 7: returning %d - %d matches\n",enddiscard,startdiscard)); - return (enddiscard - startdiscard); - } - - } else { - - /* Startblock */ - ptr1 = &(ref_blocks[startblocki_1]); - ptr2 = &(ref_blocks[startblocki_2]); -#ifdef WORDS_BIGENDIAN - shifted1[0] = (Bigendian_convert_uint(ptr1[0]) << nshift); - shifted1[1] = (Bigendian_convert_uint(ptr1[4]) << nshift); - shifted1[2] = (Bigendian_convert_uint(ptr1[8]) << nshift); -#else - shifted1[0] = (ptr1[0] << nshift); - shifted1[1] = (ptr1[4] << nshift); - shifted1[2] = (ptr1[8] << nshift); -#endif - debug2(Compress_print_one_block(ptr1)); - debug2(Compress_print_one_block(ptr2)); - debug2(Compress_print_one_block(shifted1)); - -#ifdef WORDS_BIGENDIAN - diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | 
(shifted1[2] ^ Bigendian_convert_uint(ptr2[8])); -#else - diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]); -#endif - diff = clear_start_32(diff,startdiscard); - - if (diff /* != 0U */) { -#ifdef HAVE_BUILTIN_CTZ - mismatch_position = offset + (relpos = __builtin_ctz(diff)); -#else - mismatch_position = offset + mod_37_bit_position[(-diff & diff) % 37]; -#endif - debug2(printf("case 8: returning %d matches\n",mismatch_position)); - return mismatch_position; - } else { - ptr1_prev = ptr1; - ptr1 += 1; if (++startcolumni_1 == 4) {ptr1 += 8; startcolumni_1 = 0;} - ptr2 += 1; if (++startcolumni_2 == 4) {ptr2 += 8; startcolumni_2 = 0;} - offset += 32; /* Not STEP_SIZE */ - } - - while (ptr1 < end && ptr2 < end) { - if (nshift == 0) { - /* rightshift of 32 is a no-op */ -#ifdef WORDS_BIGENDIAN - shifted1[0] = Bigendian_convert_uint(ptr1[0]); shifted1[1] = Bigendian_convert_uint(ptr1[4]); shifted1[2] = Bigendian_convert_uint(ptr1[8]); -#else - shifted1[0] = ptr1[0]; shifted1[1] = ptr1[4]; shifted1[2] = ptr1[8]; -#endif - } else { -#ifdef WORDS_BIGENDIAN - shifted1[0] = (Bigendian_convert_uint(ptr1[0]) << nshift) | (Bigendian_convert_uint(ptr1_prev[0]) >> rightshift); - shifted1[1] = (Bigendian_convert_uint(ptr1[4]) << nshift) | (Bigendian_convert_uint(ptr1_prev[4]) >> rightshift); - shifted1[2] = (Bigendian_convert_uint(ptr1[8]) << nshift) | (Bigendian_convert_uint(ptr1_prev[8]) >> rightshift); -#else - shifted1[0] = (ptr1[0] << nshift) | (ptr1_prev[0] >> rightshift); - shifted1[1] = (ptr1[4] << nshift) | (ptr1_prev[4] >> rightshift); - shifted1[2] = (ptr1[8] << nshift) | (ptr1_prev[8] >> rightshift); -#endif - } - debug2(Compress_print_one_block(ptr1)); - debug2(Compress_print_one_block(ptr2)); - debug2(Compress_print_one_block(shifted1)); - -#ifdef WORDS_BIGENDIAN - diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8])); -#else - diff = 
(shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]); -#endif - if (diff /* != 0U */) { -#ifdef HAVE_BUILTIN_CTZ - mismatch_position = offset + (relpos = __builtin_ctz(diff)); -#else - mismatch_position = offset + mod_37_bit_position[(-diff & diff) % 37]; -#endif - debug2(printf("case 9: returning %d matches\n",mismatch_position)); - return mismatch_position; - } else { - ptr1_prev = ptr1; - ptr1 += 1; if (++startcolumni_1 == 4) {ptr1 += 8; startcolumni_1 = 0;} - ptr2 += 1; if (++startcolumni_2 == 4) {ptr2 += 8; startcolumni_2 = 0;} - offset += 32; /* Not STEP_SIZE */ - } - } - - /* Last block of entire genome */ - enddiscard = genomelength % 32; /* Not STEP_SIZE */ - if (ptr2 == end) { - debug2(printf("ptr2 == end\n")); - /* Keep enddiscard */ - nblocks = 1; - } else if (nshift + enddiscard < 32) { - debug2(printf("ptr1 == end and nshift %d + enddiscard %d < 32\n",nshift,enddiscard)); - enddiscard = nshift + enddiscard; - nblocks = 1; - } else if (nshift > 0) { - debug2(printf("ptr1 == end and nshift %d + enddiscard %d >= 32\n",nshift,enddiscard)); - enddiscard -= (32 - nshift); - nblocks = 2; - } else { - debug2(printf("ptr1 == end and nshift %d + enddiscard %d >= 32\n",nshift,enddiscard)); - /* Keep enddiscard */ - nblocks = 2; - } - - /* Block 1 */ - if (nshift == 0) { - /* rightshift of 32 is a no-op */ -#ifdef WORDS_BIGENDIAN - shifted1[0] = Bigendian_convert_uint(ptr1[0]); shifted1[1] = Bigendian_convert_uint(ptr1[4]); shifted1[2] = Bigendian_convert_uint(ptr1[8]); -#else - shifted1[0] = ptr1[0]; shifted1[1] = ptr1[4]; shifted1[2] = ptr1[8]; -#endif - } else { -#ifdef WORDS_BIGENDIAN - shifted1[0] = (Bigendian_convert_uint(ptr1[0]) << nshift) | (Bigendian_convert_uint(ptr1_prev[0]) >> rightshift); - shifted1[1] = (Bigendian_convert_uint(ptr1[4]) << nshift) | (Bigendian_convert_uint(ptr1_prev[4]) >> rightshift); - shifted1[2] = (Bigendian_convert_uint(ptr1[8]) << nshift) | (Bigendian_convert_uint(ptr1_prev[8]) >> rightshift); -#else - 
shifted1[0] = (ptr1[0] << nshift) | (ptr1_prev[0] >> rightshift); - shifted1[1] = (ptr1[4] << nshift) | (ptr1_prev[4] >> rightshift); - shifted1[2] = (ptr1[8] << nshift) | (ptr1_prev[8] >> rightshift); + shifted1[2] = (ptr1[8] << nshift) | (ptr1_prev[8] >> rightshift); #endif - } - debug2(Compress_print_one_block(ptr1)); - debug2(Compress_print_one_block(ptr2)); - debug2(Compress_print_one_block(shifted1)); + } + debug2(Compress_print_one_block(ptr1)); + debug2(Compress_print_one_block(ptr2)); + debug2(Compress_print_one_block(shifted1)); #ifdef WORDS_BIGENDIAN - diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8])); + diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8])); #else - diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]); + diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]); #endif - if (nblocks == 1) { diff = clear_end_32(diff,enddiscard); - } - if (diff /* != 0U */) { + if (diff /* != 0U */) { #ifdef HAVE_BUILTIN_CTZ - mismatch_position = offset + (relpos = __builtin_ctz(diff)); + mismatch_position = offset + (relpos = __builtin_ctz(diff)); #else - mismatch_position = offset + mod_37_bit_position[(-diff & diff) % 37]; + mismatch_position = offset + mod_37_bit_position[(-diff & diff) % 37]; #endif - debug2(printf("case 10: returning %d matches\n",mismatch_position)); - return mismatch_position; - } else if (nblocks == 1) { - debug2(printf("case 11: returning offset %d + enddiscard %d matches\n",offset,enddiscard)); - return offset + enddiscard; - } else { - ptr1_prev = ptr1; - ptr1 += 1; if (++startcolumni_1 == 4) {ptr1 += 8; startcolumni_1 = 0;} - ptr2 += 1; if (++startcolumni_2 == 4) {ptr2 += 8; startcolumni_2 = 0;} - offset += 32; /* Not STEP_SIZE */ + debug2(printf("case 4: returning %d 
matches\n",mismatch_position)); + return mismatch_position; + } else { + debug2(printf("case 5: returning offset %d + enddiscard %d matches\n",offset,enddiscard)); + return offset + enddiscard; + } } - /* Block 2 */ + } else if (ptr2 == end) { + /* Single block */ + enddiscard = genomelength % 32; /* Not STEP_SIZE */ + + ptr1 = &(ref_blocks[startblocki_1]); + ptr2 = &(ref_blocks[startblocki_2]); #ifdef WORDS_BIGENDIAN - shifted1[0] = (Bigendian_convert_uint(ptr1_prev[0]) >> rightshift); - shifted1[1] = (Bigendian_convert_uint(ptr1_prev[4]) >> rightshift); - shifted1[2] = (Bigendian_convert_uint(ptr1_prev[8]) >> rightshift); + shifted1[0] = Bigendian_convert_uint(ptr1[0]) << nshift; + shifted1[1] = Bigendian_convert_uint(ptr1[4]) << nshift; + shifted1[2] = Bigendian_convert_uint(ptr1[8]) << nshift; #else - shifted1[0] = (ptr1_prev[0] >> rightshift); - shifted1[1] = (ptr1_prev[4] >> rightshift); - shifted1[2] = (ptr1_prev[8] >> rightshift); + shifted1[0] = ptr1[0] << nshift; + shifted1[1] = ptr1[4] << nshift; + shifted1[2] = ptr1[8] << nshift; #endif debug2(Compress_print_one_block(ptr1)); debug2(Compress_print_one_block(ptr2)); @@ -19644,6 +22329,7 @@ #else diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]); #endif + diff = clear_start_32(diff,startdiscard); diff = clear_end_32(diff,enddiscard); if (diff /* != 0U */) { @@ -19652,548 +22338,227 @@ #else mismatch_position = offset + mod_37_bit_position[(-diff & diff) % 37]; #endif - debug2(printf("case 12: returning %d matches\n",mismatch_position)); + debug2(printf("case 6: returning %d matches\n",mismatch_position)); return mismatch_position; } else { - debug2(printf("case 13: returning offset %d + enddiscard %d matches\n",offset,enddiscard)); - return offset + enddiscard; + debug2(printf("case 7: returning %d - %d matches\n",enddiscard,startdiscard)); + return (enddiscard - startdiscard); } - } -} - - - - -static int -count_mismatches_limit (Compress_T query_compress, Univcoord_T 
left, - int pos5, int pos3, int max_mismatches, bool plusp, int genestrand) { -#ifdef DEBUG14 - int answer; -#endif - int nmismatches; - int startdiscard, enddiscard; - Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32; - Genomecomp_T *endblock, *ptr; - Genomecomp_T *query_shifted, *query_shifted_save_start; - Genomediff_T diff; - UINT4 diff_32; - int nshift; - int startcolumni, endcolumni; - - - debug( - printf("\n\n"); - printf("Genome (in count_mismatches_limit) from %u+%d to %u+%d:\n",left,pos5,left,pos3); - Genome_print_blocks(ref_blocks,left+pos5,left+pos3); - printf("\n"); - ); - - - startblocki = (left+pos5)/128U*12; - startcolumni = ((left+pos5) % 128) / 32; - startblocki_32 = startblocki + startcolumni; - - endblocki = (left+pos3)/128U*12; - endcolumni = ((left+pos3) % 128) / 32; - endblocki_32 = endblocki + endcolumni; - - debug(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n", - left,pos5,pos3,startblocki,endblocki)); - - nshift = left % STEP_SIZE; - query_shifted = Compress_shift(query_compress,nshift); - debug(printf("Query shifted %d:\n",nshift)); - debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); - query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE; - - if (endblocki_32 == startblocki_32) { - debug(printf("** Single block **\n")); - startdiscard = (left+pos5) % 32; - enddiscard = (left+pos3) % 32; - debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); - - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[startblocki_32]), - plusp,genestrand,query_unk_mismatch_p); -#else - diff_32 = (block_diff_32)(query_shifted + startcolumni,&(ref_blocks[startblocki_32]), - plusp,genestrand,query_unk_mismatch_p); -#endif - diff_32 = clear_start_32(diff_32,startdiscard); - diff_32 = clear_end_32(diff_32,enddiscard); - -#ifdef DEBUG14 - answer = popcount_ones_32(diff_32); -#else - return 
popcount_ones_32(diff_32); -#endif - - } -#ifndef DEBUG14 - else { -#endif - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - startblocki = startblocki_32; - endblocki = endblocki_32; -#endif - - startdiscard = (left+pos5) % STEP_SIZE; - enddiscard = (left+pos3) % STEP_SIZE; - - debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); -#ifndef DEBUG14 - } -#endif + } else { -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) + /* Startblock */ + ptr1 = &(ref_blocks[startblocki_1]); + ptr2 = &(ref_blocks[startblocki_2]); +#ifdef WORDS_BIGENDIAN + shifted1[0] = (Bigendian_convert_uint(ptr1[0]) << nshift); + shifted1[1] = (Bigendian_convert_uint(ptr1[4]) << nshift); + shifted1[2] = (Bigendian_convert_uint(ptr1[8]) << nshift); #else - if (endblocki == startblocki) { - debug(printf("** Single block **\n")); - diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); - diff = clear_end(diff,enddiscard); - - debug(print_diff_popcount(diff)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == popcount_ones(diff))); - return popcount_ones(diff); - - } else if (endblocki == startblocki + 12) { - /* Only two blocks to check */ - - if (STEP_SIZE - startdiscard >= enddiscard) { - /* Two blocks to check and more bits counted in startblock */ - debug(printf("* Two blocks, start block first **\n")); - - /* 1/2: Startblock */ - diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); - - debug(print_diff_popcount(diff)); - if ((nmismatches = popcount_ones(diff)) > max_mismatches) { - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches)); - return nmismatches; - } - - /* 2/2: Endblock */ - diff = (block_diff)(/*endblock*/query_shifted+COMPRESS_BLOCKSIZE, - &(ref_blocks[endblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff 
= clear_end(diff,enddiscard); - - debug(print_diff_popcount(diff)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff))); - return nmismatches + popcount_ones(diff); - - } else { - /* Two blocks to check and more bits counted in endblock */ - debug(printf("** Two blocks, end block first **\n")); - - /* 1/2: Endblock */ - diff = (block_diff)(/*endblock*/query_shifted+COMPRESS_BLOCKSIZE, - &(ref_blocks[endblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_end(diff,enddiscard); - - debug(print_diff_popcount(diff)); - if ((nmismatches = popcount_ones(diff)) > max_mismatches) { - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches)); - return nmismatches; - } - - /* 2/2: Startblock */ - diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); - - debug(print_diff_popcount(diff)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff))); - return nmismatches + popcount_ones(diff); - } - - } else { + shifted1[0] = (ptr1[0] << nshift); + shifted1[1] = (ptr1[4] << nshift); + shifted1[2] = (ptr1[8] << nshift); #endif + debug2(Compress_print_one_block(ptr1)); + debug2(Compress_print_one_block(ptr2)); + debug2(Compress_print_one_block(shifted1)); - /* More than 2 blocks to check */ - debug(printf("** More than two blocks **\n")); - - query_shifted_save_start = query_shifted; - - /* 2..(n-1) / n: Check all middle blocks first */ - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ptr = &(ref_blocks[startblocki]); - ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;} +#ifdef WORDS_BIGENDIAN + diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8])); #else - ptr = &(ref_blocks[startblocki+12]); + diff = (shifted1[0] ^ 
ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]); #endif - endblock = &(ref_blocks[endblocki]); - nmismatches = 0; - - while (ptr < endblock) { - diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); - - debug(print_diff_popcount(diff)); - if ((nmismatches += popcount_ones(diff)) > max_mismatches) { - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches)); - return nmismatches; - } + diff = clear_start_32(diff,startdiscard); - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;} + if (diff /* != 0U */) { +#ifdef HAVE_BUILTIN_CTZ + mismatch_position = offset + (relpos = __builtin_ctz(diff)); #else - ptr += 12; + mismatch_position = offset + mod_37_bit_position[(-diff & diff) % 37]; #endif - } - - if (enddiscard >= STEP_SIZE - startdiscard) { - /* More bits in end block */ - debug(printf("** Final block, end block first **\n")); - - /* n/n: Go first to end block */ - diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); - diff = clear_end(diff,enddiscard); - - debug(print_diff_popcount(diff)); - if ((nmismatches += popcount_ones(diff)) > max_mismatches) { - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches)); - return nmismatches; - } - - /* 1/n: Go second to start block */ - diff = (block_diff)(query_shifted_save_start,&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); - - debug(print_diff_popcount(diff)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff))); - return nmismatches + popcount_ones(diff); - + debug2(printf("case 8: returning %d matches\n",mismatch_position)); + return mismatch_position; } else { - debug(printf("** Final block, start block first **\n")); - - /* 1/n: Go first to start block */ - diff = 
(block_diff)(query_shifted_save_start,&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); - - debug(print_diff_popcount(diff)); - if ((nmismatches += popcount_ones(diff)) > max_mismatches) { - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches)); - return nmismatches; - } - - /* n/n: Go second to end block */ - diff = (block_diff)(query_shifted,&(ref_blocks[endblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_end(diff,enddiscard); - - debug(print_diff_popcount(diff)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff))); - return nmismatches + popcount_ones(diff); + ptr1_prev = ptr1; + ptr1 += 1; if (++startcolumni_1 == 4) {ptr1 += 8; startcolumni_1 = 0;} + ptr2 += 1; if (++startcolumni_2 == 4) {ptr2 += 8; startcolumni_2 = 0;} + offset += 32; /* Not STEP_SIZE */ } -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else - } -#endif -} - - -static int -count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, int max_mismatches, - bool plusp, int genestrand) { -#ifdef DEBUG14 - int answer; -#endif - int nmismatches; - int startdiscard, enddiscard; - Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32; - Genomecomp_T *endblock; - Genomecomp_T *query_shifted, *query_shifted_save_start; - UINT4 diff_32; - Genomediff_T diff; - int nshift; - Genomecomp_T *ref_ptr, *alt_ptr; - int startcolumni, endcolumni; - - - debug( - printf("\n\n"); - printf("Genome (in count_mismatches_limit_snps) from %u+%d to %u+%d\n",left,pos5,left,pos3); - Genome_print_blocks_snp(ref_blocks,snp_blocks,left+pos5,left+pos3); - printf("\n"); - ); - - - startblocki = (left+pos5)/128U*12; - startcolumni = ((left+pos5) % 128) / 32; - startblocki_32 = startblocki + startcolumni; - - endblocki = (left+pos3)/128U*12; - endcolumni = ((left+pos3) % 128) / 32; - endblocki_32 = endblocki + endcolumni; - - 
debug(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n", - left,pos5,pos3,startblocki,endblocki)); - - nshift = left % STEP_SIZE; - query_shifted = Compress_shift(query_compress,nshift); - debug(printf("Query shifted %d:\n",nshift)); - debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); - query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE; - - if (endblocki_32 == startblocki_32) { - debug(printf("** Single block **\n")); - startdiscard = (left+pos5) % 32; - enddiscard = (left+pos3) % 32; - debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); - - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]), - plusp,genestrand,query_unk_mismatch_p); -#else - diff_32 = (block_diff_snp_32)(query_shifted + startcolumni,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]), - plusp,genestrand,query_unk_mismatch_p); -#endif - diff_32 = clear_start_32(diff_32,startdiscard); - diff_32 = clear_end_32(diff_32,enddiscard); - -#ifdef DEBUG14 - answer = popcount_ones_32(diff_32); + while (ptr1 < end && ptr2 < end) { + if (nshift == 0) { + /* rightshift of 32 is a no-op */ +#ifdef WORDS_BIGENDIAN + shifted1[0] = Bigendian_convert_uint(ptr1[0]); shifted1[1] = Bigendian_convert_uint(ptr1[4]); shifted1[2] = Bigendian_convert_uint(ptr1[8]); #else - return popcount_ones_32(diff_32); -#endif - - } -#ifndef DEBUG14 - else { + shifted1[0] = ptr1[0]; shifted1[1] = ptr1[4]; shifted1[2] = ptr1[8]; #endif - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - startblocki = startblocki_32; - endblocki = endblocki_32; + } else { +#ifdef WORDS_BIGENDIAN + shifted1[0] = (Bigendian_convert_uint(ptr1[0]) << nshift) | (Bigendian_convert_uint(ptr1_prev[0]) >> rightshift); + shifted1[1] = (Bigendian_convert_uint(ptr1[4]) << nshift) | (Bigendian_convert_uint(ptr1_prev[4]) >> rightshift); + shifted1[2] = 
(Bigendian_convert_uint(ptr1[8]) << nshift) | (Bigendian_convert_uint(ptr1_prev[8]) >> rightshift); +#else + shifted1[0] = (ptr1[0] << nshift) | (ptr1_prev[0] >> rightshift); + shifted1[1] = (ptr1[4] << nshift) | (ptr1_prev[4] >> rightshift); + shifted1[2] = (ptr1[8] << nshift) | (ptr1_prev[8] >> rightshift); #endif + } + debug2(Compress_print_one_block(ptr1)); + debug2(Compress_print_one_block(ptr2)); + debug2(Compress_print_one_block(shifted1)); - startdiscard = (left+pos5) % STEP_SIZE; - enddiscard = (left+pos3) % STEP_SIZE; - - debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); - -#ifndef DEBUG14 - } +#ifdef WORDS_BIGENDIAN + diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8])); +#else + diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]); #endif - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) + if (diff /* != 0U */) { +#ifdef HAVE_BUILTIN_CTZ + mismatch_position = offset + (relpos = __builtin_ctz(diff)); #else - if (endblocki == startblocki) { - debug(printf("** Single block **\n")); - diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); - diff = clear_end(diff,enddiscard); - - debug(print_diff_popcount(diff)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == popcount_ones(diff))); - return popcount_ones(diff); - - } else if (endblocki == startblocki + 12) { - /* Only two blocks to check */ - - if (STEP_SIZE - startdiscard >= enddiscard) { - /* Two blocks to check and more bits counted in startblock */ - debug(printf("* Two blocks, start block first **\n")); - - /* 1/2: Startblock */ - diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = 
clear_start(diff,startdiscard); - - debug(print_diff_popcount(diff)); - nmismatches /* init */ = popcount_ones(diff); - if (nmismatches > max_mismatches) { - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches)); - return nmismatches; - } - - /* 2/2: Endblock */ - diff = (block_diff_snp)(/*endblock*/query_shifted+COMPRESS_BLOCKSIZE, - &(snp_blocks[endblocki]),&(ref_blocks[endblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_end(diff,enddiscard); - - debug(print_diff_popcount(diff)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff))); - return nmismatches + popcount_ones(diff); - - } else { - /* Two blocks to check and more bits counted in endblock */ - debug(printf("** Two blocks, end block first **\n")); - - /* 1/2: Endblock */ - diff = (block_diff_snp)(/*endblock*/query_shifted+COMPRESS_BLOCKSIZE, - &(snp_blocks[endblocki]),&(ref_blocks[endblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_end(diff,enddiscard); - - debug(print_diff_popcount(diff)); - nmismatches /* init */ = popcount_ones(diff); - if (nmismatches > max_mismatches) { - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches)); - return nmismatches; + mismatch_position = offset + mod_37_bit_position[(-diff & diff) % 37]; +#endif + debug2(printf("case 9: returning %d matches\n",mismatch_position)); + return mismatch_position; + } else { + ptr1_prev = ptr1; + ptr1 += 1; if (++startcolumni_1 == 4) {ptr1 += 8; startcolumni_1 = 0;} + ptr2 += 1; if (++startcolumni_2 == 4) {ptr2 += 8; startcolumni_2 = 0;} + offset += 32; /* Not STEP_SIZE */ } + } - /* 2/2: Startblock */ - diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); - - debug(print_diff_popcount(diff)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff))); - return 
nmismatches + popcount_ones(diff); + /* Last block of entire genome */ + enddiscard = genomelength % 32; /* Not STEP_SIZE */ + if (ptr2 == end) { + debug2(printf("ptr2 == end\n")); + /* Keep enddiscard */ + nblocks = 1; + } else if (nshift + enddiscard < 32) { + debug2(printf("ptr1 == end and nshift %d + enddiscard %d < 32\n",nshift,enddiscard)); + enddiscard = nshift + enddiscard; + nblocks = 1; + } else if (nshift > 0) { + debug2(printf("ptr1 == end and nshift %d + enddiscard %d >= 32\n",nshift,enddiscard)); + enddiscard -= (32 - nshift); + nblocks = 2; + } else { + debug2(printf("ptr1 == end and nshift %d + enddiscard %d >= 32\n",nshift,enddiscard)); + /* Keep enddiscard */ + nblocks = 2; } - } else { + /* Block 1 */ + if (nshift == 0) { + /* rightshift of 32 is a no-op */ +#ifdef WORDS_BIGENDIAN + shifted1[0] = Bigendian_convert_uint(ptr1[0]); shifted1[1] = Bigendian_convert_uint(ptr1[4]); shifted1[2] = Bigendian_convert_uint(ptr1[8]); +#else + shifted1[0] = ptr1[0]; shifted1[1] = ptr1[4]; shifted1[2] = ptr1[8]; #endif - - /* More than 2 blocks to check */ - debug(printf("** More than two blocks **\n")); - - query_shifted_save_start = query_shifted; - - /* 2..(n-1) / n: Check all middle blocks first */ - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ref_ptr = &(ref_blocks[startblocki]); - alt_ptr = &(snp_blocks[startblocki]); - ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;} -#else - ref_ptr = &(ref_blocks[startblocki+12]); - alt_ptr = &(snp_blocks[startblocki+12]); -#endif - endblock = &(ref_blocks[endblocki]); - nmismatches = 0; - - while (ref_ptr < endblock) { - diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); - - debug(print_diff_popcount(diff)); - if ((nmismatches += popcount_ones(diff)) > max_mismatches) { - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches)); - return nmismatches; - } - - 
query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;} + } else { +#ifdef WORDS_BIGENDIAN + shifted1[0] = (Bigendian_convert_uint(ptr1[0]) << nshift) | (Bigendian_convert_uint(ptr1_prev[0]) >> rightshift); + shifted1[1] = (Bigendian_convert_uint(ptr1[4]) << nshift) | (Bigendian_convert_uint(ptr1_prev[4]) >> rightshift); + shifted1[2] = (Bigendian_convert_uint(ptr1[8]) << nshift) | (Bigendian_convert_uint(ptr1_prev[8]) >> rightshift); #else - ref_ptr += 12; alt_ptr += 12; + shifted1[0] = (ptr1[0] << nshift) | (ptr1_prev[0] >> rightshift); + shifted1[1] = (ptr1[4] << nshift) | (ptr1_prev[4] >> rightshift); + shifted1[2] = (ptr1[8] << nshift) | (ptr1_prev[8] >> rightshift); #endif } + debug2(Compress_print_one_block(ptr1)); + debug2(Compress_print_one_block(ptr2)); + debug2(Compress_print_one_block(shifted1)); - if (enddiscard >= STEP_SIZE - startdiscard) { - /* More bits in end block */ - debug(printf("** Final block, end block first **\n")); - - /* n/n: Go first to end block */ - diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); - diff = clear_end(diff,enddiscard); - - debug(print_diff_popcount(diff)); - if ((nmismatches += popcount_ones(diff)) > max_mismatches) { - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches)); - return nmismatches; - } - - /* 1/n: Go second to start block */ - diff = (block_diff_snp)(query_shifted_save_start,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); - - debug(print_diff_popcount(diff)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff))); - return nmismatches + popcount_ones(diff); +#ifdef WORDS_BIGENDIAN + diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ 
Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8])); +#else + diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]); +#endif + if (nblocks == 1) { + diff = clear_end_32(diff,enddiscard); + } + if (diff /* != 0U */) { +#ifdef HAVE_BUILTIN_CTZ + mismatch_position = offset + (relpos = __builtin_ctz(diff)); +#else + mismatch_position = offset + mod_37_bit_position[(-diff & diff) % 37]; +#endif + debug2(printf("case 10: returning %d matches\n",mismatch_position)); + return mismatch_position; + } else if (nblocks == 1) { + debug2(printf("case 11: returning offset %d + enddiscard %d matches\n",offset,enddiscard)); + return offset + enddiscard; } else { - debug(printf("** Final block, start block first **\n")); - - /* 1/n: Go first to start block */ - diff = (block_diff_snp)(query_shifted_save_start,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); - - debug(print_diff_popcount(diff)); - if ((nmismatches += popcount_ones(diff)) > max_mismatches) { - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches)); - return nmismatches; - } - - /* n/n: Go second to end block */ - diff = (block_diff_snp)(query_shifted,&(snp_blocks[endblocki]),&(ref_blocks[endblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_end(diff,enddiscard); - - debug(print_diff_popcount(diff)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff))); - return nmismatches + popcount_ones(diff); + ptr1_prev = ptr1; + ptr1 += 1; if (++startcolumni_1 == 4) {ptr1 += 8; startcolumni_1 = 0;} + ptr2 += 1; if (++startcolumni_2 == 4) {ptr2 += 8; startcolumni_2 = 0;} + offset += 32; /* Not STEP_SIZE */ } -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) + /* Block 2 */ +#ifdef WORDS_BIGENDIAN + shifted1[0] = (Bigendian_convert_uint(ptr1_prev[0]) >> rightshift); + shifted1[1] = 
(Bigendian_convert_uint(ptr1_prev[4]) >> rightshift); + shifted1[2] = (Bigendian_convert_uint(ptr1_prev[8]) >> rightshift); #else - } + shifted1[0] = (ptr1_prev[0] >> rightshift); + shifted1[1] = (ptr1_prev[4] >> rightshift); + shifted1[2] = (ptr1_prev[8] >> rightshift); #endif -} - - -int -Genome_count_mismatches_limit (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, - int max_mismatches, bool plusp, int genestrand) { - -#if 0 - if (dibasep) { - debug(printf("Dibase_count_mismatches_limit from %u+%d to %u+%d with max_mismatches %d:\n", - left,pos5,left,pos3,max_mismatches)); + debug2(Compress_print_one_block(ptr1)); + debug2(Compress_print_one_block(ptr2)); + debug2(Compress_print_one_block(shifted1)); - return Dibase_count_mismatches_limit(&(*ncolordiffs),query,pos5,pos3, - /*startpos*/left+pos5,/*endpos*/left+pos3,max_mismatches); - } +#ifdef WORDS_BIGENDIAN + diff = (shifted1[0] ^ Bigendian_convert_uint(ptr2[0])) | (shifted1[1] ^ Bigendian_convert_uint(ptr2[4])) | (shifted1[2] ^ Bigendian_convert_uint(ptr2[8])); +#else + diff = (shifted1[0] ^ ptr2[0]) | (shifted1[1] ^ ptr2[4]) | (shifted1[2] ^ ptr2[8]); #endif + diff = clear_end_32(diff,enddiscard); - if (snp_blocks == NULL) { - return count_mismatches_limit(query_compress,left,pos5,pos3,max_mismatches,plusp,genestrand); - } else { - return count_mismatches_limit_snps(query_compress,left,pos5,pos3,max_mismatches,plusp,genestrand); + if (diff /* != 0U */) { +#ifdef HAVE_BUILTIN_CTZ + mismatch_position = offset + (relpos = __builtin_ctz(diff)); +#else + mismatch_position = offset + mod_37_bit_position[(-diff & diff) % 37]; +#endif + debug2(printf("case 12: returning %d matches\n",mismatch_position)); + return mismatch_position; + } else { + debug2(printf("case 13: returning offset %d + enddiscard %d matches\n",offset,enddiscard)); + return offset + enddiscard; + } } } -int -Genome_count_mismatches_substring_ref (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, - bool plusp, int 
genestrand) { -#ifdef DEBUG14 - int answer; -#endif - int nmismatches; +static int +count_mismatches_limit (Compress_T query_compress, Univcoord_T left, + int pos5, int pos3, int max_mismatches, bool plusp, int genestrand) { + int nmismatches = 0; int startdiscard, enddiscard; Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32; - Genomecomp_T *ptr, *end; + Genomecomp_T *ptr, *endptr; Genomecomp_T *query_shifted; - UINT4 diff_32; - Genomediff_T diff; int nshift; int startcolumni, endcolumni; + UINT4 diff_32; +#ifdef HAVE_SSE2 + __m128i diff_128; +#endif +#ifdef HAVE_AVX2 + __m256i diff_256; +#endif debug( printf("\n\n"); - printf("Genome (in count_mismatches_substring) from %u+%d to %u+%d:\n",left,pos5,left,pos3); + printf("Genome (in count_mismatches_limit) from %u+%d to %u+%d:\n",left,pos5,left,pos3); Genome_print_blocks(ref_blocks,left+pos5,left+pos3); printf("\n"); ); @@ -20215,128 +22580,189 @@ debug(printf("Query shifted %d:\n",nshift)); debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE; +#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) +#else + query_shifted += startcolumni; +#endif + + startdiscard = (left+pos5) % 32; + enddiscard = (left+pos3) % 32; + ptr = &(ref_blocks[startblocki_32]); + endptr = &(ref_blocks[endblocki_32]); if (endblocki_32 == startblocki_32) { - startdiscard = (left+pos5) % 32; - enddiscard = (left+pos3) % 32; + /* Single block */ + debug(printf("** Single block **\n")); debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[startblocki_32]), - plusp,genestrand,query_unk_mismatch_p); -#else - diff_32 = (block_diff_32)(query_shifted + startcolumni,&(ref_blocks[startblocki_32]), - plusp,genestrand,query_unk_mismatch_p); -#endif + diff_32 = 
(block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); diff_32 = clear_start_32(diff_32,startdiscard); diff_32 = clear_end_32(diff_32,enddiscard); -#ifdef DEBUG14 - answer = popcount_ones_32(diff_32); -#else return popcount_ones_32(diff_32); -#endif - } -#ifndef DEBUG14 - else { -#endif + } else if (endblocki == startblocki) { +#if defined(USE_SHIFT_POPCOUNT) && defined(HAVE_SSE2) + /* Shift */ +#ifdef USE_SHIFT_HILO + enddiscard += (endcolumni - startcolumni)*32; + diff_128 = (block_diff_128_shift_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - startblocki = startblocki_32; - endblocki = endblocki_32; + return popcount_ones_128(diff_128); +#else + /* Faster */ + startdiscard += startcolumni*32; + enddiscard += endcolumni*32; + + diff_128 = (block_diff_128)(query_shifted - startcolumni,ptr - startcolumni,plusp,genestrand,query_unk_mismatch_p); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); + + return popcount_ones_128(diff_128); #endif - startdiscard = (left+pos5) % STEP_SIZE; - enddiscard = (left+pos3) % STEP_SIZE; +#else + /* Start block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_start_32(diff_32,startdiscard); + nmismatches = popcount_ones_32(diff_32); + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; - debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); + /* Single row */ + while (++startcolumni < endcolumni) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_32(diff_32); + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted += 
QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + } -#ifndef DEBUG14 - } + /* End block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_end_32(diff_32,enddiscard); + return (nmismatches + popcount_ones_32(diff_32)); #endif +#if defined(USE_WRAP_POPCOUNT) && defined(HAVE_SSSE3) + } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) { + /* Wrap */ + enddiscard += (4 + endcolumni - startcolumni)*32; + diff_128 = (block_diff_128_wrap_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else - if (endblocki == startblocki) { - diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); - diff = clear_end(diff,enddiscard); - - debug(print_diff_popcount(diff)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == popcount_ones(diff))); - return popcount_ones(diff); + return popcount_ones_128(diff_128); +#endif } else { -#endif + /* Start block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_start_32(diff_32,startdiscard); + nmismatches = popcount_ones_32(diff_32); + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; - /* Startblock */ - diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); + /* Start row */ + while (++startcolumni < 4) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_32(diff_32); + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + } +#ifdef HAVE_SSE2 + query_shifted 
+= QUERY_NEXTROW; +#endif + ptr += GENOME_NEXTROW; - debug(print_diff_popcount(diff)); - nmismatches = popcount_ones(diff); - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ptr = &(ref_blocks[startblocki]); - ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;} -#else - ptr = &(ref_blocks[startblocki+12]); + /* Middle rows */ +#ifdef HAVE_AVX2 + while (ptr + 24 <= endptr) { + diff_256 = (block_diff_256)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_256(diff_256); + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted += 24; ptr += 24; + } #endif - end = &(ref_blocks[endblocki]); - while (ptr < end) { - diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); - - debug(print_diff_popcount(diff)); - nmismatches += popcount_ones(diff); - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;} +#ifdef HAVE_SSE2 + while (ptr + 12 <= endptr) { + diff_128 = (block_diff_128)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_128(diff_128); + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted += 12; ptr += 12; + } #else - ptr += 12; -#endif + while (ptr + 12 <= endptr) { + for (startcolumni = 0; startcolumni < 4; startcolumni++) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_32(diff_32); + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + } + /* query_shifted += QUERY_NEXTROW; */ ptr += GENOME_NEXTROW; } +#endif - /* Endblock */ - diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); - diff = clear_end(diff,enddiscard); - - debug(print_diff_popcount(diff)); - debug14(if (endblocki_32 == startblocki_32) 
assert(answer == nmismatches + popcount_ones(diff))); - return nmismatches + popcount_ones(diff); + /* End row */ + while (ptr < endptr) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_32(diff_32); + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + } -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else + /* End block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_end_32(diff_32,enddiscard); + return (nmismatches + popcount_ones_32(diff_32)); } -#endif } + static int -count_mismatches_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, - bool plusp, int genestrand) { -#ifdef DEBUG14 - int answer; -#endif - int nmismatches; +count_mismatches_limit_snps (Compress_T query_compress, Univcoord_T left, + int pos5, int pos3, int max_mismatches, bool plusp, int genestrand) { + int nmismatches = 0; int startdiscard, enddiscard; Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32; - Genomecomp_T *ref_ptr, *alt_ptr, *end; + Genomecomp_T *ref_ptr, *alt_ptr, *endptr; Genomecomp_T *query_shifted; - UINT4 diff_32; - Genomediff_T diff; int nshift; int startcolumni, endcolumni; + UINT4 diff_32; +#ifdef HAVE_SSE2 + __m128i diff_128; +#endif +#ifdef HAVE_AVX2 + __m256i diff_256; +#endif debug( printf("\n\n"); - printf("Genome (in count_mismatches_substring_snps) from %u+%d to %u+%d:\n",left,pos5,left,pos3); - Genome_print_blocks_snp(ref_blocks,snp_blocks,left+pos5,left+pos3); + printf("Genome (in count_mismatches_limit) from %u+%d to %u+%d:\n",left,pos5,left,pos3); + Genome_print_blocks(ref_blocks,left+pos5,left+pos3); printf("\n"); ); @@ -20357,281 +22783,214 @@ debug(printf("Query shifted %d:\n",nshift)); debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE; +#if 
defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) +#else + query_shifted += startcolumni; +#endif + + startdiscard = (left+pos5) % 32; + enddiscard = (left+pos3) % 32; + ref_ptr = &(ref_blocks[startblocki_32]); + alt_ptr = &(snp_blocks[startblocki_32]); + endptr = &(ref_blocks[endblocki_32]); if (endblocki_32 == startblocki_32) { - startdiscard = (left+pos5) % 32; - enddiscard = (left+pos3) % 32; + /* Single block */ + debug(printf("** Single block **\n")); debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]), - plusp,genestrand,query_unk_mismatch_p); -#else - diff_32 = (block_diff_snp_32)(query_shifted + startcolumni,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]), - plusp,genestrand,query_unk_mismatch_p); -#endif + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); diff_32 = clear_start_32(diff_32,startdiscard); diff_32 = clear_end_32(diff_32,enddiscard); -#ifdef DEBUG14 - answer = popcount_ones_32(diff_32); -#else return popcount_ones_32(diff_32); -#endif - } -#ifndef DEBUG14 - else { -#endif + } else if (endblocki == startblocki) { +#if defined(USE_SHIFT_POPCOUNT) && defined(HAVE_SSE2) + /* Shift */ +#ifdef USE_SHIFT_HILO + enddiscard += (endcolumni - startcolumni)*32; + diff_128 = (block_diff_snp_128_shift_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - startblocki = startblocki_32; - endblocki = endblocki_32; + return popcount_ones_128(diff_128); +#else + /* Faster */ + startdiscard += startcolumni*32; + enddiscard += endcolumni*32; + + diff_128 = (block_diff_snp_128)(query_shifted - startcolumni,alt_ptr 
- startcolumni,ref_ptr - startcolumni, + plusp,genestrand,query_unk_mismatch_p); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); + + return popcount_ones_128(diff_128); #endif - startdiscard = (left+pos5) % STEP_SIZE; - enddiscard = (left+pos3) % STEP_SIZE; +#else + /* Start block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_start_32(diff_32,startdiscard); + nmismatches = popcount_ones_32(diff_32); + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; - debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); + /* Single row */ + while (++startcolumni < endcolumni) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_32(diff_32); + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + } -#ifndef DEBUG14 - } + /* End block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_end_32(diff_32,enddiscard); + return (nmismatches + popcount_ones_32(diff_32)); #endif +#if defined(USE_WRAP_POPCOUNT) && defined(HAVE_SSSE3) + } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) { + /* Wrap */ + enddiscard += (4 + endcolumni - startcolumni)*32; + diff_128 = (block_diff_snp_128_wrap_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else - if (endblocki == startblocki) { - diff = 
(block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); - diff = clear_end(diff,enddiscard); - - debug(print_diff_popcount(diff)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == popcount_ones(diff))); - return popcount_ones(diff); + return popcount_ones_128(diff_128); +#endif } else { -#endif + /* Start block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_start_32(diff_32,startdiscard); + nmismatches = popcount_ones_32(diff_32); + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; - /* Startblock */ - diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); + /* Start row */ + while (++startcolumni < 4) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_32(diff_32); + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + } +#ifdef HAVE_SSE2 + query_shifted += QUERY_NEXTROW; +#endif + ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW; - debug(print_diff_popcount(diff)); - nmismatches = popcount_ones(diff); - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ref_ptr = &(ref_blocks[startblocki]); - alt_ptr = &(snp_blocks[startblocki]); - ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;} -#else - ref_ptr = &(ref_blocks[startblocki+12]); - alt_ptr = &(snp_blocks[startblocki+12]); + /* Middle rows */ +#ifdef HAVE_AVX2 + while (ref_ptr + 24 <= endptr) { + diff_256 = 
(block_diff_snp_256)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_256(diff_256); + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted += 24; ref_ptr += 24; alt_ptr += 24; + } #endif - end = &(ref_blocks[endblocki]); - while (ref_ptr < end) { - diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); - - debug(print_diff_popcount(diff)); - nmismatches += popcount_ones(diff); - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;} +#ifdef HAVE_SSE2 + while (ref_ptr + 12 <= endptr) { + diff_128 = (block_diff_snp_128)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_128(diff_128); + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted += 12; ref_ptr += 12; alt_ptr += 12; + } #else - ref_ptr += 12; alt_ptr += 12; -#endif + while (ref_ptr + 12 <= endptr) { + for (startcolumni = 0; startcolumni < 4; startcolumni++) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_32(diff_32); + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + } + /* query_shifted += QUERY_NEXTROW; */ ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW; } +#endif - /* Endblock */ - diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); - diff = clear_end(diff,enddiscard); - - debug(print_diff_popcount(diff)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches + popcount_ones(diff))); - return nmismatches + popcount_ones(diff); + /* End row */ + while (ref_ptr < endptr) { + diff_32 = 
(block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_32(diff_32); + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + } -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else + /* End block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_end_32(diff_32,enddiscard); + return (nmismatches + popcount_ones_32(diff_32)); } -#endif } -/* left is where the start of the query matches. pos5 is where we - want to start comparing in the query. pos3 is just after where we - want to stop comparing in the query, i.e., stop at (pos3-1) - inclusive */ + int -Genome_count_mismatches_substring (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, - bool plusp, int genestrand) { +Genome_count_mismatches_limit (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, + int max_mismatches, bool plusp, int genestrand) { #if 0 if (dibasep) { - Dibase_count_mismatches_substring(&ncolordiffs,query,pos5,pos3, - /*startpos*/left+pos5,/*endpos*/left+pos3); + debug(printf("Dibase_count_mismatches_limit from %u+%d to %u+%d with max_mismatches %d:\n", + left,pos5,left,pos3,max_mismatches)); + + return Dibase_count_mismatches_limit(&(*ncolordiffs),query,pos5,pos3, + /*startpos*/left+pos5,/*endpos*/left+pos3,max_mismatches); } #endif if (snp_blocks == NULL) { - return Genome_count_mismatches_substring_ref(query_compress,left,pos5,pos3,plusp,genestrand); + return count_mismatches_limit(query_compress,left,pos5,pos3,max_mismatches,plusp,genestrand); } else { - return count_mismatches_substring_snps(query_compress,left,pos5,pos3,plusp,genestrand); + return count_mismatches_limit_snps(query_compress,left,pos5,pos3,max_mismatches,plusp,genestrand); } } -/* pos5 is where we want to start comparing in the query. 
pos3 is - just after where we want to stop comparing in the query, i.e., stop - at (pos3-1) inclusive */ -int -Genome_count_mismatches_fragment_left (Compress_T query_compress, int pos5, int pos3, - Genomecomp_T ref_fragment, Genomecomp_T alt_fragment) { - Genomecomp_T diff, alt_diff, mask; - int startdiscard; - Genomecomp_T query_high, query_low, query_flags; - Genomecomp_T ref_high, ref_low, alt_high, alt_low; - - Compress_get_16mer_left(&query_high,&query_low,&query_flags,query_compress,pos3); - startdiscard = 16 - (pos3 - pos5); - - mask = clear_start_mask(startdiscard); - mask &= 0x0000FFFF; /* Therefore, result of Compress does not need masking */ - debug1(printf("Mask for startdiscard %d: %08X\n",startdiscard,mask)); - - - /* Unpack genomic fragments */ - ref_high = ref_fragment >> 16; - ref_low = ref_fragment /* & 0x0000FFFF */; - - alt_high = alt_fragment >> 16; - alt_low = alt_fragment /* & 0x0000FFFF */; - - - debug1(printf("Comparing: query high %08X, low %08X with ref fragment high %08X, %08X\n",query_high & 0xFFFF,query_low & 0xFFFF,ref_high & 0xFFFF,ref_low & 0xFFFF)); - - /* Taken from block_diff */ - diff = (query_high ^ ref_high) | (query_low ^ ref_low); - debug1(printf(" => ref_diff %04X",(unsigned short) diff)); - - alt_diff = (query_high ^ alt_high) | (query_low ^ alt_low); - debug1(printf(" and alt_diff %04X\n",(unsigned short) alt_diff)); - - diff &= alt_diff; - - diff |= query_flags; - - diff &= mask; - - assert(diff <= 0x0000FFFF); - -#if !defined(HAVE_SSE4_2) - debug1(printf("nmismatches %08X => %d\n",diff,count_bits[diff])); - return count_bits[diff]; -#elif defined(HAVE_POPCNT) - debug1(printf("nmismatches %08X => %d\n",diff,_popcnt32(diff))); - return _popcnt32(diff); -#elif defined(HAVE_MM_POPCNT) - debug1(printf("nmismatches %08X => %d\n",diff,_popcnt32(diff))); - return _mm_popcnt_u32(diff); -#elif defined(HAVE_BUILTIN_POPCOUNT) - debug1(printf("nmismatches %08X => %d\n",diff,__builtin_popcount(diff))); - return 
__builtin_popcount(diff); -#else - debug1(printf("nmismatches %08X => %d\n",diff,count_bits[diff])); - return count_bits[diff]; -#endif -} - -/* pos5 is where we want to start comparing in the query. pos3 is - just after where we want to stop comparing in the query, i.e., stop - at (pos3-1) inclusive */ int -Genome_count_mismatches_fragment_right (Compress_T query_compress, int pos5, int pos3, - Genomecomp_T ref_fragment, Genomecomp_T alt_fragment) { - Genomecomp_T diff, alt_diff, mask; - int enddiscard; - Genomecomp_T query_high, query_low, query_flags; - Genomecomp_T ref_high, ref_low, alt_high, alt_low; - - Compress_get_16mer_right(&query_high,&query_low,&query_flags,query_compress,pos5); - enddiscard = pos3 - pos5; - - mask = clear_end_mask(enddiscard); - mask &= 0x0000FFFF; /* Therefore, result of Compress does not need masking */ - debug1(printf("Mask for enddiscard %d: %08X\n",enddiscard,mask)); - - - /* Unpack genomic fragments */ - ref_high = ref_fragment >> 16; - ref_low = ref_fragment /* & 0x0000FFFF */; - - alt_high = alt_fragment >> 16; - alt_low = alt_fragment /* & 0x0000FFFF */; - - - debug1(printf("Comparing: query high %08X, low %08X with ref fragment high %08X, %08X\n",query_high & 0xFFFF,query_low & 0xFFFF,ref_high & 0xFFFF,ref_low & 0xFFFF)); - - /* Taken from block_diff */ - diff = (query_high ^ ref_high) | (query_low ^ ref_low); - debug1(printf(" => ref_diff %08X",diff)); - - alt_diff = (query_high ^ alt_high) | (query_low ^ alt_low); - debug1(printf(" and alt_diff %08X\n",alt_diff)); - - diff &= alt_diff; - - diff |= query_flags; - - diff &= mask; - - assert(diff <= 0x0000FFFF); - -#if !defined(HAVE_SSE4_2) - debug1(printf("nmismatches %08X => %d\n",diff,count_bits[diff])); - return count_bits[diff]; -#elif defined(HAVE_POPCNT) - debug1(printf("nmismatches %08X => %d\n",diff,_popcnt32(diff))); - return _popcnt32(diff); -#elif defined(HAVE_MM_POPCNT) - debug1(printf("nmismatches %08X => %d\n",diff,_popcnt32(diff))); - return 
_mm_popcnt_u32(diff); -#elif defined(HAVE_BUILTIN_POPCOUNT) - debug1(printf("nmismatches %08X => %d\n",diff,__builtin_popcount(diff))); - return __builtin_popcount(diff); -#else - debug1(printf("nmismatches %08X => %d\n",diff,count_bits[diff])); - return count_bits[diff]; -#endif -} - - - -static int -mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_compress, - Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand, - bool query_unk_mismatch_local_p) { -#ifdef DEBUG14 - int answer; -#endif - int nmismatches = 0, offset, nshift; +Genome_count_mismatches_substring_ref (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, + bool plusp, int genestrand) { + int nmismatches = 0; int startdiscard, enddiscard; Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32; - Genomecomp_T *query_shifted, *ptr, *end; - UINT4 diff_32; - Genomediff_T diff; - int relpos; + Genomecomp_T *ptr, *endptr; + Genomecomp_T *query_shifted; + int nshift; int startcolumni, endcolumni; + UINT4 diff_32; +#ifdef HAVE_SSE2 + __m128i diff_128; +#endif +#ifdef HAVE_AVX2 + __m256i diff_256; +#endif debug( printf("\n\n"); - printf("Entered mismatches_left with %d max_mismatches\n",max_mismatches); - printf("Genome (in mismatches_left):\n"); + printf("Genome (in count_mismatches_substring) from %u+%d to %u+%d:\n",left,pos5,left,pos3); Genome_print_blocks(ref_blocks,left+pos5,left+pos3); printf("\n"); ); @@ -20653,161 +23012,165 @@ debug(printf("Query shifted %d:\n",nshift)); debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE; +#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) +#else + query_shifted += startcolumni; +#endif + + startdiscard = (left+pos5) % 32; + enddiscard = (left+pos3) % 32; + ptr = &(ref_blocks[startblocki_32]); + endptr = &(ref_blocks[endblocki_32]); if (endblocki_32 == startblocki_32) { - startdiscard = (left+pos5) % 32; - enddiscard = (left+pos3) % 32; 
- offset = -startdiscard + pos5; + /* Single block */ debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[startblocki_32]), - plusp,genestrand,query_unk_mismatch_local_p); -#else - diff_32 = (block_diff_32)(query_shifted + startcolumni,&(ref_blocks[startblocki_32]), - plusp,genestrand,query_unk_mismatch_local_p); -#endif + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); diff_32 = clear_start_32(diff_32,startdiscard); diff_32 = clear_end_32(diff_32,enddiscard); - while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { - mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); - diff_32 = clear_lowbit_32(diff_32,relpos); - } -#ifdef DEBUG14 - debug(printf("Would return nmismatches %d\n",nmismatches)); - answer = nmismatches; - nmismatches = 0; + return popcount_ones_32(diff_32); + + } else if (endblocki == startblocki) { +#if defined(USE_SHIFT_POPCOUNT) && defined(HAVE_SSE2) + /* Shift */ +#ifdef USE_SHIFT_HILO + enddiscard += (endcolumni - startcolumni)*32; + diff_128 = (block_diff_128_shift_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); + + return popcount_ones_128(diff_128); #else - return nmismatches; -#endif + /* Faster */ + startdiscard += startcolumni*32; + enddiscard += endcolumni*32; - } -#ifndef DEBUG14 - else { -#endif + diff_128 = (block_diff_128)(query_shifted - startcolumni,ptr - startcolumni,plusp,genestrand,query_unk_mismatch_p); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - startblocki = startblocki_32; - endblocki = endblocki_32; + return popcount_ones_128(diff_128); #endif - 
startdiscard = (left+pos5) % STEP_SIZE; - enddiscard = (left+pos3) % STEP_SIZE; - offset = -startdiscard + pos5; +#else + /* Start block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_start_32(diff_32,startdiscard); + nmismatches = popcount_ones_32(diff_32); + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; - debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); + /* Single row */ + while (++startcolumni < endcolumni) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_32(diff_32); + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + } -#ifndef DEBUG14 - } + /* End block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_end_32(diff_32,enddiscard); + return (nmismatches + popcount_ones_32(diff_32)); #endif +#if defined(USE_WRAP_POPCOUNT) && defined(HAVE_SSSE3) + } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) { + /* Wrap */ + enddiscard += (4 + endcolumni - startcolumni)*32; + diff_128 = (block_diff_128_wrap_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else - if (endblocki == startblocki) { - diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_local_p); - diff = clear_start(diff,startdiscard); - diff = clear_end(diff,enddiscard); - - while (nonzero_p(diff) && nmismatches <= max_mismatches) { - mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes(diff)); - debug(print_diff_trailing_zeroes(diff,offset)); - diff = clear_lowbit(diff,relpos); - } - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches)); - return nmismatches; + return 
popcount_ones_128(diff_128); +#endif } else { -#endif + /* Start block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_start_32(diff_32,startdiscard); + nmismatches = popcount_ones_32(diff_32); + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; - /* Startblock */ - diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_local_p); - diff = clear_start(diff,startdiscard); - - while (nonzero_p(diff) && nmismatches <= max_mismatches) { - mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes(diff)); - debug(print_diff_trailing_zeroes(diff,offset)); - diff = clear_lowbit(diff,relpos); - } - if (nmismatches > max_mismatches) { - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches)); - return nmismatches; + /* Start row */ + while (++startcolumni < 4) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_32(diff_32); + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; } +#ifdef HAVE_SSE2 + query_shifted += QUERY_NEXTROW; +#endif + ptr += GENOME_NEXTROW; - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ptr = &(ref_blocks[startblocki]); - ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;} -#else - ptr = &(ref_blocks[startblocki+12]); + + /* Middle rows */ +#ifdef HAVE_AVX2 + while (ptr + 24 <= endptr) { + diff_256 = (block_diff_256)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_256(diff_256); + query_shifted += 24; ptr += 24; + } #endif - end = &(ref_blocks[endblocki]); - offset += STEP_SIZE; /* 128 or 32 */ - while (ptr < end) { - diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); - - while (nonzero_p(diff) && nmismatches <= max_mismatches) { - mismatch_positions[nmismatches++] = offset + (relpos = 
count_trailing_zeroes(diff)); - debug(print_diff_trailing_zeroes(diff,offset)); - diff = clear_lowbit(diff,relpos); - } - if (nmismatches > max_mismatches) { - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches)); - return nmismatches; - } - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;} +#ifdef HAVE_SSE2 + while (ptr + 12 <= endptr) { + diff_128 = (block_diff_128)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_128(diff_128); + query_shifted += 12; ptr += 12; + } #else - ptr += 12; -#endif - offset += STEP_SIZE; /* 128 or 32 */ + while (ptr + 12 <= endptr) { + for (startcolumni = 0; startcolumni < 4; startcolumni++) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_32(diff_32); + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + } + /* query_shifted += QUERY_NEXTROW; */ ptr += GENOME_NEXTROW; } +#endif - /* Endblock */ - diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); - diff = clear_end(diff,enddiscard); - - while (nonzero_p(diff) && nmismatches <= max_mismatches) { - mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes(diff)); - debug(print_diff_trailing_zeroes(diff,offset)); - diff = clear_lowbit(diff,relpos); + /* End row */ + while (ptr < endptr) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_32(diff_32); + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; } - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches)); - return nmismatches; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else + /* End block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_end_32(diff_32,enddiscard); + return 
(nmismatches + popcount_ones_32(diff_32)); } -#endif - } -/* Returns mismatch_positions[0..max_mismatches] */ static int -mismatches_left_snps (int *mismatch_positions, int max_mismatches, Compress_T query_compress, - Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand, - bool query_unk_mismatch_local_p) { +count_mismatches_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, + bool plusp, int genestrand) { #ifdef DEBUG14 int answer; #endif - int nmismatches_both = 0, offset, nshift; + int nmismatches; int startdiscard, enddiscard; Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32; - Genomecomp_T *query_shifted, *ref_ptr, *alt_ptr, *end; - UINT4 diff_32; - Genomediff_T diff; - int relpos; + Genomecomp_T *ref_ptr, *alt_ptr, *endptr; + Genomecomp_T *query_shifted; + int nshift; int startcolumni, endcolumni; + UINT4 diff_32; +#ifdef HAVE_SSE2 + __m128i diff_128; +#endif +#ifdef HAVE_AVX2 + __m256i diff_256; +#endif debug( printf("\n\n"); - printf("Genome (in mismatches_left_snps):\n"); + printf("Genome (in count_mismatches_substring_snps) from %u+%d to %u+%d:\n",left,pos5,left,pos3); Genome_print_blocks_snp(ref_blocks,snp_blocks,left+pos5,left+pos3); printf("\n"); ); @@ -20829,254 +23192,322 @@ debug(printf("Query shifted %d:\n",nshift)); debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE; +#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) +#else + query_shifted += startcolumni; +#endif + + startdiscard = (left+pos5) % 32; + enddiscard = (left+pos3) % 32; + ref_ptr = &(ref_blocks[startblocki_32]); + alt_ptr = &(snp_blocks[startblocki_32]); + endptr = &(ref_blocks[endblocki_32]); if (endblocki_32 == startblocki_32) { - startdiscard = (left+pos5) % 32; - enddiscard = (left+pos3) % 32; - offset = -startdiscard + pos5; + /* Single block */ + debug(printf("** Single block **\n")); debug(printf("nshift = %d, startdiscard = %u, enddiscard = 
%u\n",nshift,startdiscard,enddiscard)); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]), - plusp,genestrand,query_unk_mismatch_local_p); -#else - diff_32 = (block_diff_snp_32)(query_shifted + startcolumni,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]), - plusp,genestrand,query_unk_mismatch_local_p); -#endif + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); diff_32 = clear_start_32(diff_32,startdiscard); diff_32 = clear_end_32(diff_32,enddiscard); - while (nonzero_p_32(diff_32) && nmismatches_both <= max_mismatches) { - mismatch_positions[nmismatches_both++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); - diff_32 = clear_lowbit_32(diff_32,relpos); - } -#ifdef DEBUG14 - answer = nmismatches_both; - nmismatches_both = 0; + return popcount_ones_32(diff_32); + + } else if (endblocki == startblocki) { +#if defined(USE_SHIFT_POPCOUNT) && defined(HAVE_SSE2) + /* Shift */ +#ifdef USE_SHIFT_HILO + enddiscard += (endcolumni - startcolumni)*32; + diff_128 = (block_diff_snp_128_shift_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); + + return popcount_ones_128(diff_128); #else - return nmismatches_both; -#endif + /* Faster */ + startdiscard += startcolumni*32; + enddiscard += endcolumni*32; - } -#ifndef DEBUG14 - else { -#endif + diff_128 = (block_diff_snp_128)(query_shifted - startcolumni,alt_ptr - startcolumni,ref_ptr - startcolumni, + plusp,genestrand,query_unk_mismatch_p); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - startblocki = startblocki_32; - endblocki = endblocki_32; + return popcount_ones_128(diff_128); #endif - startdiscard = 
(left+pos5) % STEP_SIZE; - enddiscard = (left+pos3) % STEP_SIZE; - offset = -startdiscard + pos5; +#else + /* Start block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_start_32(diff_32,startdiscard); + nmismatches = popcount_ones_32(diff_32); + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; - debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); + /* Single row */ + while (++startcolumni < endcolumni) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_32(diff_32); + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + } -#ifndef DEBUG14 - } + /* End block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_end_32(diff_32,enddiscard); + return (nmismatches + popcount_ones_32(diff_32)); #endif +#if defined(USE_WRAP_POPCOUNT) && defined(HAVE_SSSE3) + } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) { + /* Wrap */ + enddiscard += (4 + endcolumni - startcolumni)*32; + diff_128 = (block_diff_snp_128_wrap_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else - if (endblocki == startblocki) { - diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_local_p); - diff = clear_start(diff,startdiscard); - diff = clear_end(diff,enddiscard); - - while (nonzero_p(diff) && nmismatches_both <= max_mismatches) { - mismatch_positions[nmismatches_both++] = offset + (relpos = count_trailing_zeroes(diff)); - 
debug(print_diff_trailing_zeroes(diff,offset)); - diff = clear_lowbit(diff,relpos); - } - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches_both)); - return nmismatches_both; + return popcount_ones_128(diff_128); +#endif } else { + /* Start block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_start_32(diff_32,startdiscard); + nmismatches = popcount_ones_32(diff_32); + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + + /* Start row */ + while (++startcolumni < 4) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_32(diff_32); + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + } +#ifdef HAVE_SSE2 + query_shifted += QUERY_NEXTROW; #endif + ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW; - /* Startblock */ - diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_local_p); - diff = clear_start(diff,startdiscard); - while (nonzero_p(diff) && nmismatches_both <= max_mismatches) { - mismatch_positions[nmismatches_both++] = offset + (relpos = count_trailing_zeroes(diff)); - debug(print_diff_trailing_zeroes(diff,offset)); - diff = clear_lowbit(diff,relpos); - } - if (nmismatches_both > max_mismatches) { - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches_both)); - return nmismatches_both; + /* Middle rows */ +#ifdef HAVE_AVX2 + while (ref_ptr + 24 <= endptr) { + diff_256 = (block_diff_snp_256)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_256(diff_256); + query_shifted += 24; ref_ptr += 24; alt_ptr += 24; } - - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ref_ptr = &(ref_blocks[startblocki]); - alt_ptr = 
&(snp_blocks[startblocki]); - ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;} -#else - ref_ptr = &(ref_blocks[startblocki+12]); - alt_ptr = &(snp_blocks[startblocki+12]); #endif - end = &(ref_blocks[endblocki]); - offset += STEP_SIZE; /* 128 or 32 */ - while (ref_ptr < end) { - diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); - - while (nonzero_p(diff) && nmismatches_both <= max_mismatches) { - mismatch_positions[nmismatches_both++] = offset + (relpos = count_trailing_zeroes(diff)); - debug(print_diff_trailing_zeroes(diff,offset)); - diff = clear_lowbit(diff,relpos); - } - if (nmismatches_both > max_mismatches) { - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches_both)); - return nmismatches_both; - } - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;} +#ifdef HAVE_SSE2 + while (ref_ptr + 12 <= endptr) { + diff_128 = (block_diff_snp_128)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_128(diff_128); + query_shifted += 12; ref_ptr += 12; alt_ptr += 12; + } #else - ref_ptr += 12; alt_ptr += 12; -#endif - offset += STEP_SIZE; /* 128 or 32 */ + while (ref_ptr + 12 <= endptr) { + for (startcolumni = 0; startcolumni < 4; startcolumni++) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_32(diff_32); + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + } + /* query_shifted += QUERY_NEXTROW; */ ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW; } +#endif - /* Endblock */ - diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); - diff = clear_end(diff,enddiscard); - - while (nonzero_p(diff) 
&& nmismatches_both <= max_mismatches) { - mismatch_positions[nmismatches_both++] = offset + (relpos = count_trailing_zeroes(diff)); - debug(print_diff_trailing_zeroes(diff,offset)); - diff = clear_lowbit(diff,relpos); + /* End row */ + while (ref_ptr < endptr) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + nmismatches += popcount_ones_32(diff_32); + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; } - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches_both)); - return nmismatches_both; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else + /* End block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_end_32(diff_32,enddiscard); + return (nmismatches + popcount_ones_32(diff_32)); } -#endif } - -/* Returns mismatch_positions[0..nmismatches], where nmismatches <= max_mismatches + 1 */ -/* If request max_mismatches 3, could return m0, m1, m2, m3, m4 */ +/* left is where the start of the query matches. pos5 is where we + want to start comparing in the query. 
pos3 is just after where we + want to stop comparing in the query, i.e., stop at (pos3-1) + inclusive */ int -Genome_mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_compress, - Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand) { - int nmismatches; -#ifdef DEBUG - int i; -#endif +Genome_count_mismatches_substring (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, + bool plusp, int genestrand) { #if 0 if (dibasep) { - debug(printf("Dibase_mismatches_left from %u+%d to %u+%d:\n",left,pos5,left,pos3)); - - nmismatches = Dibase_mismatches_left(&(*mismatch_positions),&(*colordiffs),max_mismatches,query, - pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3); - mismatch_positions[nmismatches] = pos3 + 1; /* Need +1 because of starting assumed nt */ - + Dibase_count_mismatches_substring(&ncolordiffs,query,pos5,pos3, + /*startpos*/left+pos5,/*endpos*/left+pos3); } #endif if (snp_blocks == NULL) { - nmismatches = mismatches_left(&(*mismatch_positions),max_mismatches,query_compress, - left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p); - mismatch_positions[nmismatches] = pos3; + return Genome_count_mismatches_substring_ref(query_compress,left,pos5,pos3,plusp,genestrand); } else { - nmismatches = mismatches_left_snps(&(*mismatch_positions),max_mismatches,query_compress, - left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p); - mismatch_positions[nmismatches] = pos3; + return count_mismatches_substring_snps(query_compress,left,pos5,pos3,plusp,genestrand); } - debug( - printf("%d mismatches on left: ",nmismatches); - for (i = 0; i <= nmismatches; i++) { - printf("%d ",mismatch_positions[i]); - } - printf("\n"); - ); - - return nmismatches; } -/* Returns mismatch_positions[0..nmismatches], where nmismatches <= max_mismatches + 1 */ -/* If request max_mismatches 3, could return m0, m1, m2, m3, m4 */ -/* See note above about why we set query_unk_mismatch_p to false */ -int -Genome_mismatches_left_trim (int 
*mismatch_positions, int max_mismatches, Compress_T query_compress, - Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand) { - int nmismatches; -#ifdef DEBUG - int i; -#endif +/* pos5 is where we want to start comparing in the query. pos3 is + just after where we want to stop comparing in the query, i.e., stop + at (pos3-1) inclusive */ +int +Genome_count_mismatches_fragment_left (Compress_T query_compress, int pos5, int pos3, + Genomecomp_T ref_fragment, Genomecomp_T alt_fragment) { + Genomecomp_T diff, alt_diff, mask; + int startdiscard; + Genomecomp_T query_high, query_low, query_flags; + Genomecomp_T ref_high, ref_low, alt_high, alt_low; + + Compress_get_16mer_left(&query_high,&query_low,&query_flags,query_compress,pos3); + startdiscard = 16 - (pos3 - pos5); + + mask = clear_start_mask(startdiscard); + mask &= 0x0000FFFF; /* Therefore, result of Compress does not need masking */ + debug1(printf("Mask for startdiscard %d: %08X\n",startdiscard,mask)); + + + /* Unpack genomic fragments */ + ref_high = ref_fragment >> 16; + ref_low = ref_fragment /* & 0x0000FFFF */; + + alt_high = alt_fragment >> 16; + alt_low = alt_fragment /* & 0x0000FFFF */; + + + debug1(printf("Comparing: query high %08X, low %08X with ref fragment high %08X, %08X\n",query_high & 0xFFFF,query_low & 0xFFFF,ref_high & 0xFFFF,ref_low & 0xFFFF)); + + /* Taken from block_diff */ + diff = (query_high ^ ref_high) | (query_low ^ ref_low); + debug1(printf(" => ref_diff %04X",(unsigned short) diff)); + + alt_diff = (query_high ^ alt_high) | (query_low ^ alt_low); + debug1(printf(" and alt_diff %04X\n",(unsigned short) alt_diff)); + + diff &= alt_diff; + + diff |= query_flags; + + diff &= mask; + + assert(diff <= 0x0000FFFF); + +#if !defined(HAVE_SSE4_2) + debug1(printf("nmismatches %08X => %d\n",diff,count_bits[diff])); + return count_bits[diff]; +#elif defined(HAVE_POPCNT) + debug1(printf("nmismatches %08X => %d\n",diff,_popcnt32(diff))); + return _popcnt32(diff); +#elif 
defined(HAVE_MM_POPCNT) + debug1(printf("nmismatches %08X => %d\n",diff,_popcnt32(diff))); + return _mm_popcnt_u32(diff); +#elif defined(HAVE_BUILTIN_POPCOUNT) + debug1(printf("nmismatches %08X => %d\n",diff,__builtin_popcount(diff))); + return __builtin_popcount(diff); +#else + debug1(printf("nmismatches %08X => %d\n",diff,count_bits[diff])); + return count_bits[diff]; +#endif +} + + +/* pos5 is where we want to start comparing in the query. pos3 is + just after where we want to stop comparing in the query, i.e., stop + at (pos3-1) inclusive */ +int +Genome_count_mismatches_fragment_right (Compress_T query_compress, int pos5, int pos3, + Genomecomp_T ref_fragment, Genomecomp_T alt_fragment) { + Genomecomp_T diff, alt_diff, mask; + int enddiscard; + Genomecomp_T query_high, query_low, query_flags; + Genomecomp_T ref_high, ref_low, alt_high, alt_low; + + Compress_get_16mer_right(&query_high,&query_low,&query_flags,query_compress,pos5); + enddiscard = pos3 - pos5; + + mask = clear_end_mask(enddiscard); + mask &= 0x0000FFFF; /* Therefore, result of Compress does not need masking */ + debug1(printf("Mask for enddiscard %d: %08X\n",enddiscard,mask)); + + + /* Unpack genomic fragments */ + ref_high = ref_fragment >> 16; + ref_low = ref_fragment /* & 0x0000FFFF */; + + alt_high = alt_fragment >> 16; + alt_low = alt_fragment /* & 0x0000FFFF */; + + + debug1(printf("Comparing: query high %08X, low %08X with ref fragment high %08X, %08X\n",query_high & 0xFFFF,query_low & 0xFFFF,ref_high & 0xFFFF,ref_low & 0xFFFF)); -#if 0 - if (dibasep) { - debug(printf("Dibase_mismatches_left from %u+%d to %u+%d:\n",left,pos5,left,pos3)); + /* Taken from block_diff */ + diff = (query_high ^ ref_high) | (query_low ^ ref_low); + debug1(printf(" => ref_diff %08X",diff)); - nmismatches = Dibase_mismatches_left(&(*mismatch_positions),&(*colordiffs),max_mismatches,query, - pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3); - mismatch_positions[nmismatches] = pos3 + 1; /* Need +1 because of 
starting assumed nt */ + alt_diff = (query_high ^ alt_high) | (query_low ^ alt_low); + debug1(printf(" and alt_diff %08X\n",alt_diff)); - } -#endif + diff &= alt_diff; - if (snp_blocks == NULL) { - nmismatches = mismatches_left(&(*mismatch_positions),max_mismatches,query_compress, - left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false); - mismatch_positions[nmismatches] = pos3; - } else { - nmismatches = mismatches_left_snps(&(*mismatch_positions),max_mismatches,query_compress, - left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false); - mismatch_positions[nmismatches] = pos3; - } - debug( - printf("%d mismatches on left: ",nmismatches); - for (i = 0; i <= nmismatches; i++) { - printf("%d ",mismatch_positions[i]); - } - printf("\n"); - ); - - return nmismatches; + diff |= query_flags; + + diff &= mask; + + assert(diff <= 0x0000FFFF); + +#if !defined(HAVE_SSE4_2) + debug1(printf("nmismatches %08X => %d\n",diff,count_bits[diff])); + return count_bits[diff]; +#elif defined(HAVE_POPCNT) + debug1(printf("nmismatches %08X => %d\n",diff,_popcnt32(diff))); + return _popcnt32(diff); +#elif defined(HAVE_MM_POPCNT) + debug1(printf("nmismatches %08X => %d\n",diff,_popcnt32(diff))); + return _mm_popcnt_u32(diff); +#elif defined(HAVE_BUILTIN_POPCOUNT) + debug1(printf("nmismatches %08X => %d\n",diff,__builtin_popcount(diff))); + return __builtin_popcount(diff); +#else + debug1(printf("nmismatches %08X => %d\n",diff,count_bits[diff])); + return count_bits[diff]; +#endif } + static int -mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_compress, - Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand, - bool query_unk_mismatch_local_p) { -#ifdef DEBUG14 - int answer; -#endif - int nmismatches = 0, offset, relpos, nshift; +mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_compress, + Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand, + bool query_unk_mismatch_local_p) { + int 
nmismatches = 0, offset, nshift; int startdiscard, enddiscard; Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32; - Genomecomp_T *query_shifted, *start, *ptr; + Genomecomp_T *query_shifted, *ptr, *endptr; + int relpos; + int startcolumni, endcolumni; UINT4 diff_32; - Genomediff_T diff; -#ifndef HAVE_BUILTIN_CLZ - Genomecomp_T top; +#ifdef HAVE_SSE2 + __m128i diff_128; +#endif +#ifdef HAVE_AVX2 + __m256i diff_256; #endif - int startcolumni, endcolumni; debug( printf("\n\n"); - printf("Entered mismatches_right with %d max_mismatches\n",max_mismatches); - printf("Genome (in mismatches_right):\n"); + printf("Entered mismatches_left with %d max_mismatches\n",max_mismatches); + printf("Genome (in mismatches_left):\n"); Genome_print_blocks(ref_blocks,left+pos5,left+pos3); printf("\n"); ); + startblocki = (left+pos5)/128U*12; startcolumni = ((left+pos5) % 128) / 32; startblocki_32 = startblocki + startcolumni; @@ -21092,165 +23523,247 @@ query_shifted = Compress_shift(query_compress,nshift); debug(printf("Query shifted %d:\n",nshift)); debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); - query_shifted += (nshift+pos3)/STEP_SIZE*COMPRESS_BLOCKSIZE; - - if (startblocki_32 == endblocki_32) { - startdiscard = (left+pos5) % 32; - enddiscard = (left+pos3) % 32; - offset = (pos3 - 1) - enddiscard + 32; - debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); - + query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE; #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[endblocki_32]), - plusp,genestrand,query_unk_mismatch_local_p); #else - diff_32 = (block_diff_32)(query_shifted + endcolumni,&(ref_blocks[endblocki_32]), - plusp,genestrand,query_unk_mismatch_local_p); + query_shifted += startcolumni; #endif + + startdiscard = (left+pos5) % 32; + enddiscard = (left+pos3) % 32; + offset = -startdiscard + pos5; + ptr = &(ref_blocks[startblocki_32]); + 
endptr = &(ref_blocks[endblocki_32]); + + if (endblocki_32 == startblocki_32) { + /* Single block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); diff_32 = clear_start_32(diff_32,startdiscard); diff_32 = clear_end_32(diff_32,enddiscard); while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { - mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32)); - diff_32 = clear_highbit_32(diff_32,relpos); + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); } -#ifdef DEBUG14 - answer = nmismatches; - nmismatches = 0; -#else return nmismatches; -#endif - } -#ifndef DEBUG14 - else { -#endif + } else if (endblocki == startblocki) { +#if defined(USE_SHIFT) && defined(HAVE_SSE2) + /* Shift */ + enddiscard += (endcolumni - startcolumni)*32; + diff_128 = (block_diff_128_shift_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); + + while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_128(diff_128)); + diff_128 = clear_lowbit_128(diff_128,relpos); + } + return nmismatches; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - startblocki = startblocki_32; - endblocki = endblocki_32; -#endif +#else + /* Start block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); + diff_32 = clear_start_32(diff_32,startdiscard); - startdiscard = (left+pos5) % STEP_SIZE; - enddiscard = (left+pos3) % STEP_SIZE; - offset = (pos3 - 1) - enddiscard + STEP_SIZE; - - debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u, offset = %d\n", - nshift,startdiscard,enddiscard,offset)); + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + 
mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } -#ifndef DEBUG14 - } -#endif + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + offset += 32; + + /* Single row */ + while (++startcolumni < endcolumni) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); + + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } + + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + offset += 32; + } + /* End block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); + diff_32 = clear_end_32(diff_32,enddiscard); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else - if (startblocki == endblocki) { - diff = (block_diff)(query_shifted,&(ref_blocks[endblocki]), - plusp,genestrand,query_unk_mismatch_local_p); - diff = clear_start(diff,startdiscard); - diff = clear_end(diff,enddiscard); - - while (nonzero_p(diff) && nmismatches <= max_mismatches) { - mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes(diff)); - debug(print_diff_leading_zeroes(diff,offset)); - diff = clear_highbit(diff,relpos); + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); } - debug14(if (startblocki_32 == endblocki_32) assert(answer == nmismatches)); return nmismatches; +#endif - } else { +#if defined(USE_WRAP) && defined(HAVE_SSSE3) + } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) { + /* Wrap */ + enddiscard += (4 + endcolumni - startcolumni)*32; + 
diff_128 = (block_diff_128_wrap_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); + + while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_128(diff_128)); + diff_128 = clear_lowbit_128(diff_128,relpos); + } + return nmismatches; #endif - /* Endblock */ - diff = (block_diff)(query_shifted,&(ref_blocks[endblocki]), - plusp,genestrand,query_unk_mismatch_local_p); - diff = clear_end(diff,enddiscard); - - while (nonzero_p(diff) && nmismatches <= max_mismatches) { - mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes(diff)); - debug(print_diff_leading_zeroes(diff,offset)); - diff = clear_highbit(diff,relpos); + } else { + /* Start block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); + diff_32 = clear_start_32(diff_32,startdiscard); + + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); } if (nmismatches > max_mismatches) { - debug14(if (startblocki_32 == endblocki_32) assert(answer == nmismatches)); return nmismatches; } + + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + offset += 32; - query_shifted -= COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ptr = &(ref_blocks[endblocki]); - ptr -= 1; if (endcolumni-- == 0) {ptr -= 8; endcolumni = 3;} -#else - ptr = &(ref_blocks[endblocki-12]); + /* Start row */ + while (++startcolumni < 4) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); + + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); + 
diff_32 = clear_lowbit_32(diff_32,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } + + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + offset += 32; + } +#ifdef HAVE_SSE2 + query_shifted += QUERY_NEXTROW; #endif - start = &(ref_blocks[startblocki]); - offset -= STEP_SIZE; /* 128 or 32 */ - while (ptr > start) { - diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); - - while (nonzero_p(diff) && nmismatches <= max_mismatches) { - mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes(diff)); - debug(print_diff_leading_zeroes(diff,offset)); - diff = clear_highbit(diff,relpos); + ptr += GENOME_NEXTROW; + + + /* Middle rows */ +#ifdef HAVE_AVX2 + while (ptr + 24 <= endptr) { + diff_256 = (block_diff_256)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); + + while (nonzero_p_256(diff_256) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_256(diff_256)); + diff_256 = clear_lowbit_256(diff_256,relpos); } if (nmismatches > max_mismatches) { - debug14(if (startblocki_32 == endblocki_32) assert(answer == nmismatches)); return nmismatches; } + + query_shifted += 24; ptr += 24; + offset += 256; + } +#endif - query_shifted -= COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ptr -= 1; if (endcolumni-- == 0) {ptr -= 8; endcolumni = 3;} +#ifdef HAVE_SSE2 + while (ptr + 12 <= endptr) { + diff_128 = (block_diff_128)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); + + while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_128(diff_128)); + diff_128 = clear_lowbit_128(diff_128,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } + + query_shifted += 12; ptr += 12; + offset += 128; + } #else - ptr -= 12; -#endif - offset -= STEP_SIZE; /* 128 or 32 */ + while (ptr + 
12 <= endptr) { + for (startcolumni = 0; startcolumni < 4; startcolumni++) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); + + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } + + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + offset += 32; + } + /* query_shifted += QUERY_NEXTROW; */ ptr += GENOME_NEXTROW; } +#endif - /* Startblock */ - diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); - diff = clear_start(diff,startdiscard); + /* End row */ + while (ptr < endptr) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); - while (nonzero_p(diff) && nmismatches <= max_mismatches) { - mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes(diff)); - debug(print_diff_leading_zeroes(diff,offset)); - diff = clear_highbit(diff,relpos); + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } + + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + offset += 32; } - debug14(if (startblocki_32 == endblocki_32) assert(answer == nmismatches)); - return nmismatches; + /* End block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); + diff_32 = clear_end_32(diff_32,enddiscard); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + } + return 
nmismatches; } -#endif } - +/* Returns nmismatches_both */ static int -mismatches_right_snps (int *mismatch_positions, int max_mismatches, Compress_T query_compress, - Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand, - bool query_unk_mismatch_local_p) { -#ifdef DEBUG14 - int answer; -#endif - int nmismatches_both = 0, offset, relpos, nshift; +mismatches_left_snps (int *mismatch_positions, int max_mismatches, Compress_T query_compress, + Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand, + bool query_unk_mismatch_local_p) { + int nmismatches = 0, offset, nshift; int startdiscard, enddiscard; Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32; - Genomecomp_T *query_shifted, *ref_ptr, *alt_ptr, *start; + Genomecomp_T *query_shifted, *ref_ptr, *alt_ptr, *endptr; + int relpos; + int startcolumni, endcolumni; UINT4 diff_32; - Genomediff_T diff; -#ifndef HAVE_BUILTIN_CLZ - Genomecomp_T top; +#ifdef HAVE_SSE2 + __m128i diff_128; +#endif +#ifdef HAVE_AVX2 + __m256i diff_256; #endif - int startcolumni, endcolumni; debug( printf("\n\n"); - printf("Genome (in mismatches_right_snps):\n"); - Genome_print_blocks_snp(ref_blocks,snp_blocks,left+pos5,left+pos3); + printf("Entered mismatches_left_snps with %d max_mismatches\n",max_mismatches); + printf("Genome (in mismatches_left):\n"); + Genome_print_blocks(ref_blocks,left+pos5,left+pos3); printf("\n"); ); @@ -21270,147 +23783,229 @@ query_shifted = Compress_shift(query_compress,nshift); debug(printf("Query shifted %d:\n",nshift)); debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); - query_shifted += (nshift+pos3)/STEP_SIZE*COMPRESS_BLOCKSIZE; - - if (startblocki_32 == endblocki_32) { - startdiscard = (left+pos5) % 32; - enddiscard = (left+pos3) % 32; - offset = (pos3 - 1) - enddiscard + 32; - debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); - + query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE; #if 
defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[endblocki_32]),&(ref_blocks[endblocki_32]), - plusp,genestrand,query_unk_mismatch_local_p); #else - diff_32 = (block_diff_snp_32)(query_shifted + endcolumni,&(snp_blocks[endblocki_32]),&(ref_blocks[endblocki_32]), - plusp,genestrand,query_unk_mismatch_local_p); + query_shifted += startcolumni; #endif + + startdiscard = (left+pos5) % 32; + enddiscard = (left+pos3) % 32; + offset = -startdiscard + pos5; + ref_ptr = &(ref_blocks[startblocki_32]); + alt_ptr = &(snp_blocks[startblocki_32]); + endptr = &(ref_blocks[endblocki_32]); + + if (endblocki_32 == startblocki_32) { + /* Single block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); diff_32 = clear_start_32(diff_32,startdiscard); diff_32 = clear_end_32(diff_32,enddiscard); - while (nonzero_p_32(diff_32) && nmismatches_both <= max_mismatches) { - mismatch_positions[nmismatches_both++] = offset - (relpos = count_leading_zeroes_32(diff_32)); - diff_32 = clear_highbit_32(diff_32,relpos); + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); } -#ifdef DEBUG14 - answer = nmismatches_both; - nmismatches_both = 0; + return nmismatches; + + } else if (endblocki == startblocki) { +#if defined(USE_SHIFT) && defined(HAVE_SSE2) + /* Shift */ + enddiscard += (endcolumni - startcolumni)*32; + diff_128 = (block_diff_snp_128_shift_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); + + while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_128(diff_128)); + diff_128 = 
clear_lowbit_128(diff_128,relpos); + } + return nmismatches; + #else - return nmismatches_both; + /* Start block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); + diff_32 = clear_start_32(diff_32,startdiscard); + + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } + + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + offset += 32; + + /* Single row */ + while (++startcolumni < endcolumni) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); + + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } + + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + offset += 32; + } + + /* End block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); + diff_32 = clear_end_32(diff_32,enddiscard); + + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + } + return nmismatches; #endif - } -#ifndef DEBUG14 - else { +#if defined(USE_WRAP) && defined(HAVE_SSSE3) + } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) { + /* Wrap */ + enddiscard += (4 + endcolumni - startcolumni)*32; + diff_128 = (block_diff_snp_128_wrap_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p, + startcolumni); + diff_128 = 
clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); + + while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_128(diff_128)); + diff_128 = clear_lowbit_128(diff_128,relpos); + } + return nmismatches; #endif -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - startblocki = startblocki_32; - endblocki = endblocki_32; -#endif + } else { + /* Start block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); + diff_32 = clear_start_32(diff_32,startdiscard); - startdiscard = (left+pos5) % STEP_SIZE; - enddiscard = (left+pos3) % STEP_SIZE; - offset = (pos3 - 1) - enddiscard + STEP_SIZE; - - debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u, offset = %d\n", - nshift,startdiscard,enddiscard,offset)); -#ifndef DEBUG14 - } + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } + + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + offset += 32; + + /* Start row */ + while (++startcolumni < 4) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); + + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } + + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + offset += 32; + } +#ifdef HAVE_SSE2 + query_shifted += QUERY_NEXTROW; #endif + ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW; -#if defined(WORDS_BIGENDIAN) || 
!defined(HAVE_SSE2) -#else - if (startblocki == endblocki) { - diff = (block_diff_snp)(query_shifted,&(snp_blocks[endblocki]),&(ref_blocks[endblocki]), - plusp,genestrand,query_unk_mismatch_local_p); - diff = clear_start(diff,startdiscard); - diff = clear_end(diff,enddiscard); + /* Middle rows */ +#ifdef HAVE_AVX2 + while (ref_ptr + 24 <= endptr) { + diff_256 = (block_diff_snp_256)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); - while (nonzero_p(diff) && nmismatches_both <= max_mismatches) { - mismatch_positions[nmismatches_both++] = offset - (relpos = count_leading_zeroes(diff)); - debug(print_diff_leading_zeroes(diff,offset)); - diff = clear_highbit(diff,relpos); + while (nonzero_p_256(diff_256) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_256(diff_256)); + diff_256 = clear_lowbit_256(diff_256,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } + + query_shifted += 24; ref_ptr += 24; alt_ptr += 24; + offset += 256; } - debug14(if (startblocki_32 == endblocki_32) assert(answer == nmismatches_both)); - return nmismatches_both; - - } else { #endif - /* Endblock */ - diff = (block_diff_snp)(query_shifted,&(snp_blocks[endblocki]),&(ref_blocks[endblocki]), - plusp,genestrand,query_unk_mismatch_local_p); - diff = clear_end(diff,enddiscard); +#ifdef HAVE_SSE2 + while (ref_ptr + 12 <= endptr) { + diff_128 = (block_diff_snp_128)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); - while (nonzero_p(diff) && nmismatches_both <= max_mismatches) { - mismatch_positions[nmismatches_both++] = offset - (relpos = count_leading_zeroes(diff)); - debug(print_diff_leading_zeroes(diff,offset)); - diff = clear_highbit(diff,relpos); - } - if (nmismatches_both > max_mismatches) { - debug14(if (startblocki_32 == endblocki_32) assert(answer == nmismatches_both)); - return nmismatches_both; + while (nonzero_p_128(diff_128) && nmismatches <= 
max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_128(diff_128)); + diff_128 = clear_lowbit_128(diff_128,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } + + query_shifted += 12; ref_ptr += 12; alt_ptr += 12; + offset += 128; } - - query_shifted -= COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ref_ptr = &(ref_blocks[endblocki]); - alt_ptr = &(snp_blocks[endblocki]); - ref_ptr -= 1; alt_ptr -= 1; if (endcolumni-- == 0) {ref_ptr -= 8; alt_ptr -= 8; endcolumni = 3;} #else - ref_ptr = &(ref_blocks[endblocki-12]); - alt_ptr = &(snp_blocks[endblocki-12]); + while (ref_ptr + 12 <= endptr) { + for (startcolumni = 0; startcolumni < 4; startcolumni++) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); + + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } + + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + offset += 32; + } + /* query_shifted += QUERY_NEXTROW; */ ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW; + } #endif - start = &(ref_blocks[startblocki]); - offset -= STEP_SIZE; /* 128 or 32 */ - while (ref_ptr > start) { - diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); - while (nonzero_p(diff) && nmismatches_both <= max_mismatches) { - mismatch_positions[nmismatches_both++] = offset - (relpos = count_leading_zeroes(diff)); - debug(print_diff_leading_zeroes(diff,offset)); - diff = clear_highbit(diff,relpos); + /* End row */ + while (ref_ptr < endptr) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); + + while (nonzero_p_32(diff_32) && 
nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); } - if (nmismatches_both > max_mismatches) { - debug14(if (startblocki_32 == endblocki_32) assert(answer == nmismatches_both)); - return nmismatches_both; + if (nmismatches > max_mismatches) { + return nmismatches; } - - query_shifted -= COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ref_ptr -= 1; alt_ptr -= 1; if (endcolumni-- == 0) {ref_ptr -= 8; alt_ptr -= 8; endcolumni = 3;} -#else - ref_ptr -= 12; alt_ptr -= 12; -#endif - offset -= STEP_SIZE; /* 128 or 32 */ + + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + offset += 32; } - /* Startblock */ - diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); - diff = clear_start(diff,startdiscard); + /* End block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); + diff_32 = clear_end_32(diff_32,enddiscard); - while (nonzero_p(diff) && nmismatches_both <= max_mismatches) { - mismatch_positions[nmismatches_both++] = offset - (relpos = count_leading_zeroes(diff)); - debug(print_diff_leading_zeroes(diff,offset)); - diff = clear_highbit(diff,relpos); + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); } - - debug14(if (startblocki_32 == endblocki_32) assert(answer == nmismatches_both)); - return nmismatches_both; - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else + return nmismatches; } -#endif } -/* Returns mismatch_positions[0..nmismatches], where nmismatches <= max_mismatches */ +/* Returns mismatch_positions[0..nmismatches], where nmismatches <= max_mismatches + 1 */ +/* If request max_mismatches 3, could return m0, m1, 
m2, m3, m4 */ int -Genome_mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_compress, - Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand) { +Genome_mismatches_left (int *mismatch_positions, int max_mismatches, Compress_T query_compress, + Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand) { int nmismatches; #ifdef DEBUG int i; @@ -21418,37 +24013,42 @@ #if 0 if (dibasep) { - debug(printf("Dibase_mismatches_right from %u+%d to %u+%d:\n",left,pos5,left,pos3)); + debug(printf("Dibase_mismatches_left from %u+%d to %u+%d:\n",left,pos5,left,pos3)); + + nmismatches = Dibase_mismatches_left(&(*mismatch_positions),&(*colordiffs),max_mismatches,query, + pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3); + mismatch_positions[nmismatches] = pos3 + 1; /* Need +1 because of starting assumed nt */ - nmismatches = Dibase_mismatches_right(&(*mismatch_positions),&(*colordiffs),max_mismatches,query, - pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3); } #endif if (snp_blocks == NULL) { - nmismatches = mismatches_right(&(*mismatch_positions),max_mismatches,query_compress, - left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p); + nmismatches = mismatches_left(&(*mismatch_positions),max_mismatches,query_compress, + left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p); + mismatch_positions[nmismatches] = pos3; } else { - nmismatches = mismatches_right_snps(&(*mismatch_positions),max_mismatches,query_compress, - left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p); + nmismatches = mismatches_left_snps(&(*mismatch_positions),max_mismatches,query_compress, + left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p); + mismatch_positions[nmismatches] = pos3; } - mismatch_positions[nmismatches] = -1; debug( - printf("%d mismatches on right: ",nmismatches); + printf("%d mismatches on left: ",nmismatches); for (i = 0; i <= nmismatches; i++) { printf("%d ",mismatch_positions[i]); } printf("\n"); ); + return nmismatches; } -/* 
Returns mismatch_positions[0..nmismatches], where nmismatches <= max_mismatches */ +/* Returns mismatch_positions[0..nmismatches], where nmismatches <= max_mismatches + 1 */ +/* If request max_mismatches 3, could return m0, m1, m2, m3, m4 */ /* See note above about why we set query_unk_mismatch_p to false */ int -Genome_mismatches_right_trim (int *mismatch_positions, int max_mismatches, Compress_T query_compress, - Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand) { +Genome_mismatches_left_trim (int *mismatch_positions, int max_mismatches, Compress_T query_compress, + Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand) { int nmismatches; #ifdef DEBUG int i; @@ -21456,64 +24056,65 @@ #if 0 if (dibasep) { - debug(printf("Dibase_mismatches_right from %u+%d to %u+%d:\n",left,pos5,left,pos3)); + debug(printf("Dibase_mismatches_left from %u+%d to %u+%d:\n",left,pos5,left,pos3)); + + nmismatches = Dibase_mismatches_left(&(*mismatch_positions),&(*colordiffs),max_mismatches,query, + pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3); + mismatch_positions[nmismatches] = pos3 + 1; /* Need +1 because of starting assumed nt */ - nmismatches = Dibase_mismatches_right(&(*mismatch_positions),&(*colordiffs),max_mismatches,query, - pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3); } #endif if (snp_blocks == NULL) { - nmismatches = mismatches_right(&(*mismatch_positions),max_mismatches,query_compress, - left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false); + nmismatches = mismatches_left(&(*mismatch_positions),max_mismatches,query_compress, + left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false); + mismatch_positions[nmismatches] = pos3; } else { - nmismatches = mismatches_right_snps(&(*mismatch_positions),max_mismatches,query_compress, - left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false); + nmismatches = mismatches_left_snps(&(*mismatch_positions),max_mismatches,query_compress, + 
left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false); + mismatch_positions[nmismatches] = pos3; } - mismatch_positions[nmismatches] = -1; debug( - printf("%d mismatches on right: ",nmismatches); + printf("%d mismatches on left: ",nmismatches); for (i = 0; i <= nmismatches; i++) { printf("%d ",mismatch_positions[i]); } printf("\n"); ); + return nmismatches; } -/************************************************************************ - * Marking - ************************************************************************/ - -/* Derived from mismatches_left() */ -int -Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_compress, - Univcoord_T left, int pos5, int pos3, - bool plusp, int genestrand) { -#ifdef DEBUG14 - int answer; -#endif - int mismatch_position; - int nmismatches = 0, offset, nshift; +static int +mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_compress, + Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand, + bool query_unk_mismatch_local_p) { + int nmismatches = 0, offset, relpos, nshift; int startdiscard, enddiscard; Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32; - Genomecomp_T *query_shifted, *ptr, *end; - UINT4 diff_32; - Genomediff_T diff; - int relpos; + Genomecomp_T *query_shifted, *ptr, *startptr; +#ifndef HAVE_BUILTIN_CLZ + Genomecomp_T top; +#endif int startcolumni, endcolumni; + UINT4 diff_32; +#ifdef HAVE_SSE2 + __m128i diff_128; +#endif +#ifdef HAVE_AVX2 + __m256i diff_256; +#endif - debug5( + debug( printf("\n\n"); - printf("genomic = %s\n",genomic); - printf("Genome (in mark_mismatches_ref):\n"); + printf("Entered mismatches_right with %d max_mismatches\n",max_mismatches); + printf("Genome (in mismatches_right):\n"); Genome_print_blocks(ref_blocks,left+pos5,left+pos3); printf("\n"); ); - startblocki = (left+pos5)/128U*12; startcolumni = ((left+pos5) % 128) / 32; startblocki_32 = startblocki + startcolumni; @@ -21522,211 +24123,257 @@ 
endcolumni = ((left+pos3) % 128) / 32; endblocki_32 = endblocki + endcolumni; - debug5(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u, plusp %d, step_size %d\n", - left,pos5,pos3,startblocki,endblocki,plusp,STEP_SIZE)); + debug(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n", + left,pos5,pos3,startblocki,endblocki)); nshift = left % STEP_SIZE; query_shifted = Compress_shift(query_compress,nshift); - debug5(printf("Query shifted %d:\n",nshift)); - debug5(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); - query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE; - - if (endblocki_32 == startblocki_32) { - startdiscard = (left+pos5) % 32; - enddiscard = (left+pos3) % 32; -#if 0 - if (plusp == true) { - offset = -startdiscard + pos5 /*+ mismatch_offset*/; - } else { - offset = -startdiscard + pos5 /*- mismatch_offset*/; - } + debug(printf("Query shifted %d:\n",nshift)); + debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); + query_shifted += (nshift+pos3)/STEP_SIZE*COMPRESS_BLOCKSIZE; +#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) #else - offset = -startdiscard + pos5; + query_shifted += endcolumni; #endif - debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); + startdiscard = (left+pos5) % 32; + enddiscard = (left+pos3) % 32; + offset = (pos3 - 1) - enddiscard + 32; + ptr = &(ref_blocks[endblocki_32]); + startptr = &(ref_blocks[startblocki_32]); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[startblocki_32]), - plusp,genestrand,query_unk_mismatch_p); -#else - diff_32 = (block_diff_32)(query_shifted + startcolumni,&(ref_blocks[startblocki_32]), - plusp,genestrand,query_unk_mismatch_p); -#endif + if (startblocki_32 == endblocki_32) { + /* Single block */ + debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); + + diff_32 = 
(block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); diff_32 = clear_start_32(diff_32,startdiscard); diff_32 = clear_end_32(diff_32,enddiscard); - while (nonzero_p_32(diff_32)) { - mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); - diff_32 = clear_lowbit_32(diff_32,relpos); - if (plusp == false) { - mismatch_position = (querylength - 1) - mismatch_position; - } - genomic[mismatch_position] = tolower(genomic[mismatch_position]); - nmismatches++; + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32)); + diff_32 = clear_highbit_32(diff_32,relpos); } - debug5(printf("genomic = %s\n",genomic)); -#ifdef DEBUG14 - answer = nmismatches; - nmismatches = 0; + return nmismatches; + + } else if (startblocki == endblocki) { +#if defined(USE_SHIFT_MISMATCH_POSITIONS) && defined(HAVE_SSE2) + /* Shift */ + startdiscard += 96 - (endcolumni - startcolumni)*32; + enddiscard += 96; + diff_128 = (block_diff_128_shift_hi)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p, + endcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); + + while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_128(diff_128)); + diff_128 = clear_highbit_128(diff_128,relpos); + } + return nmismatches; + #else + /* End block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); + diff_32 = clear_end_32(diff_32,enddiscard); + + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32)); + diff_32 = clear_highbit_32(diff_32,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } + + query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL; + offset -= 
32; + + /* Single row */ + while (--endcolumni > startcolumni) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); + + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32)); + diff_32 = clear_highbit_32(diff_32,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } + + query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL; + offset -= 32; + } + + /* Start block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); + diff_32 = clear_start_32(diff_32,startdiscard); + + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32)); + diff_32 = clear_highbit_32(diff_32,relpos); + } return nmismatches; #endif - } -#ifndef DEBUG14 - else { +#if defined(USE_WRAP_MISMATCH_POSITIONS) && defined(HAVE_SSSE3) + } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) { + /* Wrap */ + startdiscard += (startcolumni - endcolumni - 1)*32; + enddiscard += 96; + diff_128 = (block_diff_128_wrap_hi)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p, + endcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); + + while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_128(diff_128)); + diff_128 = clear_highbit_128(diff_128,relpos); + } + return nmismatches; #endif -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - startblocki = startblocki_32; - endblocki = endblocki_32; -#endif + } else { + /* End block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); + diff_32 = clear_end_32(diff_32,enddiscard); - startdiscard = (left+pos5) % STEP_SIZE; - enddiscard = (left+pos3) % 
STEP_SIZE; -#if 0 - if (plusp == true) { - offset = -startdiscard + pos5 /*+ mismatch_offset*/; - } else { - offset = -startdiscard + pos5 /*- mismatch_offset*/; + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32)); + diff_32 = clear_highbit_32(diff_32,relpos); } -#else - offset = -startdiscard + pos5; -#endif - - debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL; + offset -= 32; -#ifndef DEBUG14 - } + /* End row */ + while (--endcolumni >= 0) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); + + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32)); + diff_32 = clear_highbit_32(diff_32,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL; + offset -= 32; + } +#ifdef HAVE_SSE2 + query_shifted -= QUERY_NEXTROW; #endif + ptr -= GENOME_NEXTROW; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else - if (endblocki == startblocki) { - diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); - diff = clear_end(diff,enddiscard); - - while (nonzero_p(diff)) { - mismatch_position = offset + (relpos = count_trailing_zeroes(diff)); - debug5(print_diff_trailing_zeroes(diff,offset)); - diff = clear_lowbit(diff,relpos); - if (plusp == false) { - mismatch_position = (querylength - 1) - mismatch_position; + /* Middle rows */ +#ifdef HAVE_AVX2 + while (ptr >= startptr + 24) { + diff_256 = (block_diff_256)(&(query_shifted[-15]),&(ptr[-15]),plusp,genestrand,query_unk_mismatch_local_p); + + while 
(nonzero_p_256(diff_256) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_256(diff_256)); + diff_256 = clear_highbit_256(diff_256,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; } - genomic[mismatch_position] = tolower(genomic[mismatch_position]); - nmismatches++; + query_shifted -= 24; ptr -= 24; + offset -= 256; } - debug5(printf("genomic = %s\n",genomic)); - debug14(if (endblocki_32 == startblocki) assert(answer == nmismatches)); - return nmismatches; - - } else { #endif - /* Startblock */ - diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); - - while (nonzero_p(diff)) { - mismatch_position = offset + (relpos = count_trailing_zeroes(diff)); - debug5(print_diff_trailing_zeroes(diff,offset)); - diff = clear_lowbit(diff,relpos); - if (plusp == false) { - mismatch_position = (querylength - 1) - mismatch_position; +#ifdef HAVE_SSE2 + while (ptr >= startptr + 12) { + diff_128 = (block_diff_128)(&(query_shifted[-3]),&(ptr[-3]),plusp,genestrand,query_unk_mismatch_local_p); + + while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_128(diff_128)); + diff_128 = clear_highbit_128(diff_128,relpos); } - genomic[mismatch_position] = tolower(genomic[mismatch_position]); - nmismatches++; + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted -= 12; ptr -= 12; + offset -= 128; } - - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ptr = &(ref_blocks[startblocki]); - ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;} #else - ptr = &(ref_blocks[startblocki+12]); -#endif - end = &(ref_blocks[endblocki]); - offset += STEP_SIZE; /* 128 or 32 */ - while (ptr < end) { - diff = 
(block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); - - while (nonzero_p(diff)) { - mismatch_position = offset + (relpos = count_trailing_zeroes(diff)); - debug5(print_diff_trailing_zeroes(diff,offset)); - diff = clear_lowbit(diff,relpos); - if (plusp == false) { - mismatch_position = (querylength - 1) - mismatch_position; + while (ptr >= startptr + 12) { + for (endcolumni = 3; endcolumni >= 0; --endcolumni) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); + + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32)); + diff_32 = clear_highbit_32(diff_32,relpos); } - genomic[mismatch_position] = tolower(genomic[mismatch_position]); - nmismatches++; + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL; + offset -= 32; } - - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;} -#else - ptr += 12; -#endif - offset += STEP_SIZE; /* 128 or 32 */ + /* query_shifted -= QUERY_NEXTROW; */ ptr -= GENOME_NEXTROW; } +#endif - /* Endblock */ - diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); - diff = clear_end(diff,enddiscard); + /* Start row */ + while (ptr > startptr) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); - while (nonzero_p(diff)) { - mismatch_position = offset + (relpos = count_trailing_zeroes(diff)); - debug5(print_diff_trailing_zeroes(diff,offset)); - diff = clear_lowbit(diff,relpos); - if (plusp == false) { - mismatch_position = (querylength - 1) - mismatch_position; + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32)); + diff_32 = 
clear_highbit_32(diff_32,relpos); } - genomic[mismatch_position] = tolower(genomic[mismatch_position]); - nmismatches++; + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL; + offset -= 32; } - debug5(printf("genomic = %s\n",genomic)); - debug14(if (endblocki_32 == startblocki) assert(answer == nmismatches)); - return nmismatches; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else + /* Start block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_local_p); + diff_32 = clear_start_32(diff_32,startdiscard); + + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32)); + diff_32 = clear_highbit_32(diff_32,relpos); + } + return nmismatches; } -#endif } -/* Derived from mismatches_left_snps() */ +/* Returns nmismatches_both */ static int -mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress, - Univcoord_T left, int pos5, int pos3, - bool plusp, int genestrand) { -#ifdef DEBUG14 - int answer; -#endif - int mismatch_position; - int nmismatches_both = 0, offset, nshift; +mismatches_right_snps (int *mismatch_positions, int max_mismatches, Compress_T query_compress, + Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand, + bool query_unk_mismatch_local_p) { + int nmismatches = 0, offset, relpos, nshift; int startdiscard, enddiscard; Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32; - Genomecomp_T *query_shifted, *ref_ptr, *alt_ptr, *end; - UINT4 diff_32; - Genomediff_T diff; - int relpos; + Genomecomp_T *query_shifted, *ref_ptr, *alt_ptr, *startptr; +#ifndef HAVE_BUILTIN_CLZ + Genomecomp_T top; +#endif int startcolumni, endcolumni; + UINT4 diff_32; +#ifdef HAVE_SSE2 + __m128i diff_128; +#endif +#ifdef HAVE_AVX2 + __m256i diff_256; +#endif - debug5( + debug( printf("\n\n"); - printf("genomic = %s\n",genomic); - 
printf("Genome (in mark_mismatches_snps):\n"); - Genome_print_blocks_snp(ref_blocks,snp_blocks,left+pos5,left+pos3); + printf("Entered mismatches_right_snps with %d max_mismatches\n",max_mismatches); + printf("Genome (in mismatches_right):\n"); + Genome_print_blocks(ref_blocks,left+pos5,left+pos3); printf("\n"); ); - startblocki = (left+pos5)/128U*12; startcolumni = ((left+pos5) % 128) / 32; startblocki_32 = startblocki + startcolumni; @@ -21735,241 +24382,334 @@ endcolumni = ((left+pos3) % 128) / 32; endblocki_32 = endblocki + endcolumni; - debug5(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n", + debug(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n", left,pos5,pos3,startblocki,endblocki)); nshift = left % STEP_SIZE; query_shifted = Compress_shift(query_compress,nshift); - debug5(printf("Query shifted %d:\n",nshift)); - debug5(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); - query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE; - - if (endblocki_32 == startblocki_32) { - startdiscard = (left+pos5) % 32; - enddiscard = (left+pos3) % 32; -#if 0 - if (plusp == true) { - offset = -startdiscard + pos5 /*+ mismatch_offset*/; - } else { - offset = -startdiscard + pos5 /*- mismatch_offset*/; - } + debug(printf("Query shifted %d:\n",nshift)); + debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); + query_shifted += (nshift+pos3)/STEP_SIZE*COMPRESS_BLOCKSIZE; +#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) #else - offset = -startdiscard + pos5; + query_shifted += endcolumni; #endif - debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); + startdiscard = (left+pos5) % 32; + enddiscard = (left+pos3) % 32; + offset = (pos3 - 1) - enddiscard + 32; + ref_ptr = &(ref_blocks[endblocki_32]); + alt_ptr = &(snp_blocks[endblocki_32]); + startptr = &(ref_blocks[startblocki_32]); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - diff_32 = 
(block_diff_snp_32)(query_shifted,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]), - plusp,genestrand,query_unk_mismatch_p); -#else - diff_32 = (block_diff_snp_32)(query_shifted + startcolumni,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]), - plusp,genestrand,query_unk_mismatch_p); -#endif + if (startblocki_32 == endblocki_32) { + /* Single block */ + debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); + + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); diff_32 = clear_start_32(diff_32,startdiscard); diff_32 = clear_end_32(diff_32,enddiscard); - while (nonzero_p_32(diff_32)) { - mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); - diff_32 = clear_lowbit_32(diff_32,relpos); - if (plusp == false) { - mismatch_position = (querylength - 1) - mismatch_position; - } - genomic[mismatch_position] = tolower(genomic[mismatch_position]); - nmismatches_both++; + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32)); + diff_32 = clear_highbit_32(diff_32,relpos); } - debug5(printf("genomic = %s\n",genomic)); -#ifdef DEBUG14 - answer = nmismatches_both; - nmismatches_both = 0; + return nmismatches; + + } else if (startblocki == endblocki) { +#if defined(USE_SHIFT_MISMATCH_POSITIONS) && defined(HAVE_SSE2) + /* Shift */ + startdiscard += 96 - (endcolumni - startcolumni)*32; + enddiscard += 96; + diff_128 = (block_diff_snp_128_shift_hi)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p, + endcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); + + while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_128(diff_128)); + diff_128 = 
clear_highbit_128(diff_128,relpos); + } + return nmismatches; + #else - return nmismatches_both; -#endif + /* End block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); + diff_32 = clear_end_32(diff_32,enddiscard); - } -#ifndef DEBUG14 - else { -#endif + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32)); + diff_32 = clear_highbit_32(diff_32,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - startblocki = startblocki_32; - endblocki = endblocki_32; -#endif + query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL; + offset -= 32; - startdiscard = (left+pos5) % STEP_SIZE; - enddiscard = (left+pos3) % STEP_SIZE; -#if 0 - if (plusp == true) { - offset = -startdiscard + pos5 /*+ mismatch_offset*/; - } else { - offset = -startdiscard + pos5 /*- mismatch_offset*/; + /* Single row */ + while (--endcolumni > startcolumni) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); + + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32)); + diff_32 = clear_highbit_32(diff_32,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } + + query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL; + offset -= 32; } -#else - offset = -startdiscard + pos5; + + /* Start block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); + diff_32 = clear_start_32(diff_32,startdiscard); + + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32)); + diff_32 = 
clear_highbit_32(diff_32,relpos); + } + return nmismatches; #endif - - debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); -#ifndef DEBUG14 - } +#if defined(USE_WRAP_MISMATCH_POSITIONS) && defined(HAVE_SSSE3) + } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) { + /* Wrap */ + startdiscard += (startcolumni - endcolumni - 1)*32; + enddiscard += 96; + diff_128 = (block_diff_snp_128_wrap_hi)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p, + endcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); + + while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_128(diff_128)); + diff_128 = clear_highbit_128(diff_128,relpos); + } + return nmismatches; #endif + } else { + /* End block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); + diff_32 = clear_end_32(diff_32,enddiscard); -#if defined(WORDS_BIGENDIAN)|| !defined(HAVE_SSE2) -#else - if (endblocki == startblocki) { - diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); - diff = clear_end(diff,enddiscard); + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32)); + diff_32 = clear_highbit_32(diff_32,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL; + offset -= 32; - while (nonzero_p(diff)) { - mismatch_position = offset + (relpos = count_trailing_zeroes(diff)); - debug5(print_diff_trailing_zeroes(diff,offset)); - diff = clear_lowbit(diff,relpos); - if (plusp == false) { - mismatch_position = 
(querylength - 1) - mismatch_position; + /* End row */ + while (--endcolumni >= 0) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); + + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32)); + diff_32 = clear_highbit_32(diff_32,relpos); } - genomic[mismatch_position] = tolower(genomic[mismatch_position]); - nmismatches_both++; + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL; + offset -= 32; } - debug5(printf("genomic = %s\n",genomic)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches_both)); - return nmismatches_both; +#ifdef HAVE_SSE2 + query_shifted -= QUERY_NEXTROW; +#endif + ref_ptr -= GENOME_NEXTROW; alt_ptr -= GENOME_NEXTROW; - } else { + + /* Middle rows */ +#ifdef HAVE_AVX2 + while (ref_ptr >= startptr + 24) { + diff_256 = (block_diff_snp_256)(&(query_shifted[-15]),&(alt_ptr[-15]),&(ref_ptr[-15]),plusp,genestrand,query_unk_mismatch_local_p); + + while (nonzero_p_256(diff_256) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_256(diff_256)); + diff_256 = clear_highbit_256(diff_256,relpos); + } + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted -= 24; ref_ptr -= 24; alt_ptr -= 24; + offset -= 256; + } #endif - /* Startblock */ - diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); - - while (nonzero_p(diff)) { - mismatch_position = offset + (relpos = count_trailing_zeroes(diff)); - debug5(print_diff_trailing_zeroes(diff,offset)); - diff = clear_lowbit(diff,relpos); - if (plusp == false) { - mismatch_position = (querylength - 1) - mismatch_position; +#ifdef 
HAVE_SSE2 + while (ref_ptr >= startptr + 12) { + diff_128 = (block_diff_snp_128)(&(query_shifted[-3]),&(alt_ptr[-3]),&(ref_ptr[-3]),plusp,genestrand,query_unk_mismatch_local_p); + + while (nonzero_p_128(diff_128) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_128(diff_128)); + diff_128 = clear_highbit_128(diff_128,relpos); } - genomic[mismatch_position] = tolower(genomic[mismatch_position]); - nmismatches_both++; + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted -= 12; ref_ptr -= 12; alt_ptr -= 12; + offset -= 128; } +#else + while (ref_ptr >= startptr + 12) { + for (endcolumni = 3; endcolumni >= 0; --endcolumni) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ref_ptr = &(ref_blocks[startblocki]); - alt_ptr = &(snp_blocks[startblocki]); - ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;} -#else - ref_ptr = &(ref_blocks[startblocki+12]); - alt_ptr = &(snp_blocks[startblocki+12]); -#endif - end = &(ref_blocks[endblocki]); - offset += STEP_SIZE; /* 128 or 32 */ - while (ref_ptr < end) { - diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); - - while (nonzero_p(diff)) { - mismatch_position = offset + (relpos = count_trailing_zeroes(diff)); - debug5(print_diff_trailing_zeroes(diff,offset)); - diff = clear_lowbit(diff,relpos); - if (plusp == false) { - mismatch_position = (querylength - 1) - mismatch_position; + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32)); + diff_32 = clear_highbit_32(diff_32,relpos); } - genomic[mismatch_position] = tolower(genomic[mismatch_position]); - nmismatches_both++; + if (nmismatches > 
max_mismatches) { + return nmismatches; + } + query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL; + offset -= 32; } - - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;} -#else - ref_ptr += 12; alt_ptr += 12; -#endif - offset += STEP_SIZE; /* 128 or 32 */ + /* query_shifted -= QUERY_NEXTROW; */ ref_ptr -= GENOME_NEXTROW; alt_ptr -= GENOME_NEXTROW; } +#endif - /* Endblock */ - diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); - diff = clear_end(diff,enddiscard); + /* Start row */ + while (ref_ptr > startptr) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); - while (nonzero_p(diff)) { - mismatch_position = offset + (relpos = count_trailing_zeroes(diff)); - debug5(print_diff_trailing_zeroes(diff,offset)); - diff = clear_lowbit(diff,relpos); - if (plusp == false) { - mismatch_position = (querylength - 1) - mismatch_position; + while (nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32)); + diff_32 = clear_highbit_32(diff_32,relpos); } - genomic[mismatch_position] = tolower(genomic[mismatch_position]); - nmismatches_both++; + if (nmismatches > max_mismatches) { + return nmismatches; + } + query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL; + offset -= 32; } - debug5(printf("genomic = %s\n",genomic)); - debug14(if (endblocki_32 == startblocki_32) assert(answer == nmismatches_both)); - return nmismatches_both; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else + /* Start block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_local_p); + diff_32 = clear_start_32(diff_32,startdiscard); + + while 
(nonzero_p_32(diff_32) && nmismatches <= max_mismatches) { + mismatch_positions[nmismatches++] = offset - (relpos = count_leading_zeroes_32(diff_32)); + diff_32 = clear_highbit_32(diff_32,relpos); + } + return nmismatches; + } +} + + + +/* Returns mismatch_positions[0..nmismatches], where nmismatches <= max_mismatches */ +int +Genome_mismatches_right (int *mismatch_positions, int max_mismatches, Compress_T query_compress, + Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand) { + int nmismatches; +#ifdef DEBUG + int i; +#endif + +#if 0 + if (dibasep) { + debug(printf("Dibase_mismatches_right from %u+%d to %u+%d:\n",left,pos5,left,pos3)); + + nmismatches = Dibase_mismatches_right(&(*mismatch_positions),&(*colordiffs),max_mismatches,query, + pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3); } #endif + + if (snp_blocks == NULL) { + nmismatches = mismatches_right(&(*mismatch_positions),max_mismatches,query_compress, + left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p); + } else { + nmismatches = mismatches_right_snps(&(*mismatch_positions),max_mismatches,query_compress, + left,pos5,pos3,plusp,genestrand,query_unk_mismatch_p); + } + mismatch_positions[nmismatches] = -1; + debug( + printf("%d mismatches on right: ",nmismatches); + for (i = 0; i <= nmismatches; i++) { + printf("%d ",mismatch_positions[i]); + } + printf("\n"); + ); + return nmismatches; } +/* Returns mismatch_positions[0..nmismatches], where nmismatches <= max_mismatches */ +/* See note above about why we set query_unk_mismatch_p to false */ int -Genome_mark_mismatches (char *genomic, int querylength, Compress_T query_compress, - Univcoord_T left, int pos5, int pos3, - bool plusp, int genestrand) { +Genome_mismatches_right_trim (int *mismatch_positions, int max_mismatches, Compress_T query_compress, + Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand) { + int nmismatches; +#ifdef DEBUG + int i; +#endif #if 0 if (dibasep) { - fprintf(stderr,"Not implemented\n"); -#if 0 - 
debug5(printf("Dibase_mismatches_left from %u+%d to %u+%d:\n",left,pos5,left,pos3)); + debug(printf("Dibase_mismatches_right from %u+%d to %u+%d:\n",left,pos5,left,pos3)); - nmismatches = Dibase_mismatches_left(&(*mismatch_positions),&(*colordiffs),max_mismatches,query, - pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3); - mismatch_positions[nmismatches] = pos3 + 1; /* Need +1 because of starting assumed nt */ -#endif - return 0; + nmismatches = Dibase_mismatches_right(&(*mismatch_positions),&(*colordiffs),max_mismatches,query, + pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3); } #endif if (snp_blocks == NULL) { - return Genome_mark_mismatches_ref(&(*genomic),querylength,query_compress, - left,pos5,pos3,plusp,genestrand); + nmismatches = mismatches_right(&(*mismatch_positions),max_mismatches,query_compress, + left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false); } else { - return mark_mismatches_snps(&(*genomic),querylength,query_compress, - left,pos5,pos3,plusp,genestrand); + nmismatches = mismatches_right_snps(&(*mismatch_positions),max_mismatches,query_compress, + left,pos5,pos3,plusp,genestrand,/*query_unk_mismatch_p*/false); } + mismatch_positions[nmismatches] = -1; + debug( + printf("%d mismatches on right: ",nmismatches); + for (i = 0; i <= nmismatches; i++) { + printf("%d ",mismatch_positions[i]); + } + printf("\n"); + ); + return nmismatches; } /************************************************************************ - * Trimming + * Marking ************************************************************************/ -static int -trim_left_substring (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, - bool plusp, int genestrand) { -#ifdef DEBUG14 - int answer; -#endif - int startdiscard, enddiscard, offset; +/* Derived from mismatches_left() */ +int +Genome_mark_mismatches_ref (char *genomic, int querylength, Compress_T query_compress, + Univcoord_T left, int pos5, int pos3, + bool plusp, int genestrand) { + int 
mismatch_position; + int nmismatches = 0, offset, nshift; + int startdiscard, enddiscard; Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32; - Genomecomp_T *ptr, *start; - Genomecomp_T *query_shifted; - UINT4 diff_32; - Genomediff_T diff; - int nshift; + Genomecomp_T *query_shifted, *ptr, *endptr; + int relpos; int startcolumni, endcolumni; + UINT4 diff_32; +#ifdef HAVE_SSE2 + __m128i diff_128; +#endif +#ifdef HAVE_AVX2 + __m256i diff_256; +#endif - int totalscore, bestscore, score; - int trimpos; - Genomecomp_T p; - debug( + debug5( printf("\n\n"); - printf("Genome (in trim_left_substring) from %u+%d to %u+%d:\n",left,pos5,left,pos3); + printf("genomic = %s\n",genomic); + printf("Genome (in mark_mismatches_ref):\n"); Genome_print_blocks(ref_blocks,left+pos5,left+pos3); printf("\n"); ); @@ -21983,1082 +24723,1042 @@ endcolumni = ((left+pos3) % 128) / 32; endblocki_32 = endblocki + endcolumni; - debug(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n", - left,pos5,pos3,startblocki,endblocki)); + debug5(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u, plusp %d, step_size %d\n", + left,pos5,pos3,startblocki,endblocki,plusp,STEP_SIZE)); nshift = left % STEP_SIZE; query_shifted = Compress_shift(query_compress,nshift); - debug(printf("Query shifted %d:\n",nshift)); - debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); - query_shifted += (nshift+pos3)/STEP_SIZE*COMPRESS_BLOCKSIZE; - - if (startblocki_32 == endblocki_32) { - startdiscard = (left+pos5) % 32; - enddiscard = (left+pos3) % 32; - offset = (pos3 - 1) - enddiscard + 32; - debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); - + debug5(printf("Query shifted %d:\n",nshift)); + debug5(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); + query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE; #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - diff_32 = 
(block_diff_32)(query_shifted,&(ref_blocks[endblocki_32]), - plusp,genestrand,query_unk_mismatch_p); #else - diff_32 = (block_diff_32)(query_shifted + endcolumni,&(ref_blocks[endblocki_32]), - plusp,genestrand,query_unk_mismatch_p); + query_shifted += startcolumni; #endif - diff_32 = clear_end_32(diff_32,enddiscard); /* puts 0 (matches) at end */ - diff_32 = set_start_32(diff_32,startdiscard); /* puts 1 (mismatches) at start */ - - p = 3*(diff_32 >> 16); - bestscore = score_high[p]; - trimpos = offset - score_high[p+1]; - totalscore = score_high[p+2]; - debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + startdiscard = (left+pos5) % 32; + enddiscard = (left+pos3) % 32; + offset = -startdiscard + pos5; + ptr = &(ref_blocks[startblocki_32]); + endptr = &(ref_blocks[endblocki_32]); - p = 3*(diff_32 & 0x0000FFFF); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - /* totalscore += score_high[p+2]; */ - debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - /* offset -= 16 */ + if (endblocki_32 == startblocki_32) { + /* Single block */ + debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); -#ifdef DEBUG14 - answer = (trimpos - 1); -#else - return (trimpos - 1); /* trimpos-1 is on side of mismatch */ -#endif + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_start_32(diff_32,startdiscard); + diff_32 = clear_end_32(diff_32,enddiscard); + while (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + if (plusp == false) { + mismatch_position = 
(querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; } -#ifndef DEBUG14 - else { -#endif - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - startblocki = startblocki_32; - endblocki = endblocki_32; -#endif - - startdiscard = (left+pos5) % STEP_SIZE; - enddiscard = (left+pos3) % STEP_SIZE; - offset = (pos3 - 1) - enddiscard + STEP_SIZE; - - debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u, offset = %d\n", - nshift,startdiscard,enddiscard,offset)); - -#ifndef DEBUG14 - } -#endif - - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else - if (startblocki == endblocki) { - diff = (block_diff)(query_shifted,&(ref_blocks[endblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_end(diff,enddiscard); /* puts 0 (matches) at end */ - diff = set_start(diff,startdiscard); /* puts 1 (mismatches) at start */ - - - p = 3*((unsigned short) _mm_extract_epi16(diff,7)); - bestscore = score_high[p]; - trimpos = offset - score_high[p+1]; - totalscore = score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 7,_mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + debug5(printf("genomic = %s\n",genomic)); + return nmismatches; - p = 3*((unsigned short) _mm_extract_epi16(diff,6)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; + } else if (endblocki == startblocki) { +#if defined(USE_SHIFT_MISMATCH_POSITIONS) && defined(HAVE_SSE2) + /* Shift */ + enddiscard += (endcolumni - startcolumni)*32; + diff_128 = (block_diff_128_shift_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); + + while (nonzero_p_128(diff_128)) { + mismatch_position = offset + (relpos = 
count_trailing_zeroes_128(diff_128)); + diff_128 = clear_lowbit_128(diff_128,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 6,_mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + return nmismatches; - p = 3*((unsigned short) _mm_extract_epi16(diff,5)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 5,_mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; +#else + /* Start block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_start_32(diff_32,startdiscard); - p = 3*((unsigned short) _mm_extract_epi16(diff,4)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; + while (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 4,_mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,3)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset 
- score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 3,_mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + offset += 32; - p = 3*((unsigned short) _mm_extract_epi16(diff,2)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; + /* Single row */ + while (++startcolumni < endcolumni) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + + while (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; + } + + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + offset += 32; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 2,_mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,1)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 1,_mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + /* End block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_end_32(diff_32,enddiscard); - p = 3*((unsigned short) _mm_extract_epi16(diff,0)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = 
score; - trimpos = offset - score_high[p+1]; + while (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 0,_mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - - debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos - 1)); - return trimpos - 1; /* trimpos-1 is on side of mismatch */ - - } else { + + return nmismatches; #endif - /* Endblock */ - diff = (block_diff)(query_shifted,&(ref_blocks[endblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_end(diff,enddiscard); /* puts 0 (matches) at end */ - - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - p = 3*(diff >> 16); - bestscore = score_high[p]; - trimpos = offset - score_high[p+1]; - totalscore = score_high[p+2]; - debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - - p = 3*(diff & 0x0000FFFF); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - -#else - p = 3*((unsigned short) _mm_extract_epi16(diff,7)); - bestscore = score_high[p]; - trimpos = offset - score_high[p+1]; - totalscore = score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", 
- 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,6)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; +#if defined(USE_WRAP_MISMATCH_POSITIONS) && defined(HAVE_SSSE3) + } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) { + /* Wrap */ + enddiscard += (4 + endcolumni - startcolumni)*32; + diff_128 = (block_diff_128_wrap_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); + + while (nonzero_p_128(diff_128)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_128(diff_128)); + diff_128 = clear_lowbit_128(diff_128,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + return nmismatches; +#endif - p = 3*((unsigned short) _mm_extract_epi16(diff,5)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + } else { + /* Start block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_start_32(diff_32,startdiscard); - p = 3*((unsigned short) _mm_extract_epi16(diff,4)); - 
if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; + while (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,3)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + offset += 32; - p = 3*((unsigned short) _mm_extract_epi16(diff,2)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; + /* Start row */ + while (++startcolumni < 4) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + + while (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; + } + + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + offset += 32; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X 
=> bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; +#ifdef HAVE_SSE2 + query_shifted += QUERY_NEXTROW; +#endif + ptr += GENOME_NEXTROW; - p = 3*((unsigned short) _mm_extract_epi16(diff,1)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,0)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; + /* Middle rows */ +#ifdef HAVE_AVX2 + while (ptr + 24 <= endptr) { + diff_256 = (block_diff_256)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + + while (nonzero_p_256(diff_256)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_256(diff_256)); + diff_256 = clear_lowbit_256(diff_256,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; + } + + query_shifted += 24; ptr += 24; + offset += 256; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; #endif - query_shifted -= COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ptr = &(ref_blocks[endblocki]); - ptr -= 1; if (endcolumni-- == 0) {ptr -= 8; endcolumni = 3;} +#ifdef HAVE_SSE2 + while (ptr + 12 <= endptr) { + diff_128 = 
(block_diff_128)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + + while (nonzero_p_128(diff_128)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_128(diff_128)); + diff_128 = clear_lowbit_128(diff_128,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; + } + + query_shifted += 12; ptr += 12; + offset += 128; + } #else - ptr = &(ref_blocks[endblocki-12]); + while (ptr + 12 <= endptr) { + for (startcolumni = 0; startcolumni < 4; startcolumni++) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + + while (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; + } + + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + offset += 32; + } + /* query_shifted += QUERY_NEXTROW; */ ptr += GENOME_NEXTROW; + } #endif - start = &(ref_blocks[startblocki]); - while (ptr > start) { - diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + /* End row */ + while (ptr < endptr) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - p = 3*(diff >> 16); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; + while (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; } - totalscore += 
score_high[p+2]; - debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + offset += 32; + } - p = 3*(diff & 0x0000FFFF); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + /* End block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_end_32(diff_32,enddiscard); -#else - p = 3*((unsigned short) _mm_extract_epi16(diff,7)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; + while (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; + } + return nmismatches; + } +} - p = 3*((unsigned short) _mm_extract_epi16(diff,6)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 6,(unsigned short) 
_mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,5)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; +/* Derived from mismatches_left_snps() */ +/* Returns nmismatches_both */ +static int +mark_mismatches_snps (char *genomic, int querylength, Compress_T query_compress, + Univcoord_T left, int pos5, int pos3, + bool plusp, int genestrand) { + int mismatch_position; + int nmismatches = 0, offset, nshift; + int startdiscard, enddiscard; + Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32; + Genomecomp_T *query_shifted, *ref_ptr, *alt_ptr, *endptr; + int relpos; + int startcolumni, endcolumni; + UINT4 diff_32; +#ifdef HAVE_SSE2 + __m128i diff_128; +#endif +#ifdef HAVE_AVX2 + __m256i diff_256; +#endif - p = 3*((unsigned short) _mm_extract_epi16(diff,4)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,3)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 3,(unsigned short) 
_mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + debug5( + printf("\n\n"); + printf("genomic = %s\n",genomic); + printf("Genome (in mark_mismatches_ref):\n"); + Genome_print_blocks(ref_blocks,left+pos5,left+pos3); + printf("\n"); + ); - p = 3*((unsigned short) _mm_extract_epi16(diff,2)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,1)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + startblocki = (left+pos5)/128U*12; + startcolumni = ((left+pos5) % 128) / 32; + startblocki_32 = startblocki + startcolumni; - p = 3*((unsigned short) _mm_extract_epi16(diff,0)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; -#endif + endblocki = (left+pos3)/128U*12; + endcolumni = ((left+pos3) % 128) / 32; + endblocki_32 = endblocki + endcolumni; + + debug5(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u, plusp %d, step_size %d\n", + 
left,pos5,pos3,startblocki,endblocki,plusp,STEP_SIZE)); - query_shifted -= COMPRESS_BLOCKSIZE; + nshift = left % STEP_SIZE; + query_shifted = Compress_shift(query_compress,nshift); + debug5(printf("Query shifted %d:\n",nshift)); + debug5(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); + query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE; #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ptr -= 1; if (endcolumni-- == 0) {ptr -= 8; endcolumni = 3;} #else - ptr -= 12; + query_shifted += startcolumni; #endif - } - - /* Startblock */ - diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); - diff = set_start(diff,startdiscard); /* puts 1 (mismatches) at start */ + startdiscard = (left+pos5) % 32; + enddiscard = (left+pos3) % 32; + offset = -startdiscard + pos5; + ref_ptr = &(ref_blocks[startblocki_32]); + alt_ptr = &(snp_blocks[startblocki_32]); + endptr = &(ref_blocks[endblocki_32]); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - p = 3*(diff >> 16); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - - p = 3*(diff & 0x0000FFFF); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - /* totalscore += score_high[p+2]; */ - debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - /* offset -= 16; */ + if (endblocki_32 == startblocki_32) { + /* Single block */ + debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); -#else - p = 3*((unsigned short) _mm_extract_epi16(diff,7)); - if ((score = score_high[p] + 
totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_start_32(diff_32,startdiscard); + diff_32 = clear_end_32(diff_32,enddiscard); - p = 3*((unsigned short) _mm_extract_epi16(diff,6)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; + while (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + debug5(printf("genomic = %s\n",genomic)); + return nmismatches; - p = 3*((unsigned short) _mm_extract_epi16(diff,5)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; + } else if (endblocki == startblocki) { +#if defined(USE_SHIFT_MISMATCH_POSITIONS) && defined(HAVE_SSE2) + /* Shift */ + enddiscard += (endcolumni - startcolumni)*32; + diff_128 = (block_diff_snp_128_shift_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); + + while (nonzero_p_128(diff_128)) { + 
mismatch_position = offset + (relpos = count_trailing_zeroes_128(diff_128)); + diff_128 = clear_lowbit_128(diff_128,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + debug5(printf("genomic = %s\n",genomic)); + return nmismatches; - p = 3*((unsigned short) _mm_extract_epi16(diff,4)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; +#else + /* Start block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_start_32(diff_32,startdiscard); - p = 3*((unsigned short) _mm_extract_epi16(diff,3)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; + while (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - 
p = 3*((unsigned short) _mm_extract_epi16(diff,2)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + offset += 32; - p = 3*((unsigned short) _mm_extract_epi16(diff,1)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; + /* Single row */ + while (++startcolumni < endcolumni) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + + while (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; + } + + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + offset += 32; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,0)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; + /* End block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_end_32(diff_32,enddiscard); + + while (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = 
count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; -#endif - debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos - 1)); - return (trimpos - 1); /* trimpos-1 is on side of mismatch */ - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else - } + return nmismatches; #endif -} - -static int -trim_left_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, - bool plusp, int genestrand) { -#ifdef DEBUG14 - int answer; +#if defined(USE_WRAP_MISMATCH_POSITIONS) && defined(HAVE_SSSE3) + } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) { + /* Wrap */ + enddiscard += (4 + endcolumni - startcolumni)*32; + diff_128 = (block_diff_snp_128_wrap_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); + diff_128 = clear_end_128(diff_128,enddiscard); + + while (nonzero_p_128(diff_128)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_128(diff_128)); + diff_128 = clear_lowbit_128(diff_128,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; + } + debug5(printf("genomic = %s\n",genomic)); + return nmismatches; #endif - int startdiscard, enddiscard, offset; - Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32; - Genomecomp_T *ref_ptr, *alt_ptr, *start; - Genomecomp_T *query_shifted; - UINT4 diff_32; - Genomediff_T 
diff; - int nshift; - int startcolumni, endcolumni; - int totalscore, bestscore, score; - int trimpos; - Genomecomp_T p; + } else { + /* Start block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_start_32(diff_32,startdiscard); - debug( - printf("\n\n"); - printf("Genome (in trim_left_substring_snps) from %u+%d to %u+%d:\n",left,pos5,left,pos3); - Genome_print_blocks_snp(ref_blocks,snp_blocks,left+pos5,left+pos3); - printf("\n"); - ); + while (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; + } + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + offset += 32; - startblocki = (left+pos5)/128U*12; - startcolumni = ((left+pos5) % 128) / 32; - startblocki_32 = startblocki + startcolumni; + /* Start row */ + while (++startcolumni < 4) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + + while (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; + } + + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + offset += 32; + } +#ifdef HAVE_SSE2 + query_shifted += QUERY_NEXTROW; +#endif + ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW; - endblocki = (left+pos3)/128U*12; - endcolumni = ((left+pos3) % 128) / 32; - endblocki_32 = endblocki + endcolumni; - debug(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n", - 
left,pos5,pos3,startblocki,endblocki)); + /* Middle rows */ +#ifdef HAVE_AVX2 + while (ref_ptr + 24 <= endptr) { + diff_256 = (block_diff_snp_256)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); - nshift = left % STEP_SIZE; - query_shifted = Compress_shift(query_compress,nshift); - debug(printf("Query shifted %d:\n",nshift)); - debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); - query_shifted += (nshift+pos3)/STEP_SIZE*COMPRESS_BLOCKSIZE; + while (nonzero_p_256(diff_256)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_256(diff_256)); + diff_256 = clear_lowbit_256(diff_256,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; + } + + query_shifted += 24; ref_ptr += 24; alt_ptr += 24; + offset += 256; + } +#endif - if (startblocki_32 == endblocki_32) { - startdiscard = (left+pos5) % 32; - enddiscard = (left+pos3) % 32; - offset = (pos3 - 1) - enddiscard + 32; - debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); +#ifdef HAVE_SSE2 + while (ref_ptr + 12 <= endptr) { + diff_128 = (block_diff_snp_128)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[endblocki_32]),&(ref_blocks[endblocki_32]), - plusp,genestrand,query_unk_mismatch_p); + while (nonzero_p_128(diff_128)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_128(diff_128)); + diff_128 = clear_lowbit_128(diff_128,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; + } + + query_shifted += 12; ref_ptr += 12; alt_ptr += 12; + offset += 128; + } #else - diff_32 = (block_diff_snp_32)(query_shifted + 
endcolumni,&(snp_blocks[endblocki_32]),&(ref_blocks[endblocki_32]), - plusp,genestrand,query_unk_mismatch_p); + while (ref_ptr + 12 <= endptr) { + for (startcolumni = 0; startcolumni < 4; startcolumni++) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + + while (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; + } + + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + offset += 32; + } + /* query_shifted += QUERY_NEXTROW; */ ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW; + } #endif - diff_32 = clear_end_32(diff_32,enddiscard); /* puts 0 (matches) at end */ - diff_32 = set_start_32(diff_32,startdiscard); /* puts 1 (mismatches) at start */ + /* End row */ + while (ref_ptr < endptr) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + while (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; + } + + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + offset += 32; + } - p = 3*(diff_32 >> 16); - bestscore = score_high[p]; - trimpos = offset - score_high[p+1]; - totalscore = score_high[p+2]; - debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + /* End block */ + diff_32 = 
(block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_end_32(diff_32,enddiscard); - p = 3*(diff_32 & 0x0000FFFF); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; + while (nonzero_p_32(diff_32)) { + mismatch_position = offset + (relpos = count_trailing_zeroes_32(diff_32)); + diff_32 = clear_lowbit_32(diff_32,relpos); + if (plusp == false) { + mismatch_position = (querylength - 1) - mismatch_position; + } + genomic[mismatch_position] = tolower(genomic[mismatch_position]); + nmismatches++; } - /* totalscore += score_high[p+2]; */ - debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - /* offset -= 16; */ - -#ifdef DEBUG14 - answer = (trimpos - 1); -#else - return (trimpos - 1); /* trimpos-1 is on side of mismatch */ -#endif - + return nmismatches; } -#ifndef DEBUG14 - else { -#endif +} -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - startblocki = startblocki_32; - endblocki = endblocki_32; -#endif - startdiscard = (left+pos5) % STEP_SIZE; - enddiscard = (left+pos3) % STEP_SIZE; - offset = (pos3 - 1) - enddiscard + STEP_SIZE; - - debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u, offset = %d\n", - nshift,startdiscard,enddiscard,offset)); -#ifndef DEBUG14 +int +Genome_mark_mismatches (char *genomic, int querylength, Compress_T query_compress, + Univcoord_T left, int pos5, int pos3, + bool plusp, int genestrand) { + +#if 0 + if (dibasep) { + fprintf(stderr,"Not implemented\n"); +#if 0 + debug5(printf("Dibase_mismatches_left from %u+%d to %u+%d:\n",left,pos5,left,pos3)); + + nmismatches = Dibase_mismatches_left(&(*mismatch_positions),&(*colordiffs),max_mismatches,query, + pos5,pos3,/*startpos*/left+pos5,/*endpos*/left+pos3); + mismatch_positions[nmismatches] = pos3 + 1; /* Need +1 because of starting assumed 
nt */ +#endif + return 0; } #endif + if (snp_blocks == NULL) { + return Genome_mark_mismatches_ref(&(*genomic),querylength,query_compress, + left,pos5,pos3,plusp,genestrand); + } else { + return mark_mismatches_snps(&(*genomic),querylength,query_compress, + left,pos5,pos3,plusp,genestrand); + } +} -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else - if (startblocki == endblocki) { - diff = (block_diff_snp)(query_shifted,&(snp_blocks[endblocki]),&(ref_blocks[endblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_end(diff,enddiscard); /* puts 0 (matches) at end */ - diff = set_start(diff,startdiscard); /* puts 1 (mismatches) at start */ +/************************************************************************ + * Trimming + ************************************************************************/ +static int +trim_left_substring (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, + bool plusp, int genestrand) { + int startdiscard, enddiscard, offset; + Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32; + Genomecomp_T *ptr, *startptr; + Genomecomp_T *query_shifted; + int nshift; + int startcolumni, endcolumni; + UINT4 diff_32; +#ifdef HAVE_SSE2 + __m128i diff_128; + int i; +#endif +#ifdef HAVE_AVX2 + __m256i diff_256; +#endif +#ifdef HAVE_AVX2 + unsigned short array[16]; +#elif defined(HAVE_SSE2) + unsigned short array[8]; +#endif - p = 3*((unsigned short) _mm_extract_epi16(diff,7)); - bestscore = score_high[p]; - trimpos = offset - score_high[p+1]; - totalscore = score_high[p+2]; - debug(printf("diff piecei %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,6)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - 
debug(printf("diff piecei %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,5)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piecei %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,4)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piecei %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + int totalscore, bestscore, score; + int trimpos; + Genomecomp_T p; - p = 3*((unsigned short) _mm_extract_epi16(diff,3)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piecei %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + debug( + printf("\n\n"); + printf("Genome (in trim_left_substring) from %u+%d to %u+%d:\n",left,pos5,left,pos3); + Genome_print_blocks(ref_blocks,left+pos5,left+pos3); + printf("\n"); + ); - p = 3*((unsigned short) _mm_extract_epi16(diff,2)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - 
debug(printf("diff piecei %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,1)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piecei %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + startblocki = (left+pos5)/128U*12; + startcolumni = ((left+pos5) % 128) / 32; + startblocki_32 = startblocki + startcolumni; - p = 3*((unsigned short) _mm_extract_epi16(diff,0)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piecei %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + endblocki = (left+pos3)/128U*12; + endcolumni = ((left+pos3) % 128) / 32; + endblocki_32 = endblocki + endcolumni; - debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos - 1)); - return trimpos - 1; /* trimpos-1 is on side of mismatch */ + debug(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n", + left,pos5,pos3,startblocki,endblocki)); - } else { + nshift = left % STEP_SIZE; + query_shifted = Compress_shift(query_compress,nshift); + debug(printf("Query shifted %d:\n",nshift)); + debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); + query_shifted += (nshift+pos3)/STEP_SIZE*COMPRESS_BLOCKSIZE; +#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) +#else + query_shifted += endcolumni; #endif - /* Endblock */ - diff = 
(block_diff_snp)(query_shifted,&(snp_blocks[endblocki]),&(ref_blocks[endblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_end(diff,enddiscard); /* puts 0 (matches) at end */ + startdiscard = (left+pos5) % 32; + enddiscard = (left+pos3) % 32; + offset = (pos3 - 1) - enddiscard + 32; + ptr = &(ref_blocks[endblocki_32]); + startptr = &(ref_blocks[startblocki_32]); + if (startblocki_32 == endblocki_32) { + /* Single block */ + debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - p = 3*(diff >> 16); + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_end_32(diff_32,enddiscard); + diff_32 = set_start_32(diff_32,startdiscard); /* puts 1 (mismatches) at start */ + + p = 3*(diff_32 >> 16); bestscore = score_high[p]; trimpos = offset - score_high[p+1]; totalscore = score_high[p+2]; debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset -= 16; - p = 3*(diff & 0x0000FFFF); + p = 3*(diff_32 & 0x0000FFFF); if ((score = score_high[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset - score_high[p+1]; } - totalscore += score_high[p+2]; + /* totalscore += score_high[p+2]; */ debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - -#else - p = 3*((unsigned short) _mm_extract_epi16(diff,7)); - bestscore = score_high[p]; - trimpos = offset - score_high[p+1]; - totalscore = score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 7,(unsigned short) 
_mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,6)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + /* offset -= 16; */ - p = 3*((unsigned short) _mm_extract_epi16(diff,5)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + return (trimpos - 1); /* trimpos-1 is on side of mismatch */ - p = 3*((unsigned short) _mm_extract_epi16(diff,4)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; + } else if (startblocki == endblocki) { +#if defined(USE_SHIFT_TRIM) && defined(HAVE_SSE2) + /* Shift */ + startdiscard += 96 - (endcolumni - startcolumni)*32; + enddiscard += 96; + diff_128 = (block_diff_128_shift_hi)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p, + endcolumni); + diff_128 = clear_end_128(diff_128,enddiscard); + diff_128 = set_start_128(diff_128,startdiscard); /* puts 1 (mismatches) at start */ + _mm_store_si128((__m128i *) array,diff_128); + + bestscore = -100; + for (i = 7; i >= 0; --i) { + p = 3*array[i]; + if ((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore += 
score_high[p+2]; + debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,3)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; + return (trimpos - 1); /* trimpos-1 is on side of mismatch */ - p = 3*((unsigned short) _mm_extract_epi16(diff,2)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; +#else + /* End block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_end_32(diff_32,enddiscard); - p = 3*((unsigned short) _mm_extract_epi16(diff,1)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore)); + p = 3*(diff_32 >> 16); + 
bestscore = score_high[p]; + trimpos = offset - score_high[p+1]; + totalscore = score_high[p+2]; + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset -= 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,0)); + p = 3*(diff_32 & 0x0000FFFF); if ((score = score_high[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset - score_high[p+1]; } totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore)); + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset -= 16; -#endif - - query_shifted -= COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ref_ptr = &(ref_blocks[endblocki]); - alt_ptr = &(snp_blocks[endblocki]); - ref_ptr -= 1; alt_ptr -= 1; if (endcolumni-- == 0) {ref_ptr -= 8; alt_ptr -= 8; endcolumni = 3;} -#else - ref_ptr = &(ref_blocks[endblocki-12]); - alt_ptr = &(snp_blocks[endblocki-12]); -#endif - start = &(ref_blocks[startblocki]); - while (ref_ptr > start) { - diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL; + /* Single row */ + while (--endcolumni > startcolumni) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - p = 3*(diff >> 16); + p = 3*(diff_32 >> 16); if ((score = score_high[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset - score_high[p+1]; } totalscore += score_high[p+2]; debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 
diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset -= 16; - - p = 3*(diff & 0x0000FFFF); + + p = 3*(diff_32 & 0x0000FFFF); if ((score = score_high[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset - score_high[p+1]; } totalscore += score_high[p+2]; debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - -#else - p = 3*((unsigned short) _mm_extract_epi16(diff,7)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,6)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,5)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,4)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = 
score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,3)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,2)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,1)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,0)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; - } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 0,(unsigned short) 
_mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore)); + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset -= 16; -#endif - - query_shifted -= COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ref_ptr -= 1; alt_ptr -= 1; if (endcolumni-- == 0) {ref_ptr -= 8; alt_ptr -= 8; endcolumni = 3;} -#else - ref_ptr -= 12; alt_ptr -= 12; -#endif + query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL; } - /* Startblock */ - diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); - - diff = set_start(diff,startdiscard); /* puts 1 (mismatches) at start */ - + /* Start block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = set_start_32(diff_32,startdiscard); /* puts 1 (mismatches) at start */ -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - p = 3*(diff >> 16); + p = 3*(diff_32 >> 16); if ((score = score_high[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset - score_high[p+1]; } totalscore += score_high[p+2]; debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset -= 16; - - p = 3*(diff & 0x0000FFFF); + + p = 3*(diff_32 & 0x0000FFFF); if ((score = score_high[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset - score_high[p+1]; } /* totalscore += score_high[p+2]; */ debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); /* offset -= 16; */ + + return (trimpos - 1); /* trimpos-1 is on side of mismatch */ +#endif -#else - p = 3*((unsigned short) _mm_extract_epi16(diff,7)); - 
if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; +#if defined(USE_WRAP_TRIM) && defined(HAVE_SSSE3) + } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) { + /* Wrap */ + startdiscard += (startcolumni - endcolumni - 1)*32; + enddiscard += 96; + diff_128 = (block_diff_128_wrap_hi)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p, + endcolumni); + diff_128 = clear_end_128(diff_128,enddiscard); + diff_128 = set_start_128(diff_128,startdiscard); /* puts 1 (mismatches) at start */ + _mm_store_si128((__m128i *) array,diff_128); + + bestscore = -100; + for (i = 7; i >= 0; --i) { + p = 3*array[i]; + if ((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore += score_high[p+2]; + debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore)); + + return (trimpos - 1); /* trimpos-1 is on side of mismatch */ +#endif + + } else { + /* End block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_end_32(diff_32,enddiscard); + + p = 3*(diff_32 >> 16); + bestscore = score_high[p]; + trimpos = offset - score_high[p+1]; + totalscore = score_high[p+2]; + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset -= 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,6)); + p = 3*(diff_32 & 0x0000FFFF); if ((score = score_high[p] + totalscore) > bestscore) { bestscore = score; trimpos = 
offset - score_high[p+1]; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore)); + /* totalscore += score_high[p+2]; */ + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset -= 16; + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; - p = 3*((unsigned short) _mm_extract_epi16(diff,5)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; + /* End row */ + while (--endcolumni >= 0) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + + p = 3*(diff_32 >> 16); + if ((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore = score_high[p+2]; + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + + p = 3*(diff_32 & 0x0000FFFF); + if ((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore += score_high[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; +#ifdef HAVE_SSE2 + query_shifted -= QUERY_NEXTROW; +#endif + ptr 
-= GENOME_NEXTROW; - p = 3*((unsigned short) _mm_extract_epi16(diff,4)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; + + /* Middle rows */ +#ifdef HAVE_AVX2 + while (ptr >= startptr + 24) { + diff_256 = (block_diff_256)(&(query_shifted[-15]),&(ptr[-15]),plusp,genestrand,query_unk_mismatch_p); + _mm256_store_si256((__m256i *) array,diff_256); + + for (i = 15; i >= 0; --i) { + p = 3*array[i]; + if ((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore += score_high[p+2]; + debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + } + query_shifted -= 24; ptr -= 24; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; +#endif - p = 3*((unsigned short) _mm_extract_epi16(diff,3)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; +#ifdef HAVE_SSE2 + while (ptr >= startptr + 12) { + diff_128 = (block_diff_128)(&(query_shifted[-3]),&(ptr[-3]),plusp,genestrand,query_unk_mismatch_p); + _mm_store_si128((__m128i *) array,diff_128); + + for (i = 7; i >= 0; --i) { + p = 3*array[i]; + if ((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore += score_high[p+2]; + debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + } + query_shifted -= 12; ptr -= 12; + } +#else + while (ptr >= startptr + 12) { + for (endcolumni = 3; endcolumni 
>= 0; --endcolumni) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + + p = 3*(diff_32 >> 16); + if ((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore = score_high[p+2]; + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + + p = 3*(diff_32 & 0x0000FFFF); + if ((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore += score_high[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL; + } + /* query_shifted -= QUERY_NEXTROW; */ ptr -= GENOME_NEXTROW; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; +#endif - p = 3*((unsigned short) _mm_extract_epi16(diff,2)); - if ((score = score_high[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset - score_high[p+1]; + /* Start row */ + while (ptr > startptr) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + + p = 3*(diff_32 >> 16); + if ((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore = score_high[p+2]; + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + + p = 3*(diff_32 & 0x0000FFFF); + if ((score = score_high[p] + 
totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore += score_high[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + query_shifted -= QUERY_NEXTCOL; ptr -= GENOME_NEXTCOL; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,1)); + /* Start block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = set_start_32(diff_32,startdiscard); /* puts 1 (mismatches) at start */ + + p = 3*(diff_32 >> 16); if ((score = score_high[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset - score_high[p+1]; } totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore)); + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset -= 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,0)); + + p = 3*(diff_32 & 0x0000FFFF); if ((score = score_high[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset - score_high[p+1]; } - totalscore += score_high[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset -= 16; -#endif - - debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos - 1)); + 
/* totalscore += score_high[p+2]; */ + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + /* offset -= 16; */ + return (trimpos - 1); /* trimpos-1 is on side of mismatch */ - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else } -#endif } - static int -trim_right_substring (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, - bool plusp, int genestrand) { -#ifdef DEBUG14 - int answer; -#endif +trim_left_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, + bool plusp, int genestrand) { int startdiscard, enddiscard, offset; Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32; - Genomecomp_T *ptr, *end; + Genomecomp_T *ref_ptr, *alt_ptr, *startptr; Genomecomp_T *query_shifted; - UINT4 diff_32; - Genomediff_T diff; int nshift; int startcolumni, endcolumni; + UINT4 diff_32; +#ifdef HAVE_SSE2 + __m128i diff_128; + int i; +#endif +#ifdef HAVE_AVX2 + __m256i diff_256; +#endif +#ifdef HAVE_AVX2 + unsigned short array[16]; +#elif defined(HAVE_SSE2) + unsigned short array[8]; +#endif int totalscore, bestscore, score; int trimpos; @@ -23066,7 +25766,7 @@ debug( printf("\n\n"); - printf("Genome (in trim_right_substring) from %u+%d to %u+%d:\n",left,pos5,left,pos3); + printf("Genome (in trim_left_substring_snps) from %u+%d to %u+%d:\n",left,pos5,left,pos3); Genome_print_blocks(ref_blocks,left+pos5,left+pos3); printf("\n"); ); @@ -23087,525 +25787,378 @@ query_shifted = Compress_shift(query_compress,nshift); debug(printf("Query shifted %d:\n",nshift)); debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); - query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE; - - if (endblocki_32 == startblocki_32) { - startdiscard = (left+pos5) % 32; - enddiscard = (left+pos3) % 32; - offset = -startdiscard + pos5; - debug(printf("nshift = %d, startdiscard = %u, enddiscard = 
%u\n",nshift,startdiscard,enddiscard)); - + query_shifted += (nshift+pos3)/STEP_SIZE*COMPRESS_BLOCKSIZE; #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - diff_32 = (block_diff_32)(query_shifted,&(ref_blocks[startblocki_32]), - plusp,genestrand,query_unk_mismatch_p); -#else - diff_32 = (block_diff_32)(query_shifted + startcolumni,&(ref_blocks[startblocki_32]), - plusp,genestrand,query_unk_mismatch_p); -#endif - diff_32 = clear_start_32(diff_32,startdiscard); /* puts 0 (matches) at start */ - diff_32 = set_end_32(diff_32,enddiscard); /* puts 1 (mismatches) at end */ - - - p = 3*(diff_32 & 0x0000FFFF); - bestscore = score_low[p]; - trimpos = offset + score_low[p+1]; - totalscore = score_low[p+2]; - debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*(diff_32 >> 16); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - /* totalscore += score_low[p+2]; */ - debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - /* offset += 16; */ - -#ifdef DEBUG14 - answer = (trimpos + 1); #else - return (trimpos + 1); /* trimpos+1 is on side of mismatch */ -#endif - - } -#ifndef DEBUG14 - else { + query_shifted += endcolumni; #endif -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - startblocki = startblocki_32; - endblocki = endblocki_32; -#endif + startdiscard = (left+pos5) % 32; + enddiscard = (left+pos3) % 32; + offset = (pos3 - 1) - enddiscard + 32; + ref_ptr = &(ref_blocks[endblocki_32]); + alt_ptr = &(snp_blocks[endblocki_32]); + startptr = &(ref_blocks[startblocki_32]); - startdiscard = (left+pos5) % STEP_SIZE; - enddiscard = (left+pos3) % STEP_SIZE; - offset = -startdiscard + pos5; - + if (startblocki_32 == endblocki_32) { + /* Single 
block */ debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); -#ifndef DEBUG14 - } -#endif - - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else - if (endblocki == startblocki) { - diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); /* puts 0 (matches) at start */ - diff = set_end(diff,enddiscard); /* puts 1 (mismatches) at end */ - - - p = 3*((unsigned short) _mm_extract_epi16(diff,0)); - bestscore = score_low[p]; - trimpos = offset + score_low[p+1]; - totalscore = score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,1)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,2)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,3)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, 
offset %d, trimpos %d, totalscore %d\n", - 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,4)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,5)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,6)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,7)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos + 1)); - return (trimpos + 1); /* 
trimpos+1 is on side of mismatch */ - - } else { -#endif - - /* Startblock */ - diff = (block_diff)(query_shifted,&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); /* puts 0 (matches) at start */ - debug(printf("clearing start %08X\n",clear_start_mask(startdiscard))); - - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - p = 3*(diff & 0x0000FFFF); - bestscore = score_low[p]; - trimpos = offset + score_low[p+1]; - totalscore = score_low[p+2]; - debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_end_32(diff_32,enddiscard); + diff_32 = set_start_32(diff_32,startdiscard); /* puts 1 (mismatches) at start */ - p = 3*(diff >> 16); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; + p = 3*(diff_32 >> 16); + bestscore = score_high[p]; + trimpos = offset - score_high[p+1]; + totalscore = score_high[p+2]; debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - -#else - p = 3*((unsigned short) _mm_extract_epi16(diff,0)); - bestscore = score_low[p]; - trimpos = offset + score_low[p+1]; - totalscore = score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,1)); - if ((score = score_low[p] + totalscore) > 
bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,2)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,3)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,4)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,5)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 5,(unsigned short) 
_mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,6)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,7)); - if ((score = score_low[p] + totalscore) > bestscore) { + p = 3*(diff_32 & 0x0000FFFF); + if ((score = score_high[p] + totalscore) > bestscore) { bestscore = score; - trimpos = offset + score_low[p+1]; + trimpos = offset - score_high[p+1]; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; -#endif - - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ptr = &(ref_blocks[startblocki]); - ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;} -#else - ptr = &(ref_blocks[startblocki+12]); -#endif - end = &(ref_blocks[endblocki]); - while (ptr < end) { - diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); - + /* totalscore += score_high[p+2]; */ + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + /* offset -= 16; */ -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - p = 3*(diff & 0x0000FFFF); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff low %04X => 
bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; + return (trimpos - 1); /* trimpos-1 is on side of mismatch */ - p = 3*(diff >> 16); - if ((score = score_low[p] + totalscore) > bestscore) { + } else if (startblocki == endblocki) { +#if defined(USE_SHIFT_TRIM) && defined(HAVE_SSE2) + /* Shift */ + startdiscard += 96 - (endcolumni - startcolumni)*32; + enddiscard += 96; + diff_128 = (block_diff_snp_128_shift_hi)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p, + endcolumni); + diff_128 = clear_end_128(diff_128,enddiscard); + diff_128 = set_start_128(diff_128,startdiscard); /* puts 1 (mismatches) at start */ + _mm_store_si128((__m128i *) array,diff_128); + + bestscore = -100; + for (i = 7; i >= 0; --i) { + p = 3*array[i]; + if ((score = score_high[p] + totalscore) > bestscore) { bestscore = score; - trimpos = offset + score_low[p+1]; + trimpos = offset - score_high[p+1]; } - totalscore += score_low[p+2]; - debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; + totalscore += score_high[p+2]; + debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + } + + return (trimpos - 1); /* trimpos-1 is on side of mismatch */ #else - p = 3*((unsigned short) _mm_extract_epi16(diff,0)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; + /* End block */ + diff_32 = 
(block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_end_32(diff_32,enddiscard); - p = 3*((unsigned short) _mm_extract_epi16(diff,1)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; + p = 3*(diff_32 >> 16); + bestscore = score_high[p]; + trimpos = offset - score_high[p+1]; + totalscore = score_high[p+2]; + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,2)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; + p = 3*(diff_32 & 0x0000FFFF); + if ((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore += score_high[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL; - p = 3*((unsigned short) _mm_extract_epi16(diff,3)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - 
debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; + /* Single row */ + while (--endcolumni > startcolumni) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); - p = 3*((unsigned short) _mm_extract_epi16(diff,4)); - if ((score = score_low[p] + totalscore) > bestscore) { + p = 3*(diff_32 >> 16); + if ((score = score_high[p] + totalscore) > bestscore) { bestscore = score; - trimpos = offset + score_low[p+1]; + trimpos = offset - score_high[p+1]; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,5)); - if ((score = score_low[p] + totalscore) > bestscore) { + totalscore += score_high[p+2]; + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + + p = 3*(diff_32 & 0x0000FFFF); + if ((score = score_high[p] + totalscore) > bestscore) { bestscore = score; - trimpos = offset + score_low[p+1]; + trimpos = offset - score_high[p+1]; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; + totalscore += score_high[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + query_shifted -= QUERY_NEXTCOL; ref_ptr -= 
GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL; + } - p = 3*((unsigned short) _mm_extract_epi16(diff,6)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; + /* Start block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = set_start_32(diff_32,startdiscard); /* puts 1 (mismatches) at start */ - p = 3*((unsigned short) _mm_extract_epi16(diff,7)); - if ((score = score_low[p] + totalscore) > bestscore) { + p = 3*(diff_32 >> 16); + if ((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore += score_high[p+2]; + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + + p = 3*(diff_32 & 0x0000FFFF); + if ((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + /* totalscore += score_high[p+2]; */ + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + /* offset -= 16; */ + + return (trimpos - 1); /* trimpos-1 is on side of mismatch */ +#endif + +#if defined(USE_WRAP_TRIM) && defined(HAVE_SSSE3) + } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) { + /* Wrap */ + startdiscard += (startcolumni - endcolumni - 1)*32; + enddiscard += 96; + diff_128 = (block_diff_snp_128_wrap_hi)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p, + endcolumni); + diff_128 = 
clear_end_128(diff_128,enddiscard); + diff_128 = set_start_128(diff_128,startdiscard); /* puts 1 (mismatches) at start */ + _mm_store_si128((__m128i *) array,diff_128); + + bestscore = -100; + for (i = 7; i >= 0; --i) { + p = 3*array[i]; + if ((score = score_high[p] + totalscore) > bestscore) { bestscore = score; - trimpos = offset + score_low[p+1]; + trimpos = offset - score_high[p+1]; } - totalscore += score_low[p+2]; + totalscore += score_high[p+2]; debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; -#endif - - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ptr += 1; if (++startcolumni == 4) {ptr += 8; startcolumni = 0;} -#else - ptr += 12; -#endif + i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; } + + return (trimpos - 1); /* trimpos-1 is on side of mismatch */ +#endif - /* Endblock */ - diff = (block_diff)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); - diff = set_end(diff,enddiscard); /* puts 1 (mismatches) at end */ + } else { + /* End block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_end_32(diff_32,enddiscard); + p = 3*(diff_32 >> 16); + bestscore = score_high[p]; + trimpos = offset - score_high[p+1]; + totalscore = score_high[p+2]; + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - p = 3*(diff & 0x0000FFFF); - if ((score = score_low[p] + totalscore) > bestscore) { + p = 3*(diff_32 & 0x0000FFFF); + if ((score = score_high[p] + totalscore) > bestscore) { bestscore = score; - trimpos = offset + score_low[p+1]; + trimpos = offset 
- score_high[p+1]; } - totalscore += score_low[p+2]; + /* totalscore += score_high[p+2]; */ debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL; - p = 3*(diff >> 16); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - /* totalscore += score_low[p+2]; */ - debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - /* offset += 16; */ + /* End row */ + while (--endcolumni >= 0) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); -#else - p = 3*((unsigned short) _mm_extract_epi16(diff,0)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; + p = 3*(diff_32 >> 16); + if ((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore = score_high[p+2]; + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,1)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; + p = 3*(diff_32 & 0x0000FFFF); + if 
((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore += score_high[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; +#ifdef HAVE_SSE2 + query_shifted -= QUERY_NEXTROW; +#endif + ref_ptr -= GENOME_NEXTROW; alt_ptr -= GENOME_NEXTROW; - p = 3*((unsigned short) _mm_extract_epi16(diff,2)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,3)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; + /* Middle rows */ +#ifdef HAVE_AVX2 + while (ref_ptr >= startptr + 24) { + diff_256 = (block_diff_snp_256)(&(query_shifted[-15]),&(ref_ptr[-15]),alt_ptr,plusp,genestrand,query_unk_mismatch_p); + _mm256_store_si256((__m256i *) array,diff_256); + + for (i = 15; i >= 0; --i) { + p = 3*array[i]; + if ((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore += score_high[p+2]; + debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + 
i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + } + query_shifted -= 24; ref_ptr -= 24; alt_ptr -= 24; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; +#endif - p = 3*((unsigned short) _mm_extract_epi16(diff,4)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; +#ifdef HAVE_SSE2 + while (ref_ptr >= startptr + 12) { + diff_128 = (block_diff_snp_128)(&(query_shifted[-3]),&(ref_ptr[-3]),alt_ptr,plusp,genestrand,query_unk_mismatch_p); + _mm_store_si128((__m128i *) array,diff_128); + + for (i = 7; i >= 0; --i) { + p = 3*array[i]; + if ((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore += score_high[p+2]; + debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + } + query_shifted -= 12; ref_ptr -= 12; alt_ptr -= 12; + } +#else + while (ref_ptr >= startptr + 12) { + for (endcolumni = 3; endcolumni >= 0; --endcolumni) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + + p = 3*(diff_32 >> 16); + if ((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore = score_high[p+2]; + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + + p = 3*(diff_32 & 0x0000FFFF); + if ((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore += 
score_high[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL; + } + /* query_shifted -= QUERY_NEXTROW; */ ref_ptr -= GENOME_NEXTROW; alt_ptr -= GENOME_NEXTROW; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; +#endif - p = 3*((unsigned short) _mm_extract_epi16(diff,5)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; + /* Start row */ + while (ref_ptr > startptr) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + + p = 3*(diff_32 >> 16); + if ((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore = score_high[p+2]; + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + + p = 3*(diff_32 & 0x0000FFFF); + if ((score = score_high[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset - score_high[p+1]; + } + totalscore += score_high[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + query_shifted -= QUERY_NEXTCOL; ref_ptr -= GENOME_NEXTCOL; alt_ptr -= GENOME_NEXTCOL; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 5,(unsigned short) 
_mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,6)); - if ((score = score_low[p] + totalscore) > bestscore) { + /* Start block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = set_start_32(diff_32,startdiscard); /* puts 1 (mismatches) at start */ + + p = 3*(diff_32 >> 16); + if ((score = score_high[p] + totalscore) > bestscore) { bestscore = score; - trimpos = offset + score_low[p+1]; + trimpos = offset - score_high[p+1]; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,7)); - if ((score = score_low[p] + totalscore) > bestscore) { + totalscore += score_high[p+2]; + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset -= 16; + + p = 3*(diff_32 & 0x0000FFFF); + if ((score = score_high[p] + totalscore) > bestscore) { bestscore = score; - trimpos = offset + score_low[p+1]; + trimpos = offset - score_high[p+1]; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; -#endif + /* totalscore += score_high[p+2]; */ + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + /* offset -= 16; */ - debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos + 1)); - return (trimpos + 1); /* trimpos+1 
is on side of mismatch */ - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else + return (trimpos - 1); /* trimpos-1 is on side of mismatch */ } -#endif } - static int -trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, - bool plusp, int genestrand) { -#ifdef DEBUG14 - int answer; -#endif +trim_right_substring (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, + bool plusp, int genestrand) { int startdiscard, enddiscard, offset; Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32; - Genomecomp_T *ref_ptr, *alt_ptr, *end; + Genomecomp_T *ptr, *endptr; Genomecomp_T *query_shifted; - UINT4 diff_32; - Genomediff_T diff; int nshift; int startcolumni, endcolumni; + UINT4 diff_32; +#ifdef HAVE_SSE2 + __m128i diff_128; + int i; +#endif +#ifdef HAVE_AVX2 + __m256i diff_256; +#endif +#ifdef HAVE_AVX2 + unsigned short array[16]; +#elif defined(HAVE_SSE2) + unsigned short array[8]; +#endif int totalscore, bestscore, score; int trimpos; @@ -23613,8 +26166,8 @@ debug( printf("\n\n"); - printf("Genome (in trim_right_substring_snps) from %u+%d to %u+%d:\n",left,pos5,left,pos3); - Genome_print_blocks_snp(ref_blocks,snp_blocks,left+pos5,left+pos3); + printf("Genome (in trim_right_substring) from %u+%d to %u+%d:\n",left,pos5,left,pos3); + Genome_print_blocks(ref_blocks,left+pos5,left+pos3); printf("\n"); ); @@ -23635,24 +26188,25 @@ debug(printf("Query shifted %d:\n",nshift)); debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE; - - if (endblocki_32 == startblocki_32) { - startdiscard = (left+pos5) % 32; - enddiscard = (left+pos3) % 32; - offset = -startdiscard + pos5; - debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); - #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - diff_32 = (block_diff_snp_32)(query_shifted,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]), - 
plusp,genestrand,query_unk_mismatch_p); #else - diff_32 = (block_diff_snp_32)(query_shifted + startcolumni,&(snp_blocks[startblocki_32]),&(ref_blocks[startblocki_32]), - plusp,genestrand,query_unk_mismatch_p); + query_shifted += startcolumni; #endif + + startdiscard = (left+pos5) % 32; + enddiscard = (left+pos3) % 32; + offset = -startdiscard + pos5; + ptr = &(ref_blocks[startblocki_32]); + endptr = &(ref_blocks[endblocki_32]); + + if (endblocki_32 == startblocki_32) { + /* Single block */ + debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); + + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); diff_32 = clear_start_32(diff_32,startdiscard); /* puts 0 (matches) at start */ diff_32 = set_end_32(diff_32,enddiscard); /* puts 1 (mismatches) at end */ - p = 3*(diff_32 & 0x0000FFFF); bestscore = score_low[p]; trimpos = offset + score_low[p+1]; @@ -23671,472 +26225,709 @@ diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); /* offset += 16; */ -#ifdef DEBUG14 - answer = (trimpos + 1); -#else return (trimpos + 1); /* trimpos+1 is on side of mismatch */ -#endif - - } -#ifndef DEBUG14 - else { -#endif - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - startblocki = startblocki_32; - endblocki = endblocki_32; -#endif - - startdiscard = (left+pos5) % STEP_SIZE; - enddiscard = (left+pos3) % STEP_SIZE; - offset = -startdiscard + pos5; - - debug(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); -#ifndef DEBUG14 - } -#endif + } else if (endblocki == startblocki) { +#if defined(USE_SHIFT_TRIM) && defined(HAVE_SSE2) + /* Shift */ + enddiscard += (endcolumni - startcolumni)*32; + diff_128 = (block_diff_128_shift_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); /* puts 0 (matches) at start */ + diff_128 = set_end_128(diff_128,enddiscard); /* puts 1 
(mismatches) at end */ + _mm_store_si128((__m128i *) array,diff_128); + + bestscore = -100; + for (i = 0; i < 8; i++) { + p = 3*array[i]; + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + score_low[p+1]; + } + totalscore += score_low[p+2]; + debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 16; + } -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) + return (trimpos + 1); /* trimpos+1 is on side of mismatch */ #else - if (endblocki == startblocki) { - diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); - diff = clear_start(diff,startdiscard); /* puts 0 (matches) at start */ - diff = set_end(diff,enddiscard); /* puts 1 (mismatches) at end */ - + /* Start block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_start_32(diff_32,startdiscard); - p = 3*((unsigned short) _mm_extract_epi16(diff,0)); + p = 3*(diff_32 & 0x0000FFFF); bestscore = score_low[p]; trimpos = offset + score_low[p+1]; totalscore = score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore)); + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset += 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,1)); + p = 3*(diff_32 >> 16); if ((score = score_low[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset + score_low[p+1]; } totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 1,(unsigned short) 
_mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore)); + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset += 16; + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; - p = 3*((unsigned short) _mm_extract_epi16(diff,2)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; + /* Single row */ + while (++startcolumni < endcolumni) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + + p = 3*(diff_32 & 0x0000FFFF); + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + score_low[p+1]; + } + totalscore = score_low[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 16; + + p = 3*(diff_32 >> 16); + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + score_low[p+1]; + } + totalscore += score_low[p+2]; + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 16; + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,3)); + /* End block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = set_end_32(diff_32,enddiscard); /* puts 1 (mismatches) at end */ + + p = 3*(diff_32 & 0x0000FFFF); if ((score = 
score_low[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset + score_low[p+1]; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore)); + totalscore = score_low[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset += 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,4)); + p = 3*(diff_32 >> 16); if ((score = score_low[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset + score_low[p+1]; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore)); + /* totalscore += score_low[p+2]; */ + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + /* offset += 16; */ + + return (trimpos + 1); /* trimpos+1 is on side of mismatch */ +#endif + +#if defined(USE_WRAP_TRIM) && defined(HAVE_SSSE3) + } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) { + /* Wrap */ + enddiscard += (4 + endcolumni - startcolumni)*32; + diff_128 = (block_diff_128_wrap_lo)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); /* puts 0 (matches) at start */ + diff_128 = set_end_128(diff_128,enddiscard); /* puts 1 (mismatches) at end */ + _mm_store_si128((__m128i *) array,diff_128); + + bestscore = -100; + for (i = 0; i < 8; i++) { + p = 3*array[i]; + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + 
score_low[p+1]; + } + totalscore += score_low[p+2]; + debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 16; + } + + return (trimpos + 1); /* trimpos+1 is on side of mismatch */ +#endif + + } else { + /* Start block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_start_32(diff_32,startdiscard); + + p = 3*(diff_32 & 0x0000FFFF); + bestscore = score_low[p]; + trimpos = offset + score_low[p+1]; + totalscore = score_low[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset += 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,5)); + p = 3*(diff_32 >> 16); if ((score = score_low[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset + score_low[p+1]; } totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore)); + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset += 16; + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + + /* Start row */ + while (++startcolumni < 4) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + + p = 3*(diff_32 & 0x0000FFFF); + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + score_low[p+1]; + } + totalscore = score_low[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 
16; + + p = 3*(diff_32 >> 16); + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + score_low[p+1]; + } + totalscore += score_low[p+2]; + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 16; + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + } +#ifdef HAVE_SSE2 + query_shifted += QUERY_NEXTROW; +#endif + ptr += GENOME_NEXTROW; + + + /* Middle rows */ +#ifdef HAVE_AVX2 + while (ptr + 24 <= endptr) { + diff_256 = (block_diff_256)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + _mm256_store_si256((__m256i *) array,diff_256); + + for (i = 0; i < 16; i++) { + p = 3*array[i]; + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + score_low[p+1]; + } + totalscore += score_low[p+2]; + debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 16; + } + query_shifted += 24; ptr += 24; + } +#endif + +#ifdef HAVE_SSE2 + while (ptr + 12 <= endptr) { + diff_128 = (block_diff_128)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + _mm_store_si128((__m128i *) array,diff_128); + + for (i = 0; i < 8; i++) { + p = 3*array[i]; + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + score_low[p+1]; + } + totalscore += score_low[p+2]; + debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 16; + } + query_shifted += 12; ptr += 12; + } +#else + while (ptr + 12 <= endptr) { + for (startcolumni = 0; startcolumni < 4; startcolumni++) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + + p = 3*(diff_32 & 
0x0000FFFF); + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + score_low[p+1]; + } + totalscore = score_low[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 16; + + p = 3*(diff_32 >> 16); + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + score_low[p+1]; + } + totalscore += score_low[p+2]; + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 16; + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + } + /* query_shifted += QUERY_NEXTROW; */ ptr += GENOME_NEXTROW; + } +#endif + + /* End row */ + while (ptr < endptr) { + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + + p = 3*(diff_32 & 0x0000FFFF); + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + score_low[p+1]; + } + totalscore = score_low[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 16; + + p = 3*(diff_32 >> 16); + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + score_low[p+1]; + } + totalscore += score_low[p+2]; + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 16; + query_shifted += QUERY_NEXTCOL; ptr += GENOME_NEXTCOL; + } + + /* End block */ + diff_32 = (block_diff_32)(query_shifted,ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = set_end_32(diff_32,enddiscard); /* puts 1 (mismatches) at end */ - p = 
3*((unsigned short) _mm_extract_epi16(diff,6)); + p = 3*(diff_32 & 0x0000FFFF); if ((score = score_low[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset + score_low[p+1]; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore)); + totalscore = score_low[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset += 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,7)); + p = 3*(diff_32 >> 16); if ((score = score_low[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset + score_low[p+1]; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos + 1)); + /* totalscore += score_low[p+2]; */ + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + /* offset += 16; */ + return (trimpos + 1); /* trimpos+1 is on side of mismatch */ + } +} - } else { + +static int +trim_right_substring_snps (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, + bool plusp, int genestrand) { + int startdiscard, enddiscard, offset; + Univcoord_T startblocki, endblocki, startblocki_32, endblocki_32; + Genomecomp_T *ref_ptr, *alt_ptr, *endptr; + Genomecomp_T *query_shifted; + int nshift; + int startcolumni, endcolumni; + UINT4 diff_32; +#ifdef HAVE_SSE2 + __m128i diff_128; + int i; +#endif +#ifdef HAVE_AVX2 + __m256i diff_256; +#endif +#ifdef 
HAVE_AVX2 + unsigned short array[16]; +#elif defined(HAVE_SSE2) + unsigned short array[8]; +#endif + + int totalscore, bestscore, score; + int trimpos; + Genomecomp_T p; + + debug( + printf("\n\n"); + printf("Genome (in trim_right_substring) from %u+%d to %u+%d:\n",left,pos5,left,pos3); + Genome_print_blocks(ref_blocks,left+pos5,left+pos3); + printf("\n"); + ); + + + startblocki = (left+pos5)/128U*12; + startcolumni = ((left+pos5) % 128) / 32; + startblocki_32 = startblocki + startcolumni; + + endblocki = (left+pos3)/128U*12; + endcolumni = ((left+pos3) % 128) / 32; + endblocki_32 = endblocki + endcolumni; + + debug(printf("left = %u, pos5 = %d, pos3 = %d, startblocki = %u, endblocki = %u\n", + left,pos5,pos3,startblocki,endblocki)); + + nshift = left % STEP_SIZE; + query_shifted = Compress_shift(query_compress,nshift); + debug(printf("Query shifted %d:\n",nshift)); + debug(Compress_print_blocks(query_shifted,nshift,pos5,pos3)); + query_shifted += (nshift+pos5)/STEP_SIZE*COMPRESS_BLOCKSIZE; +#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) +#else + query_shifted += startcolumni; #endif - /* Startblock */ - diff = (block_diff_snp)(query_shifted,&(snp_blocks[startblocki]),&(ref_blocks[startblocki]), - plusp,genestrand,query_unk_mismatch_p); + startdiscard = (left+pos5) % 32; + enddiscard = (left+pos3) % 32; + offset = -startdiscard + pos5; + ref_ptr = &(ref_blocks[startblocki_32]); + alt_ptr = &(snp_blocks[startblocki_32]); + endptr = &(ref_blocks[endblocki_32]); - diff = clear_start(diff,startdiscard); /* puts 0 (matches) at start */ + if (endblocki_32 == startblocki_32) { + /* Single block */ + debug5(printf("nshift = %d, startdiscard = %u, enddiscard = %u\n",nshift,startdiscard,enddiscard)); + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_start_32(diff_32,startdiscard); /* puts 0 (matches) at start */ + diff_32 = set_end_32(diff_32,enddiscard); /* puts 1 (mismatches) at end */ -#if 
defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - p = 3*(diff & 0x0000FFFF); + p = 3*(diff_32 & 0x0000FFFF); bestscore = score_low[p]; trimpos = offset + score_low[p+1]; totalscore = score_low[p+2]; debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset += 16; - p = 3*(diff >> 16); + p = 3*(diff_32 >> 16); if ((score = score_low[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset + score_low[p+1]; } - totalscore += score_low[p+2]; + /* totalscore += score_low[p+2]; */ debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + /* offset += 16; */ + + return (trimpos + 1); /* trimpos+1 is on side of mismatch */ + + } else if (endblocki == startblocki) { +#if defined(USE_SHIFT_TRIM) && defined(HAVE_SSE2) + /* Shift */ + enddiscard += (endcolumni - startcolumni)*32; + diff_128 = (block_diff_snp_128_shift_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); /* puts 0 (matches) at start */ + diff_128 = set_end_128(diff_128,enddiscard); /* puts 1 (mismatches) at end */ + _mm_store_si128((__m128i *) array,diff_128); + + bestscore = -100; + for (i = 0; i < 8; i++) { + p = 3*array[i]; + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + score_low[p+1]; + } + totalscore += score_low[p+2]; + debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 16; + } + + return (trimpos + 1); /* trimpos+1 is 
on side of mismatch */ #else - p = 3*((unsigned short) _mm_extract_epi16(diff,0)); + /* Start block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_start_32(diff_32,startdiscard); + + p = 3*(diff_32 & 0x0000FFFF); bestscore = score_low[p]; trimpos = offset + score_low[p+1]; totalscore = score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore)); + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset += 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,1)); + p = 3*(diff_32 >> 16); if ((score = score_low[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset + score_low[p+1]; } totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore)); + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset += 16; + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; - p = 3*((unsigned short) _mm_extract_epi16(diff,2)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; + /* Single row */ + while (++startcolumni < endcolumni) { + diff_32 = 
(block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); - p = 3*((unsigned short) _mm_extract_epi16(diff,3)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; + p = 3*(diff_32 & 0x0000FFFF); + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + score_low[p+1]; + } + totalscore = score_low[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 16; + + p = 3*(diff_32 >> 16); + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + score_low[p+1]; + } + totalscore += score_low[p+2]; + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 16; + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,4)); + /* End block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = set_end_32(diff_32,enddiscard); /* puts 1 (mismatches) at end */ + + p = 3*(diff_32 & 0x0000FFFF); if ((score = score_low[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset + score_low[p+1]; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore)); + 
totalscore = score_low[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset += 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,5)); + p = 3*(diff_32 >> 16); if ((score = score_low[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset + score_low[p+1]; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; + /* totalscore += score_low[p+2]; */ + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + /* offset += 16; */ + + return (trimpos + 1); /* trimpos+1 is on side of mismatch */ +#endif - p = 3*((unsigned short) _mm_extract_epi16(diff,6)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; +#if defined(USE_WRAP_TRIM) && defined(HAVE_SSSE3) + } else if (endblocki == startblocki + 12 && endcolumni < startcolumni) { + /* Wrap */ + enddiscard += (4 + endcolumni - startcolumni)*32; + diff_128 = (block_diff_snp_128_wrap_lo)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p, + startcolumni); + diff_128 = clear_start_128(diff_128,startdiscard); /* puts 0 (matches) at start */ + diff_128 = set_end_128(diff_128,enddiscard); /* puts 1 (mismatches) at end */ + _mm_store_si128((__m128i *) array,diff_128); + + bestscore = -100; + for (i = 0; i < 8; i++) { + p = 3*array[i]; + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + score_low[p+1]; + } + totalscore += score_low[p+2]; + debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", 
+ i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 16; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore)); +#endif + + return (trimpos + 1); /* trimpos+1 is on side of mismatch */ + + } else { + /* Start block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = clear_start_32(diff_32,startdiscard); + + p = 3*(diff_32 & 0x0000FFFF); + bestscore = score_low[p]; + trimpos = offset + score_low[p+1]; + totalscore = score_low[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset += 16; - p = 3*((unsigned short) _mm_extract_epi16(diff,7)); + p = 3*(diff_32 >> 16); if ((score = score_low[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset + score_low[p+1]; } totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore)); + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset += 16; -#endif - - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ref_ptr = &(ref_blocks[startblocki]); - alt_ptr = &(snp_blocks[startblocki]); - ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;} -#else - ref_ptr = &(ref_blocks[startblocki+12]); - alt_ptr = &(snp_blocks[startblocki+12]); -#endif - end = &(ref_blocks[endblocki]); - while (ref_ptr < end) { - diff = 
(block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + /* Start row */ + while (++startcolumni < 4) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - p = 3*(diff & 0x0000FFFF); + p = 3*(diff_32 & 0x0000FFFF); if ((score = score_low[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset + score_low[p+1]; } - totalscore += score_low[p+2]; + totalscore = score_low[p+2]; debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset += 16; - - p = 3*(diff >> 16); + + p = 3*(diff_32 >> 16); if ((score = score_low[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset + score_low[p+1]; } totalscore += score_low[p+2]; debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset += 16; + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; + } +#ifdef HAVE_SSE2 + query_shifted += QUERY_NEXTROW; +#endif + ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW; -#else - p = 3*((unsigned short) _mm_extract_epi16(diff,0)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - p = 3*((unsigned 
short) _mm_extract_epi16(diff,1)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; + /* Middle rows */ +#ifdef HAVE_AVX2 + while (ref_ptr + 24 <= endptr) { + diff_256 = (block_diff_snp_256)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + _mm256_store_si256((__m256i *) array,diff_256); - p = 3*((unsigned short) _mm_extract_epi16(diff,2)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; + for (i = 0; i < 16; i++) { + p = 3*array[i]; + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + score_low[p+1]; + } + totalscore += score_low[p+2]; + debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 16; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; + query_shifted += 24; ref_ptr += 24; alt_ptr += 24; + } +#endif - p = 3*((unsigned short) _mm_extract_epi16(diff,3)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; +#ifdef HAVE_SSE2 + while (ref_ptr + 12 <= 
endptr) { + diff_128 = (block_diff_snp_128)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + _mm_store_si128((__m128i *) array,diff_128); - p = 3*((unsigned short) _mm_extract_epi16(diff,4)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; + for (i = 0; i < 8; i++) { + p = 3*array[i]; + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + score_low[p+1]; + } + totalscore += score_low[p+2]; + debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + i,array[i],score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 16; + } + query_shifted += 12; ref_ptr += 12; alt_ptr += 12; + } +#else + while (ref_ptr + 12 <= endptr) { + for (startcolumni = 0; startcolumni < 4; startcolumni++) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + + p = 3*(diff_32 & 0x0000FFFF); + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + score_low[p+1]; + } + totalscore = score_low[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 16; + + p = 3*(diff_32 >> 16); + if ((score = score_low[p] + totalscore) > bestscore) { + bestscore = score; + trimpos = offset + score_low[p+1]; + } + totalscore += score_low[p+2]; + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + offset += 16; + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 4,(unsigned short) 
_mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; + /* query_shifted += QUERY_NEXTROW; */ ref_ptr += GENOME_NEXTROW; alt_ptr += GENOME_NEXTROW; + } +#endif - p = 3*((unsigned short) _mm_extract_epi16(diff,5)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; + /* End row */ + while (ref_ptr < endptr) { + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); - p = 3*((unsigned short) _mm_extract_epi16(diff,6)); + p = 3*(diff_32 & 0x0000FFFF); if ((score = score_low[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset + score_low[p+1]; } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore)); + totalscore = score_low[p+2]; + debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,7)); + + p = 3*(diff_32 >> 16); if ((score = score_low[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset + score_low[p+1]; } totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore)); + debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", + diff_32 >> 
16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset += 16; -#endif - - query_shifted += COMPRESS_BLOCKSIZE; -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - ref_ptr += 1; alt_ptr += 1; if (++startcolumni == 4) {ref_ptr += 8; alt_ptr += 8; startcolumni = 0;} -#else - ref_ptr += 12; alt_ptr += 12; -#endif + query_shifted += QUERY_NEXTCOL; ref_ptr += GENOME_NEXTCOL; alt_ptr += GENOME_NEXTCOL; } - /* Endblock */ - diff = (block_diff_snp)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); - - diff = set_end(diff,enddiscard); /* puts 1 (mismatches) at end */ - + /* End block */ + diff_32 = (block_diff_snp_32)(query_shifted,alt_ptr,ref_ptr,plusp,genestrand,query_unk_mismatch_p); + diff_32 = set_end_32(diff_32,enddiscard); /* puts 1 (mismatches) at end */ -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) - p = 3*(diff & 0x0000FFFF); + p = 3*(diff_32 & 0x0000FFFF); if ((score = score_low[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset + score_low[p+1]; } - totalscore += score_low[p+2]; + totalscore = score_low[p+2]; debug(printf("diff low %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + diff_32 & 0x0000FFFF,score_high[p],score_high[p+1],offset,trimpos,totalscore)); offset += 16; - p = 3*(diff >> 16); + p = 3*(diff_32 >> 16); if ((score = score_low[p] + totalscore) > bestscore) { bestscore = score; trimpos = offset + score_low[p+1]; } /* totalscore += score_low[p+2]; */ debug(printf("diff high %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - diff >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); + diff_32 >> 16,score_high[p],score_high[p+1],offset,trimpos,totalscore)); /* offset += 16; */ - -#else - p = 3*((unsigned short) _mm_extract_epi16(diff,0)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } 
- totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 0,(unsigned short) _mm_extract_epi16(diff,0),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,1)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 1,(unsigned short) _mm_extract_epi16(diff,1),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,2)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 2,(unsigned short) _mm_extract_epi16(diff,2),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,3)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 3,(unsigned short) _mm_extract_epi16(diff,3),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,4)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 4,(unsigned short) _mm_extract_epi16(diff,4),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 
3*((unsigned short) _mm_extract_epi16(diff,5)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 5,(unsigned short) _mm_extract_epi16(diff,5),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,6)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 6,(unsigned short) _mm_extract_epi16(diff,6),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; - - p = 3*((unsigned short) _mm_extract_epi16(diff,7)); - if ((score = score_low[p] + totalscore) > bestscore) { - bestscore = score; - trimpos = offset + score_low[p+1]; - } - totalscore += score_low[p+2]; - debug(printf("diff piece %d %04X => bestscore %d at pos %d, offset %d, trimpos %d, totalscore %d\n", - 7,(unsigned short) _mm_extract_epi16(diff,7),score_high[p],score_high[p+1],offset,trimpos,totalscore)); - offset += 16; -#endif - - debug14(if (startblocki_32 == endblocki_32) assert(answer == trimpos + 1)); + return (trimpos + 1); /* trimpos+1 is on side of mismatch */ - -#if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) -#else } -#endif } diff -Nru gmap-2016-11-07/src/genome128_hr.h gmap-2017-01-14/src/genome128_hr.h --- gmap-2016-11-07/src/genome128_hr.h 2016-02-12 20:14:41.000000000 +0000 +++ gmap-2017-01-14/src/genome128_hr.h 2016-12-16 16:38:23.000000000 +0000 @@ -1,4 +1,4 @@ -/* $Id: genome128_hr.h 184179 2016-02-12 20:14:39Z twu $ */ +/* $Id: genome128_hr.h 201740 2016-12-16 16:38:22Z twu $ */ #ifndef GENOME128_HR_INCLUDED #define GENOME128_HR_INCLUDED #include "types.h" @@ -11,11 +11,6 @@ bool 
query_unk_mismatch_p_in, bool genome_unk_mismatch_p_in, Mode_T mode_in); -extern void -Genome_hr_user_setup (UINT4 *ref_blocks_in, - bool query_unk_mismatch_p_in, bool genome_unk_mismatch_p_in, - Mode_T mode_in); - extern int Genome_consecutive_matches_rightward (Compress_T query_compress, Univcoord_T left, int pos5, int pos3, bool plusp, int genestrand); diff -Nru gmap-2016-11-07/src/gmap.c gmap-2017-01-14/src/gmap.c --- gmap-2016-11-07/src/gmap.c 2016-11-08 00:55:44.000000000 +0000 +++ gmap-2017-01-14/src/gmap.c 2016-12-16 16:39:35.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: gmap.c 200232 2016-11-08 00:55:43Z twu $"; +static char rcsid[] = "$Id: gmap.c 201742 2016-12-16 16:39:35Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -4238,8 +4238,9 @@ genomecomp_blocks = Compress_create_blocks_comp(Sequence_fullpointer(usersegment),Sequence_fulllength(usersegment)); genomebits_blocks = Compress_create_blocks_bits(genomecomp_blocks,Sequence_fulllength(usersegment)); Genome_user_setup(genomecomp_blocks); - Genome_hr_user_setup(genomebits_blocks,/*query_unk_mismatch_p*/false, - /*genome_unk_mismatch_p*/true,/*mode*/STANDARD); + Genome_hr_setup(genomebits_blocks,/*snp_blocks*/NULL, + /*query_unk_mismatch_p*/false,/*genome_unk_mismatch_p*/true, + /*mode*/STANDARD); Genome_sites_setup(genomecomp_blocks,/*snp_blocks*/NULL); Maxent_hr_setup(genomecomp_blocks,/*genomealt_blocks*/genomecomp_blocks); #ifdef PMAP @@ -4424,8 +4425,9 @@ genomecomp_blocks = Compress_create_blocks_comp(Sequence_fullpointer(usersegment),Sequence_fulllength(usersegment)); genomebits_blocks = Compress_create_blocks_bits(genomecomp_blocks,Sequence_fulllength(usersegment)); Genome_user_setup(genomecomp_blocks); - Genome_hr_user_setup(genomebits_blocks,/*query_unk_mismatch_p*/false, - /*genome_unk_mismatch_p*/true,/*mode*/STANDARD); + Genome_hr_setup(genomebits_blocks,/*snp_blocks*/NULL, + /*query_unk_mismatch_p*/false,/*genome_unk_mismatch_p*/true, + /*mode*/STANDARD); 
Genome_sites_setup(genomecomp_blocks,/*snp_blocks*/NULL); Maxent_hr_setup(genomecomp_blocks,/*genomealt_blocks*/genomecomp_blocks); #ifdef PMAP @@ -4598,8 +4600,9 @@ genomecomp_blocks = Compress_create_blocks_comp(Sequence_fullpointer(usersegment),Sequence_fulllength(usersegment)); genomebits_blocks = Compress_create_blocks_bits(genomecomp_blocks,Sequence_fulllength(usersegment)); Genome_user_setup(genomecomp_blocks); - Genome_hr_user_setup(genomebits_blocks,/*query_unk_mismatch_p*/false, - /*genome_unk_mismatch_p*/true,/*mode*/STANDARD); + Genome_hr_setup(genomebits_blocks,/*snp_blocks*/NULL, + /*query_unk_mismatch_p*/false,/*genome_unk_mismatch_p*/true, + /*mode*/STANDARD); Genome_sites_setup(genomecomp_blocks,/*snp_blocks*/NULL); Maxent_hr_setup(genomecomp_blocks,/*genomealt_blocks*/genomecomp_blocks); #ifdef PMAP @@ -6546,8 +6549,9 @@ } else if (usersegment != NULL) { Genome_user_setup(genomecomp_blocks); - Genome_hr_user_setup(genomebits_blocks,/*query_unk_mismatch_p*/false, - /*genome_unk_mismatch_p*/true,/*mode*/STANDARD); + Genome_hr_setup(genomebits_blocks,/*snp_blocks*/NULL, + /*query_unk_mismatch_p*/false,/*genome_unk_mismatch_p*/true, + /*mode*/STANDARD); Genome_sites_setup(genomecomp_blocks,/*snp_blocks*/NULL); Maxent_hr_setup(genomecomp_blocks,/*genomealt_blocks*/genomecomp_blocks); #ifdef PMAP diff -Nru gmap-2016-11-07/src/gmapl_select.c gmap-2017-01-14/src/gmapl_select.c --- gmap-2016-11-07/src/gmapl_select.c 2016-10-31 20:05:55.000000000 +0000 +++ gmap-2017-01-14/src/gmapl_select.c 2016-12-16 16:30:59.000000000 +0000 @@ -45,6 +45,16 @@ /* Depend on path */ /* Cannot use file_exists_p, since it won't search PATH */ + if (avx512_support_p == true) { + new_argv[0] = "gmapl.avx512"; + if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) { + fprintf(stderr,"Note: %s does not exist. 
For faster speed, may want to compile package on an AVX512 machine\n",new_argv[0]); + } else { + free(new_argv); + return rc; + } + } + if (avx2_support_p == true) { new_argv[0] = "gmapl.avx2"; if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) { @@ -105,6 +115,20 @@ } else { dir = dirname(argv[0]); + if (avx512_support_p == true) { + new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmapl.avx512") + 1) * sizeof(char)); + sprintf(new_argv[0],"%s/gmapl.avx512",dir); + if (file_exists_p(new_argv[0]) == false) { + fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an AVX512 machine\n",new_argv[0]); + free(new_argv[0]); + } else { + rc = execvp(new_argv[0],new_argv); + free(new_argv[0]); + free(new_argv); + return rc; + } + } + if (avx2_support_p == true) { new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmapl.avx2") + 1) * sizeof(char)); sprintf(new_argv[0],"%s/gmapl.avx2",dir); diff -Nru gmap-2016-11-07/src/gmap_select.c gmap-2017-01-14/src/gmap_select.c --- gmap-2016-11-07/src/gmap_select.c 2016-10-31 20:05:26.000000000 +0000 +++ gmap-2017-01-14/src/gmap_select.c 2016-12-16 16:31:06.000000000 +0000 @@ -45,6 +45,16 @@ /* Depend on path */ /* Cannot use file_exists_p, since it won't search PATH */ + if (avx512_support_p == true) { + new_argv[0] = "gmap.avx512"; + if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) { + fprintf(stderr,"Note: %s does not exist. 
For faster speed, may want to compile package on an AVX512 machine\n",new_argv[0]); + } else { + free(new_argv); + return rc; + } + } + if (avx2_support_p == true) { new_argv[0] = "gmap.avx2"; if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) { @@ -105,6 +115,20 @@ } else { dir = dirname(argv[0]); + if (avx512_support_p == true) { + new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmap.avx512") + 1) * sizeof(char)); + sprintf(new_argv[0],"%s/gmap.avx512",dir); + if (file_exists_p(new_argv[0]) == false) { + fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an AVX512 machine\n",new_argv[0]); + free(new_argv[0]); + } else { + rc = execvp(new_argv[0],new_argv); + free(new_argv[0]); + free(new_argv); + return rc; + } + } + if (avx2_support_p == true) { new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gmap.avx2") + 1) * sizeof(char)); sprintf(new_argv[0],"%s/gmap.avx2",dir); diff -Nru gmap-2016-11-07/src/gsnap.c gmap-2017-01-14/src/gsnap.c --- gmap-2016-11-07/src/gsnap.c 2016-11-08 00:56:52.000000000 +0000 +++ gmap-2017-01-14/src/gsnap.c 2017-01-09 18:26:27.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: gsnap.c 200234 2016-11-08 00:56:52Z twu $"; +static char rcsid[] = "$Id: gsnap.c 202219 2017-01-09 18:26:26Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -151,7 +151,7 @@ * GMAP parameters ************************************************************************/ -static int gmap_mode = GMAP_PAIRSEARCH | GMAP_INDEL_KNOWNSPLICE | GMAP_TERMINAL | GMAP_IMPROVEMENT; +static int gmap_mode = GMAP_PAIRSEARCH | GMAP_TERMINAL | GMAP_IMPROVEMENT; static int gmap_min_nconsecutive = 20; static int nullgap = 600; static int maxpeelback = 20; /* Now controlled by defect_rate */ @@ -169,7 +169,6 @@ static int max_gmap_terminal = 50; /* Will perform GMAP on up to this many terminals5 or terminals3 */ static int max_gmap_improvement = 5; -static double microexon_spliceprob = 0.95; 
static int suboptimal_score_start = -1; /* Determined by simulations to have minimal effect */ static int suboptimal_score_end = 3; /* Determined by simulations to have diminishing returns above 3 */ @@ -591,7 +590,6 @@ {"max-gmap-pairsearch", required_argument, 0, 0}, /* max_gmap_pairsearch */ {"max-gmap-terminal", required_argument, 0, 0}, /* max_gmap_terminal */ {"max-gmap-improvement", required_argument, 0, 0}, /* max_gmap_improvement */ - {"microexon-spliceprob", required_argument, 0, 0}, /* microexon_spliceprob */ {"stage2-start", required_argument, 0, 0}, /* suboptimal_score_start */ {"stage2-end", required_argument, 0, 0}, /* suboptimal_score_end */ @@ -858,7 +856,7 @@ static Filestring_T -process_request (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2, +process_request (Filestring_T *fp_failedinput, Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2, double *worker_runtime, Request_T request, Floors_T *floors_array, Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor, Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool, @@ -900,7 +898,7 @@ /*keep_floors_p*/true); result = Result_single_read_new(jobid,(void **) stage3array,npaths_primary,npaths_altloc,first_absmq,second_absmq); - fp = Output_filestring_fromresult(&(*fp_failedinput_1),&(*fp_failedinput_2),result,request); + fp = Output_filestring_fromresult(&(*fp_failedinput),&(*fp_failedinput_1),&(*fp_failedinput_2),result,request); *worker_runtime = worker_stopwatch == NULL ? 
0.00 : Stopwatch_stop(worker_stopwatch); Result_free(&result); return fp; @@ -919,7 +917,7 @@ /* Paired or concordant hits found */ result = Result_paired_read_new(jobid,(void **) stage3pairarray,npaths_primary,npaths_altloc,first_absmq,second_absmq, final_pairtype); - fp = Output_filestring_fromresult(&(*fp_failedinput_1),&(*fp_failedinput_2),result,request); + fp = Output_filestring_fromresult(&(*fp_failedinput),&(*fp_failedinput_1),&(*fp_failedinput_2),result,request); *worker_runtime = worker_stopwatch == NULL ? 0.00 : Stopwatch_stop(worker_stopwatch); Result_free(&result); return fp; @@ -929,7 +927,7 @@ /* Report ends as unpaired */ result = Result_paired_as_singles_new(jobid,(void **) stage3array5,npaths5_primary,npaths5_altloc,first_absmq5,second_absmq5, (void **) stage3array3,npaths3_primary,npaths3_altloc,first_absmq3,second_absmq3); - fp = Output_filestring_fromresult(&(*fp_failedinput_1),&(*fp_failedinput_2),result,request); + fp = Output_filestring_fromresult(&(*fp_failedinput),&(*fp_failedinput_1),&(*fp_failedinput_2),result,request); *worker_runtime = worker_stopwatch == NULL ? 0.00 : Stopwatch_stop(worker_stopwatch); Result_free(&result); return fp; @@ -967,7 +965,7 @@ (void **) stage3array3,npaths3_primary,npaths3_altloc,first_absmq3,second_absmq3); } - fp = Output_filestring_fromresult(&(*fp_failedinput_1),&(*fp_failedinput_2),result,request); + fp = Output_filestring_fromresult(&(*fp_failedinput),&(*fp_failedinput_1),&(*fp_failedinput_2),result,request); *worker_runtime = worker_stopwatch == NULL ? 
0.00 : Stopwatch_stop(worker_stopwatch); Result_free(&result); return fp; @@ -1055,7 +1053,7 @@ single_thread () { Floors_T *floors_array; Request_T request; - Filestring_T fp, fp_failedinput_1, fp_failedinput_2; + Filestring_T fp, fp_failedinput, fp_failedinput_1, fp_failedinput_2; Shortread_T queryseq1; int i; Stopwatch_T worker_stopwatch; @@ -1111,7 +1109,7 @@ #endif TRY - fp = process_request(&fp_failedinput_1,&fp_failedinput_2,&worker_runtime, + fp = process_request(&fp_failedinput,&fp_failedinput_1,&fp_failedinput_2,&worker_runtime, request,floors_array,oligoindices_major,oligoindices_minor, pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,worker_stopwatch); if (timingp == true) { @@ -1144,7 +1142,7 @@ RERAISE; END_TRY; - Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2); + Outbuffer_print_filestrings(fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2); if (jobid % POOL_FREE_INTERVAL == 0) { Pairpool_free_memory(pairpool); @@ -1217,7 +1215,7 @@ worker_thread (void *data) { Floors_T *floors_array; Request_T request; - Filestring_T fp, fp_failedinput_1, fp_failedinput_2; + Filestring_T fp, fp_failedinput, fp_failedinput_1, fp_failedinput_2; Shortread_T queryseq1; int i; Stopwatch_T worker_stopwatch; @@ -1287,7 +1285,7 @@ #endif TRY - fp = process_request(&fp_failedinput_1,&fp_failedinput_2,&worker_runtime, + fp = process_request(&fp_failedinput,&fp_failedinput_1,&fp_failedinput_2,&worker_runtime, request,floors_array,oligoindices_major,oligoindices_minor, pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,worker_stopwatch); if (timingp == true) { @@ -1320,7 +1318,7 @@ RERAISE; END_TRY; - Outbuffer_put_filestrings(outbuffer,fp,fp_failedinput_1,fp_failedinput_2); + Outbuffer_put_filestrings(outbuffer,fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2); if (worker_jobid % POOL_FREE_INTERVAL == 0) { Pairpool_free_memory(pairpool); @@ -1433,7 +1431,7 @@ gmap_mode = 0; return 0; } else if (!strcmp(string,"all")) { - gmap_mode = 
(GMAP_IMPROVEMENT | GMAP_TERMINAL | GMAP_INDEL_KNOWNSPLICE | GMAP_PAIRSEARCH); + gmap_mode = (GMAP_IMPROVEMENT | GMAP_TERMINAL | GMAP_PAIRSEARCH); return 1; } else { if (!strcmp(string,"improve")) { @@ -1441,12 +1439,12 @@ } else if (!strcmp(string,"terminal")) { gmap_mode |= GMAP_TERMINAL; } else if (!strcmp(string,"indel_knownsplice")) { - gmap_mode |= GMAP_INDEL_KNOWNSPLICE; + fprintf(stderr,"--gmap-mode indel_knownsplice now obsolete. Ignoring\n"); } else if (!strcmp(string,"pairsearch")) { gmap_mode |= GMAP_PAIRSEARCH; } else { fprintf(stderr,"Don't recognize gmap-mode type %s\n",string); - fprintf(stderr,"Allowed values are: none, all, improve, terminal, indel_knownsplice, pairsearch\n"); + fprintf(stderr,"Allowed values are: none, all, improve, terminal, pairsearch\n"); exit(9); } return 1; @@ -1793,9 +1791,6 @@ } else if (!strcmp(long_name,"max-gmap-improvement")) { max_gmap_improvement = atoi(check_valid_int(optarg)); - } else if (!strcmp(long_name,"microexon-spliceprob")) { - microexon_spliceprob = check_valid_float(optarg,long_name); - } else if (!strcmp(long_name,"stage2-start")) { /* No longer used by stage 2 */ suboptimal_score_start = atoi(check_valid_int(optarg)); @@ -3336,6 +3331,7 @@ novelsplicingp,knownsplicingp,find_dna_chimeras_p,distances_observed_p, subopt_levels,min_indel_end_matches,max_middle_insertions,max_middle_deletions, shortsplicedist,shortsplicedist_known,shortsplicedist_novelend,min_intronlength, + expected_pairlength,pairlength_deviation, min_distantsplicing_end_matches,min_distantsplicing_identity, nullgap,maxpeelback,maxpeelback_distalmedial, extramaterial_end,extramaterial_paired,gmap_mode, @@ -3353,7 +3349,7 @@ genes_iit,genes_divint_crosstable, tally_iit,tally_divint_crosstable,runlength_iit,runlength_divint_crosstable, distances_observed_p,pairmax_linear,pairmax_circular, - expected_pairlength,pairlength_deviation, + expected_pairlength,pairlength_deviation,maxpeelback, 
localsplicing_penalty,indel_penalty_middle,antistranded_penalty, favor_multiexon_p,gmap_min_nconsecutive,end_detail,subopt_levels, max_middle_insertions,max_middle_deletions, @@ -4269,9 +4265,9 @@ fprintf(stdout,"Options for GMAP alignment within GSNAP\n"); fprintf(stdout,"\ --gmap-mode=STRING Cases to use GMAP for complex alignments containing multiple splices or indels\n\ - Allowed values: none, all, pairsearch, indel_knownsplice, terminal, improve\n\ + Allowed values: none, all, pairsearch, terminal, improve\n\ (or multiple values, separated by commas).\n\ - Default: all, i.e., pairsearch,indel_knownsplice,terminal,improve\n\ + Default: all, i.e., pairsearch,terminal,improve\n\ "); fprintf(stdout,"\ --trigger-score-for-gmap=INT Try GMAP pairsearch on nearby genomic regions if best score (the total\n\ @@ -4295,10 +4291,6 @@ --max-gmap-improvement=INT Perform GMAP improvement on nearby genomic regions up to this many\n\ candidate ends (default %d). Requires improve in --gmap-mode\n\ ",max_gmap_improvement); - fprintf(stdout,"\ - --microexon-spliceprob=FLOAT Allow microexons only if one of the splice site probabilities is\n\ - greater than this value (default %.2f)\n\ -",microexon_spliceprob); fprintf(stdout,"\n"); diff -Nru gmap-2016-11-07/src/gsnapl_select.c gmap-2017-01-14/src/gsnapl_select.c --- gmap-2016-11-07/src/gsnapl_select.c 2016-10-31 20:06:33.000000000 +0000 +++ gmap-2017-01-14/src/gsnapl_select.c 2016-12-16 16:30:59.000000000 +0000 @@ -45,6 +45,16 @@ /* Depend on path */ /* Cannot use file_exists_p, since it won't search PATH */ + if (avx512_support_p == true) { + new_argv[0] = "gsnapl.avx512"; + if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) { + fprintf(stderr,"Note: %s does not exist. 
For faster speed, may want to compile package on an AVX512 machine\n",new_argv[0]); + } else { + free(new_argv); + return rc; + } + } + if (avx2_support_p == true) { new_argv[0] = "gsnapl.avx2"; if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) { @@ -105,6 +115,20 @@ } else { dir = dirname(argv[0]); + if (avx512_support_p == true) { + new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnapl.avx512") + 1) * sizeof(char)); + sprintf(new_argv[0],"%s/gsnapl.avx512",dir); + if (file_exists_p(new_argv[0]) == false) { + fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an AVX512 machine\n",new_argv[0]); + free(new_argv[0]); + } else { + rc = execvp(new_argv[0],new_argv); + free(new_argv[0]); + free(new_argv); + return rc; + } + } + if (avx2_support_p == true) { new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnapl.avx2") + 1) * sizeof(char)); sprintf(new_argv[0],"%s/gsnapl.avx2",dir); diff -Nru gmap-2016-11-07/src/gsnap_select.c gmap-2017-01-14/src/gsnap_select.c --- gmap-2016-11-07/src/gsnap_select.c 2016-10-31 20:06:15.000000000 +0000 +++ gmap-2017-01-14/src/gsnap_select.c 2016-12-16 16:31:03.000000000 +0000 @@ -45,6 +45,16 @@ /* Depend on path */ /* Cannot use file_exists_p, since it won't search PATH */ + if (avx512_support_p == true) { + new_argv[0] = "gsnap.avx512"; + if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) { + fprintf(stderr,"Note: %s does not exist. 
For faster speed, may want to compile package on an AVX512 machine\n",new_argv[0]); + } else { + free(new_argv); + return rc; + } + } + if (avx2_support_p == true) { new_argv[0] = "gsnap.avx2"; if ((rc = execvp(new_argv[0],new_argv)) == -1 && errno == ENOENT) { @@ -105,6 +115,20 @@ } else { dir = dirname(argv[0]); + if (avx512_support_p == true) { + new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnap.avx512") + 1) * sizeof(char)); + sprintf(new_argv[0],"%s/gsnap.avx512",dir); + if (file_exists_p(new_argv[0]) == false) { + fprintf(stderr,"Note: %s does not exist. For faster speed, may want to compile package on an AVX512 machine\n",new_argv[0]); + free(new_argv[0]); + } else { + rc = execvp(new_argv[0],new_argv); + free(new_argv[0]); + free(new_argv); + return rc; + } + } + if (avx2_support_p == true) { new_argv[0] = (char *) malloc((strlen(dir) + strlen("/") + strlen("gsnap.avx2") + 1) * sizeof(char)); sprintf(new_argv[0],"%s/gsnap.avx2",dir); diff -Nru gmap-2016-11-07/src/indexdb_hr.c gmap-2017-01-14/src/indexdb_hr.c --- gmap-2016-11-07/src/indexdb_hr.c 2016-02-13 03:46:24.000000000 +0000 +++ gmap-2017-01-14/src/indexdb_hr.c 2017-01-13 23:31:05.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: indexdb_hr.c 184203 2016-02-13 03:46:21Z twu $"; +static char rcsid[] = "$Id: indexdb_hr.c 202592 2017-01-13 23:31:04Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -13,11 +13,16 @@ #include #endif +#if defined(HAVE_SSE4_1) +#include +#endif + #include "indexdb_hr.h" #include "indexdbdef.h" #include "genome128_hr.h" #include "bitpack64-read.h" #include "bitpack64-readtwo.h" +#include "merge.h" #ifdef WORDS_BIGENDIAN @@ -32,6 +37,7 @@ #include #include #include /* For memcpy */ +#include "assert.h" #include "mem.h" #include "listdef.h" @@ -204,1617 +210,1622 @@ } #endif -#define READ_THEN_WRITE 1 - #ifdef LARGE_GENOMES static unsigned char sentinel_position_high = (unsigned char) -1; static UINT4 sentinel_position_low = (UINT4) -1; #endif 
-static Univcoord_T * -merge_batches_one_heap_16_existing (int *nmerged, struct Batch_T *batchpool, int nentries, int diagterm) { - Univcoord_T *positions, *ptr, position, last_position, this_position; - struct Batch_T sentinel_struct; - Batch_T batch, sentinel, heap[17]; - int heapsize; - unsigned int i; -#ifdef READ_THEN_WRITE - unsigned int smallesti_1, smallesti_2, smallesti; + +/************************************************************************ + * The following positions functions are taken from indexdb.c + ************************************************************************/ + +#ifndef LARGE_GENOMES +static void +positions_move_absolute (int positions_fd, Positionsptr_T ptr) { + off_t offset = ptr*((off_t) sizeof(Univcoord_T)); + + if (lseek(positions_fd,offset,SEEK_SET) < 0) { + fprintf(stderr,"Attempted to do lseek on offset %u*%d=%zd\n", + ptr,(int) sizeof(Univcoord_T),offset); + perror("Error in indexdb.c, positions_move_absolute_4"); + exit(9); + } + return; +} + +static void +positions_read_multiple (int positions_fd, Univcoord_T *values, int n) { + int i; + Univcoord_T value; + unsigned char buffer[4]; + +#ifdef WORDS_BIGENDIAN + /* Need to keep in bigendian format */ + for (i = 0; i < n; i++) { + read(positions_fd,buffer,4); + + value = (buffer[0] & 0xff); + value <<= 8; + value |= (buffer[1] & 0xff); + value <<= 8; + value |= (buffer[2] & 0xff); + value <<= 8; + value |= (buffer[3] & 0xff); + + values[i] = value; + } #else - unsigned int parenti, smallesti; + for (i = 0; i < n; i++) { + read(positions_fd,buffer,4); + + value = (buffer[3] & 0xff); + value <<= 8; + value |= (buffer[2] & 0xff); + value <<= 8; + value |= (buffer[1] & 0xff); + value <<= 8; + value |= (buffer[0] & 0xff); + + values[i] = value; + } #endif - debug3(printf("starting merge_batches_one_heap_16_existing\n")); + return; +} +#endif - debug0(int nentries_save = nentries); - ptr = positions = (Univcoord_T *) CALLOC(nentries,sizeof(Univcoord_T)); - /* Set up heap */ - 
heapsize = 0; - for (i = 0; i < 16; i++) { - batch = &(batchpool[i]); - if (batch->nentries > 0) { #ifdef LARGE_GENOMES - batch->position = (((Univcoord_T) *batch->positionptr_high++) << 32) + (*batch->positionptr_low++); -#elif defined(WORDS_BIGENDIAN) - batch->position = Bigendian_convert_univcoord(*batch->positionptr++); -#else - batch->position = *batch->positionptr++; +static UINT4 * +point_one_shift (int *nentries, unsigned char **positions_high, T this, Oligospace_T subst) { + UINT4 *positions_low; + Positionsptr_T ptr0, end0; +#ifdef DEBUG + int i; #endif - heap_insert_even(heap,&heapsize,batch,batch->position); - } - } - sentinel_struct.position = (Univcoord_T) -1; /* infinity */ -#ifdef LARGE_GENOMES - sentinel_struct.positionptr_high = &sentinel_position_high; - sentinel_struct.positionptr_low = &sentinel_position_low; + if (this->compression_type == NO_COMPRESSION) { +#ifdef WORDS_BIGENDIAN + abort(); #else - sentinel_struct.positionptr = &(sentinel_struct.position); + ptr0 = this->offsetsstrm[subst]; + end0 = this->offsetsstrm[subst+1]; #endif - sentinel = &sentinel_struct; - for (i = heapsize+1; i <= 16; i++) { - heap[i] = sentinel; + } else if (this->compression_type == BITPACK64_COMPRESSION) { + ptr0 = Bitpack64_read_two_huge(&end0,subst,this->offsetspages,this->offsetsmeta,this->offsetsstrm); } - last_position = 0U; - while (--nentries >= 1) { - debug3(printf("nentries = %d, top of heap is %u (%d)\n", - nentries+1,heap[1]->position,heapsize)); - /* Get minimum */ - batch = heap[1]; -#ifdef CONVERT_TO_LITTLEENDIAN - this_position = Bigendian_convert_univcoord(batch->position) + diagterm; + debug(printf("point_one_shift: %08X %u %u\n",subst,ptr0,end0)); + + if ((*nentries = end0 - ptr0) == 0) { + *positions_high = (unsigned char *) NULL; + return (UINT4 *) NULL; + } else { + if (this->positions_high_access == FILEIO || this->positions_low_access == FILEIO) { + abort(); + + } else { + /* ALLOCATED or MMAPPED */ + *positions_high = 
&(this->positions_high[ptr0]); + positions_low = &(this->positions_low[ptr0]); + } + } + + debug( + printf("%d entries:",*nentries); + for (i = 0; i < *nentries; i++) { + printf(" %u",(Univcoord_T) positions_high[i] << 32 + positions_low[i]); + } + printf("\n"); + ); + + return positions_low; +} + #else - this_position = batch->position + diagterm; + +static Univcoord_T * +point_one_shift (int *nentries, T this, Oligospace_T subst) { + Univcoord_T *positions; + Positionsptr_T ptr0, end0; +#ifdef DEBUG + int i; #endif - if (this_position != last_position) { - *ptr++ = this_position; + + if (this->compression_type == NO_COMPRESSION) { +#ifdef WORDS_BIGENDIAN +#if 0 + if (this->offsetsstrm_access == ALLOCATED) { + ptr0 = this->offsetsstrm[subst]; + end0 = this->offsetsstrm[subst+1]; + } else { + ptr0 = Bigendian_convert_uint(this->offsetsstrm[subst]); + end0 = Bigendian_convert_uint(this->offsetsstrm[subst+1]); } - last_position = this_position; +#else + abort(); +#endif +#else + ptr0 = this->offsetsstrm[subst]; + end0 = this->offsetsstrm[subst+1]; +#endif - if (--batch->nentries <= 0) { - /* Use last batch (or sentinel) in heap for insertion */ - heap[1] = batch = (heapsize == 1) ? 
sentinel : heap[heapsize]; - heap[heapsize--] = sentinel; + } else if (this->compression_type == BITPACK64_COMPRESSION) { + ptr0 = Bitpack64_read_two(&end0,subst,this->offsetsmeta,this->offsetsstrm); + } + + + debug(printf("point_one_shift: %08X %u %u\n",subst,ptr0,end0)); + + if ((*nentries = end0 - ptr0) == 0) { + return (Univcoord_T *) NULL; + } else { + if (this->positions_access == FILEIO) { + positions = (Univcoord_T *) CALLOC(*nentries,sizeof(Univcoord_T)); +#ifdef HAVE_PTHREAD + pthread_mutex_lock(&this->positions_read_mutex); +#endif + positions_move_absolute(this->positions_fd,ptr0); + positions_read_multiple(this->positions_fd,positions,*nentries); +#ifdef HAVE_PTHREAD + pthread_mutex_unlock(&this->positions_read_mutex); +#endif } else { - /* Advance heap, and use this batch for insertion */ -#ifdef LARGE_GENOMES - batch->position = (((Univcoord_T) *batch->positionptr_high++) << 32) + (*batch->positionptr_low++); -#elif defined(WORDS_BIGENDIAN) - batch->position = Bigendian_convert_univcoord(*batch->positionptr++); + /* ALLOCATED or MMAPPED */ + positions = &(this->positions[ptr0]); + } + } + +#ifdef WORDS_BIGENDIAN + debug( + printf("%d entries:",*nentries); + for (i = 0; i < *nentries; i++) { + printf(" %u",Bigendian_convert_univcoord(positions[i])); + } + printf("\n"); + ); #else - batch->position = *batch->positionptr++; + debug( + printf("%d entries:",*nentries); + for (i = 0; i < *nentries; i++) { + printf(" %u",positions[i]); + } + printf("\n"); + ); +#endif + + return positions; +} + #endif + + +/* 87654321 */ +#define LOW_TWO_BITS 0x00000003 + +#ifdef DEBUG +static char * +shortoligo_nt (Oligospace_T oligo, int oligosize) { + char *nt; + int i, j; + Oligospace_T lowbits; + + nt = (char *) CALLOC(oligosize+1,sizeof(char)); + j = oligosize-1; + for (i = 0; i < oligosize; i++) { + lowbits = oligo & LOW_TWO_BITS; + switch (lowbits) { + case RIGHT_A: nt[j] = 'A'; break; + case RIGHT_C: nt[j] = 'C'; break; + case RIGHT_G: nt[j] = 'G'; break; + case 
RIGHT_T: nt[j] = 'T'; break; } + oligo >>= 2; + j--; + } - position = batch->position; - debug3(printf("starting heapify with %u\n",position)); + return nt; +} +#endif -#ifdef READ_THEN_WRITE - /* Comparison 0/3 */ - debug3(printf("Comparing right %d: %u\n",2,heap[2]->position)); - if (position <= heap[2]->position) { - debug3(printf("Inserting at 1\n")); - /* heap[1] = batch; -- not necessary because batch is already at heap[1] */ + +#ifdef LARGE_GENOMES +static int +count_one_shift (T this, Oligospace_T subst, int nadjacent) { + Positionsptr_T ptr0, end0; + + if (this->compression_type == NO_COMPRESSION) { +#ifdef WORDS_BIGENDIAN +#if 0 + if (this->offsetsstrm_access == ALLOCATED) { + ptr0 = this->offsetsstrm[subst]; + end0 = this->offsetsstrm[subst+nadjacent]; } else { - /* Comparison 1/3 */ - debug3(printf("Comparing left %d/right %d: %u and %u\n", - 3,4,heap[3]->position,heap[4]->position)); - smallesti = 4 - (heap[3]->position < heap[4]->position); - if (position <= heap[smallesti]->position) { - debug3(printf("Inserting at 2\n")); - heap[1] = heap[2]; - heap[2] = batch; - } else { - smallesti_1 = smallesti; - smallesti <<= 1; - /* Comparison 2/3 */ - debug3(printf("Comparing left %d/right %d: %u and %u\n", - smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position)); - smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position); - if (position <= heap[smallesti]->position) { - debug3(printf("Inserting at %d\n",smallesti_1)); - heap[1] = heap[2]; - heap[2] = heap[smallesti_1]; - heap[smallesti_1] = batch; - } else { - smallesti_2 = smallesti; - smallesti <<= 1; - /* Comparison 3/3 */ - debug3(printf("Comparing left %d/right %d: %u and %u\n", - smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position)); - smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position); - if (position <= heap[smallesti]->position) { - debug3(printf("Inserting at %d\n",smallesti_2)); - heap[1] = 
heap[2]; - heap[2] = heap[smallesti_1]; - heap[smallesti_1] = heap[smallesti_2]; - heap[smallesti_2] = batch; - } else { - debug3(printf("Inserting at %d\n",smallesti)); - heap[1] = heap[2]; - heap[2] = heap[smallesti_1]; - heap[smallesti_1] = heap[smallesti_2]; - heap[smallesti_2] = heap[smallesti]; - heap[smallesti] = batch; - } - } - } + ptr0 = Bigendian_convert_uint(this->offsetsstrm[subst]); + end0 = Bigendian_convert_uint(this->offsetsstrm[subst+nadjacent]); } #else - /* Comparison 0/3 */ - debug3(printf("Comparing right %d: %u\n",2,heap[2]->position)); - if (position <= heap[2]->position) { - debug3(printf("Inserting at 1\n")); - /* heap[1] = batch; -- not necessary because batch is already at heap[1] */ - } else { - heap[1] = heap[2]; - /* Comparison 1/3 */ - debug3(printf("Comparing left %d/right %d: %u and %u\n", - 3,4,heap[3]->position,heap[4]->position)); - smallesti = 4 - (heap[3]->position < heap[4]->position); - if (position <= heap[smallesti]->position) { - debug3(printf("Inserting at 2\n")); - heap[2] = batch; - } else { - heap[2] = heap[smallesti]; - parenti = smallesti; - smallesti <<= 1; - /* Comparison 2/3 */ - debug3(printf("Comparing left %d/right %d: %u and %u\n", - smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position)); - smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position); - if (position <= heap[smallesti]->position) { - debug3(printf("Inserting at %d\n",parenti)); - heap[parenti] = batch; - } else { - heap[parenti] = heap[smallesti]; - parenti = smallesti; - smallesti <<= 1; - /* Comparison 3/3 */ - debug3(printf("Comparing left %d/right %d: %u and %u\n", - smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position)); - smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position); - if (position <= heap[smallesti]->position) { - debug3(printf("Inserting at %d\n",parenti)); - heap[parenti] = batch; - } else { - heap[parenti] = heap[smallesti]; - 
debug3(printf("Inserting at %d\n",smallesti)); - heap[smallesti] = batch; - } - } - } - } + abort(); #endif - } - -#ifdef CONVERT_TO_LITTLEENDIAN - this_position = Bigendian_convert_univcoord(heap[1]->position) + diagterm; #else - this_position = heap[1]->position + diagterm; + ptr0 = this->offsetsstrm[subst]; + end0 = this->offsetsstrm[subst+nadjacent]; #endif - if (this_position != last_position) { - *ptr++ = this_position; - } - *nmerged = (ptr - positions); + } else if (this->compression_type == BITPACK64_COMPRESSION) { + ptr0 = Bitpack64_read_one_huge(subst,this->offsetspages,this->offsetsmeta,this->offsetsstrm); + end0 = Bitpack64_read_one_huge(subst+nadjacent,this->offsetspages,this->offsetsmeta,this->offsetsstrm); -#if 0 - position = positions[0]; - for (i = 1; i < nentries_save; i++) { - if (positions[i] <= position) { - abort(); - } - position = positions[i]; + } else { + abort(); } -#endif - debug0( - for (i = 0; i < nentries_save; i++) { - printf("%u\n",positions[i]); - } - printf("\n"); - ) + debug(printf("count_one_shift: oligo = %06X (%s), %u - %u = %u\n", + subst,shortoligo_nt(subst,index1part),end0,ptr0,end0-ptr0)); + return (end0 - ptr0); - return positions; } - -static Univcoord_T * -merge_batches_one_heap_4_existing (int *nmerged, struct Batch_T *batchpool, int nentries, int diagterm) { - Univcoord_T *positions, *ptr, position, last_position, this_position; - struct Batch_T sentinel_struct; - Batch_T batch, sentinel, heap[5]; - int heapsize; - unsigned int i; -#ifdef READ_THEN_WRITE - unsigned int smallesti; #else - unsigned int parenti, smallesti; -#endif - - debug3(printf("starting merge_batches_one_heap_4_existing\n")); - - debug0(int nentries_save = nentries); - - ptr = positions = (Univcoord_T *) CALLOC(nentries,sizeof(Univcoord_T)); +static int +count_one_shift (T this, Oligospace_T subst, int nadjacent) { + Positionsptr_T ptr0, end0; - /* Set up heap */ - heapsize = 0; - for (i = 0; i < 4; i++) { - batch = &(batchpool[i]); - if 
(batch->nentries > 0) { -#ifdef LARGE_GENOMES - batch->position = (((Univcoord_T) *batch->positionptr_high++) << 32) + (*batch->positionptr_low++); -#elif defined(WORDS_BIGENDIAN) - batch->position = Bigendian_convert_univcoord(*batch->positionptr++); + if (this->compression_type == NO_COMPRESSION) { +#ifdef WORDS_BIGENDIAN +#if 0 + if (this->offsetsstrm_access == ALLOCATED) { + ptr0 = this->offsetsstrm[subst]; + end0 = this->offsetsstrm[subst+nadjacent]; + } else { + ptr0 = Bigendian_convert_uint(this->offsetsstrm[subst]); + end0 = Bigendian_convert_uint(this->offsetsstrm[subst+nadjacent]); + } #else - batch->position = *batch->positionptr++; + abort(); #endif - heap_insert_even(heap,&heapsize,batch,batch->position); - } - } - - sentinel_struct.position = (Univcoord_T) -1; /* infinity */ -#ifdef LARGE_GENOMES - sentinel_struct.positionptr_high = &sentinel_position_high; - sentinel_struct.positionptr_low = &sentinel_position_low; #else - sentinel_struct.positionptr = &(sentinel_struct.position); + ptr0 = this->offsetsstrm[subst]; + end0 = this->offsetsstrm[subst+nadjacent]; #endif - sentinel = &sentinel_struct; - for (i = heapsize+1; i <= 4; i++) { - heap[i] = sentinel; + } else if (this->compression_type == BITPACK64_COMPRESSION) { + ptr0 = Bitpack64_read_one(subst,this->offsetsmeta,this->offsetsstrm); + end0 = Bitpack64_read_one(subst+nadjacent,this->offsetsmeta,this->offsetsstrm); + + } else { + abort(); } - last_position = 0U; - while (--nentries >= 1) { - debug3(printf("nentries = %d, top of heap is %u (%d)\n", - nentries+1,heap[1]->position,heapsize)); + debug(printf("count_one_shift: oligo = %06X (%s), %u - %u = %u\n", + subst,shortoligo_nt(subst,index1part),end0,ptr0,end0-ptr0)); + return (end0 - ptr0); + +} - /* Get minimum */ - batch = heap[1]; -#ifdef CONVERT_TO_LITTLEENDIAN - this_position = Bigendian_convert_univcoord(batch->position) + diagterm; -#else - this_position = batch->position + diagterm; #endif - if (this_position != last_position) { - 
*ptr++ = this_position; - } - last_position = this_position; - if (--batch->nentries <= 0) { - /* Use last batch (or sentinel) in heap for insertion */ - heap[1] = batch = (heapsize == 1) ? sentinel : heap[heapsize]; - heap[heapsize--] = sentinel; +/************************************************************************ + * Counting procedures + ************************************************************************/ - } else { - /* Advance heap, and use this batch for insertion */ -#ifdef LARGE_GENOMES - batch->position = (((Univcoord_T) *batch->positionptr_high++) << 32) + (*batch->positionptr_low++); -#elif defined(WORDS_BIGENDIAN) - batch->position = Bigendian_convert_univcoord(*batch->positionptr++); -#else - batch->position = *batch->positionptr++; -#endif - } +/* Don't mask out leftmost nucleotides with LOWXXMER */ +/* TODO: Eliminate the loop by implementing a single procedure in bitpack64-readtwo that counts over the base */ +int +Indexdb_count_left_subst_2 (T this, Oligospace_T oligo) { + int nentries = 0; + Oligospace_T base; + int i; - position = batch->position; - debug3(printf("starting heapify with %u\n",position)); - -#ifdef READ_THEN_WRITE - /* Comparison 0/3 */ - debug3(printf("Comparing right %d: %u\n",2,heap[2]->position)); - if (position <= heap[2]->position) { - debug3(printf("Inserting at 1\n")); - /* heap[1] = batch; -- not necessary because batch is already at heap[1] */ - } else { - /* Comparison 1/3 */ - debug3(printf("Comparing left %d/right %d: %u and %u\n", - 3,4,heap[3]->position,heap[4]->position)); - smallesti = 4 - (heap[3]->position < heap[4]->position); - if (position <= heap[smallesti]->position) { - debug3(printf("Inserting at 2\n")); - heap[1] = heap[2]; - heap[2] = batch; - } else { - debug3(printf("Inserting at %d\n",smallesti)); - heap[1] = heap[2]; - heap[2] = heap[smallesti]; - heap[smallesti] = batch; - } - } + debug(printf("count_left_subst_2: oligo = %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part))); +#ifdef 
ALLOW_DUPLICATES + /* Right shift */ + base = (oligo >> 4); + for (i = 0; i < 16; i++, base += left_subst) { + nentries += count_one_shift(this,base); + } #else - /* Comparison 0/3 */ - debug3(printf("Comparing right %d: %u\n",2,heap[2]->position)); - if (position <= heap[2]->position) { - debug3(printf("Inserting at 1\n")); - /* heap[1] = batch; -- not necessary because batch is already at heap[1] */ - } else { - heap[1] = heap[2]; - /* Comparison 1/3 */ - debug3(printf("Comparing left %d/right %d: %u and %u\n", - 3,4,heap[3]->position,heap[4]->position)); - smallesti = 4 - (heap[3]->position < heap[4]->position); - if (position <= heap[smallesti]->position) { - debug3(printf("Inserting at 2\n")); - heap[2] = batch; - } else { - heap[2] = heap[smallesti]; - heap[smallesti] = batch; - } - } - + /* Right shift */ + base = (oligo >> 4); + debug(printf("shift right => %06X (%s)\n",base,shortoligo_nt(base,index1part))); + for (i = 0; i < 16; i++, base += left_subst) { +#if 0 + nentries += count_one_shift(this,base,/*nadjacent*/1); +#else + nentries += Indexdb_count_no_subst(this,base); #endif } +#endif + + return nentries; +} -#ifdef CONVERT_TO_LITTLEENDIAN - this_position = Bigendian_convert_univcoord(heap[1]->position) + diagterm; + +/* Don't mask out leftmost nucleotides with LOWXXMER */ +/* TODO: Eliminate the loop by implementing a single procedure in bitpack64-readtwo that counts over the base */ +int +Indexdb_count_left_subst_1 (T this, Oligospace_T oligo) { + int nentries = 0; + Oligospace_T base; + int i; + + debug(printf("count_left_subst_1: oligo = %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part))); + +#ifdef ALLOW_DUPLICATES + /* Zero shift. */ + base = (oligo >> 2); + for (i = 0; i < 4; i++, base += top_subst) { + nentries += count_one_shift(this,base); + } #else - this_position = heap[1]->position + diagterm; + /* Zero shift. 
*/ + base = (oligo >> 2); + for (i = 0; i < 4; i++, base += top_subst) { +#if 0 + nentries += count_one_shift(this,base,/*nadjacent*/1); +#else + nentries += Indexdb_count_no_subst(this,base); #endif - if (this_position != last_position) { - *ptr++ = this_position; } +#endif + + return nentries; +} - *nmerged = (ptr - positions); -#if 0 - position = positions[0]; - for (i = 1; i < nentries_save; i++) { - if (positions[i] <= position) { - abort(); - } - position = positions[i]; - } +int +Indexdb_count_right_subst_2 (T this, Oligospace_T oligo) { + int nentries; + Oligospace_T base; +#ifdef ALLOW_DUPLICATES + int i; +#endif +#ifdef DEBUG + int i; #endif - debug0( - for (i = 0; i < nentries_save; i++) { - printf("%u\n",positions[i]); - } - printf("\n"); - ) + debug(printf("count_right_subst_2: oligo = %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part))); +#ifdef ALLOW_DUPLICATES + /* Left shift */ + base = (oligo << 4) & kmer_mask; + nentries = 0; + for (i = 0; i < 16; i++, base += right_subst) { + nentries += count_one_shift(this,base); + } +#else + /* Left shift */ + base = (oligo << 4) & kmer_mask; + nentries = count_one_shift(this,base,/*nadjacent*/16); - return positions; + debug( + printf("Details\n"); + nentries = 0; + for (i = 0; i < 16; i++, base += right_subst) { + nentries += count_one_shift(this,base,/*nadjacent*/1); + } + ); +#endif + + return nentries; } -/************************************************************************ - * The following positions functions are taken from indexdb.c - ************************************************************************/ +int +Indexdb_count_right_subst_1 (T this, Oligospace_T oligo) { + int nentries; + Oligospace_T base; +#ifdef ALLOW_DUPLICATES + int i; +#endif +#ifdef DEBUG + int i; +#endif -#ifndef LARGE_GENOMES -static void -positions_move_absolute (int positions_fd, Positionsptr_T ptr) { - off_t offset = ptr*((off_t) sizeof(Univcoord_T)); + debug(printf("count_right_subst_1: oligo = %06X 
(%s)\n",oligo,shortoligo_nt(oligo,index1part))); - if (lseek(positions_fd,offset,SEEK_SET) < 0) { - fprintf(stderr,"Attempted to do lseek on offset %u*%d=%zd\n", - ptr,(int) sizeof(Univcoord_T),offset); - perror("Error in indexdb.c, positions_move_absolute_4"); - exit(9); +#ifdef ALLOW_DUPLICATES + /* Zero shift */ + base = (oligo << 2) & kmer_mask; + nentries = 0; + for (i = 0; i < 4; i++, base += right_subst) { + nentries += count_one_shift(this,base); } - return; -} +#else + /* Zero shift */ + base = (oligo << 2) & kmer_mask; + nentries = count_one_shift(this,base,/*nadjacent*/4); -static void -positions_read_multiple (int positions_fd, Univcoord_T *values, int n) { - int i; - Univcoord_T value; - unsigned char buffer[4]; + debug( + printf("Details\n"); + nentries = 0; + for (i = 0; i < 4; i++, base += right_subst) { + nentries += count_one_shift(this,base,/*nadjacent*/1); + } + ); +#endif + + return nentries; +} -#ifdef WORDS_BIGENDIAN - /* Need to keep in bigendian format */ - for (i = 0; i < n; i++) { - read(positions_fd,buffer,4); - value = (buffer[0] & 0xff); - value <<= 8; - value |= (buffer[1] & 0xff); - value <<= 8; - value |= (buffer[2] & 0xff); - value <<= 8; - value |= (buffer[3] & 0xff); +/************************************************************************/ - values[i] = value; - } -#else - for (i = 0; i < n; i++) { - read(positions_fd,buffer,4); - value = (buffer[3] & 0xff); - value <<= 8; - value |= (buffer[2] & 0xff); - value <<= 8; - value |= (buffer[1] & 0xff); - value <<= 8; - value |= (buffer[0] & 0xff); +static bool free_positions_p; /* Needs to be true if Indexdb positions are FILEIO */ - values[i] = value; +void +Compoundpos_init_positions_free (bool positions_fileio_p) { + if (positions_fileio_p == true) { + free_positions_p = true; + } else { + free_positions_p = false; } -#endif - return; } -#endif +struct Compoundpos_T { + int n; + #ifdef LARGE_GENOMES -static UINT4 * -point_one_shift (int *nentries, unsigned char **positions_high, 
T this, Oligospace_T subst) { - UINT4 *positions_low; - Positionsptr_T ptr0, end0; -#ifdef DEBUG - int i; + unsigned char *positions_high[16]; + UINT4 *positions_low[16]; +#else + Univcoord_T *positions[16]; #endif + int npositions[16]; - if (this->compression_type == NO_COMPRESSION) { -#ifdef WORDS_BIGENDIAN - abort(); + struct Batch_T batchpool[16]; + Batch_T heap[17]; + int heapsize; + struct Batch_T sentinel_struct; + Batch_T sentinel; + +#ifdef LARGE_GENOMES + unsigned char *positions_high_reset[16]; /* altered by find_nomiss_aux and find_onemiss_aux */ + UINT4 *positions_low_reset[16]; /* altered by find_nomiss_aux and find_onemiss_aux */ #else - ptr0 = this->offsetsstrm[subst]; - end0 = this->offsetsstrm[subst+1]; + Univcoord_T *positions_reset[16]; /* altered by find_nomiss_aux and find_onemiss_aux */ #endif - - } else if (this->compression_type == BITPACK64_COMPRESSION) { - ptr0 = Bitpack64_read_two_huge(&end0,subst,this->offsetspages,this->offsetsmeta,this->offsetsstrm); - } + int npositions_reset[16]; /* altered by find_nomiss_aux and find_onemiss_aux */ +}; - debug(printf("point_one_shift: %08X %u %u\n",subst,ptr0,end0)); +void +Compoundpos_set (Compoundpos_T compoundpos) { + int i; - if ((*nentries = end0 - ptr0) == 0) { - *positions_high = (unsigned char *) NULL; - return (UINT4 *) NULL; - } else { - if (this->positions_high_access == FILEIO || this->positions_low_access == FILEIO) { - abort(); - - } else { - /* ALLOCATED or MMAPPED */ - *positions_high = &(this->positions_high[ptr0]); - positions_low = &(this->positions_low[ptr0]); - } + for (i = 0; i < compoundpos->n; i++) { +#ifdef LARGE_GENOMES + compoundpos->positions_high_reset[i] = compoundpos->positions_high[i]; + compoundpos->positions_low_reset[i] = compoundpos->positions_low[i]; +#else + compoundpos->positions_reset[i] = compoundpos->positions[i]; +#endif + compoundpos->npositions_reset[i] = compoundpos->npositions[i]; } - - debug( - printf("%d entries:",*nentries); - for (i = 0; i < 
*nentries; i++) { - printf(" %u",(Univcoord_T) positions_high[i] << 32 + positions_low[i]); - } - printf("\n"); - ); - - return positions_low; + return; } -#else - -static Univcoord_T * -point_one_shift (int *nentries, T this, Oligospace_T subst) { - Univcoord_T *positions; - Positionsptr_T ptr0, end0; -#ifdef DEBUG +void +Compoundpos_reset (Compoundpos_T compoundpos) { int i; -#endif - if (this->compression_type == NO_COMPRESSION) { -#ifdef WORDS_BIGENDIAN -#if 0 - if (this->offsetsstrm_access == ALLOCATED) { - ptr0 = this->offsetsstrm[subst]; - end0 = this->offsetsstrm[subst+1]; - } else { - ptr0 = Bigendian_convert_uint(this->offsetsstrm[subst]); - end0 = Bigendian_convert_uint(this->offsetsstrm[subst+1]); - } -#else - abort(); -#endif + for (i = 0; i < compoundpos->n; i++) { +#ifdef LARGE_GENOMES + compoundpos->positions_high[i] = compoundpos->positions_high_reset[i]; + compoundpos->positions_low[i] = compoundpos->positions_low_reset[i]; #else - ptr0 = this->offsetsstrm[subst]; - end0 = this->offsetsstrm[subst+1]; + compoundpos->positions[i] = compoundpos->positions_reset[i]; #endif - - } else if (this->compression_type == BITPACK64_COMPRESSION) { - ptr0 = Bitpack64_read_two(&end0,subst,this->offsetsmeta,this->offsetsstrm); + compoundpos->npositions[i] = compoundpos->npositions_reset[i]; } + return; +} - debug(printf("point_one_shift: %08X %u %u\n",subst,ptr0,end0)); - - if ((*nentries = end0 - ptr0) == 0) { - return (Univcoord_T *) NULL; - } else { - if (this->positions_access == FILEIO) { - positions = (Univcoord_T *) CALLOC(*nentries,sizeof(Univcoord_T)); -#ifdef HAVE_PTHREAD - pthread_mutex_lock(&this->positions_read_mutex); -#endif - positions_move_absolute(this->positions_fd,ptr0); - positions_read_multiple(this->positions_fd,positions,*nentries); -#ifdef HAVE_PTHREAD - pthread_mutex_unlock(&this->positions_read_mutex); -#endif +void +Compoundpos_print_sizes (Compoundpos_T compoundpos) { + int i; - } else { - /* ALLOCATED or MMAPPED */ - positions = 
&(this->positions[ptr0]); - } + for (i = 0; i < compoundpos->n; i++) { + printf(" %d",compoundpos->npositions[i]); } - -#ifdef WORDS_BIGENDIAN - debug( - printf("%d entries:",*nentries); - for (i = 0; i < *nentries; i++) { - printf(" %u",Bigendian_convert_univcoord(positions[i])); - } - printf("\n"); - ); -#else - debug( - printf("%d entries:",*nentries); - for (i = 0; i < *nentries; i++) { - printf(" %u",positions[i]); - } - printf("\n"); - ); -#endif - - return positions; -} - -#endif + return; +} -/* 87654321 */ -#define LOW_TWO_BITS 0x00000003 -#ifdef DEBUG -static char * -shortoligo_nt (Oligospace_T oligo, int oligosize) { - char *nt; +void +Compoundpos_dump (Compoundpos_T compoundpos, int diagterm) { int i, j; - Oligospace_T lowbits; - nt = (char *) CALLOC(oligosize+1,sizeof(char)); - j = oligosize-1; - for (i = 0; i < oligosize; i++) { - lowbits = oligo & LOW_TWO_BITS; - switch (lowbits) { - case RIGHT_A: nt[j] = 'A'; break; - case RIGHT_C: nt[j] = 'C'; break; - case RIGHT_G: nt[j] = 'G'; break; - case RIGHT_T: nt[j] = 'T'; break; - } - oligo >>= 2; - j--; + printf("%d diagonals: ",compoundpos->n); + for (i = 0; i < compoundpos->n; i++) { + printf(" %d",compoundpos->npositions[i]); } + printf("\n"); - return nt; -} -#endif - - + for (i = 0; i < compoundpos->n; i++) { + for (j = 0; j < compoundpos->npositions[i]; j++) { #ifdef LARGE_GENOMES -static int -count_one_shift (T this, Oligospace_T subst, int nadjacent) { - Positionsptr_T ptr0, end0; - - if (this->compression_type == NO_COMPRESSION) { -#ifdef WORDS_BIGENDIAN -#if 0 - if (this->offsetsstrm_access == ALLOCATED) { - ptr0 = this->offsetsstrm[subst]; - end0 = this->offsetsstrm[subst+nadjacent]; - } else { - ptr0 = Bigendian_convert_uint(this->offsetsstrm[subst]); - end0 = Bigendian_convert_uint(this->offsetsstrm[subst+nadjacent]); - } -#else - abort(); -#endif + printf(" compound%d.%d:%llu+%d\n", + i,j,((Univcoord_T) compoundpos->positions_high[i][j] << 32) + compoundpos->positions_low[i][j],diagterm); 
+#elif defined(WORDS_BIGENDIAN) + printf(" compound%d.%d:%u+%d\n", + i,j,Bigendian_convert_univcoord(compoundpos->positions[i][j]),diagterm); #else - ptr0 = this->offsetsstrm[subst]; - end0 = this->offsetsstrm[subst+nadjacent]; + printf(" compound%d.%d:%u+%d\n",i,j,compoundpos->positions[i][j],diagterm); #endif - - } else if (this->compression_type == BITPACK64_COMPRESSION) { - ptr0 = Bitpack64_read_one_huge(subst,this->offsetspages,this->offsetsmeta,this->offsetsstrm); - end0 = Bitpack64_read_one_huge(subst+nadjacent,this->offsetspages,this->offsetsmeta,this->offsetsstrm); - - } else { - abort(); + } } - - debug(printf("count_one_shift: oligo = %06X (%s), %u - %u = %u\n", - subst,shortoligo_nt(subst,index1part),end0,ptr0,end0-ptr0)); - return (end0 - ptr0); - + return; } -#else -static int -count_one_shift (T this, Oligospace_T subst, int nadjacent) { - Positionsptr_T ptr0, end0; - if (this->compression_type == NO_COMPRESSION) { -#ifdef WORDS_BIGENDIAN -#if 0 - if (this->offsetsstrm_access == ALLOCATED) { - ptr0 = this->offsetsstrm[subst]; - end0 = this->offsetsstrm[subst+nadjacent]; - } else { - ptr0 = Bigendian_convert_uint(this->offsetsstrm[subst]); - end0 = Bigendian_convert_uint(this->offsetsstrm[subst+nadjacent]); - } -#else - abort(); -#endif +void +Compoundpos_free (Compoundpos_T *old) { + int i; + + if (*old) { + if (free_positions_p == true) { + for (i = 0; i < (*old)->n; i++) { +#ifdef LARGE_GENOMES + FREE((*old)->positions_high[i]); + FREE((*old)->positions_low[i]); #else - ptr0 = this->offsetsstrm[subst]; - end0 = this->offsetsstrm[subst+nadjacent]; + FREE((*old)->positions[i]); #endif + } + } - } else if (this->compression_type == BITPACK64_COMPRESSION) { - ptr0 = Bitpack64_read_one(subst,this->offsetsmeta,this->offsetsstrm); - end0 = Bitpack64_read_one(subst+nadjacent,this->offsetsmeta,this->offsetsstrm); - - } else { - abort(); + /* No need, since allocated statically. FREE((*old)->npositions); */ + /* No need, since allocated statically. 
FREE((*old)->positions); */ + + FREE(*old); } - - debug(printf("count_one_shift: oligo = %06X (%s), %u - %u = %u\n", - subst,shortoligo_nt(subst,index1part),end0,ptr0,end0-ptr0)); - return (end0 - ptr0); - + return; } -#endif - - -/************************************************************************ - * Counting procedures - ************************************************************************/ -/* Don't mask out leftmost nucleotides with LOWXXMER */ -/* TODO: Eliminate the loop by implementing a single procedure in bitpack64-readtwo that counts over the base */ -int -Indexdb_count_left_subst_2 (T this, Oligospace_T oligo) { - int nentries = 0; +Compoundpos_T +Indexdb_compoundpos_left_subst_2 (T this, Oligospace_T oligo) { + Compoundpos_T compoundpos = (Compoundpos_T) MALLOC(sizeof(*compoundpos)); Oligospace_T base; int i; - debug(printf("count_left_subst_2: oligo = %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part))); + debug(printf("compoundpos_left_subst_2: %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part))); + + compoundpos->n = 16; + /* compoundpos->npositions = (int *) CALLOC(16,sizeof(int)); */ + /* compoundpos->positions = (Univcoord_T **) CALLOC(16,sizeof(Univcoord_T *)); */ -#ifdef ALLOW_DUPLICATES /* Right shift */ base = (oligo >> 4); for (i = 0; i < 16; i++, base += left_subst) { - nentries += count_one_shift(this,base); - } -#else - /* Right shift */ - base = (oligo >> 4); - debug(printf("shift right => %06X (%s)\n",base,shortoligo_nt(base,index1part))); - for (i = 0; i < 16; i++, base += left_subst) { -#if 0 - nentries += count_one_shift(this,base,/*nadjacent*/1); +#ifdef LARGE_GENOMES + compoundpos->positions_low[i] = + point_one_shift(&(compoundpos->npositions[i]),&(compoundpos->positions_high[i]),this,base); #else - nentries += Indexdb_count_no_subst(this,base); + compoundpos->positions[i] = point_one_shift(&(compoundpos->npositions[i]),this,base); #endif } -#endif - - return nentries; -} + return compoundpos; +} -/* Don't mask out 
leftmost nucleotides with LOWXXMER */ -/* TODO: Eliminate the loop by implementing a single procedure in bitpack64-readtwo that counts over the base */ -int -Indexdb_count_left_subst_1 (T this, Oligospace_T oligo) { - int nentries = 0; +Compoundpos_T +Indexdb_compoundpos_left_subst_1 (T this, Oligospace_T oligo) { + Compoundpos_T compoundpos = (Compoundpos_T) MALLOC(sizeof(*compoundpos)); Oligospace_T base; int i; - debug(printf("count_left_subst_1: oligo = %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part))); + debug(printf("compoundpos_left_subst_1: %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part))); -#ifdef ALLOW_DUPLICATES - /* Zero shift. */ - base = (oligo >> 2); - for (i = 0; i < 4; i++, base += top_subst) { - nentries += count_one_shift(this,base); - } -#else - /* Zero shift. */ + compoundpos->n = 4; + /* compoundpos->npositions = (int *) CALLOC(4,sizeof(int)); */ + /* compoundpos->positions = (Univcoord_T **) CALLOC(4,sizeof(Univcoord_T *)); */ + + /* Zero shift */ base = (oligo >> 2); for (i = 0; i < 4; i++, base += top_subst) { -#if 0 - nentries += count_one_shift(this,base,/*nadjacent*/1); +#ifdef LARGE_GENOMES + compoundpos->positions_low[i] = + point_one_shift(&(compoundpos->npositions[i]),&(compoundpos->positions_high[i]),this,base); #else - nentries += Indexdb_count_no_subst(this,base); + compoundpos->positions[i] = point_one_shift(&(compoundpos->npositions[i]),this,base); #endif } -#endif - - return nentries; -} + return compoundpos; +} -int -Indexdb_count_right_subst_2 (T this, Oligospace_T oligo) { - int nentries; +Compoundpos_T +Indexdb_compoundpos_right_subst_2 (T this, Oligospace_T oligo) { + Compoundpos_T compoundpos = (Compoundpos_T) MALLOC(sizeof(*compoundpos)); Oligospace_T base; -#ifdef ALLOW_DUPLICATES - int i; -#endif -#ifdef DEBUG int i; -#endif - debug(printf("count_right_subst_2: oligo = %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part))); + debug(printf("compoundpos_right_subst_2: %06X 
(%s)\n",oligo,shortoligo_nt(oligo,index1part))); + + compoundpos->n = 16; + /* compoundpos->npositions = (int *) CALLOC(16,sizeof(int)); */ + /* compoundpos->positions = (Univcoord_T **) CALLOC(16,sizeof(Univcoord_T *)); */ -#ifdef ALLOW_DUPLICATES /* Left shift */ base = (oligo << 4) & kmer_mask; - nentries = 0; for (i = 0; i < 16; i++, base += right_subst) { - nentries += count_one_shift(this,base); - } +#ifdef LARGE_GENOMES + compoundpos->positions_low[i] = + point_one_shift(&(compoundpos->npositions[i]),&(compoundpos->positions_high[i]),this,base); #else - /* Left shift */ - base = (oligo << 4) & kmer_mask; - nentries = count_one_shift(this,base,/*nadjacent*/16); - - debug( - printf("Details\n"); - nentries = 0; - for (i = 0; i < 16; i++, base += right_subst) { - nentries += count_one_shift(this,base,/*nadjacent*/1); - } - ); + compoundpos->positions[i] = point_one_shift(&(compoundpos->npositions[i]),this,base); #endif - - return nentries; -} + } + return compoundpos; +} -int -Indexdb_count_right_subst_1 (T this, Oligospace_T oligo) { - int nentries; +Compoundpos_T +Indexdb_compoundpos_right_subst_1 (T this, Oligospace_T oligo) { + Compoundpos_T compoundpos = (Compoundpos_T) MALLOC(sizeof(*compoundpos)); Oligospace_T base; -#ifdef ALLOW_DUPLICATES - int i; -#endif -#ifdef DEBUG int i; -#endif - debug(printf("count_right_subst_1: oligo = %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part))); + debug(printf("compoundpos_right_subst_1: %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part))); + + compoundpos->n = 4; + /* compoundpos->npositions = (int *) CALLOC(4,sizeof(int)); */ + /* compoundpos->positions = (Univcoord_T **) CALLOC(4,sizeof(Univcoord_T *)); */ -#ifdef ALLOW_DUPLICATES /* Zero shift */ base = (oligo << 2) & kmer_mask; - nentries = 0; for (i = 0; i < 4; i++, base += right_subst) { - nentries += count_one_shift(this,base); - } +#ifdef LARGE_GENOMES + compoundpos->positions_low[i] = + 
point_one_shift(&(compoundpos->npositions[i]),&(compoundpos->positions_high[i]),this,base); #else - /* Zero shift */ - base = (oligo << 2) & kmer_mask; - nentries = count_one_shift(this,base,/*nadjacent*/4); - - debug( - printf("Details\n"); - nentries = 0; - for (i = 0; i < 4; i++, base += right_subst) { - nentries += count_one_shift(this,base,/*nadjacent*/1); - } - ); + compoundpos->positions[i] = point_one_shift(&(compoundpos->npositions[i]),this,base); #endif - - return nentries; + } + + return compoundpos; } + /************************************************************************/ +#ifdef LARGE_GENOMES +static int +binary_search (int lowi, int highi, unsigned char *positions_high, UINT4 *positions_low, Univcoord_T goal) { + bool foundp = false; + int middlei; + Univcoord_T position; + +#ifdef NOBINARY + return lowi; +#endif -static bool free_positions_p; /* Needs to be true if Indexdb positions are FILEIO */ + if (goal == 0U) { + return lowi; + } -void -Compoundpos_init_positions_free (bool positions_fileio_p) { - if (positions_fileio_p == true) { - free_positions_p = true; + while (!foundp && lowi < highi) { + middlei = lowi + ((highi - lowi) / 2); + position = ((Univcoord_T) positions_high[middlei] << 32) + positions_low[middlei]; + debug2(printf(" binary: %d:%u %d:%u %d:%u vs. 
%u\n", + lowi,(positions_high[lowi] << 32) + positions_low[lowi], + middlei,position, + highi,(positions_high[highi] << 32) + positions_low[highi],goal)); + if (goal < position) { + highi = middlei; + } else if (goal > position) { + lowi = middlei + 1; + } else { + foundp = true; + } + } + + if (foundp == true) { + return middlei; } else { - free_positions_p = false; + return highi; } - return; } +#else +static int +binary_search (int lowi, int highi, Univcoord_T *positions, Univcoord_T goal) { + bool foundp = false; + int middlei; -struct Compoundpos_T { - int n; +#ifdef NOBINARY + return lowi; +#endif -#ifdef LARGE_GENOMES - unsigned char *positions_high[16]; - UINT4 *positions_low[16]; + if (goal == 0U) { + return lowi; + } + + while (!foundp && lowi < highi) { + middlei = lowi + ((highi - lowi) / 2); +#ifdef WORDS_BIGENDIAN + debug2(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n", + lowi,Bigendian_convert_univcoord(positions[lowi]), + middlei,Bigendian_convert_univcoord(positions[middlei]), + highi,Bigendian_convert_univcoord(positions[highi]),goal)); + if (goal < Bigendian_convert_univcoord(positions[middlei])) { + highi = middlei; + } else if (goal > Bigendian_convert_univcoord(positions[middlei])) { + lowi = middlei + 1; + } else { + foundp = true; + } #else - Univcoord_T *positions[16]; + debug2(printf(" binary: %d:%u %d:%u %d:%u vs. 
%u\n", + lowi,positions[lowi],middlei,positions[middlei], + highi,positions[highi],goal)); + if (goal < positions[middlei]) { + highi = middlei; + } else if (goal > positions[middlei]) { + lowi = middlei + 1; + } else { + foundp = true; + } #endif - int npositions[16]; + } - struct Batch_T batchpool[16]; - Batch_T heap[17]; - int heapsize; - struct Batch_T sentinel_struct; - Batch_T sentinel; + if (foundp == true) { + return middlei; + } else { + return highi; + } +} -#ifdef LARGE_GENOMES - unsigned char *positions_high_reset[16]; /* altered by find_nomiss_aux and find_onemiss_aux */ - UINT4 *positions_low_reset[16]; /* altered by find_nomiss_aux and find_onemiss_aux */ -#else - Univcoord_T *positions_reset[16]; /* altered by find_nomiss_aux and find_onemiss_aux */ #endif - int npositions_reset[16]; /* altered by find_nomiss_aux and find_onemiss_aux */ -}; void -Compoundpos_set (Compoundpos_T compoundpos) { - int i; +Compoundpos_heap_init (Compoundpos_T compoundpos, int querylength, int diagterm) { + Batch_T batch; + int startbound, i; + compoundpos->heapsize = 0; for (i = 0; i < compoundpos->n; i++) { + batch = &(compoundpos->batchpool[i]); #ifdef LARGE_GENOMES - compoundpos->positions_high_reset[i] = compoundpos->positions_high[i]; - compoundpos->positions_low_reset[i] = compoundpos->positions_low[i]; + batch->positionptr_high = compoundpos->positions_high[i]; + batch->positionptr_low = compoundpos->positions_low[i]; #else - compoundpos->positions_reset[i] = compoundpos->positions[i]; + batch->positionptr = compoundpos->positions[i]; #endif - compoundpos->npositions_reset[i] = compoundpos->npositions[i]; - } - return; -} - -void -Compoundpos_reset (Compoundpos_T compoundpos) { - int i; + batch->nentries = compoundpos->npositions[i]; + if (diagterm < querylength) { + startbound = querylength - diagterm; +#ifdef LARGE_GENOMES + while (batch->nentries > 0 && (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low) < (unsigned int) startbound) { + 
debug11(printf("Eliminating diagonal %u as straddling beginning of genome (Compoundpos_heap_init)\n", + ((Univcoord_T) *batch->positionptr_high << 32) + *batch->positionptr_low)); + ++batch->positionptr_high; + ++batch->positionptr_low; + --batch->nentries; + } +#elif defined(WORDS_BIGENDIAN) + while (batch->nentries > 0 && Bigendian_convert_univcoord(*batch->positionptr) < (unsigned int) startbound) { + debug11(printf("Eliminating diagonal %u as straddling beginning of genome (Compoundpos_heap_init)\n", + Bigendian_convert_univcoord(*batch->positionptr))); + ++batch->positionptr; + --batch->nentries; + } +#else + while (batch->nentries > 0 && *batch->positionptr < (unsigned int) startbound) { + debug11(printf("Eliminating diagonal %u as straddling beginning of genome (Compoundpos_heap_init)\n", + *batch->positionptr)); + ++batch->positionptr; + --batch->nentries; + } +#endif + } + if (batch->nentries > 0) { +#ifdef LARGE_GENOMES + batch->position = (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low); +#elif defined(WORDS_BIGENDIAN) + batch->position = Bigendian_convert_univcoord(*batch->positionptr); +#else + batch->position = *batch->positionptr; +#endif + heap_insert_even(compoundpos->heap,&compoundpos->heapsize,batch,batch->position); + } + } - for (i = 0; i < compoundpos->n; i++) { + compoundpos->sentinel_struct.position = (Univcoord_T) -1; /* infinity */ #ifdef LARGE_GENOMES - compoundpos->positions_high[i] = compoundpos->positions_high_reset[i]; - compoundpos->positions_low[i] = compoundpos->positions_low_reset[i]; + compoundpos->sentinel_struct.positionptr_high = &sentinel_position_high; + compoundpos->sentinel_struct.positionptr_low = &sentinel_position_low; #else - compoundpos->positions[i] = compoundpos->positions_reset[i]; + compoundpos->sentinel_struct.positionptr = &(compoundpos->sentinel_struct.position); #endif - compoundpos->npositions[i] = compoundpos->npositions_reset[i]; + compoundpos->sentinel = 
&compoundpos->sentinel_struct; + + for (i = compoundpos->heapsize+1; i <= compoundpos->n; i++) { + compoundpos->heap[i] = compoundpos->sentinel; } + return; } -void -Compoundpos_print_sizes (Compoundpos_T compoundpos) { +#if 0 +/* Used by DEBUG3 and DEBUG6 */ +static void +heap_even_dump (Batch_T *heap, int heapsize) { int i; + Batch_T batch; - for (i = 0; i < compoundpos->n; i++) { - printf(" %d",compoundpos->npositions[i]); + for (i = 1; i <= heapsize; i++) { + batch = heap[i]; + printf("#%d--%d:%llu ",i,batch->nentries,(unsigned long long) batch->position); } - - return; + printf("\n"); } +#endif -void -Compoundpos_dump (Compoundpos_T compoundpos, int diagterm) { + +/* Returns true if found. emptyp is true only if every batch is + empty. If procedure returns true, empty is guaranteed to be + false. */ +bool +Compoundpos_find (bool *emptyp, Compoundpos_T compoundpos, Univcoord_T local_goal) { + Batch_T *heap = compoundpos->heap, batch; int i, j; - printf("%d diagonals: ",compoundpos->n); - for (i = 0; i < compoundpos->n; i++) { - printf(" %d",compoundpos->npositions[i]); - } - printf("\n"); + debug6(printf("\nEntering Compoundpos_find with local_goal %u\n",local_goal)); - for (i = 0; i < compoundpos->n; i++) { - for (j = 0; j < compoundpos->npositions[i]; j++) { + *emptyp = true; + i = 1; + while (i <= compoundpos->heapsize) { + debug6(printf("Compoundpos_find iteration, heapsize %d:\n",compoundpos->heapsize)); + debug6(heap_even_dump(heap,compoundpos->heapsize)); + + batch = heap[i]; #ifdef LARGE_GENOMES - printf(" compound%d.%d:%llu+%d\n", - i,j,((Univcoord_T) compoundpos->positions_high[i][j] << 32) + compoundpos->positions_low[i][j],diagterm); + if (batch->nentries > 0 && (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low) < local_goal) { + j = 1; + while (j < batch->nentries && + ((Univcoord_T) batch->positionptr_high[j] << 32) + batch->positionptr_low[j] < local_goal) { + j <<= 1; /* gallop by 2 */ + } + if (j >= batch->nentries) { 
+ j = binary_search(j >> 1,batch->nentries,batch->positionptr_high,batch->positionptr_low,local_goal); + } else { + j = binary_search(j >> 1,j,batch->positionptr_high,batch->positionptr_low,local_goal); + } + batch->positionptr_high += j; + batch->positionptr_low += j; + batch->nentries -= j; + debug6(printf("binary search jump %d positions to %d:%u\n", + j,batch->nentries,(((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low))); + } #elif defined(WORDS_BIGENDIAN) - printf(" compound%d.%d:%u+%d\n", - i,j,Bigendian_convert_univcoord(compoundpos->positions[i][j]),diagterm); + if (batch->nentries > 0 && Bigendian_convert_univcoord(*batch->positionptr) < local_goal) { + j = 1; + while (j < batch->nentries && Bigendian_convert_univcoord(batch->positionptr[j]) < local_goal) { + j <<= 1; /* gallop by 2 */ + } + if (j >= batch->nentries) { + j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal); + } else { + j = binary_search(j >> 1,j,batch->positionptr,local_goal); + } + batch->positionptr += j; + batch->nentries -= j; + debug6(printf("binary search jump %d positions to %d:%u\n", + j,batch->nentries,Bigendian_convert_univcoord(*batch->positionptr))); + } #else - printf(" compound%d.%d:%u+%d\n",i,j,compoundpos->positions[i][j],diagterm); -#endif + if (batch->nentries > 0 && *batch->positionptr < local_goal) { + j = 1; + while (j < batch->nentries && batch->positionptr[j] < local_goal) { + j <<= 1; /* gallop by 2 */ + } + if (j >= batch->nentries) { + j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal); + } else { + j = binary_search(j >> 1,j,batch->positionptr,local_goal); + } + batch->positionptr += j; + batch->nentries -= j; + debug6(printf("binary search jump %d positions to %d:%u\n", + j,batch->nentries,*batch->positionptr)); } - } - return; -} - +#endif -void -Compoundpos_free (Compoundpos_T *old) { - int i; + if (batch->nentries <= 0) { + /* Empty, so continue with loop */ + /* Move last heap to this one, 
and reduce heapsize */ + compoundpos->heap[i] = compoundpos->heap[compoundpos->heapsize]; + --compoundpos->heapsize; - if (*old) { - if (free_positions_p == true) { - for (i = 0; i < (*old)->n; i++) { #ifdef LARGE_GENOMES - FREE((*old)->positions_high[i]); - FREE((*old)->positions_low[i]); + } else if (((Univcoord_T) *batch->positionptr_high << 32) + (*batch->positionptr_low) > local_goal) { + /* Already advanced past goal, so continue with loop */ + debug6(printf("Setting emptyp to be false\n")); + *emptyp = false; + i++; +#elif defined(WORDS_BIGENDIAN) + } else if (Bigendian_convert_univcoord(*batch->positionptr) > local_goal) { + /* Already advanced past goal, so continue with loop */ + debug6(printf("Setting emptyp to be false\n")); + *emptyp = false; + i++; #else - FREE((*old)->positions[i]); + } else if (*batch->positionptr > local_goal) { + /* Already advanced past goal, so continue with loop */ + debug6(printf("Setting emptyp to be false\n")); + *emptyp = false; + i++; #endif - } - } - - /* No need, since allocated statically. FREE((*old)->npositions); */ - /* No need, since allocated statically. 
FREE((*old)->positions); */ - - FREE(*old); - } - return; -} - - -Compoundpos_T -Indexdb_compoundpos_left_subst_2 (T this, Oligospace_T oligo) { - Compoundpos_T compoundpos = (Compoundpos_T) MALLOC(sizeof(*compoundpos)); - Oligospace_T base; - int i; - - debug(printf("compoundpos_left_subst_2: %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part))); - - compoundpos->n = 16; - /* compoundpos->npositions = (int *) CALLOC(16,sizeof(int)); */ - /* compoundpos->positions = (Univcoord_T **) CALLOC(16,sizeof(Univcoord_T *)); */ - - /* Right shift */ - base = (oligo >> 4); - for (i = 0; i < 16; i++, base += left_subst) { + } else { + /* Found goal, so return */ + debug6(printf("Setting emptyp to be false\n")); + *emptyp = false; #ifdef LARGE_GENOMES - compoundpos->positions_low[i] = - point_one_shift(&(compoundpos->npositions[i]),&(compoundpos->positions_high[i]),this,base); + debug6(printf("Found! Returning position %llu\n",(((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low))); +#elif defined(WORDS_BIGENDIAN) + debug6(printf("Found! Returning position %u\n",Bigendian_convert_univcoord(*batch->positionptr))); #else - compoundpos->positions[i] = point_one_shift(&(compoundpos->npositions[i]),this,base); + debug6(printf("Found! 
Returning position %u\n",*batch->positionptr)); #endif - } - - return compoundpos; -} - -Compoundpos_T -Indexdb_compoundpos_left_subst_1 (T this, Oligospace_T oligo) { - Compoundpos_T compoundpos = (Compoundpos_T) MALLOC(sizeof(*compoundpos)); - Oligospace_T base; - int i; - - debug(printf("compoundpos_left_subst_1: %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part))); - - compoundpos->n = 4; - /* compoundpos->npositions = (int *) CALLOC(4,sizeof(int)); */ - /* compoundpos->positions = (Univcoord_T **) CALLOC(4,sizeof(Univcoord_T *)); */ - - /* Zero shift */ - base = (oligo >> 2); - for (i = 0; i < 4; i++, base += top_subst) { #ifdef LARGE_GENOMES - compoundpos->positions_low[i] = - point_one_shift(&(compoundpos->npositions[i]),&(compoundpos->positions_high[i]),this,base); + ++batch->positionptr_high; + ++batch->positionptr_low; #else - compoundpos->positions[i] = point_one_shift(&(compoundpos->npositions[i]),this,base); + ++batch->positionptr; #endif + --batch->nentries; + return true; + } } - return compoundpos; + /* Done with loop: Fail. 
*/ + debug6(printf("Returning emptyp %d\n",*emptyp)); + return false; } -Compoundpos_T -Indexdb_compoundpos_right_subst_2 (T this, Oligospace_T oligo) { - Compoundpos_T compoundpos = (Compoundpos_T) MALLOC(sizeof(*compoundpos)); - Oligospace_T base; - int i; - debug(printf("compoundpos_right_subst_2: %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part))); - compoundpos->n = 16; - /* compoundpos->npositions = (int *) CALLOC(16,sizeof(int)); */ - /* compoundpos->positions = (Univcoord_T **) CALLOC(16,sizeof(Univcoord_T *)); */ +/* Returns 0 if heapsize is 0, else 1, and returns smallest value >= local_goal */ +int +Compoundpos_search (Univcoord_T *value, Compoundpos_T compoundpos, Univcoord_T local_goal) { + int parenti, smallesti, j; + Batch_T batch, *heap = compoundpos->heap; + Univcoord_T position; - /* Left shift */ - base = (oligo << 4) & kmer_mask; - for (i = 0; i < 16; i++, base += right_subst) { -#ifdef LARGE_GENOMES - compoundpos->positions_low[i] = - point_one_shift(&(compoundpos->npositions[i]),&(compoundpos->positions_high[i]),this,base); -#else - compoundpos->positions[i] = point_one_shift(&(compoundpos->npositions[i]),this,base); -#endif + debug3(printf("\nEntering Compoundpos_search with local_goal %u\n",local_goal)); + if (compoundpos->heapsize <= 0) { + debug3(printf("Returning because heapsize is %d\n",compoundpos->heapsize)); + return 0; } - return compoundpos; -} - -Compoundpos_T -Indexdb_compoundpos_right_subst_1 (T this, Oligospace_T oligo) { - Compoundpos_T compoundpos = (Compoundpos_T) MALLOC(sizeof(*compoundpos)); - Oligospace_T base; - int i; - - debug(printf("compoundpos_right_subst_1: %06X (%s)\n",oligo,shortoligo_nt(oligo,index1part))); - - compoundpos->n = 4; - /* compoundpos->npositions = (int *) CALLOC(4,sizeof(int)); */ - /* compoundpos->positions = (Univcoord_T **) CALLOC(4,sizeof(Univcoord_T *)); */ - - /* Zero shift */ - base = (oligo << 2) & kmer_mask; - for (i = 0; i < 4; i++, base += right_subst) { + if (compoundpos->n == 4) { + 
while (compoundpos->heapsize > 0 && (batch = heap[1])->position < local_goal) { + debug3(printf("Compoundpos_search iteration, heapsize %d:\n",compoundpos->heapsize)); + debug3(heap_even_dump(heap,compoundpos->heapsize)); #ifdef LARGE_GENOMES - compoundpos->positions_low[i] = - point_one_shift(&(compoundpos->npositions[i]),&(compoundpos->positions_high[i]),this,base); + if (batch->nentries > 0 && (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low) < local_goal) { + j = 1; + while (j < batch->nentries && + ((Univcoord_T) batch->positionptr_high[j] << 32) + batch->positionptr_low[j] < local_goal) { + j <<= 1; /* gallop by 2 */ + } + if (j >= batch->nentries) { + j = binary_search(j >> 1,batch->nentries,batch->positionptr_high,batch->positionptr_low,local_goal); + } else { + j = binary_search(j >> 1,j,batch->positionptr_high,batch->positionptr_low,local_goal); + } + batch->positionptr_high += j; + batch->positionptr_low += j; + batch->nentries -= j; + debug3(printf("binary search jump %d positions to %d:%u\n", + j,batch->nentries,(((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low))); + } + batch->position = (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low); +#elif defined(WORDS_BIGENDIAN) + if (batch->nentries > 0 && Bigendian_convert_univcoord(*batch->positionptr) < local_goal) { + j = 1; + while (j < batch->nentries && Bigendian_convert_univcoord(batch->positionptr[j]) < local_goal) { + j <<= 1; /* gallop by 2 */ + } + if (j >= batch->nentries) { + j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal); + } else { + j = binary_search(j >> 1,j,batch->positionptr,local_goal); + } + batch->positionptr += j; + batch->nentries -= j; + debug3(printf("binary search jump %d positions to %d:%u\n", + j,batch->nentries,Bigendian_convert_univcoord(*batch->positionptr))); + } + batch->position = Bigendian_convert_univcoord(*batch->positionptr); #else - compoundpos->positions[i] = 
point_one_shift(&(compoundpos->npositions[i]),this,base); -#endif - } - - return compoundpos; -} - - - -/************************************************************************/ - -#ifdef LARGE_GENOMES -static int -binary_search (int lowi, int highi, unsigned char *positions_high, UINT4 *positions_low, Univcoord_T goal) { - bool foundp = false; - int middlei; - Univcoord_T position; - -#ifdef NOBINARY - return lowi; + if (batch->nentries > 0 && *batch->positionptr < local_goal) { + j = 1; + while (j < batch->nentries && batch->positionptr[j] < local_goal) { + j <<= 1; /* gallop by 2 */ + } + if (j >= batch->nentries) { + j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal); + } else { + j = binary_search(j >> 1,j,batch->positionptr,local_goal); + } + batch->positionptr += j; + batch->nentries -= j; + debug3(printf("binary search jump %d positions to %d:%u\n", + j,batch->nentries,*batch->positionptr)); + } + batch->position = *batch->positionptr; #endif - if (goal == 0U) { - return lowi; - } - - while (!foundp && lowi < highi) { - middlei = lowi + ((highi - lowi) / 2); - position = ((Univcoord_T) positions_high[middlei] << 32) + positions_low[middlei]; - debug2(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n", - lowi,(positions_high[lowi] << 32) + positions_low[lowi], - middlei,position, - highi,(positions_high[highi] << 32) + positions_low[highi],goal)); - if (goal < position) { - highi = middlei; - } else if (goal > position) { - lowi = middlei + 1; - } else { - foundp = true; + if (batch->nentries <= 0) { + debug3(printf("top of heap found to be empty\n")); + heap[1] = batch = (compoundpos->heapsize == 1) ? 
+ compoundpos->sentinel : heap[compoundpos->heapsize]; + heap[compoundpos->heapsize--] = compoundpos->sentinel; + } + + position = batch->position; + debug3(printf("heapify downward on %u\n",position)); + debug3(printf("Comparing right %d: %u\n",2,heap[2]->position)); + if (position <= heap[2]->position) { + debug3(printf("Inserting at 1\n")); + /* heap[1] = batch; -- not necessary because batch is already at heap[1] */ + } else { + heap[1] = heap[2]; + debug3(printf("Comparing left %d/right %d: %u and %u\n", + 3,4,heap[3]->position,heap[4]->position)); + smallesti = 4 - (heap[3]->position < heap[4]->position); + if (position <= heap[smallesti]->position) { + debug3(printf("Inserting at 2\n")); + heap[2] = batch; + } else { + debug3(printf("Inserting at %d\n",smallesti)); + heap[2] = heap[smallesti]; + heap[smallesti] = batch; + } + } + } + if (batch->position == local_goal) { + *value = batch->position; + debug3(printf("Found! Returning position %llu\n",(unsigned long long) *value)); + return 1; } - } - if (foundp == true) { - return middlei; } else { - return highi; - } -} - + /* 16 batches */ + while (compoundpos->heapsize > 0 && (batch = heap[1])->position < local_goal) { + debug3(printf("Compoundpos_search iteration, heapsize %d:\n",compoundpos->heapsize)); + debug3(heap_even_dump(heap,compoundpos->heapsize)); +#ifdef LARGE_GENOMES + if (batch->nentries > 0 && (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low) < local_goal) { + j = 1; + while (j < batch->nentries && + ((Univcoord_T) batch->positionptr_high[j] << 32) + batch->positionptr_low[j] < local_goal) { + j <<= 1; /* gallop by 2 */ + } + if (j >= batch->nentries) { + j = binary_search(j >> 1,batch->nentries,batch->positionptr_high,batch->positionptr_low,local_goal); + } else { + j = binary_search(j >> 1,j,batch->positionptr_high,batch->positionptr_low,local_goal); + } + batch->positionptr_high += j; + batch->positionptr_low += j; + batch->nentries -= j; + debug3(printf("binary 
search jump %d positions to %d:%u\n", + j,batch->nentries,(((Univcoord_T) *batch->positionptr_high) << 32 + (*batch->positionptr_low)))); + } + batch->position = (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low); +#elif defined(WORDS_BIGENDIAN) + if (batch->nentries > 0 && Bigendian_convert_univcoord(*batch->positionptr) < local_goal) { + j = 1; + while (j < batch->nentries && Bigendian_convert_univcoord(batch->positionptr[j]) < local_goal) { + j <<= 1; /* gallop by 2 */ + } + if (j >= batch->nentries) { + j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal); + } else { + j = binary_search(j >> 1,j,batch->positionptr,local_goal); + } + batch->positionptr += j; + batch->nentries -= j; + debug3(printf("binary search jump %d positions to %d:%u\n", + j,batch->nentries,Bigendian_convert_univcoord(*batch->positionptr))); + } + batch->position = Bigendian_convert_univcoord(*batch->positionptr); #else - -static int -binary_search (int lowi, int highi, Univcoord_T *positions, Univcoord_T goal) { - bool foundp = false; - int middlei; - -#ifdef NOBINARY - return lowi; + if (batch->nentries > 0 && *batch->positionptr < local_goal) { + j = 1; + while (j < batch->nentries && batch->positionptr[j] < local_goal) { + j <<= 1; /* gallop by 2 */ + } + if (j >= batch->nentries) { + j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal); + } else { + j = binary_search(j >> 1,j,batch->positionptr,local_goal); + } + batch->positionptr += j; + batch->nentries -= j; + debug3(printf("binary search jump %d positions to %d:%u\n", + j,batch->nentries,*batch->positionptr)); + } + batch->position = *batch->positionptr; #endif - if (goal == 0U) { - return lowi; - } - - while (!foundp && lowi < highi) { - middlei = lowi + ((highi - lowi) / 2); -#ifdef WORDS_BIGENDIAN - debug2(printf(" binary: %d:%u %d:%u %d:%u vs. 
%u\n", - lowi,Bigendian_convert_univcoord(positions[lowi]), - middlei,Bigendian_convert_univcoord(positions[middlei]), - highi,Bigendian_convert_univcoord(positions[highi]),goal)); - if (goal < Bigendian_convert_univcoord(positions[middlei])) { - highi = middlei; - } else if (goal > Bigendian_convert_univcoord(positions[middlei])) { - lowi = middlei + 1; - } else { - foundp = true; + if (batch->nentries <= 0) { + debug3(printf("top of heap found to be empty\n")); + heap[1] = batch = (compoundpos->heapsize == 1) ? + compoundpos->sentinel : heap[compoundpos->heapsize]; + heap[compoundpos->heapsize--] = compoundpos->sentinel; + } + + position = batch->position; + debug3(printf("heapify downward on %u\n",position)); + /* Comparison 0/3 */ + debug3(printf("Comparing right %d: %u\n",2,heap[2]->position)); + if (position <= heap[2]->position) { + debug3(printf("Inserting at 1\n")); + /* heap[1] = batch; -- not necessary because batch is already at heap[1] */ + } else { + heap[1] = heap[2]; + /* Comparison 1/3 */ + debug3(printf("Comparing left %d/right %d: %u and %u\n", + 3,4,heap[3]->position,heap[4]->position)); + smallesti = 4 - (heap[3]->position < heap[4]->position); + if (position <= heap[smallesti]->position) { + debug3(printf("Inserting at 2\n")); + heap[2] = batch; + } else { + heap[2] = heap[smallesti]; + parenti = smallesti; + smallesti <<= 1; + /* Comparison 2/3 */ + debug3(printf("Comparing left %d/right %d: %u and %u\n", + smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position)); + smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position); + if (position <= heap[smallesti]->position) { + debug3(printf("Inserting at %d\n",parenti)); + heap[parenti] = batch; + } else { + heap[parenti] = heap[smallesti]; + parenti = smallesti; + smallesti <<= 1; + /* Comparison 3/3 */ + debug3(printf("Comparing left %d/right %d: %u and %u\n", + smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position)); + 
smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position); + if (position <= heap[smallesti]->position) { + debug3(printf("Inserting at %d\n",parenti)); + heap[parenti] = batch; + } else { + heap[parenti] = heap[smallesti]; + debug3(printf("Inserting at %d\n",smallesti)); + heap[smallesti] = batch; + } + } + } + } } -#else - debug2(printf(" binary: %d:%u %d:%u %d:%u vs. %u\n", - lowi,positions[lowi],middlei,positions[middlei], - highi,positions[highi],goal)); - if (goal < positions[middlei]) { - highi = middlei; - } else if (goal > positions[middlei]) { - lowi = middlei + 1; - } else { - foundp = true; + if (batch->position == local_goal) { + *value = batch->position; + debug3(printf("Found! Returning position %llu\n",(unsigned long long) *value)); + return 1; } -#endif } - if (foundp == true) { - return middlei; - } else { - return highi; - } + *value = batch->position; + debug3(printf("Returning position %llu\n",(unsigned long long) *value)); + return 1; } -#endif -void -Compoundpos_heap_init (Compoundpos_T compoundpos, int querylength, int diagterm) { - Batch_T batch; - int startbound, i; +#if defined(LARGE_GENOMES) || !defined(HAVE_SSE4_1) - compoundpos->heapsize = 0; - for (i = 0; i < compoundpos->n; i++) { - batch = &(compoundpos->batchpool[i]); -#ifdef LARGE_GENOMES - batch->positionptr_high = compoundpos->positions_high[i]; - batch->positionptr_low = compoundpos->positions_low[i]; -#else - batch->positionptr = compoundpos->positions[i]; -#endif - batch->nentries = compoundpos->npositions[i]; - if (diagterm < querylength) { - startbound = querylength - diagterm; -#ifdef LARGE_GENOMES - while (batch->nentries > 0 && (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low) < (unsigned int) startbound) { - debug11(printf("Eliminating diagonal %u as straddling beginning of genome (Compoundpos_heap_init)\n", - ((Univcoord_T) *batch->positionptr_high << 32) + *batch->positionptr_low)); - ++batch->positionptr_high; - 
++batch->positionptr_low; - --batch->nentries; - } -#elif defined(WORDS_BIGENDIAN) - while (batch->nentries > 0 && Bigendian_convert_univcoord(*batch->positionptr) < (unsigned int) startbound) { - debug11(printf("Eliminating diagonal %u as straddling beginning of genome (Compoundpos_heap_init)\n", - Bigendian_convert_univcoord(*batch->positionptr))); - ++batch->positionptr; - --batch->nentries; - } +#define READ_THEN_WRITE 1 + +static Univcoord_T * +merge_batches_one_heap_16_existing (int *nmerged, struct Batch_T *batchpool, int nentries, int diagterm) { + Univcoord_T *positions, *ptr, position, last_position, this_position; + struct Batch_T sentinel_struct; + Batch_T batch, sentinel, heap[17]; + int heapsize; + unsigned int i; +#ifdef READ_THEN_WRITE + unsigned int smallesti_1, smallesti_2, smallesti; #else - while (batch->nentries > 0 && *batch->positionptr < (unsigned int) startbound) { - debug11(printf("Eliminating diagonal %u as straddling beginning of genome (Compoundpos_heap_init)\n", - *batch->positionptr)); - ++batch->positionptr; - --batch->nentries; - } + unsigned int parenti, smallesti; #endif - } + + debug3(printf("starting merge_batches_one_heap_16_existing\n")); + + debug0(int nentries_save = nentries); + + ptr = positions = (Univcoord_T *) MALLOC_ALIGN(nentries * sizeof(Univcoord_T)); + + /* Set up heap */ + heapsize = 0; + for (i = 0; i < 16; i++) { + batch = &(batchpool[i]); if (batch->nentries > 0) { #ifdef LARGE_GENOMES - batch->position = (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low); + batch->position = (((Univcoord_T) *batch->positionptr_high++) << 32) + (*batch->positionptr_low++); #elif defined(WORDS_BIGENDIAN) - batch->position = Bigendian_convert_univcoord(*batch->positionptr); + batch->position = Bigendian_convert_univcoord(*batch->positionptr++); #else - batch->position = *batch->positionptr; + batch->position = *batch->positionptr++; #endif - 
heap_insert_even(compoundpos->heap,&compoundpos->heapsize,batch,batch->position); + heap_insert_even(heap,&heapsize,batch,batch->position); } } - compoundpos->sentinel_struct.position = (Univcoord_T) -1; /* infinity */ + sentinel_struct.position = (Univcoord_T) -1; /* infinity */ #ifdef LARGE_GENOMES - compoundpos->sentinel_struct.positionptr_high = &sentinel_position_high; - compoundpos->sentinel_struct.positionptr_low = &sentinel_position_low; + sentinel_struct.positionptr_high = &sentinel_position_high; + sentinel_struct.positionptr_low = &sentinel_position_low; #else - compoundpos->sentinel_struct.positionptr = &(compoundpos->sentinel_struct.position); + sentinel_struct.positionptr = &(sentinel_struct.position); #endif - compoundpos->sentinel = &compoundpos->sentinel_struct; + sentinel = &sentinel_struct; - for (i = compoundpos->heapsize+1; i <= compoundpos->n; i++) { - compoundpos->heap[i] = compoundpos->sentinel; + for (i = heapsize+1; i <= 16; i++) { + heap[i] = sentinel; } - return; -} - - -#if 0 -/* Used by DEBUG3 and DEBUG6 */ -static void -heap_even_dump (Batch_T *heap, int heapsize) { - int i; - Batch_T batch; + last_position = 0U; + while (--nentries >= 1) { + debug3(printf("nentries = %d, top of heap is %u (%d)\n", + nentries+1,heap[1]->position,heapsize)); - for (i = 1; i <= heapsize; i++) { - batch = heap[i]; - printf("#%d--%d:%llu ",i,batch->nentries,(unsigned long long) batch->position); - } - printf("\n"); -} + /* Get minimum */ + batch = heap[1]; +#ifdef CONVERT_TO_LITTLEENDIAN + this_position = Bigendian_convert_univcoord(batch->position) + diagterm; +#else + this_position = batch->position + diagterm; #endif + if (this_position != last_position) { + *ptr++ = this_position; + } + last_position = this_position; + if (--batch->nentries <= 0) { + /* Use last batch (or sentinel) in heap for insertion */ + heap[1] = batch = (heapsize == 1) ? sentinel : heap[heapsize]; + heap[heapsize--] = sentinel; - -/* Returns true if found. 
emptyp is true only if every batch is - empty. If procedure returns true, empty is guaranteed to be - false. */ -bool -Compoundpos_find (bool *emptyp, Compoundpos_T compoundpos, Univcoord_T local_goal) { - Batch_T *heap = compoundpos->heap, batch; - int i, j; - - debug6(printf("\nEntering Compoundpos_find with local_goal %u\n",local_goal)); - - *emptyp = true; - i = 1; - while (i <= compoundpos->heapsize) { - debug6(printf("Compoundpos_find iteration, heapsize %d:\n",compoundpos->heapsize)); - debug6(heap_even_dump(heap,compoundpos->heapsize)); - - batch = heap[i]; + } else { + /* Advance heap, and use this batch for insertion */ #ifdef LARGE_GENOMES - if (batch->nentries > 0 && (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low) < local_goal) { - j = 1; - while (j < batch->nentries && - ((Univcoord_T) batch->positionptr_high[j] << 32) + batch->positionptr_low[j] < local_goal) { - j <<= 1; /* gallop by 2 */ - } - if (j >= batch->nentries) { - j = binary_search(j >> 1,batch->nentries,batch->positionptr_high,batch->positionptr_low,local_goal); - } else { - j = binary_search(j >> 1,j,batch->positionptr_high,batch->positionptr_low,local_goal); - } - batch->positionptr_high += j; - batch->positionptr_low += j; - batch->nentries -= j; - debug6(printf("binary search jump %d positions to %d:%u\n", - j,batch->nentries,(((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low))); - } + batch->position = (((Univcoord_T) *batch->positionptr_high++) << 32) + (*batch->positionptr_low++); #elif defined(WORDS_BIGENDIAN) - if (batch->nentries > 0 && Bigendian_convert_univcoord(*batch->positionptr) < local_goal) { - j = 1; - while (j < batch->nentries && Bigendian_convert_univcoord(batch->positionptr[j]) < local_goal) { - j <<= 1; /* gallop by 2 */ - } - if (j >= batch->nentries) { - j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal); - } else { - j = binary_search(j >> 1,j,batch->positionptr,local_goal); - } - 
batch->positionptr += j; - batch->nentries -= j; - debug6(printf("binary search jump %d positions to %d:%u\n", - j,batch->nentries,Bigendian_convert_univcoord(*batch->positionptr))); - } + batch->position = Bigendian_convert_univcoord(*batch->positionptr++); #else - if (batch->nentries > 0 && *batch->positionptr < local_goal) { - j = 1; - while (j < batch->nentries && batch->positionptr[j] < local_goal) { - j <<= 1; /* gallop by 2 */ - } - if (j >= batch->nentries) { - j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal); - } else { - j = binary_search(j >> 1,j,batch->positionptr,local_goal); - } - batch->positionptr += j; - batch->nentries -= j; - debug6(printf("binary search jump %d positions to %d:%u\n", - j,batch->nentries,*batch->positionptr)); - } + batch->position = *batch->positionptr++; #endif + } - if (batch->nentries <= 0) { - /* Empty, so continue with loop */ - /* Move last heap to this one, and reduce heapsize */ - compoundpos->heap[i] = compoundpos->heap[compoundpos->heapsize]; - --compoundpos->heapsize; + position = batch->position; + debug3(printf("starting heapify with %u\n",position)); -#ifdef LARGE_GENOMES - } else if (((Univcoord_T) *batch->positionptr_high << 32) + (*batch->positionptr_low) > local_goal) { - /* Already advanced past goal, so continue with loop */ - debug6(printf("Setting emptyp to be false\n")); - *emptyp = false; - i++; -#elif defined(WORDS_BIGENDIAN) - } else if (Bigendian_convert_univcoord(*batch->positionptr) > local_goal) { - /* Already advanced past goal, so continue with loop */ - debug6(printf("Setting emptyp to be false\n")); - *emptyp = false; - i++; -#else - } else if (*batch->positionptr > local_goal) { - /* Already advanced past goal, so continue with loop */ - debug6(printf("Setting emptyp to be false\n")); - *emptyp = false; - i++; -#endif +#ifdef READ_THEN_WRITE + /* Comparison 0/3 */ + debug3(printf("Comparing right %d: %u\n",2,heap[2]->position)); + if (position <= heap[2]->position) { + 
debug3(printf("Inserting at 1\n")); + /* heap[1] = batch; -- not necessary because batch is already at heap[1] */ } else { - /* Found goal, so return */ - debug6(printf("Setting emptyp to be false\n")); - *emptyp = false; -#ifdef LARGE_GENOMES - debug6(printf("Found! Returning position %llu\n",(((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low))); -#elif defined(WORDS_BIGENDIAN) - debug6(printf("Found! Returning position %u\n",Bigendian_convert_univcoord(*batch->positionptr))); + /* Comparison 1/3 */ + debug3(printf("Comparing left %d/right %d: %u and %u\n", + 3,4,heap[3]->position,heap[4]->position)); + smallesti = 4 - (heap[3]->position < heap[4]->position); + if (position <= heap[smallesti]->position) { + debug3(printf("Inserting at 2\n")); + heap[1] = heap[2]; + heap[2] = batch; + } else { + smallesti_1 = smallesti; + smallesti <<= 1; + /* Comparison 2/3 */ + debug3(printf("Comparing left %d/right %d: %u and %u\n", + smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position)); + smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position); + if (position <= heap[smallesti]->position) { + debug3(printf("Inserting at %d\n",smallesti_1)); + heap[1] = heap[2]; + heap[2] = heap[smallesti_1]; + heap[smallesti_1] = batch; + } else { + smallesti_2 = smallesti; + smallesti <<= 1; + /* Comparison 3/3 */ + debug3(printf("Comparing left %d/right %d: %u and %u\n", + smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position)); + smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position); + if (position <= heap[smallesti]->position) { + debug3(printf("Inserting at %d\n",smallesti_2)); + heap[1] = heap[2]; + heap[2] = heap[smallesti_1]; + heap[smallesti_1] = heap[smallesti_2]; + heap[smallesti_2] = batch; + } else { + debug3(printf("Inserting at %d\n",smallesti)); + heap[1] = heap[2]; + heap[2] = heap[smallesti_1]; + heap[smallesti_1] = heap[smallesti_2]; + 
heap[smallesti_2] = heap[smallesti]; + heap[smallesti] = batch; + } + } + } + } #else - debug6(printf("Found! Returning position %u\n",*batch->positionptr)); + /* Comparison 0/3 */ + debug3(printf("Comparing right %d: %u\n",2,heap[2]->position)); + if (position <= heap[2]->position) { + debug3(printf("Inserting at 1\n")); + /* heap[1] = batch; -- not necessary because batch is already at heap[1] */ + } else { + heap[1] = heap[2]; + /* Comparison 1/3 */ + debug3(printf("Comparing left %d/right %d: %u and %u\n", + 3,4,heap[3]->position,heap[4]->position)); + smallesti = 4 - (heap[3]->position < heap[4]->position); + if (position <= heap[smallesti]->position) { + debug3(printf("Inserting at 2\n")); + heap[2] = batch; + } else { + heap[2] = heap[smallesti]; + parenti = smallesti; + smallesti <<= 1; + /* Comparison 2/3 */ + debug3(printf("Comparing left %d/right %d: %u and %u\n", + smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position)); + smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position); + if (position <= heap[smallesti]->position) { + debug3(printf("Inserting at %d\n",parenti)); + heap[parenti] = batch; + } else { + heap[parenti] = heap[smallesti]; + parenti = smallesti; + smallesti <<= 1; + /* Comparison 3/3 */ + debug3(printf("Comparing left %d/right %d: %u and %u\n", + smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position)); + smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position); + if (position <= heap[smallesti]->position) { + debug3(printf("Inserting at %d\n",parenti)); + heap[parenti] = batch; + } else { + heap[parenti] = heap[smallesti]; + debug3(printf("Inserting at %d\n",smallesti)); + heap[smallesti] = batch; + } + } + } + } #endif -#ifdef LARGE_GENOMES - ++batch->positionptr_high; - ++batch->positionptr_low; + } + +#ifdef CONVERT_TO_LITTLEENDIAN + this_position = Bigendian_convert_univcoord(heap[1]->position) + diagterm; #else - ++batch->positionptr; 
+ this_position = heap[1]->position + diagterm; #endif - --batch->nentries; - return true; + if (this_position != last_position) { + *ptr++ = this_position; + } + + *nmerged = (ptr - positions); + +#if 0 + position = positions[0]; + for (i = 1; i < nentries_save; i++) { + if (positions[i] <= position) { + abort(); } + position = positions[i]; } +#endif - /* Done with loop: Fail. */ - debug6(printf("Returning emptyp %d\n",*emptyp)); - return false; + debug0( + for (i = 0; i < nentries_save; i++) { + printf("%u\n",positions[i]); + } + printf("\n"); + ) + + return positions; } +static Univcoord_T * +merge_batches_one_heap_4_existing (int *nmerged, struct Batch_T *batchpool, int nentries, int diagterm) { + Univcoord_T *positions, *ptr, position, last_position, this_position; + struct Batch_T sentinel_struct; + Batch_T batch, sentinel, heap[5]; + int heapsize; + unsigned int i; +#ifdef READ_THEN_WRITE + unsigned int smallesti; +#else + unsigned int parenti, smallesti; +#endif -/* Returns 0 if heapsize is 0, else 1, and returns smallest value >= local_goal */ -int -Compoundpos_search (Univcoord_T *value, Compoundpos_T compoundpos, Univcoord_T local_goal) { - int parenti, smallesti, j; - Batch_T batch, *heap = compoundpos->heap; - Univcoord_T position; + debug3(printf("starting merge_batches_one_heap_4_existing\n")); - debug3(printf("\nEntering Compoundpos_search with local_goal %u\n",local_goal)); - if (compoundpos->heapsize <= 0) { - debug3(printf("Returning because heapsize is %d\n",compoundpos->heapsize)); - return 0; - } + debug0(int nentries_save = nentries); - if (compoundpos->n == 4) { - while (compoundpos->heapsize > 0 && (batch = heap[1])->position < local_goal) { - debug3(printf("Compoundpos_search iteration, heapsize %d:\n",compoundpos->heapsize)); - debug3(heap_even_dump(heap,compoundpos->heapsize)); + ptr = positions = (Univcoord_T *) MALLOC_ALIGN(nentries * sizeof(Univcoord_T)); + + /* Set up heap */ + heapsize = 0; + for (i = 0; i < 4; i++) { + batch = 
&(batchpool[i]); + if (batch->nentries > 0) { #ifdef LARGE_GENOMES - if (batch->nentries > 0 && (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low) < local_goal) { - j = 1; - while (j < batch->nentries && - ((Univcoord_T) batch->positionptr_high[j] << 32) + batch->positionptr_low[j] < local_goal) { - j <<= 1; /* gallop by 2 */ - } - if (j >= batch->nentries) { - j = binary_search(j >> 1,batch->nentries,batch->positionptr_high,batch->positionptr_low,local_goal); - } else { - j = binary_search(j >> 1,j,batch->positionptr_high,batch->positionptr_low,local_goal); - } - batch->positionptr_high += j; - batch->positionptr_low += j; - batch->nentries -= j; - debug3(printf("binary search jump %d positions to %d:%u\n", - j,batch->nentries,(((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low))); - } - batch->position = (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low); + batch->position = (((Univcoord_T) *batch->positionptr_high++) << 32) + (*batch->positionptr_low++); #elif defined(WORDS_BIGENDIAN) - if (batch->nentries > 0 && Bigendian_convert_univcoord(*batch->positionptr) < local_goal) { - j = 1; - while (j < batch->nentries && Bigendian_convert_univcoord(batch->positionptr[j]) < local_goal) { - j <<= 1; /* gallop by 2 */ - } - if (j >= batch->nentries) { - j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal); - } else { - j = binary_search(j >> 1,j,batch->positionptr,local_goal); - } - batch->positionptr += j; - batch->nentries -= j; - debug3(printf("binary search jump %d positions to %d:%u\n", - j,batch->nentries,Bigendian_convert_univcoord(*batch->positionptr))); - } - batch->position = Bigendian_convert_univcoord(*batch->positionptr); + batch->position = Bigendian_convert_univcoord(*batch->positionptr++); #else - if (batch->nentries > 0 && *batch->positionptr < local_goal) { - j = 1; - while (j < batch->nentries && batch->positionptr[j] < local_goal) { - j <<= 1; /* gallop by 
2 */ - } - if (j >= batch->nentries) { - j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal); - } else { - j = binary_search(j >> 1,j,batch->positionptr,local_goal); - } - batch->positionptr += j; - batch->nentries -= j; - debug3(printf("binary search jump %d positions to %d:%u\n", - j,batch->nentries,*batch->positionptr)); - } - batch->position = *batch->positionptr; + batch->position = *batch->positionptr++; #endif - - if (batch->nentries <= 0) { - debug3(printf("top of heap found to be empty\n")); - heap[1] = batch = (compoundpos->heapsize == 1) ? - compoundpos->sentinel : heap[compoundpos->heapsize]; - heap[compoundpos->heapsize--] = compoundpos->sentinel; - } - - position = batch->position; - debug3(printf("heapify downward on %u\n",position)); - debug3(printf("Comparing right %d: %u\n",2,heap[2]->position)); - if (position <= heap[2]->position) { - debug3(printf("Inserting at 1\n")); - /* heap[1] = batch; -- not necessary because batch is already at heap[1] */ - } else { - heap[1] = heap[2]; - debug3(printf("Comparing left %d/right %d: %u and %u\n", - 3,4,heap[3]->position,heap[4]->position)); - smallesti = 4 - (heap[3]->position < heap[4]->position); - if (position <= heap[smallesti]->position) { - debug3(printf("Inserting at 2\n")); - heap[2] = batch; - } else { - debug3(printf("Inserting at %d\n",smallesti)); - heap[2] = heap[smallesti]; - heap[smallesti] = batch; - } - } - } - if (batch->position == local_goal) { - *value = batch->position; - debug3(printf("Found! 
Returning position %llu\n",(unsigned long long) *value)); - return 1; + heap_insert_even(heap,&heapsize,batch,batch->position); } + } - } else { - /* 16 batches */ - while (compoundpos->heapsize > 0 && (batch = heap[1])->position < local_goal) { - debug3(printf("Compoundpos_search iteration, heapsize %d:\n",compoundpos->heapsize)); - debug3(heap_even_dump(heap,compoundpos->heapsize)); + sentinel_struct.position = (Univcoord_T) -1; /* infinity */ #ifdef LARGE_GENOMES - if (batch->nentries > 0 && (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low) < local_goal) { - j = 1; - while (j < batch->nentries && - ((Univcoord_T) batch->positionptr_high[j] << 32) + batch->positionptr_low[j] < local_goal) { - j <<= 1; /* gallop by 2 */ - } - if (j >= batch->nentries) { - j = binary_search(j >> 1,batch->nentries,batch->positionptr_high,batch->positionptr_low,local_goal); - } else { - j = binary_search(j >> 1,j,batch->positionptr_high,batch->positionptr_low,local_goal); - } - batch->positionptr_high += j; - batch->positionptr_low += j; - batch->nentries -= j; - debug3(printf("binary search jump %d positions to %d:%u\n", - j,batch->nentries,(((Univcoord_T) *batch->positionptr_high) << 32 + (*batch->positionptr_low)))); - } - batch->position = (((Univcoord_T) *batch->positionptr_high) << 32) + (*batch->positionptr_low); -#elif defined(WORDS_BIGENDIAN) - if (batch->nentries > 0 && Bigendian_convert_univcoord(*batch->positionptr) < local_goal) { - j = 1; - while (j < batch->nentries && Bigendian_convert_univcoord(batch->positionptr[j]) < local_goal) { - j <<= 1; /* gallop by 2 */ - } - if (j >= batch->nentries) { - j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal); - } else { - j = binary_search(j >> 1,j,batch->positionptr,local_goal); - } - batch->positionptr += j; - batch->nentries -= j; - debug3(printf("binary search jump %d positions to %d:%u\n", - j,batch->nentries,Bigendian_convert_univcoord(*batch->positionptr))); - } - 
batch->position = Bigendian_convert_univcoord(*batch->positionptr); + sentinel_struct.positionptr_high = &sentinel_position_high; + sentinel_struct.positionptr_low = &sentinel_position_low; #else - if (batch->nentries > 0 && *batch->positionptr < local_goal) { - j = 1; - while (j < batch->nentries && batch->positionptr[j] < local_goal) { - j <<= 1; /* gallop by 2 */ - } - if (j >= batch->nentries) { - j = binary_search(j >> 1,batch->nentries,batch->positionptr,local_goal); - } else { - j = binary_search(j >> 1,j,batch->positionptr,local_goal); - } - batch->positionptr += j; - batch->nentries -= j; - debug3(printf("binary search jump %d positions to %d:%u\n", - j,batch->nentries,*batch->positionptr)); - } - batch->position = *batch->positionptr; + sentinel_struct.positionptr = &(sentinel_struct.position); +#endif + sentinel = &sentinel_struct; + + for (i = heapsize+1; i <= 4; i++) { + heap[i] = sentinel; + } + + last_position = 0U; + while (--nentries >= 1) { + debug3(printf("nentries = %d, top of heap is %u (%d)\n", + nentries+1,heap[1]->position,heapsize)); + + /* Get minimum */ + batch = heap[1]; +#ifdef CONVERT_TO_LITTLEENDIAN + this_position = Bigendian_convert_univcoord(batch->position) + diagterm; +#else + this_position = batch->position + diagterm; +#endif + if (this_position != last_position) { + *ptr++ = this_position; + } + last_position = this_position; + + + if (--batch->nentries <= 0) { + /* Use last batch (or sentinel) in heap for insertion */ + heap[1] = batch = (heapsize == 1) ? 
sentinel : heap[heapsize]; + heap[heapsize--] = sentinel; + + } else { + /* Advance heap, and use this batch for insertion */ +#ifdef LARGE_GENOMES + batch->position = (((Univcoord_T) *batch->positionptr_high++) << 32) + (*batch->positionptr_low++); +#elif defined(WORDS_BIGENDIAN) + batch->position = Bigendian_convert_univcoord(*batch->positionptr++); +#else + batch->position = *batch->positionptr++; #endif + } - if (batch->nentries <= 0) { - debug3(printf("top of heap found to be empty\n")); - heap[1] = batch = (compoundpos->heapsize == 1) ? - compoundpos->sentinel : heap[compoundpos->heapsize]; - heap[compoundpos->heapsize--] = compoundpos->sentinel; - } - - position = batch->position; - debug3(printf("heapify downward on %u\n",position)); - /* Comparison 0/3 */ - debug3(printf("Comparing right %d: %u\n",2,heap[2]->position)); - if (position <= heap[2]->position) { - debug3(printf("Inserting at 1\n")); - /* heap[1] = batch; -- not necessary because batch is already at heap[1] */ + position = batch->position; + debug3(printf("starting heapify with %u\n",position)); + +#ifdef READ_THEN_WRITE + /* Comparison 0/3 */ + debug3(printf("Comparing right %d: %u\n",2,heap[2]->position)); + if (position <= heap[2]->position) { + debug3(printf("Inserting at 1\n")); + /* heap[1] = batch; -- not necessary because batch is already at heap[1] */ + } else { + /* Comparison 1/3 */ + debug3(printf("Comparing left %d/right %d: %u and %u\n", + 3,4,heap[3]->position,heap[4]->position)); + smallesti = 4 - (heap[3]->position < heap[4]->position); + if (position <= heap[smallesti]->position) { + debug3(printf("Inserting at 2\n")); + heap[1] = heap[2]; + heap[2] = batch; } else { + debug3(printf("Inserting at %d\n",smallesti)); heap[1] = heap[2]; - /* Comparison 1/3 */ - debug3(printf("Comparing left %d/right %d: %u and %u\n", - 3,4,heap[3]->position,heap[4]->position)); - smallesti = 4 - (heap[3]->position < heap[4]->position); - if (position <= heap[smallesti]->position) { - 
debug3(printf("Inserting at 2\n")); - heap[2] = batch; - } else { - heap[2] = heap[smallesti]; - parenti = smallesti; - smallesti <<= 1; - /* Comparison 2/3 */ - debug3(printf("Comparing left %d/right %d: %u and %u\n", - smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position)); - smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position); - if (position <= heap[smallesti]->position) { - debug3(printf("Inserting at %d\n",parenti)); - heap[parenti] = batch; - } else { - heap[parenti] = heap[smallesti]; - parenti = smallesti; - smallesti <<= 1; - /* Comparison 3/3 */ - debug3(printf("Comparing left %d/right %d: %u and %u\n", - smallesti-1,smallesti,heap[smallesti-1]->position,heap[smallesti]->position)); - smallesti -= (heap[LEFTSIBLING2(smallesti)]->position < heap[smallesti]->position); - if (position <= heap[smallesti]->position) { - debug3(printf("Inserting at %d\n",parenti)); - heap[parenti] = batch; - } else { - heap[parenti] = heap[smallesti]; - debug3(printf("Inserting at %d\n",smallesti)); - heap[smallesti] = batch; - } - } - } + heap[2] = heap[smallesti]; + heap[smallesti] = batch; } } - if (batch->position == local_goal) { - *value = batch->position; - debug3(printf("Found! 
Returning position %llu\n",(unsigned long long) *value)); - return 1; + +#else + /* Comparison 0/3 */ + debug3(printf("Comparing right %d: %u\n",2,heap[2]->position)); + if (position <= heap[2]->position) { + debug3(printf("Inserting at 1\n")); + /* heap[1] = batch; -- not necessary because batch is already at heap[1] */ + } else { + heap[1] = heap[2]; + /* Comparison 1/3 */ + debug3(printf("Comparing left %d/right %d: %u and %u\n", + 3,4,heap[3]->position,heap[4]->position)); + smallesti = 4 - (heap[3]->position < heap[4]->position); + if (position <= heap[smallesti]->position) { + debug3(printf("Inserting at 2\n")); + heap[2] = batch; + } else { + heap[2] = heap[smallesti]; + heap[smallesti] = batch; + } } + +#endif } - *value = batch->position; - debug3(printf("Returning position %llu\n",(unsigned long long) *value)); - return 1; -} +#ifdef CONVERT_TO_LITTLEENDIAN + this_position = Bigendian_convert_univcoord(heap[1]->position) + diagterm; +#else + this_position = heap[1]->position + diagterm; +#endif + if (this_position != last_position) { + *ptr++ = this_position; + } + + *nmerged = (ptr - positions); + +#if 0 + position = positions[0]; + for (i = 1; i < nentries_save; i++) { + if (positions[i] <= position) { + abort(); + } + position = positions[i]; + } +#endif + debug0( + for (i = 0; i < nentries_save; i++) { + printf("%u\n",positions[i]); + } + printf("\n"); + ) + + + return positions; +} +/* Called only by Spanningelt_diagonals */ +/* Note: Result has to be on a SIMD boundary (16-byte for SSE2, 32-byte for AVX2, 64-byte for AVX512) for Merge_uint4 to work */ Univcoord_T * Indexdb_merge_compoundpos (int *nmerged, Compoundpos_T compoundpos, int diagterm) { int i; @@ -1848,6 +1859,413 @@ } } +#elif defined(USE_REGISTER) + +#define KEY_MASK (~0U << 2) + +/* Without diagterm */ +static Univcoord_T * +merge_via_register (int *nmerged, unsigned int **positions, int *npositions) { + Univcoord_T *results, *ptr; + int ptrs[4]; + unsigned int diagonal; + __m128i 
queue, next, max, cmp; + int cmpflags; + unsigned int streami; + int j, i; + + __m128i shuffle_control[4]; + unsigned int sorter[4], curr; + + /* Initialize shuffle_control */ + shuffle_control[0] = _mm_set_epi8(0xF,0xE,0xD,0xC, 0xB,0xA,0x9,0x8, 0x7,0x6,0x5,0x4, 0x3,0x2,0x1,0x0); + shuffle_control[1] = _mm_set_epi8(0xF,0xE,0xD,0xC, 0xB,0xA,0x9,0x8, 0x3,0x2,0x1,0x0, 0x7,0x6,0x5,0x4); + shuffle_control[2] = _mm_set_epi8(0xF,0xE,0xD,0xC, 0x3,0x2,0x1,0x0, 0xB,0xA,0x9,0x8, 0x7,0x6,0x5,0x4); + shuffle_control[3] = _mm_set_epi8(0x3,0x2,0x1,0x0, 0xF,0xE,0xD,0xC, 0xB,0xA,0x9,0x8, 0x7,0x6,0x5,0x4); + + + debug(printf("merge_compoundpos, sizes:")); + + *nmerged = 0; + *nmerged += npositions[0]; + *nmerged += npositions[1]; + *nmerged += npositions[2]; + *nmerged += npositions[3]; + + if (*nmerged == 0) { + return (unsigned int *) NULL; + } else { + ptr = results = (unsigned int *) MALLOC_ALIGN((*nmerged) * sizeof(unsigned int)); + } + + /* Initialize queue with top of each stream (plus streami). Use an insertion sort. 
*/ + memset(ptrs,0,4*sizeof(int)); + if (ptrs[0] >= npositions[0]) { + sorter[0] = -1U; + } else { + sorter[0] = (positions[0][0] & KEY_MASK) + 0; + } + + if (ptrs[1] >= npositions[1]) { + sorter[1] = -1U; + } else { + sorter[1] = (positions[1][0] & KEY_MASK) + 1; + } + + if (ptrs[2] >= npositions[2]) { + sorter[2] = -1U; + } else { + sorter[2] = (positions[2][0] & KEY_MASK) + 2; + } + + if (ptrs[3] >= npositions[3]) { + sorter[3] = -1U; + } else { + sorter[3] = (positions[3][0] & KEY_MASK) + 3; + } + + + for (j = 1; j < 4; j++) { + curr = sorter[j]; + i = j - 1; + while (i >= 0 && sorter[i] > curr) { + sorter[i+1] = sorter[i]; + i--; + } + sorter[i+1] = curr; + } + + queue = _mm_setr_epi32(sorter[0],sorter[1],sorter[2],sorter[3]); + + while ((diagonal = _mm_extract_epi32(queue,0)) < -1U) { + /* Get the stream from the coded diagonal */ + streami = diagonal & ~KEY_MASK; + + /* Write the true diagonal from that stream */ + *ptr++ = positions[streami][ptrs[streami]++] /*+ diagterm*/; + + /* Obtain next value from that stream and encode */ + if (ptrs[streami] >= npositions[streami]) { + diagonal = -1U; + } else { + diagonal = (positions[streami][ptrs[streami]] & KEY_MASK) + streami; + } + + /* Determine where to insert into queue */ + next = _mm_set1_epi32(diagonal); + max = _mm_max_epu32(next,queue); + cmp = _mm_cmpeq_epi32(max,next); + cmpflags = _mm_movemask_epi8(cmp); + + /* Update queue */ + queue = _mm_insert_epi32(queue,diagonal,0); + queue = _mm_shuffle_epi8(queue,shuffle_control[7 - __builtin_clz(cmpflags)/4]); + } + + return results; +} + +static Univcoord_T * +merge_via_register_diagterm (int *nmerged, unsigned int **positions, int *npositions, int diagterm) { + Univcoord_T *results, *ptr; + int ptrs[4]; + unsigned int diagonal; + __m128i queue, next, max, cmp; + int cmpflags; + unsigned int streami; + int j, i; + + __m128i shuffle_control[4]; + unsigned int sorter[4], curr; + + /* Initialize shuffle_control */ + shuffle_control[0] = 
_mm_set_epi8(0xF,0xE,0xD,0xC, 0xB,0xA,0x9,0x8, 0x7,0x6,0x5,0x4, 0x3,0x2,0x1,0x0); + shuffle_control[1] = _mm_set_epi8(0xF,0xE,0xD,0xC, 0xB,0xA,0x9,0x8, 0x3,0x2,0x1,0x0, 0x7,0x6,0x5,0x4); + shuffle_control[2] = _mm_set_epi8(0xF,0xE,0xD,0xC, 0x3,0x2,0x1,0x0, 0xB,0xA,0x9,0x8, 0x7,0x6,0x5,0x4); + shuffle_control[3] = _mm_set_epi8(0x3,0x2,0x1,0x0, 0xF,0xE,0xD,0xC, 0xB,0xA,0x9,0x8, 0x7,0x6,0x5,0x4); + + + debug(printf("merge_compoundpos, sizes:")); + + *nmerged = 0; + *nmerged += npositions[0]; + *nmerged += npositions[1]; + *nmerged += npositions[2]; + *nmerged += npositions[3]; + + if (*nmerged == 0) { + results = (unsigned int *) MALLOC_ALIGN(sizeof(unsigned int)); + } else { + ptr = results = (unsigned int *) MALLOC_ALIGN((*nmerged) * sizeof(unsigned int)); + } + + /* Initialize queue with top of each stream (plus streami). Use an insertion sort. */ + memset(ptrs,0,4*sizeof(int)); + if (ptrs[0] >= npositions[0]) { + sorter[0] = -1U; + } else { + sorter[0] = (positions[0][0] & KEY_MASK) + 0; + } + + if (ptrs[1] >= npositions[1]) { + sorter[1] = -1U; + } else { + sorter[1] = (positions[1][0] & KEY_MASK) + 1; + } + + if (ptrs[2] >= npositions[2]) { + sorter[2] = -1U; + } else { + sorter[2] = (positions[2][0] & KEY_MASK) + 2; + } + + if (ptrs[3] >= npositions[3]) { + sorter[3] = -1U; + } else { + sorter[3] = (positions[3][0] & KEY_MASK) + 3; + } + + + for (j = 1; j < 4; j++) { + curr = sorter[j]; + i = j - 1; + while (i >= 0 && sorter[i] > curr) { + sorter[i+1] = sorter[i]; + i--; + } + sorter[i+1] = curr; + } + + queue = _mm_setr_epi32(sorter[0],sorter[1],sorter[2],sorter[3]); + + while ((diagonal = _mm_extract_epi32(queue,0)) < -1U) { + /* Get the stream from the coded diagonal */ + streami = diagonal & ~KEY_MASK; + + /* Write the true diagonal from that stream */ + *ptr++ = positions[streami][ptrs[streami]++] + diagterm; + + /* Obtain next value from that stream and encode */ + if (ptrs[streami] >= npositions[streami]) { + diagonal = -1U; + } else { + diagonal = 
(positions[streami][ptrs[streami]] & KEY_MASK) + streami; + } + + /* Determine where to insert into queue */ + next = _mm_set1_epi32(diagonal); + max = _mm_max_epu32(next,queue); + cmp = _mm_cmpeq_epi32(max,next); + cmpflags = _mm_movemask_epi8(cmp); + + /* Update queue */ + queue = _mm_insert_epi32(queue,diagonal,0); + queue = _mm_shuffle_epi8(queue,shuffle_control[7 - __builtin_clz(cmpflags)/4]); + } + + return results; +} + + +/* Called only by Spanningelt_diagonals */ +/* SIMD register version (eventually need to pad just 1) */ +/* compoundpos->positions set by Indexdb_read_inplace, so we have to allocate */ +Univcoord_T * +Indexdb_merge_compoundpos (int *nmerged, Compoundpos_T compoundpos, int diagterm) { + Univcoord_T *results, curr; + Univcoord_T *part[4]; + int npart[4]; + int j, i; + + debug(printf("merge_compoundpos, sizes:")); + + if (compoundpos->n == 4) { + results = merge_via_register_diagterm(&(*nmerged),&(compoundpos->positions[0]),&(compoundpos->npositions[0]),diagterm); + + } else { + part[0] = merge_via_register(&(npart[0]),&(compoundpos->positions[0]),&(compoundpos->npositions[0])); + part[1] = merge_via_register(&(npart[1]),&(compoundpos->positions[4]),&(compoundpos->npositions[4])); + part[2] = merge_via_register(&(npart[2]),&(compoundpos->positions[8]),&(compoundpos->npositions[8])); + part[3] = merge_via_register(&(npart[3]),&(compoundpos->positions[12]),&(compoundpos->npositions[12])); + + results = merge_via_register_diagterm(&(*nmerged),&(part[0]),&(npart[0]),diagterm); + + FREE(part[3]); + FREE(part[2]); + FREE(part[1]); + FREE(part[0]); + } + + /* Final insertion sort to correct for truncation of keys */ + for (j = 1; j < *nmerged; j++) { + curr = results[j]; + i = j - 1; + /* For a stable merge sort, is the second condition possible? 
*/ + while (i >= 0 && results[i] > curr) { + results[i+1] = results[i]; + i--; + } + results[i+1] = curr; + } + + return results; +} + +#else +/* SIMD merge version */ + +#define LEFT(i) (i << 1) /* index of the left child of node i in heap[] */ +#define RIGHT(i) ((i << 1) | 1) /* index of the right child of node i in heap[] */ + /* Indexdb_merge_compoundpos: combines the position lists in compoundpos->positions[] (lengths in compoundpos->npositions[]) through pairwise Merge_uint4 calls arranged as a binary tree over heap[], adds diagterm to every merged element, stores the total element count in *nmerged and returns the merged array. The n == 4 case uses leaves heap[4..7]; every other case reads 16 lists into leaves heap[16..31]. NOTE(review): the input lists are presumably sorted ascending, as a merge would require; confirm with callers. */ +Univcoord_T * +Indexdb_merge_compoundpos (int *nmerged, Compoundpos_T compoundpos, int diagterm) { + Univcoord_T *results; + int i, heapi, lefti, righti, k; + unsigned int *heap[32]; + int nelts[32]; + int nalloc, npadded; + UINT4 *prev_storage, *curr_storage; + + debug(printf("merge_compoundpos, sizes:")); + + if (compoundpos->n == 4) { /* four input lists: leaves heap[4..7] and internal nodes heap[2..3] are all carved out of prev_storage */ + nelts[7] = compoundpos->npositions[3]; + nelts[6] = compoundpos->npositions[2]; + nelts[5] = compoundpos->npositions[1]; + nelts[4] = compoundpos->npositions[0]; + debug(printf(" %d %d %d %d\n",nelts[4],nelts[5],nelts[6],nelts[7])); + + nelts[3] = nelts[6] + nelts[7]; + nelts[2] = nelts[4] + nelts[5]; + + npadded = PAD_UINT4(nelts[2]) + PAD_UINT4(nelts[3]) + PAD_UINT4(nelts[4]) + PAD_UINT4(nelts[5]) + PAD_UINT4(nelts[6]) + PAD_UINT4(nelts[7]); + prev_storage = (UINT4 *) MALLOC_ALIGN(npadded * sizeof(UINT4)); + nalloc = 0; + heap[2] = &(prev_storage[nalloc]); nalloc += PAD_UINT4(nelts[2]); + heap[3] = &(prev_storage[nalloc]); nalloc += PAD_UINT4(nelts[3]); + heap[4] = &(prev_storage[nalloc]); nalloc += PAD_UINT4(nelts[4]); + heap[5] = &(prev_storage[nalloc]); nalloc += PAD_UINT4(nelts[5]); + heap[6] = &(prev_storage[nalloc]); nalloc += PAD_UINT4(nelts[6]); + heap[7] = &(prev_storage[nalloc]); + + memcpy(heap[4],compoundpos->positions[0],nelts[4]*sizeof(UINT4)); + memcpy(heap[5],compoundpos->positions[1],nelts[5]*sizeof(UINT4)); + Merge_uint4(/*dest*/heap[2],heap[4],heap[5],nelts[4],nelts[5]); + + memcpy(heap[6],compoundpos->positions[2],nelts[6]*sizeof(UINT4)); + memcpy(heap[7],compoundpos->positions[3],nelts[7]*sizeof(UINT4)); + Merge_uint4(/*dest*/heap[3],heap[6],heap[7],nelts[6],nelts[7]); + + heap[1] = Merge_uint4(/*dest*/NULL,heap[2],heap[3],nelts[2],nelts[3]); /* NOTE(review): with dest NULL, Merge_uint4 presumably allocates and returns the output buffer; confirm in merge.c */ + *nmerged = nelts[2] + nelts[3]; + 
+#if defined(HAVE_SSE4_1) + /* Spanningelt procedure is not prepared for memory from _mm_malloc */ /* NOTE(review): the copy made below is itself allocated with MALLOC_ALIGN, so the remark above may be outdated; confirm against mem.h */ + if (*nmerged == 0) { + results = (unsigned int *) NULL; + } else { + results = (unsigned int *) MALLOC_ALIGN((*nmerged) * sizeof(unsigned int)); + memcpy(results,heap[1],(*nmerged) * sizeof(unsigned int)); + } + + _mm_free(heap[1]); + /* _mm_free(prev_storage); */ +#else + results = heap[1]; /* the buffer returned by Merge_uint4 is handed to the caller directly */ + /* FREE(prev_storage); */ +#endif + FREE_ALIGN(prev_storage); + + + for (i = 0; i < *nmerged; i++) { + results[i] += diagterm; + } + + CHECK_ALIGN(results); + return results; + + } else { /* NOTE(review): this branch reads 16 lists and so assumes compoundpos->n is 16 whenever it is not 4; verify against callers */ + npadded = 0; + for (heapi = 16; heapi < 32; heapi++) { + nelts[heapi] = compoundpos->npositions[heapi-16]; + npadded += PAD_UINT4(nelts[heapi]); + } + + prev_storage = (UINT4 *) MALLOC_ALIGN(npadded * sizeof(UINT4)); + nalloc = 0; + for (heapi = 16; heapi < 32; heapi++) { + heap[heapi] = &(prev_storage[nalloc]); + memcpy(heap[heapi],compoundpos->positions[heapi-16],nelts[heapi]*sizeof(UINT4)); + nalloc += PAD_UINT4(nelts[heapi]); + } + + debug(printf(" %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n", + nelts[16],nelts[17],nelts[18],nelts[19], + nelts[20],nelts[21],nelts[22],nelts[23], + nelts[24],nelts[25],nelts[26],nelts[27], + nelts[28],nelts[29],nelts[30],nelts[31])); + debug(printf("npadded = %d\n",npadded)); + + + curr_storage = (UINT4 *) MALLOC_ALIGN(npadded * sizeof(UINT4)); /* second buffer: successive tree levels alternate between curr_storage and prev_storage, so each merge reads one buffer and writes the other */ + nalloc = 0; + for (heapi = 8; heapi < 16; heapi++) { + heap[heapi] = &(curr_storage[nalloc]); + lefti = LEFT(heapi); + righti = RIGHT(heapi); + Merge_uint4(/*dest*/heap[heapi],heap[lefti],heap[righti],nelts[lefti],nelts[righti]); + nelts[heapi] = nelts[lefti] + nelts[righti]; + nalloc += PAD_UINT4(nelts[heapi]); + } + + nalloc = 0; + for (heapi = 4; heapi < 8; heapi++) { + heap[heapi] = &(prev_storage[nalloc]); + lefti = LEFT(heapi); + righti = RIGHT(heapi); + Merge_uint4(/*dest*/heap[heapi],heap[lefti],heap[righti],nelts[lefti],nelts[righti]); + nelts[heapi] = nelts[lefti] + nelts[righti]; + nalloc += 
PAD_UINT4(nelts[heapi]); + } + + heap[2] = &(curr_storage[0]); + Merge_uint4(/*dest*/heap[2],heap[4],heap[5],nelts[4],nelts[5]); + nelts[2] = nelts[4] + nelts[5]; + heap[3] = &(curr_storage[PAD_UINT4(nelts[2])]); + Merge_uint4(/*dest*/heap[3],heap[6],heap[7],nelts[6],nelts[7]); + nelts[3] = nelts[6] + nelts[7]; + + heap[1] = &(prev_storage[0]); /* the final merge lands at the start of prev_storage */ + Merge_uint4(/*dest*/heap[1],heap[2],heap[3],nelts[2],nelts[3]); + *nmerged = nelts[2] + nelts[3]; + +#if defined(HAVE_SSE4_1) + /* Spanningelt procedure is not prepared for memory from _mm_malloc */ /* NOTE(review): the copy made below is itself allocated with MALLOC_ALIGN, so the remark above may be outdated; confirm against mem.h */ + if (*nmerged == 0) { + results = (unsigned int *) NULL; + } else { + results = (unsigned int *) MALLOC_ALIGN((*nmerged) * sizeof(unsigned int)); + memcpy(results,heap[1],(*nmerged) * sizeof(unsigned int)); + } + + _mm_free(prev_storage); + /* _mm_free(curr_storage); */ +#else + results = heap[1]; /* heap[1] is &prev_storage[0], so prev_storage itself is returned and only curr_storage is released below */ + /* FREE(curr_storage); */ +#endif + FREE_ALIGN(curr_storage); + + for (k = 0; k < *nmerged; k++) { + results[k] += diagterm; + } + + CHECK_ALIGN(results); + return results; + } +} + +#endif /* Should be the same as count_one_shift(this,oligo,1) */ diff -Nru gmap-2016-11-07/src/Makefile.am gmap-2017-01-14/src/Makefile.am --- gmap-2016-11-07/src/Makefile.am 2016-11-08 01:14:55.000000000 +0000 +++ gmap-2017-01-14/src/Makefile.am 2017-01-13 23:46:00.000000000 +0000 @@ -45,6 +45,12 @@ bin_PROGRAMS += gsnap.avx2 bin_PROGRAMS += gsnapl.avx2 endif +if MAKE_AVX512 + bin_PROGRAMS += gmap.avx512 + bin_PROGRAMS += gmapl.avx512 + bin_PROGRAMS += gsnap.avx512 + bin_PROGRAMS += gsnapl.avx512 +endif CPUID_FILES = bool.h cpuid.c cpuid.h @@ -98,6 +104,7 @@ popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \ genome-write.c genome-write.h \ bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \ + merge.c merge.h \ indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \ oligo.c oligo.h block.c block.h \ chrom.c chrom.h segmentpos.c segmentpos.h \ @@ -160,6 +167,12 @@ gmap_avx2_LDADD = 
$(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) dist_gmap_avx2_SOURCES = $(GMAP_FILES) +gmap_avx512_CC = $(PTHREAD_CC) +gmap_avx512_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 -DHAVE_AVX512=1 $(SIMD_AVX512_CFLAGS) +gmap_avx512_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) +gmap_avx512_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) +dist_gmap_avx512_SOURCES = $(GMAP_FILES) + GMAPL_FILES = fopen.h bool.h types.h separator.h comp.h \ @@ -176,6 +189,7 @@ popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \ genome-write.c genome-write.h \ bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \ + merge.c merge.h \ indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \ oligo.c oligo.h block.c block.h \ chrom.c chrom.h segmentpos.c segmentpos.h \ @@ -238,6 +252,12 @@ gmapl_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) dist_gmapl_avx2_SOURCES = $(GMAPL_FILES) +gmapl_avx512_CC = $(PTHREAD_CC) +gmapl_avx512_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 -DHAVE_AVX512=1 $(SIMD_AVX512_CFLAGS) +gmapl_avx512_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) +gmapl_avx512_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) +dist_gmapl_avx512_SOURCES = $(GMAPL_FILES) + GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \ except.c except.h assert.c assert.h mem.c mem.h \ @@ -253,6 +273,7 @@ genome.c genome.h \ popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \ bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \ + merge.c merge.h \ indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \ oligo.c oligo.h \ chrom.c chrom.h segmentpos.c segmentpos.h \ @@ -272,7 +293,7 @@ splice.c splice.h 
indel.c indel.h bitpack64-access.c bitpack64-access.h \ bytecoding.c bytecoding.h \ univdiagdef.h univdiag.c univdiag.h sedgesort.c sedgesort.h sarray-read.c sarray-read.h \ - stage1hr.c stage1hr.h \ + merge-heap.c merge-heap.h stage1hr.c stage1hr.h \ request.c request.h resulthr.c resulthr.h output.c output.h \ inbuffer.c inbuffer.h samheader.c samheader.h outbuffer.c outbuffer.h \ datadir.c datadir.h mode.h parserange.c parserange.h \ @@ -318,6 +339,12 @@ gsnap_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) dist_gsnap_avx2_SOURCES = $(GSNAP_FILES) +gsnap_avx512_CC = $(PTHREAD_CC) +gsnap_avx512_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 -DHAVE_AVX512=1 $(SIMD_AVX512_CFLAGS) +gsnap_avx512_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) +gsnap_avx512_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) +dist_gsnap_avx512_SOURCES = $(GSNAP_FILES) + @@ -335,6 +362,7 @@ genome.c genome.h \ popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \ bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \ + merge.c merge.h \ indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \ oligo.c oligo.h \ chrom.c chrom.h segmentpos.c segmentpos.h \ @@ -352,7 +380,7 @@ chimera.c chimera.h sense.h fastlog.h stage3.c stage3.h \ splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \ splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \ - stage1hr.c stage1hr.h \ + merge-heap.c merge-heap.h stage1hr.c stage1hr.h \ request.c request.h resulthr.c resulthr.h output.c output.h \ inbuffer.c inbuffer.h samheader.c samheader.h outbuffer.c outbuffer.h \ datadir.c datadir.h mode.h parserange.c parserange.h \ @@ -397,6 +425,12 @@ gsnapl_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) 
$(BZLIB_LIBS) dist_gsnapl_avx2_SOURCES = $(GSNAPL_FILES) +gsnapl_avx512_CC = $(PTHREAD_CC) +gsnapl_avx512_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 -DHAVE_AVX512=1 $(SIMD_AVX512_CFLAGS) +gsnapl_avx512_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) +gsnapl_avx512_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) +dist_gsnapl_avx512_SOURCES = $(GSNAPL_FILES) + # Build as a non-SIMD program UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \ @@ -412,6 +446,7 @@ genome.c genome.h \ popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \ bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \ + merge.c merge.h \ indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \ oligo.c oligo.h \ chrom.c chrom.h segmentpos.c segmentpos.h \ @@ -431,7 +466,7 @@ splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \ bytecoding.c bytecoding.h \ univdiagdef.h univdiag.c univdiag.h sedgesort.c sedgesort.h sarray-read.c sarray-read.h \ - stage1hr.c stage1hr.h resulthr.c resulthr.h \ + merge-heap.c merge-heap.h stage1hr.c stage1hr.h resulthr.c resulthr.h \ datadir.c datadir.h mode.h parserange.c parserange.h \ getopt.c getopt1.c getopt.h uniqscan.c @@ -456,6 +491,7 @@ genome.c genome.h \ popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \ bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \ + merge.c merge.h \ indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \ oligo.c oligo.h \ chrom.c chrom.h segmentpos.c segmentpos.h \ @@ -473,7 +509,7 @@ chimera.c chimera.h sense.h fastlog.h stage3.c stage3.h \ splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \ splice.c splice.h indel.c indel.h 
bitpack64-access.c bitpack64-access.h \ - stage1hr.c stage1hr.h resulthr.c resulthr.h \ + merge-heap.c merge-heap.h stage1hr.c stage1hr.h resulthr.c resulthr.h \ datadir.c datadir.h mode.h parserange.c parserange.h \ getopt.c getopt1.c getopt.h uniqscan.c diff -Nru gmap-2016-11-07/src/Makefile.in gmap-2017-01-14/src/Makefile.in --- gmap-2016-11-07/src/Makefile.in 2016-11-08 01:15:36.000000000 +0000 +++ gmap-2017-01-14/src/Makefile.in 2017-01-13 23:46:49.000000000 +0000 @@ -97,7 +97,7 @@ sam_sort$(EXEEXT) gmap.nosimd$(EXEEXT) gmapl.nosimd$(EXEEXT) \ gsnap.nosimd$(EXEEXT) gsnapl.nosimd$(EXEEXT) $(am__EXEEXT_1) \ $(am__EXEEXT_2) $(am__EXEEXT_3) $(am__EXEEXT_4) \ - $(am__EXEEXT_5) + $(am__EXEEXT_5) $(am__EXEEXT_6) @MAKE_SSE2_TRUE@am__append_1 = gmap.sse2 gmapl.sse2 gsnap.sse2 \ @MAKE_SSE2_TRUE@ gsnapl.sse2 @MAKE_SSSE3_TRUE@am__append_2 = gmap.ssse3 gmapl.ssse3 gsnap.ssse3 \ @@ -108,6 +108,8 @@ @MAKE_SSE4_2_TRUE@ gsnapl.sse42 @MAKE_AVX2_TRUE@am__append_5 = gmap.avx2 gmapl.avx2 gsnap.avx2 \ @MAKE_AVX2_TRUE@ gsnapl.avx2 +@MAKE_AVX512_TRUE@am__append_6 = gmap.avx512 gmapl.avx512 gsnap.avx512 \ +@MAKE_AVX512_TRUE@ gsnapl.avx512 subdir = src ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/config/libtool.m4 \ @@ -153,6 +155,9 @@ @MAKE_SSE4_2_TRUE@ gsnapl.sse42$(EXEEXT) @MAKE_AVX2_TRUE@am__EXEEXT_5 = gmap.avx2$(EXEEXT) gmapl.avx2$(EXEEXT) \ @MAKE_AVX2_TRUE@ gsnap.avx2$(EXEEXT) gsnapl.avx2$(EXEEXT) +@MAKE_AVX512_TRUE@am__EXEEXT_6 = gmap.avx512$(EXEEXT) \ +@MAKE_AVX512_TRUE@ gmapl.avx512$(EXEEXT) gsnap.avx512$(EXEEXT) \ +@MAKE_AVX512_TRUE@ gsnapl.avx512$(EXEEXT) am__installdirs = "$(DESTDIR)$(bindir)" PROGRAMS = $(bin_PROGRAMS) am__objects_1 = atoiindex-except.$(OBJEXT) atoiindex-assert.$(OBJEXT) \ @@ -280,15 +285,16 @@ gmap_avx2-genome-write.$(OBJEXT) \ gmap_avx2-bitpack64-read.$(OBJEXT) \ gmap_avx2-bitpack64-readtwo.$(OBJEXT) \ - gmap_avx2-indexdb.$(OBJEXT) gmap_avx2-indexdb_hr.$(OBJEXT) \ - gmap_avx2-oligo.$(OBJEXT) gmap_avx2-block.$(OBJEXT) \ - 
gmap_avx2-chrom.$(OBJEXT) gmap_avx2-segmentpos.$(OBJEXT) \ - gmap_avx2-chrnum.$(OBJEXT) gmap_avx2-uinttable.$(OBJEXT) \ - gmap_avx2-gregion.$(OBJEXT) gmap_avx2-match.$(OBJEXT) \ - gmap_avx2-matchpool.$(OBJEXT) gmap_avx2-diagnostic.$(OBJEXT) \ - gmap_avx2-stage1.$(OBJEXT) gmap_avx2-diag.$(OBJEXT) \ - gmap_avx2-diagpool.$(OBJEXT) gmap_avx2-cmet.$(OBJEXT) \ - gmap_avx2-atoi.$(OBJEXT) gmap_avx2-orderstat.$(OBJEXT) \ + gmap_avx2-merge.$(OBJEXT) gmap_avx2-indexdb.$(OBJEXT) \ + gmap_avx2-indexdb_hr.$(OBJEXT) gmap_avx2-oligo.$(OBJEXT) \ + gmap_avx2-block.$(OBJEXT) gmap_avx2-chrom.$(OBJEXT) \ + gmap_avx2-segmentpos.$(OBJEXT) gmap_avx2-chrnum.$(OBJEXT) \ + gmap_avx2-uinttable.$(OBJEXT) gmap_avx2-gregion.$(OBJEXT) \ + gmap_avx2-match.$(OBJEXT) gmap_avx2-matchpool.$(OBJEXT) \ + gmap_avx2-diagnostic.$(OBJEXT) gmap_avx2-stage1.$(OBJEXT) \ + gmap_avx2-diag.$(OBJEXT) gmap_avx2-diagpool.$(OBJEXT) \ + gmap_avx2-cmet.$(OBJEXT) gmap_avx2-atoi.$(OBJEXT) \ + gmap_avx2-orderstat.$(OBJEXT) \ gmap_avx2-oligoindex_hr.$(OBJEXT) gmap_avx2-intron.$(OBJEXT) \ gmap_avx2-maxent.$(OBJEXT) gmap_avx2-maxent_hr.$(OBJEXT) \ gmap_avx2-pair.$(OBJEXT) gmap_avx2-pairpool.$(OBJEXT) \ @@ -317,7 +323,72 @@ gmap_avx2_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmap_avx2_CFLAGS) \ $(CFLAGS) $(gmap_avx2_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_7 = gmap_nosimd-except.$(OBJEXT) \ +am__objects_7 = gmap_avx512-except.$(OBJEXT) \ + gmap_avx512-assert.$(OBJEXT) gmap_avx512-mem.$(OBJEXT) \ + gmap_avx512-intlist.$(OBJEXT) gmap_avx512-list.$(OBJEXT) \ + gmap_avx512-littleendian.$(OBJEXT) \ + gmap_avx512-bigendian.$(OBJEXT) \ + gmap_avx512-univinterval.$(OBJEXT) \ + gmap_avx512-interval.$(OBJEXT) gmap_avx512-uintlist.$(OBJEXT) \ + gmap_avx512-stopwatch.$(OBJEXT) \ + gmap_avx512-semaphore.$(OBJEXT) gmap_avx512-access.$(OBJEXT) \ + gmap_avx512-filestring.$(OBJEXT) \ + gmap_avx512-iit-read-univ.$(OBJEXT) \ + gmap_avx512-iit-read.$(OBJEXT) gmap_avx512-md5.$(OBJEXT) \ + 
gmap_avx512-bzip2.$(OBJEXT) gmap_avx512-sequence.$(OBJEXT) \ + gmap_avx512-reader.$(OBJEXT) gmap_avx512-genomicpos.$(OBJEXT) \ + gmap_avx512-compress.$(OBJEXT) \ + gmap_avx512-compress-write.$(OBJEXT) \ + gmap_avx512-gbuffer.$(OBJEXT) gmap_avx512-genome.$(OBJEXT) \ + gmap_avx512-popcount.$(OBJEXT) \ + gmap_avx512-genome128_hr.$(OBJEXT) \ + gmap_avx512-genome_sites.$(OBJEXT) \ + gmap_avx512-genome-write.$(OBJEXT) \ + gmap_avx512-bitpack64-read.$(OBJEXT) \ + gmap_avx512-bitpack64-readtwo.$(OBJEXT) \ + gmap_avx512-merge.$(OBJEXT) gmap_avx512-indexdb.$(OBJEXT) \ + gmap_avx512-indexdb_hr.$(OBJEXT) gmap_avx512-oligo.$(OBJEXT) \ + gmap_avx512-block.$(OBJEXT) gmap_avx512-chrom.$(OBJEXT) \ + gmap_avx512-segmentpos.$(OBJEXT) gmap_avx512-chrnum.$(OBJEXT) \ + gmap_avx512-uinttable.$(OBJEXT) gmap_avx512-gregion.$(OBJEXT) \ + gmap_avx512-match.$(OBJEXT) gmap_avx512-matchpool.$(OBJEXT) \ + gmap_avx512-diagnostic.$(OBJEXT) gmap_avx512-stage1.$(OBJEXT) \ + gmap_avx512-diag.$(OBJEXT) gmap_avx512-diagpool.$(OBJEXT) \ + gmap_avx512-cmet.$(OBJEXT) gmap_avx512-atoi.$(OBJEXT) \ + gmap_avx512-orderstat.$(OBJEXT) \ + gmap_avx512-oligoindex_hr.$(OBJEXT) \ + gmap_avx512-intron.$(OBJEXT) gmap_avx512-maxent.$(OBJEXT) \ + gmap_avx512-maxent_hr.$(OBJEXT) gmap_avx512-pair.$(OBJEXT) \ + gmap_avx512-pairpool.$(OBJEXT) gmap_avx512-cellpool.$(OBJEXT) \ + gmap_avx512-stage2.$(OBJEXT) gmap_avx512-doublelist.$(OBJEXT) \ + gmap_avx512-smooth.$(OBJEXT) \ + gmap_avx512-splicestringpool.$(OBJEXT) \ + gmap_avx512-splicetrie_build.$(OBJEXT) \ + gmap_avx512-splicetrie.$(OBJEXT) \ + gmap_avx512-boyer-moore.$(OBJEXT) \ + gmap_avx512-dynprog.$(OBJEXT) \ + gmap_avx512-dynprog_simd.$(OBJEXT) \ + gmap_avx512-dynprog_single.$(OBJEXT) \ + gmap_avx512-dynprog_genome.$(OBJEXT) \ + gmap_avx512-dynprog_cdna.$(OBJEXT) \ + gmap_avx512-dynprog_end.$(OBJEXT) \ + gmap_avx512-translation.$(OBJEXT) gmap_avx512-pbinom.$(OBJEXT) \ + gmap_avx512-changepoint.$(OBJEXT) gmap_avx512-stage3.$(OBJEXT) \ + gmap_avx512-request.$(OBJEXT) 
gmap_avx512-result.$(OBJEXT) \ + gmap_avx512-output.$(OBJEXT) gmap_avx512-inbuffer.$(OBJEXT) \ + gmap_avx512-samheader.$(OBJEXT) \ + gmap_avx512-outbuffer.$(OBJEXT) gmap_avx512-chimera.$(OBJEXT) \ + gmap_avx512-datadir.$(OBJEXT) gmap_avx512-parserange.$(OBJEXT) \ + gmap_avx512-getopt.$(OBJEXT) gmap_avx512-getopt1.$(OBJEXT) \ + gmap_avx512-gmap.$(OBJEXT) +dist_gmap_avx512_OBJECTS = $(am__objects_7) +gmap_avx512_OBJECTS = $(dist_gmap_avx512_OBJECTS) +gmap_avx512_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) +gmap_avx512_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmap_avx512_CFLAGS) \ + $(CFLAGS) $(gmap_avx512_LDFLAGS) $(LDFLAGS) -o $@ +am__objects_8 = gmap_nosimd-except.$(OBJEXT) \ gmap_nosimd-assert.$(OBJEXT) gmap_nosimd-mem.$(OBJEXT) \ gmap_nosimd-intlist.$(OBJEXT) gmap_nosimd-list.$(OBJEXT) \ gmap_nosimd-littleendian.$(OBJEXT) \ @@ -340,12 +411,12 @@ gmap_nosimd-genome-write.$(OBJEXT) \ gmap_nosimd-bitpack64-read.$(OBJEXT) \ gmap_nosimd-bitpack64-readtwo.$(OBJEXT) \ - gmap_nosimd-indexdb.$(OBJEXT) gmap_nosimd-indexdb_hr.$(OBJEXT) \ - gmap_nosimd-oligo.$(OBJEXT) gmap_nosimd-block.$(OBJEXT) \ - gmap_nosimd-chrom.$(OBJEXT) gmap_nosimd-segmentpos.$(OBJEXT) \ - gmap_nosimd-chrnum.$(OBJEXT) gmap_nosimd-uinttable.$(OBJEXT) \ - gmap_nosimd-gregion.$(OBJEXT) gmap_nosimd-match.$(OBJEXT) \ - gmap_nosimd-matchpool.$(OBJEXT) \ + gmap_nosimd-merge.$(OBJEXT) gmap_nosimd-indexdb.$(OBJEXT) \ + gmap_nosimd-indexdb_hr.$(OBJEXT) gmap_nosimd-oligo.$(OBJEXT) \ + gmap_nosimd-block.$(OBJEXT) gmap_nosimd-chrom.$(OBJEXT) \ + gmap_nosimd-segmentpos.$(OBJEXT) gmap_nosimd-chrnum.$(OBJEXT) \ + gmap_nosimd-uinttable.$(OBJEXT) gmap_nosimd-gregion.$(OBJEXT) \ + gmap_nosimd-match.$(OBJEXT) gmap_nosimd-matchpool.$(OBJEXT) \ gmap_nosimd-diagnostic.$(OBJEXT) gmap_nosimd-stage1.$(OBJEXT) \ gmap_nosimd-diag.$(OBJEXT) gmap_nosimd-diagpool.$(OBJEXT) \ gmap_nosimd-cmet.$(OBJEXT) gmap_nosimd-atoi.$(OBJEXT) \ @@ 
-375,14 +446,14 @@ gmap_nosimd-datadir.$(OBJEXT) gmap_nosimd-parserange.$(OBJEXT) \ gmap_nosimd-getopt.$(OBJEXT) gmap_nosimd-getopt1.$(OBJEXT) \ gmap_nosimd-gmap.$(OBJEXT) -dist_gmap_nosimd_OBJECTS = $(am__objects_7) +dist_gmap_nosimd_OBJECTS = $(am__objects_8) gmap_nosimd_OBJECTS = $(dist_gmap_nosimd_OBJECTS) gmap_nosimd_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) gmap_nosimd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmap_nosimd_CFLAGS) \ $(CFLAGS) $(gmap_nosimd_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_8 = gmap_sse2-except.$(OBJEXT) gmap_sse2-assert.$(OBJEXT) \ +am__objects_9 = gmap_sse2-except.$(OBJEXT) gmap_sse2-assert.$(OBJEXT) \ gmap_sse2-mem.$(OBJEXT) gmap_sse2-intlist.$(OBJEXT) \ gmap_sse2-list.$(OBJEXT) gmap_sse2-littleendian.$(OBJEXT) \ gmap_sse2-bigendian.$(OBJEXT) gmap_sse2-univinterval.$(OBJEXT) \ @@ -400,15 +471,16 @@ gmap_sse2-genome-write.$(OBJEXT) \ gmap_sse2-bitpack64-read.$(OBJEXT) \ gmap_sse2-bitpack64-readtwo.$(OBJEXT) \ - gmap_sse2-indexdb.$(OBJEXT) gmap_sse2-indexdb_hr.$(OBJEXT) \ - gmap_sse2-oligo.$(OBJEXT) gmap_sse2-block.$(OBJEXT) \ - gmap_sse2-chrom.$(OBJEXT) gmap_sse2-segmentpos.$(OBJEXT) \ - gmap_sse2-chrnum.$(OBJEXT) gmap_sse2-uinttable.$(OBJEXT) \ - gmap_sse2-gregion.$(OBJEXT) gmap_sse2-match.$(OBJEXT) \ - gmap_sse2-matchpool.$(OBJEXT) gmap_sse2-diagnostic.$(OBJEXT) \ - gmap_sse2-stage1.$(OBJEXT) gmap_sse2-diag.$(OBJEXT) \ - gmap_sse2-diagpool.$(OBJEXT) gmap_sse2-cmet.$(OBJEXT) \ - gmap_sse2-atoi.$(OBJEXT) gmap_sse2-orderstat.$(OBJEXT) \ + gmap_sse2-merge.$(OBJEXT) gmap_sse2-indexdb.$(OBJEXT) \ + gmap_sse2-indexdb_hr.$(OBJEXT) gmap_sse2-oligo.$(OBJEXT) \ + gmap_sse2-block.$(OBJEXT) gmap_sse2-chrom.$(OBJEXT) \ + gmap_sse2-segmentpos.$(OBJEXT) gmap_sse2-chrnum.$(OBJEXT) \ + gmap_sse2-uinttable.$(OBJEXT) gmap_sse2-gregion.$(OBJEXT) \ + gmap_sse2-match.$(OBJEXT) gmap_sse2-matchpool.$(OBJEXT) \ + gmap_sse2-diagnostic.$(OBJEXT) 
gmap_sse2-stage1.$(OBJEXT) \ + gmap_sse2-diag.$(OBJEXT) gmap_sse2-diagpool.$(OBJEXT) \ + gmap_sse2-cmet.$(OBJEXT) gmap_sse2-atoi.$(OBJEXT) \ + gmap_sse2-orderstat.$(OBJEXT) \ gmap_sse2-oligoindex_hr.$(OBJEXT) gmap_sse2-intron.$(OBJEXT) \ gmap_sse2-maxent.$(OBJEXT) gmap_sse2-maxent_hr.$(OBJEXT) \ gmap_sse2-pair.$(OBJEXT) gmap_sse2-pairpool.$(OBJEXT) \ @@ -430,14 +502,14 @@ gmap_sse2-chimera.$(OBJEXT) gmap_sse2-datadir.$(OBJEXT) \ gmap_sse2-parserange.$(OBJEXT) gmap_sse2-getopt.$(OBJEXT) \ gmap_sse2-getopt1.$(OBJEXT) gmap_sse2-gmap.$(OBJEXT) -dist_gmap_sse2_OBJECTS = $(am__objects_8) +dist_gmap_sse2_OBJECTS = $(am__objects_9) gmap_sse2_OBJECTS = $(dist_gmap_sse2_OBJECTS) gmap_sse2_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) gmap_sse2_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmap_sse2_CFLAGS) \ $(CFLAGS) $(gmap_sse2_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_9 = gmap_sse41-except.$(OBJEXT) \ +am__objects_10 = gmap_sse41-except.$(OBJEXT) \ gmap_sse41-assert.$(OBJEXT) gmap_sse41-mem.$(OBJEXT) \ gmap_sse41-intlist.$(OBJEXT) gmap_sse41-list.$(OBJEXT) \ gmap_sse41-littleendian.$(OBJEXT) \ @@ -459,15 +531,16 @@ gmap_sse41-genome-write.$(OBJEXT) \ gmap_sse41-bitpack64-read.$(OBJEXT) \ gmap_sse41-bitpack64-readtwo.$(OBJEXT) \ - gmap_sse41-indexdb.$(OBJEXT) gmap_sse41-indexdb_hr.$(OBJEXT) \ - gmap_sse41-oligo.$(OBJEXT) gmap_sse41-block.$(OBJEXT) \ - gmap_sse41-chrom.$(OBJEXT) gmap_sse41-segmentpos.$(OBJEXT) \ - gmap_sse41-chrnum.$(OBJEXT) gmap_sse41-uinttable.$(OBJEXT) \ - gmap_sse41-gregion.$(OBJEXT) gmap_sse41-match.$(OBJEXT) \ - gmap_sse41-matchpool.$(OBJEXT) gmap_sse41-diagnostic.$(OBJEXT) \ - gmap_sse41-stage1.$(OBJEXT) gmap_sse41-diag.$(OBJEXT) \ - gmap_sse41-diagpool.$(OBJEXT) gmap_sse41-cmet.$(OBJEXT) \ - gmap_sse41-atoi.$(OBJEXT) gmap_sse41-orderstat.$(OBJEXT) \ + gmap_sse41-merge.$(OBJEXT) gmap_sse41-indexdb.$(OBJEXT) \ + gmap_sse41-indexdb_hr.$(OBJEXT) 
gmap_sse41-oligo.$(OBJEXT) \ + gmap_sse41-block.$(OBJEXT) gmap_sse41-chrom.$(OBJEXT) \ + gmap_sse41-segmentpos.$(OBJEXT) gmap_sse41-chrnum.$(OBJEXT) \ + gmap_sse41-uinttable.$(OBJEXT) gmap_sse41-gregion.$(OBJEXT) \ + gmap_sse41-match.$(OBJEXT) gmap_sse41-matchpool.$(OBJEXT) \ + gmap_sse41-diagnostic.$(OBJEXT) gmap_sse41-stage1.$(OBJEXT) \ + gmap_sse41-diag.$(OBJEXT) gmap_sse41-diagpool.$(OBJEXT) \ + gmap_sse41-cmet.$(OBJEXT) gmap_sse41-atoi.$(OBJEXT) \ + gmap_sse41-orderstat.$(OBJEXT) \ gmap_sse41-oligoindex_hr.$(OBJEXT) gmap_sse41-intron.$(OBJEXT) \ gmap_sse41-maxent.$(OBJEXT) gmap_sse41-maxent_hr.$(OBJEXT) \ gmap_sse41-pair.$(OBJEXT) gmap_sse41-pairpool.$(OBJEXT) \ @@ -490,14 +563,14 @@ gmap_sse41-chimera.$(OBJEXT) gmap_sse41-datadir.$(OBJEXT) \ gmap_sse41-parserange.$(OBJEXT) gmap_sse41-getopt.$(OBJEXT) \ gmap_sse41-getopt1.$(OBJEXT) gmap_sse41-gmap.$(OBJEXT) -dist_gmap_sse41_OBJECTS = $(am__objects_9) +dist_gmap_sse41_OBJECTS = $(am__objects_10) gmap_sse41_OBJECTS = $(dist_gmap_sse41_OBJECTS) gmap_sse41_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) gmap_sse41_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmap_sse41_CFLAGS) \ $(CFLAGS) $(gmap_sse41_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_10 = gmap_sse42-except.$(OBJEXT) \ +am__objects_11 = gmap_sse42-except.$(OBJEXT) \ gmap_sse42-assert.$(OBJEXT) gmap_sse42-mem.$(OBJEXT) \ gmap_sse42-intlist.$(OBJEXT) gmap_sse42-list.$(OBJEXT) \ gmap_sse42-littleendian.$(OBJEXT) \ @@ -519,15 +592,16 @@ gmap_sse42-genome-write.$(OBJEXT) \ gmap_sse42-bitpack64-read.$(OBJEXT) \ gmap_sse42-bitpack64-readtwo.$(OBJEXT) \ - gmap_sse42-indexdb.$(OBJEXT) gmap_sse42-indexdb_hr.$(OBJEXT) \ - gmap_sse42-oligo.$(OBJEXT) gmap_sse42-block.$(OBJEXT) \ - gmap_sse42-chrom.$(OBJEXT) gmap_sse42-segmentpos.$(OBJEXT) \ - gmap_sse42-chrnum.$(OBJEXT) gmap_sse42-uinttable.$(OBJEXT) \ - gmap_sse42-gregion.$(OBJEXT) gmap_sse42-match.$(OBJEXT) \ - 
gmap_sse42-matchpool.$(OBJEXT) gmap_sse42-diagnostic.$(OBJEXT) \ - gmap_sse42-stage1.$(OBJEXT) gmap_sse42-diag.$(OBJEXT) \ - gmap_sse42-diagpool.$(OBJEXT) gmap_sse42-cmet.$(OBJEXT) \ - gmap_sse42-atoi.$(OBJEXT) gmap_sse42-orderstat.$(OBJEXT) \ + gmap_sse42-merge.$(OBJEXT) gmap_sse42-indexdb.$(OBJEXT) \ + gmap_sse42-indexdb_hr.$(OBJEXT) gmap_sse42-oligo.$(OBJEXT) \ + gmap_sse42-block.$(OBJEXT) gmap_sse42-chrom.$(OBJEXT) \ + gmap_sse42-segmentpos.$(OBJEXT) gmap_sse42-chrnum.$(OBJEXT) \ + gmap_sse42-uinttable.$(OBJEXT) gmap_sse42-gregion.$(OBJEXT) \ + gmap_sse42-match.$(OBJEXT) gmap_sse42-matchpool.$(OBJEXT) \ + gmap_sse42-diagnostic.$(OBJEXT) gmap_sse42-stage1.$(OBJEXT) \ + gmap_sse42-diag.$(OBJEXT) gmap_sse42-diagpool.$(OBJEXT) \ + gmap_sse42-cmet.$(OBJEXT) gmap_sse42-atoi.$(OBJEXT) \ + gmap_sse42-orderstat.$(OBJEXT) \ gmap_sse42-oligoindex_hr.$(OBJEXT) gmap_sse42-intron.$(OBJEXT) \ gmap_sse42-maxent.$(OBJEXT) gmap_sse42-maxent_hr.$(OBJEXT) \ gmap_sse42-pair.$(OBJEXT) gmap_sse42-pairpool.$(OBJEXT) \ @@ -550,14 +624,14 @@ gmap_sse42-chimera.$(OBJEXT) gmap_sse42-datadir.$(OBJEXT) \ gmap_sse42-parserange.$(OBJEXT) gmap_sse42-getopt.$(OBJEXT) \ gmap_sse42-getopt1.$(OBJEXT) gmap_sse42-gmap.$(OBJEXT) -dist_gmap_sse42_OBJECTS = $(am__objects_10) +dist_gmap_sse42_OBJECTS = $(am__objects_11) gmap_sse42_OBJECTS = $(dist_gmap_sse42_OBJECTS) gmap_sse42_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) gmap_sse42_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmap_sse42_CFLAGS) \ $(CFLAGS) $(gmap_sse42_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_11 = gmap_ssse3-except.$(OBJEXT) \ +am__objects_12 = gmap_ssse3-except.$(OBJEXT) \ gmap_ssse3-assert.$(OBJEXT) gmap_ssse3-mem.$(OBJEXT) \ gmap_ssse3-intlist.$(OBJEXT) gmap_ssse3-list.$(OBJEXT) \ gmap_ssse3-littleendian.$(OBJEXT) \ @@ -579,15 +653,16 @@ gmap_ssse3-genome-write.$(OBJEXT) \ gmap_ssse3-bitpack64-read.$(OBJEXT) \ 
gmap_ssse3-bitpack64-readtwo.$(OBJEXT) \ - gmap_ssse3-indexdb.$(OBJEXT) gmap_ssse3-indexdb_hr.$(OBJEXT) \ - gmap_ssse3-oligo.$(OBJEXT) gmap_ssse3-block.$(OBJEXT) \ - gmap_ssse3-chrom.$(OBJEXT) gmap_ssse3-segmentpos.$(OBJEXT) \ - gmap_ssse3-chrnum.$(OBJEXT) gmap_ssse3-uinttable.$(OBJEXT) \ - gmap_ssse3-gregion.$(OBJEXT) gmap_ssse3-match.$(OBJEXT) \ - gmap_ssse3-matchpool.$(OBJEXT) gmap_ssse3-diagnostic.$(OBJEXT) \ - gmap_ssse3-stage1.$(OBJEXT) gmap_ssse3-diag.$(OBJEXT) \ - gmap_ssse3-diagpool.$(OBJEXT) gmap_ssse3-cmet.$(OBJEXT) \ - gmap_ssse3-atoi.$(OBJEXT) gmap_ssse3-orderstat.$(OBJEXT) \ + gmap_ssse3-merge.$(OBJEXT) gmap_ssse3-indexdb.$(OBJEXT) \ + gmap_ssse3-indexdb_hr.$(OBJEXT) gmap_ssse3-oligo.$(OBJEXT) \ + gmap_ssse3-block.$(OBJEXT) gmap_ssse3-chrom.$(OBJEXT) \ + gmap_ssse3-segmentpos.$(OBJEXT) gmap_ssse3-chrnum.$(OBJEXT) \ + gmap_ssse3-uinttable.$(OBJEXT) gmap_ssse3-gregion.$(OBJEXT) \ + gmap_ssse3-match.$(OBJEXT) gmap_ssse3-matchpool.$(OBJEXT) \ + gmap_ssse3-diagnostic.$(OBJEXT) gmap_ssse3-stage1.$(OBJEXT) \ + gmap_ssse3-diag.$(OBJEXT) gmap_ssse3-diagpool.$(OBJEXT) \ + gmap_ssse3-cmet.$(OBJEXT) gmap_ssse3-atoi.$(OBJEXT) \ + gmap_ssse3-orderstat.$(OBJEXT) \ gmap_ssse3-oligoindex_hr.$(OBJEXT) gmap_ssse3-intron.$(OBJEXT) \ gmap_ssse3-maxent.$(OBJEXT) gmap_ssse3-maxent_hr.$(OBJEXT) \ gmap_ssse3-pair.$(OBJEXT) gmap_ssse3-pairpool.$(OBJEXT) \ @@ -610,14 +685,14 @@ gmap_ssse3-chimera.$(OBJEXT) gmap_ssse3-datadir.$(OBJEXT) \ gmap_ssse3-parserange.$(OBJEXT) gmap_ssse3-getopt.$(OBJEXT) \ gmap_ssse3-getopt1.$(OBJEXT) gmap_ssse3-gmap.$(OBJEXT) -dist_gmap_ssse3_OBJECTS = $(am__objects_11) +dist_gmap_ssse3_OBJECTS = $(am__objects_12) gmap_ssse3_OBJECTS = $(dist_gmap_ssse3_OBJECTS) gmap_ssse3_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) gmap_ssse3_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmap_ssse3_CFLAGS) \ $(CFLAGS) $(gmap_ssse3_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_12 = 
gmapindex-except.$(OBJEXT) gmapindex-assert.$(OBJEXT) \ +am__objects_13 = gmapindex-except.$(OBJEXT) gmapindex-assert.$(OBJEXT) \ gmapindex-mem.$(OBJEXT) gmapindex-intlist.$(OBJEXT) \ gmapindex-list.$(OBJEXT) gmapindex-littleendian.$(OBJEXT) \ gmapindex-bigendian.$(OBJEXT) gmapindex-univinterval.$(OBJEXT) \ @@ -645,21 +720,21 @@ gmapindex-bytecoding.$(OBJEXT) \ gmapindex-sarray-write.$(OBJEXT) \ gmapindex-parserange.$(OBJEXT) gmapindex-gmapindex.$(OBJEXT) -dist_gmapindex_OBJECTS = $(am__objects_12) +dist_gmapindex_OBJECTS = $(am__objects_13) gmapindex_OBJECTS = $(dist_gmapindex_OBJECTS) gmapindex_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) gmapindex_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmapindex_CFLAGS) \ $(CFLAGS) $(gmapindex_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_13 = gmapl-cpuid.$(OBJEXT) gmapl-gmapl_select.$(OBJEXT) -dist_gmapl_OBJECTS = $(am__objects_13) +am__objects_14 = gmapl-cpuid.$(OBJEXT) gmapl-gmapl_select.$(OBJEXT) +dist_gmapl_OBJECTS = $(am__objects_14) gmapl_OBJECTS = $(dist_gmapl_OBJECTS) gmapl_DEPENDENCIES = gmapl_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmapl_CFLAGS) $(CFLAGS) \ $(gmapl_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_14 = gmapl_avx2-except.$(OBJEXT) \ +am__objects_15 = gmapl_avx2-except.$(OBJEXT) \ gmapl_avx2-assert.$(OBJEXT) gmapl_avx2-mem.$(OBJEXT) \ gmapl_avx2-intlist.$(OBJEXT) gmapl_avx2-list.$(OBJEXT) \ gmapl_avx2-littleendian.$(OBJEXT) \ @@ -682,15 +757,16 @@ gmapl_avx2-genome-write.$(OBJEXT) \ gmapl_avx2-bitpack64-read.$(OBJEXT) \ gmapl_avx2-bitpack64-readtwo.$(OBJEXT) \ - gmapl_avx2-indexdb.$(OBJEXT) gmapl_avx2-indexdb_hr.$(OBJEXT) \ - gmapl_avx2-oligo.$(OBJEXT) gmapl_avx2-block.$(OBJEXT) \ - gmapl_avx2-chrom.$(OBJEXT) gmapl_avx2-segmentpos.$(OBJEXT) \ - gmapl_avx2-chrnum.$(OBJEXT) gmapl_avx2-uinttable.$(OBJEXT) \ - gmapl_avx2-gregion.$(OBJEXT) gmapl_avx2-match.$(OBJEXT) \ - 
gmapl_avx2-matchpool.$(OBJEXT) gmapl_avx2-diagnostic.$(OBJEXT) \ - gmapl_avx2-stage1.$(OBJEXT) gmapl_avx2-diag.$(OBJEXT) \ - gmapl_avx2-diagpool.$(OBJEXT) gmapl_avx2-cmet.$(OBJEXT) \ - gmapl_avx2-atoi.$(OBJEXT) gmapl_avx2-orderstat.$(OBJEXT) \ + gmapl_avx2-merge.$(OBJEXT) gmapl_avx2-indexdb.$(OBJEXT) \ + gmapl_avx2-indexdb_hr.$(OBJEXT) gmapl_avx2-oligo.$(OBJEXT) \ + gmapl_avx2-block.$(OBJEXT) gmapl_avx2-chrom.$(OBJEXT) \ + gmapl_avx2-segmentpos.$(OBJEXT) gmapl_avx2-chrnum.$(OBJEXT) \ + gmapl_avx2-uinttable.$(OBJEXT) gmapl_avx2-gregion.$(OBJEXT) \ + gmapl_avx2-match.$(OBJEXT) gmapl_avx2-matchpool.$(OBJEXT) \ + gmapl_avx2-diagnostic.$(OBJEXT) gmapl_avx2-stage1.$(OBJEXT) \ + gmapl_avx2-diag.$(OBJEXT) gmapl_avx2-diagpool.$(OBJEXT) \ + gmapl_avx2-cmet.$(OBJEXT) gmapl_avx2-atoi.$(OBJEXT) \ + gmapl_avx2-orderstat.$(OBJEXT) \ gmapl_avx2-oligoindex_hr.$(OBJEXT) gmapl_avx2-intron.$(OBJEXT) \ gmapl_avx2-maxent.$(OBJEXT) gmapl_avx2-maxent_hr.$(OBJEXT) \ gmapl_avx2-pair.$(OBJEXT) gmapl_avx2-pairpool.$(OBJEXT) \ @@ -713,14 +789,87 @@ gmapl_avx2-chimera.$(OBJEXT) gmapl_avx2-datadir.$(OBJEXT) \ gmapl_avx2-parserange.$(OBJEXT) gmapl_avx2-getopt.$(OBJEXT) \ gmapl_avx2-getopt1.$(OBJEXT) gmapl_avx2-gmap.$(OBJEXT) -dist_gmapl_avx2_OBJECTS = $(am__objects_14) +dist_gmapl_avx2_OBJECTS = $(am__objects_15) gmapl_avx2_OBJECTS = $(dist_gmapl_avx2_OBJECTS) gmapl_avx2_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) gmapl_avx2_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmapl_avx2_CFLAGS) \ $(CFLAGS) $(gmapl_avx2_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_15 = gmapl_nosimd-except.$(OBJEXT) \ +am__objects_16 = gmapl_avx512-except.$(OBJEXT) \ + gmapl_avx512-assert.$(OBJEXT) gmapl_avx512-mem.$(OBJEXT) \ + gmapl_avx512-intlist.$(OBJEXT) gmapl_avx512-list.$(OBJEXT) \ + gmapl_avx512-littleendian.$(OBJEXT) \ + gmapl_avx512-bigendian.$(OBJEXT) \ + gmapl_avx512-univinterval.$(OBJEXT) \ + gmapl_avx512-interval.$(OBJEXT) \ 
+ gmapl_avx512-uintlist.$(OBJEXT) \ + gmapl_avx512-uint8list.$(OBJEXT) \ + gmapl_avx512-stopwatch.$(OBJEXT) \ + gmapl_avx512-semaphore.$(OBJEXT) gmapl_avx512-access.$(OBJEXT) \ + gmapl_avx512-filestring.$(OBJEXT) \ + gmapl_avx512-iit-read-univ.$(OBJEXT) \ + gmapl_avx512-iit-read.$(OBJEXT) gmapl_avx512-md5.$(OBJEXT) \ + gmapl_avx512-bzip2.$(OBJEXT) gmapl_avx512-sequence.$(OBJEXT) \ + gmapl_avx512-reader.$(OBJEXT) \ + gmapl_avx512-genomicpos.$(OBJEXT) \ + gmapl_avx512-compress.$(OBJEXT) \ + gmapl_avx512-compress-write.$(OBJEXT) \ + gmapl_avx512-gbuffer.$(OBJEXT) gmapl_avx512-genome.$(OBJEXT) \ + gmapl_avx512-popcount.$(OBJEXT) \ + gmapl_avx512-genome128_hr.$(OBJEXT) \ + gmapl_avx512-genome_sites.$(OBJEXT) \ + gmapl_avx512-genome-write.$(OBJEXT) \ + gmapl_avx512-bitpack64-read.$(OBJEXT) \ + gmapl_avx512-bitpack64-readtwo.$(OBJEXT) \ + gmapl_avx512-merge.$(OBJEXT) gmapl_avx512-indexdb.$(OBJEXT) \ + gmapl_avx512-indexdb_hr.$(OBJEXT) gmapl_avx512-oligo.$(OBJEXT) \ + gmapl_avx512-block.$(OBJEXT) gmapl_avx512-chrom.$(OBJEXT) \ + gmapl_avx512-segmentpos.$(OBJEXT) \ + gmapl_avx512-chrnum.$(OBJEXT) gmapl_avx512-uinttable.$(OBJEXT) \ + gmapl_avx512-gregion.$(OBJEXT) gmapl_avx512-match.$(OBJEXT) \ + gmapl_avx512-matchpool.$(OBJEXT) \ + gmapl_avx512-diagnostic.$(OBJEXT) \ + gmapl_avx512-stage1.$(OBJEXT) gmapl_avx512-diag.$(OBJEXT) \ + gmapl_avx512-diagpool.$(OBJEXT) gmapl_avx512-cmet.$(OBJEXT) \ + gmapl_avx512-atoi.$(OBJEXT) gmapl_avx512-orderstat.$(OBJEXT) \ + gmapl_avx512-oligoindex_hr.$(OBJEXT) \ + gmapl_avx512-intron.$(OBJEXT) gmapl_avx512-maxent.$(OBJEXT) \ + gmapl_avx512-maxent_hr.$(OBJEXT) gmapl_avx512-pair.$(OBJEXT) \ + gmapl_avx512-pairpool.$(OBJEXT) \ + gmapl_avx512-cellpool.$(OBJEXT) gmapl_avx512-stage2.$(OBJEXT) \ + gmapl_avx512-doublelist.$(OBJEXT) \ + gmapl_avx512-smooth.$(OBJEXT) \ + gmapl_avx512-splicestringpool.$(OBJEXT) \ + gmapl_avx512-splicetrie_build.$(OBJEXT) \ + gmapl_avx512-splicetrie.$(OBJEXT) \ + gmapl_avx512-boyer-moore.$(OBJEXT) \ + 
gmapl_avx512-dynprog.$(OBJEXT) \ + gmapl_avx512-dynprog_simd.$(OBJEXT) \ + gmapl_avx512-dynprog_single.$(OBJEXT) \ + gmapl_avx512-dynprog_genome.$(OBJEXT) \ + gmapl_avx512-dynprog_cdna.$(OBJEXT) \ + gmapl_avx512-dynprog_end.$(OBJEXT) \ + gmapl_avx512-translation.$(OBJEXT) \ + gmapl_avx512-pbinom.$(OBJEXT) \ + gmapl_avx512-changepoint.$(OBJEXT) \ + gmapl_avx512-stage3.$(OBJEXT) gmapl_avx512-request.$(OBJEXT) \ + gmapl_avx512-result.$(OBJEXT) gmapl_avx512-output.$(OBJEXT) \ + gmapl_avx512-inbuffer.$(OBJEXT) \ + gmapl_avx512-samheader.$(OBJEXT) \ + gmapl_avx512-outbuffer.$(OBJEXT) \ + gmapl_avx512-chimera.$(OBJEXT) gmapl_avx512-datadir.$(OBJEXT) \ + gmapl_avx512-parserange.$(OBJEXT) \ + gmapl_avx512-getopt.$(OBJEXT) gmapl_avx512-getopt1.$(OBJEXT) \ + gmapl_avx512-gmap.$(OBJEXT) +dist_gmapl_avx512_OBJECTS = $(am__objects_16) +gmapl_avx512_OBJECTS = $(dist_gmapl_avx512_OBJECTS) +gmapl_avx512_DEPENDENCIES = $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) +gmapl_avx512_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmapl_avx512_CFLAGS) \ + $(CFLAGS) $(gmapl_avx512_LDFLAGS) $(LDFLAGS) -o $@ +am__objects_17 = gmapl_nosimd-except.$(OBJEXT) \ gmapl_nosimd-assert.$(OBJEXT) gmapl_nosimd-mem.$(OBJEXT) \ gmapl_nosimd-intlist.$(OBJEXT) gmapl_nosimd-list.$(OBJEXT) \ gmapl_nosimd-littleendian.$(OBJEXT) \ @@ -746,7 +895,7 @@ gmapl_nosimd-genome-write.$(OBJEXT) \ gmapl_nosimd-bitpack64-read.$(OBJEXT) \ gmapl_nosimd-bitpack64-readtwo.$(OBJEXT) \ - gmapl_nosimd-indexdb.$(OBJEXT) \ + gmapl_nosimd-merge.$(OBJEXT) gmapl_nosimd-indexdb.$(OBJEXT) \ gmapl_nosimd-indexdb_hr.$(OBJEXT) gmapl_nosimd-oligo.$(OBJEXT) \ gmapl_nosimd-block.$(OBJEXT) gmapl_nosimd-chrom.$(OBJEXT) \ gmapl_nosimd-segmentpos.$(OBJEXT) \ @@ -786,14 +935,14 @@ gmapl_nosimd-parserange.$(OBJEXT) \ gmapl_nosimd-getopt.$(OBJEXT) gmapl_nosimd-getopt1.$(OBJEXT) \ gmapl_nosimd-gmap.$(OBJEXT) -dist_gmapl_nosimd_OBJECTS = $(am__objects_15) 
+dist_gmapl_nosimd_OBJECTS = $(am__objects_17) gmapl_nosimd_OBJECTS = $(dist_gmapl_nosimd_OBJECTS) gmapl_nosimd_DEPENDENCIES = $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) gmapl_nosimd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmapl_nosimd_CFLAGS) \ $(CFLAGS) $(gmapl_nosimd_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_16 = gmapl_sse2-except.$(OBJEXT) \ +am__objects_18 = gmapl_sse2-except.$(OBJEXT) \ gmapl_sse2-assert.$(OBJEXT) gmapl_sse2-mem.$(OBJEXT) \ gmapl_sse2-intlist.$(OBJEXT) gmapl_sse2-list.$(OBJEXT) \ gmapl_sse2-littleendian.$(OBJEXT) \ @@ -816,15 +965,16 @@ gmapl_sse2-genome-write.$(OBJEXT) \ gmapl_sse2-bitpack64-read.$(OBJEXT) \ gmapl_sse2-bitpack64-readtwo.$(OBJEXT) \ - gmapl_sse2-indexdb.$(OBJEXT) gmapl_sse2-indexdb_hr.$(OBJEXT) \ - gmapl_sse2-oligo.$(OBJEXT) gmapl_sse2-block.$(OBJEXT) \ - gmapl_sse2-chrom.$(OBJEXT) gmapl_sse2-segmentpos.$(OBJEXT) \ - gmapl_sse2-chrnum.$(OBJEXT) gmapl_sse2-uinttable.$(OBJEXT) \ - gmapl_sse2-gregion.$(OBJEXT) gmapl_sse2-match.$(OBJEXT) \ - gmapl_sse2-matchpool.$(OBJEXT) gmapl_sse2-diagnostic.$(OBJEXT) \ - gmapl_sse2-stage1.$(OBJEXT) gmapl_sse2-diag.$(OBJEXT) \ - gmapl_sse2-diagpool.$(OBJEXT) gmapl_sse2-cmet.$(OBJEXT) \ - gmapl_sse2-atoi.$(OBJEXT) gmapl_sse2-orderstat.$(OBJEXT) \ + gmapl_sse2-merge.$(OBJEXT) gmapl_sse2-indexdb.$(OBJEXT) \ + gmapl_sse2-indexdb_hr.$(OBJEXT) gmapl_sse2-oligo.$(OBJEXT) \ + gmapl_sse2-block.$(OBJEXT) gmapl_sse2-chrom.$(OBJEXT) \ + gmapl_sse2-segmentpos.$(OBJEXT) gmapl_sse2-chrnum.$(OBJEXT) \ + gmapl_sse2-uinttable.$(OBJEXT) gmapl_sse2-gregion.$(OBJEXT) \ + gmapl_sse2-match.$(OBJEXT) gmapl_sse2-matchpool.$(OBJEXT) \ + gmapl_sse2-diagnostic.$(OBJEXT) gmapl_sse2-stage1.$(OBJEXT) \ + gmapl_sse2-diag.$(OBJEXT) gmapl_sse2-diagpool.$(OBJEXT) \ + gmapl_sse2-cmet.$(OBJEXT) gmapl_sse2-atoi.$(OBJEXT) \ + gmapl_sse2-orderstat.$(OBJEXT) \ gmapl_sse2-oligoindex_hr.$(OBJEXT) gmapl_sse2-intron.$(OBJEXT) \ gmapl_sse2-maxent.$(OBJEXT) 
gmapl_sse2-maxent_hr.$(OBJEXT) \ gmapl_sse2-pair.$(OBJEXT) gmapl_sse2-pairpool.$(OBJEXT) \ @@ -847,14 +997,14 @@ gmapl_sse2-chimera.$(OBJEXT) gmapl_sse2-datadir.$(OBJEXT) \ gmapl_sse2-parserange.$(OBJEXT) gmapl_sse2-getopt.$(OBJEXT) \ gmapl_sse2-getopt1.$(OBJEXT) gmapl_sse2-gmap.$(OBJEXT) -dist_gmapl_sse2_OBJECTS = $(am__objects_16) +dist_gmapl_sse2_OBJECTS = $(am__objects_18) gmapl_sse2_OBJECTS = $(dist_gmapl_sse2_OBJECTS) gmapl_sse2_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) gmapl_sse2_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmapl_sse2_CFLAGS) \ $(CFLAGS) $(gmapl_sse2_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_17 = gmapl_sse41-except.$(OBJEXT) \ +am__objects_19 = gmapl_sse41-except.$(OBJEXT) \ gmapl_sse41-assert.$(OBJEXT) gmapl_sse41-mem.$(OBJEXT) \ gmapl_sse41-intlist.$(OBJEXT) gmapl_sse41-list.$(OBJEXT) \ gmapl_sse41-littleendian.$(OBJEXT) \ @@ -878,12 +1028,12 @@ gmapl_sse41-genome-write.$(OBJEXT) \ gmapl_sse41-bitpack64-read.$(OBJEXT) \ gmapl_sse41-bitpack64-readtwo.$(OBJEXT) \ - gmapl_sse41-indexdb.$(OBJEXT) gmapl_sse41-indexdb_hr.$(OBJEXT) \ - gmapl_sse41-oligo.$(OBJEXT) gmapl_sse41-block.$(OBJEXT) \ - gmapl_sse41-chrom.$(OBJEXT) gmapl_sse41-segmentpos.$(OBJEXT) \ - gmapl_sse41-chrnum.$(OBJEXT) gmapl_sse41-uinttable.$(OBJEXT) \ - gmapl_sse41-gregion.$(OBJEXT) gmapl_sse41-match.$(OBJEXT) \ - gmapl_sse41-matchpool.$(OBJEXT) \ + gmapl_sse41-merge.$(OBJEXT) gmapl_sse41-indexdb.$(OBJEXT) \ + gmapl_sse41-indexdb_hr.$(OBJEXT) gmapl_sse41-oligo.$(OBJEXT) \ + gmapl_sse41-block.$(OBJEXT) gmapl_sse41-chrom.$(OBJEXT) \ + gmapl_sse41-segmentpos.$(OBJEXT) gmapl_sse41-chrnum.$(OBJEXT) \ + gmapl_sse41-uinttable.$(OBJEXT) gmapl_sse41-gregion.$(OBJEXT) \ + gmapl_sse41-match.$(OBJEXT) gmapl_sse41-matchpool.$(OBJEXT) \ gmapl_sse41-diagnostic.$(OBJEXT) gmapl_sse41-stage1.$(OBJEXT) \ gmapl_sse41-diag.$(OBJEXT) gmapl_sse41-diagpool.$(OBJEXT) \ gmapl_sse41-cmet.$(OBJEXT) 
gmapl_sse41-atoi.$(OBJEXT) \ @@ -913,14 +1063,14 @@ gmapl_sse41-datadir.$(OBJEXT) gmapl_sse41-parserange.$(OBJEXT) \ gmapl_sse41-getopt.$(OBJEXT) gmapl_sse41-getopt1.$(OBJEXT) \ gmapl_sse41-gmap.$(OBJEXT) -dist_gmapl_sse41_OBJECTS = $(am__objects_17) +dist_gmapl_sse41_OBJECTS = $(am__objects_19) gmapl_sse41_OBJECTS = $(dist_gmapl_sse41_OBJECTS) gmapl_sse41_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) gmapl_sse41_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmapl_sse41_CFLAGS) \ $(CFLAGS) $(gmapl_sse41_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_18 = gmapl_sse42-except.$(OBJEXT) \ +am__objects_20 = gmapl_sse42-except.$(OBJEXT) \ gmapl_sse42-assert.$(OBJEXT) gmapl_sse42-mem.$(OBJEXT) \ gmapl_sse42-intlist.$(OBJEXT) gmapl_sse42-list.$(OBJEXT) \ gmapl_sse42-littleendian.$(OBJEXT) \ @@ -944,12 +1094,12 @@ gmapl_sse42-genome-write.$(OBJEXT) \ gmapl_sse42-bitpack64-read.$(OBJEXT) \ gmapl_sse42-bitpack64-readtwo.$(OBJEXT) \ - gmapl_sse42-indexdb.$(OBJEXT) gmapl_sse42-indexdb_hr.$(OBJEXT) \ - gmapl_sse42-oligo.$(OBJEXT) gmapl_sse42-block.$(OBJEXT) \ - gmapl_sse42-chrom.$(OBJEXT) gmapl_sse42-segmentpos.$(OBJEXT) \ - gmapl_sse42-chrnum.$(OBJEXT) gmapl_sse42-uinttable.$(OBJEXT) \ - gmapl_sse42-gregion.$(OBJEXT) gmapl_sse42-match.$(OBJEXT) \ - gmapl_sse42-matchpool.$(OBJEXT) \ + gmapl_sse42-merge.$(OBJEXT) gmapl_sse42-indexdb.$(OBJEXT) \ + gmapl_sse42-indexdb_hr.$(OBJEXT) gmapl_sse42-oligo.$(OBJEXT) \ + gmapl_sse42-block.$(OBJEXT) gmapl_sse42-chrom.$(OBJEXT) \ + gmapl_sse42-segmentpos.$(OBJEXT) gmapl_sse42-chrnum.$(OBJEXT) \ + gmapl_sse42-uinttable.$(OBJEXT) gmapl_sse42-gregion.$(OBJEXT) \ + gmapl_sse42-match.$(OBJEXT) gmapl_sse42-matchpool.$(OBJEXT) \ gmapl_sse42-diagnostic.$(OBJEXT) gmapl_sse42-stage1.$(OBJEXT) \ gmapl_sse42-diag.$(OBJEXT) gmapl_sse42-diagpool.$(OBJEXT) \ gmapl_sse42-cmet.$(OBJEXT) gmapl_sse42-atoi.$(OBJEXT) \ @@ -979,14 +1129,14 @@ gmapl_sse42-datadir.$(OBJEXT) 
gmapl_sse42-parserange.$(OBJEXT) \ gmapl_sse42-getopt.$(OBJEXT) gmapl_sse42-getopt1.$(OBJEXT) \ gmapl_sse42-gmap.$(OBJEXT) -dist_gmapl_sse42_OBJECTS = $(am__objects_18) +dist_gmapl_sse42_OBJECTS = $(am__objects_20) gmapl_sse42_OBJECTS = $(dist_gmapl_sse42_OBJECTS) gmapl_sse42_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) gmapl_sse42_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmapl_sse42_CFLAGS) \ $(CFLAGS) $(gmapl_sse42_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_19 = gmapl_ssse3-except.$(OBJEXT) \ +am__objects_21 = gmapl_ssse3-except.$(OBJEXT) \ gmapl_ssse3-assert.$(OBJEXT) gmapl_ssse3-mem.$(OBJEXT) \ gmapl_ssse3-intlist.$(OBJEXT) gmapl_ssse3-list.$(OBJEXT) \ gmapl_ssse3-littleendian.$(OBJEXT) \ @@ -1010,12 +1160,12 @@ gmapl_ssse3-genome-write.$(OBJEXT) \ gmapl_ssse3-bitpack64-read.$(OBJEXT) \ gmapl_ssse3-bitpack64-readtwo.$(OBJEXT) \ - gmapl_ssse3-indexdb.$(OBJEXT) gmapl_ssse3-indexdb_hr.$(OBJEXT) \ - gmapl_ssse3-oligo.$(OBJEXT) gmapl_ssse3-block.$(OBJEXT) \ - gmapl_ssse3-chrom.$(OBJEXT) gmapl_ssse3-segmentpos.$(OBJEXT) \ - gmapl_ssse3-chrnum.$(OBJEXT) gmapl_ssse3-uinttable.$(OBJEXT) \ - gmapl_ssse3-gregion.$(OBJEXT) gmapl_ssse3-match.$(OBJEXT) \ - gmapl_ssse3-matchpool.$(OBJEXT) \ + gmapl_ssse3-merge.$(OBJEXT) gmapl_ssse3-indexdb.$(OBJEXT) \ + gmapl_ssse3-indexdb_hr.$(OBJEXT) gmapl_ssse3-oligo.$(OBJEXT) \ + gmapl_ssse3-block.$(OBJEXT) gmapl_ssse3-chrom.$(OBJEXT) \ + gmapl_ssse3-segmentpos.$(OBJEXT) gmapl_ssse3-chrnum.$(OBJEXT) \ + gmapl_ssse3-uinttable.$(OBJEXT) gmapl_ssse3-gregion.$(OBJEXT) \ + gmapl_ssse3-match.$(OBJEXT) gmapl_ssse3-matchpool.$(OBJEXT) \ gmapl_ssse3-diagnostic.$(OBJEXT) gmapl_ssse3-stage1.$(OBJEXT) \ gmapl_ssse3-diag.$(OBJEXT) gmapl_ssse3-diagpool.$(OBJEXT) \ gmapl_ssse3-cmet.$(OBJEXT) gmapl_ssse3-atoi.$(OBJEXT) \ @@ -1045,21 +1195,21 @@ gmapl_ssse3-datadir.$(OBJEXT) gmapl_ssse3-parserange.$(OBJEXT) \ gmapl_ssse3-getopt.$(OBJEXT) gmapl_ssse3-getopt1.$(OBJEXT) 
\ gmapl_ssse3-gmap.$(OBJEXT) -dist_gmapl_ssse3_OBJECTS = $(am__objects_19) +dist_gmapl_ssse3_OBJECTS = $(am__objects_21) gmapl_ssse3_OBJECTS = $(dist_gmapl_ssse3_OBJECTS) gmapl_ssse3_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) gmapl_ssse3_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gmapl_ssse3_CFLAGS) \ $(CFLAGS) $(gmapl_ssse3_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_20 = gsnap-cpuid.$(OBJEXT) gsnap-gsnap_select.$(OBJEXT) -dist_gsnap_OBJECTS = $(am__objects_20) +am__objects_22 = gsnap-cpuid.$(OBJEXT) gsnap-gsnap_select.$(OBJEXT) +dist_gsnap_OBJECTS = $(am__objects_22) gsnap_OBJECTS = $(dist_gsnap_OBJECTS) gsnap_DEPENDENCIES = gsnap_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnap_CFLAGS) $(CFLAGS) \ $(gsnap_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_21 = gsnap_avx2-except.$(OBJEXT) \ +am__objects_23 = gsnap_avx2-except.$(OBJEXT) \ gsnap_avx2-assert.$(OBJEXT) gsnap_avx2-mem.$(OBJEXT) \ gsnap_avx2-intlist.$(OBJEXT) gsnap_avx2-list.$(OBJEXT) \ gsnap_avx2-littleendian.$(OBJEXT) \ @@ -1078,17 +1228,18 @@ gsnap_avx2-genome_sites.$(OBJEXT) \ gsnap_avx2-bitpack64-read.$(OBJEXT) \ gsnap_avx2-bitpack64-readtwo.$(OBJEXT) \ - gsnap_avx2-indexdb.$(OBJEXT) gsnap_avx2-indexdb_hr.$(OBJEXT) \ - gsnap_avx2-oligo.$(OBJEXT) gsnap_avx2-chrom.$(OBJEXT) \ - gsnap_avx2-segmentpos.$(OBJEXT) gsnap_avx2-chrnum.$(OBJEXT) \ - gsnap_avx2-maxent_hr.$(OBJEXT) gsnap_avx2-samprint.$(OBJEXT) \ - gsnap_avx2-mapq.$(OBJEXT) gsnap_avx2-shortread.$(OBJEXT) \ - gsnap_avx2-substring.$(OBJEXT) gsnap_avx2-junction.$(OBJEXT) \ - gsnap_avx2-stage3hr.$(OBJEXT) gsnap_avx2-spanningelt.$(OBJEXT) \ - gsnap_avx2-cmet.$(OBJEXT) gsnap_avx2-atoi.$(OBJEXT) \ - gsnap_avx2-maxent.$(OBJEXT) gsnap_avx2-pair.$(OBJEXT) \ - gsnap_avx2-pairpool.$(OBJEXT) gsnap_avx2-diag.$(OBJEXT) \ - gsnap_avx2-diagpool.$(OBJEXT) gsnap_avx2-orderstat.$(OBJEXT) \ + gsnap_avx2-merge.$(OBJEXT) 
gsnap_avx2-indexdb.$(OBJEXT) \ + gsnap_avx2-indexdb_hr.$(OBJEXT) gsnap_avx2-oligo.$(OBJEXT) \ + gsnap_avx2-chrom.$(OBJEXT) gsnap_avx2-segmentpos.$(OBJEXT) \ + gsnap_avx2-chrnum.$(OBJEXT) gsnap_avx2-maxent_hr.$(OBJEXT) \ + gsnap_avx2-samprint.$(OBJEXT) gsnap_avx2-mapq.$(OBJEXT) \ + gsnap_avx2-shortread.$(OBJEXT) gsnap_avx2-substring.$(OBJEXT) \ + gsnap_avx2-junction.$(OBJEXT) gsnap_avx2-stage3hr.$(OBJEXT) \ + gsnap_avx2-spanningelt.$(OBJEXT) gsnap_avx2-cmet.$(OBJEXT) \ + gsnap_avx2-atoi.$(OBJEXT) gsnap_avx2-maxent.$(OBJEXT) \ + gsnap_avx2-pair.$(OBJEXT) gsnap_avx2-pairpool.$(OBJEXT) \ + gsnap_avx2-diag.$(OBJEXT) gsnap_avx2-diagpool.$(OBJEXT) \ + gsnap_avx2-orderstat.$(OBJEXT) \ gsnap_avx2-oligoindex_hr.$(OBJEXT) \ gsnap_avx2-cellpool.$(OBJEXT) gsnap_avx2-stage2.$(OBJEXT) \ gsnap_avx2-intron.$(OBJEXT) gsnap_avx2-boyer-moore.$(OBJEXT) \ @@ -1107,21 +1258,99 @@ gsnap_avx2-bitpack64-access.$(OBJEXT) \ gsnap_avx2-bytecoding.$(OBJEXT) gsnap_avx2-univdiag.$(OBJEXT) \ gsnap_avx2-sedgesort.$(OBJEXT) \ - gsnap_avx2-sarray-read.$(OBJEXT) gsnap_avx2-stage1hr.$(OBJEXT) \ + gsnap_avx2-sarray-read.$(OBJEXT) \ + gsnap_avx2-merge-heap.$(OBJEXT) gsnap_avx2-stage1hr.$(OBJEXT) \ gsnap_avx2-request.$(OBJEXT) gsnap_avx2-resulthr.$(OBJEXT) \ gsnap_avx2-output.$(OBJEXT) gsnap_avx2-inbuffer.$(OBJEXT) \ gsnap_avx2-samheader.$(OBJEXT) gsnap_avx2-outbuffer.$(OBJEXT) \ gsnap_avx2-datadir.$(OBJEXT) gsnap_avx2-parserange.$(OBJEXT) \ gsnap_avx2-getopt.$(OBJEXT) gsnap_avx2-getopt1.$(OBJEXT) \ gsnap_avx2-gsnap.$(OBJEXT) -dist_gsnap_avx2_OBJECTS = $(am__objects_21) +dist_gsnap_avx2_OBJECTS = $(am__objects_23) gsnap_avx2_OBJECTS = $(dist_gsnap_avx2_OBJECTS) gsnap_avx2_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) gsnap_avx2_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnap_avx2_CFLAGS) \ $(CFLAGS) $(gsnap_avx2_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_22 = gsnap_nosimd-except.$(OBJEXT) \ +am__objects_24 = 
gsnap_avx512-except.$(OBJEXT) \ + gsnap_avx512-assert.$(OBJEXT) gsnap_avx512-mem.$(OBJEXT) \ + gsnap_avx512-intlist.$(OBJEXT) gsnap_avx512-list.$(OBJEXT) \ + gsnap_avx512-littleendian.$(OBJEXT) \ + gsnap_avx512-bigendian.$(OBJEXT) \ + gsnap_avx512-univinterval.$(OBJEXT) \ + gsnap_avx512-interval.$(OBJEXT) \ + gsnap_avx512-uintlist.$(OBJEXT) \ + gsnap_avx512-stopwatch.$(OBJEXT) \ + gsnap_avx512-semaphore.$(OBJEXT) gsnap_avx512-access.$(OBJEXT) \ + gsnap_avx512-filestring.$(OBJEXT) \ + gsnap_avx512-iit-read-univ.$(OBJEXT) \ + gsnap_avx512-iit-read.$(OBJEXT) gsnap_avx512-md5.$(OBJEXT) \ + gsnap_avx512-bzip2.$(OBJEXT) gsnap_avx512-sequence.$(OBJEXT) \ + gsnap_avx512-reader.$(OBJEXT) \ + gsnap_avx512-genomicpos.$(OBJEXT) \ + gsnap_avx512-compress.$(OBJEXT) gsnap_avx512-genome.$(OBJEXT) \ + gsnap_avx512-popcount.$(OBJEXT) \ + gsnap_avx512-genome128_hr.$(OBJEXT) \ + gsnap_avx512-genome_sites.$(OBJEXT) \ + gsnap_avx512-bitpack64-read.$(OBJEXT) \ + gsnap_avx512-bitpack64-readtwo.$(OBJEXT) \ + gsnap_avx512-merge.$(OBJEXT) gsnap_avx512-indexdb.$(OBJEXT) \ + gsnap_avx512-indexdb_hr.$(OBJEXT) gsnap_avx512-oligo.$(OBJEXT) \ + gsnap_avx512-chrom.$(OBJEXT) gsnap_avx512-segmentpos.$(OBJEXT) \ + gsnap_avx512-chrnum.$(OBJEXT) gsnap_avx512-maxent_hr.$(OBJEXT) \ + gsnap_avx512-samprint.$(OBJEXT) gsnap_avx512-mapq.$(OBJEXT) \ + gsnap_avx512-shortread.$(OBJEXT) \ + gsnap_avx512-substring.$(OBJEXT) \ + gsnap_avx512-junction.$(OBJEXT) \ + gsnap_avx512-stage3hr.$(OBJEXT) \ + gsnap_avx512-spanningelt.$(OBJEXT) gsnap_avx512-cmet.$(OBJEXT) \ + gsnap_avx512-atoi.$(OBJEXT) gsnap_avx512-maxent.$(OBJEXT) \ + gsnap_avx512-pair.$(OBJEXT) gsnap_avx512-pairpool.$(OBJEXT) \ + gsnap_avx512-diag.$(OBJEXT) gsnap_avx512-diagpool.$(OBJEXT) \ + gsnap_avx512-orderstat.$(OBJEXT) \ + gsnap_avx512-oligoindex_hr.$(OBJEXT) \ + gsnap_avx512-cellpool.$(OBJEXT) gsnap_avx512-stage2.$(OBJEXT) \ + gsnap_avx512-intron.$(OBJEXT) \ + gsnap_avx512-boyer-moore.$(OBJEXT) \ + gsnap_avx512-changepoint.$(OBJEXT) \ + 
gsnap_avx512-pbinom.$(OBJEXT) gsnap_avx512-dynprog.$(OBJEXT) \ + gsnap_avx512-dynprog_simd.$(OBJEXT) \ + gsnap_avx512-dynprog_single.$(OBJEXT) \ + gsnap_avx512-dynprog_genome.$(OBJEXT) \ + gsnap_avx512-dynprog_cdna.$(OBJEXT) \ + gsnap_avx512-dynprog_end.$(OBJEXT) \ + gsnap_avx512-gbuffer.$(OBJEXT) \ + gsnap_avx512-doublelist.$(OBJEXT) \ + gsnap_avx512-smooth.$(OBJEXT) gsnap_avx512-chimera.$(OBJEXT) \ + gsnap_avx512-stage3.$(OBJEXT) \ + gsnap_avx512-splicestringpool.$(OBJEXT) \ + gsnap_avx512-splicetrie_build.$(OBJEXT) \ + gsnap_avx512-splicetrie.$(OBJEXT) \ + gsnap_avx512-splice.$(OBJEXT) gsnap_avx512-indel.$(OBJEXT) \ + gsnap_avx512-bitpack64-access.$(OBJEXT) \ + gsnap_avx512-bytecoding.$(OBJEXT) \ + gsnap_avx512-univdiag.$(OBJEXT) \ + gsnap_avx512-sedgesort.$(OBJEXT) \ + gsnap_avx512-sarray-read.$(OBJEXT) \ + gsnap_avx512-merge-heap.$(OBJEXT) \ + gsnap_avx512-stage1hr.$(OBJEXT) gsnap_avx512-request.$(OBJEXT) \ + gsnap_avx512-resulthr.$(OBJEXT) gsnap_avx512-output.$(OBJEXT) \ + gsnap_avx512-inbuffer.$(OBJEXT) \ + gsnap_avx512-samheader.$(OBJEXT) \ + gsnap_avx512-outbuffer.$(OBJEXT) \ + gsnap_avx512-datadir.$(OBJEXT) \ + gsnap_avx512-parserange.$(OBJEXT) \ + gsnap_avx512-getopt.$(OBJEXT) gsnap_avx512-getopt1.$(OBJEXT) \ + gsnap_avx512-gsnap.$(OBJEXT) +dist_gsnap_avx512_OBJECTS = $(am__objects_24) +gsnap_avx512_OBJECTS = $(dist_gsnap_avx512_OBJECTS) +gsnap_avx512_DEPENDENCIES = $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) +gsnap_avx512_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnap_avx512_CFLAGS) \ + $(CFLAGS) $(gsnap_avx512_LDFLAGS) $(LDFLAGS) -o $@ +am__objects_25 = gsnap_nosimd-except.$(OBJEXT) \ gsnap_nosimd-assert.$(OBJEXT) gsnap_nosimd-mem.$(OBJEXT) \ gsnap_nosimd-intlist.$(OBJEXT) gsnap_nosimd-list.$(OBJEXT) \ gsnap_nosimd-littleendian.$(OBJEXT) \ @@ -1143,7 +1372,7 @@ gsnap_nosimd-genome_sites.$(OBJEXT) \ gsnap_nosimd-bitpack64-read.$(OBJEXT) \ 
gsnap_nosimd-bitpack64-readtwo.$(OBJEXT) \ - gsnap_nosimd-indexdb.$(OBJEXT) \ + gsnap_nosimd-merge.$(OBJEXT) gsnap_nosimd-indexdb.$(OBJEXT) \ gsnap_nosimd-indexdb_hr.$(OBJEXT) gsnap_nosimd-oligo.$(OBJEXT) \ gsnap_nosimd-chrom.$(OBJEXT) gsnap_nosimd-segmentpos.$(OBJEXT) \ gsnap_nosimd-chrnum.$(OBJEXT) gsnap_nosimd-maxent_hr.$(OBJEXT) \ @@ -1181,6 +1410,7 @@ gsnap_nosimd-univdiag.$(OBJEXT) \ gsnap_nosimd-sedgesort.$(OBJEXT) \ gsnap_nosimd-sarray-read.$(OBJEXT) \ + gsnap_nosimd-merge-heap.$(OBJEXT) \ gsnap_nosimd-stage1hr.$(OBJEXT) gsnap_nosimd-request.$(OBJEXT) \ gsnap_nosimd-resulthr.$(OBJEXT) gsnap_nosimd-output.$(OBJEXT) \ gsnap_nosimd-inbuffer.$(OBJEXT) \ @@ -1190,14 +1420,14 @@ gsnap_nosimd-parserange.$(OBJEXT) \ gsnap_nosimd-getopt.$(OBJEXT) gsnap_nosimd-getopt1.$(OBJEXT) \ gsnap_nosimd-gsnap.$(OBJEXT) -dist_gsnap_nosimd_OBJECTS = $(am__objects_22) +dist_gsnap_nosimd_OBJECTS = $(am__objects_25) gsnap_nosimd_OBJECTS = $(dist_gsnap_nosimd_OBJECTS) gsnap_nosimd_DEPENDENCIES = $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) gsnap_nosimd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnap_nosimd_CFLAGS) \ $(CFLAGS) $(gsnap_nosimd_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_23 = gsnap_sse2-except.$(OBJEXT) \ +am__objects_26 = gsnap_sse2-except.$(OBJEXT) \ gsnap_sse2-assert.$(OBJEXT) gsnap_sse2-mem.$(OBJEXT) \ gsnap_sse2-intlist.$(OBJEXT) gsnap_sse2-list.$(OBJEXT) \ gsnap_sse2-littleendian.$(OBJEXT) \ @@ -1216,17 +1446,18 @@ gsnap_sse2-genome_sites.$(OBJEXT) \ gsnap_sse2-bitpack64-read.$(OBJEXT) \ gsnap_sse2-bitpack64-readtwo.$(OBJEXT) \ - gsnap_sse2-indexdb.$(OBJEXT) gsnap_sse2-indexdb_hr.$(OBJEXT) \ - gsnap_sse2-oligo.$(OBJEXT) gsnap_sse2-chrom.$(OBJEXT) \ - gsnap_sse2-segmentpos.$(OBJEXT) gsnap_sse2-chrnum.$(OBJEXT) \ - gsnap_sse2-maxent_hr.$(OBJEXT) gsnap_sse2-samprint.$(OBJEXT) \ - gsnap_sse2-mapq.$(OBJEXT) gsnap_sse2-shortread.$(OBJEXT) \ - gsnap_sse2-substring.$(OBJEXT) 
gsnap_sse2-junction.$(OBJEXT) \ - gsnap_sse2-stage3hr.$(OBJEXT) gsnap_sse2-spanningelt.$(OBJEXT) \ - gsnap_sse2-cmet.$(OBJEXT) gsnap_sse2-atoi.$(OBJEXT) \ - gsnap_sse2-maxent.$(OBJEXT) gsnap_sse2-pair.$(OBJEXT) \ - gsnap_sse2-pairpool.$(OBJEXT) gsnap_sse2-diag.$(OBJEXT) \ - gsnap_sse2-diagpool.$(OBJEXT) gsnap_sse2-orderstat.$(OBJEXT) \ + gsnap_sse2-merge.$(OBJEXT) gsnap_sse2-indexdb.$(OBJEXT) \ + gsnap_sse2-indexdb_hr.$(OBJEXT) gsnap_sse2-oligo.$(OBJEXT) \ + gsnap_sse2-chrom.$(OBJEXT) gsnap_sse2-segmentpos.$(OBJEXT) \ + gsnap_sse2-chrnum.$(OBJEXT) gsnap_sse2-maxent_hr.$(OBJEXT) \ + gsnap_sse2-samprint.$(OBJEXT) gsnap_sse2-mapq.$(OBJEXT) \ + gsnap_sse2-shortread.$(OBJEXT) gsnap_sse2-substring.$(OBJEXT) \ + gsnap_sse2-junction.$(OBJEXT) gsnap_sse2-stage3hr.$(OBJEXT) \ + gsnap_sse2-spanningelt.$(OBJEXT) gsnap_sse2-cmet.$(OBJEXT) \ + gsnap_sse2-atoi.$(OBJEXT) gsnap_sse2-maxent.$(OBJEXT) \ + gsnap_sse2-pair.$(OBJEXT) gsnap_sse2-pairpool.$(OBJEXT) \ + gsnap_sse2-diag.$(OBJEXT) gsnap_sse2-diagpool.$(OBJEXT) \ + gsnap_sse2-orderstat.$(OBJEXT) \ gsnap_sse2-oligoindex_hr.$(OBJEXT) \ gsnap_sse2-cellpool.$(OBJEXT) gsnap_sse2-stage2.$(OBJEXT) \ gsnap_sse2-intron.$(OBJEXT) gsnap_sse2-boyer-moore.$(OBJEXT) \ @@ -1245,21 +1476,22 @@ gsnap_sse2-bitpack64-access.$(OBJEXT) \ gsnap_sse2-bytecoding.$(OBJEXT) gsnap_sse2-univdiag.$(OBJEXT) \ gsnap_sse2-sedgesort.$(OBJEXT) \ - gsnap_sse2-sarray-read.$(OBJEXT) gsnap_sse2-stage1hr.$(OBJEXT) \ + gsnap_sse2-sarray-read.$(OBJEXT) \ + gsnap_sse2-merge-heap.$(OBJEXT) gsnap_sse2-stage1hr.$(OBJEXT) \ gsnap_sse2-request.$(OBJEXT) gsnap_sse2-resulthr.$(OBJEXT) \ gsnap_sse2-output.$(OBJEXT) gsnap_sse2-inbuffer.$(OBJEXT) \ gsnap_sse2-samheader.$(OBJEXT) gsnap_sse2-outbuffer.$(OBJEXT) \ gsnap_sse2-datadir.$(OBJEXT) gsnap_sse2-parserange.$(OBJEXT) \ gsnap_sse2-getopt.$(OBJEXT) gsnap_sse2-getopt1.$(OBJEXT) \ gsnap_sse2-gsnap.$(OBJEXT) -dist_gsnap_sse2_OBJECTS = $(am__objects_23) +dist_gsnap_sse2_OBJECTS = $(am__objects_26) gsnap_sse2_OBJECTS = 
$(dist_gsnap_sse2_OBJECTS) gsnap_sse2_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) gsnap_sse2_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnap_sse2_CFLAGS) \ $(CFLAGS) $(gsnap_sse2_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_24 = gsnap_sse41-except.$(OBJEXT) \ +am__objects_27 = gsnap_sse41-except.$(OBJEXT) \ gsnap_sse41-assert.$(OBJEXT) gsnap_sse41-mem.$(OBJEXT) \ gsnap_sse41-intlist.$(OBJEXT) gsnap_sse41-list.$(OBJEXT) \ gsnap_sse41-littleendian.$(OBJEXT) \ @@ -1279,11 +1511,12 @@ gsnap_sse41-genome_sites.$(OBJEXT) \ gsnap_sse41-bitpack64-read.$(OBJEXT) \ gsnap_sse41-bitpack64-readtwo.$(OBJEXT) \ - gsnap_sse41-indexdb.$(OBJEXT) gsnap_sse41-indexdb_hr.$(OBJEXT) \ - gsnap_sse41-oligo.$(OBJEXT) gsnap_sse41-chrom.$(OBJEXT) \ - gsnap_sse41-segmentpos.$(OBJEXT) gsnap_sse41-chrnum.$(OBJEXT) \ - gsnap_sse41-maxent_hr.$(OBJEXT) gsnap_sse41-samprint.$(OBJEXT) \ - gsnap_sse41-mapq.$(OBJEXT) gsnap_sse41-shortread.$(OBJEXT) \ + gsnap_sse41-merge.$(OBJEXT) gsnap_sse41-indexdb.$(OBJEXT) \ + gsnap_sse41-indexdb_hr.$(OBJEXT) gsnap_sse41-oligo.$(OBJEXT) \ + gsnap_sse41-chrom.$(OBJEXT) gsnap_sse41-segmentpos.$(OBJEXT) \ + gsnap_sse41-chrnum.$(OBJEXT) gsnap_sse41-maxent_hr.$(OBJEXT) \ + gsnap_sse41-samprint.$(OBJEXT) gsnap_sse41-mapq.$(OBJEXT) \ + gsnap_sse41-shortread.$(OBJEXT) \ gsnap_sse41-substring.$(OBJEXT) gsnap_sse41-junction.$(OBJEXT) \ gsnap_sse41-stage3hr.$(OBJEXT) \ gsnap_sse41-spanningelt.$(OBJEXT) gsnap_sse41-cmet.$(OBJEXT) \ @@ -1312,20 +1545,21 @@ gsnap_sse41-bytecoding.$(OBJEXT) \ gsnap_sse41-univdiag.$(OBJEXT) gsnap_sse41-sedgesort.$(OBJEXT) \ gsnap_sse41-sarray-read.$(OBJEXT) \ + gsnap_sse41-merge-heap.$(OBJEXT) \ gsnap_sse41-stage1hr.$(OBJEXT) gsnap_sse41-request.$(OBJEXT) \ gsnap_sse41-resulthr.$(OBJEXT) gsnap_sse41-output.$(OBJEXT) \ gsnap_sse41-inbuffer.$(OBJEXT) gsnap_sse41-samheader.$(OBJEXT) \ gsnap_sse41-outbuffer.$(OBJEXT) gsnap_sse41-datadir.$(OBJEXT) \ 
gsnap_sse41-parserange.$(OBJEXT) gsnap_sse41-getopt.$(OBJEXT) \ gsnap_sse41-getopt1.$(OBJEXT) gsnap_sse41-gsnap.$(OBJEXT) -dist_gsnap_sse41_OBJECTS = $(am__objects_24) +dist_gsnap_sse41_OBJECTS = $(am__objects_27) gsnap_sse41_OBJECTS = $(dist_gsnap_sse41_OBJECTS) gsnap_sse41_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) gsnap_sse41_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnap_sse41_CFLAGS) \ $(CFLAGS) $(gsnap_sse41_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_25 = gsnap_sse42-except.$(OBJEXT) \ +am__objects_28 = gsnap_sse42-except.$(OBJEXT) \ gsnap_sse42-assert.$(OBJEXT) gsnap_sse42-mem.$(OBJEXT) \ gsnap_sse42-intlist.$(OBJEXT) gsnap_sse42-list.$(OBJEXT) \ gsnap_sse42-littleendian.$(OBJEXT) \ @@ -1345,11 +1579,12 @@ gsnap_sse42-genome_sites.$(OBJEXT) \ gsnap_sse42-bitpack64-read.$(OBJEXT) \ gsnap_sse42-bitpack64-readtwo.$(OBJEXT) \ - gsnap_sse42-indexdb.$(OBJEXT) gsnap_sse42-indexdb_hr.$(OBJEXT) \ - gsnap_sse42-oligo.$(OBJEXT) gsnap_sse42-chrom.$(OBJEXT) \ - gsnap_sse42-segmentpos.$(OBJEXT) gsnap_sse42-chrnum.$(OBJEXT) \ - gsnap_sse42-maxent_hr.$(OBJEXT) gsnap_sse42-samprint.$(OBJEXT) \ - gsnap_sse42-mapq.$(OBJEXT) gsnap_sse42-shortread.$(OBJEXT) \ + gsnap_sse42-merge.$(OBJEXT) gsnap_sse42-indexdb.$(OBJEXT) \ + gsnap_sse42-indexdb_hr.$(OBJEXT) gsnap_sse42-oligo.$(OBJEXT) \ + gsnap_sse42-chrom.$(OBJEXT) gsnap_sse42-segmentpos.$(OBJEXT) \ + gsnap_sse42-chrnum.$(OBJEXT) gsnap_sse42-maxent_hr.$(OBJEXT) \ + gsnap_sse42-samprint.$(OBJEXT) gsnap_sse42-mapq.$(OBJEXT) \ + gsnap_sse42-shortread.$(OBJEXT) \ gsnap_sse42-substring.$(OBJEXT) gsnap_sse42-junction.$(OBJEXT) \ gsnap_sse42-stage3hr.$(OBJEXT) \ gsnap_sse42-spanningelt.$(OBJEXT) gsnap_sse42-cmet.$(OBJEXT) \ @@ -1378,20 +1613,21 @@ gsnap_sse42-bytecoding.$(OBJEXT) \ gsnap_sse42-univdiag.$(OBJEXT) gsnap_sse42-sedgesort.$(OBJEXT) \ gsnap_sse42-sarray-read.$(OBJEXT) \ + gsnap_sse42-merge-heap.$(OBJEXT) \ gsnap_sse42-stage1hr.$(OBJEXT) 
gsnap_sse42-request.$(OBJEXT) \ gsnap_sse42-resulthr.$(OBJEXT) gsnap_sse42-output.$(OBJEXT) \ gsnap_sse42-inbuffer.$(OBJEXT) gsnap_sse42-samheader.$(OBJEXT) \ gsnap_sse42-outbuffer.$(OBJEXT) gsnap_sse42-datadir.$(OBJEXT) \ gsnap_sse42-parserange.$(OBJEXT) gsnap_sse42-getopt.$(OBJEXT) \ gsnap_sse42-getopt1.$(OBJEXT) gsnap_sse42-gsnap.$(OBJEXT) -dist_gsnap_sse42_OBJECTS = $(am__objects_25) +dist_gsnap_sse42_OBJECTS = $(am__objects_28) gsnap_sse42_OBJECTS = $(dist_gsnap_sse42_OBJECTS) gsnap_sse42_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) gsnap_sse42_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnap_sse42_CFLAGS) \ $(CFLAGS) $(gsnap_sse42_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_26 = gsnap_ssse3-except.$(OBJEXT) \ +am__objects_29 = gsnap_ssse3-except.$(OBJEXT) \ gsnap_ssse3-assert.$(OBJEXT) gsnap_ssse3-mem.$(OBJEXT) \ gsnap_ssse3-intlist.$(OBJEXT) gsnap_ssse3-list.$(OBJEXT) \ gsnap_ssse3-littleendian.$(OBJEXT) \ @@ -1411,11 +1647,12 @@ gsnap_ssse3-genome_sites.$(OBJEXT) \ gsnap_ssse3-bitpack64-read.$(OBJEXT) \ gsnap_ssse3-bitpack64-readtwo.$(OBJEXT) \ - gsnap_ssse3-indexdb.$(OBJEXT) gsnap_ssse3-indexdb_hr.$(OBJEXT) \ - gsnap_ssse3-oligo.$(OBJEXT) gsnap_ssse3-chrom.$(OBJEXT) \ - gsnap_ssse3-segmentpos.$(OBJEXT) gsnap_ssse3-chrnum.$(OBJEXT) \ - gsnap_ssse3-maxent_hr.$(OBJEXT) gsnap_ssse3-samprint.$(OBJEXT) \ - gsnap_ssse3-mapq.$(OBJEXT) gsnap_ssse3-shortread.$(OBJEXT) \ + gsnap_ssse3-merge.$(OBJEXT) gsnap_ssse3-indexdb.$(OBJEXT) \ + gsnap_ssse3-indexdb_hr.$(OBJEXT) gsnap_ssse3-oligo.$(OBJEXT) \ + gsnap_ssse3-chrom.$(OBJEXT) gsnap_ssse3-segmentpos.$(OBJEXT) \ + gsnap_ssse3-chrnum.$(OBJEXT) gsnap_ssse3-maxent_hr.$(OBJEXT) \ + gsnap_ssse3-samprint.$(OBJEXT) gsnap_ssse3-mapq.$(OBJEXT) \ + gsnap_ssse3-shortread.$(OBJEXT) \ gsnap_ssse3-substring.$(OBJEXT) gsnap_ssse3-junction.$(OBJEXT) \ gsnap_ssse3-stage3hr.$(OBJEXT) \ gsnap_ssse3-spanningelt.$(OBJEXT) gsnap_ssse3-cmet.$(OBJEXT) \ @@ 
-1444,27 +1681,28 @@ gsnap_ssse3-bytecoding.$(OBJEXT) \ gsnap_ssse3-univdiag.$(OBJEXT) gsnap_ssse3-sedgesort.$(OBJEXT) \ gsnap_ssse3-sarray-read.$(OBJEXT) \ + gsnap_ssse3-merge-heap.$(OBJEXT) \ gsnap_ssse3-stage1hr.$(OBJEXT) gsnap_ssse3-request.$(OBJEXT) \ gsnap_ssse3-resulthr.$(OBJEXT) gsnap_ssse3-output.$(OBJEXT) \ gsnap_ssse3-inbuffer.$(OBJEXT) gsnap_ssse3-samheader.$(OBJEXT) \ gsnap_ssse3-outbuffer.$(OBJEXT) gsnap_ssse3-datadir.$(OBJEXT) \ gsnap_ssse3-parserange.$(OBJEXT) gsnap_ssse3-getopt.$(OBJEXT) \ gsnap_ssse3-getopt1.$(OBJEXT) gsnap_ssse3-gsnap.$(OBJEXT) -dist_gsnap_ssse3_OBJECTS = $(am__objects_26) +dist_gsnap_ssse3_OBJECTS = $(am__objects_29) gsnap_ssse3_OBJECTS = $(dist_gsnap_ssse3_OBJECTS) gsnap_ssse3_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) gsnap_ssse3_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnap_ssse3_CFLAGS) \ $(CFLAGS) $(gsnap_ssse3_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_27 = gsnapl-cpuid.$(OBJEXT) gsnapl-gsnapl_select.$(OBJEXT) -dist_gsnapl_OBJECTS = $(am__objects_27) +am__objects_30 = gsnapl-cpuid.$(OBJEXT) gsnapl-gsnapl_select.$(OBJEXT) +dist_gsnapl_OBJECTS = $(am__objects_30) gsnapl_OBJECTS = $(dist_gsnapl_OBJECTS) gsnapl_DEPENDENCIES = gsnapl_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnapl_CFLAGS) $(CFLAGS) \ $(gsnapl_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_28 = gsnapl_avx2-except.$(OBJEXT) \ +am__objects_31 = gsnapl_avx2-except.$(OBJEXT) \ gsnapl_avx2-assert.$(OBJEXT) gsnapl_avx2-mem.$(OBJEXT) \ gsnapl_avx2-intlist.$(OBJEXT) gsnapl_avx2-list.$(OBJEXT) \ gsnapl_avx2-littleendian.$(OBJEXT) \ @@ -1485,11 +1723,12 @@ gsnapl_avx2-genome_sites.$(OBJEXT) \ gsnapl_avx2-bitpack64-read.$(OBJEXT) \ gsnapl_avx2-bitpack64-readtwo.$(OBJEXT) \ - gsnapl_avx2-indexdb.$(OBJEXT) gsnapl_avx2-indexdb_hr.$(OBJEXT) \ - gsnapl_avx2-oligo.$(OBJEXT) gsnapl_avx2-chrom.$(OBJEXT) \ - gsnapl_avx2-segmentpos.$(OBJEXT) 
gsnapl_avx2-chrnum.$(OBJEXT) \ - gsnapl_avx2-maxent_hr.$(OBJEXT) gsnapl_avx2-samprint.$(OBJEXT) \ - gsnapl_avx2-mapq.$(OBJEXT) gsnapl_avx2-shortread.$(OBJEXT) \ + gsnapl_avx2-merge.$(OBJEXT) gsnapl_avx2-indexdb.$(OBJEXT) \ + gsnapl_avx2-indexdb_hr.$(OBJEXT) gsnapl_avx2-oligo.$(OBJEXT) \ + gsnapl_avx2-chrom.$(OBJEXT) gsnapl_avx2-segmentpos.$(OBJEXT) \ + gsnapl_avx2-chrnum.$(OBJEXT) gsnapl_avx2-maxent_hr.$(OBJEXT) \ + gsnapl_avx2-samprint.$(OBJEXT) gsnapl_avx2-mapq.$(OBJEXT) \ + gsnapl_avx2-shortread.$(OBJEXT) \ gsnapl_avx2-substring.$(OBJEXT) gsnapl_avx2-junction.$(OBJEXT) \ gsnapl_avx2-stage3hr.$(OBJEXT) \ gsnapl_avx2-spanningelt.$(OBJEXT) gsnapl_avx2-cmet.$(OBJEXT) \ @@ -1515,20 +1754,102 @@ gsnapl_avx2-splicetrie.$(OBJEXT) gsnapl_avx2-splice.$(OBJEXT) \ gsnapl_avx2-indel.$(OBJEXT) \ gsnapl_avx2-bitpack64-access.$(OBJEXT) \ + gsnapl_avx2-merge-heap.$(OBJEXT) \ gsnapl_avx2-stage1hr.$(OBJEXT) gsnapl_avx2-request.$(OBJEXT) \ gsnapl_avx2-resulthr.$(OBJEXT) gsnapl_avx2-output.$(OBJEXT) \ gsnapl_avx2-inbuffer.$(OBJEXT) gsnapl_avx2-samheader.$(OBJEXT) \ gsnapl_avx2-outbuffer.$(OBJEXT) gsnapl_avx2-datadir.$(OBJEXT) \ gsnapl_avx2-parserange.$(OBJEXT) gsnapl_avx2-getopt.$(OBJEXT) \ gsnapl_avx2-getopt1.$(OBJEXT) gsnapl_avx2-gsnap.$(OBJEXT) -dist_gsnapl_avx2_OBJECTS = $(am__objects_28) +dist_gsnapl_avx2_OBJECTS = $(am__objects_31) gsnapl_avx2_OBJECTS = $(dist_gsnapl_avx2_OBJECTS) gsnapl_avx2_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) gsnapl_avx2_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnapl_avx2_CFLAGS) \ $(CFLAGS) $(gsnapl_avx2_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_29 = gsnapl_nosimd-except.$(OBJEXT) \ +am__objects_32 = gsnapl_avx512-except.$(OBJEXT) \ + gsnapl_avx512-assert.$(OBJEXT) gsnapl_avx512-mem.$(OBJEXT) \ + gsnapl_avx512-intlist.$(OBJEXT) gsnapl_avx512-list.$(OBJEXT) \ + gsnapl_avx512-littleendian.$(OBJEXT) \ + gsnapl_avx512-bigendian.$(OBJEXT) \ + 
gsnapl_avx512-univinterval.$(OBJEXT) \ + gsnapl_avx512-interval.$(OBJEXT) \ + gsnapl_avx512-uintlist.$(OBJEXT) \ + gsnapl_avx512-uint8list.$(OBJEXT) \ + gsnapl_avx512-stopwatch.$(OBJEXT) \ + gsnapl_avx512-semaphore.$(OBJEXT) \ + gsnapl_avx512-access.$(OBJEXT) \ + gsnapl_avx512-filestring.$(OBJEXT) \ + gsnapl_avx512-iit-read-univ.$(OBJEXT) \ + gsnapl_avx512-iit-read.$(OBJEXT) gsnapl_avx512-md5.$(OBJEXT) \ + gsnapl_avx512-bzip2.$(OBJEXT) gsnapl_avx512-sequence.$(OBJEXT) \ + gsnapl_avx512-reader.$(OBJEXT) \ + gsnapl_avx512-genomicpos.$(OBJEXT) \ + gsnapl_avx512-compress.$(OBJEXT) \ + gsnapl_avx512-genome.$(OBJEXT) \ + gsnapl_avx512-popcount.$(OBJEXT) \ + gsnapl_avx512-genome128_hr.$(OBJEXT) \ + gsnapl_avx512-genome_sites.$(OBJEXT) \ + gsnapl_avx512-bitpack64-read.$(OBJEXT) \ + gsnapl_avx512-bitpack64-readtwo.$(OBJEXT) \ + gsnapl_avx512-merge.$(OBJEXT) gsnapl_avx512-indexdb.$(OBJEXT) \ + gsnapl_avx512-indexdb_hr.$(OBJEXT) \ + gsnapl_avx512-oligo.$(OBJEXT) gsnapl_avx512-chrom.$(OBJEXT) \ + gsnapl_avx512-segmentpos.$(OBJEXT) \ + gsnapl_avx512-chrnum.$(OBJEXT) \ + gsnapl_avx512-maxent_hr.$(OBJEXT) \ + gsnapl_avx512-samprint.$(OBJEXT) gsnapl_avx512-mapq.$(OBJEXT) \ + gsnapl_avx512-shortread.$(OBJEXT) \ + gsnapl_avx512-substring.$(OBJEXT) \ + gsnapl_avx512-junction.$(OBJEXT) \ + gsnapl_avx512-stage3hr.$(OBJEXT) \ + gsnapl_avx512-spanningelt.$(OBJEXT) \ + gsnapl_avx512-cmet.$(OBJEXT) gsnapl_avx512-atoi.$(OBJEXT) \ + gsnapl_avx512-maxent.$(OBJEXT) gsnapl_avx512-pair.$(OBJEXT) \ + gsnapl_avx512-pairpool.$(OBJEXT) gsnapl_avx512-diag.$(OBJEXT) \ + gsnapl_avx512-diagpool.$(OBJEXT) \ + gsnapl_avx512-orderstat.$(OBJEXT) \ + gsnapl_avx512-oligoindex_hr.$(OBJEXT) \ + gsnapl_avx512-cellpool.$(OBJEXT) \ + gsnapl_avx512-stage2.$(OBJEXT) gsnapl_avx512-intron.$(OBJEXT) \ + gsnapl_avx512-boyer-moore.$(OBJEXT) \ + gsnapl_avx512-changepoint.$(OBJEXT) \ + gsnapl_avx512-pbinom.$(OBJEXT) gsnapl_avx512-dynprog.$(OBJEXT) \ + gsnapl_avx512-dynprog_simd.$(OBJEXT) \ + 
gsnapl_avx512-dynprog_single.$(OBJEXT) \ + gsnapl_avx512-dynprog_genome.$(OBJEXT) \ + gsnapl_avx512-dynprog_cdna.$(OBJEXT) \ + gsnapl_avx512-dynprog_end.$(OBJEXT) \ + gsnapl_avx512-gbuffer.$(OBJEXT) \ + gsnapl_avx512-doublelist.$(OBJEXT) \ + gsnapl_avx512-smooth.$(OBJEXT) gsnapl_avx512-chimera.$(OBJEXT) \ + gsnapl_avx512-stage3.$(OBJEXT) \ + gsnapl_avx512-splicestringpool.$(OBJEXT) \ + gsnapl_avx512-splicetrie_build.$(OBJEXT) \ + gsnapl_avx512-splicetrie.$(OBJEXT) \ + gsnapl_avx512-splice.$(OBJEXT) gsnapl_avx512-indel.$(OBJEXT) \ + gsnapl_avx512-bitpack64-access.$(OBJEXT) \ + gsnapl_avx512-merge-heap.$(OBJEXT) \ + gsnapl_avx512-stage1hr.$(OBJEXT) \ + gsnapl_avx512-request.$(OBJEXT) \ + gsnapl_avx512-resulthr.$(OBJEXT) \ + gsnapl_avx512-output.$(OBJEXT) \ + gsnapl_avx512-inbuffer.$(OBJEXT) \ + gsnapl_avx512-samheader.$(OBJEXT) \ + gsnapl_avx512-outbuffer.$(OBJEXT) \ + gsnapl_avx512-datadir.$(OBJEXT) \ + gsnapl_avx512-parserange.$(OBJEXT) \ + gsnapl_avx512-getopt.$(OBJEXT) gsnapl_avx512-getopt1.$(OBJEXT) \ + gsnapl_avx512-gsnap.$(OBJEXT) +dist_gsnapl_avx512_OBJECTS = $(am__objects_32) +gsnapl_avx512_OBJECTS = $(dist_gsnapl_avx512_OBJECTS) +gsnapl_avx512_DEPENDENCIES = $(am__DEPENDENCIES_1) \ + $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) +gsnapl_avx512_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnapl_avx512_CFLAGS) \ + $(CFLAGS) $(gsnapl_avx512_LDFLAGS) $(LDFLAGS) -o $@ +am__objects_33 = gsnapl_nosimd-except.$(OBJEXT) \ gsnapl_nosimd-assert.$(OBJEXT) gsnapl_nosimd-mem.$(OBJEXT) \ gsnapl_nosimd-intlist.$(OBJEXT) gsnapl_nosimd-list.$(OBJEXT) \ gsnapl_nosimd-littleendian.$(OBJEXT) \ @@ -1553,7 +1874,7 @@ gsnapl_nosimd-genome_sites.$(OBJEXT) \ gsnapl_nosimd-bitpack64-read.$(OBJEXT) \ gsnapl_nosimd-bitpack64-readtwo.$(OBJEXT) \ - gsnapl_nosimd-indexdb.$(OBJEXT) \ + gsnapl_nosimd-merge.$(OBJEXT) gsnapl_nosimd-indexdb.$(OBJEXT) \ gsnapl_nosimd-indexdb_hr.$(OBJEXT) \ gsnapl_nosimd-oligo.$(OBJEXT) 
gsnapl_nosimd-chrom.$(OBJEXT) \ gsnapl_nosimd-segmentpos.$(OBJEXT) \ @@ -1590,6 +1911,7 @@ gsnapl_nosimd-splicetrie.$(OBJEXT) \ gsnapl_nosimd-splice.$(OBJEXT) gsnapl_nosimd-indel.$(OBJEXT) \ gsnapl_nosimd-bitpack64-access.$(OBJEXT) \ + gsnapl_nosimd-merge-heap.$(OBJEXT) \ gsnapl_nosimd-stage1hr.$(OBJEXT) \ gsnapl_nosimd-request.$(OBJEXT) \ gsnapl_nosimd-resulthr.$(OBJEXT) \ @@ -1601,14 +1923,14 @@ gsnapl_nosimd-parserange.$(OBJEXT) \ gsnapl_nosimd-getopt.$(OBJEXT) gsnapl_nosimd-getopt1.$(OBJEXT) \ gsnapl_nosimd-gsnap.$(OBJEXT) -dist_gsnapl_nosimd_OBJECTS = $(am__objects_29) +dist_gsnapl_nosimd_OBJECTS = $(am__objects_33) gsnapl_nosimd_OBJECTS = $(dist_gsnapl_nosimd_OBJECTS) gsnapl_nosimd_DEPENDENCIES = $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) gsnapl_nosimd_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnapl_nosimd_CFLAGS) \ $(CFLAGS) $(gsnapl_nosimd_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_30 = gsnapl_sse2-except.$(OBJEXT) \ +am__objects_34 = gsnapl_sse2-except.$(OBJEXT) \ gsnapl_sse2-assert.$(OBJEXT) gsnapl_sse2-mem.$(OBJEXT) \ gsnapl_sse2-intlist.$(OBJEXT) gsnapl_sse2-list.$(OBJEXT) \ gsnapl_sse2-littleendian.$(OBJEXT) \ @@ -1629,11 +1951,12 @@ gsnapl_sse2-genome_sites.$(OBJEXT) \ gsnapl_sse2-bitpack64-read.$(OBJEXT) \ gsnapl_sse2-bitpack64-readtwo.$(OBJEXT) \ - gsnapl_sse2-indexdb.$(OBJEXT) gsnapl_sse2-indexdb_hr.$(OBJEXT) \ - gsnapl_sse2-oligo.$(OBJEXT) gsnapl_sse2-chrom.$(OBJEXT) \ - gsnapl_sse2-segmentpos.$(OBJEXT) gsnapl_sse2-chrnum.$(OBJEXT) \ - gsnapl_sse2-maxent_hr.$(OBJEXT) gsnapl_sse2-samprint.$(OBJEXT) \ - gsnapl_sse2-mapq.$(OBJEXT) gsnapl_sse2-shortread.$(OBJEXT) \ + gsnapl_sse2-merge.$(OBJEXT) gsnapl_sse2-indexdb.$(OBJEXT) \ + gsnapl_sse2-indexdb_hr.$(OBJEXT) gsnapl_sse2-oligo.$(OBJEXT) \ + gsnapl_sse2-chrom.$(OBJEXT) gsnapl_sse2-segmentpos.$(OBJEXT) \ + gsnapl_sse2-chrnum.$(OBJEXT) gsnapl_sse2-maxent_hr.$(OBJEXT) \ + gsnapl_sse2-samprint.$(OBJEXT) gsnapl_sse2-mapq.$(OBJEXT) 
\ + gsnapl_sse2-shortread.$(OBJEXT) \ gsnapl_sse2-substring.$(OBJEXT) gsnapl_sse2-junction.$(OBJEXT) \ gsnapl_sse2-stage3hr.$(OBJEXT) \ gsnapl_sse2-spanningelt.$(OBJEXT) gsnapl_sse2-cmet.$(OBJEXT) \ @@ -1659,20 +1982,21 @@ gsnapl_sse2-splicetrie.$(OBJEXT) gsnapl_sse2-splice.$(OBJEXT) \ gsnapl_sse2-indel.$(OBJEXT) \ gsnapl_sse2-bitpack64-access.$(OBJEXT) \ + gsnapl_sse2-merge-heap.$(OBJEXT) \ gsnapl_sse2-stage1hr.$(OBJEXT) gsnapl_sse2-request.$(OBJEXT) \ gsnapl_sse2-resulthr.$(OBJEXT) gsnapl_sse2-output.$(OBJEXT) \ gsnapl_sse2-inbuffer.$(OBJEXT) gsnapl_sse2-samheader.$(OBJEXT) \ gsnapl_sse2-outbuffer.$(OBJEXT) gsnapl_sse2-datadir.$(OBJEXT) \ gsnapl_sse2-parserange.$(OBJEXT) gsnapl_sse2-getopt.$(OBJEXT) \ gsnapl_sse2-getopt1.$(OBJEXT) gsnapl_sse2-gsnap.$(OBJEXT) -dist_gsnapl_sse2_OBJECTS = $(am__objects_30) +dist_gsnapl_sse2_OBJECTS = $(am__objects_34) gsnapl_sse2_OBJECTS = $(dist_gsnapl_sse2_OBJECTS) gsnapl_sse2_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) gsnapl_sse2_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnapl_sse2_CFLAGS) \ $(CFLAGS) $(gsnapl_sse2_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_31 = gsnapl_sse41-except.$(OBJEXT) \ +am__objects_35 = gsnapl_sse41-except.$(OBJEXT) \ gsnapl_sse41-assert.$(OBJEXT) gsnapl_sse41-mem.$(OBJEXT) \ gsnapl_sse41-intlist.$(OBJEXT) gsnapl_sse41-list.$(OBJEXT) \ gsnapl_sse41-littleendian.$(OBJEXT) \ @@ -1695,7 +2019,7 @@ gsnapl_sse41-genome_sites.$(OBJEXT) \ gsnapl_sse41-bitpack64-read.$(OBJEXT) \ gsnapl_sse41-bitpack64-readtwo.$(OBJEXT) \ - gsnapl_sse41-indexdb.$(OBJEXT) \ + gsnapl_sse41-merge.$(OBJEXT) gsnapl_sse41-indexdb.$(OBJEXT) \ gsnapl_sse41-indexdb_hr.$(OBJEXT) gsnapl_sse41-oligo.$(OBJEXT) \ gsnapl_sse41-chrom.$(OBJEXT) gsnapl_sse41-segmentpos.$(OBJEXT) \ gsnapl_sse41-chrnum.$(OBJEXT) gsnapl_sse41-maxent_hr.$(OBJEXT) \ @@ -1729,6 +2053,7 @@ gsnapl_sse41-splicetrie.$(OBJEXT) \ gsnapl_sse41-splice.$(OBJEXT) 
gsnapl_sse41-indel.$(OBJEXT) \ gsnapl_sse41-bitpack64-access.$(OBJEXT) \ + gsnapl_sse41-merge-heap.$(OBJEXT) \ gsnapl_sse41-stage1hr.$(OBJEXT) gsnapl_sse41-request.$(OBJEXT) \ gsnapl_sse41-resulthr.$(OBJEXT) gsnapl_sse41-output.$(OBJEXT) \ gsnapl_sse41-inbuffer.$(OBJEXT) \ @@ -1738,14 +2063,14 @@ gsnapl_sse41-parserange.$(OBJEXT) \ gsnapl_sse41-getopt.$(OBJEXT) gsnapl_sse41-getopt1.$(OBJEXT) \ gsnapl_sse41-gsnap.$(OBJEXT) -dist_gsnapl_sse41_OBJECTS = $(am__objects_31) +dist_gsnapl_sse41_OBJECTS = $(am__objects_35) gsnapl_sse41_OBJECTS = $(dist_gsnapl_sse41_OBJECTS) gsnapl_sse41_DEPENDENCIES = $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) gsnapl_sse41_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnapl_sse41_CFLAGS) \ $(CFLAGS) $(gsnapl_sse41_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_32 = gsnapl_sse42-except.$(OBJEXT) \ +am__objects_36 = gsnapl_sse42-except.$(OBJEXT) \ gsnapl_sse42-assert.$(OBJEXT) gsnapl_sse42-mem.$(OBJEXT) \ gsnapl_sse42-intlist.$(OBJEXT) gsnapl_sse42-list.$(OBJEXT) \ gsnapl_sse42-littleendian.$(OBJEXT) \ @@ -1768,7 +2093,7 @@ gsnapl_sse42-genome_sites.$(OBJEXT) \ gsnapl_sse42-bitpack64-read.$(OBJEXT) \ gsnapl_sse42-bitpack64-readtwo.$(OBJEXT) \ - gsnapl_sse42-indexdb.$(OBJEXT) \ + gsnapl_sse42-merge.$(OBJEXT) gsnapl_sse42-indexdb.$(OBJEXT) \ gsnapl_sse42-indexdb_hr.$(OBJEXT) gsnapl_sse42-oligo.$(OBJEXT) \ gsnapl_sse42-chrom.$(OBJEXT) gsnapl_sse42-segmentpos.$(OBJEXT) \ gsnapl_sse42-chrnum.$(OBJEXT) gsnapl_sse42-maxent_hr.$(OBJEXT) \ @@ -1802,6 +2127,7 @@ gsnapl_sse42-splicetrie.$(OBJEXT) \ gsnapl_sse42-splice.$(OBJEXT) gsnapl_sse42-indel.$(OBJEXT) \ gsnapl_sse42-bitpack64-access.$(OBJEXT) \ + gsnapl_sse42-merge-heap.$(OBJEXT) \ gsnapl_sse42-stage1hr.$(OBJEXT) gsnapl_sse42-request.$(OBJEXT) \ gsnapl_sse42-resulthr.$(OBJEXT) gsnapl_sse42-output.$(OBJEXT) \ gsnapl_sse42-inbuffer.$(OBJEXT) \ @@ -1811,14 +2137,14 @@ gsnapl_sse42-parserange.$(OBJEXT) \ gsnapl_sse42-getopt.$(OBJEXT) 
gsnapl_sse42-getopt1.$(OBJEXT) \ gsnapl_sse42-gsnap.$(OBJEXT) -dist_gsnapl_sse42_OBJECTS = $(am__objects_32) +dist_gsnapl_sse42_OBJECTS = $(am__objects_36) gsnapl_sse42_OBJECTS = $(dist_gsnapl_sse42_OBJECTS) gsnapl_sse42_DEPENDENCIES = $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) gsnapl_sse42_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnapl_sse42_CFLAGS) \ $(CFLAGS) $(gsnapl_sse42_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_33 = gsnapl_ssse3-except.$(OBJEXT) \ +am__objects_37 = gsnapl_ssse3-except.$(OBJEXT) \ gsnapl_ssse3-assert.$(OBJEXT) gsnapl_ssse3-mem.$(OBJEXT) \ gsnapl_ssse3-intlist.$(OBJEXT) gsnapl_ssse3-list.$(OBJEXT) \ gsnapl_ssse3-littleendian.$(OBJEXT) \ @@ -1841,7 +2167,7 @@ gsnapl_ssse3-genome_sites.$(OBJEXT) \ gsnapl_ssse3-bitpack64-read.$(OBJEXT) \ gsnapl_ssse3-bitpack64-readtwo.$(OBJEXT) \ - gsnapl_ssse3-indexdb.$(OBJEXT) \ + gsnapl_ssse3-merge.$(OBJEXT) gsnapl_ssse3-indexdb.$(OBJEXT) \ gsnapl_ssse3-indexdb_hr.$(OBJEXT) gsnapl_ssse3-oligo.$(OBJEXT) \ gsnapl_ssse3-chrom.$(OBJEXT) gsnapl_ssse3-segmentpos.$(OBJEXT) \ gsnapl_ssse3-chrnum.$(OBJEXT) gsnapl_ssse3-maxent_hr.$(OBJEXT) \ @@ -1875,6 +2201,7 @@ gsnapl_ssse3-splicetrie.$(OBJEXT) \ gsnapl_ssse3-splice.$(OBJEXT) gsnapl_ssse3-indel.$(OBJEXT) \ gsnapl_ssse3-bitpack64-access.$(OBJEXT) \ + gsnapl_ssse3-merge-heap.$(OBJEXT) \ gsnapl_ssse3-stage1hr.$(OBJEXT) gsnapl_ssse3-request.$(OBJEXT) \ gsnapl_ssse3-resulthr.$(OBJEXT) gsnapl_ssse3-output.$(OBJEXT) \ gsnapl_ssse3-inbuffer.$(OBJEXT) \ @@ -1884,14 +2211,14 @@ gsnapl_ssse3-parserange.$(OBJEXT) \ gsnapl_ssse3-getopt.$(OBJEXT) gsnapl_ssse3-getopt1.$(OBJEXT) \ gsnapl_ssse3-gsnap.$(OBJEXT) -dist_gsnapl_ssse3_OBJECTS = $(am__objects_33) +dist_gsnapl_ssse3_OBJECTS = $(am__objects_37) gsnapl_ssse3_OBJECTS = $(dist_gsnapl_ssse3_OBJECTS) gsnapl_ssse3_DEPENDENCIES = $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) gsnapl_ssse3_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC 
$(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(gsnapl_ssse3_CFLAGS) \ $(CFLAGS) $(gsnapl_ssse3_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_34 = iit_dump-except.$(OBJEXT) iit_dump-assert.$(OBJEXT) \ +am__objects_38 = iit_dump-except.$(OBJEXT) iit_dump-assert.$(OBJEXT) \ iit_dump-mem.$(OBJEXT) iit_dump-littleendian.$(OBJEXT) \ iit_dump-bigendian.$(OBJEXT) iit_dump-intlist.$(OBJEXT) \ iit_dump-list.$(OBJEXT) iit_dump-univinterval.$(OBJEXT) \ @@ -1901,13 +2228,13 @@ iit_dump-iit-read-univ.$(OBJEXT) iit_dump-iit-read.$(OBJEXT) \ iit_dump-parserange.$(OBJEXT) iit_dump-getopt.$(OBJEXT) \ iit_dump-getopt1.$(OBJEXT) iit_dump-iit_dump.$(OBJEXT) -dist_iit_dump_OBJECTS = $(am__objects_34) +dist_iit_dump_OBJECTS = $(am__objects_38) iit_dump_OBJECTS = $(dist_iit_dump_OBJECTS) iit_dump_DEPENDENCIES = $(am__DEPENDENCIES_1) iit_dump_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(iit_dump_CFLAGS) \ $(CFLAGS) $(iit_dump_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_35 = iit_get-except.$(OBJEXT) iit_get-assert.$(OBJEXT) \ +am__objects_39 = iit_get-except.$(OBJEXT) iit_get-assert.$(OBJEXT) \ iit_get-mem.$(OBJEXT) iit_get-intlist.$(OBJEXT) \ iit_get-list.$(OBJEXT) iit_get-littleendian.$(OBJEXT) \ iit_get-bigendian.$(OBJEXT) iit_get-univinterval.$(OBJEXT) \ @@ -1917,13 +2244,13 @@ iit_get-iit-read-univ.$(OBJEXT) iit_get-iit-read.$(OBJEXT) \ iit_get-parserange.$(OBJEXT) iit_get-getopt.$(OBJEXT) \ iit_get-getopt1.$(OBJEXT) iit_get-iit_get.$(OBJEXT) -dist_iit_get_OBJECTS = $(am__objects_35) +dist_iit_get_OBJECTS = $(am__objects_39) iit_get_OBJECTS = $(dist_iit_get_OBJECTS) iit_get_DEPENDENCIES = $(am__DEPENDENCIES_1) iit_get_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(iit_get_CFLAGS) \ $(CFLAGS) $(iit_get_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_36 = iit_store-except.$(OBJEXT) iit_store-assert.$(OBJEXT) \ +am__objects_40 = iit_store-except.$(OBJEXT) iit_store-assert.$(OBJEXT) \ 
iit_store-mem.$(OBJEXT) iit_store-intlist.$(OBJEXT) \ iit_store-list.$(OBJEXT) iit_store-littleendian.$(OBJEXT) \ iit_store-bigendian.$(OBJEXT) iit_store-univinterval.$(OBJEXT) \ @@ -1935,13 +2262,13 @@ iit_store-table.$(OBJEXT) iit_store-chrom.$(OBJEXT) \ iit_store-getopt.$(OBJEXT) iit_store-getopt1.$(OBJEXT) \ iit_store-iit_store.$(OBJEXT) -dist_iit_store_OBJECTS = $(am__objects_36) +dist_iit_store_OBJECTS = $(am__objects_40) iit_store_OBJECTS = $(dist_iit_store_OBJECTS) iit_store_DEPENDENCIES = $(am__DEPENDENCIES_1) iit_store_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(iit_store_CFLAGS) \ $(CFLAGS) $(iit_store_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_37 = sam_sort-except.$(OBJEXT) sam_sort-assert.$(OBJEXT) \ +am__objects_41 = sam_sort-except.$(OBJEXT) sam_sort-assert.$(OBJEXT) \ sam_sort-mem.$(OBJEXT) sam_sort-littleendian.$(OBJEXT) \ sam_sort-bigendian.$(OBJEXT) sam_sort-intlist.$(OBJEXT) \ sam_sort-list.$(OBJEXT) sam_sort-stopwatch.$(OBJEXT) \ @@ -1953,13 +2280,13 @@ sam_sort-samread.$(OBJEXT) sam_sort-datadir.$(OBJEXT) \ sam_sort-getopt.$(OBJEXT) sam_sort-getopt1.$(OBJEXT) \ sam_sort-sam_sort.$(OBJEXT) -dist_sam_sort_OBJECTS = $(am__objects_37) +dist_sam_sort_OBJECTS = $(am__objects_41) sam_sort_OBJECTS = $(dist_sam_sort_OBJECTS) sam_sort_DEPENDENCIES = $(am__DEPENDENCIES_1) sam_sort_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(sam_sort_CFLAGS) \ $(CFLAGS) $(sam_sort_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_38 = snpindex-except.$(OBJEXT) snpindex-assert.$(OBJEXT) \ +am__objects_42 = snpindex-except.$(OBJEXT) snpindex-assert.$(OBJEXT) \ snpindex-mem.$(OBJEXT) snpindex-intlist.$(OBJEXT) \ snpindex-list.$(OBJEXT) snpindex-littleendian.$(OBJEXT) \ snpindex-bigendian.$(OBJEXT) snpindex-univinterval.$(OBJEXT) \ @@ -1981,14 +2308,14 @@ snpindex-datadir.$(OBJEXT) snpindex-parserange.$(OBJEXT) \ snpindex-getopt.$(OBJEXT) snpindex-getopt1.$(OBJEXT) \ 
snpindex-snpindex.$(OBJEXT) -dist_snpindex_OBJECTS = $(am__objects_38) +dist_snpindex_OBJECTS = $(am__objects_42) snpindex_OBJECTS = $(dist_snpindex_OBJECTS) snpindex_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) snpindex_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(snpindex_CFLAGS) \ $(CFLAGS) $(snpindex_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_39 = uniqscan-except.$(OBJEXT) uniqscan-assert.$(OBJEXT) \ +am__objects_43 = uniqscan-except.$(OBJEXT) uniqscan-assert.$(OBJEXT) \ uniqscan-mem.$(OBJEXT) uniqscan-intlist.$(OBJEXT) \ uniqscan-list.$(OBJEXT) uniqscan-littleendian.$(OBJEXT) \ uniqscan-bigendian.$(OBJEXT) uniqscan-univinterval.$(OBJEXT) \ @@ -2003,7 +2330,7 @@ uniqscan-genome128_hr.$(OBJEXT) \ uniqscan-genome_sites.$(OBJEXT) \ uniqscan-bitpack64-read.$(OBJEXT) \ - uniqscan-bitpack64-readtwo.$(OBJEXT) \ + uniqscan-bitpack64-readtwo.$(OBJEXT) uniqscan-merge.$(OBJEXT) \ uniqscan-indexdb.$(OBJEXT) uniqscan-indexdb_hr.$(OBJEXT) \ uniqscan-oligo.$(OBJEXT) uniqscan-chrom.$(OBJEXT) \ uniqscan-segmentpos.$(OBJEXT) uniqscan-chrnum.$(OBJEXT) \ @@ -2030,18 +2357,18 @@ uniqscan-indel.$(OBJEXT) uniqscan-bitpack64-access.$(OBJEXT) \ uniqscan-bytecoding.$(OBJEXT) uniqscan-univdiag.$(OBJEXT) \ uniqscan-sedgesort.$(OBJEXT) uniqscan-sarray-read.$(OBJEXT) \ - uniqscan-stage1hr.$(OBJEXT) uniqscan-resulthr.$(OBJEXT) \ - uniqscan-datadir.$(OBJEXT) uniqscan-parserange.$(OBJEXT) \ - uniqscan-getopt.$(OBJEXT) uniqscan-getopt1.$(OBJEXT) \ - uniqscan-uniqscan.$(OBJEXT) -dist_uniqscan_OBJECTS = $(am__objects_39) + uniqscan-merge-heap.$(OBJEXT) uniqscan-stage1hr.$(OBJEXT) \ + uniqscan-resulthr.$(OBJEXT) uniqscan-datadir.$(OBJEXT) \ + uniqscan-parserange.$(OBJEXT) uniqscan-getopt.$(OBJEXT) \ + uniqscan-getopt1.$(OBJEXT) uniqscan-uniqscan.$(OBJEXT) +dist_uniqscan_OBJECTS = $(am__objects_43) uniqscan_OBJECTS = $(dist_uniqscan_OBJECTS) uniqscan_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ 
$(am__DEPENDENCIES_1) uniqscan_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(uniqscan_CFLAGS) \ $(CFLAGS) $(uniqscan_LDFLAGS) $(LDFLAGS) -o $@ -am__objects_40 = uniqscanl-except.$(OBJEXT) uniqscanl-assert.$(OBJEXT) \ +am__objects_44 = uniqscanl-except.$(OBJEXT) uniqscanl-assert.$(OBJEXT) \ uniqscanl-mem.$(OBJEXT) uniqscanl-intlist.$(OBJEXT) \ uniqscanl-list.$(OBJEXT) uniqscanl-littleendian.$(OBJEXT) \ uniqscanl-bigendian.$(OBJEXT) uniqscanl-univinterval.$(OBJEXT) \ @@ -2058,17 +2385,17 @@ uniqscanl-genome_sites.$(OBJEXT) \ uniqscanl-bitpack64-read.$(OBJEXT) \ uniqscanl-bitpack64-readtwo.$(OBJEXT) \ - uniqscanl-indexdb.$(OBJEXT) uniqscanl-indexdb_hr.$(OBJEXT) \ - uniqscanl-oligo.$(OBJEXT) uniqscanl-chrom.$(OBJEXT) \ - uniqscanl-segmentpos.$(OBJEXT) uniqscanl-chrnum.$(OBJEXT) \ - uniqscanl-maxent_hr.$(OBJEXT) uniqscanl-mapq.$(OBJEXT) \ - uniqscanl-shortread.$(OBJEXT) uniqscanl-substring.$(OBJEXT) \ - uniqscanl-junction.$(OBJEXT) uniqscanl-stage3hr.$(OBJEXT) \ - uniqscanl-spanningelt.$(OBJEXT) uniqscanl-cmet.$(OBJEXT) \ - uniqscanl-atoi.$(OBJEXT) uniqscanl-maxent.$(OBJEXT) \ - uniqscanl-pair.$(OBJEXT) uniqscanl-pairpool.$(OBJEXT) \ - uniqscanl-diag.$(OBJEXT) uniqscanl-diagpool.$(OBJEXT) \ - uniqscanl-orderstat.$(OBJEXT) \ + uniqscanl-merge.$(OBJEXT) uniqscanl-indexdb.$(OBJEXT) \ + uniqscanl-indexdb_hr.$(OBJEXT) uniqscanl-oligo.$(OBJEXT) \ + uniqscanl-chrom.$(OBJEXT) uniqscanl-segmentpos.$(OBJEXT) \ + uniqscanl-chrnum.$(OBJEXT) uniqscanl-maxent_hr.$(OBJEXT) \ + uniqscanl-mapq.$(OBJEXT) uniqscanl-shortread.$(OBJEXT) \ + uniqscanl-substring.$(OBJEXT) uniqscanl-junction.$(OBJEXT) \ + uniqscanl-stage3hr.$(OBJEXT) uniqscanl-spanningelt.$(OBJEXT) \ + uniqscanl-cmet.$(OBJEXT) uniqscanl-atoi.$(OBJEXT) \ + uniqscanl-maxent.$(OBJEXT) uniqscanl-pair.$(OBJEXT) \ + uniqscanl-pairpool.$(OBJEXT) uniqscanl-diag.$(OBJEXT) \ + uniqscanl-diagpool.$(OBJEXT) uniqscanl-orderstat.$(OBJEXT) \ uniqscanl-oligoindex_hr.$(OBJEXT) 
uniqscanl-cellpool.$(OBJEXT) \ uniqscanl-stage2.$(OBJEXT) uniqscanl-intron.$(OBJEXT) \ uniqscanl-boyer-moore.$(OBJEXT) \ @@ -2085,11 +2412,11 @@ uniqscanl-splicetrie_build.$(OBJEXT) \ uniqscanl-splicetrie.$(OBJEXT) uniqscanl-splice.$(OBJEXT) \ uniqscanl-indel.$(OBJEXT) uniqscanl-bitpack64-access.$(OBJEXT) \ - uniqscanl-stage1hr.$(OBJEXT) uniqscanl-resulthr.$(OBJEXT) \ - uniqscanl-datadir.$(OBJEXT) uniqscanl-parserange.$(OBJEXT) \ - uniqscanl-getopt.$(OBJEXT) uniqscanl-getopt1.$(OBJEXT) \ - uniqscanl-uniqscan.$(OBJEXT) -dist_uniqscanl_OBJECTS = $(am__objects_40) + uniqscanl-merge-heap.$(OBJEXT) uniqscanl-stage1hr.$(OBJEXT) \ + uniqscanl-resulthr.$(OBJEXT) uniqscanl-datadir.$(OBJEXT) \ + uniqscanl-parserange.$(OBJEXT) uniqscanl-getopt.$(OBJEXT) \ + uniqscanl-getopt1.$(OBJEXT) uniqscanl-uniqscan.$(OBJEXT) +dist_uniqscanl_OBJECTS = $(am__objects_44) uniqscanl_OBJECTS = $(dist_uniqscanl_OBJECTS) uniqscanl_DEPENDENCIES = $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_1) @@ -2133,17 +2460,19 @@ SOURCES = $(dist_atoiindex_SOURCES) $(dist_cmetindex_SOURCES) \ $(dist_cpuid_SOURCES) $(dist_get_genome_SOURCES) \ $(dist_gmap_SOURCES) $(dist_gmap_avx2_SOURCES) \ - $(dist_gmap_nosimd_SOURCES) $(dist_gmap_sse2_SOURCES) \ - $(dist_gmap_sse41_SOURCES) $(dist_gmap_sse42_SOURCES) \ - $(dist_gmap_ssse3_SOURCES) $(dist_gmapindex_SOURCES) \ - $(dist_gmapl_SOURCES) $(dist_gmapl_avx2_SOURCES) \ + $(dist_gmap_avx512_SOURCES) $(dist_gmap_nosimd_SOURCES) \ + $(dist_gmap_sse2_SOURCES) $(dist_gmap_sse41_SOURCES) \ + $(dist_gmap_sse42_SOURCES) $(dist_gmap_ssse3_SOURCES) \ + $(dist_gmapindex_SOURCES) $(dist_gmapl_SOURCES) \ + $(dist_gmapl_avx2_SOURCES) $(dist_gmapl_avx512_SOURCES) \ $(dist_gmapl_nosimd_SOURCES) $(dist_gmapl_sse2_SOURCES) \ $(dist_gmapl_sse41_SOURCES) $(dist_gmapl_sse42_SOURCES) \ $(dist_gmapl_ssse3_SOURCES) $(dist_gsnap_SOURCES) \ - $(dist_gsnap_avx2_SOURCES) $(dist_gsnap_nosimd_SOURCES) \ - $(dist_gsnap_sse2_SOURCES) $(dist_gsnap_sse41_SOURCES) \ - 
$(dist_gsnap_sse42_SOURCES) $(dist_gsnap_ssse3_SOURCES) \ - $(dist_gsnapl_SOURCES) $(dist_gsnapl_avx2_SOURCES) \ + $(dist_gsnap_avx2_SOURCES) $(dist_gsnap_avx512_SOURCES) \ + $(dist_gsnap_nosimd_SOURCES) $(dist_gsnap_sse2_SOURCES) \ + $(dist_gsnap_sse41_SOURCES) $(dist_gsnap_sse42_SOURCES) \ + $(dist_gsnap_ssse3_SOURCES) $(dist_gsnapl_SOURCES) \ + $(dist_gsnapl_avx2_SOURCES) $(dist_gsnapl_avx512_SOURCES) \ $(dist_gsnapl_nosimd_SOURCES) $(dist_gsnapl_sse2_SOURCES) \ $(dist_gsnapl_sse41_SOURCES) $(dist_gsnapl_sse42_SOURCES) \ $(dist_gsnapl_ssse3_SOURCES) $(dist_iit_dump_SOURCES) \ @@ -2153,17 +2482,19 @@ DIST_SOURCES = $(dist_atoiindex_SOURCES) $(dist_cmetindex_SOURCES) \ $(dist_cpuid_SOURCES) $(dist_get_genome_SOURCES) \ $(dist_gmap_SOURCES) $(dist_gmap_avx2_SOURCES) \ - $(dist_gmap_nosimd_SOURCES) $(dist_gmap_sse2_SOURCES) \ - $(dist_gmap_sse41_SOURCES) $(dist_gmap_sse42_SOURCES) \ - $(dist_gmap_ssse3_SOURCES) $(dist_gmapindex_SOURCES) \ - $(dist_gmapl_SOURCES) $(dist_gmapl_avx2_SOURCES) \ + $(dist_gmap_avx512_SOURCES) $(dist_gmap_nosimd_SOURCES) \ + $(dist_gmap_sse2_SOURCES) $(dist_gmap_sse41_SOURCES) \ + $(dist_gmap_sse42_SOURCES) $(dist_gmap_ssse3_SOURCES) \ + $(dist_gmapindex_SOURCES) $(dist_gmapl_SOURCES) \ + $(dist_gmapl_avx2_SOURCES) $(dist_gmapl_avx512_SOURCES) \ $(dist_gmapl_nosimd_SOURCES) $(dist_gmapl_sse2_SOURCES) \ $(dist_gmapl_sse41_SOURCES) $(dist_gmapl_sse42_SOURCES) \ $(dist_gmapl_ssse3_SOURCES) $(dist_gsnap_SOURCES) \ - $(dist_gsnap_avx2_SOURCES) $(dist_gsnap_nosimd_SOURCES) \ - $(dist_gsnap_sse2_SOURCES) $(dist_gsnap_sse41_SOURCES) \ - $(dist_gsnap_sse42_SOURCES) $(dist_gsnap_ssse3_SOURCES) \ - $(dist_gsnapl_SOURCES) $(dist_gsnapl_avx2_SOURCES) \ + $(dist_gsnap_avx2_SOURCES) $(dist_gsnap_avx512_SOURCES) \ + $(dist_gsnap_nosimd_SOURCES) $(dist_gsnap_sse2_SOURCES) \ + $(dist_gsnap_sse41_SOURCES) $(dist_gsnap_sse42_SOURCES) \ + $(dist_gsnap_ssse3_SOURCES) $(dist_gsnapl_SOURCES) \ + $(dist_gsnapl_avx2_SOURCES) $(dist_gsnapl_avx512_SOURCES) \ 
$(dist_gsnapl_nosimd_SOURCES) $(dist_gsnapl_sse2_SOURCES) \ $(dist_gsnapl_sse41_SOURCES) $(dist_gsnapl_sse42_SOURCES) \ $(dist_gsnapl_ssse3_SOURCES) $(dist_iit_dump_SOURCES) \ @@ -2274,6 +2605,7 @@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMD_AVX2_CFLAGS = @SIMD_AVX2_CFLAGS@ +SIMD_AVX512_CFLAGS = @SIMD_AVX512_CFLAGS@ SIMD_SSE2_CFLAGS = @SIMD_SSE2_CFLAGS@ SIMD_SSE4_1_CFLAGS = @SIMD_SSE4_1_CFLAGS@ SIMD_SSE4_2_CFLAGS = @SIMD_SSE4_2_CFLAGS@ @@ -2388,6 +2720,7 @@ popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \ genome-write.c genome-write.h \ bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \ + merge.c merge.h \ indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \ oligo.c oligo.h block.c block.h \ chrom.c chrom.h segmentpos.c segmentpos.h \ @@ -2443,6 +2776,11 @@ gmap_avx2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) gmap_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) dist_gmap_avx2_SOURCES = $(GMAP_FILES) +gmap_avx512_CC = $(PTHREAD_CC) +gmap_avx512_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 -DHAVE_AVX512=1 $(SIMD_AVX512_CFLAGS) +gmap_avx512_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) +gmap_avx512_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) +dist_gmap_avx512_SOURCES = $(GMAP_FILES) GMAPL_FILES = fopen.h bool.h types.h separator.h comp.h \ except.c except.h assert.c assert.h mem.c mem.h \ intlistdef.h intlist.c intlist.h listdef.h list.c list.h \ @@ -2457,6 +2795,7 @@ popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \ genome-write.c genome-write.h \ bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \ + merge.c merge.h \ indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \ oligo.c oligo.h block.c block.h \ chrom.c chrom.h segmentpos.c segmentpos.h \ @@ -2512,6 +2851,11 @@ gmapl_avx2_LDFLAGS = 
$(AM_LDFLAGS) $(STATIC_LDFLAG) gmapl_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) dist_gmapl_avx2_SOURCES = $(GMAPL_FILES) +gmapl_avx512_CC = $(PTHREAD_CC) +gmapl_avx512_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 -DHAVE_AVX512=1 $(SIMD_AVX512_CFLAGS) +gmapl_avx512_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) +gmapl_avx512_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) +dist_gmapl_avx512_SOURCES = $(GMAPL_FILES) GSNAP_FILES = fopen.h bool.h types.h separator.h comp.h \ except.c except.h assert.c assert.h mem.c mem.h \ intlistdef.h intlist.c intlist.h listdef.h list.c list.h \ @@ -2526,6 +2870,7 @@ genome.c genome.h \ popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \ bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \ + merge.c merge.h \ indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \ oligo.c oligo.h \ chrom.c chrom.h segmentpos.c segmentpos.h \ @@ -2545,7 +2890,7 @@ splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \ bytecoding.c bytecoding.h \ univdiagdef.h univdiag.c univdiag.h sedgesort.c sedgesort.h sarray-read.c sarray-read.h \ - stage1hr.c stage1hr.h \ + merge-heap.c merge-heap.h stage1hr.c stage1hr.h \ request.c request.h resulthr.c resulthr.h output.c output.h \ inbuffer.c inbuffer.h samheader.c samheader.h outbuffer.c outbuffer.h \ datadir.c datadir.h mode.h parserange.c parserange.h \ @@ -2584,6 +2929,11 @@ gsnap_avx2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) gsnap_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) dist_gsnap_avx2_SOURCES = $(GSNAP_FILES) +gsnap_avx512_CC = $(PTHREAD_CC) +gsnap_avx512_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 
-DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 -DHAVE_AVX512=1 $(SIMD_AVX512_CFLAGS) +gsnap_avx512_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) +gsnap_avx512_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) +dist_gsnap_avx512_SOURCES = $(GSNAP_FILES) GSNAPL_FILES = fopen.h bool.h types.h separator.h comp.h \ except.c except.h assert.c assert.h mem.c mem.h \ intlistdef.h intlist.c intlist.h listdef.h list.c list.h \ @@ -2598,6 +2948,7 @@ genome.c genome.h \ popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \ bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \ + merge.c merge.h \ indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \ oligo.c oligo.h \ chrom.c chrom.h segmentpos.c segmentpos.h \ @@ -2615,7 +2966,7 @@ chimera.c chimera.h sense.h fastlog.h stage3.c stage3.h \ splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \ splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \ - stage1hr.c stage1hr.h \ + merge-heap.c merge-heap.h stage1hr.c stage1hr.h \ request.c request.h resulthr.c resulthr.h output.c output.h \ inbuffer.c inbuffer.h samheader.c samheader.h outbuffer.c outbuffer.h \ datadir.c datadir.h mode.h parserange.c parserange.h \ @@ -2653,6 +3004,11 @@ gsnapl_avx2_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) gsnapl_avx2_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) dist_gsnapl_avx2_SOURCES = $(GSNAPL_FILES) +gsnapl_avx512_CC = $(PTHREAD_CC) +gsnapl_avx512_CFLAGS = $(AM_CFLAGS) $(PTHREAD_CFLAGS) -DTARGET=\"$(target)\" -DGMAPDB=\"$(GMAPDB)\" -DMAX_STACK_READLENGTH=$(MAX_STACK_READLENGTH) -DGSNAP=1 -DLARGE_GENOMES=1 $(POPCNT_CFLAGS) -DHAVE_SSE2=1 -DHAVE_SSSE3=1 -DHAVE_SSE4_1=1 -DHAVE_SSE4_2=1 -DHAVE_AVX2=1 -DHAVE_AVX512=1 $(SIMD_AVX512_CFLAGS) +gsnapl_avx512_LDFLAGS = $(AM_LDFLAGS) $(STATIC_LDFLAG) +gsnapl_avx512_LDADD = $(PTHREAD_LIBS) $(ZLIB_LIBS) $(BZLIB_LIBS) +dist_gsnapl_avx512_SOURCES = $(GSNAPL_FILES) # Build as a non-SIMD 
program UNIQSCAN_FILES = fopen.h bool.h types.h separator.h comp.h \ @@ -2668,6 +3024,7 @@ genome.c genome.h \ popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \ bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \ + merge.c merge.h \ indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \ oligo.c oligo.h \ chrom.c chrom.h segmentpos.c segmentpos.h \ @@ -2687,7 +3044,7 @@ splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \ bytecoding.c bytecoding.h \ univdiagdef.h univdiag.c univdiag.h sedgesort.c sedgesort.h sarray-read.c sarray-read.h \ - stage1hr.c stage1hr.h resulthr.c resulthr.h \ + merge-heap.c merge-heap.h stage1hr.c stage1hr.h resulthr.c resulthr.h \ datadir.c datadir.h mode.h parserange.c parserange.h \ getopt.c getopt1.c getopt.h uniqscan.c @@ -2709,6 +3066,7 @@ genome.c genome.h \ popcount.c popcount.h genome128_hr.c genome128_hr.h genome_sites.c genome_sites.h \ bitpack64-read.c bitpack64-read.h bitpack64-readtwo.c bitpack64-readtwo.h \ + merge.c merge.h \ indexdbdef.h indexdb.c indexdb.h indexdb_hr.c indexdb_hr.h \ oligo.c oligo.h \ chrom.c chrom.h segmentpos.c segmentpos.h \ @@ -2726,7 +3084,7 @@ chimera.c chimera.h sense.h fastlog.h stage3.c stage3.h \ splicestringpool.c splicestringpool.h splicetrie_build.c splicetrie_build.h splicetrie.c splicetrie.h \ splice.c splice.h indel.c indel.h bitpack64-access.c bitpack64-access.h \ - stage1hr.c stage1hr.h resulthr.c resulthr.h \ + merge-heap.c merge-heap.h stage1hr.c stage1hr.h resulthr.c resulthr.h \ datadir.c datadir.h mode.h parserange.c parserange.h \ getopt.c getopt1.c getopt.h uniqscan.c @@ -3053,6 +3411,10 @@ @rm -f gmap.avx2$(EXEEXT) $(AM_V_CCLD)$(gmap_avx2_LINK) $(gmap_avx2_OBJECTS) $(gmap_avx2_LDADD) $(LIBS) +gmap.avx512$(EXEEXT): $(gmap_avx512_OBJECTS) $(gmap_avx512_DEPENDENCIES) $(EXTRA_gmap_avx512_DEPENDENCIES) + @rm -f gmap.avx512$(EXEEXT) + $(AM_V_CCLD)$(gmap_avx512_LINK) $(gmap_avx512_OBJECTS) 
$(gmap_avx512_LDADD) $(LIBS) + gmap.nosimd$(EXEEXT): $(gmap_nosimd_OBJECTS) $(gmap_nosimd_DEPENDENCIES) $(EXTRA_gmap_nosimd_DEPENDENCIES) @rm -f gmap.nosimd$(EXEEXT) $(AM_V_CCLD)$(gmap_nosimd_LINK) $(gmap_nosimd_OBJECTS) $(gmap_nosimd_LDADD) $(LIBS) @@ -3085,6 +3447,10 @@ @rm -f gmapl.avx2$(EXEEXT) $(AM_V_CCLD)$(gmapl_avx2_LINK) $(gmapl_avx2_OBJECTS) $(gmapl_avx2_LDADD) $(LIBS) +gmapl.avx512$(EXEEXT): $(gmapl_avx512_OBJECTS) $(gmapl_avx512_DEPENDENCIES) $(EXTRA_gmapl_avx512_DEPENDENCIES) + @rm -f gmapl.avx512$(EXEEXT) + $(AM_V_CCLD)$(gmapl_avx512_LINK) $(gmapl_avx512_OBJECTS) $(gmapl_avx512_LDADD) $(LIBS) + gmapl.nosimd$(EXEEXT): $(gmapl_nosimd_OBJECTS) $(gmapl_nosimd_DEPENDENCIES) $(EXTRA_gmapl_nosimd_DEPENDENCIES) @rm -f gmapl.nosimd$(EXEEXT) $(AM_V_CCLD)$(gmapl_nosimd_LINK) $(gmapl_nosimd_OBJECTS) $(gmapl_nosimd_LDADD) $(LIBS) @@ -3113,6 +3479,10 @@ @rm -f gsnap.avx2$(EXEEXT) $(AM_V_CCLD)$(gsnap_avx2_LINK) $(gsnap_avx2_OBJECTS) $(gsnap_avx2_LDADD) $(LIBS) +gsnap.avx512$(EXEEXT): $(gsnap_avx512_OBJECTS) $(gsnap_avx512_DEPENDENCIES) $(EXTRA_gsnap_avx512_DEPENDENCIES) + @rm -f gsnap.avx512$(EXEEXT) + $(AM_V_CCLD)$(gsnap_avx512_LINK) $(gsnap_avx512_OBJECTS) $(gsnap_avx512_LDADD) $(LIBS) + gsnap.nosimd$(EXEEXT): $(gsnap_nosimd_OBJECTS) $(gsnap_nosimd_DEPENDENCIES) $(EXTRA_gsnap_nosimd_DEPENDENCIES) @rm -f gsnap.nosimd$(EXEEXT) $(AM_V_CCLD)$(gsnap_nosimd_LINK) $(gsnap_nosimd_OBJECTS) $(gsnap_nosimd_LDADD) $(LIBS) @@ -3141,6 +3511,10 @@ @rm -f gsnapl.avx2$(EXEEXT) $(AM_V_CCLD)$(gsnapl_avx2_LINK) $(gsnapl_avx2_OBJECTS) $(gsnapl_avx2_LDADD) $(LIBS) +gsnapl.avx512$(EXEEXT): $(gsnapl_avx512_OBJECTS) $(gsnapl_avx512_DEPENDENCIES) $(EXTRA_gsnapl_avx512_DEPENDENCIES) + @rm -f gsnapl.avx512$(EXEEXT) + $(AM_V_CCLD)$(gsnapl_avx512_LINK) $(gsnapl_avx512_OBJECTS) $(gsnapl_avx512_LDADD) $(LIBS) + gsnapl.nosimd$(EXEEXT): $(gsnapl_nosimd_OBJECTS) $(gsnapl_nosimd_DEPENDENCIES) $(EXTRA_gsnapl_nosimd_DEPENDENCIES) @rm -f gsnapl.nosimd$(EXEEXT) $(AM_V_CCLD)$(gsnapl_nosimd_LINK) 
$(gsnapl_nosimd_OBJECTS) $(gsnapl_nosimd_LDADD) $(LIBS) @@ -3366,6 +3740,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-orderstat.Po@am__quote@ @@ -3395,6 +3770,92 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-uintlist.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-uinttable.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx2-univinterval.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-access.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-assert.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-atoi.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-bigendian.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-bitpack64-read.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-bitpack64-readtwo.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-block.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-boyer-moore.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-bzip2.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-cellpool.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-changepoint.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-chimera.Po@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/gmap_avx512-chrnum.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-chrom.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-cmet.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-compress-write.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-compress.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-datadir.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-diag.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-diagnostic.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-diagpool.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-doublelist.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-dynprog.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-dynprog_cdna.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-dynprog_end.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-dynprog_genome.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-dynprog_simd.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-dynprog_single.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-except.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-filestring.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-gbuffer.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-genome-write.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-genome.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-genome128_hr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-genome_sites.Po@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/gmap_avx512-genomicpos.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-getopt.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-getopt1.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-gmap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-gregion.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-iit-read-univ.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-iit-read.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-inbuffer.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-indexdb.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-indexdb_hr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-interval.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-intlist.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-intron.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-list.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-littleendian.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-match.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-matchpool.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-maxent.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-maxent_hr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-md5.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-merge.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-oligo.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-oligoindex_hr.Po@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-orderstat.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-outbuffer.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-output.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-pair.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-pairpool.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-parserange.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-pbinom.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-popcount.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-reader.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-request.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-result.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-samheader.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-segmentpos.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-semaphore.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-sequence.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-smooth.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-splicestringpool.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-splicetrie.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-splicetrie_build.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-stage1.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-stage2.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-stage3.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-stopwatch.Po@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/gmap_avx512-translation.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-uintlist.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-uinttable.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_avx512-univinterval.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_nosimd-access.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_nosimd-assert.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_nosimd-atoi.Po@am__quote@ @@ -3451,6 +3912,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_nosimd-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_nosimd-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_nosimd-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_nosimd-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_nosimd-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_nosimd-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_nosimd-orderstat.Po@am__quote@ @@ -3536,6 +3998,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse2-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse2-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse2-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse2-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse2-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse2-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse2-orderstat.Po@am__quote@ @@ -3621,6 +4084,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse41-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse41-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/gmap_sse41-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse41-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse41-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse41-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse41-orderstat.Po@am__quote@ @@ -3706,6 +4170,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse42-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse42-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse42-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse42-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse42-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse42-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_sse42-orderstat.Po@am__quote@ @@ -3791,6 +4256,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_ssse3-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_ssse3-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_ssse3-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_ssse3-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_ssse3-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_ssse3-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmap_ssse3-orderstat.Po@am__quote@ @@ -3924,6 +4390,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx2-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx2-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx2-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx2-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx2-oligo.Po@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx2-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx2-orderstat.Po@am__quote@ @@ -3954,6 +4421,93 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx2-uintlist.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx2-uinttable.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx2-univinterval.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-access.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-assert.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-atoi.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-bigendian.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-bitpack64-read.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-bitpack64-readtwo.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-block.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-boyer-moore.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-bzip2.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-cellpool.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-changepoint.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-chimera.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-chrnum.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-chrom.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-cmet.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-compress-write.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-compress.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-datadir.Po@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-diag.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-diagnostic.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-diagpool.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-doublelist.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-dynprog.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-dynprog_cdna.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-dynprog_end.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-dynprog_genome.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-dynprog_simd.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-dynprog_single.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-except.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-filestring.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-gbuffer.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-genome-write.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-genome.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-genome128_hr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-genome_sites.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-genomicpos.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-getopt.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-getopt1.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-gmap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-gregion.Po@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/gmapl_avx512-iit-read-univ.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-iit-read.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-inbuffer.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-indexdb.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-indexdb_hr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-interval.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-intlist.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-intron.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-list.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-littleendian.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-match.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-matchpool.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-maxent.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-maxent_hr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-md5.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-merge.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-oligo.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-oligoindex_hr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-orderstat.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-outbuffer.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-output.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-pair.Po@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/gmapl_avx512-pairpool.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-parserange.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-pbinom.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-popcount.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-reader.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-request.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-result.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-samheader.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-segmentpos.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-semaphore.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-sequence.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-smooth.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-splicestringpool.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-splicetrie.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-splicetrie_build.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-stage1.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-stage2.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-stage3.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-stopwatch.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-translation.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-uint8list.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-uintlist.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_avx512-uinttable.Po@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/gmapl_avx512-univinterval.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_nosimd-access.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_nosimd-assert.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_nosimd-atoi.Po@am__quote@ @@ -4010,6 +4564,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_nosimd-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_nosimd-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_nosimd-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_nosimd-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_nosimd-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_nosimd-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_nosimd-orderstat.Po@am__quote@ @@ -4096,6 +4651,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse2-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse2-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse2-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse2-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse2-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse2-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse2-orderstat.Po@am__quote@ @@ -4182,6 +4738,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse41-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse41-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse41-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse41-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse41-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/gmapl_sse41-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse41-orderstat.Po@am__quote@ @@ -4268,6 +4825,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse42-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse42-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse42-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse42-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse42-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse42-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_sse42-orderstat.Po@am__quote@ @@ -4354,6 +4912,7 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_ssse3-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_ssse3-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_ssse3-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_ssse3-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_ssse3-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_ssse3-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gmapl_ssse3-orderstat.Po@am__quote@ @@ -4440,6 +4999,8 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-merge-heap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/gsnap_avx2-orderstat.Po@am__quote@ @@ -4476,6 +5037,98 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-uintlist.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-univdiag.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx2-univinterval.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-access.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-assert.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-atoi.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-bigendian.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-bitpack64-access.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-bitpack64-read.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-bitpack64-readtwo.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-boyer-moore.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-bytecoding.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-bzip2.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-cellpool.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-changepoint.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-chimera.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-chrnum.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-chrom.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-cmet.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-compress.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-datadir.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-diag.Po@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-diagpool.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-doublelist.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-dynprog.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-dynprog_cdna.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-dynprog_end.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-dynprog_genome.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-dynprog_simd.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-dynprog_single.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-except.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-filestring.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-gbuffer.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-genome.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-genome128_hr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-genome_sites.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-genomicpos.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-getopt.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-getopt1.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-gsnap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-iit-read-univ.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-iit-read.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-inbuffer.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-indel.Po@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/gsnap_avx512-indexdb.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-indexdb_hr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-interval.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-intlist.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-intron.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-junction.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-list.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-littleendian.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-mapq.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-maxent.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-maxent_hr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-md5.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-merge-heap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-merge.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-oligo.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-oligoindex_hr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-orderstat.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-outbuffer.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-output.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-pair.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-pairpool.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-parserange.Po@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/gsnap_avx512-pbinom.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-popcount.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-reader.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-request.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-resulthr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-samheader.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-samprint.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-sarray-read.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-sedgesort.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-segmentpos.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-semaphore.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-sequence.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-shortread.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-smooth.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-spanningelt.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-splice.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-splicestringpool.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-splicetrie.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-splicetrie_build.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-stage1hr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-stage2.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-stage3.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-stage3hr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/gsnap_avx512-stopwatch.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-substring.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-uintlist.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-univdiag.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_avx512-univinterval.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-access.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-assert.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-atoi.Po@am__quote@ @@ -4530,6 +5183,8 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-merge-heap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_nosimd-orderstat.Po@am__quote@ @@ -4620,6 +5275,8 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-merge-heap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse2-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/gsnap_sse2-orderstat.Po@am__quote@ @@ -4710,6 +5367,8 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-merge-heap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse41-orderstat.Po@am__quote@ @@ -4800,6 +5459,8 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-merge-heap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_sse42-orderstat.Po@am__quote@ @@ -4890,6 +5551,8 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-merge-heap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/gsnap_ssse3-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnap_ssse3-orderstat.Po@am__quote@ @@ -4981,6 +5644,8 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx2-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx2-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx2-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx2-merge-heap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx2-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx2-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx2-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx2-orderstat.Po@am__quote@ @@ -5015,6 +5680,95 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx2-uint8list.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx2-uintlist.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx2-univinterval.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-access.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-assert.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-atoi.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-bigendian.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-bitpack64-access.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-bitpack64-read.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-bitpack64-readtwo.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-boyer-moore.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-bzip2.Po@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/gsnapl_avx512-cellpool.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-changepoint.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-chimera.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-chrnum.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-chrom.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-cmet.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-compress.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-datadir.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-diag.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-diagpool.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-doublelist.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-dynprog.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-dynprog_cdna.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-dynprog_end.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-dynprog_genome.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-dynprog_simd.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-dynprog_single.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-except.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-filestring.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-gbuffer.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-genome.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-genome128_hr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-genome_sites.Po@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-genomicpos.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-getopt.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-getopt1.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-gsnap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-iit-read-univ.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-iit-read.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-inbuffer.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-indel.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-indexdb.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-indexdb_hr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-interval.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-intlist.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-intron.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-junction.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-list.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-littleendian.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-mapq.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-maxent.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-maxent_hr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-md5.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-merge-heap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-merge.Po@am__quote@ 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-oligo.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-oligoindex_hr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-orderstat.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-outbuffer.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-output.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-pair.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-pairpool.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-parserange.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-pbinom.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-popcount.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-reader.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-request.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-resulthr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-samheader.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-samprint.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-segmentpos.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-semaphore.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-sequence.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-shortread.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-smooth.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-spanningelt.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-splice.Po@am__quote@ +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/gsnapl_avx512-splicestringpool.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-splicetrie.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-splicetrie_build.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-stage1hr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-stage2.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-stage3.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-stage3hr.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-stopwatch.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-substring.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-uint8list.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-uintlist.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_avx512-univinterval.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_nosimd-access.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_nosimd-assert.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_nosimd-atoi.Po@am__quote@ @@ -5068,6 +5822,8 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_nosimd-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_nosimd-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_nosimd-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_nosimd-merge-heap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_nosimd-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_nosimd-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_nosimd-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_nosimd-orderstat.Po@am__quote@ @@ 
-5155,6 +5911,8 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse2-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse2-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse2-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse2-merge-heap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse2-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse2-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse2-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse2-orderstat.Po@am__quote@ @@ -5242,6 +6000,8 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse41-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse41-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse41-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse41-merge-heap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse41-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse41-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse41-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse41-orderstat.Po@am__quote@ @@ -5329,6 +6089,8 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse42-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse42-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse42-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse42-merge-heap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse42-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse42-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse42-oligoindex_hr.Po@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_sse42-orderstat.Po@am__quote@ @@ -5416,6 +6178,8 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_ssse3-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_ssse3-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_ssse3-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_ssse3-merge-heap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_ssse3-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_ssse3-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_ssse3-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gsnapl_ssse3-orderstat.Po@am__quote@ @@ -5624,6 +6388,8 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscan-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscan-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscan-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscan-merge-heap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscan-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscan-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscan-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscan-orderstat.Po@am__quote@ @@ -5707,6 +6473,8 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscanl-maxent_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscanl-md5.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscanl-mem.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscanl-merge-heap.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscanl-merge.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscanl-oligo.Po@am__quote@ @AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/uniqscanl-oligoindex_hr.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/uniqscanl-orderstat.Po@am__quote@ @@ -7804,6 +8572,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx2_CFLAGS) $(CFLAGS) -c -o gmap_avx2-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gmap_avx2-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx2_CFLAGS) $(CFLAGS) -MT gmap_avx2-merge.o -MD -MP -MF $(DEPDIR)/gmap_avx2-merge.Tpo -c -o gmap_avx2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx2-merge.Tpo $(DEPDIR)/gmap_avx2-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmap_avx2-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx2_CFLAGS) $(CFLAGS) -c -o gmap_avx2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gmap_avx2-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx2_CFLAGS) $(CFLAGS) -MT gmap_avx2-merge.obj -MD -MP -MF $(DEPDIR)/gmap_avx2-merge.Tpo -c -o gmap_avx2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx2-merge.Tpo $(DEPDIR)/gmap_avx2-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmap_avx2-merge.obj' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx2_CFLAGS) $(CFLAGS) -c -o gmap_avx2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gmap_avx2-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx2_CFLAGS) $(CFLAGS) -MT gmap_avx2-indexdb.o -MD -MP -MF $(DEPDIR)/gmap_avx2-indexdb.Tpo -c -o gmap_avx2-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx2-indexdb.Tpo $(DEPDIR)/gmap_avx2-indexdb.Po @@ -8560,6 +9342,1210 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx2_CFLAGS) $(CFLAGS) -c -o gmap_avx2-gmap.obj `if test -f 'gmap.c'; then $(CYGPATH_W) 'gmap.c'; else $(CYGPATH_W) '$(srcdir)/gmap.c'; fi` +gmap_avx512-except.o: except.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-except.o -MD -MP -MF $(DEPDIR)/gmap_avx512-except.Tpo -c -o gmap_avx512-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-except.Tpo $(DEPDIR)/gmap_avx512-except.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='except.c' object='gmap_avx512-except.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-except.o `test -f 'except.c' || echo 
'$(srcdir)/'`except.c + +gmap_avx512-except.obj: except.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-except.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-except.Tpo -c -o gmap_avx512-except.obj `if test -f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) '$(srcdir)/except.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-except.Tpo $(DEPDIR)/gmap_avx512-except.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='except.c' object='gmap_avx512-except.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-except.obj `if test -f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) '$(srcdir)/except.c'; fi` + +gmap_avx512-assert.o: assert.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-assert.o -MD -MP -MF $(DEPDIR)/gmap_avx512-assert.Tpo -c -o gmap_avx512-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-assert.Tpo $(DEPDIR)/gmap_avx512-assert.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='assert.c' object='gmap_avx512-assert.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c + +gmap_avx512-assert.obj: assert.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-assert.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-assert.Tpo -c -o gmap_avx512-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-assert.Tpo $(DEPDIR)/gmap_avx512-assert.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='assert.c' object='gmap_avx512-assert.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi` + +gmap_avx512-mem.o: mem.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-mem.o -MD -MP -MF $(DEPDIR)/gmap_avx512-mem.Tpo -c -o gmap_avx512-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-mem.Tpo $(DEPDIR)/gmap_avx512-mem.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mem.c' object='gmap_avx512-mem.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c + +gmap_avx512-mem.obj: mem.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-mem.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-mem.Tpo -c -o gmap_avx512-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; 
fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-mem.Tpo $(DEPDIR)/gmap_avx512-mem.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mem.c' object='gmap_avx512-mem.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; fi` + +gmap_avx512-intlist.o: intlist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-intlist.o -MD -MP -MF $(DEPDIR)/gmap_avx512-intlist.Tpo -c -o gmap_avx512-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-intlist.Tpo $(DEPDIR)/gmap_avx512-intlist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='intlist.c' object='gmap_avx512-intlist.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c + +gmap_avx512-intlist.obj: intlist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-intlist.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-intlist.Tpo -c -o gmap_avx512-intlist.obj `if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-intlist.Tpo $(DEPDIR)/gmap_avx512-intlist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='intlist.c' object='gmap_avx512-intlist.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-intlist.obj `if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi` + +gmap_avx512-list.o: list.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-list.o -MD -MP -MF $(DEPDIR)/gmap_avx512-list.Tpo -c -o gmap_avx512-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-list.Tpo $(DEPDIR)/gmap_avx512-list.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='list.c' object='gmap_avx512-list.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c + +gmap_avx512-list.obj: list.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-list.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-list.Tpo -c -o gmap_avx512-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-list.Tpo $(DEPDIR)/gmap_avx512-list.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='list.c' object='gmap_avx512-list.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi` + +gmap_avx512-littleendian.o: littleendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-littleendian.o -MD -MP -MF $(DEPDIR)/gmap_avx512-littleendian.Tpo -c -o gmap_avx512-littleendian.o `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-littleendian.Tpo $(DEPDIR)/gmap_avx512-littleendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='littleendian.c' object='gmap_avx512-littleendian.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-littleendian.o `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c + +gmap_avx512-littleendian.obj: littleendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-littleendian.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-littleendian.Tpo -c -o gmap_avx512-littleendian.obj `if test -f 'littleendian.c'; then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-littleendian.Tpo $(DEPDIR)/gmap_avx512-littleendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='littleendian.c' object='gmap_avx512-littleendian.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-littleendian.obj `if test -f 'littleendian.c'; then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi` + +gmap_avx512-bigendian.o: bigendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-bigendian.o -MD -MP -MF $(DEPDIR)/gmap_avx512-bigendian.Tpo -c -o gmap_avx512-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-bigendian.Tpo $(DEPDIR)/gmap_avx512-bigendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bigendian.c' object='gmap_avx512-bigendian.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c + +gmap_avx512-bigendian.obj: bigendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-bigendian.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-bigendian.Tpo -c -o gmap_avx512-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-bigendian.Tpo $(DEPDIR)/gmap_avx512-bigendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bigendian.c' object='gmap_avx512-bigendian.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi` + +gmap_avx512-univinterval.o: univinterval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-univinterval.o -MD -MP -MF $(DEPDIR)/gmap_avx512-univinterval.Tpo -c -o gmap_avx512-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-univinterval.Tpo $(DEPDIR)/gmap_avx512-univinterval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='univinterval.c' object='gmap_avx512-univinterval.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c + +gmap_avx512-univinterval.obj: univinterval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-univinterval.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-univinterval.Tpo -c -o gmap_avx512-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-univinterval.Tpo $(DEPDIR)/gmap_avx512-univinterval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='univinterval.c' object='gmap_avx512-univinterval.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi` + +gmap_avx512-interval.o: interval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-interval.o -MD -MP -MF $(DEPDIR)/gmap_avx512-interval.Tpo -c -o gmap_avx512-interval.o `test -f 'interval.c' || echo '$(srcdir)/'`interval.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-interval.Tpo $(DEPDIR)/gmap_avx512-interval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='interval.c' object='gmap_avx512-interval.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-interval.o `test -f 'interval.c' || echo '$(srcdir)/'`interval.c + +gmap_avx512-interval.obj: interval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-interval.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-interval.Tpo -c -o gmap_avx512-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-interval.Tpo $(DEPDIR)/gmap_avx512-interval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='interval.c' object='gmap_avx512-interval.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) 
$(CFLAGS) -c -o gmap_avx512-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi` + +gmap_avx512-uintlist.o: uintlist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-uintlist.o -MD -MP -MF $(DEPDIR)/gmap_avx512-uintlist.Tpo -c -o gmap_avx512-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-uintlist.Tpo $(DEPDIR)/gmap_avx512-uintlist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uintlist.c' object='gmap_avx512-uintlist.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c + +gmap_avx512-uintlist.obj: uintlist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-uintlist.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-uintlist.Tpo -c -o gmap_avx512-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-uintlist.Tpo $(DEPDIR)/gmap_avx512-uintlist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uintlist.c' object='gmap_avx512-uintlist.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else 
$(CYGPATH_W) '$(srcdir)/uintlist.c'; fi` + +gmap_avx512-stopwatch.o: stopwatch.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-stopwatch.o -MD -MP -MF $(DEPDIR)/gmap_avx512-stopwatch.Tpo -c -o gmap_avx512-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-stopwatch.Tpo $(DEPDIR)/gmap_avx512-stopwatch.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stopwatch.c' object='gmap_avx512-stopwatch.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c + +gmap_avx512-stopwatch.obj: stopwatch.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-stopwatch.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-stopwatch.Tpo -c -o gmap_avx512-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-stopwatch.Tpo $(DEPDIR)/gmap_avx512-stopwatch.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stopwatch.c' object='gmap_avx512-stopwatch.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi` + +gmap_avx512-semaphore.o: 
semaphore.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-semaphore.o -MD -MP -MF $(DEPDIR)/gmap_avx512-semaphore.Tpo -c -o gmap_avx512-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-semaphore.Tpo $(DEPDIR)/gmap_avx512-semaphore.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='semaphore.c' object='gmap_avx512-semaphore.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c + +gmap_avx512-semaphore.obj: semaphore.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-semaphore.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-semaphore.Tpo -c -o gmap_avx512-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-semaphore.Tpo $(DEPDIR)/gmap_avx512-semaphore.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='semaphore.c' object='gmap_avx512-semaphore.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi` + +gmap_avx512-access.o: access.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-access.o -MD -MP -MF $(DEPDIR)/gmap_avx512-access.Tpo -c -o gmap_avx512-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-access.Tpo $(DEPDIR)/gmap_avx512-access.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='access.c' object='gmap_avx512-access.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c + +gmap_avx512-access.obj: access.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-access.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-access.Tpo -c -o gmap_avx512-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-access.Tpo $(DEPDIR)/gmap_avx512-access.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='access.c' object='gmap_avx512-access.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi` + +gmap_avx512-filestring.o: filestring.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-filestring.o -MD -MP -MF $(DEPDIR)/gmap_avx512-filestring.Tpo -c -o 
gmap_avx512-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-filestring.Tpo $(DEPDIR)/gmap_avx512-filestring.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='filestring.c' object='gmap_avx512-filestring.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c + +gmap_avx512-filestring.obj: filestring.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-filestring.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-filestring.Tpo -c -o gmap_avx512-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-filestring.Tpo $(DEPDIR)/gmap_avx512-filestring.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='filestring.c' object='gmap_avx512-filestring.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi` + +gmap_avx512-iit-read-univ.o: iit-read-univ.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-iit-read-univ.o -MD -MP -MF $(DEPDIR)/gmap_avx512-iit-read-univ.Tpo -c -o gmap_avx512-iit-read-univ.o 
`test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-iit-read-univ.Tpo $(DEPDIR)/gmap_avx512-iit-read-univ.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='iit-read-univ.c' object='gmap_avx512-iit-read-univ.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c + +gmap_avx512-iit-read-univ.obj: iit-read-univ.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-iit-read-univ.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-iit-read-univ.Tpo -c -o gmap_avx512-iit-read-univ.obj `if test -f 'iit-read-univ.c'; then $(CYGPATH_W) 'iit-read-univ.c'; else $(CYGPATH_W) '$(srcdir)/iit-read-univ.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-iit-read-univ.Tpo $(DEPDIR)/gmap_avx512-iit-read-univ.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='iit-read-univ.c' object='gmap_avx512-iit-read-univ.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-iit-read-univ.obj `if test -f 'iit-read-univ.c'; then $(CYGPATH_W) 'iit-read-univ.c'; else $(CYGPATH_W) '$(srcdir)/iit-read-univ.c'; fi` + +gmap_avx512-iit-read.o: iit-read.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-iit-read.o -MD -MP -MF $(DEPDIR)/gmap_avx512-iit-read.Tpo -c -o 
gmap_avx512-iit-read.o `test -f 'iit-read.c' || echo '$(srcdir)/'`iit-read.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-iit-read.Tpo $(DEPDIR)/gmap_avx512-iit-read.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='iit-read.c' object='gmap_avx512-iit-read.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-iit-read.o `test -f 'iit-read.c' || echo '$(srcdir)/'`iit-read.c + +gmap_avx512-iit-read.obj: iit-read.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-iit-read.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-iit-read.Tpo -c -o gmap_avx512-iit-read.obj `if test -f 'iit-read.c'; then $(CYGPATH_W) 'iit-read.c'; else $(CYGPATH_W) '$(srcdir)/iit-read.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-iit-read.Tpo $(DEPDIR)/gmap_avx512-iit-read.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='iit-read.c' object='gmap_avx512-iit-read.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-iit-read.obj `if test -f 'iit-read.c'; then $(CYGPATH_W) 'iit-read.c'; else $(CYGPATH_W) '$(srcdir)/iit-read.c'; fi` + +gmap_avx512-md5.o: md5.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-md5.o -MD -MP -MF $(DEPDIR)/gmap_avx512-md5.Tpo -c -o gmap_avx512-md5.o `test -f 'md5.c' || echo '$(srcdir)/'`md5.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gmap_avx512-md5.Tpo $(DEPDIR)/gmap_avx512-md5.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='md5.c' object='gmap_avx512-md5.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-md5.o `test -f 'md5.c' || echo '$(srcdir)/'`md5.c + +gmap_avx512-md5.obj: md5.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-md5.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-md5.Tpo -c -o gmap_avx512-md5.obj `if test -f 'md5.c'; then $(CYGPATH_W) 'md5.c'; else $(CYGPATH_W) '$(srcdir)/md5.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-md5.Tpo $(DEPDIR)/gmap_avx512-md5.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='md5.c' object='gmap_avx512-md5.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-md5.obj `if test -f 'md5.c'; then $(CYGPATH_W) 'md5.c'; else $(CYGPATH_W) '$(srcdir)/md5.c'; fi` + +gmap_avx512-bzip2.o: bzip2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-bzip2.o -MD -MP -MF $(DEPDIR)/gmap_avx512-bzip2.Tpo -c -o gmap_avx512-bzip2.o `test -f 'bzip2.c' || echo '$(srcdir)/'`bzip2.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-bzip2.Tpo $(DEPDIR)/gmap_avx512-bzip2.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bzip2.c' object='gmap_avx512-bzip2.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) 
$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-bzip2.o `test -f 'bzip2.c' || echo '$(srcdir)/'`bzip2.c + +gmap_avx512-bzip2.obj: bzip2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-bzip2.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-bzip2.Tpo -c -o gmap_avx512-bzip2.obj `if test -f 'bzip2.c'; then $(CYGPATH_W) 'bzip2.c'; else $(CYGPATH_W) '$(srcdir)/bzip2.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-bzip2.Tpo $(DEPDIR)/gmap_avx512-bzip2.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bzip2.c' object='gmap_avx512-bzip2.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-bzip2.obj `if test -f 'bzip2.c'; then $(CYGPATH_W) 'bzip2.c'; else $(CYGPATH_W) '$(srcdir)/bzip2.c'; fi` + +gmap_avx512-sequence.o: sequence.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-sequence.o -MD -MP -MF $(DEPDIR)/gmap_avx512-sequence.Tpo -c -o gmap_avx512-sequence.o `test -f 'sequence.c' || echo '$(srcdir)/'`sequence.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-sequence.Tpo $(DEPDIR)/gmap_avx512-sequence.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sequence.c' object='gmap_avx512-sequence.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-sequence.o `test -f 'sequence.c' || echo '$(srcdir)/'`sequence.c + +gmap_avx512-sequence.obj: sequence.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-sequence.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-sequence.Tpo -c -o gmap_avx512-sequence.obj `if test -f 'sequence.c'; then $(CYGPATH_W) 'sequence.c'; else $(CYGPATH_W) '$(srcdir)/sequence.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-sequence.Tpo $(DEPDIR)/gmap_avx512-sequence.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sequence.c' object='gmap_avx512-sequence.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-sequence.obj `if test -f 'sequence.c'; then $(CYGPATH_W) 'sequence.c'; else $(CYGPATH_W) '$(srcdir)/sequence.c'; fi` + +gmap_avx512-reader.o: reader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-reader.o -MD -MP -MF $(DEPDIR)/gmap_avx512-reader.Tpo -c -o gmap_avx512-reader.o `test -f 'reader.c' || echo '$(srcdir)/'`reader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-reader.Tpo $(DEPDIR)/gmap_avx512-reader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='reader.c' object='gmap_avx512-reader.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-reader.o `test -f 'reader.c' || echo '$(srcdir)/'`reader.c + 
+gmap_avx512-reader.obj: reader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-reader.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-reader.Tpo -c -o gmap_avx512-reader.obj `if test -f 'reader.c'; then $(CYGPATH_W) 'reader.c'; else $(CYGPATH_W) '$(srcdir)/reader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-reader.Tpo $(DEPDIR)/gmap_avx512-reader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='reader.c' object='gmap_avx512-reader.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-reader.obj `if test -f 'reader.c'; then $(CYGPATH_W) 'reader.c'; else $(CYGPATH_W) '$(srcdir)/reader.c'; fi` + +gmap_avx512-genomicpos.o: genomicpos.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genomicpos.o -MD -MP -MF $(DEPDIR)/gmap_avx512-genomicpos.Tpo -c -o gmap_avx512-genomicpos.o `test -f 'genomicpos.c' || echo '$(srcdir)/'`genomicpos.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-genomicpos.Tpo $(DEPDIR)/gmap_avx512-genomicpos.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genomicpos.c' object='gmap_avx512-genomicpos.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genomicpos.o `test -f 'genomicpos.c' || echo '$(srcdir)/'`genomicpos.c + +gmap_avx512-genomicpos.obj: genomicpos.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genomicpos.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-genomicpos.Tpo -c -o gmap_avx512-genomicpos.obj `if test -f 'genomicpos.c'; then $(CYGPATH_W) 'genomicpos.c'; else $(CYGPATH_W) '$(srcdir)/genomicpos.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-genomicpos.Tpo $(DEPDIR)/gmap_avx512-genomicpos.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genomicpos.c' object='gmap_avx512-genomicpos.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genomicpos.obj `if test -f 'genomicpos.c'; then $(CYGPATH_W) 'genomicpos.c'; else $(CYGPATH_W) '$(srcdir)/genomicpos.c'; fi` + +gmap_avx512-compress.o: compress.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-compress.o -MD -MP -MF $(DEPDIR)/gmap_avx512-compress.Tpo -c -o gmap_avx512-compress.o `test -f 'compress.c' || echo '$(srcdir)/'`compress.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-compress.Tpo $(DEPDIR)/gmap_avx512-compress.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compress.c' object='gmap_avx512-compress.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-compress.o `test -f 'compress.c' || echo '$(srcdir)/'`compress.c + +gmap_avx512-compress.obj: compress.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT 
gmap_avx512-compress.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-compress.Tpo -c -o gmap_avx512-compress.obj `if test -f 'compress.c'; then $(CYGPATH_W) 'compress.c'; else $(CYGPATH_W) '$(srcdir)/compress.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-compress.Tpo $(DEPDIR)/gmap_avx512-compress.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compress.c' object='gmap_avx512-compress.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-compress.obj `if test -f 'compress.c'; then $(CYGPATH_W) 'compress.c'; else $(CYGPATH_W) '$(srcdir)/compress.c'; fi` + +gmap_avx512-compress-write.o: compress-write.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-compress-write.o -MD -MP -MF $(DEPDIR)/gmap_avx512-compress-write.Tpo -c -o gmap_avx512-compress-write.o `test -f 'compress-write.c' || echo '$(srcdir)/'`compress-write.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-compress-write.Tpo $(DEPDIR)/gmap_avx512-compress-write.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compress-write.c' object='gmap_avx512-compress-write.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-compress-write.o `test -f 'compress-write.c' || echo '$(srcdir)/'`compress-write.c + +gmap_avx512-compress-write.obj: compress-write.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT 
gmap_avx512-compress-write.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-compress-write.Tpo -c -o gmap_avx512-compress-write.obj `if test -f 'compress-write.c'; then $(CYGPATH_W) 'compress-write.c'; else $(CYGPATH_W) '$(srcdir)/compress-write.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-compress-write.Tpo $(DEPDIR)/gmap_avx512-compress-write.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compress-write.c' object='gmap_avx512-compress-write.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-compress-write.obj `if test -f 'compress-write.c'; then $(CYGPATH_W) 'compress-write.c'; else $(CYGPATH_W) '$(srcdir)/compress-write.c'; fi` + +gmap_avx512-gbuffer.o: gbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-gbuffer.o -MD -MP -MF $(DEPDIR)/gmap_avx512-gbuffer.Tpo -c -o gmap_avx512-gbuffer.o `test -f 'gbuffer.c' || echo '$(srcdir)/'`gbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-gbuffer.Tpo $(DEPDIR)/gmap_avx512-gbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gbuffer.c' object='gmap_avx512-gbuffer.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-gbuffer.o `test -f 'gbuffer.c' || echo '$(srcdir)/'`gbuffer.c + +gmap_avx512-gbuffer.obj: gbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-gbuffer.obj -MD -MP -MF 
$(DEPDIR)/gmap_avx512-gbuffer.Tpo -c -o gmap_avx512-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-gbuffer.Tpo $(DEPDIR)/gmap_avx512-gbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gbuffer.c' object='gmap_avx512-gbuffer.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi` + +gmap_avx512-genome.o: genome.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genome.o -MD -MP -MF $(DEPDIR)/gmap_avx512-genome.Tpo -c -o gmap_avx512-genome.o `test -f 'genome.c' || echo '$(srcdir)/'`genome.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-genome.Tpo $(DEPDIR)/gmap_avx512-genome.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome.c' object='gmap_avx512-genome.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genome.o `test -f 'genome.c' || echo '$(srcdir)/'`genome.c + +gmap_avx512-genome.obj: genome.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genome.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-genome.Tpo -c -o gmap_avx512-genome.obj `if test -f 'genome.c'; then $(CYGPATH_W) 'genome.c'; else $(CYGPATH_W) 
'$(srcdir)/genome.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-genome.Tpo $(DEPDIR)/gmap_avx512-genome.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome.c' object='gmap_avx512-genome.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genome.obj `if test -f 'genome.c'; then $(CYGPATH_W) 'genome.c'; else $(CYGPATH_W) '$(srcdir)/genome.c'; fi` + +gmap_avx512-popcount.o: popcount.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-popcount.o -MD -MP -MF $(DEPDIR)/gmap_avx512-popcount.Tpo -c -o gmap_avx512-popcount.o `test -f 'popcount.c' || echo '$(srcdir)/'`popcount.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-popcount.Tpo $(DEPDIR)/gmap_avx512-popcount.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='popcount.c' object='gmap_avx512-popcount.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-popcount.o `test -f 'popcount.c' || echo '$(srcdir)/'`popcount.c + +gmap_avx512-popcount.obj: popcount.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-popcount.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-popcount.Tpo -c -o gmap_avx512-popcount.obj `if test -f 'popcount.c'; then $(CYGPATH_W) 'popcount.c'; else $(CYGPATH_W) '$(srcdir)/popcount.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-popcount.Tpo 
$(DEPDIR)/gmap_avx512-popcount.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='popcount.c' object='gmap_avx512-popcount.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-popcount.obj `if test -f 'popcount.c'; then $(CYGPATH_W) 'popcount.c'; else $(CYGPATH_W) '$(srcdir)/popcount.c'; fi` + +gmap_avx512-genome128_hr.o: genome128_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genome128_hr.o -MD -MP -MF $(DEPDIR)/gmap_avx512-genome128_hr.Tpo -c -o gmap_avx512-genome128_hr.o `test -f 'genome128_hr.c' || echo '$(srcdir)/'`genome128_hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-genome128_hr.Tpo $(DEPDIR)/gmap_avx512-genome128_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome128_hr.c' object='gmap_avx512-genome128_hr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genome128_hr.o `test -f 'genome128_hr.c' || echo '$(srcdir)/'`genome128_hr.c + +gmap_avx512-genome128_hr.obj: genome128_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genome128_hr.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-genome128_hr.Tpo -c -o gmap_avx512-genome128_hr.obj `if test -f 'genome128_hr.c'; then $(CYGPATH_W) 'genome128_hr.c'; else $(CYGPATH_W) '$(srcdir)/genome128_hr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-genome128_hr.Tpo 
$(DEPDIR)/gmap_avx512-genome128_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome128_hr.c' object='gmap_avx512-genome128_hr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genome128_hr.obj `if test -f 'genome128_hr.c'; then $(CYGPATH_W) 'genome128_hr.c'; else $(CYGPATH_W) '$(srcdir)/genome128_hr.c'; fi` + +gmap_avx512-genome_sites.o: genome_sites.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genome_sites.o -MD -MP -MF $(DEPDIR)/gmap_avx512-genome_sites.Tpo -c -o gmap_avx512-genome_sites.o `test -f 'genome_sites.c' || echo '$(srcdir)/'`genome_sites.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-genome_sites.Tpo $(DEPDIR)/gmap_avx512-genome_sites.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome_sites.c' object='gmap_avx512-genome_sites.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genome_sites.o `test -f 'genome_sites.c' || echo '$(srcdir)/'`genome_sites.c + +gmap_avx512-genome_sites.obj: genome_sites.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genome_sites.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-genome_sites.Tpo -c -o gmap_avx512-genome_sites.obj `if test -f 'genome_sites.c'; then $(CYGPATH_W) 'genome_sites.c'; else $(CYGPATH_W) '$(srcdir)/genome_sites.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gmap_avx512-genome_sites.Tpo $(DEPDIR)/gmap_avx512-genome_sites.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome_sites.c' object='gmap_avx512-genome_sites.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genome_sites.obj `if test -f 'genome_sites.c'; then $(CYGPATH_W) 'genome_sites.c'; else $(CYGPATH_W) '$(srcdir)/genome_sites.c'; fi` + +gmap_avx512-genome-write.o: genome-write.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genome-write.o -MD -MP -MF $(DEPDIR)/gmap_avx512-genome-write.Tpo -c -o gmap_avx512-genome-write.o `test -f 'genome-write.c' || echo '$(srcdir)/'`genome-write.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-genome-write.Tpo $(DEPDIR)/gmap_avx512-genome-write.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome-write.c' object='gmap_avx512-genome-write.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genome-write.o `test -f 'genome-write.c' || echo '$(srcdir)/'`genome-write.c + +gmap_avx512-genome-write.obj: genome-write.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-genome-write.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-genome-write.Tpo -c -o gmap_avx512-genome-write.obj `if test -f 'genome-write.c'; then $(CYGPATH_W) 'genome-write.c'; else $(CYGPATH_W) '$(srcdir)/genome-write.c'; fi` +@am__fastdepCC_TRUE@ 
$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-genome-write.Tpo $(DEPDIR)/gmap_avx512-genome-write.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome-write.c' object='gmap_avx512-genome-write.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-genome-write.obj `if test -f 'genome-write.c'; then $(CYGPATH_W) 'genome-write.c'; else $(CYGPATH_W) '$(srcdir)/genome-write.c'; fi` + +gmap_avx512-bitpack64-read.o: bitpack64-read.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-bitpack64-read.o -MD -MP -MF $(DEPDIR)/gmap_avx512-bitpack64-read.Tpo -c -o gmap_avx512-bitpack64-read.o `test -f 'bitpack64-read.c' || echo '$(srcdir)/'`bitpack64-read.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-bitpack64-read.Tpo $(DEPDIR)/gmap_avx512-bitpack64-read.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bitpack64-read.c' object='gmap_avx512-bitpack64-read.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-bitpack64-read.o `test -f 'bitpack64-read.c' || echo '$(srcdir)/'`bitpack64-read.c + +gmap_avx512-bitpack64-read.obj: bitpack64-read.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-bitpack64-read.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-bitpack64-read.Tpo -c -o gmap_avx512-bitpack64-read.obj `if test -f 'bitpack64-read.c'; then $(CYGPATH_W) 'bitpack64-read.c'; else 
$(CYGPATH_W) '$(srcdir)/bitpack64-read.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-bitpack64-read.Tpo $(DEPDIR)/gmap_avx512-bitpack64-read.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bitpack64-read.c' object='gmap_avx512-bitpack64-read.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-bitpack64-read.obj `if test -f 'bitpack64-read.c'; then $(CYGPATH_W) 'bitpack64-read.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-read.c'; fi` + +gmap_avx512-bitpack64-readtwo.o: bitpack64-readtwo.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-bitpack64-readtwo.o -MD -MP -MF $(DEPDIR)/gmap_avx512-bitpack64-readtwo.Tpo -c -o gmap_avx512-bitpack64-readtwo.o `test -f 'bitpack64-readtwo.c' || echo '$(srcdir)/'`bitpack64-readtwo.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-bitpack64-readtwo.Tpo $(DEPDIR)/gmap_avx512-bitpack64-readtwo.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bitpack64-readtwo.c' object='gmap_avx512-bitpack64-readtwo.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-bitpack64-readtwo.o `test -f 'bitpack64-readtwo.c' || echo '$(srcdir)/'`bitpack64-readtwo.c + +gmap_avx512-bitpack64-readtwo.obj: bitpack64-readtwo.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-bitpack64-readtwo.obj -MD -MP -MF 
$(DEPDIR)/gmap_avx512-bitpack64-readtwo.Tpo -c -o gmap_avx512-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-bitpack64-readtwo.Tpo $(DEPDIR)/gmap_avx512-bitpack64-readtwo.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bitpack64-readtwo.c' object='gmap_avx512-bitpack64-readtwo.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` + +gmap_avx512-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-merge.o -MD -MP -MF $(DEPDIR)/gmap_avx512-merge.Tpo -c -o gmap_avx512-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-merge.Tpo $(DEPDIR)/gmap_avx512-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmap_avx512-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gmap_avx512-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-merge.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-merge.Tpo -c -o 
gmap_avx512-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-merge.Tpo $(DEPDIR)/gmap_avx512-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmap_avx512-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + +gmap_avx512-indexdb.o: indexdb.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-indexdb.o -MD -MP -MF $(DEPDIR)/gmap_avx512-indexdb.Tpo -c -o gmap_avx512-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-indexdb.Tpo $(DEPDIR)/gmap_avx512-indexdb.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='indexdb.c' object='gmap_avx512-indexdb.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c + +gmap_avx512-indexdb.obj: indexdb.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-indexdb.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-indexdb.Tpo -c -o gmap_avx512-indexdb.obj `if test -f 'indexdb.c'; then $(CYGPATH_W) 'indexdb.c'; else $(CYGPATH_W) '$(srcdir)/indexdb.c'; fi` +@am__fastdepCC_TRUE@ 
$(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-indexdb.Tpo $(DEPDIR)/gmap_avx512-indexdb.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='indexdb.c' object='gmap_avx512-indexdb.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-indexdb.obj `if test -f 'indexdb.c'; then $(CYGPATH_W) 'indexdb.c'; else $(CYGPATH_W) '$(srcdir)/indexdb.c'; fi` + +gmap_avx512-indexdb_hr.o: indexdb_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-indexdb_hr.o -MD -MP -MF $(DEPDIR)/gmap_avx512-indexdb_hr.Tpo -c -o gmap_avx512-indexdb_hr.o `test -f 'indexdb_hr.c' || echo '$(srcdir)/'`indexdb_hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-indexdb_hr.Tpo $(DEPDIR)/gmap_avx512-indexdb_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='indexdb_hr.c' object='gmap_avx512-indexdb_hr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-indexdb_hr.o `test -f 'indexdb_hr.c' || echo '$(srcdir)/'`indexdb_hr.c + +gmap_avx512-indexdb_hr.obj: indexdb_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-indexdb_hr.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-indexdb_hr.Tpo -c -o gmap_avx512-indexdb_hr.obj `if test -f 'indexdb_hr.c'; then $(CYGPATH_W) 'indexdb_hr.c'; else $(CYGPATH_W) '$(srcdir)/indexdb_hr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-indexdb_hr.Tpo 
$(DEPDIR)/gmap_avx512-indexdb_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='indexdb_hr.c' object='gmap_avx512-indexdb_hr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-indexdb_hr.obj `if test -f 'indexdb_hr.c'; then $(CYGPATH_W) 'indexdb_hr.c'; else $(CYGPATH_W) '$(srcdir)/indexdb_hr.c'; fi` + +gmap_avx512-oligo.o: oligo.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-oligo.o -MD -MP -MF $(DEPDIR)/gmap_avx512-oligo.Tpo -c -o gmap_avx512-oligo.o `test -f 'oligo.c' || echo '$(srcdir)/'`oligo.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-oligo.Tpo $(DEPDIR)/gmap_avx512-oligo.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='oligo.c' object='gmap_avx512-oligo.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-oligo.o `test -f 'oligo.c' || echo '$(srcdir)/'`oligo.c + +gmap_avx512-oligo.obj: oligo.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-oligo.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-oligo.Tpo -c -o gmap_avx512-oligo.obj `if test -f 'oligo.c'; then $(CYGPATH_W) 'oligo.c'; else $(CYGPATH_W) '$(srcdir)/oligo.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-oligo.Tpo $(DEPDIR)/gmap_avx512-oligo.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='oligo.c' object='gmap_avx512-oligo.obj' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-oligo.obj `if test -f 'oligo.c'; then $(CYGPATH_W) 'oligo.c'; else $(CYGPATH_W) '$(srcdir)/oligo.c'; fi` + +gmap_avx512-block.o: block.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-block.o -MD -MP -MF $(DEPDIR)/gmap_avx512-block.Tpo -c -o gmap_avx512-block.o `test -f 'block.c' || echo '$(srcdir)/'`block.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-block.Tpo $(DEPDIR)/gmap_avx512-block.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='block.c' object='gmap_avx512-block.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-block.o `test -f 'block.c' || echo '$(srcdir)/'`block.c + +gmap_avx512-block.obj: block.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-block.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-block.Tpo -c -o gmap_avx512-block.obj `if test -f 'block.c'; then $(CYGPATH_W) 'block.c'; else $(CYGPATH_W) '$(srcdir)/block.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-block.Tpo $(DEPDIR)/gmap_avx512-block.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='block.c' object='gmap_avx512-block.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-block.obj `if test -f 'block.c'; then $(CYGPATH_W) 'block.c'; else $(CYGPATH_W) '$(srcdir)/block.c'; fi` + +gmap_avx512-chrom.o: chrom.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-chrom.o -MD -MP -MF $(DEPDIR)/gmap_avx512-chrom.Tpo -c -o gmap_avx512-chrom.o `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-chrom.Tpo $(DEPDIR)/gmap_avx512-chrom.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chrom.c' object='gmap_avx512-chrom.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-chrom.o `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c + +gmap_avx512-chrom.obj: chrom.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-chrom.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-chrom.Tpo -c -o gmap_avx512-chrom.obj `if test -f 'chrom.c'; then $(CYGPATH_W) 'chrom.c'; else $(CYGPATH_W) '$(srcdir)/chrom.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-chrom.Tpo $(DEPDIR)/gmap_avx512-chrom.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chrom.c' object='gmap_avx512-chrom.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-chrom.obj `if test -f 'chrom.c'; then $(CYGPATH_W) 'chrom.c'; else $(CYGPATH_W) '$(srcdir)/chrom.c'; fi` + 
+gmap_avx512-segmentpos.o: segmentpos.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-segmentpos.o -MD -MP -MF $(DEPDIR)/gmap_avx512-segmentpos.Tpo -c -o gmap_avx512-segmentpos.o `test -f 'segmentpos.c' || echo '$(srcdir)/'`segmentpos.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-segmentpos.Tpo $(DEPDIR)/gmap_avx512-segmentpos.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='segmentpos.c' object='gmap_avx512-segmentpos.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-segmentpos.o `test -f 'segmentpos.c' || echo '$(srcdir)/'`segmentpos.c + +gmap_avx512-segmentpos.obj: segmentpos.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-segmentpos.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-segmentpos.Tpo -c -o gmap_avx512-segmentpos.obj `if test -f 'segmentpos.c'; then $(CYGPATH_W) 'segmentpos.c'; else $(CYGPATH_W) '$(srcdir)/segmentpos.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-segmentpos.Tpo $(DEPDIR)/gmap_avx512-segmentpos.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='segmentpos.c' object='gmap_avx512-segmentpos.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-segmentpos.obj `if test -f 'segmentpos.c'; then $(CYGPATH_W) 'segmentpos.c'; else $(CYGPATH_W) '$(srcdir)/segmentpos.c'; fi` + +gmap_avx512-chrnum.o: chrnum.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-chrnum.o -MD -MP -MF $(DEPDIR)/gmap_avx512-chrnum.Tpo -c -o gmap_avx512-chrnum.o `test -f 'chrnum.c' || echo '$(srcdir)/'`chrnum.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-chrnum.Tpo $(DEPDIR)/gmap_avx512-chrnum.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chrnum.c' object='gmap_avx512-chrnum.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-chrnum.o `test -f 'chrnum.c' || echo '$(srcdir)/'`chrnum.c + +gmap_avx512-chrnum.obj: chrnum.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-chrnum.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-chrnum.Tpo -c -o gmap_avx512-chrnum.obj `if test -f 'chrnum.c'; then $(CYGPATH_W) 'chrnum.c'; else $(CYGPATH_W) '$(srcdir)/chrnum.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-chrnum.Tpo $(DEPDIR)/gmap_avx512-chrnum.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chrnum.c' object='gmap_avx512-chrnum.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-chrnum.obj `if test -f 'chrnum.c'; then $(CYGPATH_W) 'chrnum.c'; else $(CYGPATH_W) '$(srcdir)/chrnum.c'; fi` + +gmap_avx512-uinttable.o: uinttable.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT 
gmap_avx512-uinttable.o -MD -MP -MF $(DEPDIR)/gmap_avx512-uinttable.Tpo -c -o gmap_avx512-uinttable.o `test -f 'uinttable.c' || echo '$(srcdir)/'`uinttable.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-uinttable.Tpo $(DEPDIR)/gmap_avx512-uinttable.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uinttable.c' object='gmap_avx512-uinttable.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-uinttable.o `test -f 'uinttable.c' || echo '$(srcdir)/'`uinttable.c + +gmap_avx512-uinttable.obj: uinttable.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-uinttable.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-uinttable.Tpo -c -o gmap_avx512-uinttable.obj `if test -f 'uinttable.c'; then $(CYGPATH_W) 'uinttable.c'; else $(CYGPATH_W) '$(srcdir)/uinttable.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-uinttable.Tpo $(DEPDIR)/gmap_avx512-uinttable.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uinttable.c' object='gmap_avx512-uinttable.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-uinttable.obj `if test -f 'uinttable.c'; then $(CYGPATH_W) 'uinttable.c'; else $(CYGPATH_W) '$(srcdir)/uinttable.c'; fi` + +gmap_avx512-gregion.o: gregion.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-gregion.o -MD -MP -MF $(DEPDIR)/gmap_avx512-gregion.Tpo -c -o 
gmap_avx512-gregion.o `test -f 'gregion.c' || echo '$(srcdir)/'`gregion.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-gregion.Tpo $(DEPDIR)/gmap_avx512-gregion.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gregion.c' object='gmap_avx512-gregion.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-gregion.o `test -f 'gregion.c' || echo '$(srcdir)/'`gregion.c + +gmap_avx512-gregion.obj: gregion.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-gregion.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-gregion.Tpo -c -o gmap_avx512-gregion.obj `if test -f 'gregion.c'; then $(CYGPATH_W) 'gregion.c'; else $(CYGPATH_W) '$(srcdir)/gregion.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-gregion.Tpo $(DEPDIR)/gmap_avx512-gregion.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gregion.c' object='gmap_avx512-gregion.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-gregion.obj `if test -f 'gregion.c'; then $(CYGPATH_W) 'gregion.c'; else $(CYGPATH_W) '$(srcdir)/gregion.c'; fi` + +gmap_avx512-match.o: match.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-match.o -MD -MP -MF $(DEPDIR)/gmap_avx512-match.Tpo -c -o gmap_avx512-match.o `test -f 'match.c' || echo '$(srcdir)/'`match.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-match.Tpo 
$(DEPDIR)/gmap_avx512-match.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='match.c' object='gmap_avx512-match.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-match.o `test -f 'match.c' || echo '$(srcdir)/'`match.c + +gmap_avx512-match.obj: match.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-match.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-match.Tpo -c -o gmap_avx512-match.obj `if test -f 'match.c'; then $(CYGPATH_W) 'match.c'; else $(CYGPATH_W) '$(srcdir)/match.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-match.Tpo $(DEPDIR)/gmap_avx512-match.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='match.c' object='gmap_avx512-match.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-match.obj `if test -f 'match.c'; then $(CYGPATH_W) 'match.c'; else $(CYGPATH_W) '$(srcdir)/match.c'; fi` + +gmap_avx512-matchpool.o: matchpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-matchpool.o -MD -MP -MF $(DEPDIR)/gmap_avx512-matchpool.Tpo -c -o gmap_avx512-matchpool.o `test -f 'matchpool.c' || echo '$(srcdir)/'`matchpool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-matchpool.Tpo $(DEPDIR)/gmap_avx512-matchpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='matchpool.c' object='gmap_avx512-matchpool.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-matchpool.o `test -f 'matchpool.c' || echo '$(srcdir)/'`matchpool.c + +gmap_avx512-matchpool.obj: matchpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-matchpool.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-matchpool.Tpo -c -o gmap_avx512-matchpool.obj `if test -f 'matchpool.c'; then $(CYGPATH_W) 'matchpool.c'; else $(CYGPATH_W) '$(srcdir)/matchpool.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-matchpool.Tpo $(DEPDIR)/gmap_avx512-matchpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='matchpool.c' object='gmap_avx512-matchpool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-matchpool.obj `if test -f 'matchpool.c'; then $(CYGPATH_W) 'matchpool.c'; else $(CYGPATH_W) '$(srcdir)/matchpool.c'; fi` + +gmap_avx512-diagnostic.o: diagnostic.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-diagnostic.o -MD -MP -MF $(DEPDIR)/gmap_avx512-diagnostic.Tpo -c -o gmap_avx512-diagnostic.o `test -f 'diagnostic.c' || echo '$(srcdir)/'`diagnostic.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-diagnostic.Tpo $(DEPDIR)/gmap_avx512-diagnostic.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='diagnostic.c' object='gmap_avx512-diagnostic.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) 
$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-diagnostic.o `test -f 'diagnostic.c' || echo '$(srcdir)/'`diagnostic.c + +gmap_avx512-diagnostic.obj: diagnostic.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-diagnostic.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-diagnostic.Tpo -c -o gmap_avx512-diagnostic.obj `if test -f 'diagnostic.c'; then $(CYGPATH_W) 'diagnostic.c'; else $(CYGPATH_W) '$(srcdir)/diagnostic.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-diagnostic.Tpo $(DEPDIR)/gmap_avx512-diagnostic.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='diagnostic.c' object='gmap_avx512-diagnostic.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-diagnostic.obj `if test -f 'diagnostic.c'; then $(CYGPATH_W) 'diagnostic.c'; else $(CYGPATH_W) '$(srcdir)/diagnostic.c'; fi` + +gmap_avx512-stage1.o: stage1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-stage1.o -MD -MP -MF $(DEPDIR)/gmap_avx512-stage1.Tpo -c -o gmap_avx512-stage1.o `test -f 'stage1.c' || echo '$(srcdir)/'`stage1.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-stage1.Tpo $(DEPDIR)/gmap_avx512-stage1.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage1.c' object='gmap_avx512-stage1.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-stage1.o `test -f 'stage1.c' || echo '$(srcdir)/'`stage1.c + +gmap_avx512-stage1.obj: stage1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-stage1.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-stage1.Tpo -c -o gmap_avx512-stage1.obj `if test -f 'stage1.c'; then $(CYGPATH_W) 'stage1.c'; else $(CYGPATH_W) '$(srcdir)/stage1.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-stage1.Tpo $(DEPDIR)/gmap_avx512-stage1.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage1.c' object='gmap_avx512-stage1.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-stage1.obj `if test -f 'stage1.c'; then $(CYGPATH_W) 'stage1.c'; else $(CYGPATH_W) '$(srcdir)/stage1.c'; fi` + +gmap_avx512-diag.o: diag.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-diag.o -MD -MP -MF $(DEPDIR)/gmap_avx512-diag.Tpo -c -o gmap_avx512-diag.o `test -f 'diag.c' || echo '$(srcdir)/'`diag.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-diag.Tpo $(DEPDIR)/gmap_avx512-diag.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='diag.c' object='gmap_avx512-diag.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-diag.o `test -f 'diag.c' || echo 
'$(srcdir)/'`diag.c + +gmap_avx512-diag.obj: diag.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-diag.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-diag.Tpo -c -o gmap_avx512-diag.obj `if test -f 'diag.c'; then $(CYGPATH_W) 'diag.c'; else $(CYGPATH_W) '$(srcdir)/diag.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-diag.Tpo $(DEPDIR)/gmap_avx512-diag.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='diag.c' object='gmap_avx512-diag.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-diag.obj `if test -f 'diag.c'; then $(CYGPATH_W) 'diag.c'; else $(CYGPATH_W) '$(srcdir)/diag.c'; fi` + +gmap_avx512-diagpool.o: diagpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-diagpool.o -MD -MP -MF $(DEPDIR)/gmap_avx512-diagpool.Tpo -c -o gmap_avx512-diagpool.o `test -f 'diagpool.c' || echo '$(srcdir)/'`diagpool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-diagpool.Tpo $(DEPDIR)/gmap_avx512-diagpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='diagpool.c' object='gmap_avx512-diagpool.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-diagpool.o `test -f 'diagpool.c' || echo '$(srcdir)/'`diagpool.c + +gmap_avx512-diagpool.obj: diagpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-diagpool.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-diagpool.Tpo -c -o gmap_avx512-diagpool.obj `if test -f 'diagpool.c'; then $(CYGPATH_W) 'diagpool.c'; else $(CYGPATH_W) '$(srcdir)/diagpool.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-diagpool.Tpo $(DEPDIR)/gmap_avx512-diagpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='diagpool.c' object='gmap_avx512-diagpool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-diagpool.obj `if test -f 'diagpool.c'; then $(CYGPATH_W) 'diagpool.c'; else $(CYGPATH_W) '$(srcdir)/diagpool.c'; fi` + +gmap_avx512-cmet.o: cmet.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-cmet.o -MD -MP -MF $(DEPDIR)/gmap_avx512-cmet.Tpo -c -o gmap_avx512-cmet.o `test -f 'cmet.c' || echo '$(srcdir)/'`cmet.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-cmet.Tpo $(DEPDIR)/gmap_avx512-cmet.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cmet.c' object='gmap_avx512-cmet.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-cmet.o `test -f 'cmet.c' || echo '$(srcdir)/'`cmet.c + +gmap_avx512-cmet.obj: cmet.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-cmet.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-cmet.Tpo -c -o gmap_avx512-cmet.obj `if test -f 'cmet.c'; then $(CYGPATH_W) 
'cmet.c'; else $(CYGPATH_W) '$(srcdir)/cmet.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-cmet.Tpo $(DEPDIR)/gmap_avx512-cmet.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cmet.c' object='gmap_avx512-cmet.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-cmet.obj `if test -f 'cmet.c'; then $(CYGPATH_W) 'cmet.c'; else $(CYGPATH_W) '$(srcdir)/cmet.c'; fi` + +gmap_avx512-atoi.o: atoi.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-atoi.o -MD -MP -MF $(DEPDIR)/gmap_avx512-atoi.Tpo -c -o gmap_avx512-atoi.o `test -f 'atoi.c' || echo '$(srcdir)/'`atoi.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-atoi.Tpo $(DEPDIR)/gmap_avx512-atoi.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='atoi.c' object='gmap_avx512-atoi.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-atoi.o `test -f 'atoi.c' || echo '$(srcdir)/'`atoi.c + +gmap_avx512-atoi.obj: atoi.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-atoi.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-atoi.Tpo -c -o gmap_avx512-atoi.obj `if test -f 'atoi.c'; then $(CYGPATH_W) 'atoi.c'; else $(CYGPATH_W) '$(srcdir)/atoi.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-atoi.Tpo $(DEPDIR)/gmap_avx512-atoi.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='atoi.c' 
object='gmap_avx512-atoi.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-atoi.obj `if test -f 'atoi.c'; then $(CYGPATH_W) 'atoi.c'; else $(CYGPATH_W) '$(srcdir)/atoi.c'; fi` + +gmap_avx512-orderstat.o: orderstat.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-orderstat.o -MD -MP -MF $(DEPDIR)/gmap_avx512-orderstat.Tpo -c -o gmap_avx512-orderstat.o `test -f 'orderstat.c' || echo '$(srcdir)/'`orderstat.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-orderstat.Tpo $(DEPDIR)/gmap_avx512-orderstat.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='orderstat.c' object='gmap_avx512-orderstat.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-orderstat.o `test -f 'orderstat.c' || echo '$(srcdir)/'`orderstat.c + +gmap_avx512-orderstat.obj: orderstat.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-orderstat.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-orderstat.Tpo -c -o gmap_avx512-orderstat.obj `if test -f 'orderstat.c'; then $(CYGPATH_W) 'orderstat.c'; else $(CYGPATH_W) '$(srcdir)/orderstat.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-orderstat.Tpo $(DEPDIR)/gmap_avx512-orderstat.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='orderstat.c' object='gmap_avx512-orderstat.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-orderstat.obj `if test -f 'orderstat.c'; then $(CYGPATH_W) 'orderstat.c'; else $(CYGPATH_W) '$(srcdir)/orderstat.c'; fi` + +gmap_avx512-oligoindex_hr.o: oligoindex_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-oligoindex_hr.o -MD -MP -MF $(DEPDIR)/gmap_avx512-oligoindex_hr.Tpo -c -o gmap_avx512-oligoindex_hr.o `test -f 'oligoindex_hr.c' || echo '$(srcdir)/'`oligoindex_hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-oligoindex_hr.Tpo $(DEPDIR)/gmap_avx512-oligoindex_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='oligoindex_hr.c' object='gmap_avx512-oligoindex_hr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-oligoindex_hr.o `test -f 'oligoindex_hr.c' || echo '$(srcdir)/'`oligoindex_hr.c + +gmap_avx512-oligoindex_hr.obj: oligoindex_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-oligoindex_hr.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-oligoindex_hr.Tpo -c -o gmap_avx512-oligoindex_hr.obj `if test -f 'oligoindex_hr.c'; then $(CYGPATH_W) 'oligoindex_hr.c'; else $(CYGPATH_W) '$(srcdir)/oligoindex_hr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-oligoindex_hr.Tpo $(DEPDIR)/gmap_avx512-oligoindex_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='oligoindex_hr.c' object='gmap_avx512-oligoindex_hr.obj' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-oligoindex_hr.obj `if test -f 'oligoindex_hr.c'; then $(CYGPATH_W) 'oligoindex_hr.c'; else $(CYGPATH_W) '$(srcdir)/oligoindex_hr.c'; fi` + +gmap_avx512-intron.o: intron.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-intron.o -MD -MP -MF $(DEPDIR)/gmap_avx512-intron.Tpo -c -o gmap_avx512-intron.o `test -f 'intron.c' || echo '$(srcdir)/'`intron.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-intron.Tpo $(DEPDIR)/gmap_avx512-intron.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='intron.c' object='gmap_avx512-intron.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-intron.o `test -f 'intron.c' || echo '$(srcdir)/'`intron.c + +gmap_avx512-intron.obj: intron.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-intron.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-intron.Tpo -c -o gmap_avx512-intron.obj `if test -f 'intron.c'; then $(CYGPATH_W) 'intron.c'; else $(CYGPATH_W) '$(srcdir)/intron.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-intron.Tpo $(DEPDIR)/gmap_avx512-intron.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='intron.c' object='gmap_avx512-intron.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-intron.obj `if test -f 'intron.c'; then $(CYGPATH_W) 'intron.c'; else $(CYGPATH_W) '$(srcdir)/intron.c'; fi` + +gmap_avx512-maxent.o: maxent.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-maxent.o -MD -MP -MF $(DEPDIR)/gmap_avx512-maxent.Tpo -c -o gmap_avx512-maxent.o `test -f 'maxent.c' || echo '$(srcdir)/'`maxent.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-maxent.Tpo $(DEPDIR)/gmap_avx512-maxent.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='maxent.c' object='gmap_avx512-maxent.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-maxent.o `test -f 'maxent.c' || echo '$(srcdir)/'`maxent.c + +gmap_avx512-maxent.obj: maxent.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-maxent.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-maxent.Tpo -c -o gmap_avx512-maxent.obj `if test -f 'maxent.c'; then $(CYGPATH_W) 'maxent.c'; else $(CYGPATH_W) '$(srcdir)/maxent.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-maxent.Tpo $(DEPDIR)/gmap_avx512-maxent.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='maxent.c' object='gmap_avx512-maxent.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-maxent.obj `if test 
-f 'maxent.c'; then $(CYGPATH_W) 'maxent.c'; else $(CYGPATH_W) '$(srcdir)/maxent.c'; fi` + +gmap_avx512-maxent_hr.o: maxent_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-maxent_hr.o -MD -MP -MF $(DEPDIR)/gmap_avx512-maxent_hr.Tpo -c -o gmap_avx512-maxent_hr.o `test -f 'maxent_hr.c' || echo '$(srcdir)/'`maxent_hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-maxent_hr.Tpo $(DEPDIR)/gmap_avx512-maxent_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='maxent_hr.c' object='gmap_avx512-maxent_hr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-maxent_hr.o `test -f 'maxent_hr.c' || echo '$(srcdir)/'`maxent_hr.c + +gmap_avx512-maxent_hr.obj: maxent_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-maxent_hr.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-maxent_hr.Tpo -c -o gmap_avx512-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-maxent_hr.Tpo $(DEPDIR)/gmap_avx512-maxent_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='maxent_hr.c' object='gmap_avx512-maxent_hr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) 
'$(srcdir)/maxent_hr.c'; fi` + +gmap_avx512-pair.o: pair.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-pair.o -MD -MP -MF $(DEPDIR)/gmap_avx512-pair.Tpo -c -o gmap_avx512-pair.o `test -f 'pair.c' || echo '$(srcdir)/'`pair.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-pair.Tpo $(DEPDIR)/gmap_avx512-pair.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pair.c' object='gmap_avx512-pair.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-pair.o `test -f 'pair.c' || echo '$(srcdir)/'`pair.c + +gmap_avx512-pair.obj: pair.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-pair.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-pair.Tpo -c -o gmap_avx512-pair.obj `if test -f 'pair.c'; then $(CYGPATH_W) 'pair.c'; else $(CYGPATH_W) '$(srcdir)/pair.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-pair.Tpo $(DEPDIR)/gmap_avx512-pair.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pair.c' object='gmap_avx512-pair.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-pair.obj `if test -f 'pair.c'; then $(CYGPATH_W) 'pair.c'; else $(CYGPATH_W) '$(srcdir)/pair.c'; fi` + +gmap_avx512-pairpool.o: pairpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT 
gmap_avx512-pairpool.o -MD -MP -MF $(DEPDIR)/gmap_avx512-pairpool.Tpo -c -o gmap_avx512-pairpool.o `test -f 'pairpool.c' || echo '$(srcdir)/'`pairpool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-pairpool.Tpo $(DEPDIR)/gmap_avx512-pairpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pairpool.c' object='gmap_avx512-pairpool.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-pairpool.o `test -f 'pairpool.c' || echo '$(srcdir)/'`pairpool.c + +gmap_avx512-pairpool.obj: pairpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-pairpool.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-pairpool.Tpo -c -o gmap_avx512-pairpool.obj `if test -f 'pairpool.c'; then $(CYGPATH_W) 'pairpool.c'; else $(CYGPATH_W) '$(srcdir)/pairpool.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-pairpool.Tpo $(DEPDIR)/gmap_avx512-pairpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pairpool.c' object='gmap_avx512-pairpool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-pairpool.obj `if test -f 'pairpool.c'; then $(CYGPATH_W) 'pairpool.c'; else $(CYGPATH_W) '$(srcdir)/pairpool.c'; fi` + +gmap_avx512-cellpool.o: cellpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-cellpool.o -MD -MP -MF $(DEPDIR)/gmap_avx512-cellpool.Tpo -c -o gmap_avx512-cellpool.o 
`test -f 'cellpool.c' || echo '$(srcdir)/'`cellpool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-cellpool.Tpo $(DEPDIR)/gmap_avx512-cellpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cellpool.c' object='gmap_avx512-cellpool.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-cellpool.o `test -f 'cellpool.c' || echo '$(srcdir)/'`cellpool.c + +gmap_avx512-cellpool.obj: cellpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-cellpool.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-cellpool.Tpo -c -o gmap_avx512-cellpool.obj `if test -f 'cellpool.c'; then $(CYGPATH_W) 'cellpool.c'; else $(CYGPATH_W) '$(srcdir)/cellpool.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-cellpool.Tpo $(DEPDIR)/gmap_avx512-cellpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cellpool.c' object='gmap_avx512-cellpool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-cellpool.obj `if test -f 'cellpool.c'; then $(CYGPATH_W) 'cellpool.c'; else $(CYGPATH_W) '$(srcdir)/cellpool.c'; fi` + +gmap_avx512-stage2.o: stage2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-stage2.o -MD -MP -MF $(DEPDIR)/gmap_avx512-stage2.Tpo -c -o gmap_avx512-stage2.o `test -f 'stage2.c' || echo '$(srcdir)/'`stage2.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gmap_avx512-stage2.Tpo $(DEPDIR)/gmap_avx512-stage2.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage2.c' object='gmap_avx512-stage2.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-stage2.o `test -f 'stage2.c' || echo '$(srcdir)/'`stage2.c + +gmap_avx512-stage2.obj: stage2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-stage2.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-stage2.Tpo -c -o gmap_avx512-stage2.obj `if test -f 'stage2.c'; then $(CYGPATH_W) 'stage2.c'; else $(CYGPATH_W) '$(srcdir)/stage2.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-stage2.Tpo $(DEPDIR)/gmap_avx512-stage2.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage2.c' object='gmap_avx512-stage2.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-stage2.obj `if test -f 'stage2.c'; then $(CYGPATH_W) 'stage2.c'; else $(CYGPATH_W) '$(srcdir)/stage2.c'; fi` + +gmap_avx512-doublelist.o: doublelist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-doublelist.o -MD -MP -MF $(DEPDIR)/gmap_avx512-doublelist.Tpo -c -o gmap_avx512-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-doublelist.Tpo $(DEPDIR)/gmap_avx512-doublelist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='doublelist.c' object='gmap_avx512-doublelist.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c + +gmap_avx512-doublelist.obj: doublelist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-doublelist.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-doublelist.Tpo -c -o gmap_avx512-doublelist.obj `if test -f 'doublelist.c'; then $(CYGPATH_W) 'doublelist.c'; else $(CYGPATH_W) '$(srcdir)/doublelist.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-doublelist.Tpo $(DEPDIR)/gmap_avx512-doublelist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='doublelist.c' object='gmap_avx512-doublelist.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-doublelist.obj `if test -f 'doublelist.c'; then $(CYGPATH_W) 'doublelist.c'; else $(CYGPATH_W) '$(srcdir)/doublelist.c'; fi` + +gmap_avx512-smooth.o: smooth.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-smooth.o -MD -MP -MF $(DEPDIR)/gmap_avx512-smooth.Tpo -c -o gmap_avx512-smooth.o `test -f 'smooth.c' || echo '$(srcdir)/'`smooth.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-smooth.Tpo $(DEPDIR)/gmap_avx512-smooth.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='smooth.c' object='gmap_avx512-smooth.o' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-smooth.o `test -f 'smooth.c' || echo '$(srcdir)/'`smooth.c + +gmap_avx512-smooth.obj: smooth.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-smooth.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-smooth.Tpo -c -o gmap_avx512-smooth.obj `if test -f 'smooth.c'; then $(CYGPATH_W) 'smooth.c'; else $(CYGPATH_W) '$(srcdir)/smooth.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-smooth.Tpo $(DEPDIR)/gmap_avx512-smooth.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='smooth.c' object='gmap_avx512-smooth.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-smooth.obj `if test -f 'smooth.c'; then $(CYGPATH_W) 'smooth.c'; else $(CYGPATH_W) '$(srcdir)/smooth.c'; fi` + +gmap_avx512-splicestringpool.o: splicestringpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-splicestringpool.o -MD -MP -MF $(DEPDIR)/gmap_avx512-splicestringpool.Tpo -c -o gmap_avx512-splicestringpool.o `test -f 'splicestringpool.c' || echo '$(srcdir)/'`splicestringpool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-splicestringpool.Tpo $(DEPDIR)/gmap_avx512-splicestringpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicestringpool.c' object='gmap_avx512-splicestringpool.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-splicestringpool.o `test -f 'splicestringpool.c' || echo '$(srcdir)/'`splicestringpool.c + +gmap_avx512-splicestringpool.obj: splicestringpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-splicestringpool.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-splicestringpool.Tpo -c -o gmap_avx512-splicestringpool.obj `if test -f 'splicestringpool.c'; then $(CYGPATH_W) 'splicestringpool.c'; else $(CYGPATH_W) '$(srcdir)/splicestringpool.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-splicestringpool.Tpo $(DEPDIR)/gmap_avx512-splicestringpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicestringpool.c' object='gmap_avx512-splicestringpool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-splicestringpool.obj `if test -f 'splicestringpool.c'; then $(CYGPATH_W) 'splicestringpool.c'; else $(CYGPATH_W) '$(srcdir)/splicestringpool.c'; fi` + +gmap_avx512-splicetrie_build.o: splicetrie_build.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-splicetrie_build.o -MD -MP -MF $(DEPDIR)/gmap_avx512-splicetrie_build.Tpo -c -o gmap_avx512-splicetrie_build.o `test -f 'splicetrie_build.c' || echo '$(srcdir)/'`splicetrie_build.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-splicetrie_build.Tpo $(DEPDIR)/gmap_avx512-splicetrie_build.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='splicetrie_build.c' object='gmap_avx512-splicetrie_build.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-splicetrie_build.o `test -f 'splicetrie_build.c' || echo '$(srcdir)/'`splicetrie_build.c + +gmap_avx512-splicetrie_build.obj: splicetrie_build.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-splicetrie_build.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-splicetrie_build.Tpo -c -o gmap_avx512-splicetrie_build.obj `if test -f 'splicetrie_build.c'; then $(CYGPATH_W) 'splicetrie_build.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie_build.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-splicetrie_build.Tpo $(DEPDIR)/gmap_avx512-splicetrie_build.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicetrie_build.c' object='gmap_avx512-splicetrie_build.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-splicetrie_build.obj `if test -f 'splicetrie_build.c'; then $(CYGPATH_W) 'splicetrie_build.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie_build.c'; fi` + +gmap_avx512-splicetrie.o: splicetrie.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-splicetrie.o -MD -MP -MF $(DEPDIR)/gmap_avx512-splicetrie.Tpo -c -o gmap_avx512-splicetrie.o `test -f 'splicetrie.c' || echo '$(srcdir)/'`splicetrie.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gmap_avx512-splicetrie.Tpo $(DEPDIR)/gmap_avx512-splicetrie.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicetrie.c' object='gmap_avx512-splicetrie.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-splicetrie.o `test -f 'splicetrie.c' || echo '$(srcdir)/'`splicetrie.c + +gmap_avx512-splicetrie.obj: splicetrie.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-splicetrie.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-splicetrie.Tpo -c -o gmap_avx512-splicetrie.obj `if test -f 'splicetrie.c'; then $(CYGPATH_W) 'splicetrie.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-splicetrie.Tpo $(DEPDIR)/gmap_avx512-splicetrie.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicetrie.c' object='gmap_avx512-splicetrie.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-splicetrie.obj `if test -f 'splicetrie.c'; then $(CYGPATH_W) 'splicetrie.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie.c'; fi` + +gmap_avx512-boyer-moore.o: boyer-moore.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-boyer-moore.o -MD -MP -MF $(DEPDIR)/gmap_avx512-boyer-moore.Tpo -c -o gmap_avx512-boyer-moore.o `test -f 'boyer-moore.c' || echo '$(srcdir)/'`boyer-moore.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gmap_avx512-boyer-moore.Tpo $(DEPDIR)/gmap_avx512-boyer-moore.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='boyer-moore.c' object='gmap_avx512-boyer-moore.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-boyer-moore.o `test -f 'boyer-moore.c' || echo '$(srcdir)/'`boyer-moore.c + +gmap_avx512-boyer-moore.obj: boyer-moore.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-boyer-moore.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-boyer-moore.Tpo -c -o gmap_avx512-boyer-moore.obj `if test -f 'boyer-moore.c'; then $(CYGPATH_W) 'boyer-moore.c'; else $(CYGPATH_W) '$(srcdir)/boyer-moore.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-boyer-moore.Tpo $(DEPDIR)/gmap_avx512-boyer-moore.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='boyer-moore.c' object='gmap_avx512-boyer-moore.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-boyer-moore.obj `if test -f 'boyer-moore.c'; then $(CYGPATH_W) 'boyer-moore.c'; else $(CYGPATH_W) '$(srcdir)/boyer-moore.c'; fi` + +gmap_avx512-dynprog.o: dynprog.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog.o -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog.Tpo -c -o gmap_avx512-dynprog.o `test -f 'dynprog.c' || echo '$(srcdir)/'`dynprog.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog.Tpo 
$(DEPDIR)/gmap_avx512-dynprog.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog.c' object='gmap_avx512-dynprog.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog.o `test -f 'dynprog.c' || echo '$(srcdir)/'`dynprog.c + +gmap_avx512-dynprog.obj: dynprog.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog.Tpo -c -o gmap_avx512-dynprog.obj `if test -f 'dynprog.c'; then $(CYGPATH_W) 'dynprog.c'; else $(CYGPATH_W) '$(srcdir)/dynprog.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog.Tpo $(DEPDIR)/gmap_avx512-dynprog.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog.c' object='gmap_avx512-dynprog.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog.obj `if test -f 'dynprog.c'; then $(CYGPATH_W) 'dynprog.c'; else $(CYGPATH_W) '$(srcdir)/dynprog.c'; fi` + +gmap_avx512-dynprog_simd.o: dynprog_simd.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_simd.o -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_simd.Tpo -c -o gmap_avx512-dynprog_simd.o `test -f 'dynprog_simd.c' || echo '$(srcdir)/'`dynprog_simd.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog_simd.Tpo $(DEPDIR)/gmap_avx512-dynprog_simd.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='dynprog_simd.c' object='gmap_avx512-dynprog_simd.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_simd.o `test -f 'dynprog_simd.c' || echo '$(srcdir)/'`dynprog_simd.c + +gmap_avx512-dynprog_simd.obj: dynprog_simd.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_simd.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_simd.Tpo -c -o gmap_avx512-dynprog_simd.obj `if test -f 'dynprog_simd.c'; then $(CYGPATH_W) 'dynprog_simd.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_simd.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog_simd.Tpo $(DEPDIR)/gmap_avx512-dynprog_simd.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_simd.c' object='gmap_avx512-dynprog_simd.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_simd.obj `if test -f 'dynprog_simd.c'; then $(CYGPATH_W) 'dynprog_simd.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_simd.c'; fi` + +gmap_avx512-dynprog_single.o: dynprog_single.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_single.o -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_single.Tpo -c -o gmap_avx512-dynprog_single.o `test -f 'dynprog_single.c' || echo '$(srcdir)/'`dynprog_single.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog_single.Tpo $(DEPDIR)/gmap_avx512-dynprog_single.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_single.c' object='gmap_avx512-dynprog_single.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_single.o `test -f 'dynprog_single.c' || echo '$(srcdir)/'`dynprog_single.c + +gmap_avx512-dynprog_single.obj: dynprog_single.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_single.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_single.Tpo -c -o gmap_avx512-dynprog_single.obj `if test -f 'dynprog_single.c'; then $(CYGPATH_W) 'dynprog_single.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_single.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog_single.Tpo $(DEPDIR)/gmap_avx512-dynprog_single.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_single.c' object='gmap_avx512-dynprog_single.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_single.obj `if test -f 'dynprog_single.c'; then $(CYGPATH_W) 'dynprog_single.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_single.c'; fi` + +gmap_avx512-dynprog_genome.o: dynprog_genome.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_genome.o -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_genome.Tpo -c -o gmap_avx512-dynprog_genome.o `test -f 'dynprog_genome.c' || echo '$(srcdir)/'`dynprog_genome.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gmap_avx512-dynprog_genome.Tpo $(DEPDIR)/gmap_avx512-dynprog_genome.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_genome.c' object='gmap_avx512-dynprog_genome.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_genome.o `test -f 'dynprog_genome.c' || echo '$(srcdir)/'`dynprog_genome.c + +gmap_avx512-dynprog_genome.obj: dynprog_genome.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_genome.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_genome.Tpo -c -o gmap_avx512-dynprog_genome.obj `if test -f 'dynprog_genome.c'; then $(CYGPATH_W) 'dynprog_genome.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_genome.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog_genome.Tpo $(DEPDIR)/gmap_avx512-dynprog_genome.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_genome.c' object='gmap_avx512-dynprog_genome.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_genome.obj `if test -f 'dynprog_genome.c'; then $(CYGPATH_W) 'dynprog_genome.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_genome.c'; fi` + +gmap_avx512-dynprog_cdna.o: dynprog_cdna.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_cdna.o -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_cdna.Tpo -c -o gmap_avx512-dynprog_cdna.o `test -f 'dynprog_cdna.c' || echo 
'$(srcdir)/'`dynprog_cdna.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog_cdna.Tpo $(DEPDIR)/gmap_avx512-dynprog_cdna.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_cdna.c' object='gmap_avx512-dynprog_cdna.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_cdna.o `test -f 'dynprog_cdna.c' || echo '$(srcdir)/'`dynprog_cdna.c + +gmap_avx512-dynprog_cdna.obj: dynprog_cdna.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_cdna.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_cdna.Tpo -c -o gmap_avx512-dynprog_cdna.obj `if test -f 'dynprog_cdna.c'; then $(CYGPATH_W) 'dynprog_cdna.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_cdna.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog_cdna.Tpo $(DEPDIR)/gmap_avx512-dynprog_cdna.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_cdna.c' object='gmap_avx512-dynprog_cdna.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_cdna.obj `if test -f 'dynprog_cdna.c'; then $(CYGPATH_W) 'dynprog_cdna.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_cdna.c'; fi` + +gmap_avx512-dynprog_end.o: dynprog_end.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_end.o -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_end.Tpo -c -o gmap_avx512-dynprog_end.o `test -f 
'dynprog_end.c' || echo '$(srcdir)/'`dynprog_end.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog_end.Tpo $(DEPDIR)/gmap_avx512-dynprog_end.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_end.c' object='gmap_avx512-dynprog_end.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_end.o `test -f 'dynprog_end.c' || echo '$(srcdir)/'`dynprog_end.c + +gmap_avx512-dynprog_end.obj: dynprog_end.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-dynprog_end.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-dynprog_end.Tpo -c -o gmap_avx512-dynprog_end.obj `if test -f 'dynprog_end.c'; then $(CYGPATH_W) 'dynprog_end.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_end.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-dynprog_end.Tpo $(DEPDIR)/gmap_avx512-dynprog_end.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_end.c' object='gmap_avx512-dynprog_end.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-dynprog_end.obj `if test -f 'dynprog_end.c'; then $(CYGPATH_W) 'dynprog_end.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_end.c'; fi` + +gmap_avx512-translation.o: translation.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-translation.o -MD -MP -MF $(DEPDIR)/gmap_avx512-translation.Tpo -c -o gmap_avx512-translation.o `test -f 
'translation.c' || echo '$(srcdir)/'`translation.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-translation.Tpo $(DEPDIR)/gmap_avx512-translation.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='translation.c' object='gmap_avx512-translation.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c + +gmap_avx512-translation.obj: translation.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-translation.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-translation.Tpo -c -o gmap_avx512-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-translation.Tpo $(DEPDIR)/gmap_avx512-translation.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='translation.c' object='gmap_avx512-translation.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi` + +gmap_avx512-pbinom.o: pbinom.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-pbinom.o -MD -MP -MF $(DEPDIR)/gmap_avx512-pbinom.Tpo -c -o gmap_avx512-pbinom.o `test -f 'pbinom.c' || echo 
'$(srcdir)/'`pbinom.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-pbinom.Tpo $(DEPDIR)/gmap_avx512-pbinom.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pbinom.c' object='gmap_avx512-pbinom.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-pbinom.o `test -f 'pbinom.c' || echo '$(srcdir)/'`pbinom.c + +gmap_avx512-pbinom.obj: pbinom.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-pbinom.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-pbinom.Tpo -c -o gmap_avx512-pbinom.obj `if test -f 'pbinom.c'; then $(CYGPATH_W) 'pbinom.c'; else $(CYGPATH_W) '$(srcdir)/pbinom.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-pbinom.Tpo $(DEPDIR)/gmap_avx512-pbinom.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pbinom.c' object='gmap_avx512-pbinom.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-pbinom.obj `if test -f 'pbinom.c'; then $(CYGPATH_W) 'pbinom.c'; else $(CYGPATH_W) '$(srcdir)/pbinom.c'; fi` + +gmap_avx512-changepoint.o: changepoint.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-changepoint.o -MD -MP -MF $(DEPDIR)/gmap_avx512-changepoint.Tpo -c -o gmap_avx512-changepoint.o `test -f 'changepoint.c' || echo '$(srcdir)/'`changepoint.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-changepoint.Tpo 
$(DEPDIR)/gmap_avx512-changepoint.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='changepoint.c' object='gmap_avx512-changepoint.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-changepoint.o `test -f 'changepoint.c' || echo '$(srcdir)/'`changepoint.c + +gmap_avx512-changepoint.obj: changepoint.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-changepoint.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-changepoint.Tpo -c -o gmap_avx512-changepoint.obj `if test -f 'changepoint.c'; then $(CYGPATH_W) 'changepoint.c'; else $(CYGPATH_W) '$(srcdir)/changepoint.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-changepoint.Tpo $(DEPDIR)/gmap_avx512-changepoint.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='changepoint.c' object='gmap_avx512-changepoint.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-changepoint.obj `if test -f 'changepoint.c'; then $(CYGPATH_W) 'changepoint.c'; else $(CYGPATH_W) '$(srcdir)/changepoint.c'; fi` + +gmap_avx512-stage3.o: stage3.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-stage3.o -MD -MP -MF $(DEPDIR)/gmap_avx512-stage3.Tpo -c -o gmap_avx512-stage3.o `test -f 'stage3.c' || echo '$(srcdir)/'`stage3.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-stage3.Tpo $(DEPDIR)/gmap_avx512-stage3.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage3.c' object='gmap_avx512-stage3.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-stage3.o `test -f 'stage3.c' || echo '$(srcdir)/'`stage3.c + +gmap_avx512-stage3.obj: stage3.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-stage3.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-stage3.Tpo -c -o gmap_avx512-stage3.obj `if test -f 'stage3.c'; then $(CYGPATH_W) 'stage3.c'; else $(CYGPATH_W) '$(srcdir)/stage3.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-stage3.Tpo $(DEPDIR)/gmap_avx512-stage3.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage3.c' object='gmap_avx512-stage3.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-stage3.obj `if test -f 'stage3.c'; then $(CYGPATH_W) 'stage3.c'; else $(CYGPATH_W) '$(srcdir)/stage3.c'; fi` + +gmap_avx512-request.o: request.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-request.o -MD -MP -MF $(DEPDIR)/gmap_avx512-request.Tpo -c -o gmap_avx512-request.o `test -f 'request.c' || echo '$(srcdir)/'`request.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-request.Tpo $(DEPDIR)/gmap_avx512-request.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='request.c' object='gmap_avx512-request.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-request.o `test -f 'request.c' || echo '$(srcdir)/'`request.c + +gmap_avx512-request.obj: request.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-request.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-request.Tpo -c -o gmap_avx512-request.obj `if test -f 'request.c'; then $(CYGPATH_W) 'request.c'; else $(CYGPATH_W) '$(srcdir)/request.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-request.Tpo $(DEPDIR)/gmap_avx512-request.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='request.c' object='gmap_avx512-request.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-request.obj `if test -f 'request.c'; then $(CYGPATH_W) 'request.c'; else $(CYGPATH_W) '$(srcdir)/request.c'; fi` + +gmap_avx512-result.o: result.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-result.o -MD -MP -MF $(DEPDIR)/gmap_avx512-result.Tpo -c -o gmap_avx512-result.o `test -f 'result.c' || echo '$(srcdir)/'`result.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-result.Tpo $(DEPDIR)/gmap_avx512-result.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='result.c' object='gmap_avx512-result.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-result.o `test -f 'result.c' || echo '$(srcdir)/'`result.c + +gmap_avx512-result.obj: result.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-result.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-result.Tpo -c -o gmap_avx512-result.obj `if test -f 'result.c'; then $(CYGPATH_W) 'result.c'; else $(CYGPATH_W) '$(srcdir)/result.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-result.Tpo $(DEPDIR)/gmap_avx512-result.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='result.c' object='gmap_avx512-result.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-result.obj `if test -f 'result.c'; then $(CYGPATH_W) 'result.c'; else $(CYGPATH_W) '$(srcdir)/result.c'; fi` + +gmap_avx512-output.o: output.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-output.o -MD -MP -MF $(DEPDIR)/gmap_avx512-output.Tpo -c -o gmap_avx512-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-output.Tpo $(DEPDIR)/gmap_avx512-output.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='output.c' object='gmap_avx512-output.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-output.o `test -f 'output.c' || echo 
'$(srcdir)/'`output.c + +gmap_avx512-output.obj: output.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-output.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-output.Tpo -c -o gmap_avx512-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-output.Tpo $(DEPDIR)/gmap_avx512-output.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='output.c' object='gmap_avx512-output.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi` + +gmap_avx512-inbuffer.o: inbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-inbuffer.o -MD -MP -MF $(DEPDIR)/gmap_avx512-inbuffer.Tpo -c -o gmap_avx512-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-inbuffer.Tpo $(DEPDIR)/gmap_avx512-inbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='inbuffer.c' object='gmap_avx512-inbuffer.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c + +gmap_avx512-inbuffer.obj: inbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-inbuffer.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-inbuffer.Tpo -c -o gmap_avx512-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-inbuffer.Tpo $(DEPDIR)/gmap_avx512-inbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='inbuffer.c' object='gmap_avx512-inbuffer.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi` + +gmap_avx512-samheader.o: samheader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-samheader.o -MD -MP -MF $(DEPDIR)/gmap_avx512-samheader.Tpo -c -o gmap_avx512-samheader.o `test -f 'samheader.c' || echo '$(srcdir)/'`samheader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-samheader.Tpo $(DEPDIR)/gmap_avx512-samheader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='samheader.c' object='gmap_avx512-samheader.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-samheader.o `test -f 'samheader.c' || echo '$(srcdir)/'`samheader.c + +gmap_avx512-samheader.obj: samheader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT 
gmap_avx512-samheader.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-samheader.Tpo -c -o gmap_avx512-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-samheader.Tpo $(DEPDIR)/gmap_avx512-samheader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='samheader.c' object='gmap_avx512-samheader.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi` + +gmap_avx512-outbuffer.o: outbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-outbuffer.o -MD -MP -MF $(DEPDIR)/gmap_avx512-outbuffer.Tpo -c -o gmap_avx512-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-outbuffer.Tpo $(DEPDIR)/gmap_avx512-outbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='outbuffer.c' object='gmap_avx512-outbuffer.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c + +gmap_avx512-outbuffer.obj: outbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-outbuffer.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-outbuffer.Tpo 
-c -o gmap_avx512-outbuffer.obj `if test -f 'outbuffer.c'; then $(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-outbuffer.Tpo $(DEPDIR)/gmap_avx512-outbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='outbuffer.c' object='gmap_avx512-outbuffer.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-outbuffer.obj `if test -f 'outbuffer.c'; then $(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi` + +gmap_avx512-chimera.o: chimera.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-chimera.o -MD -MP -MF $(DEPDIR)/gmap_avx512-chimera.Tpo -c -o gmap_avx512-chimera.o `test -f 'chimera.c' || echo '$(srcdir)/'`chimera.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-chimera.Tpo $(DEPDIR)/gmap_avx512-chimera.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chimera.c' object='gmap_avx512-chimera.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-chimera.o `test -f 'chimera.c' || echo '$(srcdir)/'`chimera.c + +gmap_avx512-chimera.obj: chimera.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-chimera.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-chimera.Tpo -c -o gmap_avx512-chimera.obj `if test -f 'chimera.c'; then $(CYGPATH_W) 'chimera.c'; else $(CYGPATH_W) 
'$(srcdir)/chimera.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-chimera.Tpo $(DEPDIR)/gmap_avx512-chimera.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chimera.c' object='gmap_avx512-chimera.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-chimera.obj `if test -f 'chimera.c'; then $(CYGPATH_W) 'chimera.c'; else $(CYGPATH_W) '$(srcdir)/chimera.c'; fi` + +gmap_avx512-datadir.o: datadir.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-datadir.o -MD -MP -MF $(DEPDIR)/gmap_avx512-datadir.Tpo -c -o gmap_avx512-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-datadir.Tpo $(DEPDIR)/gmap_avx512-datadir.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datadir.c' object='gmap_avx512-datadir.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c + +gmap_avx512-datadir.obj: datadir.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-datadir.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-datadir.Tpo -c -o gmap_avx512-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-datadir.Tpo 
$(DEPDIR)/gmap_avx512-datadir.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datadir.c' object='gmap_avx512-datadir.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi` + +gmap_avx512-parserange.o: parserange.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-parserange.o -MD -MP -MF $(DEPDIR)/gmap_avx512-parserange.Tpo -c -o gmap_avx512-parserange.o `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-parserange.Tpo $(DEPDIR)/gmap_avx512-parserange.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parserange.c' object='gmap_avx512-parserange.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-parserange.o `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c + +gmap_avx512-parserange.obj: parserange.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-parserange.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-parserange.Tpo -c -o gmap_avx512-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-parserange.Tpo $(DEPDIR)/gmap_avx512-parserange.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parserange.c' object='gmap_avx512-parserange.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi` + +gmap_avx512-getopt.o: getopt.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-getopt.o -MD -MP -MF $(DEPDIR)/gmap_avx512-getopt.Tpo -c -o gmap_avx512-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-getopt.Tpo $(DEPDIR)/gmap_avx512-getopt.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt.c' object='gmap_avx512-getopt.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c + +gmap_avx512-getopt.obj: getopt.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-getopt.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-getopt.Tpo -c -o gmap_avx512-getopt.obj `if test -f 'getopt.c'; then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-getopt.Tpo $(DEPDIR)/gmap_avx512-getopt.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt.c' object='gmap_avx512-getopt.obj' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-getopt.obj `if test -f 'getopt.c'; then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi` + +gmap_avx512-getopt1.o: getopt1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-getopt1.o -MD -MP -MF $(DEPDIR)/gmap_avx512-getopt1.Tpo -c -o gmap_avx512-getopt1.o `test -f 'getopt1.c' || echo '$(srcdir)/'`getopt1.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-getopt1.Tpo $(DEPDIR)/gmap_avx512-getopt1.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt1.c' object='gmap_avx512-getopt1.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-getopt1.o `test -f 'getopt1.c' || echo '$(srcdir)/'`getopt1.c + +gmap_avx512-getopt1.obj: getopt1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-getopt1.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-getopt1.Tpo -c -o gmap_avx512-getopt1.obj `if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-getopt1.Tpo $(DEPDIR)/gmap_avx512-getopt1.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt1.c' object='gmap_avx512-getopt1.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-getopt1.obj `if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi` + +gmap_avx512-gmap.o: gmap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-gmap.o -MD -MP -MF $(DEPDIR)/gmap_avx512-gmap.Tpo -c -o gmap_avx512-gmap.o `test -f 'gmap.c' || echo '$(srcdir)/'`gmap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-gmap.Tpo $(DEPDIR)/gmap_avx512-gmap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gmap.c' object='gmap_avx512-gmap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-gmap.o `test -f 'gmap.c' || echo '$(srcdir)/'`gmap.c + +gmap_avx512-gmap.obj: gmap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -MT gmap_avx512-gmap.obj -MD -MP -MF $(DEPDIR)/gmap_avx512-gmap.Tpo -c -o gmap_avx512-gmap.obj `if test -f 'gmap.c'; then $(CYGPATH_W) 'gmap.c'; else $(CYGPATH_W) '$(srcdir)/gmap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_avx512-gmap.Tpo $(DEPDIR)/gmap_avx512-gmap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gmap.c' object='gmap_avx512-gmap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_avx512_CFLAGS) $(CFLAGS) -c -o gmap_avx512-gmap.obj `if test -f 'gmap.c'; then $(CYGPATH_W) 'gmap.c'; else 
$(CYGPATH_W) '$(srcdir)/gmap.c'; fi` + gmap_nosimd-except.o: except.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_nosimd_CFLAGS) $(CFLAGS) -MT gmap_nosimd-except.o -MD -MP -MF $(DEPDIR)/gmap_nosimd-except.Tpo -c -o gmap_nosimd-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_nosimd-except.Tpo $(DEPDIR)/gmap_nosimd-except.Po @@ -8994,6 +10980,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_nosimd_CFLAGS) $(CFLAGS) -c -o gmap_nosimd-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gmap_nosimd-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_nosimd_CFLAGS) $(CFLAGS) -MT gmap_nosimd-merge.o -MD -MP -MF $(DEPDIR)/gmap_nosimd-merge.Tpo -c -o gmap_nosimd-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_nosimd-merge.Tpo $(DEPDIR)/gmap_nosimd-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmap_nosimd-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_nosimd_CFLAGS) $(CFLAGS) -c -o gmap_nosimd-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gmap_nosimd-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_nosimd_CFLAGS) $(CFLAGS) -MT gmap_nosimd-merge.obj -MD -MP -MF $(DEPDIR)/gmap_nosimd-merge.Tpo -c -o 
gmap_nosimd-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_nosimd-merge.Tpo $(DEPDIR)/gmap_nosimd-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmap_nosimd-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_nosimd_CFLAGS) $(CFLAGS) -c -o gmap_nosimd-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gmap_nosimd-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_nosimd_CFLAGS) $(CFLAGS) -MT gmap_nosimd-indexdb.o -MD -MP -MF $(DEPDIR)/gmap_nosimd-indexdb.Tpo -c -o gmap_nosimd-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_nosimd-indexdb.Tpo $(DEPDIR)/gmap_nosimd-indexdb.Po @@ -10184,6 +12184,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse2_CFLAGS) $(CFLAGS) -c -o gmap_sse2-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gmap_sse2-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse2_CFLAGS) $(CFLAGS) -MT gmap_sse2-merge.o -MD -MP -MF $(DEPDIR)/gmap_sse2-merge.Tpo -c -o gmap_sse2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_sse2-merge.Tpo $(DEPDIR)/gmap_sse2-merge.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmap_sse2-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse2_CFLAGS) $(CFLAGS) -c -o gmap_sse2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gmap_sse2-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse2_CFLAGS) $(CFLAGS) -MT gmap_sse2-merge.obj -MD -MP -MF $(DEPDIR)/gmap_sse2-merge.Tpo -c -o gmap_sse2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_sse2-merge.Tpo $(DEPDIR)/gmap_sse2-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmap_sse2-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse2_CFLAGS) $(CFLAGS) -c -o gmap_sse2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gmap_sse2-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse2_CFLAGS) $(CFLAGS) -MT gmap_sse2-indexdb.o -MD -MP -MF $(DEPDIR)/gmap_sse2-indexdb.Tpo -c -o gmap_sse2-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_sse2-indexdb.Tpo $(DEPDIR)/gmap_sse2-indexdb.Po @@ -11374,6 +13388,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse41_CFLAGS) $(CFLAGS) -c -o gmap_sse41-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gmap_sse41-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse41_CFLAGS) $(CFLAGS) -MT gmap_sse41-merge.o -MD -MP -MF $(DEPDIR)/gmap_sse41-merge.Tpo -c -o gmap_sse41-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_sse41-merge.Tpo $(DEPDIR)/gmap_sse41-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmap_sse41-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse41_CFLAGS) $(CFLAGS) -c -o gmap_sse41-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gmap_sse41-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse41_CFLAGS) $(CFLAGS) -MT gmap_sse41-merge.obj -MD -MP -MF $(DEPDIR)/gmap_sse41-merge.Tpo -c -o gmap_sse41-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_sse41-merge.Tpo $(DEPDIR)/gmap_sse41-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmap_sse41-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse41_CFLAGS) $(CFLAGS) -c -o gmap_sse41-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) 
'$(srcdir)/merge.c'; fi` + gmap_sse41-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse41_CFLAGS) $(CFLAGS) -MT gmap_sse41-indexdb.o -MD -MP -MF $(DEPDIR)/gmap_sse41-indexdb.Tpo -c -o gmap_sse41-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_sse41-indexdb.Tpo $(DEPDIR)/gmap_sse41-indexdb.Po @@ -12564,6 +14592,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse42_CFLAGS) $(CFLAGS) -c -o gmap_sse42-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gmap_sse42-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse42_CFLAGS) $(CFLAGS) -MT gmap_sse42-merge.o -MD -MP -MF $(DEPDIR)/gmap_sse42-merge.Tpo -c -o gmap_sse42-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_sse42-merge.Tpo $(DEPDIR)/gmap_sse42-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmap_sse42-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse42_CFLAGS) $(CFLAGS) -c -o gmap_sse42-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gmap_sse42-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse42_CFLAGS) $(CFLAGS) -MT gmap_sse42-merge.obj -MD -MP -MF $(DEPDIR)/gmap_sse42-merge.Tpo -c -o gmap_sse42-merge.obj `if test -f 
'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_sse42-merge.Tpo $(DEPDIR)/gmap_sse42-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmap_sse42-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse42_CFLAGS) $(CFLAGS) -c -o gmap_sse42-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gmap_sse42-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_sse42_CFLAGS) $(CFLAGS) -MT gmap_sse42-indexdb.o -MD -MP -MF $(DEPDIR)/gmap_sse42-indexdb.Tpo -c -o gmap_sse42-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_sse42-indexdb.Tpo $(DEPDIR)/gmap_sse42-indexdb.Po @@ -13754,6 +15796,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_ssse3_CFLAGS) $(CFLAGS) -c -o gmap_ssse3-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gmap_ssse3-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_ssse3_CFLAGS) $(CFLAGS) -MT gmap_ssse3-merge.o -MD -MP -MF $(DEPDIR)/gmap_ssse3-merge.Tpo -c -o gmap_ssse3-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_ssse3-merge.Tpo $(DEPDIR)/gmap_ssse3-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' 
object='gmap_ssse3-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_ssse3_CFLAGS) $(CFLAGS) -c -o gmap_ssse3-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gmap_ssse3-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_ssse3_CFLAGS) $(CFLAGS) -MT gmap_ssse3-merge.obj -MD -MP -MF $(DEPDIR)/gmap_ssse3-merge.Tpo -c -o gmap_ssse3-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_ssse3-merge.Tpo $(DEPDIR)/gmap_ssse3-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmap_ssse3-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_ssse3_CFLAGS) $(CFLAGS) -c -o gmap_ssse3-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gmap_ssse3-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmap_ssse3_CFLAGS) $(CFLAGS) -MT gmap_ssse3-indexdb.o -MD -MP -MF $(DEPDIR)/gmap_ssse3-indexdb.Tpo -c -o gmap_ssse3-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmap_ssse3-indexdb.Tpo $(DEPDIR)/gmap_ssse3-indexdb.Po @@ -15630,6 +17686,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) 
$(CFLAGS) -c -o gmapl_avx2-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gmapl_avx2-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) $(CFLAGS) -MT gmapl_avx2-merge.o -MD -MP -MF $(DEPDIR)/gmapl_avx2-merge.Tpo -c -o gmapl_avx2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx2-merge.Tpo $(DEPDIR)/gmapl_avx2-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmapl_avx2-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) $(CFLAGS) -c -o gmapl_avx2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gmapl_avx2-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) $(CFLAGS) -MT gmapl_avx2-merge.obj -MD -MP -MF $(DEPDIR)/gmapl_avx2-merge.Tpo -c -o gmapl_avx2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx2-merge.Tpo $(DEPDIR)/gmapl_avx2-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmapl_avx2-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) $(CFLAGS) -c -o gmapl_avx2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gmapl_avx2-indexdb.o: indexdb.c 
@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) $(CFLAGS) -MT gmapl_avx2-indexdb.o -MD -MP -MF $(DEPDIR)/gmapl_avx2-indexdb.Tpo -c -o gmapl_avx2-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx2-indexdb.Tpo $(DEPDIR)/gmapl_avx2-indexdb.Po @@ -16386,210 +18456,1428 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx2_CFLAGS) $(CFLAGS) -c -o gmapl_avx2-gmap.obj `if test -f 'gmap.c'; then $(CYGPATH_W) 'gmap.c'; else $(CYGPATH_W) '$(srcdir)/gmap.c'; fi` -gmapl_nosimd-except.o: except.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-except.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-except.Tpo -c -o gmapl_nosimd-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-except.Tpo $(DEPDIR)/gmapl_nosimd-except.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='except.c' object='gmapl_nosimd-except.o' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-except.o: except.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-except.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-except.Tpo -c -o gmapl_avx512-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-except.Tpo $(DEPDIR)/gmapl_avx512-except.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='except.c' object='gmapl_avx512-except.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c -gmapl_nosimd-except.obj: except.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-except.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-except.Tpo -c -o gmapl_nosimd-except.obj `if test -f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) '$(srcdir)/except.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-except.Tpo $(DEPDIR)/gmapl_nosimd-except.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='except.c' object='gmapl_nosimd-except.obj' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-except.obj: except.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-except.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-except.Tpo -c -o gmapl_avx512-except.obj `if test -f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) '$(srcdir)/except.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-except.Tpo $(DEPDIR)/gmapl_avx512-except.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='except.c' object='gmapl_avx512-except.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-except.obj `if test -f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) 
'$(srcdir)/except.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-except.obj `if test -f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) '$(srcdir)/except.c'; fi` -gmapl_nosimd-assert.o: assert.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-assert.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-assert.Tpo -c -o gmapl_nosimd-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-assert.Tpo $(DEPDIR)/gmapl_nosimd-assert.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='assert.c' object='gmapl_nosimd-assert.o' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-assert.o: assert.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-assert.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-assert.Tpo -c -o gmapl_avx512-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-assert.Tpo $(DEPDIR)/gmapl_avx512-assert.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='assert.c' object='gmapl_avx512-assert.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c -gmapl_nosimd-assert.obj: assert.c 
-@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-assert.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-assert.Tpo -c -o gmapl_nosimd-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-assert.Tpo $(DEPDIR)/gmapl_nosimd-assert.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='assert.c' object='gmapl_nosimd-assert.obj' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-assert.obj: assert.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-assert.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-assert.Tpo -c -o gmapl_avx512-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-assert.Tpo $(DEPDIR)/gmapl_avx512-assert.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='assert.c' object='gmapl_avx512-assert.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi` -gmapl_nosimd-mem.o: mem.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT 
gmapl_nosimd-mem.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-mem.Tpo -c -o gmapl_nosimd-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-mem.Tpo $(DEPDIR)/gmapl_nosimd-mem.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mem.c' object='gmapl_nosimd-mem.o' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-mem.o: mem.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-mem.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-mem.Tpo -c -o gmapl_avx512-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-mem.Tpo $(DEPDIR)/gmapl_avx512-mem.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mem.c' object='gmapl_avx512-mem.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c -gmapl_nosimd-mem.obj: mem.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-mem.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-mem.Tpo -c -o gmapl_nosimd-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-mem.Tpo $(DEPDIR)/gmapl_nosimd-mem.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mem.c' object='gmapl_nosimd-mem.obj' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-mem.obj: mem.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-mem.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-mem.Tpo -c -o gmapl_avx512-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-mem.Tpo $(DEPDIR)/gmapl_avx512-mem.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mem.c' object='gmapl_avx512-mem.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; fi` -gmapl_nosimd-intlist.o: intlist.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-intlist.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-intlist.Tpo -c -o gmapl_nosimd-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-intlist.Tpo $(DEPDIR)/gmapl_nosimd-intlist.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='intlist.c' object='gmapl_nosimd-intlist.o' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-intlist.o: intlist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-intlist.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-intlist.Tpo -c -o 
gmapl_avx512-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-intlist.Tpo $(DEPDIR)/gmapl_avx512-intlist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='intlist.c' object='gmapl_avx512-intlist.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c -gmapl_nosimd-intlist.obj: intlist.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-intlist.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-intlist.Tpo -c -o gmapl_nosimd-intlist.obj `if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-intlist.Tpo $(DEPDIR)/gmapl_nosimd-intlist.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='intlist.c' object='gmapl_nosimd-intlist.obj' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-intlist.obj: intlist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-intlist.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-intlist.Tpo -c -o gmapl_avx512-intlist.obj `if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-intlist.Tpo $(DEPDIR)/gmapl_avx512-intlist.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='intlist.c' object='gmapl_avx512-intlist.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-intlist.obj `if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-intlist.obj `if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi` -gmapl_nosimd-list.o: list.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-list.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-list.Tpo -c -o gmapl_nosimd-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-list.Tpo $(DEPDIR)/gmapl_nosimd-list.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='list.c' object='gmapl_nosimd-list.o' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-list.o: list.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-list.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-list.Tpo -c -o gmapl_avx512-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-list.Tpo $(DEPDIR)/gmapl_avx512-list.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='list.c' object='gmapl_avx512-list.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c -gmapl_nosimd-list.obj: list.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-list.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-list.Tpo -c -o gmapl_nosimd-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-list.Tpo $(DEPDIR)/gmapl_nosimd-list.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='list.c' object='gmapl_nosimd-list.obj' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-list.obj: list.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-list.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-list.Tpo -c -o gmapl_avx512-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-list.Tpo $(DEPDIR)/gmapl_avx512-list.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='list.c' object='gmapl_avx512-list.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi` -gmapl_nosimd-littleendian.o: littleendian.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-littleendian.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-littleendian.Tpo -c -o gmapl_nosimd-littleendian.o `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-littleendian.Tpo $(DEPDIR)/gmapl_nosimd-littleendian.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='littleendian.c' object='gmapl_nosimd-littleendian.o' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-littleendian.o: littleendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-littleendian.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-littleendian.Tpo -c -o gmapl_avx512-littleendian.o `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-littleendian.Tpo $(DEPDIR)/gmapl_avx512-littleendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='littleendian.c' object='gmapl_avx512-littleendian.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-littleendian.o `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-littleendian.o `test -f 'littleendian.c' || echo 
'$(srcdir)/'`littleendian.c -gmapl_nosimd-littleendian.obj: littleendian.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-littleendian.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-littleendian.Tpo -c -o gmapl_nosimd-littleendian.obj `if test -f 'littleendian.c'; then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-littleendian.Tpo $(DEPDIR)/gmapl_nosimd-littleendian.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='littleendian.c' object='gmapl_nosimd-littleendian.obj' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-littleendian.obj: littleendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-littleendian.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-littleendian.Tpo -c -o gmapl_avx512-littleendian.obj `if test -f 'littleendian.c'; then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-littleendian.Tpo $(DEPDIR)/gmapl_avx512-littleendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='littleendian.c' object='gmapl_avx512-littleendian.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-littleendian.obj `if test -f 'littleendian.c'; then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-littleendian.obj `if test -f 'littleendian.c'; 
then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi` -gmapl_nosimd-bigendian.o: bigendian.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-bigendian.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-bigendian.Tpo -c -o gmapl_nosimd-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-bigendian.Tpo $(DEPDIR)/gmapl_nosimd-bigendian.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bigendian.c' object='gmapl_nosimd-bigendian.o' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-bigendian.o: bigendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-bigendian.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-bigendian.Tpo -c -o gmapl_avx512-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-bigendian.Tpo $(DEPDIR)/gmapl_avx512-bigendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bigendian.c' object='gmapl_avx512-bigendian.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c -gmapl_nosimd-bigendian.obj: bigendian.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) 
$(CFLAGS) -MT gmapl_nosimd-bigendian.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-bigendian.Tpo -c -o gmapl_nosimd-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-bigendian.Tpo $(DEPDIR)/gmapl_nosimd-bigendian.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bigendian.c' object='gmapl_nosimd-bigendian.obj' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-bigendian.obj: bigendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-bigendian.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-bigendian.Tpo -c -o gmapl_avx512-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-bigendian.Tpo $(DEPDIR)/gmapl_avx512-bigendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bigendian.c' object='gmapl_avx512-bigendian.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi` -gmapl_nosimd-univinterval.o: univinterval.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT 
gmapl_nosimd-univinterval.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-univinterval.Tpo -c -o gmapl_nosimd-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-univinterval.Tpo $(DEPDIR)/gmapl_nosimd-univinterval.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='univinterval.c' object='gmapl_nosimd-univinterval.o' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-univinterval.o: univinterval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-univinterval.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-univinterval.Tpo -c -o gmapl_avx512-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-univinterval.Tpo $(DEPDIR)/gmapl_avx512-univinterval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='univinterval.c' object='gmapl_avx512-univinterval.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c -gmapl_nosimd-univinterval.obj: univinterval.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-univinterval.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-univinterval.Tpo -c -o gmapl_nosimd-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 
'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-univinterval.Tpo $(DEPDIR)/gmapl_nosimd-univinterval.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='univinterval.c' object='gmapl_nosimd-univinterval.obj' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-univinterval.obj: univinterval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-univinterval.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-univinterval.Tpo -c -o gmapl_avx512-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-univinterval.Tpo $(DEPDIR)/gmapl_avx512-univinterval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='univinterval.c' object='gmapl_avx512-univinterval.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi` -gmapl_nosimd-interval.o: interval.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-interval.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-interval.Tpo -c -o gmapl_nosimd-interval.o `test -f 
'interval.c' || echo '$(srcdir)/'`interval.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-interval.Tpo $(DEPDIR)/gmapl_nosimd-interval.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='interval.c' object='gmapl_nosimd-interval.o' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-interval.o: interval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-interval.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-interval.Tpo -c -o gmapl_avx512-interval.o `test -f 'interval.c' || echo '$(srcdir)/'`interval.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-interval.Tpo $(DEPDIR)/gmapl_avx512-interval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='interval.c' object='gmapl_avx512-interval.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-interval.o `test -f 'interval.c' || echo '$(srcdir)/'`interval.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-interval.o `test -f 'interval.c' || echo '$(srcdir)/'`interval.c -gmapl_nosimd-interval.obj: interval.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-interval.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-interval.Tpo -c -o gmapl_nosimd-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-interval.Tpo $(DEPDIR)/gmapl_nosimd-interval.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='interval.c' 
object='gmapl_nosimd-interval.obj' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-interval.obj: interval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-interval.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-interval.Tpo -c -o gmapl_avx512-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-interval.Tpo $(DEPDIR)/gmapl_avx512-interval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='interval.c' object='gmapl_avx512-interval.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi` -gmapl_nosimd-uintlist.o: uintlist.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-uintlist.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-uintlist.Tpo -c -o gmapl_nosimd-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-uintlist.Tpo $(DEPDIR)/gmapl_nosimd-uintlist.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uintlist.c' object='gmapl_nosimd-uintlist.o' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-uintlist.o: uintlist.c +@am__fastdepCC_TRUE@ 
$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-uintlist.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-uintlist.Tpo -c -o gmapl_avx512-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-uintlist.Tpo $(DEPDIR)/gmapl_avx512-uintlist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uintlist.c' object='gmapl_avx512-uintlist.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c -gmapl_nosimd-uintlist.obj: uintlist.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-uintlist.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-uintlist.Tpo -c -o gmapl_nosimd-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-uintlist.Tpo $(DEPDIR)/gmapl_nosimd-uintlist.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uintlist.c' object='gmapl_nosimd-uintlist.obj' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-uintlist.obj: uintlist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-uintlist.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-uintlist.Tpo -c -o gmapl_avx512-uintlist.obj `if 
test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-uintlist.Tpo $(DEPDIR)/gmapl_avx512-uintlist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uintlist.c' object='gmapl_avx512-uintlist.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi` -gmapl_nosimd-uint8list.o: uint8list.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-uint8list.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-uint8list.Tpo -c -o gmapl_nosimd-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-uint8list.Tpo $(DEPDIR)/gmapl_nosimd-uint8list.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uint8list.c' object='gmapl_nosimd-uint8list.o' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-uint8list.o: uint8list.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-uint8list.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-uint8list.Tpo -c -o gmapl_avx512-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gmapl_avx512-uint8list.Tpo $(DEPDIR)/gmapl_avx512-uint8list.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uint8list.c' object='gmapl_avx512-uint8list.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c -gmapl_nosimd-uint8list.obj: uint8list.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-uint8list.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-uint8list.Tpo -c -o gmapl_nosimd-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-uint8list.Tpo $(DEPDIR)/gmapl_nosimd-uint8list.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uint8list.c' object='gmapl_nosimd-uint8list.obj' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-uint8list.obj: uint8list.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-uint8list.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-uint8list.Tpo -c -o gmapl_avx512-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-uint8list.Tpo $(DEPDIR)/gmapl_avx512-uint8list.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uint8list.c' 
object='gmapl_avx512-uint8list.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi` -gmapl_nosimd-stopwatch.o: stopwatch.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-stopwatch.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-stopwatch.Tpo -c -o gmapl_nosimd-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-stopwatch.Tpo $(DEPDIR)/gmapl_nosimd-stopwatch.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stopwatch.c' object='gmapl_nosimd-stopwatch.o' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-stopwatch.o: stopwatch.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-stopwatch.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-stopwatch.Tpo -c -o gmapl_avx512-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-stopwatch.Tpo $(DEPDIR)/gmapl_avx512-stopwatch.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stopwatch.c' object='gmapl_avx512-stopwatch.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
-@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c -gmapl_nosimd-stopwatch.obj: stopwatch.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-stopwatch.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-stopwatch.Tpo -c -o gmapl_nosimd-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-stopwatch.Tpo $(DEPDIR)/gmapl_nosimd-stopwatch.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stopwatch.c' object='gmapl_nosimd-stopwatch.obj' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-stopwatch.obj: stopwatch.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-stopwatch.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-stopwatch.Tpo -c -o gmapl_avx512-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-stopwatch.Tpo $(DEPDIR)/gmapl_avx512-stopwatch.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stopwatch.c' object='gmapl_avx512-stopwatch.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) 
$(CFLAGS) -c -o gmapl_nosimd-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi` -gmapl_nosimd-semaphore.o: semaphore.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-semaphore.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-semaphore.Tpo -c -o gmapl_nosimd-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-semaphore.Tpo $(DEPDIR)/gmapl_nosimd-semaphore.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='semaphore.c' object='gmapl_nosimd-semaphore.o' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-semaphore.o: semaphore.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-semaphore.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-semaphore.Tpo -c -o gmapl_avx512-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-semaphore.Tpo $(DEPDIR)/gmapl_avx512-semaphore.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='semaphore.c' object='gmapl_avx512-semaphore.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c -gmapl_nosimd-semaphore.obj: semaphore.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-semaphore.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-semaphore.Tpo -c -o gmapl_nosimd-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-semaphore.Tpo $(DEPDIR)/gmapl_nosimd-semaphore.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='semaphore.c' object='gmapl_nosimd-semaphore.obj' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-semaphore.obj: semaphore.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-semaphore.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-semaphore.Tpo -c -o gmapl_avx512-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-semaphore.Tpo $(DEPDIR)/gmapl_avx512-semaphore.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='semaphore.c' object='gmapl_avx512-semaphore.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) 
-c -o gmapl_avx512-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi` -gmapl_nosimd-access.o: access.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-access.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-access.Tpo -c -o gmapl_nosimd-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-access.Tpo $(DEPDIR)/gmapl_nosimd-access.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='access.c' object='gmapl_nosimd-access.o' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-access.o: access.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-access.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-access.Tpo -c -o gmapl_avx512-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-access.Tpo $(DEPDIR)/gmapl_avx512-access.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='access.c' object='gmapl_avx512-access.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c -gmapl_nosimd-access.obj: access.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-access.obj -MD 
-MP -MF $(DEPDIR)/gmapl_nosimd-access.Tpo -c -o gmapl_nosimd-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-access.Tpo $(DEPDIR)/gmapl_nosimd-access.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='access.c' object='gmapl_nosimd-access.obj' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-access.obj: access.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-access.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-access.Tpo -c -o gmapl_avx512-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-access.Tpo $(DEPDIR)/gmapl_avx512-access.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='access.c' object='gmapl_avx512-access.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi` -gmapl_nosimd-filestring.o: filestring.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-filestring.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-filestring.Tpo -c -o gmapl_nosimd-filestring.o `test -f 'filestring.c' || echo 
'$(srcdir)/'`filestring.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-filestring.Tpo $(DEPDIR)/gmapl_nosimd-filestring.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='filestring.c' object='gmapl_nosimd-filestring.o' libtool=no @AMDEPBACKSLASH@ +gmapl_avx512-filestring.o: filestring.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-filestring.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-filestring.Tpo -c -o gmapl_avx512-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-filestring.Tpo $(DEPDIR)/gmapl_avx512-filestring.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='filestring.c' object='gmapl_avx512-filestring.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c -gmapl_nosimd-filestring.obj: filestring.c +gmapl_avx512-filestring.obj: filestring.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-filestring.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-filestring.Tpo -c -o gmapl_avx512-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-filestring.Tpo 
$(DEPDIR)/gmapl_avx512-filestring.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='filestring.c' object='gmapl_avx512-filestring.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi` + +gmapl_avx512-iit-read-univ.o: iit-read-univ.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-iit-read-univ.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-iit-read-univ.Tpo -c -o gmapl_avx512-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-iit-read-univ.Tpo $(DEPDIR)/gmapl_avx512-iit-read-univ.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='iit-read-univ.c' object='gmapl_avx512-iit-read-univ.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c + +gmapl_avx512-iit-read-univ.obj: iit-read-univ.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-iit-read-univ.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-iit-read-univ.Tpo -c -o gmapl_avx512-iit-read-univ.obj `if test -f 'iit-read-univ.c'; then $(CYGPATH_W) 'iit-read-univ.c'; else $(CYGPATH_W) '$(srcdir)/iit-read-univ.c'; fi` +@am__fastdepCC_TRUE@ 
$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-iit-read-univ.Tpo $(DEPDIR)/gmapl_avx512-iit-read-univ.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='iit-read-univ.c' object='gmapl_avx512-iit-read-univ.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-iit-read-univ.obj `if test -f 'iit-read-univ.c'; then $(CYGPATH_W) 'iit-read-univ.c'; else $(CYGPATH_W) '$(srcdir)/iit-read-univ.c'; fi` + +gmapl_avx512-iit-read.o: iit-read.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-iit-read.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-iit-read.Tpo -c -o gmapl_avx512-iit-read.o `test -f 'iit-read.c' || echo '$(srcdir)/'`iit-read.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-iit-read.Tpo $(DEPDIR)/gmapl_avx512-iit-read.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='iit-read.c' object='gmapl_avx512-iit-read.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-iit-read.o `test -f 'iit-read.c' || echo '$(srcdir)/'`iit-read.c + +gmapl_avx512-iit-read.obj: iit-read.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-iit-read.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-iit-read.Tpo -c -o gmapl_avx512-iit-read.obj `if test -f 'iit-read.c'; then $(CYGPATH_W) 'iit-read.c'; else $(CYGPATH_W) '$(srcdir)/iit-read.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gmapl_avx512-iit-read.Tpo $(DEPDIR)/gmapl_avx512-iit-read.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='iit-read.c' object='gmapl_avx512-iit-read.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-iit-read.obj `if test -f 'iit-read.c'; then $(CYGPATH_W) 'iit-read.c'; else $(CYGPATH_W) '$(srcdir)/iit-read.c'; fi` + +gmapl_avx512-md5.o: md5.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-md5.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-md5.Tpo -c -o gmapl_avx512-md5.o `test -f 'md5.c' || echo '$(srcdir)/'`md5.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-md5.Tpo $(DEPDIR)/gmapl_avx512-md5.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='md5.c' object='gmapl_avx512-md5.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-md5.o `test -f 'md5.c' || echo '$(srcdir)/'`md5.c + +gmapl_avx512-md5.obj: md5.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-md5.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-md5.Tpo -c -o gmapl_avx512-md5.obj `if test -f 'md5.c'; then $(CYGPATH_W) 'md5.c'; else $(CYGPATH_W) '$(srcdir)/md5.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-md5.Tpo $(DEPDIR)/gmapl_avx512-md5.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='md5.c' object='gmapl_avx512-md5.obj' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-md5.obj `if test -f 'md5.c'; then $(CYGPATH_W) 'md5.c'; else $(CYGPATH_W) '$(srcdir)/md5.c'; fi` + +gmapl_avx512-bzip2.o: bzip2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-bzip2.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-bzip2.Tpo -c -o gmapl_avx512-bzip2.o `test -f 'bzip2.c' || echo '$(srcdir)/'`bzip2.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-bzip2.Tpo $(DEPDIR)/gmapl_avx512-bzip2.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bzip2.c' object='gmapl_avx512-bzip2.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-bzip2.o `test -f 'bzip2.c' || echo '$(srcdir)/'`bzip2.c + +gmapl_avx512-bzip2.obj: bzip2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-bzip2.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-bzip2.Tpo -c -o gmapl_avx512-bzip2.obj `if test -f 'bzip2.c'; then $(CYGPATH_W) 'bzip2.c'; else $(CYGPATH_W) '$(srcdir)/bzip2.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-bzip2.Tpo $(DEPDIR)/gmapl_avx512-bzip2.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bzip2.c' object='gmapl_avx512-bzip2.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-bzip2.obj `if test -f 'bzip2.c'; then $(CYGPATH_W) 'bzip2.c'; else $(CYGPATH_W) '$(srcdir)/bzip2.c'; fi` + +gmapl_avx512-sequence.o: sequence.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-sequence.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-sequence.Tpo -c -o gmapl_avx512-sequence.o `test -f 'sequence.c' || echo '$(srcdir)/'`sequence.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-sequence.Tpo $(DEPDIR)/gmapl_avx512-sequence.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sequence.c' object='gmapl_avx512-sequence.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-sequence.o `test -f 'sequence.c' || echo '$(srcdir)/'`sequence.c + +gmapl_avx512-sequence.obj: sequence.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-sequence.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-sequence.Tpo -c -o gmapl_avx512-sequence.obj `if test -f 'sequence.c'; then $(CYGPATH_W) 'sequence.c'; else $(CYGPATH_W) '$(srcdir)/sequence.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-sequence.Tpo $(DEPDIR)/gmapl_avx512-sequence.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sequence.c' object='gmapl_avx512-sequence.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o 
gmapl_avx512-sequence.obj `if test -f 'sequence.c'; then $(CYGPATH_W) 'sequence.c'; else $(CYGPATH_W) '$(srcdir)/sequence.c'; fi` + +gmapl_avx512-reader.o: reader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-reader.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-reader.Tpo -c -o gmapl_avx512-reader.o `test -f 'reader.c' || echo '$(srcdir)/'`reader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-reader.Tpo $(DEPDIR)/gmapl_avx512-reader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='reader.c' object='gmapl_avx512-reader.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-reader.o `test -f 'reader.c' || echo '$(srcdir)/'`reader.c + +gmapl_avx512-reader.obj: reader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-reader.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-reader.Tpo -c -o gmapl_avx512-reader.obj `if test -f 'reader.c'; then $(CYGPATH_W) 'reader.c'; else $(CYGPATH_W) '$(srcdir)/reader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-reader.Tpo $(DEPDIR)/gmapl_avx512-reader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='reader.c' object='gmapl_avx512-reader.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-reader.obj `if test -f 'reader.c'; then $(CYGPATH_W) 'reader.c'; else $(CYGPATH_W) '$(srcdir)/reader.c'; fi` + 
+gmapl_avx512-genomicpos.o: genomicpos.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-genomicpos.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-genomicpos.Tpo -c -o gmapl_avx512-genomicpos.o `test -f 'genomicpos.c' || echo '$(srcdir)/'`genomicpos.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genomicpos.Tpo $(DEPDIR)/gmapl_avx512-genomicpos.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genomicpos.c' object='gmapl_avx512-genomicpos.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genomicpos.o `test -f 'genomicpos.c' || echo '$(srcdir)/'`genomicpos.c + +gmapl_avx512-genomicpos.obj: genomicpos.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-genomicpos.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-genomicpos.Tpo -c -o gmapl_avx512-genomicpos.obj `if test -f 'genomicpos.c'; then $(CYGPATH_W) 'genomicpos.c'; else $(CYGPATH_W) '$(srcdir)/genomicpos.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genomicpos.Tpo $(DEPDIR)/gmapl_avx512-genomicpos.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genomicpos.c' object='gmapl_avx512-genomicpos.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genomicpos.obj `if test -f 'genomicpos.c'; then $(CYGPATH_W) 'genomicpos.c'; else $(CYGPATH_W) '$(srcdir)/genomicpos.c'; fi` + 
+gmapl_avx512-compress.o: compress.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-compress.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-compress.Tpo -c -o gmapl_avx512-compress.o `test -f 'compress.c' || echo '$(srcdir)/'`compress.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-compress.Tpo $(DEPDIR)/gmapl_avx512-compress.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compress.c' object='gmapl_avx512-compress.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-compress.o `test -f 'compress.c' || echo '$(srcdir)/'`compress.c + +gmapl_avx512-compress.obj: compress.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-compress.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-compress.Tpo -c -o gmapl_avx512-compress.obj `if test -f 'compress.c'; then $(CYGPATH_W) 'compress.c'; else $(CYGPATH_W) '$(srcdir)/compress.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-compress.Tpo $(DEPDIR)/gmapl_avx512-compress.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compress.c' object='gmapl_avx512-compress.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-compress.obj `if test -f 'compress.c'; then $(CYGPATH_W) 'compress.c'; else $(CYGPATH_W) '$(srcdir)/compress.c'; fi` + +gmapl_avx512-compress-write.o: compress-write.c +@am__fastdepCC_TRUE@ 
$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-compress-write.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-compress-write.Tpo -c -o gmapl_avx512-compress-write.o `test -f 'compress-write.c' || echo '$(srcdir)/'`compress-write.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-compress-write.Tpo $(DEPDIR)/gmapl_avx512-compress-write.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compress-write.c' object='gmapl_avx512-compress-write.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-compress-write.o `test -f 'compress-write.c' || echo '$(srcdir)/'`compress-write.c + +gmapl_avx512-compress-write.obj: compress-write.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-compress-write.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-compress-write.Tpo -c -o gmapl_avx512-compress-write.obj `if test -f 'compress-write.c'; then $(CYGPATH_W) 'compress-write.c'; else $(CYGPATH_W) '$(srcdir)/compress-write.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-compress-write.Tpo $(DEPDIR)/gmapl_avx512-compress-write.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compress-write.c' object='gmapl_avx512-compress-write.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-compress-write.obj `if test -f 'compress-write.c'; then $(CYGPATH_W) 'compress-write.c'; else $(CYGPATH_W) 
'$(srcdir)/compress-write.c'; fi` + +gmapl_avx512-gbuffer.o: gbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-gbuffer.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-gbuffer.Tpo -c -o gmapl_avx512-gbuffer.o `test -f 'gbuffer.c' || echo '$(srcdir)/'`gbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-gbuffer.Tpo $(DEPDIR)/gmapl_avx512-gbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gbuffer.c' object='gmapl_avx512-gbuffer.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-gbuffer.o `test -f 'gbuffer.c' || echo '$(srcdir)/'`gbuffer.c + +gmapl_avx512-gbuffer.obj: gbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-gbuffer.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-gbuffer.Tpo -c -o gmapl_avx512-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-gbuffer.Tpo $(DEPDIR)/gmapl_avx512-gbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gbuffer.c' object='gmapl_avx512-gbuffer.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi` + +gmapl_avx512-genome.o: genome.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-genome.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-genome.Tpo -c -o gmapl_avx512-genome.o `test -f 'genome.c' || echo '$(srcdir)/'`genome.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genome.Tpo $(DEPDIR)/gmapl_avx512-genome.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome.c' object='gmapl_avx512-genome.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genome.o `test -f 'genome.c' || echo '$(srcdir)/'`genome.c + +gmapl_avx512-genome.obj: genome.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-genome.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-genome.Tpo -c -o gmapl_avx512-genome.obj `if test -f 'genome.c'; then $(CYGPATH_W) 'genome.c'; else $(CYGPATH_W) '$(srcdir)/genome.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genome.Tpo $(DEPDIR)/gmapl_avx512-genome.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome.c' object='gmapl_avx512-genome.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genome.obj `if test -f 'genome.c'; then $(CYGPATH_W) 'genome.c'; else $(CYGPATH_W) '$(srcdir)/genome.c'; fi` + +gmapl_avx512-popcount.o: popcount.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-popcount.o -MD -MP -MF 
$(DEPDIR)/gmapl_avx512-popcount.Tpo -c -o gmapl_avx512-popcount.o `test -f 'popcount.c' || echo '$(srcdir)/'`popcount.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-popcount.Tpo $(DEPDIR)/gmapl_avx512-popcount.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='popcount.c' object='gmapl_avx512-popcount.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-popcount.o `test -f 'popcount.c' || echo '$(srcdir)/'`popcount.c + +gmapl_avx512-popcount.obj: popcount.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-popcount.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-popcount.Tpo -c -o gmapl_avx512-popcount.obj `if test -f 'popcount.c'; then $(CYGPATH_W) 'popcount.c'; else $(CYGPATH_W) '$(srcdir)/popcount.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-popcount.Tpo $(DEPDIR)/gmapl_avx512-popcount.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='popcount.c' object='gmapl_avx512-popcount.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-popcount.obj `if test -f 'popcount.c'; then $(CYGPATH_W) 'popcount.c'; else $(CYGPATH_W) '$(srcdir)/popcount.c'; fi` + +gmapl_avx512-genome128_hr.o: genome128_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-genome128_hr.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-genome128_hr.Tpo -c -o 
gmapl_avx512-genome128_hr.o `test -f 'genome128_hr.c' || echo '$(srcdir)/'`genome128_hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genome128_hr.Tpo $(DEPDIR)/gmapl_avx512-genome128_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome128_hr.c' object='gmapl_avx512-genome128_hr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genome128_hr.o `test -f 'genome128_hr.c' || echo '$(srcdir)/'`genome128_hr.c + +gmapl_avx512-genome128_hr.obj: genome128_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-genome128_hr.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-genome128_hr.Tpo -c -o gmapl_avx512-genome128_hr.obj `if test -f 'genome128_hr.c'; then $(CYGPATH_W) 'genome128_hr.c'; else $(CYGPATH_W) '$(srcdir)/genome128_hr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genome128_hr.Tpo $(DEPDIR)/gmapl_avx512-genome128_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome128_hr.c' object='gmapl_avx512-genome128_hr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genome128_hr.obj `if test -f 'genome128_hr.c'; then $(CYGPATH_W) 'genome128_hr.c'; else $(CYGPATH_W) '$(srcdir)/genome128_hr.c'; fi` + +gmapl_avx512-genome_sites.o: genome_sites.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-genome_sites.o -MD -MP -MF 
$(DEPDIR)/gmapl_avx512-genome_sites.Tpo -c -o gmapl_avx512-genome_sites.o `test -f 'genome_sites.c' || echo '$(srcdir)/'`genome_sites.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genome_sites.Tpo $(DEPDIR)/gmapl_avx512-genome_sites.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome_sites.c' object='gmapl_avx512-genome_sites.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genome_sites.o `test -f 'genome_sites.c' || echo '$(srcdir)/'`genome_sites.c + +gmapl_avx512-genome_sites.obj: genome_sites.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-genome_sites.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-genome_sites.Tpo -c -o gmapl_avx512-genome_sites.obj `if test -f 'genome_sites.c'; then $(CYGPATH_W) 'genome_sites.c'; else $(CYGPATH_W) '$(srcdir)/genome_sites.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genome_sites.Tpo $(DEPDIR)/gmapl_avx512-genome_sites.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome_sites.c' object='gmapl_avx512-genome_sites.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genome_sites.obj `if test -f 'genome_sites.c'; then $(CYGPATH_W) 'genome_sites.c'; else $(CYGPATH_W) '$(srcdir)/genome_sites.c'; fi` + +gmapl_avx512-genome-write.o: genome-write.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT 
gmapl_avx512-genome-write.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-genome-write.Tpo -c -o gmapl_avx512-genome-write.o `test -f 'genome-write.c' || echo '$(srcdir)/'`genome-write.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genome-write.Tpo $(DEPDIR)/gmapl_avx512-genome-write.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome-write.c' object='gmapl_avx512-genome-write.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genome-write.o `test -f 'genome-write.c' || echo '$(srcdir)/'`genome-write.c + +gmapl_avx512-genome-write.obj: genome-write.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-genome-write.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-genome-write.Tpo -c -o gmapl_avx512-genome-write.obj `if test -f 'genome-write.c'; then $(CYGPATH_W) 'genome-write.c'; else $(CYGPATH_W) '$(srcdir)/genome-write.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-genome-write.Tpo $(DEPDIR)/gmapl_avx512-genome-write.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome-write.c' object='gmapl_avx512-genome-write.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-genome-write.obj `if test -f 'genome-write.c'; then $(CYGPATH_W) 'genome-write.c'; else $(CYGPATH_W) '$(srcdir)/genome-write.c'; fi` + +gmapl_avx512-bitpack64-read.o: bitpack64-read.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) 
$(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-bitpack64-read.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-bitpack64-read.Tpo -c -o gmapl_avx512-bitpack64-read.o `test -f 'bitpack64-read.c' || echo '$(srcdir)/'`bitpack64-read.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-bitpack64-read.Tpo $(DEPDIR)/gmapl_avx512-bitpack64-read.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bitpack64-read.c' object='gmapl_avx512-bitpack64-read.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-bitpack64-read.o `test -f 'bitpack64-read.c' || echo '$(srcdir)/'`bitpack64-read.c + +gmapl_avx512-bitpack64-read.obj: bitpack64-read.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-bitpack64-read.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-bitpack64-read.Tpo -c -o gmapl_avx512-bitpack64-read.obj `if test -f 'bitpack64-read.c'; then $(CYGPATH_W) 'bitpack64-read.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-read.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-bitpack64-read.Tpo $(DEPDIR)/gmapl_avx512-bitpack64-read.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bitpack64-read.c' object='gmapl_avx512-bitpack64-read.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-bitpack64-read.obj `if test -f 'bitpack64-read.c'; then $(CYGPATH_W) 'bitpack64-read.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-read.c'; fi` + +gmapl_avx512-bitpack64-readtwo.o: 
bitpack64-readtwo.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-bitpack64-readtwo.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-bitpack64-readtwo.Tpo -c -o gmapl_avx512-bitpack64-readtwo.o `test -f 'bitpack64-readtwo.c' || echo '$(srcdir)/'`bitpack64-readtwo.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-bitpack64-readtwo.Tpo $(DEPDIR)/gmapl_avx512-bitpack64-readtwo.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bitpack64-readtwo.c' object='gmapl_avx512-bitpack64-readtwo.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-bitpack64-readtwo.o `test -f 'bitpack64-readtwo.c' || echo '$(srcdir)/'`bitpack64-readtwo.c + +gmapl_avx512-bitpack64-readtwo.obj: bitpack64-readtwo.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-bitpack64-readtwo.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-bitpack64-readtwo.Tpo -c -o gmapl_avx512-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-bitpack64-readtwo.Tpo $(DEPDIR)/gmapl_avx512-bitpack64-readtwo.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bitpack64-readtwo.c' object='gmapl_avx512-bitpack64-readtwo.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o 
gmapl_avx512-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` + +gmapl_avx512-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-merge.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-merge.Tpo -c -o gmapl_avx512-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-merge.Tpo $(DEPDIR)/gmapl_avx512-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmapl_avx512-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gmapl_avx512-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-merge.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-merge.Tpo -c -o gmapl_avx512-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-merge.Tpo $(DEPDIR)/gmapl_avx512-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmapl_avx512-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + 
+gmapl_avx512-indexdb.o: indexdb.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-indexdb.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-indexdb.Tpo -c -o gmapl_avx512-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-indexdb.Tpo $(DEPDIR)/gmapl_avx512-indexdb.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='indexdb.c' object='gmapl_avx512-indexdb.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c + +gmapl_avx512-indexdb.obj: indexdb.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-indexdb.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-indexdb.Tpo -c -o gmapl_avx512-indexdb.obj `if test -f 'indexdb.c'; then $(CYGPATH_W) 'indexdb.c'; else $(CYGPATH_W) '$(srcdir)/indexdb.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-indexdb.Tpo $(DEPDIR)/gmapl_avx512-indexdb.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='indexdb.c' object='gmapl_avx512-indexdb.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-indexdb.obj `if test -f 'indexdb.c'; then $(CYGPATH_W) 'indexdb.c'; else $(CYGPATH_W) '$(srcdir)/indexdb.c'; fi` + +gmapl_avx512-indexdb_hr.o: indexdb_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-indexdb_hr.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-indexdb_hr.Tpo -c -o gmapl_avx512-indexdb_hr.o `test -f 'indexdb_hr.c' || echo '$(srcdir)/'`indexdb_hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-indexdb_hr.Tpo $(DEPDIR)/gmapl_avx512-indexdb_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='indexdb_hr.c' object='gmapl_avx512-indexdb_hr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-indexdb_hr.o `test -f 'indexdb_hr.c' || echo '$(srcdir)/'`indexdb_hr.c + +gmapl_avx512-indexdb_hr.obj: indexdb_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-indexdb_hr.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-indexdb_hr.Tpo -c -o gmapl_avx512-indexdb_hr.obj `if test -f 'indexdb_hr.c'; then $(CYGPATH_W) 'indexdb_hr.c'; else $(CYGPATH_W) '$(srcdir)/indexdb_hr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-indexdb_hr.Tpo $(DEPDIR)/gmapl_avx512-indexdb_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='indexdb_hr.c' object='gmapl_avx512-indexdb_hr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-indexdb_hr.obj `if test -f 'indexdb_hr.c'; then $(CYGPATH_W) 'indexdb_hr.c'; else $(CYGPATH_W) '$(srcdir)/indexdb_hr.c'; fi` + +gmapl_avx512-oligo.o: oligo.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) 
$(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-oligo.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-oligo.Tpo -c -o gmapl_avx512-oligo.o `test -f 'oligo.c' || echo '$(srcdir)/'`oligo.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-oligo.Tpo $(DEPDIR)/gmapl_avx512-oligo.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='oligo.c' object='gmapl_avx512-oligo.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-oligo.o `test -f 'oligo.c' || echo '$(srcdir)/'`oligo.c + +gmapl_avx512-oligo.obj: oligo.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-oligo.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-oligo.Tpo -c -o gmapl_avx512-oligo.obj `if test -f 'oligo.c'; then $(CYGPATH_W) 'oligo.c'; else $(CYGPATH_W) '$(srcdir)/oligo.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-oligo.Tpo $(DEPDIR)/gmapl_avx512-oligo.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='oligo.c' object='gmapl_avx512-oligo.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-oligo.obj `if test -f 'oligo.c'; then $(CYGPATH_W) 'oligo.c'; else $(CYGPATH_W) '$(srcdir)/oligo.c'; fi` + +gmapl_avx512-block.o: block.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-block.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-block.Tpo -c -o gmapl_avx512-block.o `test -f 'block.c' || echo 
'$(srcdir)/'`block.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-block.Tpo $(DEPDIR)/gmapl_avx512-block.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='block.c' object='gmapl_avx512-block.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-block.o `test -f 'block.c' || echo '$(srcdir)/'`block.c + +gmapl_avx512-block.obj: block.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-block.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-block.Tpo -c -o gmapl_avx512-block.obj `if test -f 'block.c'; then $(CYGPATH_W) 'block.c'; else $(CYGPATH_W) '$(srcdir)/block.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-block.Tpo $(DEPDIR)/gmapl_avx512-block.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='block.c' object='gmapl_avx512-block.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-block.obj `if test -f 'block.c'; then $(CYGPATH_W) 'block.c'; else $(CYGPATH_W) '$(srcdir)/block.c'; fi` + +gmapl_avx512-chrom.o: chrom.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-chrom.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-chrom.Tpo -c -o gmapl_avx512-chrom.o `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-chrom.Tpo $(DEPDIR)/gmapl_avx512-chrom.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='chrom.c' object='gmapl_avx512-chrom.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-chrom.o `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c + +gmapl_avx512-chrom.obj: chrom.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-chrom.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-chrom.Tpo -c -o gmapl_avx512-chrom.obj `if test -f 'chrom.c'; then $(CYGPATH_W) 'chrom.c'; else $(CYGPATH_W) '$(srcdir)/chrom.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-chrom.Tpo $(DEPDIR)/gmapl_avx512-chrom.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chrom.c' object='gmapl_avx512-chrom.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-chrom.obj `if test -f 'chrom.c'; then $(CYGPATH_W) 'chrom.c'; else $(CYGPATH_W) '$(srcdir)/chrom.c'; fi` + +gmapl_avx512-segmentpos.o: segmentpos.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-segmentpos.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-segmentpos.Tpo -c -o gmapl_avx512-segmentpos.o `test -f 'segmentpos.c' || echo '$(srcdir)/'`segmentpos.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-segmentpos.Tpo $(DEPDIR)/gmapl_avx512-segmentpos.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='segmentpos.c' object='gmapl_avx512-segmentpos.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-segmentpos.o `test -f 'segmentpos.c' || echo '$(srcdir)/'`segmentpos.c + +gmapl_avx512-segmentpos.obj: segmentpos.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-segmentpos.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-segmentpos.Tpo -c -o gmapl_avx512-segmentpos.obj `if test -f 'segmentpos.c'; then $(CYGPATH_W) 'segmentpos.c'; else $(CYGPATH_W) '$(srcdir)/segmentpos.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-segmentpos.Tpo $(DEPDIR)/gmapl_avx512-segmentpos.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='segmentpos.c' object='gmapl_avx512-segmentpos.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-segmentpos.obj `if test -f 'segmentpos.c'; then $(CYGPATH_W) 'segmentpos.c'; else $(CYGPATH_W) '$(srcdir)/segmentpos.c'; fi` + +gmapl_avx512-chrnum.o: chrnum.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-chrnum.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-chrnum.Tpo -c -o gmapl_avx512-chrnum.o `test -f 'chrnum.c' || echo '$(srcdir)/'`chrnum.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-chrnum.Tpo $(DEPDIR)/gmapl_avx512-chrnum.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chrnum.c' object='gmapl_avx512-chrnum.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-chrnum.o `test -f 'chrnum.c' || echo '$(srcdir)/'`chrnum.c + +gmapl_avx512-chrnum.obj: chrnum.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-chrnum.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-chrnum.Tpo -c -o gmapl_avx512-chrnum.obj `if test -f 'chrnum.c'; then $(CYGPATH_W) 'chrnum.c'; else $(CYGPATH_W) '$(srcdir)/chrnum.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-chrnum.Tpo $(DEPDIR)/gmapl_avx512-chrnum.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chrnum.c' object='gmapl_avx512-chrnum.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-chrnum.obj `if test -f 'chrnum.c'; then $(CYGPATH_W) 'chrnum.c'; else $(CYGPATH_W) '$(srcdir)/chrnum.c'; fi` + +gmapl_avx512-uinttable.o: uinttable.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-uinttable.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-uinttable.Tpo -c -o gmapl_avx512-uinttable.o `test -f 'uinttable.c' || echo '$(srcdir)/'`uinttable.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-uinttable.Tpo $(DEPDIR)/gmapl_avx512-uinttable.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uinttable.c' object='gmapl_avx512-uinttable.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-uinttable.o `test -f 'uinttable.c' || echo '$(srcdir)/'`uinttable.c + +gmapl_avx512-uinttable.obj: uinttable.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-uinttable.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-uinttable.Tpo -c -o gmapl_avx512-uinttable.obj `if test -f 'uinttable.c'; then $(CYGPATH_W) 'uinttable.c'; else $(CYGPATH_W) '$(srcdir)/uinttable.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-uinttable.Tpo $(DEPDIR)/gmapl_avx512-uinttable.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uinttable.c' object='gmapl_avx512-uinttable.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-uinttable.obj `if test -f 'uinttable.c'; then $(CYGPATH_W) 'uinttable.c'; else $(CYGPATH_W) '$(srcdir)/uinttable.c'; fi` + +gmapl_avx512-gregion.o: gregion.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-gregion.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-gregion.Tpo -c -o gmapl_avx512-gregion.o `test -f 'gregion.c' || echo '$(srcdir)/'`gregion.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-gregion.Tpo $(DEPDIR)/gmapl_avx512-gregion.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gregion.c' object='gmapl_avx512-gregion.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c 
-o gmapl_avx512-gregion.o `test -f 'gregion.c' || echo '$(srcdir)/'`gregion.c + +gmapl_avx512-gregion.obj: gregion.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-gregion.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-gregion.Tpo -c -o gmapl_avx512-gregion.obj `if test -f 'gregion.c'; then $(CYGPATH_W) 'gregion.c'; else $(CYGPATH_W) '$(srcdir)/gregion.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-gregion.Tpo $(DEPDIR)/gmapl_avx512-gregion.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gregion.c' object='gmapl_avx512-gregion.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-gregion.obj `if test -f 'gregion.c'; then $(CYGPATH_W) 'gregion.c'; else $(CYGPATH_W) '$(srcdir)/gregion.c'; fi` + +gmapl_avx512-match.o: match.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-match.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-match.Tpo -c -o gmapl_avx512-match.o `test -f 'match.c' || echo '$(srcdir)/'`match.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-match.Tpo $(DEPDIR)/gmapl_avx512-match.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='match.c' object='gmapl_avx512-match.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-match.o `test -f 'match.c' || echo '$(srcdir)/'`match.c + +gmapl_avx512-match.obj: match.c +@am__fastdepCC_TRUE@ 
$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-match.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-match.Tpo -c -o gmapl_avx512-match.obj `if test -f 'match.c'; then $(CYGPATH_W) 'match.c'; else $(CYGPATH_W) '$(srcdir)/match.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-match.Tpo $(DEPDIR)/gmapl_avx512-match.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='match.c' object='gmapl_avx512-match.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-match.obj `if test -f 'match.c'; then $(CYGPATH_W) 'match.c'; else $(CYGPATH_W) '$(srcdir)/match.c'; fi` + +gmapl_avx512-matchpool.o: matchpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-matchpool.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-matchpool.Tpo -c -o gmapl_avx512-matchpool.o `test -f 'matchpool.c' || echo '$(srcdir)/'`matchpool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-matchpool.Tpo $(DEPDIR)/gmapl_avx512-matchpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='matchpool.c' object='gmapl_avx512-matchpool.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-matchpool.o `test -f 'matchpool.c' || echo '$(srcdir)/'`matchpool.c + +gmapl_avx512-matchpool.obj: matchpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) 
$(CFLAGS) -MT gmapl_avx512-matchpool.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-matchpool.Tpo -c -o gmapl_avx512-matchpool.obj `if test -f 'matchpool.c'; then $(CYGPATH_W) 'matchpool.c'; else $(CYGPATH_W) '$(srcdir)/matchpool.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-matchpool.Tpo $(DEPDIR)/gmapl_avx512-matchpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='matchpool.c' object='gmapl_avx512-matchpool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-matchpool.obj `if test -f 'matchpool.c'; then $(CYGPATH_W) 'matchpool.c'; else $(CYGPATH_W) '$(srcdir)/matchpool.c'; fi` + +gmapl_avx512-diagnostic.o: diagnostic.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-diagnostic.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-diagnostic.Tpo -c -o gmapl_avx512-diagnostic.o `test -f 'diagnostic.c' || echo '$(srcdir)/'`diagnostic.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-diagnostic.Tpo $(DEPDIR)/gmapl_avx512-diagnostic.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='diagnostic.c' object='gmapl_avx512-diagnostic.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-diagnostic.o `test -f 'diagnostic.c' || echo '$(srcdir)/'`diagnostic.c + +gmapl_avx512-diagnostic.obj: diagnostic.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT 
gmapl_avx512-diagnostic.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-diagnostic.Tpo -c -o gmapl_avx512-diagnostic.obj `if test -f 'diagnostic.c'; then $(CYGPATH_W) 'diagnostic.c'; else $(CYGPATH_W) '$(srcdir)/diagnostic.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-diagnostic.Tpo $(DEPDIR)/gmapl_avx512-diagnostic.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='diagnostic.c' object='gmapl_avx512-diagnostic.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-diagnostic.obj `if test -f 'diagnostic.c'; then $(CYGPATH_W) 'diagnostic.c'; else $(CYGPATH_W) '$(srcdir)/diagnostic.c'; fi` + +gmapl_avx512-stage1.o: stage1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-stage1.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-stage1.Tpo -c -o gmapl_avx512-stage1.o `test -f 'stage1.c' || echo '$(srcdir)/'`stage1.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-stage1.Tpo $(DEPDIR)/gmapl_avx512-stage1.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage1.c' object='gmapl_avx512-stage1.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-stage1.o `test -f 'stage1.c' || echo '$(srcdir)/'`stage1.c + +gmapl_avx512-stage1.obj: stage1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-stage1.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-stage1.Tpo -c -o 
gmapl_avx512-stage1.obj `if test -f 'stage1.c'; then $(CYGPATH_W) 'stage1.c'; else $(CYGPATH_W) '$(srcdir)/stage1.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-stage1.Tpo $(DEPDIR)/gmapl_avx512-stage1.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage1.c' object='gmapl_avx512-stage1.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-stage1.obj `if test -f 'stage1.c'; then $(CYGPATH_W) 'stage1.c'; else $(CYGPATH_W) '$(srcdir)/stage1.c'; fi` + +gmapl_avx512-diag.o: diag.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-diag.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-diag.Tpo -c -o gmapl_avx512-diag.o `test -f 'diag.c' || echo '$(srcdir)/'`diag.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-diag.Tpo $(DEPDIR)/gmapl_avx512-diag.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='diag.c' object='gmapl_avx512-diag.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-diag.o `test -f 'diag.c' || echo '$(srcdir)/'`diag.c + +gmapl_avx512-diag.obj: diag.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-diag.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-diag.Tpo -c -o gmapl_avx512-diag.obj `if test -f 'diag.c'; then $(CYGPATH_W) 'diag.c'; else $(CYGPATH_W) '$(srcdir)/diag.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gmapl_avx512-diag.Tpo $(DEPDIR)/gmapl_avx512-diag.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='diag.c' object='gmapl_avx512-diag.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-diag.obj `if test -f 'diag.c'; then $(CYGPATH_W) 'diag.c'; else $(CYGPATH_W) '$(srcdir)/diag.c'; fi` + +gmapl_avx512-diagpool.o: diagpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-diagpool.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-diagpool.Tpo -c -o gmapl_avx512-diagpool.o `test -f 'diagpool.c' || echo '$(srcdir)/'`diagpool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-diagpool.Tpo $(DEPDIR)/gmapl_avx512-diagpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='diagpool.c' object='gmapl_avx512-diagpool.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-diagpool.o `test -f 'diagpool.c' || echo '$(srcdir)/'`diagpool.c + +gmapl_avx512-diagpool.obj: diagpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-diagpool.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-diagpool.Tpo -c -o gmapl_avx512-diagpool.obj `if test -f 'diagpool.c'; then $(CYGPATH_W) 'diagpool.c'; else $(CYGPATH_W) '$(srcdir)/diagpool.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-diagpool.Tpo $(DEPDIR)/gmapl_avx512-diagpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='diagpool.c' object='gmapl_avx512-diagpool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-diagpool.obj `if test -f 'diagpool.c'; then $(CYGPATH_W) 'diagpool.c'; else $(CYGPATH_W) '$(srcdir)/diagpool.c'; fi` + +gmapl_avx512-cmet.o: cmet.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-cmet.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-cmet.Tpo -c -o gmapl_avx512-cmet.o `test -f 'cmet.c' || echo '$(srcdir)/'`cmet.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-cmet.Tpo $(DEPDIR)/gmapl_avx512-cmet.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cmet.c' object='gmapl_avx512-cmet.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-cmet.o `test -f 'cmet.c' || echo '$(srcdir)/'`cmet.c + +gmapl_avx512-cmet.obj: cmet.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-cmet.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-cmet.Tpo -c -o gmapl_avx512-cmet.obj `if test -f 'cmet.c'; then $(CYGPATH_W) 'cmet.c'; else $(CYGPATH_W) '$(srcdir)/cmet.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-cmet.Tpo $(DEPDIR)/gmapl_avx512-cmet.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cmet.c' object='gmapl_avx512-cmet.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-cmet.obj `if test -f 'cmet.c'; then $(CYGPATH_W) 'cmet.c'; else $(CYGPATH_W) '$(srcdir)/cmet.c'; fi` + +gmapl_avx512-atoi.o: atoi.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-atoi.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-atoi.Tpo -c -o gmapl_avx512-atoi.o `test -f 'atoi.c' || echo '$(srcdir)/'`atoi.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-atoi.Tpo $(DEPDIR)/gmapl_avx512-atoi.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='atoi.c' object='gmapl_avx512-atoi.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-atoi.o `test -f 'atoi.c' || echo '$(srcdir)/'`atoi.c + +gmapl_avx512-atoi.obj: atoi.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-atoi.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-atoi.Tpo -c -o gmapl_avx512-atoi.obj `if test -f 'atoi.c'; then $(CYGPATH_W) 'atoi.c'; else $(CYGPATH_W) '$(srcdir)/atoi.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-atoi.Tpo $(DEPDIR)/gmapl_avx512-atoi.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='atoi.c' object='gmapl_avx512-atoi.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-atoi.obj `if test -f 'atoi.c'; 
then $(CYGPATH_W) 'atoi.c'; else $(CYGPATH_W) '$(srcdir)/atoi.c'; fi` + +gmapl_avx512-orderstat.o: orderstat.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-orderstat.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-orderstat.Tpo -c -o gmapl_avx512-orderstat.o `test -f 'orderstat.c' || echo '$(srcdir)/'`orderstat.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-orderstat.Tpo $(DEPDIR)/gmapl_avx512-orderstat.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='orderstat.c' object='gmapl_avx512-orderstat.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-orderstat.o `test -f 'orderstat.c' || echo '$(srcdir)/'`orderstat.c + +gmapl_avx512-orderstat.obj: orderstat.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-orderstat.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-orderstat.Tpo -c -o gmapl_avx512-orderstat.obj `if test -f 'orderstat.c'; then $(CYGPATH_W) 'orderstat.c'; else $(CYGPATH_W) '$(srcdir)/orderstat.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-orderstat.Tpo $(DEPDIR)/gmapl_avx512-orderstat.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='orderstat.c' object='gmapl_avx512-orderstat.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-orderstat.obj `if test -f 'orderstat.c'; then $(CYGPATH_W) 'orderstat.c'; else $(CYGPATH_W) 
'$(srcdir)/orderstat.c'; fi` + +gmapl_avx512-oligoindex_hr.o: oligoindex_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-oligoindex_hr.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-oligoindex_hr.Tpo -c -o gmapl_avx512-oligoindex_hr.o `test -f 'oligoindex_hr.c' || echo '$(srcdir)/'`oligoindex_hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-oligoindex_hr.Tpo $(DEPDIR)/gmapl_avx512-oligoindex_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='oligoindex_hr.c' object='gmapl_avx512-oligoindex_hr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-oligoindex_hr.o `test -f 'oligoindex_hr.c' || echo '$(srcdir)/'`oligoindex_hr.c + +gmapl_avx512-oligoindex_hr.obj: oligoindex_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-oligoindex_hr.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-oligoindex_hr.Tpo -c -o gmapl_avx512-oligoindex_hr.obj `if test -f 'oligoindex_hr.c'; then $(CYGPATH_W) 'oligoindex_hr.c'; else $(CYGPATH_W) '$(srcdir)/oligoindex_hr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-oligoindex_hr.Tpo $(DEPDIR)/gmapl_avx512-oligoindex_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='oligoindex_hr.c' object='gmapl_avx512-oligoindex_hr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-oligoindex_hr.obj `if test -f 
'oligoindex_hr.c'; then $(CYGPATH_W) 'oligoindex_hr.c'; else $(CYGPATH_W) '$(srcdir)/oligoindex_hr.c'; fi` + +gmapl_avx512-intron.o: intron.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-intron.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-intron.Tpo -c -o gmapl_avx512-intron.o `test -f 'intron.c' || echo '$(srcdir)/'`intron.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-intron.Tpo $(DEPDIR)/gmapl_avx512-intron.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='intron.c' object='gmapl_avx512-intron.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-intron.o `test -f 'intron.c' || echo '$(srcdir)/'`intron.c + +gmapl_avx512-intron.obj: intron.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-intron.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-intron.Tpo -c -o gmapl_avx512-intron.obj `if test -f 'intron.c'; then $(CYGPATH_W) 'intron.c'; else $(CYGPATH_W) '$(srcdir)/intron.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-intron.Tpo $(DEPDIR)/gmapl_avx512-intron.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='intron.c' object='gmapl_avx512-intron.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-intron.obj `if test -f 'intron.c'; then $(CYGPATH_W) 'intron.c'; else $(CYGPATH_W) '$(srcdir)/intron.c'; fi` + +gmapl_avx512-maxent.o: maxent.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-maxent.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-maxent.Tpo -c -o gmapl_avx512-maxent.o `test -f 'maxent.c' || echo '$(srcdir)/'`maxent.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-maxent.Tpo $(DEPDIR)/gmapl_avx512-maxent.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='maxent.c' object='gmapl_avx512-maxent.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-maxent.o `test -f 'maxent.c' || echo '$(srcdir)/'`maxent.c + +gmapl_avx512-maxent.obj: maxent.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-maxent.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-maxent.Tpo -c -o gmapl_avx512-maxent.obj `if test -f 'maxent.c'; then $(CYGPATH_W) 'maxent.c'; else $(CYGPATH_W) '$(srcdir)/maxent.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-maxent.Tpo $(DEPDIR)/gmapl_avx512-maxent.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='maxent.c' object='gmapl_avx512-maxent.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-maxent.obj `if test -f 'maxent.c'; then $(CYGPATH_W) 'maxent.c'; else $(CYGPATH_W) '$(srcdir)/maxent.c'; fi` + +gmapl_avx512-maxent_hr.o: maxent_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) 
-MT gmapl_avx512-maxent_hr.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-maxent_hr.Tpo -c -o gmapl_avx512-maxent_hr.o `test -f 'maxent_hr.c' || echo '$(srcdir)/'`maxent_hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-maxent_hr.Tpo $(DEPDIR)/gmapl_avx512-maxent_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='maxent_hr.c' object='gmapl_avx512-maxent_hr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-maxent_hr.o `test -f 'maxent_hr.c' || echo '$(srcdir)/'`maxent_hr.c + +gmapl_avx512-maxent_hr.obj: maxent_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-maxent_hr.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-maxent_hr.Tpo -c -o gmapl_avx512-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-maxent_hr.Tpo $(DEPDIR)/gmapl_avx512-maxent_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='maxent_hr.c' object='gmapl_avx512-maxent_hr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi` + +gmapl_avx512-pair.o: pair.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-pair.o -MD -MP -MF 
$(DEPDIR)/gmapl_avx512-pair.Tpo -c -o gmapl_avx512-pair.o `test -f 'pair.c' || echo '$(srcdir)/'`pair.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-pair.Tpo $(DEPDIR)/gmapl_avx512-pair.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pair.c' object='gmapl_avx512-pair.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-pair.o `test -f 'pair.c' || echo '$(srcdir)/'`pair.c + +gmapl_avx512-pair.obj: pair.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-pair.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-pair.Tpo -c -o gmapl_avx512-pair.obj `if test -f 'pair.c'; then $(CYGPATH_W) 'pair.c'; else $(CYGPATH_W) '$(srcdir)/pair.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-pair.Tpo $(DEPDIR)/gmapl_avx512-pair.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pair.c' object='gmapl_avx512-pair.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-pair.obj `if test -f 'pair.c'; then $(CYGPATH_W) 'pair.c'; else $(CYGPATH_W) '$(srcdir)/pair.c'; fi` + +gmapl_avx512-pairpool.o: pairpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-pairpool.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-pairpool.Tpo -c -o gmapl_avx512-pairpool.o `test -f 'pairpool.c' || echo '$(srcdir)/'`pairpool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gmapl_avx512-pairpool.Tpo $(DEPDIR)/gmapl_avx512-pairpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pairpool.c' object='gmapl_avx512-pairpool.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-pairpool.o `test -f 'pairpool.c' || echo '$(srcdir)/'`pairpool.c + +gmapl_avx512-pairpool.obj: pairpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-pairpool.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-pairpool.Tpo -c -o gmapl_avx512-pairpool.obj `if test -f 'pairpool.c'; then $(CYGPATH_W) 'pairpool.c'; else $(CYGPATH_W) '$(srcdir)/pairpool.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-pairpool.Tpo $(DEPDIR)/gmapl_avx512-pairpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pairpool.c' object='gmapl_avx512-pairpool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-pairpool.obj `if test -f 'pairpool.c'; then $(CYGPATH_W) 'pairpool.c'; else $(CYGPATH_W) '$(srcdir)/pairpool.c'; fi` + +gmapl_avx512-cellpool.o: cellpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-cellpool.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-cellpool.Tpo -c -o gmapl_avx512-cellpool.o `test -f 'cellpool.c' || echo '$(srcdir)/'`cellpool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-cellpool.Tpo $(DEPDIR)/gmapl_avx512-cellpool.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cellpool.c' object='gmapl_avx512-cellpool.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-cellpool.o `test -f 'cellpool.c' || echo '$(srcdir)/'`cellpool.c + +gmapl_avx512-cellpool.obj: cellpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-cellpool.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-cellpool.Tpo -c -o gmapl_avx512-cellpool.obj `if test -f 'cellpool.c'; then $(CYGPATH_W) 'cellpool.c'; else $(CYGPATH_W) '$(srcdir)/cellpool.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-cellpool.Tpo $(DEPDIR)/gmapl_avx512-cellpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cellpool.c' object='gmapl_avx512-cellpool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-cellpool.obj `if test -f 'cellpool.c'; then $(CYGPATH_W) 'cellpool.c'; else $(CYGPATH_W) '$(srcdir)/cellpool.c'; fi` + +gmapl_avx512-stage2.o: stage2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-stage2.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-stage2.Tpo -c -o gmapl_avx512-stage2.o `test -f 'stage2.c' || echo '$(srcdir)/'`stage2.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-stage2.Tpo $(DEPDIR)/gmapl_avx512-stage2.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage2.c' object='gmapl_avx512-stage2.o' 
libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-stage2.o `test -f 'stage2.c' || echo '$(srcdir)/'`stage2.c + +gmapl_avx512-stage2.obj: stage2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-stage2.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-stage2.Tpo -c -o gmapl_avx512-stage2.obj `if test -f 'stage2.c'; then $(CYGPATH_W) 'stage2.c'; else $(CYGPATH_W) '$(srcdir)/stage2.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-stage2.Tpo $(DEPDIR)/gmapl_avx512-stage2.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage2.c' object='gmapl_avx512-stage2.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-stage2.obj `if test -f 'stage2.c'; then $(CYGPATH_W) 'stage2.c'; else $(CYGPATH_W) '$(srcdir)/stage2.c'; fi` + +gmapl_avx512-doublelist.o: doublelist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-doublelist.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-doublelist.Tpo -c -o gmapl_avx512-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-doublelist.Tpo $(DEPDIR)/gmapl_avx512-doublelist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='doublelist.c' object='gmapl_avx512-doublelist.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c + +gmapl_avx512-doublelist.obj: doublelist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-doublelist.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-doublelist.Tpo -c -o gmapl_avx512-doublelist.obj `if test -f 'doublelist.c'; then $(CYGPATH_W) 'doublelist.c'; else $(CYGPATH_W) '$(srcdir)/doublelist.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-doublelist.Tpo $(DEPDIR)/gmapl_avx512-doublelist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='doublelist.c' object='gmapl_avx512-doublelist.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-doublelist.obj `if test -f 'doublelist.c'; then $(CYGPATH_W) 'doublelist.c'; else $(CYGPATH_W) '$(srcdir)/doublelist.c'; fi` + +gmapl_avx512-smooth.o: smooth.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-smooth.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-smooth.Tpo -c -o gmapl_avx512-smooth.o `test -f 'smooth.c' || echo '$(srcdir)/'`smooth.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-smooth.Tpo $(DEPDIR)/gmapl_avx512-smooth.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='smooth.c' object='gmapl_avx512-smooth.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-smooth.o `test -f 'smooth.c' || echo '$(srcdir)/'`smooth.c + +gmapl_avx512-smooth.obj: smooth.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-smooth.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-smooth.Tpo -c -o gmapl_avx512-smooth.obj `if test -f 'smooth.c'; then $(CYGPATH_W) 'smooth.c'; else $(CYGPATH_W) '$(srcdir)/smooth.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-smooth.Tpo $(DEPDIR)/gmapl_avx512-smooth.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='smooth.c' object='gmapl_avx512-smooth.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-smooth.obj `if test -f 'smooth.c'; then $(CYGPATH_W) 'smooth.c'; else $(CYGPATH_W) '$(srcdir)/smooth.c'; fi` + +gmapl_avx512-splicestringpool.o: splicestringpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-splicestringpool.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-splicestringpool.Tpo -c -o gmapl_avx512-splicestringpool.o `test -f 'splicestringpool.c' || echo '$(srcdir)/'`splicestringpool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-splicestringpool.Tpo $(DEPDIR)/gmapl_avx512-splicestringpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicestringpool.c' object='gmapl_avx512-splicestringpool.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-splicestringpool.o `test -f 'splicestringpool.c' || echo '$(srcdir)/'`splicestringpool.c + +gmapl_avx512-splicestringpool.obj: splicestringpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-splicestringpool.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-splicestringpool.Tpo -c -o gmapl_avx512-splicestringpool.obj `if test -f 'splicestringpool.c'; then $(CYGPATH_W) 'splicestringpool.c'; else $(CYGPATH_W) '$(srcdir)/splicestringpool.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-splicestringpool.Tpo $(DEPDIR)/gmapl_avx512-splicestringpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicestringpool.c' object='gmapl_avx512-splicestringpool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-splicestringpool.obj `if test -f 'splicestringpool.c'; then $(CYGPATH_W) 'splicestringpool.c'; else $(CYGPATH_W) '$(srcdir)/splicestringpool.c'; fi` + +gmapl_avx512-splicetrie_build.o: splicetrie_build.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-splicetrie_build.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-splicetrie_build.Tpo -c -o gmapl_avx512-splicetrie_build.o `test -f 'splicetrie_build.c' || echo '$(srcdir)/'`splicetrie_build.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-splicetrie_build.Tpo $(DEPDIR)/gmapl_avx512-splicetrie_build.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicetrie_build.c' object='gmapl_avx512-splicetrie_build.o' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-splicetrie_build.o `test -f 'splicetrie_build.c' || echo '$(srcdir)/'`splicetrie_build.c + +gmapl_avx512-splicetrie_build.obj: splicetrie_build.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-splicetrie_build.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-splicetrie_build.Tpo -c -o gmapl_avx512-splicetrie_build.obj `if test -f 'splicetrie_build.c'; then $(CYGPATH_W) 'splicetrie_build.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie_build.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-splicetrie_build.Tpo $(DEPDIR)/gmapl_avx512-splicetrie_build.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicetrie_build.c' object='gmapl_avx512-splicetrie_build.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-splicetrie_build.obj `if test -f 'splicetrie_build.c'; then $(CYGPATH_W) 'splicetrie_build.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie_build.c'; fi` + +gmapl_avx512-splicetrie.o: splicetrie.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-splicetrie.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-splicetrie.Tpo -c -o gmapl_avx512-splicetrie.o `test -f 'splicetrie.c' || echo '$(srcdir)/'`splicetrie.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-splicetrie.Tpo $(DEPDIR)/gmapl_avx512-splicetrie.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicetrie.c' object='gmapl_avx512-splicetrie.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-splicetrie.o `test -f 'splicetrie.c' || echo '$(srcdir)/'`splicetrie.c + +gmapl_avx512-splicetrie.obj: splicetrie.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-splicetrie.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-splicetrie.Tpo -c -o gmapl_avx512-splicetrie.obj `if test -f 'splicetrie.c'; then $(CYGPATH_W) 'splicetrie.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-splicetrie.Tpo $(DEPDIR)/gmapl_avx512-splicetrie.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicetrie.c' object='gmapl_avx512-splicetrie.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-splicetrie.obj `if test -f 'splicetrie.c'; then $(CYGPATH_W) 'splicetrie.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie.c'; fi` + +gmapl_avx512-boyer-moore.o: boyer-moore.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-boyer-moore.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-boyer-moore.Tpo -c -o gmapl_avx512-boyer-moore.o `test -f 'boyer-moore.c' || echo '$(srcdir)/'`boyer-moore.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-boyer-moore.Tpo $(DEPDIR)/gmapl_avx512-boyer-moore.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='boyer-moore.c' object='gmapl_avx512-boyer-moore.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-boyer-moore.o `test -f 'boyer-moore.c' || echo '$(srcdir)/'`boyer-moore.c + +gmapl_avx512-boyer-moore.obj: boyer-moore.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-boyer-moore.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-boyer-moore.Tpo -c -o gmapl_avx512-boyer-moore.obj `if test -f 'boyer-moore.c'; then $(CYGPATH_W) 'boyer-moore.c'; else $(CYGPATH_W) '$(srcdir)/boyer-moore.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-boyer-moore.Tpo $(DEPDIR)/gmapl_avx512-boyer-moore.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='boyer-moore.c' object='gmapl_avx512-boyer-moore.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-boyer-moore.obj `if test -f 'boyer-moore.c'; then $(CYGPATH_W) 'boyer-moore.c'; else $(CYGPATH_W) '$(srcdir)/boyer-moore.c'; fi` + +gmapl_avx512-dynprog.o: dynprog.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog.Tpo -c -o gmapl_avx512-dynprog.o `test -f 'dynprog.c' || echo '$(srcdir)/'`dynprog.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog.Tpo $(DEPDIR)/gmapl_avx512-dynprog.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog.c' object='gmapl_avx512-dynprog.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog.o `test -f 'dynprog.c' || echo '$(srcdir)/'`dynprog.c + +gmapl_avx512-dynprog.obj: dynprog.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog.Tpo -c -o gmapl_avx512-dynprog.obj `if test -f 'dynprog.c'; then $(CYGPATH_W) 'dynprog.c'; else $(CYGPATH_W) '$(srcdir)/dynprog.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog.Tpo $(DEPDIR)/gmapl_avx512-dynprog.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog.c' object='gmapl_avx512-dynprog.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog.obj `if test -f 'dynprog.c'; then $(CYGPATH_W) 'dynprog.c'; else $(CYGPATH_W) '$(srcdir)/dynprog.c'; fi` + +gmapl_avx512-dynprog_simd.o: dynprog_simd.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_simd.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_simd.Tpo -c -o gmapl_avx512-dynprog_simd.o `test -f 'dynprog_simd.c' || echo '$(srcdir)/'`dynprog_simd.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_simd.Tpo $(DEPDIR)/gmapl_avx512-dynprog_simd.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_simd.c' 
object='gmapl_avx512-dynprog_simd.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_simd.o `test -f 'dynprog_simd.c' || echo '$(srcdir)/'`dynprog_simd.c + +gmapl_avx512-dynprog_simd.obj: dynprog_simd.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_simd.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_simd.Tpo -c -o gmapl_avx512-dynprog_simd.obj `if test -f 'dynprog_simd.c'; then $(CYGPATH_W) 'dynprog_simd.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_simd.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_simd.Tpo $(DEPDIR)/gmapl_avx512-dynprog_simd.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_simd.c' object='gmapl_avx512-dynprog_simd.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_simd.obj `if test -f 'dynprog_simd.c'; then $(CYGPATH_W) 'dynprog_simd.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_simd.c'; fi` + +gmapl_avx512-dynprog_single.o: dynprog_single.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_single.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_single.Tpo -c -o gmapl_avx512-dynprog_single.o `test -f 'dynprog_single.c' || echo '$(srcdir)/'`dynprog_single.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_single.Tpo $(DEPDIR)/gmapl_avx512-dynprog_single.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_single.c' object='gmapl_avx512-dynprog_single.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_single.o `test -f 'dynprog_single.c' || echo '$(srcdir)/'`dynprog_single.c + +gmapl_avx512-dynprog_single.obj: dynprog_single.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_single.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_single.Tpo -c -o gmapl_avx512-dynprog_single.obj `if test -f 'dynprog_single.c'; then $(CYGPATH_W) 'dynprog_single.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_single.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_single.Tpo $(DEPDIR)/gmapl_avx512-dynprog_single.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_single.c' object='gmapl_avx512-dynprog_single.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_single.obj `if test -f 'dynprog_single.c'; then $(CYGPATH_W) 'dynprog_single.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_single.c'; fi` + +gmapl_avx512-dynprog_genome.o: dynprog_genome.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_genome.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_genome.Tpo -c -o gmapl_avx512-dynprog_genome.o `test -f 'dynprog_genome.c' || echo '$(srcdir)/'`dynprog_genome.c +@am__fastdepCC_TRUE@ 
$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_genome.Tpo $(DEPDIR)/gmapl_avx512-dynprog_genome.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_genome.c' object='gmapl_avx512-dynprog_genome.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_genome.o `test -f 'dynprog_genome.c' || echo '$(srcdir)/'`dynprog_genome.c + +gmapl_avx512-dynprog_genome.obj: dynprog_genome.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_genome.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_genome.Tpo -c -o gmapl_avx512-dynprog_genome.obj `if test -f 'dynprog_genome.c'; then $(CYGPATH_W) 'dynprog_genome.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_genome.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_genome.Tpo $(DEPDIR)/gmapl_avx512-dynprog_genome.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_genome.c' object='gmapl_avx512-dynprog_genome.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_genome.obj `if test -f 'dynprog_genome.c'; then $(CYGPATH_W) 'dynprog_genome.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_genome.c'; fi` + +gmapl_avx512-dynprog_cdna.o: dynprog_cdna.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_cdna.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_cdna.Tpo -c -o gmapl_avx512-dynprog_cdna.o 
`test -f 'dynprog_cdna.c' || echo '$(srcdir)/'`dynprog_cdna.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_cdna.Tpo $(DEPDIR)/gmapl_avx512-dynprog_cdna.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_cdna.c' object='gmapl_avx512-dynprog_cdna.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_cdna.o `test -f 'dynprog_cdna.c' || echo '$(srcdir)/'`dynprog_cdna.c + +gmapl_avx512-dynprog_cdna.obj: dynprog_cdna.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_cdna.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_cdna.Tpo -c -o gmapl_avx512-dynprog_cdna.obj `if test -f 'dynprog_cdna.c'; then $(CYGPATH_W) 'dynprog_cdna.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_cdna.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_cdna.Tpo $(DEPDIR)/gmapl_avx512-dynprog_cdna.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_cdna.c' object='gmapl_avx512-dynprog_cdna.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_cdna.obj `if test -f 'dynprog_cdna.c'; then $(CYGPATH_W) 'dynprog_cdna.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_cdna.c'; fi` + +gmapl_avx512-dynprog_end.o: dynprog_end.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_end.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_end.Tpo 
-c -o gmapl_avx512-dynprog_end.o `test -f 'dynprog_end.c' || echo '$(srcdir)/'`dynprog_end.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_end.Tpo $(DEPDIR)/gmapl_avx512-dynprog_end.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_end.c' object='gmapl_avx512-dynprog_end.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_end.o `test -f 'dynprog_end.c' || echo '$(srcdir)/'`dynprog_end.c + +gmapl_avx512-dynprog_end.obj: dynprog_end.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-dynprog_end.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-dynprog_end.Tpo -c -o gmapl_avx512-dynprog_end.obj `if test -f 'dynprog_end.c'; then $(CYGPATH_W) 'dynprog_end.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_end.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-dynprog_end.Tpo $(DEPDIR)/gmapl_avx512-dynprog_end.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_end.c' object='gmapl_avx512-dynprog_end.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-dynprog_end.obj `if test -f 'dynprog_end.c'; then $(CYGPATH_W) 'dynprog_end.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_end.c'; fi` + +gmapl_avx512-translation.o: translation.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-translation.o -MD -MP -MF 
$(DEPDIR)/gmapl_avx512-translation.Tpo -c -o gmapl_avx512-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-translation.Tpo $(DEPDIR)/gmapl_avx512-translation.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='translation.c' object='gmapl_avx512-translation.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-translation.o `test -f 'translation.c' || echo '$(srcdir)/'`translation.c + +gmapl_avx512-translation.obj: translation.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-translation.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-translation.Tpo -c -o gmapl_avx512-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-translation.Tpo $(DEPDIR)/gmapl_avx512-translation.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='translation.c' object='gmapl_avx512-translation.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-translation.obj `if test -f 'translation.c'; then $(CYGPATH_W) 'translation.c'; else $(CYGPATH_W) '$(srcdir)/translation.c'; fi` + +gmapl_avx512-pbinom.o: pbinom.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-pbinom.o -MD -MP -MF 
$(DEPDIR)/gmapl_avx512-pbinom.Tpo -c -o gmapl_avx512-pbinom.o `test -f 'pbinom.c' || echo '$(srcdir)/'`pbinom.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-pbinom.Tpo $(DEPDIR)/gmapl_avx512-pbinom.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pbinom.c' object='gmapl_avx512-pbinom.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-pbinom.o `test -f 'pbinom.c' || echo '$(srcdir)/'`pbinom.c + +gmapl_avx512-pbinom.obj: pbinom.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-pbinom.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-pbinom.Tpo -c -o gmapl_avx512-pbinom.obj `if test -f 'pbinom.c'; then $(CYGPATH_W) 'pbinom.c'; else $(CYGPATH_W) '$(srcdir)/pbinom.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-pbinom.Tpo $(DEPDIR)/gmapl_avx512-pbinom.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pbinom.c' object='gmapl_avx512-pbinom.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-pbinom.obj `if test -f 'pbinom.c'; then $(CYGPATH_W) 'pbinom.c'; else $(CYGPATH_W) '$(srcdir)/pbinom.c'; fi` + +gmapl_avx512-changepoint.o: changepoint.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-changepoint.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-changepoint.Tpo -c -o gmapl_avx512-changepoint.o `test -f 'changepoint.c' || echo 
'$(srcdir)/'`changepoint.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-changepoint.Tpo $(DEPDIR)/gmapl_avx512-changepoint.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='changepoint.c' object='gmapl_avx512-changepoint.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-changepoint.o `test -f 'changepoint.c' || echo '$(srcdir)/'`changepoint.c + +gmapl_avx512-changepoint.obj: changepoint.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-changepoint.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-changepoint.Tpo -c -o gmapl_avx512-changepoint.obj `if test -f 'changepoint.c'; then $(CYGPATH_W) 'changepoint.c'; else $(CYGPATH_W) '$(srcdir)/changepoint.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-changepoint.Tpo $(DEPDIR)/gmapl_avx512-changepoint.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='changepoint.c' object='gmapl_avx512-changepoint.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-changepoint.obj `if test -f 'changepoint.c'; then $(CYGPATH_W) 'changepoint.c'; else $(CYGPATH_W) '$(srcdir)/changepoint.c'; fi` + +gmapl_avx512-stage3.o: stage3.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-stage3.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-stage3.Tpo -c -o gmapl_avx512-stage3.o `test -f 'stage3.c' || echo '$(srcdir)/'`stage3.c 
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-stage3.Tpo $(DEPDIR)/gmapl_avx512-stage3.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage3.c' object='gmapl_avx512-stage3.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-stage3.o `test -f 'stage3.c' || echo '$(srcdir)/'`stage3.c + +gmapl_avx512-stage3.obj: stage3.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-stage3.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-stage3.Tpo -c -o gmapl_avx512-stage3.obj `if test -f 'stage3.c'; then $(CYGPATH_W) 'stage3.c'; else $(CYGPATH_W) '$(srcdir)/stage3.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-stage3.Tpo $(DEPDIR)/gmapl_avx512-stage3.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage3.c' object='gmapl_avx512-stage3.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-stage3.obj `if test -f 'stage3.c'; then $(CYGPATH_W) 'stage3.c'; else $(CYGPATH_W) '$(srcdir)/stage3.c'; fi` + +gmapl_avx512-request.o: request.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-request.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-request.Tpo -c -o gmapl_avx512-request.o `test -f 'request.c' || echo '$(srcdir)/'`request.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-request.Tpo $(DEPDIR)/gmapl_avx512-request.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='request.c' object='gmapl_avx512-request.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-request.o `test -f 'request.c' || echo '$(srcdir)/'`request.c + +gmapl_avx512-request.obj: request.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-request.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-request.Tpo -c -o gmapl_avx512-request.obj `if test -f 'request.c'; then $(CYGPATH_W) 'request.c'; else $(CYGPATH_W) '$(srcdir)/request.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-request.Tpo $(DEPDIR)/gmapl_avx512-request.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='request.c' object='gmapl_avx512-request.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-request.obj `if test -f 'request.c'; then $(CYGPATH_W) 'request.c'; else $(CYGPATH_W) '$(srcdir)/request.c'; fi` + +gmapl_avx512-result.o: result.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-result.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-result.Tpo -c -o gmapl_avx512-result.o `test -f 'result.c' || echo '$(srcdir)/'`result.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-result.Tpo $(DEPDIR)/gmapl_avx512-result.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='result.c' object='gmapl_avx512-result.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-result.o `test -f 'result.c' || echo '$(srcdir)/'`result.c + +gmapl_avx512-result.obj: result.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-result.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-result.Tpo -c -o gmapl_avx512-result.obj `if test -f 'result.c'; then $(CYGPATH_W) 'result.c'; else $(CYGPATH_W) '$(srcdir)/result.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-result.Tpo $(DEPDIR)/gmapl_avx512-result.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='result.c' object='gmapl_avx512-result.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-result.obj `if test -f 'result.c'; then $(CYGPATH_W) 'result.c'; else $(CYGPATH_W) '$(srcdir)/result.c'; fi` + +gmapl_avx512-output.o: output.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-output.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-output.Tpo -c -o gmapl_avx512-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-output.Tpo $(DEPDIR)/gmapl_avx512-output.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='output.c' object='gmapl_avx512-output.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c + +gmapl_avx512-output.obj: output.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-output.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-output.Tpo -c -o gmapl_avx512-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-output.Tpo $(DEPDIR)/gmapl_avx512-output.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='output.c' object='gmapl_avx512-output.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi` + +gmapl_avx512-inbuffer.o: inbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-inbuffer.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-inbuffer.Tpo -c -o gmapl_avx512-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-inbuffer.Tpo $(DEPDIR)/gmapl_avx512-inbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='inbuffer.c' object='gmapl_avx512-inbuffer.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o 
gmapl_avx512-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c + +gmapl_avx512-inbuffer.obj: inbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-inbuffer.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-inbuffer.Tpo -c -o gmapl_avx512-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-inbuffer.Tpo $(DEPDIR)/gmapl_avx512-inbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='inbuffer.c' object='gmapl_avx512-inbuffer.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi` + +gmapl_avx512-samheader.o: samheader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-samheader.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-samheader.Tpo -c -o gmapl_avx512-samheader.o `test -f 'samheader.c' || echo '$(srcdir)/'`samheader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-samheader.Tpo $(DEPDIR)/gmapl_avx512-samheader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='samheader.c' object='gmapl_avx512-samheader.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-samheader.o `test -f 'samheader.c' || echo 
'$(srcdir)/'`samheader.c + +gmapl_avx512-samheader.obj: samheader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-samheader.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-samheader.Tpo -c -o gmapl_avx512-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-samheader.Tpo $(DEPDIR)/gmapl_avx512-samheader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='samheader.c' object='gmapl_avx512-samheader.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi` + +gmapl_avx512-outbuffer.o: outbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-outbuffer.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-outbuffer.Tpo -c -o gmapl_avx512-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-outbuffer.Tpo $(DEPDIR)/gmapl_avx512-outbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='outbuffer.c' object='gmapl_avx512-outbuffer.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c + 
+gmapl_avx512-outbuffer.obj: outbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-outbuffer.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-outbuffer.Tpo -c -o gmapl_avx512-outbuffer.obj `if test -f 'outbuffer.c'; then $(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-outbuffer.Tpo $(DEPDIR)/gmapl_avx512-outbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='outbuffer.c' object='gmapl_avx512-outbuffer.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-outbuffer.obj `if test -f 'outbuffer.c'; then $(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi` + +gmapl_avx512-chimera.o: chimera.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-chimera.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-chimera.Tpo -c -o gmapl_avx512-chimera.o `test -f 'chimera.c' || echo '$(srcdir)/'`chimera.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-chimera.Tpo $(DEPDIR)/gmapl_avx512-chimera.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chimera.c' object='gmapl_avx512-chimera.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-chimera.o `test -f 'chimera.c' || echo '$(srcdir)/'`chimera.c + +gmapl_avx512-chimera.obj: chimera.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-chimera.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-chimera.Tpo -c -o gmapl_avx512-chimera.obj `if test -f 'chimera.c'; then $(CYGPATH_W) 'chimera.c'; else $(CYGPATH_W) '$(srcdir)/chimera.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-chimera.Tpo $(DEPDIR)/gmapl_avx512-chimera.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chimera.c' object='gmapl_avx512-chimera.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-chimera.obj `if test -f 'chimera.c'; then $(CYGPATH_W) 'chimera.c'; else $(CYGPATH_W) '$(srcdir)/chimera.c'; fi` + +gmapl_avx512-datadir.o: datadir.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-datadir.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-datadir.Tpo -c -o gmapl_avx512-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-datadir.Tpo $(DEPDIR)/gmapl_avx512-datadir.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datadir.c' object='gmapl_avx512-datadir.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c + +gmapl_avx512-datadir.obj: datadir.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT 
gmapl_avx512-datadir.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-datadir.Tpo -c -o gmapl_avx512-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-datadir.Tpo $(DEPDIR)/gmapl_avx512-datadir.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datadir.c' object='gmapl_avx512-datadir.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi` + +gmapl_avx512-parserange.o: parserange.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-parserange.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-parserange.Tpo -c -o gmapl_avx512-parserange.o `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-parserange.Tpo $(DEPDIR)/gmapl_avx512-parserange.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parserange.c' object='gmapl_avx512-parserange.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-parserange.o `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c + +gmapl_avx512-parserange.obj: parserange.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-parserange.obj -MD -MP -MF 
$(DEPDIR)/gmapl_avx512-parserange.Tpo -c -o gmapl_avx512-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-parserange.Tpo $(DEPDIR)/gmapl_avx512-parserange.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parserange.c' object='gmapl_avx512-parserange.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi` + +gmapl_avx512-getopt.o: getopt.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-getopt.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-getopt.Tpo -c -o gmapl_avx512-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-getopt.Tpo $(DEPDIR)/gmapl_avx512-getopt.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt.c' object='gmapl_avx512-getopt.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c + +gmapl_avx512-getopt.obj: getopt.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-getopt.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-getopt.Tpo -c -o gmapl_avx512-getopt.obj `if test -f 'getopt.c'; 
then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-getopt.Tpo $(DEPDIR)/gmapl_avx512-getopt.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt.c' object='gmapl_avx512-getopt.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-getopt.obj `if test -f 'getopt.c'; then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi` + +gmapl_avx512-getopt1.o: getopt1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-getopt1.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-getopt1.Tpo -c -o gmapl_avx512-getopt1.o `test -f 'getopt1.c' || echo '$(srcdir)/'`getopt1.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-getopt1.Tpo $(DEPDIR)/gmapl_avx512-getopt1.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt1.c' object='gmapl_avx512-getopt1.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-getopt1.o `test -f 'getopt1.c' || echo '$(srcdir)/'`getopt1.c + +gmapl_avx512-getopt1.obj: getopt1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-getopt1.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-getopt1.Tpo -c -o gmapl_avx512-getopt1.obj `if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gmapl_avx512-getopt1.Tpo $(DEPDIR)/gmapl_avx512-getopt1.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt1.c' object='gmapl_avx512-getopt1.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-getopt1.obj `if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi` + +gmapl_avx512-gmap.o: gmap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-gmap.o -MD -MP -MF $(DEPDIR)/gmapl_avx512-gmap.Tpo -c -o gmapl_avx512-gmap.o `test -f 'gmap.c' || echo '$(srcdir)/'`gmap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-gmap.Tpo $(DEPDIR)/gmapl_avx512-gmap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gmap.c' object='gmapl_avx512-gmap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-gmap.o `test -f 'gmap.c' || echo '$(srcdir)/'`gmap.c + +gmapl_avx512-gmap.obj: gmap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -MT gmapl_avx512-gmap.obj -MD -MP -MF $(DEPDIR)/gmapl_avx512-gmap.Tpo -c -o gmapl_avx512-gmap.obj `if test -f 'gmap.c'; then $(CYGPATH_W) 'gmap.c'; else $(CYGPATH_W) '$(srcdir)/gmap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_avx512-gmap.Tpo $(DEPDIR)/gmapl_avx512-gmap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gmap.c' object='gmapl_avx512-gmap.obj' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_avx512_CFLAGS) $(CFLAGS) -c -o gmapl_avx512-gmap.obj `if test -f 'gmap.c'; then $(CYGPATH_W) 'gmap.c'; else $(CYGPATH_W) '$(srcdir)/gmap.c'; fi` + +gmapl_nosimd-except.o: except.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-except.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-except.Tpo -c -o gmapl_nosimd-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-except.Tpo $(DEPDIR)/gmapl_nosimd-except.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='except.c' object='gmapl_nosimd-except.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c + +gmapl_nosimd-except.obj: except.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-except.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-except.Tpo -c -o gmapl_nosimd-except.obj `if test -f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) '$(srcdir)/except.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-except.Tpo $(DEPDIR)/gmapl_nosimd-except.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='except.c' object='gmapl_nosimd-except.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-except.obj `if test -f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) '$(srcdir)/except.c'; fi` + +gmapl_nosimd-assert.o: assert.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-assert.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-assert.Tpo -c -o gmapl_nosimd-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-assert.Tpo $(DEPDIR)/gmapl_nosimd-assert.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='assert.c' object='gmapl_nosimd-assert.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c + +gmapl_nosimd-assert.obj: assert.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-assert.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-assert.Tpo -c -o gmapl_nosimd-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-assert.Tpo $(DEPDIR)/gmapl_nosimd-assert.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='assert.c' object='gmapl_nosimd-assert.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o 
gmapl_nosimd-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi` + +gmapl_nosimd-mem.o: mem.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-mem.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-mem.Tpo -c -o gmapl_nosimd-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-mem.Tpo $(DEPDIR)/gmapl_nosimd-mem.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mem.c' object='gmapl_nosimd-mem.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c + +gmapl_nosimd-mem.obj: mem.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-mem.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-mem.Tpo -c -o gmapl_nosimd-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-mem.Tpo $(DEPDIR)/gmapl_nosimd-mem.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mem.c' object='gmapl_nosimd-mem.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; fi` + +gmapl_nosimd-intlist.o: intlist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-intlist.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-intlist.Tpo -c -o gmapl_nosimd-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-intlist.Tpo $(DEPDIR)/gmapl_nosimd-intlist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='intlist.c' object='gmapl_nosimd-intlist.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c + +gmapl_nosimd-intlist.obj: intlist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-intlist.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-intlist.Tpo -c -o gmapl_nosimd-intlist.obj `if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-intlist.Tpo $(DEPDIR)/gmapl_nosimd-intlist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='intlist.c' object='gmapl_nosimd-intlist.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-intlist.obj `if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi` + +gmapl_nosimd-list.o: list.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-list.o -MD -MP -MF 
$(DEPDIR)/gmapl_nosimd-list.Tpo -c -o gmapl_nosimd-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-list.Tpo $(DEPDIR)/gmapl_nosimd-list.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='list.c' object='gmapl_nosimd-list.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c + +gmapl_nosimd-list.obj: list.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-list.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-list.Tpo -c -o gmapl_nosimd-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-list.Tpo $(DEPDIR)/gmapl_nosimd-list.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='list.c' object='gmapl_nosimd-list.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi` + +gmapl_nosimd-littleendian.o: littleendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-littleendian.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-littleendian.Tpo -c -o gmapl_nosimd-littleendian.o `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gmapl_nosimd-littleendian.Tpo $(DEPDIR)/gmapl_nosimd-littleendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='littleendian.c' object='gmapl_nosimd-littleendian.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-littleendian.o `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c + +gmapl_nosimd-littleendian.obj: littleendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-littleendian.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-littleendian.Tpo -c -o gmapl_nosimd-littleendian.obj `if test -f 'littleendian.c'; then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-littleendian.Tpo $(DEPDIR)/gmapl_nosimd-littleendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='littleendian.c' object='gmapl_nosimd-littleendian.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-littleendian.obj `if test -f 'littleendian.c'; then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi` + +gmapl_nosimd-bigendian.o: bigendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-bigendian.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-bigendian.Tpo -c -o gmapl_nosimd-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c +@am__fastdepCC_TRUE@ 
$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-bigendian.Tpo $(DEPDIR)/gmapl_nosimd-bigendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bigendian.c' object='gmapl_nosimd-bigendian.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c + +gmapl_nosimd-bigendian.obj: bigendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-bigendian.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-bigendian.Tpo -c -o gmapl_nosimd-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-bigendian.Tpo $(DEPDIR)/gmapl_nosimd-bigendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bigendian.c' object='gmapl_nosimd-bigendian.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi` + +gmapl_nosimd-univinterval.o: univinterval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-univinterval.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-univinterval.Tpo -c -o gmapl_nosimd-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gmapl_nosimd-univinterval.Tpo $(DEPDIR)/gmapl_nosimd-univinterval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='univinterval.c' object='gmapl_nosimd-univinterval.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c + +gmapl_nosimd-univinterval.obj: univinterval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-univinterval.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-univinterval.Tpo -c -o gmapl_nosimd-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-univinterval.Tpo $(DEPDIR)/gmapl_nosimd-univinterval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='univinterval.c' object='gmapl_nosimd-univinterval.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi` + +gmapl_nosimd-interval.o: interval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-interval.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-interval.Tpo -c -o gmapl_nosimd-interval.o `test -f 'interval.c' || echo '$(srcdir)/'`interval.c +@am__fastdepCC_TRUE@ 
$(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-interval.Tpo $(DEPDIR)/gmapl_nosimd-interval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='interval.c' object='gmapl_nosimd-interval.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-interval.o `test -f 'interval.c' || echo '$(srcdir)/'`interval.c + +gmapl_nosimd-interval.obj: interval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-interval.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-interval.Tpo -c -o gmapl_nosimd-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-interval.Tpo $(DEPDIR)/gmapl_nosimd-interval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='interval.c' object='gmapl_nosimd-interval.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi` + +gmapl_nosimd-uintlist.o: uintlist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-uintlist.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-uintlist.Tpo -c -o gmapl_nosimd-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-uintlist.Tpo 
$(DEPDIR)/gmapl_nosimd-uintlist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uintlist.c' object='gmapl_nosimd-uintlist.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c + +gmapl_nosimd-uintlist.obj: uintlist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-uintlist.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-uintlist.Tpo -c -o gmapl_nosimd-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-uintlist.Tpo $(DEPDIR)/gmapl_nosimd-uintlist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uintlist.c' object='gmapl_nosimd-uintlist.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi` + +gmapl_nosimd-uint8list.o: uint8list.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-uint8list.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-uint8list.Tpo -c -o gmapl_nosimd-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-uint8list.Tpo $(DEPDIR)/gmapl_nosimd-uint8list.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='uint8list.c' object='gmapl_nosimd-uint8list.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c + +gmapl_nosimd-uint8list.obj: uint8list.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-uint8list.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-uint8list.Tpo -c -o gmapl_nosimd-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-uint8list.Tpo $(DEPDIR)/gmapl_nosimd-uint8list.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uint8list.c' object='gmapl_nosimd-uint8list.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi` + +gmapl_nosimd-stopwatch.o: stopwatch.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-stopwatch.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-stopwatch.Tpo -c -o gmapl_nosimd-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-stopwatch.Tpo $(DEPDIR)/gmapl_nosimd-stopwatch.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stopwatch.c' 
object='gmapl_nosimd-stopwatch.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c + +gmapl_nosimd-stopwatch.obj: stopwatch.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-stopwatch.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-stopwatch.Tpo -c -o gmapl_nosimd-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-stopwatch.Tpo $(DEPDIR)/gmapl_nosimd-stopwatch.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stopwatch.c' object='gmapl_nosimd-stopwatch.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi` + +gmapl_nosimd-semaphore.o: semaphore.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-semaphore.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-semaphore.Tpo -c -o gmapl_nosimd-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-semaphore.Tpo $(DEPDIR)/gmapl_nosimd-semaphore.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='semaphore.c' object='gmapl_nosimd-semaphore.o' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c + +gmapl_nosimd-semaphore.obj: semaphore.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-semaphore.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-semaphore.Tpo -c -o gmapl_nosimd-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-semaphore.Tpo $(DEPDIR)/gmapl_nosimd-semaphore.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='semaphore.c' object='gmapl_nosimd-semaphore.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi` + +gmapl_nosimd-access.o: access.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-access.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-access.Tpo -c -o gmapl_nosimd-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-access.Tpo $(DEPDIR)/gmapl_nosimd-access.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='access.c' object='gmapl_nosimd-access.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c + +gmapl_nosimd-access.obj: access.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-access.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-access.Tpo -c -o gmapl_nosimd-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-access.Tpo $(DEPDIR)/gmapl_nosimd-access.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='access.c' object='gmapl_nosimd-access.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi` + +gmapl_nosimd-filestring.o: filestring.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-filestring.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-filestring.Tpo -c -o gmapl_nosimd-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-filestring.Tpo $(DEPDIR)/gmapl_nosimd-filestring.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='filestring.c' object='gmapl_nosimd-filestring.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c + +gmapl_nosimd-filestring.obj: filestring.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-filestring.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-filestring.Tpo -c -o gmapl_nosimd-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi` @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-filestring.Tpo $(DEPDIR)/gmapl_nosimd-filestring.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='filestring.c' object='gmapl_nosimd-filestring.obj' libtool=no @AMDEPBACKSLASH@ @@ -16834,6 +20122,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gmapl_nosimd-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-merge.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-merge.Tpo -c -o gmapl_nosimd-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-merge.Tpo $(DEPDIR)/gmapl_nosimd-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmapl_nosimd-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gmapl_nosimd-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-merge.obj -MD -MP -MF $(DEPDIR)/gmapl_nosimd-merge.Tpo -c -o gmapl_nosimd-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-merge.Tpo $(DEPDIR)/gmapl_nosimd-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmapl_nosimd-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -c -o gmapl_nosimd-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gmapl_nosimd-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_nosimd_CFLAGS) $(CFLAGS) -MT gmapl_nosimd-indexdb.o -MD -MP -MF $(DEPDIR)/gmapl_nosimd-indexdb.Tpo -c -o gmapl_nosimd-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_nosimd-indexdb.Tpo $(DEPDIR)/gmapl_nosimd-indexdb.Po @@ -18038,6 +21340,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse2_CFLAGS) $(CFLAGS) -c -o gmapl_sse2-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gmapl_sse2-merge.o: 
merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse2_CFLAGS) $(CFLAGS) -MT gmapl_sse2-merge.o -MD -MP -MF $(DEPDIR)/gmapl_sse2-merge.Tpo -c -o gmapl_sse2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse2-merge.Tpo $(DEPDIR)/gmapl_sse2-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmapl_sse2-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse2_CFLAGS) $(CFLAGS) -c -o gmapl_sse2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gmapl_sse2-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse2_CFLAGS) $(CFLAGS) -MT gmapl_sse2-merge.obj -MD -MP -MF $(DEPDIR)/gmapl_sse2-merge.Tpo -c -o gmapl_sse2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse2-merge.Tpo $(DEPDIR)/gmapl_sse2-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmapl_sse2-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse2_CFLAGS) $(CFLAGS) -c -o gmapl_sse2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gmapl_sse2-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse2_CFLAGS) $(CFLAGS) -MT gmapl_sse2-indexdb.o -MD -MP -MF 
$(DEPDIR)/gmapl_sse2-indexdb.Tpo -c -o gmapl_sse2-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse2-indexdb.Tpo $(DEPDIR)/gmapl_sse2-indexdb.Po @@ -19242,6 +22558,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse41_CFLAGS) $(CFLAGS) -c -o gmapl_sse41-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gmapl_sse41-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse41_CFLAGS) $(CFLAGS) -MT gmapl_sse41-merge.o -MD -MP -MF $(DEPDIR)/gmapl_sse41-merge.Tpo -c -o gmapl_sse41-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse41-merge.Tpo $(DEPDIR)/gmapl_sse41-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmapl_sse41-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse41_CFLAGS) $(CFLAGS) -c -o gmapl_sse41-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gmapl_sse41-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse41_CFLAGS) $(CFLAGS) -MT gmapl_sse41-merge.obj -MD -MP -MF $(DEPDIR)/gmapl_sse41-merge.Tpo -c -o gmapl_sse41-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse41-merge.Tpo $(DEPDIR)/gmapl_sse41-merge.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmapl_sse41-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse41_CFLAGS) $(CFLAGS) -c -o gmapl_sse41-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gmapl_sse41-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse41_CFLAGS) $(CFLAGS) -MT gmapl_sse41-indexdb.o -MD -MP -MF $(DEPDIR)/gmapl_sse41-indexdb.Tpo -c -o gmapl_sse41-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse41-indexdb.Tpo $(DEPDIR)/gmapl_sse41-indexdb.Po @@ -20446,6 +23776,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse42_CFLAGS) $(CFLAGS) -c -o gmapl_sse42-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gmapl_sse42-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse42_CFLAGS) $(CFLAGS) -MT gmapl_sse42-merge.o -MD -MP -MF $(DEPDIR)/gmapl_sse42-merge.Tpo -c -o gmapl_sse42-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse42-merge.Tpo $(DEPDIR)/gmapl_sse42-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmapl_sse42-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse42_CFLAGS) $(CFLAGS) -c -o gmapl_sse42-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gmapl_sse42-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse42_CFLAGS) $(CFLAGS) -MT gmapl_sse42-merge.obj -MD -MP -MF $(DEPDIR)/gmapl_sse42-merge.Tpo -c -o gmapl_sse42-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse42-merge.Tpo $(DEPDIR)/gmapl_sse42-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmapl_sse42-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse42_CFLAGS) $(CFLAGS) -c -o gmapl_sse42-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gmapl_sse42-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_sse42_CFLAGS) $(CFLAGS) -MT gmapl_sse42-indexdb.o -MD -MP -MF $(DEPDIR)/gmapl_sse42-indexdb.Tpo -c -o gmapl_sse42-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_sse42-indexdb.Tpo $(DEPDIR)/gmapl_sse42-indexdb.Po @@ -21650,6 +24994,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_ssse3_CFLAGS) $(CFLAGS) -c -o gmapl_ssse3-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else 
$(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gmapl_ssse3-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_ssse3_CFLAGS) $(CFLAGS) -MT gmapl_ssse3-merge.o -MD -MP -MF $(DEPDIR)/gmapl_ssse3-merge.Tpo -c -o gmapl_ssse3-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_ssse3-merge.Tpo $(DEPDIR)/gmapl_ssse3-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmapl_ssse3-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_ssse3_CFLAGS) $(CFLAGS) -c -o gmapl_ssse3-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gmapl_ssse3-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_ssse3_CFLAGS) $(CFLAGS) -MT gmapl_ssse3-merge.obj -MD -MP -MF $(DEPDIR)/gmapl_ssse3-merge.Tpo -c -o gmapl_ssse3-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_ssse3-merge.Tpo $(DEPDIR)/gmapl_ssse3-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gmapl_ssse3-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gmapl_ssse3_CFLAGS) $(CFLAGS) -c -o gmapl_ssse3-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gmapl_ssse3-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(gmapl_ssse3_CFLAGS) $(CFLAGS) -MT gmapl_ssse3-indexdb.o -MD -MP -MF $(DEPDIR)/gmapl_ssse3-indexdb.Tpo -c -o gmapl_ssse3-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gmapl_ssse3-indexdb.Tpo $(DEPDIR)/gmapl_ssse3-indexdb.Po @@ -22826,6 +26184,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gsnap_avx2-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-merge.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-merge.Tpo -c -o gsnap_avx2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-merge.Tpo $(DEPDIR)/gsnap_avx2-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnap_avx2-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gsnap_avx2-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-merge.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-merge.Tpo -c -o gsnap_avx2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-merge.Tpo 
$(DEPDIR)/gsnap_avx2-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnap_avx2-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gsnap_avx2-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-indexdb.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-indexdb.Tpo -c -o gsnap_avx2-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-indexdb.Tpo $(DEPDIR)/gsnap_avx2-indexdb.Po @@ -23526,6 +26898,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-sarray-read.obj `if test -f 'sarray-read.c'; then $(CYGPATH_W) 'sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/sarray-read.c'; fi` +gsnap_avx2-merge-heap.o: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-merge-heap.Tpo -c -o gsnap_avx2-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-merge-heap.Tpo $(DEPDIR)/gsnap_avx2-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnap_avx2-merge-heap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c + +gsnap_avx2-merge-heap.obj: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-merge-heap.Tpo -c -o gsnap_avx2-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-merge-heap.Tpo $(DEPDIR)/gsnap_avx2-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnap_avx2-merge-heap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` + gsnap_avx2-stage1hr.o: stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-stage1hr.Tpo -c -o gsnap_avx2-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-stage1hr.Tpo $(DEPDIR)/gsnap_avx2-stage1hr.Po @@ -23552,147 +26938,1435 @@ @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-request.Tpo $(DEPDIR)/gsnap_avx2-request.Po @AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='request.c' object='gsnap_avx2-request.obj' libtool=no @AMDEPBACKSLASH@ 
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-request.obj `if test -f 'request.c'; then $(CYGPATH_W) 'request.c'; else $(CYGPATH_W) '$(srcdir)/request.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-request.obj `if test -f 'request.c'; then $(CYGPATH_W) 'request.c'; else $(CYGPATH_W) '$(srcdir)/request.c'; fi` + +gsnap_avx2-resulthr.o: resulthr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-resulthr.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-resulthr.Tpo -c -o gsnap_avx2-resulthr.o `test -f 'resulthr.c' || echo '$(srcdir)/'`resulthr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-resulthr.Tpo $(DEPDIR)/gsnap_avx2-resulthr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='resulthr.c' object='gsnap_avx2-resulthr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-resulthr.o `test -f 'resulthr.c' || echo '$(srcdir)/'`resulthr.c + +gsnap_avx2-resulthr.obj: resulthr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-resulthr.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-resulthr.Tpo -c -o gsnap_avx2-resulthr.obj `if test -f 'resulthr.c'; then $(CYGPATH_W) 'resulthr.c'; else $(CYGPATH_W) '$(srcdir)/resulthr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-resulthr.Tpo 
$(DEPDIR)/gsnap_avx2-resulthr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='resulthr.c' object='gsnap_avx2-resulthr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-resulthr.obj `if test -f 'resulthr.c'; then $(CYGPATH_W) 'resulthr.c'; else $(CYGPATH_W) '$(srcdir)/resulthr.c'; fi` + +gsnap_avx2-output.o: output.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-output.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-output.Tpo -c -o gsnap_avx2-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-output.Tpo $(DEPDIR)/gsnap_avx2-output.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='output.c' object='gsnap_avx2-output.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c + +gsnap_avx2-output.obj: output.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-output.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-output.Tpo -c -o gsnap_avx2-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-output.Tpo $(DEPDIR)/gsnap_avx2-output.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='output.c' object='gsnap_avx2-output.obj' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi` + +gsnap_avx2-inbuffer.o: inbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-inbuffer.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-inbuffer.Tpo -c -o gsnap_avx2-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-inbuffer.Tpo $(DEPDIR)/gsnap_avx2-inbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='inbuffer.c' object='gsnap_avx2-inbuffer.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c + +gsnap_avx2-inbuffer.obj: inbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-inbuffer.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-inbuffer.Tpo -c -o gsnap_avx2-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-inbuffer.Tpo $(DEPDIR)/gsnap_avx2-inbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='inbuffer.c' object='gsnap_avx2-inbuffer.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi` + +gsnap_avx2-samheader.o: samheader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-samheader.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-samheader.Tpo -c -o gsnap_avx2-samheader.o `test -f 'samheader.c' || echo '$(srcdir)/'`samheader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-samheader.Tpo $(DEPDIR)/gsnap_avx2-samheader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='samheader.c' object='gsnap_avx2-samheader.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-samheader.o `test -f 'samheader.c' || echo '$(srcdir)/'`samheader.c + +gsnap_avx2-samheader.obj: samheader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-samheader.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-samheader.Tpo -c -o gsnap_avx2-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-samheader.Tpo $(DEPDIR)/gsnap_avx2-samheader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='samheader.c' object='gsnap_avx2-samheader.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi` + +gsnap_avx2-outbuffer.o: outbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-outbuffer.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-outbuffer.Tpo -c -o gsnap_avx2-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-outbuffer.Tpo $(DEPDIR)/gsnap_avx2-outbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='outbuffer.c' object='gsnap_avx2-outbuffer.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c + +gsnap_avx2-outbuffer.obj: outbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-outbuffer.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-outbuffer.Tpo -c -o gsnap_avx2-outbuffer.obj `if test -f 'outbuffer.c'; then $(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-outbuffer.Tpo $(DEPDIR)/gsnap_avx2-outbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='outbuffer.c' object='gsnap_avx2-outbuffer.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-outbuffer.obj `if test -f 'outbuffer.c'; then 
$(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi` + +gsnap_avx2-datadir.o: datadir.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-datadir.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-datadir.Tpo -c -o gsnap_avx2-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-datadir.Tpo $(DEPDIR)/gsnap_avx2-datadir.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datadir.c' object='gsnap_avx2-datadir.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c + +gsnap_avx2-datadir.obj: datadir.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-datadir.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-datadir.Tpo -c -o gsnap_avx2-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-datadir.Tpo $(DEPDIR)/gsnap_avx2-datadir.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datadir.c' object='gsnap_avx2-datadir.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi` + +gsnap_avx2-parserange.o: parserange.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-parserange.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-parserange.Tpo -c -o gsnap_avx2-parserange.o `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-parserange.Tpo $(DEPDIR)/gsnap_avx2-parserange.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parserange.c' object='gsnap_avx2-parserange.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-parserange.o `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c + +gsnap_avx2-parserange.obj: parserange.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-parserange.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-parserange.Tpo -c -o gsnap_avx2-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-parserange.Tpo $(DEPDIR)/gsnap_avx2-parserange.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parserange.c' object='gsnap_avx2-parserange.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi` + +gsnap_avx2-getopt.o: getopt.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-getopt.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-getopt.Tpo -c -o gsnap_avx2-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-getopt.Tpo $(DEPDIR)/gsnap_avx2-getopt.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt.c' object='gsnap_avx2-getopt.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c + +gsnap_avx2-getopt.obj: getopt.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-getopt.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-getopt.Tpo -c -o gsnap_avx2-getopt.obj `if test -f 'getopt.c'; then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-getopt.Tpo $(DEPDIR)/gsnap_avx2-getopt.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt.c' object='gsnap_avx2-getopt.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-getopt.obj `if test -f 'getopt.c'; then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi` + +gsnap_avx2-getopt1.o: getopt1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-getopt1.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-getopt1.Tpo -c -o gsnap_avx2-getopt1.o `test -f 'getopt1.c' || echo '$(srcdir)/'`getopt1.c 
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-getopt1.Tpo $(DEPDIR)/gsnap_avx2-getopt1.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt1.c' object='gsnap_avx2-getopt1.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-getopt1.o `test -f 'getopt1.c' || echo '$(srcdir)/'`getopt1.c + +gsnap_avx2-getopt1.obj: getopt1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-getopt1.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-getopt1.Tpo -c -o gsnap_avx2-getopt1.obj `if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-getopt1.Tpo $(DEPDIR)/gsnap_avx2-getopt1.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt1.c' object='gsnap_avx2-getopt1.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-getopt1.obj `if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi` + +gsnap_avx2-gsnap.o: gsnap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-gsnap.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-gsnap.Tpo -c -o gsnap_avx2-gsnap.o `test -f 'gsnap.c' || echo '$(srcdir)/'`gsnap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-gsnap.Tpo $(DEPDIR)/gsnap_avx2-gsnap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gsnap.c' 
object='gsnap_avx2-gsnap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-gsnap.o `test -f 'gsnap.c' || echo '$(srcdir)/'`gsnap.c + +gsnap_avx2-gsnap.obj: gsnap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-gsnap.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-gsnap.Tpo -c -o gsnap_avx2-gsnap.obj `if test -f 'gsnap.c'; then $(CYGPATH_W) 'gsnap.c'; else $(CYGPATH_W) '$(srcdir)/gsnap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-gsnap.Tpo $(DEPDIR)/gsnap_avx2-gsnap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gsnap.c' object='gsnap_avx2-gsnap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-gsnap.obj `if test -f 'gsnap.c'; then $(CYGPATH_W) 'gsnap.c'; else $(CYGPATH_W) '$(srcdir)/gsnap.c'; fi` + +gsnap_avx512-except.o: except.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-except.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-except.Tpo -c -o gsnap_avx512-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-except.Tpo $(DEPDIR)/gsnap_avx512-except.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='except.c' object='gsnap_avx512-except.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c + +gsnap_avx512-except.obj: except.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-except.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-except.Tpo -c -o gsnap_avx512-except.obj `if test -f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) '$(srcdir)/except.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-except.Tpo $(DEPDIR)/gsnap_avx512-except.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='except.c' object='gsnap_avx512-except.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-except.obj `if test -f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) '$(srcdir)/except.c'; fi` + +gsnap_avx512-assert.o: assert.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-assert.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-assert.Tpo -c -o gsnap_avx512-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-assert.Tpo $(DEPDIR)/gsnap_avx512-assert.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='assert.c' object='gsnap_avx512-assert.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o 
gsnap_avx512-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c + +gsnap_avx512-assert.obj: assert.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-assert.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-assert.Tpo -c -o gsnap_avx512-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-assert.Tpo $(DEPDIR)/gsnap_avx512-assert.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='assert.c' object='gsnap_avx512-assert.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi` + +gsnap_avx512-mem.o: mem.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-mem.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-mem.Tpo -c -o gsnap_avx512-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-mem.Tpo $(DEPDIR)/gsnap_avx512-mem.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mem.c' object='gsnap_avx512-mem.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c + +gsnap_avx512-mem.obj: mem.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-mem.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-mem.Tpo -c -o gsnap_avx512-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-mem.Tpo $(DEPDIR)/gsnap_avx512-mem.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mem.c' object='gsnap_avx512-mem.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; fi` + +gsnap_avx512-intlist.o: intlist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-intlist.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-intlist.Tpo -c -o gsnap_avx512-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-intlist.Tpo $(DEPDIR)/gsnap_avx512-intlist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='intlist.c' object='gsnap_avx512-intlist.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c + +gsnap_avx512-intlist.obj: intlist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-intlist.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-intlist.Tpo -c -o gsnap_avx512-intlist.obj 
`if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-intlist.Tpo $(DEPDIR)/gsnap_avx512-intlist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='intlist.c' object='gsnap_avx512-intlist.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-intlist.obj `if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi` + +gsnap_avx512-list.o: list.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-list.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-list.Tpo -c -o gsnap_avx512-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-list.Tpo $(DEPDIR)/gsnap_avx512-list.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='list.c' object='gsnap_avx512-list.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c + +gsnap_avx512-list.obj: list.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-list.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-list.Tpo -c -o gsnap_avx512-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-list.Tpo 
$(DEPDIR)/gsnap_avx512-list.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='list.c' object='gsnap_avx512-list.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi` + +gsnap_avx512-littleendian.o: littleendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-littleendian.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-littleendian.Tpo -c -o gsnap_avx512-littleendian.o `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-littleendian.Tpo $(DEPDIR)/gsnap_avx512-littleendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='littleendian.c' object='gsnap_avx512-littleendian.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-littleendian.o `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c + +gsnap_avx512-littleendian.obj: littleendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-littleendian.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-littleendian.Tpo -c -o gsnap_avx512-littleendian.obj `if test -f 'littleendian.c'; then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-littleendian.Tpo 
$(DEPDIR)/gsnap_avx512-littleendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='littleendian.c' object='gsnap_avx512-littleendian.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-littleendian.obj `if test -f 'littleendian.c'; then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi` + +gsnap_avx512-bigendian.o: bigendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bigendian.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-bigendian.Tpo -c -o gsnap_avx512-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bigendian.Tpo $(DEPDIR)/gsnap_avx512-bigendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bigendian.c' object='gsnap_avx512-bigendian.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c + +gsnap_avx512-bigendian.obj: bigendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bigendian.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-bigendian.Tpo -c -o gsnap_avx512-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bigendian.Tpo 
$(DEPDIR)/gsnap_avx512-bigendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bigendian.c' object='gsnap_avx512-bigendian.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi` + +gsnap_avx512-univinterval.o: univinterval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-univinterval.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-univinterval.Tpo -c -o gsnap_avx512-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-univinterval.Tpo $(DEPDIR)/gsnap_avx512-univinterval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='univinterval.c' object='gsnap_avx512-univinterval.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c + +gsnap_avx512-univinterval.obj: univinterval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-univinterval.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-univinterval.Tpo -c -o gsnap_avx512-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gsnap_avx512-univinterval.Tpo $(DEPDIR)/gsnap_avx512-univinterval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='univinterval.c' object='gsnap_avx512-univinterval.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi` + +gsnap_avx512-interval.o: interval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-interval.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-interval.Tpo -c -o gsnap_avx512-interval.o `test -f 'interval.c' || echo '$(srcdir)/'`interval.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-interval.Tpo $(DEPDIR)/gsnap_avx512-interval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='interval.c' object='gsnap_avx512-interval.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-interval.o `test -f 'interval.c' || echo '$(srcdir)/'`interval.c + +gsnap_avx512-interval.obj: interval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-interval.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-interval.Tpo -c -o gsnap_avx512-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-interval.Tpo 
$(DEPDIR)/gsnap_avx512-interval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='interval.c' object='gsnap_avx512-interval.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi` + +gsnap_avx512-uintlist.o: uintlist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-uintlist.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-uintlist.Tpo -c -o gsnap_avx512-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-uintlist.Tpo $(DEPDIR)/gsnap_avx512-uintlist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uintlist.c' object='gsnap_avx512-uintlist.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c + +gsnap_avx512-uintlist.obj: uintlist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-uintlist.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-uintlist.Tpo -c -o gsnap_avx512-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-uintlist.Tpo $(DEPDIR)/gsnap_avx512-uintlist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='uintlist.c' object='gsnap_avx512-uintlist.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi` + +gsnap_avx512-stopwatch.o: stopwatch.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stopwatch.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-stopwatch.Tpo -c -o gsnap_avx512-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stopwatch.Tpo $(DEPDIR)/gsnap_avx512-stopwatch.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stopwatch.c' object='gsnap_avx512-stopwatch.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c + +gsnap_avx512-stopwatch.obj: stopwatch.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stopwatch.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-stopwatch.Tpo -c -o gsnap_avx512-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stopwatch.Tpo $(DEPDIR)/gsnap_avx512-stopwatch.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stopwatch.c' 
object='gsnap_avx512-stopwatch.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi` + +gsnap_avx512-semaphore.o: semaphore.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-semaphore.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-semaphore.Tpo -c -o gsnap_avx512-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-semaphore.Tpo $(DEPDIR)/gsnap_avx512-semaphore.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='semaphore.c' object='gsnap_avx512-semaphore.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c + +gsnap_avx512-semaphore.obj: semaphore.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-semaphore.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-semaphore.Tpo -c -o gsnap_avx512-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-semaphore.Tpo $(DEPDIR)/gsnap_avx512-semaphore.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='semaphore.c' object='gsnap_avx512-semaphore.obj' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi` + +gsnap_avx512-access.o: access.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-access.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-access.Tpo -c -o gsnap_avx512-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-access.Tpo $(DEPDIR)/gsnap_avx512-access.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='access.c' object='gsnap_avx512-access.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c + +gsnap_avx512-access.obj: access.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-access.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-access.Tpo -c -o gsnap_avx512-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-access.Tpo $(DEPDIR)/gsnap_avx512-access.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='access.c' object='gsnap_avx512-access.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi` + +gsnap_avx512-filestring.o: filestring.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-filestring.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-filestring.Tpo -c -o gsnap_avx512-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-filestring.Tpo $(DEPDIR)/gsnap_avx512-filestring.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='filestring.c' object='gsnap_avx512-filestring.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c + +gsnap_avx512-filestring.obj: filestring.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-filestring.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-filestring.Tpo -c -o gsnap_avx512-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-filestring.Tpo $(DEPDIR)/gsnap_avx512-filestring.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='filestring.c' object='gsnap_avx512-filestring.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi` + +gsnap_avx512-iit-read-univ.o: iit-read-univ.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-iit-read-univ.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-iit-read-univ.Tpo -c -o gsnap_avx512-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-iit-read-univ.Tpo $(DEPDIR)/gsnap_avx512-iit-read-univ.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='iit-read-univ.c' object='gsnap_avx512-iit-read-univ.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c + +gsnap_avx512-iit-read-univ.obj: iit-read-univ.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-iit-read-univ.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-iit-read-univ.Tpo -c -o gsnap_avx512-iit-read-univ.obj `if test -f 'iit-read-univ.c'; then $(CYGPATH_W) 'iit-read-univ.c'; else $(CYGPATH_W) '$(srcdir)/iit-read-univ.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-iit-read-univ.Tpo $(DEPDIR)/gsnap_avx512-iit-read-univ.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='iit-read-univ.c' object='gsnap_avx512-iit-read-univ.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) 
$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-iit-read-univ.obj `if test -f 'iit-read-univ.c'; then $(CYGPATH_W) 'iit-read-univ.c'; else $(CYGPATH_W) '$(srcdir)/iit-read-univ.c'; fi` + +gsnap_avx512-iit-read.o: iit-read.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-iit-read.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-iit-read.Tpo -c -o gsnap_avx512-iit-read.o `test -f 'iit-read.c' || echo '$(srcdir)/'`iit-read.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-iit-read.Tpo $(DEPDIR)/gsnap_avx512-iit-read.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='iit-read.c' object='gsnap_avx512-iit-read.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-iit-read.o `test -f 'iit-read.c' || echo '$(srcdir)/'`iit-read.c + +gsnap_avx512-iit-read.obj: iit-read.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-iit-read.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-iit-read.Tpo -c -o gsnap_avx512-iit-read.obj `if test -f 'iit-read.c'; then $(CYGPATH_W) 'iit-read.c'; else $(CYGPATH_W) '$(srcdir)/iit-read.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-iit-read.Tpo $(DEPDIR)/gsnap_avx512-iit-read.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='iit-read.c' object='gsnap_avx512-iit-read.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-iit-read.obj `if test -f 'iit-read.c'; then $(CYGPATH_W) 'iit-read.c'; else $(CYGPATH_W) '$(srcdir)/iit-read.c'; fi` + +gsnap_avx512-md5.o: md5.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-md5.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-md5.Tpo -c -o gsnap_avx512-md5.o `test -f 'md5.c' || echo '$(srcdir)/'`md5.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-md5.Tpo $(DEPDIR)/gsnap_avx512-md5.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='md5.c' object='gsnap_avx512-md5.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-md5.o `test -f 'md5.c' || echo '$(srcdir)/'`md5.c + +gsnap_avx512-md5.obj: md5.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-md5.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-md5.Tpo -c -o gsnap_avx512-md5.obj `if test -f 'md5.c'; then $(CYGPATH_W) 'md5.c'; else $(CYGPATH_W) '$(srcdir)/md5.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-md5.Tpo $(DEPDIR)/gsnap_avx512-md5.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='md5.c' object='gsnap_avx512-md5.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-md5.obj `if test -f 'md5.c'; then $(CYGPATH_W) 'md5.c'; else 
$(CYGPATH_W) '$(srcdir)/md5.c'; fi` + +gsnap_avx512-bzip2.o: bzip2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bzip2.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-bzip2.Tpo -c -o gsnap_avx512-bzip2.o `test -f 'bzip2.c' || echo '$(srcdir)/'`bzip2.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bzip2.Tpo $(DEPDIR)/gsnap_avx512-bzip2.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bzip2.c' object='gsnap_avx512-bzip2.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bzip2.o `test -f 'bzip2.c' || echo '$(srcdir)/'`bzip2.c + +gsnap_avx512-bzip2.obj: bzip2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bzip2.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-bzip2.Tpo -c -o gsnap_avx512-bzip2.obj `if test -f 'bzip2.c'; then $(CYGPATH_W) 'bzip2.c'; else $(CYGPATH_W) '$(srcdir)/bzip2.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bzip2.Tpo $(DEPDIR)/gsnap_avx512-bzip2.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bzip2.c' object='gsnap_avx512-bzip2.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bzip2.obj `if test -f 'bzip2.c'; then $(CYGPATH_W) 'bzip2.c'; else $(CYGPATH_W) '$(srcdir)/bzip2.c'; fi` + +gsnap_avx512-sequence.o: sequence.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) 
$(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-sequence.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-sequence.Tpo -c -o gsnap_avx512-sequence.o `test -f 'sequence.c' || echo '$(srcdir)/'`sequence.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-sequence.Tpo $(DEPDIR)/gsnap_avx512-sequence.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sequence.c' object='gsnap_avx512-sequence.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-sequence.o `test -f 'sequence.c' || echo '$(srcdir)/'`sequence.c + +gsnap_avx512-sequence.obj: sequence.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-sequence.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-sequence.Tpo -c -o gsnap_avx512-sequence.obj `if test -f 'sequence.c'; then $(CYGPATH_W) 'sequence.c'; else $(CYGPATH_W) '$(srcdir)/sequence.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-sequence.Tpo $(DEPDIR)/gsnap_avx512-sequence.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sequence.c' object='gsnap_avx512-sequence.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-sequence.obj `if test -f 'sequence.c'; then $(CYGPATH_W) 'sequence.c'; else $(CYGPATH_W) '$(srcdir)/sequence.c'; fi` + +gsnap_avx512-reader.o: reader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-reader.o -MD -MP -MF 
$(DEPDIR)/gsnap_avx512-reader.Tpo -c -o gsnap_avx512-reader.o `test -f 'reader.c' || echo '$(srcdir)/'`reader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-reader.Tpo $(DEPDIR)/gsnap_avx512-reader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='reader.c' object='gsnap_avx512-reader.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-reader.o `test -f 'reader.c' || echo '$(srcdir)/'`reader.c + +gsnap_avx512-reader.obj: reader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-reader.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-reader.Tpo -c -o gsnap_avx512-reader.obj `if test -f 'reader.c'; then $(CYGPATH_W) 'reader.c'; else $(CYGPATH_W) '$(srcdir)/reader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-reader.Tpo $(DEPDIR)/gsnap_avx512-reader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='reader.c' object='gsnap_avx512-reader.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-reader.obj `if test -f 'reader.c'; then $(CYGPATH_W) 'reader.c'; else $(CYGPATH_W) '$(srcdir)/reader.c'; fi` + +gsnap_avx512-genomicpos.o: genomicpos.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-genomicpos.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-genomicpos.Tpo -c -o gsnap_avx512-genomicpos.o `test -f 'genomicpos.c' || echo '$(srcdir)/'`genomicpos.c 
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-genomicpos.Tpo $(DEPDIR)/gsnap_avx512-genomicpos.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genomicpos.c' object='gsnap_avx512-genomicpos.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-genomicpos.o `test -f 'genomicpos.c' || echo '$(srcdir)/'`genomicpos.c + +gsnap_avx512-genomicpos.obj: genomicpos.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-genomicpos.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-genomicpos.Tpo -c -o gsnap_avx512-genomicpos.obj `if test -f 'genomicpos.c'; then $(CYGPATH_W) 'genomicpos.c'; else $(CYGPATH_W) '$(srcdir)/genomicpos.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-genomicpos.Tpo $(DEPDIR)/gsnap_avx512-genomicpos.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genomicpos.c' object='gsnap_avx512-genomicpos.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-genomicpos.obj `if test -f 'genomicpos.c'; then $(CYGPATH_W) 'genomicpos.c'; else $(CYGPATH_W) '$(srcdir)/genomicpos.c'; fi` + +gsnap_avx512-compress.o: compress.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-compress.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-compress.Tpo -c -o gsnap_avx512-compress.o `test -f 'compress.c' || echo '$(srcdir)/'`compress.c +@am__fastdepCC_TRUE@ 
$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-compress.Tpo $(DEPDIR)/gsnap_avx512-compress.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compress.c' object='gsnap_avx512-compress.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-compress.o `test -f 'compress.c' || echo '$(srcdir)/'`compress.c + +gsnap_avx512-compress.obj: compress.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-compress.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-compress.Tpo -c -o gsnap_avx512-compress.obj `if test -f 'compress.c'; then $(CYGPATH_W) 'compress.c'; else $(CYGPATH_W) '$(srcdir)/compress.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-compress.Tpo $(DEPDIR)/gsnap_avx512-compress.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compress.c' object='gsnap_avx512-compress.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-compress.obj `if test -f 'compress.c'; then $(CYGPATH_W) 'compress.c'; else $(CYGPATH_W) '$(srcdir)/compress.c'; fi` + +gsnap_avx512-genome.o: genome.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-genome.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-genome.Tpo -c -o gsnap_avx512-genome.o `test -f 'genome.c' || echo '$(srcdir)/'`genome.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-genome.Tpo $(DEPDIR)/gsnap_avx512-genome.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome.c' object='gsnap_avx512-genome.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-genome.o `test -f 'genome.c' || echo '$(srcdir)/'`genome.c + +gsnap_avx512-genome.obj: genome.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-genome.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-genome.Tpo -c -o gsnap_avx512-genome.obj `if test -f 'genome.c'; then $(CYGPATH_W) 'genome.c'; else $(CYGPATH_W) '$(srcdir)/genome.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-genome.Tpo $(DEPDIR)/gsnap_avx512-genome.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome.c' object='gsnap_avx512-genome.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-genome.obj `if test -f 'genome.c'; then $(CYGPATH_W) 'genome.c'; else $(CYGPATH_W) '$(srcdir)/genome.c'; fi` + +gsnap_avx512-popcount.o: popcount.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-popcount.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-popcount.Tpo -c -o gsnap_avx512-popcount.o `test -f 'popcount.c' || echo '$(srcdir)/'`popcount.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-popcount.Tpo $(DEPDIR)/gsnap_avx512-popcount.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='popcount.c' object='gsnap_avx512-popcount.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-popcount.o `test -f 'popcount.c' || echo '$(srcdir)/'`popcount.c + +gsnap_avx512-popcount.obj: popcount.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-popcount.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-popcount.Tpo -c -o gsnap_avx512-popcount.obj `if test -f 'popcount.c'; then $(CYGPATH_W) 'popcount.c'; else $(CYGPATH_W) '$(srcdir)/popcount.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-popcount.Tpo $(DEPDIR)/gsnap_avx512-popcount.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='popcount.c' object='gsnap_avx512-popcount.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-popcount.obj `if test -f 'popcount.c'; then $(CYGPATH_W) 'popcount.c'; else $(CYGPATH_W) '$(srcdir)/popcount.c'; fi` + +gsnap_avx512-genome128_hr.o: genome128_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-genome128_hr.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-genome128_hr.Tpo -c -o gsnap_avx512-genome128_hr.o `test -f 'genome128_hr.c' || echo '$(srcdir)/'`genome128_hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-genome128_hr.Tpo $(DEPDIR)/gsnap_avx512-genome128_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome128_hr.c' object='gsnap_avx512-genome128_hr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-genome128_hr.o `test -f 'genome128_hr.c' || echo '$(srcdir)/'`genome128_hr.c + +gsnap_avx512-genome128_hr.obj: genome128_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-genome128_hr.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-genome128_hr.Tpo -c -o gsnap_avx512-genome128_hr.obj `if test -f 'genome128_hr.c'; then $(CYGPATH_W) 'genome128_hr.c'; else $(CYGPATH_W) '$(srcdir)/genome128_hr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-genome128_hr.Tpo $(DEPDIR)/gsnap_avx512-genome128_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome128_hr.c' object='gsnap_avx512-genome128_hr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-genome128_hr.obj `if test -f 'genome128_hr.c'; then $(CYGPATH_W) 'genome128_hr.c'; else $(CYGPATH_W) '$(srcdir)/genome128_hr.c'; fi` + +gsnap_avx512-genome_sites.o: genome_sites.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-genome_sites.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-genome_sites.Tpo -c -o gsnap_avx512-genome_sites.o `test -f 'genome_sites.c' || echo '$(srcdir)/'`genome_sites.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-genome_sites.Tpo $(DEPDIR)/gsnap_avx512-genome_sites.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome_sites.c' object='gsnap_avx512-genome_sites.o' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-genome_sites.o `test -f 'genome_sites.c' || echo '$(srcdir)/'`genome_sites.c + +gsnap_avx512-genome_sites.obj: genome_sites.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-genome_sites.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-genome_sites.Tpo -c -o gsnap_avx512-genome_sites.obj `if test -f 'genome_sites.c'; then $(CYGPATH_W) 'genome_sites.c'; else $(CYGPATH_W) '$(srcdir)/genome_sites.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-genome_sites.Tpo $(DEPDIR)/gsnap_avx512-genome_sites.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome_sites.c' object='gsnap_avx512-genome_sites.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-genome_sites.obj `if test -f 'genome_sites.c'; then $(CYGPATH_W) 'genome_sites.c'; else $(CYGPATH_W) '$(srcdir)/genome_sites.c'; fi` + +gsnap_avx512-bitpack64-read.o: bitpack64-read.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bitpack64-read.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-bitpack64-read.Tpo -c -o gsnap_avx512-bitpack64-read.o `test -f 'bitpack64-read.c' || echo '$(srcdir)/'`bitpack64-read.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bitpack64-read.Tpo $(DEPDIR)/gsnap_avx512-bitpack64-read.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='bitpack64-read.c' object='gsnap_avx512-bitpack64-read.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bitpack64-read.o `test -f 'bitpack64-read.c' || echo '$(srcdir)/'`bitpack64-read.c + +gsnap_avx512-bitpack64-read.obj: bitpack64-read.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bitpack64-read.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-bitpack64-read.Tpo -c -o gsnap_avx512-bitpack64-read.obj `if test -f 'bitpack64-read.c'; then $(CYGPATH_W) 'bitpack64-read.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-read.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bitpack64-read.Tpo $(DEPDIR)/gsnap_avx512-bitpack64-read.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bitpack64-read.c' object='gsnap_avx512-bitpack64-read.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bitpack64-read.obj `if test -f 'bitpack64-read.c'; then $(CYGPATH_W) 'bitpack64-read.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-read.c'; fi` + +gsnap_avx512-bitpack64-readtwo.o: bitpack64-readtwo.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bitpack64-readtwo.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-bitpack64-readtwo.Tpo -c -o gsnap_avx512-bitpack64-readtwo.o `test -f 'bitpack64-readtwo.c' || echo '$(srcdir)/'`bitpack64-readtwo.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gsnap_avx512-bitpack64-readtwo.Tpo $(DEPDIR)/gsnap_avx512-bitpack64-readtwo.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bitpack64-readtwo.c' object='gsnap_avx512-bitpack64-readtwo.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bitpack64-readtwo.o `test -f 'bitpack64-readtwo.c' || echo '$(srcdir)/'`bitpack64-readtwo.c + +gsnap_avx512-bitpack64-readtwo.obj: bitpack64-readtwo.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bitpack64-readtwo.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-bitpack64-readtwo.Tpo -c -o gsnap_avx512-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bitpack64-readtwo.Tpo $(DEPDIR)/gsnap_avx512-bitpack64-readtwo.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bitpack64-readtwo.c' object='gsnap_avx512-bitpack64-readtwo.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` + +gsnap_avx512-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-merge.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-merge.Tpo -c -o 
gsnap_avx512-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-merge.Tpo $(DEPDIR)/gsnap_avx512-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnap_avx512-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gsnap_avx512-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-merge.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-merge.Tpo -c -o gsnap_avx512-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-merge.Tpo $(DEPDIR)/gsnap_avx512-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnap_avx512-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + +gsnap_avx512-indexdb.o: indexdb.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-indexdb.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-indexdb.Tpo -c -o gsnap_avx512-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-indexdb.Tpo 
$(DEPDIR)/gsnap_avx512-indexdb.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='indexdb.c' object='gsnap_avx512-indexdb.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c + +gsnap_avx512-indexdb.obj: indexdb.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-indexdb.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-indexdb.Tpo -c -o gsnap_avx512-indexdb.obj `if test -f 'indexdb.c'; then $(CYGPATH_W) 'indexdb.c'; else $(CYGPATH_W) '$(srcdir)/indexdb.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-indexdb.Tpo $(DEPDIR)/gsnap_avx512-indexdb.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='indexdb.c' object='gsnap_avx512-indexdb.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-indexdb.obj `if test -f 'indexdb.c'; then $(CYGPATH_W) 'indexdb.c'; else $(CYGPATH_W) '$(srcdir)/indexdb.c'; fi` + +gsnap_avx512-indexdb_hr.o: indexdb_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-indexdb_hr.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-indexdb_hr.Tpo -c -o gsnap_avx512-indexdb_hr.o `test -f 'indexdb_hr.c' || echo '$(srcdir)/'`indexdb_hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-indexdb_hr.Tpo $(DEPDIR)/gsnap_avx512-indexdb_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='indexdb_hr.c' object='gsnap_avx512-indexdb_hr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-indexdb_hr.o `test -f 'indexdb_hr.c' || echo '$(srcdir)/'`indexdb_hr.c + +gsnap_avx512-indexdb_hr.obj: indexdb_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-indexdb_hr.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-indexdb_hr.Tpo -c -o gsnap_avx512-indexdb_hr.obj `if test -f 'indexdb_hr.c'; then $(CYGPATH_W) 'indexdb_hr.c'; else $(CYGPATH_W) '$(srcdir)/indexdb_hr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-indexdb_hr.Tpo $(DEPDIR)/gsnap_avx512-indexdb_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='indexdb_hr.c' object='gsnap_avx512-indexdb_hr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-indexdb_hr.obj `if test -f 'indexdb_hr.c'; then $(CYGPATH_W) 'indexdb_hr.c'; else $(CYGPATH_W) '$(srcdir)/indexdb_hr.c'; fi` + +gsnap_avx512-oligo.o: oligo.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-oligo.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-oligo.Tpo -c -o gsnap_avx512-oligo.o `test -f 'oligo.c' || echo '$(srcdir)/'`oligo.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-oligo.Tpo $(DEPDIR)/gsnap_avx512-oligo.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='oligo.c' object='gsnap_avx512-oligo.o' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-oligo.o `test -f 'oligo.c' || echo '$(srcdir)/'`oligo.c + +gsnap_avx512-oligo.obj: oligo.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-oligo.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-oligo.Tpo -c -o gsnap_avx512-oligo.obj `if test -f 'oligo.c'; then $(CYGPATH_W) 'oligo.c'; else $(CYGPATH_W) '$(srcdir)/oligo.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-oligo.Tpo $(DEPDIR)/gsnap_avx512-oligo.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='oligo.c' object='gsnap_avx512-oligo.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-oligo.obj `if test -f 'oligo.c'; then $(CYGPATH_W) 'oligo.c'; else $(CYGPATH_W) '$(srcdir)/oligo.c'; fi` + +gsnap_avx512-chrom.o: chrom.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-chrom.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-chrom.Tpo -c -o gsnap_avx512-chrom.o `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-chrom.Tpo $(DEPDIR)/gsnap_avx512-chrom.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chrom.c' object='gsnap_avx512-chrom.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-chrom.o `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c + +gsnap_avx512-chrom.obj: chrom.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-chrom.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-chrom.Tpo -c -o gsnap_avx512-chrom.obj `if test -f 'chrom.c'; then $(CYGPATH_W) 'chrom.c'; else $(CYGPATH_W) '$(srcdir)/chrom.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-chrom.Tpo $(DEPDIR)/gsnap_avx512-chrom.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chrom.c' object='gsnap_avx512-chrom.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-chrom.obj `if test -f 'chrom.c'; then $(CYGPATH_W) 'chrom.c'; else $(CYGPATH_W) '$(srcdir)/chrom.c'; fi` + +gsnap_avx512-segmentpos.o: segmentpos.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-segmentpos.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-segmentpos.Tpo -c -o gsnap_avx512-segmentpos.o `test -f 'segmentpos.c' || echo '$(srcdir)/'`segmentpos.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-segmentpos.Tpo $(DEPDIR)/gsnap_avx512-segmentpos.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='segmentpos.c' object='gsnap_avx512-segmentpos.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o 
gsnap_avx512-segmentpos.o `test -f 'segmentpos.c' || echo '$(srcdir)/'`segmentpos.c + +gsnap_avx512-segmentpos.obj: segmentpos.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-segmentpos.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-segmentpos.Tpo -c -o gsnap_avx512-segmentpos.obj `if test -f 'segmentpos.c'; then $(CYGPATH_W) 'segmentpos.c'; else $(CYGPATH_W) '$(srcdir)/segmentpos.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-segmentpos.Tpo $(DEPDIR)/gsnap_avx512-segmentpos.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='segmentpos.c' object='gsnap_avx512-segmentpos.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-segmentpos.obj `if test -f 'segmentpos.c'; then $(CYGPATH_W) 'segmentpos.c'; else $(CYGPATH_W) '$(srcdir)/segmentpos.c'; fi` + +gsnap_avx512-chrnum.o: chrnum.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-chrnum.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-chrnum.Tpo -c -o gsnap_avx512-chrnum.o `test -f 'chrnum.c' || echo '$(srcdir)/'`chrnum.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-chrnum.Tpo $(DEPDIR)/gsnap_avx512-chrnum.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chrnum.c' object='gsnap_avx512-chrnum.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-chrnum.o `test -f 'chrnum.c' || echo 
'$(srcdir)/'`chrnum.c + +gsnap_avx512-chrnum.obj: chrnum.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-chrnum.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-chrnum.Tpo -c -o gsnap_avx512-chrnum.obj `if test -f 'chrnum.c'; then $(CYGPATH_W) 'chrnum.c'; else $(CYGPATH_W) '$(srcdir)/chrnum.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-chrnum.Tpo $(DEPDIR)/gsnap_avx512-chrnum.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chrnum.c' object='gsnap_avx512-chrnum.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-chrnum.obj `if test -f 'chrnum.c'; then $(CYGPATH_W) 'chrnum.c'; else $(CYGPATH_W) '$(srcdir)/chrnum.c'; fi` + +gsnap_avx512-maxent_hr.o: maxent_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-maxent_hr.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-maxent_hr.Tpo -c -o gsnap_avx512-maxent_hr.o `test -f 'maxent_hr.c' || echo '$(srcdir)/'`maxent_hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-maxent_hr.Tpo $(DEPDIR)/gsnap_avx512-maxent_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='maxent_hr.c' object='gsnap_avx512-maxent_hr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-maxent_hr.o `test -f 'maxent_hr.c' || echo '$(srcdir)/'`maxent_hr.c + +gsnap_avx512-maxent_hr.obj: maxent_hr.c +@am__fastdepCC_TRUE@ 
$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-maxent_hr.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-maxent_hr.Tpo -c -o gsnap_avx512-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-maxent_hr.Tpo $(DEPDIR)/gsnap_avx512-maxent_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='maxent_hr.c' object='gsnap_avx512-maxent_hr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi` + +gsnap_avx512-samprint.o: samprint.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-samprint.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-samprint.Tpo -c -o gsnap_avx512-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-samprint.Tpo $(DEPDIR)/gsnap_avx512-samprint.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='samprint.c' object='gsnap_avx512-samprint.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c + +gsnap_avx512-samprint.obj: samprint.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-samprint.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-samprint.Tpo -c -o gsnap_avx512-samprint.obj `if test -f 'samprint.c'; then $(CYGPATH_W) 'samprint.c'; else $(CYGPATH_W) '$(srcdir)/samprint.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-samprint.Tpo $(DEPDIR)/gsnap_avx512-samprint.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='samprint.c' object='gsnap_avx512-samprint.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-samprint.obj `if test -f 'samprint.c'; then $(CYGPATH_W) 'samprint.c'; else $(CYGPATH_W) '$(srcdir)/samprint.c'; fi` + +gsnap_avx512-mapq.o: mapq.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-mapq.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-mapq.Tpo -c -o gsnap_avx512-mapq.o `test -f 'mapq.c' || echo '$(srcdir)/'`mapq.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-mapq.Tpo $(DEPDIR)/gsnap_avx512-mapq.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mapq.c' object='gsnap_avx512-mapq.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-mapq.o `test -f 'mapq.c' || echo '$(srcdir)/'`mapq.c + +gsnap_avx512-mapq.obj: mapq.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-mapq.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-mapq.Tpo -c -o 
gsnap_avx512-mapq.obj `if test -f 'mapq.c'; then $(CYGPATH_W) 'mapq.c'; else $(CYGPATH_W) '$(srcdir)/mapq.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-mapq.Tpo $(DEPDIR)/gsnap_avx512-mapq.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mapq.c' object='gsnap_avx512-mapq.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-mapq.obj `if test -f 'mapq.c'; then $(CYGPATH_W) 'mapq.c'; else $(CYGPATH_W) '$(srcdir)/mapq.c'; fi` + +gsnap_avx512-shortread.o: shortread.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-shortread.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-shortread.Tpo -c -o gsnap_avx512-shortread.o `test -f 'shortread.c' || echo '$(srcdir)/'`shortread.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-shortread.Tpo $(DEPDIR)/gsnap_avx512-shortread.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='shortread.c' object='gsnap_avx512-shortread.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-shortread.o `test -f 'shortread.c' || echo '$(srcdir)/'`shortread.c + +gsnap_avx512-shortread.obj: shortread.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-shortread.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-shortread.Tpo -c -o gsnap_avx512-shortread.obj `if test -f 'shortread.c'; then $(CYGPATH_W) 'shortread.c'; else $(CYGPATH_W) 
'$(srcdir)/shortread.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-shortread.Tpo $(DEPDIR)/gsnap_avx512-shortread.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='shortread.c' object='gsnap_avx512-shortread.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-shortread.obj `if test -f 'shortread.c'; then $(CYGPATH_W) 'shortread.c'; else $(CYGPATH_W) '$(srcdir)/shortread.c'; fi` + +gsnap_avx512-substring.o: substring.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-substring.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-substring.Tpo -c -o gsnap_avx512-substring.o `test -f 'substring.c' || echo '$(srcdir)/'`substring.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-substring.Tpo $(DEPDIR)/gsnap_avx512-substring.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='substring.c' object='gsnap_avx512-substring.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-substring.o `test -f 'substring.c' || echo '$(srcdir)/'`substring.c + +gsnap_avx512-substring.obj: substring.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-substring.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-substring.Tpo -c -o gsnap_avx512-substring.obj `if test -f 'substring.c'; then $(CYGPATH_W) 'substring.c'; else $(CYGPATH_W) '$(srcdir)/substring.c'; fi` +@am__fastdepCC_TRUE@ 
$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-substring.Tpo $(DEPDIR)/gsnap_avx512-substring.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='substring.c' object='gsnap_avx512-substring.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-substring.obj `if test -f 'substring.c'; then $(CYGPATH_W) 'substring.c'; else $(CYGPATH_W) '$(srcdir)/substring.c'; fi` + +gsnap_avx512-junction.o: junction.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-junction.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-junction.Tpo -c -o gsnap_avx512-junction.o `test -f 'junction.c' || echo '$(srcdir)/'`junction.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-junction.Tpo $(DEPDIR)/gsnap_avx512-junction.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='junction.c' object='gsnap_avx512-junction.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-junction.o `test -f 'junction.c' || echo '$(srcdir)/'`junction.c + +gsnap_avx512-junction.obj: junction.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-junction.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-junction.Tpo -c -o gsnap_avx512-junction.obj `if test -f 'junction.c'; then $(CYGPATH_W) 'junction.c'; else $(CYGPATH_W) '$(srcdir)/junction.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-junction.Tpo 
$(DEPDIR)/gsnap_avx512-junction.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='junction.c' object='gsnap_avx512-junction.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-junction.obj `if test -f 'junction.c'; then $(CYGPATH_W) 'junction.c'; else $(CYGPATH_W) '$(srcdir)/junction.c'; fi` + +gsnap_avx512-stage3hr.o: stage3hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stage3hr.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-stage3hr.Tpo -c -o gsnap_avx512-stage3hr.o `test -f 'stage3hr.c' || echo '$(srcdir)/'`stage3hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stage3hr.Tpo $(DEPDIR)/gsnap_avx512-stage3hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage3hr.c' object='gsnap_avx512-stage3hr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stage3hr.o `test -f 'stage3hr.c' || echo '$(srcdir)/'`stage3hr.c + +gsnap_avx512-stage3hr.obj: stage3hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stage3hr.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-stage3hr.Tpo -c -o gsnap_avx512-stage3hr.obj `if test -f 'stage3hr.c'; then $(CYGPATH_W) 'stage3hr.c'; else $(CYGPATH_W) '$(srcdir)/stage3hr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stage3hr.Tpo $(DEPDIR)/gsnap_avx512-stage3hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='stage3hr.c' object='gsnap_avx512-stage3hr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stage3hr.obj `if test -f 'stage3hr.c'; then $(CYGPATH_W) 'stage3hr.c'; else $(CYGPATH_W) '$(srcdir)/stage3hr.c'; fi` + +gsnap_avx512-spanningelt.o: spanningelt.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-spanningelt.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-spanningelt.Tpo -c -o gsnap_avx512-spanningelt.o `test -f 'spanningelt.c' || echo '$(srcdir)/'`spanningelt.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-spanningelt.Tpo $(DEPDIR)/gsnap_avx512-spanningelt.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='spanningelt.c' object='gsnap_avx512-spanningelt.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-spanningelt.o `test -f 'spanningelt.c' || echo '$(srcdir)/'`spanningelt.c + +gsnap_avx512-spanningelt.obj: spanningelt.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-spanningelt.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-spanningelt.Tpo -c -o gsnap_avx512-spanningelt.obj `if test -f 'spanningelt.c'; then $(CYGPATH_W) 'spanningelt.c'; else $(CYGPATH_W) '$(srcdir)/spanningelt.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-spanningelt.Tpo $(DEPDIR)/gsnap_avx512-spanningelt.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='spanningelt.c' object='gsnap_avx512-spanningelt.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-spanningelt.obj `if test -f 'spanningelt.c'; then $(CYGPATH_W) 'spanningelt.c'; else $(CYGPATH_W) '$(srcdir)/spanningelt.c'; fi` + +gsnap_avx512-cmet.o: cmet.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-cmet.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-cmet.Tpo -c -o gsnap_avx512-cmet.o `test -f 'cmet.c' || echo '$(srcdir)/'`cmet.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-cmet.Tpo $(DEPDIR)/gsnap_avx512-cmet.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cmet.c' object='gsnap_avx512-cmet.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-cmet.o `test -f 'cmet.c' || echo '$(srcdir)/'`cmet.c + +gsnap_avx512-cmet.obj: cmet.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-cmet.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-cmet.Tpo -c -o gsnap_avx512-cmet.obj `if test -f 'cmet.c'; then $(CYGPATH_W) 'cmet.c'; else $(CYGPATH_W) '$(srcdir)/cmet.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-cmet.Tpo $(DEPDIR)/gsnap_avx512-cmet.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cmet.c' object='gsnap_avx512-cmet.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) 
@AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-cmet.obj `if test -f 'cmet.c'; then $(CYGPATH_W) 'cmet.c'; else $(CYGPATH_W) '$(srcdir)/cmet.c'; fi` + +gsnap_avx512-atoi.o: atoi.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-atoi.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-atoi.Tpo -c -o gsnap_avx512-atoi.o `test -f 'atoi.c' || echo '$(srcdir)/'`atoi.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-atoi.Tpo $(DEPDIR)/gsnap_avx512-atoi.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='atoi.c' object='gsnap_avx512-atoi.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-atoi.o `test -f 'atoi.c' || echo '$(srcdir)/'`atoi.c + +gsnap_avx512-atoi.obj: atoi.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-atoi.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-atoi.Tpo -c -o gsnap_avx512-atoi.obj `if test -f 'atoi.c'; then $(CYGPATH_W) 'atoi.c'; else $(CYGPATH_W) '$(srcdir)/atoi.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-atoi.Tpo $(DEPDIR)/gsnap_avx512-atoi.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='atoi.c' object='gsnap_avx512-atoi.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-atoi.obj `if test 
-f 'atoi.c'; then $(CYGPATH_W) 'atoi.c'; else $(CYGPATH_W) '$(srcdir)/atoi.c'; fi` + +gsnap_avx512-maxent.o: maxent.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-maxent.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-maxent.Tpo -c -o gsnap_avx512-maxent.o `test -f 'maxent.c' || echo '$(srcdir)/'`maxent.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-maxent.Tpo $(DEPDIR)/gsnap_avx512-maxent.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='maxent.c' object='gsnap_avx512-maxent.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-maxent.o `test -f 'maxent.c' || echo '$(srcdir)/'`maxent.c + +gsnap_avx512-maxent.obj: maxent.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-maxent.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-maxent.Tpo -c -o gsnap_avx512-maxent.obj `if test -f 'maxent.c'; then $(CYGPATH_W) 'maxent.c'; else $(CYGPATH_W) '$(srcdir)/maxent.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-maxent.Tpo $(DEPDIR)/gsnap_avx512-maxent.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='maxent.c' object='gsnap_avx512-maxent.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-maxent.obj `if test -f 'maxent.c'; then $(CYGPATH_W) 'maxent.c'; else $(CYGPATH_W) '$(srcdir)/maxent.c'; fi` + +gsnap_avx512-pair.o: pair.c +@am__fastdepCC_TRUE@ 
$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-pair.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-pair.Tpo -c -o gsnap_avx512-pair.o `test -f 'pair.c' || echo '$(srcdir)/'`pair.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-pair.Tpo $(DEPDIR)/gsnap_avx512-pair.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pair.c' object='gsnap_avx512-pair.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-pair.o `test -f 'pair.c' || echo '$(srcdir)/'`pair.c + +gsnap_avx512-pair.obj: pair.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-pair.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-pair.Tpo -c -o gsnap_avx512-pair.obj `if test -f 'pair.c'; then $(CYGPATH_W) 'pair.c'; else $(CYGPATH_W) '$(srcdir)/pair.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-pair.Tpo $(DEPDIR)/gsnap_avx512-pair.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pair.c' object='gsnap_avx512-pair.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-pair.obj `if test -f 'pair.c'; then $(CYGPATH_W) 'pair.c'; else $(CYGPATH_W) '$(srcdir)/pair.c'; fi` + +gsnap_avx512-pairpool.o: pairpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-pairpool.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-pairpool.Tpo -c 
-o gsnap_avx512-pairpool.o `test -f 'pairpool.c' || echo '$(srcdir)/'`pairpool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-pairpool.Tpo $(DEPDIR)/gsnap_avx512-pairpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pairpool.c' object='gsnap_avx512-pairpool.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-pairpool.o `test -f 'pairpool.c' || echo '$(srcdir)/'`pairpool.c + +gsnap_avx512-pairpool.obj: pairpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-pairpool.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-pairpool.Tpo -c -o gsnap_avx512-pairpool.obj `if test -f 'pairpool.c'; then $(CYGPATH_W) 'pairpool.c'; else $(CYGPATH_W) '$(srcdir)/pairpool.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-pairpool.Tpo $(DEPDIR)/gsnap_avx512-pairpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pairpool.c' object='gsnap_avx512-pairpool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-pairpool.obj `if test -f 'pairpool.c'; then $(CYGPATH_W) 'pairpool.c'; else $(CYGPATH_W) '$(srcdir)/pairpool.c'; fi` + +gsnap_avx512-diag.o: diag.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-diag.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-diag.Tpo -c -o gsnap_avx512-diag.o `test -f 'diag.c' || echo '$(srcdir)/'`diag.c +@am__fastdepCC_TRUE@ 
$(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-diag.Tpo $(DEPDIR)/gsnap_avx512-diag.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='diag.c' object='gsnap_avx512-diag.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-diag.o `test -f 'diag.c' || echo '$(srcdir)/'`diag.c + +gsnap_avx512-diag.obj: diag.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-diag.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-diag.Tpo -c -o gsnap_avx512-diag.obj `if test -f 'diag.c'; then $(CYGPATH_W) 'diag.c'; else $(CYGPATH_W) '$(srcdir)/diag.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-diag.Tpo $(DEPDIR)/gsnap_avx512-diag.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='diag.c' object='gsnap_avx512-diag.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-diag.obj `if test -f 'diag.c'; then $(CYGPATH_W) 'diag.c'; else $(CYGPATH_W) '$(srcdir)/diag.c'; fi` + +gsnap_avx512-diagpool.o: diagpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-diagpool.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-diagpool.Tpo -c -o gsnap_avx512-diagpool.o `test -f 'diagpool.c' || echo '$(srcdir)/'`diagpool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-diagpool.Tpo $(DEPDIR)/gsnap_avx512-diagpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='diagpool.c' 
object='gsnap_avx512-diagpool.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-diagpool.o `test -f 'diagpool.c' || echo '$(srcdir)/'`diagpool.c + +gsnap_avx512-diagpool.obj: diagpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-diagpool.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-diagpool.Tpo -c -o gsnap_avx512-diagpool.obj `if test -f 'diagpool.c'; then $(CYGPATH_W) 'diagpool.c'; else $(CYGPATH_W) '$(srcdir)/diagpool.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-diagpool.Tpo $(DEPDIR)/gsnap_avx512-diagpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='diagpool.c' object='gsnap_avx512-diagpool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-diagpool.obj `if test -f 'diagpool.c'; then $(CYGPATH_W) 'diagpool.c'; else $(CYGPATH_W) '$(srcdir)/diagpool.c'; fi` + +gsnap_avx512-orderstat.o: orderstat.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-orderstat.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-orderstat.Tpo -c -o gsnap_avx512-orderstat.o `test -f 'orderstat.c' || echo '$(srcdir)/'`orderstat.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-orderstat.Tpo $(DEPDIR)/gsnap_avx512-orderstat.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='orderstat.c' object='gsnap_avx512-orderstat.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-orderstat.o `test -f 'orderstat.c' || echo '$(srcdir)/'`orderstat.c + +gsnap_avx512-orderstat.obj: orderstat.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-orderstat.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-orderstat.Tpo -c -o gsnap_avx512-orderstat.obj `if test -f 'orderstat.c'; then $(CYGPATH_W) 'orderstat.c'; else $(CYGPATH_W) '$(srcdir)/orderstat.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-orderstat.Tpo $(DEPDIR)/gsnap_avx512-orderstat.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='orderstat.c' object='gsnap_avx512-orderstat.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-orderstat.obj `if test -f 'orderstat.c'; then $(CYGPATH_W) 'orderstat.c'; else $(CYGPATH_W) '$(srcdir)/orderstat.c'; fi` + +gsnap_avx512-oligoindex_hr.o: oligoindex_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-oligoindex_hr.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-oligoindex_hr.Tpo -c -o gsnap_avx512-oligoindex_hr.o `test -f 'oligoindex_hr.c' || echo '$(srcdir)/'`oligoindex_hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-oligoindex_hr.Tpo $(DEPDIR)/gsnap_avx512-oligoindex_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='oligoindex_hr.c' object='gsnap_avx512-oligoindex_hr.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-oligoindex_hr.o `test -f 'oligoindex_hr.c' || echo '$(srcdir)/'`oligoindex_hr.c + +gsnap_avx512-oligoindex_hr.obj: oligoindex_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-oligoindex_hr.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-oligoindex_hr.Tpo -c -o gsnap_avx512-oligoindex_hr.obj `if test -f 'oligoindex_hr.c'; then $(CYGPATH_W) 'oligoindex_hr.c'; else $(CYGPATH_W) '$(srcdir)/oligoindex_hr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-oligoindex_hr.Tpo $(DEPDIR)/gsnap_avx512-oligoindex_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='oligoindex_hr.c' object='gsnap_avx512-oligoindex_hr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-oligoindex_hr.obj `if test -f 'oligoindex_hr.c'; then $(CYGPATH_W) 'oligoindex_hr.c'; else $(CYGPATH_W) '$(srcdir)/oligoindex_hr.c'; fi` + +gsnap_avx512-cellpool.o: cellpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-cellpool.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-cellpool.Tpo -c -o gsnap_avx512-cellpool.o `test -f 'cellpool.c' || echo '$(srcdir)/'`cellpool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-cellpool.Tpo $(DEPDIR)/gsnap_avx512-cellpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cellpool.c' object='gsnap_avx512-cellpool.o' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-cellpool.o `test -f 'cellpool.c' || echo '$(srcdir)/'`cellpool.c + +gsnap_avx512-cellpool.obj: cellpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-cellpool.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-cellpool.Tpo -c -o gsnap_avx512-cellpool.obj `if test -f 'cellpool.c'; then $(CYGPATH_W) 'cellpool.c'; else $(CYGPATH_W) '$(srcdir)/cellpool.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-cellpool.Tpo $(DEPDIR)/gsnap_avx512-cellpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cellpool.c' object='gsnap_avx512-cellpool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-cellpool.obj `if test -f 'cellpool.c'; then $(CYGPATH_W) 'cellpool.c'; else $(CYGPATH_W) '$(srcdir)/cellpool.c'; fi` + +gsnap_avx512-stage2.o: stage2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stage2.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-stage2.Tpo -c -o gsnap_avx512-stage2.o `test -f 'stage2.c' || echo '$(srcdir)/'`stage2.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stage2.Tpo $(DEPDIR)/gsnap_avx512-stage2.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage2.c' object='gsnap_avx512-stage2.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) 
@AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stage2.o `test -f 'stage2.c' || echo '$(srcdir)/'`stage2.c + +gsnap_avx512-stage2.obj: stage2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stage2.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-stage2.Tpo -c -o gsnap_avx512-stage2.obj `if test -f 'stage2.c'; then $(CYGPATH_W) 'stage2.c'; else $(CYGPATH_W) '$(srcdir)/stage2.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stage2.Tpo $(DEPDIR)/gsnap_avx512-stage2.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage2.c' object='gsnap_avx512-stage2.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stage2.obj `if test -f 'stage2.c'; then $(CYGPATH_W) 'stage2.c'; else $(CYGPATH_W) '$(srcdir)/stage2.c'; fi` + +gsnap_avx512-intron.o: intron.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-intron.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-intron.Tpo -c -o gsnap_avx512-intron.o `test -f 'intron.c' || echo '$(srcdir)/'`intron.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-intron.Tpo $(DEPDIR)/gsnap_avx512-intron.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='intron.c' object='gsnap_avx512-intron.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-intron.o `test -f 'intron.c' || echo '$(srcdir)/'`intron.c + +gsnap_avx512-intron.obj: intron.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-intron.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-intron.Tpo -c -o gsnap_avx512-intron.obj `if test -f 'intron.c'; then $(CYGPATH_W) 'intron.c'; else $(CYGPATH_W) '$(srcdir)/intron.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-intron.Tpo $(DEPDIR)/gsnap_avx512-intron.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='intron.c' object='gsnap_avx512-intron.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-intron.obj `if test -f 'intron.c'; then $(CYGPATH_W) 'intron.c'; else $(CYGPATH_W) '$(srcdir)/intron.c'; fi` + +gsnap_avx512-boyer-moore.o: boyer-moore.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-boyer-moore.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-boyer-moore.Tpo -c -o gsnap_avx512-boyer-moore.o `test -f 'boyer-moore.c' || echo '$(srcdir)/'`boyer-moore.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-boyer-moore.Tpo $(DEPDIR)/gsnap_avx512-boyer-moore.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='boyer-moore.c' object='gsnap_avx512-boyer-moore.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-boyer-moore.o `test -f 
'boyer-moore.c' || echo '$(srcdir)/'`boyer-moore.c + +gsnap_avx512-boyer-moore.obj: boyer-moore.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-boyer-moore.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-boyer-moore.Tpo -c -o gsnap_avx512-boyer-moore.obj `if test -f 'boyer-moore.c'; then $(CYGPATH_W) 'boyer-moore.c'; else $(CYGPATH_W) '$(srcdir)/boyer-moore.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-boyer-moore.Tpo $(DEPDIR)/gsnap_avx512-boyer-moore.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='boyer-moore.c' object='gsnap_avx512-boyer-moore.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-boyer-moore.obj `if test -f 'boyer-moore.c'; then $(CYGPATH_W) 'boyer-moore.c'; else $(CYGPATH_W) '$(srcdir)/boyer-moore.c'; fi` + +gsnap_avx512-changepoint.o: changepoint.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-changepoint.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-changepoint.Tpo -c -o gsnap_avx512-changepoint.o `test -f 'changepoint.c' || echo '$(srcdir)/'`changepoint.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-changepoint.Tpo $(DEPDIR)/gsnap_avx512-changepoint.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='changepoint.c' object='gsnap_avx512-changepoint.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-changepoint.o 
`test -f 'changepoint.c' || echo '$(srcdir)/'`changepoint.c + +gsnap_avx512-changepoint.obj: changepoint.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-changepoint.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-changepoint.Tpo -c -o gsnap_avx512-changepoint.obj `if test -f 'changepoint.c'; then $(CYGPATH_W) 'changepoint.c'; else $(CYGPATH_W) '$(srcdir)/changepoint.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-changepoint.Tpo $(DEPDIR)/gsnap_avx512-changepoint.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='changepoint.c' object='gsnap_avx512-changepoint.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-changepoint.obj `if test -f 'changepoint.c'; then $(CYGPATH_W) 'changepoint.c'; else $(CYGPATH_W) '$(srcdir)/changepoint.c'; fi` + +gsnap_avx512-pbinom.o: pbinom.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-pbinom.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-pbinom.Tpo -c -o gsnap_avx512-pbinom.o `test -f 'pbinom.c' || echo '$(srcdir)/'`pbinom.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-pbinom.Tpo $(DEPDIR)/gsnap_avx512-pbinom.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pbinom.c' object='gsnap_avx512-pbinom.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-pbinom.o `test -f 'pbinom.c' || echo '$(srcdir)/'`pbinom.c + 
+gsnap_avx512-pbinom.obj: pbinom.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-pbinom.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-pbinom.Tpo -c -o gsnap_avx512-pbinom.obj `if test -f 'pbinom.c'; then $(CYGPATH_W) 'pbinom.c'; else $(CYGPATH_W) '$(srcdir)/pbinom.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-pbinom.Tpo $(DEPDIR)/gsnap_avx512-pbinom.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pbinom.c' object='gsnap_avx512-pbinom.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-pbinom.obj `if test -f 'pbinom.c'; then $(CYGPATH_W) 'pbinom.c'; else $(CYGPATH_W) '$(srcdir)/pbinom.c'; fi` + +gsnap_avx512-dynprog.o: dynprog.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog.Tpo -c -o gsnap_avx512-dynprog.o `test -f 'dynprog.c' || echo '$(srcdir)/'`dynprog.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog.Tpo $(DEPDIR)/gsnap_avx512-dynprog.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog.c' object='gsnap_avx512-dynprog.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog.o `test -f 'dynprog.c' || echo '$(srcdir)/'`dynprog.c + +gsnap_avx512-dynprog.obj: dynprog.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) 
$(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog.Tpo -c -o gsnap_avx512-dynprog.obj `if test -f 'dynprog.c'; then $(CYGPATH_W) 'dynprog.c'; else $(CYGPATH_W) '$(srcdir)/dynprog.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog.Tpo $(DEPDIR)/gsnap_avx512-dynprog.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog.c' object='gsnap_avx512-dynprog.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog.obj `if test -f 'dynprog.c'; then $(CYGPATH_W) 'dynprog.c'; else $(CYGPATH_W) '$(srcdir)/dynprog.c'; fi` + +gsnap_avx512-dynprog_simd.o: dynprog_simd.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_simd.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_simd.Tpo -c -o gsnap_avx512-dynprog_simd.o `test -f 'dynprog_simd.c' || echo '$(srcdir)/'`dynprog_simd.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_simd.Tpo $(DEPDIR)/gsnap_avx512-dynprog_simd.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_simd.c' object='gsnap_avx512-dynprog_simd.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog_simd.o `test -f 'dynprog_simd.c' || echo '$(srcdir)/'`dynprog_simd.c + +gsnap_avx512-dynprog_simd.obj: dynprog_simd.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_simd.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_simd.Tpo -c -o gsnap_avx512-dynprog_simd.obj `if test -f 'dynprog_simd.c'; then $(CYGPATH_W) 'dynprog_simd.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_simd.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_simd.Tpo $(DEPDIR)/gsnap_avx512-dynprog_simd.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_simd.c' object='gsnap_avx512-dynprog_simd.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog_simd.obj `if test -f 'dynprog_simd.c'; then $(CYGPATH_W) 'dynprog_simd.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_simd.c'; fi` + +gsnap_avx512-dynprog_single.o: dynprog_single.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_single.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_single.Tpo -c -o gsnap_avx512-dynprog_single.o `test -f 'dynprog_single.c' || echo '$(srcdir)/'`dynprog_single.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_single.Tpo $(DEPDIR)/gsnap_avx512-dynprog_single.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_single.c' object='gsnap_avx512-dynprog_single.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog_single.o `test -f 'dynprog_single.c' || echo '$(srcdir)/'`dynprog_single.c + +gsnap_avx512-dynprog_single.obj: dynprog_single.c +@am__fastdepCC_TRUE@ 
$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_single.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_single.Tpo -c -o gsnap_avx512-dynprog_single.obj `if test -f 'dynprog_single.c'; then $(CYGPATH_W) 'dynprog_single.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_single.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_single.Tpo $(DEPDIR)/gsnap_avx512-dynprog_single.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_single.c' object='gsnap_avx512-dynprog_single.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog_single.obj `if test -f 'dynprog_single.c'; then $(CYGPATH_W) 'dynprog_single.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_single.c'; fi` + +gsnap_avx512-dynprog_genome.o: dynprog_genome.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_genome.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_genome.Tpo -c -o gsnap_avx512-dynprog_genome.o `test -f 'dynprog_genome.c' || echo '$(srcdir)/'`dynprog_genome.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_genome.Tpo $(DEPDIR)/gsnap_avx512-dynprog_genome.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_genome.c' object='gsnap_avx512-dynprog_genome.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog_genome.o `test -f 'dynprog_genome.c' || echo 
'$(srcdir)/'`dynprog_genome.c + +gsnap_avx512-dynprog_genome.obj: dynprog_genome.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_genome.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_genome.Tpo -c -o gsnap_avx512-dynprog_genome.obj `if test -f 'dynprog_genome.c'; then $(CYGPATH_W) 'dynprog_genome.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_genome.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_genome.Tpo $(DEPDIR)/gsnap_avx512-dynprog_genome.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_genome.c' object='gsnap_avx512-dynprog_genome.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog_genome.obj `if test -f 'dynprog_genome.c'; then $(CYGPATH_W) 'dynprog_genome.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_genome.c'; fi` + +gsnap_avx512-dynprog_cdna.o: dynprog_cdna.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_cdna.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_cdna.Tpo -c -o gsnap_avx512-dynprog_cdna.o `test -f 'dynprog_cdna.c' || echo '$(srcdir)/'`dynprog_cdna.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_cdna.Tpo $(DEPDIR)/gsnap_avx512-dynprog_cdna.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_cdna.c' object='gsnap_avx512-dynprog_cdna.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) 
$(CFLAGS) -c -o gsnap_avx512-dynprog_cdna.o `test -f 'dynprog_cdna.c' || echo '$(srcdir)/'`dynprog_cdna.c + +gsnap_avx512-dynprog_cdna.obj: dynprog_cdna.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_cdna.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_cdna.Tpo -c -o gsnap_avx512-dynprog_cdna.obj `if test -f 'dynprog_cdna.c'; then $(CYGPATH_W) 'dynprog_cdna.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_cdna.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_cdna.Tpo $(DEPDIR)/gsnap_avx512-dynprog_cdna.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_cdna.c' object='gsnap_avx512-dynprog_cdna.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog_cdna.obj `if test -f 'dynprog_cdna.c'; then $(CYGPATH_W) 'dynprog_cdna.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_cdna.c'; fi` + +gsnap_avx512-dynprog_end.o: dynprog_end.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_end.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_end.Tpo -c -o gsnap_avx512-dynprog_end.o `test -f 'dynprog_end.c' || echo '$(srcdir)/'`dynprog_end.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_end.Tpo $(DEPDIR)/gsnap_avx512-dynprog_end.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_end.c' object='gsnap_avx512-dynprog_end.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) 
$(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog_end.o `test -f 'dynprog_end.c' || echo '$(srcdir)/'`dynprog_end.c + +gsnap_avx512-dynprog_end.obj: dynprog_end.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-dynprog_end.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-dynprog_end.Tpo -c -o gsnap_avx512-dynprog_end.obj `if test -f 'dynprog_end.c'; then $(CYGPATH_W) 'dynprog_end.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_end.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-dynprog_end.Tpo $(DEPDIR)/gsnap_avx512-dynprog_end.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_end.c' object='gsnap_avx512-dynprog_end.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-dynprog_end.obj `if test -f 'dynprog_end.c'; then $(CYGPATH_W) 'dynprog_end.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_end.c'; fi` + +gsnap_avx512-gbuffer.o: gbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-gbuffer.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-gbuffer.Tpo -c -o gsnap_avx512-gbuffer.o `test -f 'gbuffer.c' || echo '$(srcdir)/'`gbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-gbuffer.Tpo $(DEPDIR)/gsnap_avx512-gbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gbuffer.c' object='gsnap_avx512-gbuffer.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) 
$(CFLAGS) -c -o gsnap_avx512-gbuffer.o `test -f 'gbuffer.c' || echo '$(srcdir)/'`gbuffer.c + +gsnap_avx512-gbuffer.obj: gbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-gbuffer.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-gbuffer.Tpo -c -o gsnap_avx512-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-gbuffer.Tpo $(DEPDIR)/gsnap_avx512-gbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gbuffer.c' object='gsnap_avx512-gbuffer.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi` + +gsnap_avx512-doublelist.o: doublelist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-doublelist.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-doublelist.Tpo -c -o gsnap_avx512-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-doublelist.Tpo $(DEPDIR)/gsnap_avx512-doublelist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='doublelist.c' object='gsnap_avx512-doublelist.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-doublelist.o `test -f 'doublelist.c' || echo 
'$(srcdir)/'`doublelist.c + +gsnap_avx512-doublelist.obj: doublelist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-doublelist.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-doublelist.Tpo -c -o gsnap_avx512-doublelist.obj `if test -f 'doublelist.c'; then $(CYGPATH_W) 'doublelist.c'; else $(CYGPATH_W) '$(srcdir)/doublelist.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-doublelist.Tpo $(DEPDIR)/gsnap_avx512-doublelist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='doublelist.c' object='gsnap_avx512-doublelist.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-doublelist.obj `if test -f 'doublelist.c'; then $(CYGPATH_W) 'doublelist.c'; else $(CYGPATH_W) '$(srcdir)/doublelist.c'; fi` + +gsnap_avx512-smooth.o: smooth.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-smooth.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-smooth.Tpo -c -o gsnap_avx512-smooth.o `test -f 'smooth.c' || echo '$(srcdir)/'`smooth.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-smooth.Tpo $(DEPDIR)/gsnap_avx512-smooth.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='smooth.c' object='gsnap_avx512-smooth.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-smooth.o `test -f 'smooth.c' || echo '$(srcdir)/'`smooth.c + +gsnap_avx512-smooth.obj: smooth.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-smooth.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-smooth.Tpo -c -o gsnap_avx512-smooth.obj `if test -f 'smooth.c'; then $(CYGPATH_W) 'smooth.c'; else $(CYGPATH_W) '$(srcdir)/smooth.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-smooth.Tpo $(DEPDIR)/gsnap_avx512-smooth.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='smooth.c' object='gsnap_avx512-smooth.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-smooth.obj `if test -f 'smooth.c'; then $(CYGPATH_W) 'smooth.c'; else $(CYGPATH_W) '$(srcdir)/smooth.c'; fi` + +gsnap_avx512-chimera.o: chimera.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-chimera.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-chimera.Tpo -c -o gsnap_avx512-chimera.o `test -f 'chimera.c' || echo '$(srcdir)/'`chimera.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-chimera.Tpo $(DEPDIR)/gsnap_avx512-chimera.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chimera.c' object='gsnap_avx512-chimera.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-chimera.o `test -f 'chimera.c' || echo '$(srcdir)/'`chimera.c + +gsnap_avx512-chimera.obj: chimera.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) 
$(CFLAGS) -MT gsnap_avx512-chimera.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-chimera.Tpo -c -o gsnap_avx512-chimera.obj `if test -f 'chimera.c'; then $(CYGPATH_W) 'chimera.c'; else $(CYGPATH_W) '$(srcdir)/chimera.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-chimera.Tpo $(DEPDIR)/gsnap_avx512-chimera.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chimera.c' object='gsnap_avx512-chimera.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-chimera.obj `if test -f 'chimera.c'; then $(CYGPATH_W) 'chimera.c'; else $(CYGPATH_W) '$(srcdir)/chimera.c'; fi` + +gsnap_avx512-stage3.o: stage3.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stage3.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-stage3.Tpo -c -o gsnap_avx512-stage3.o `test -f 'stage3.c' || echo '$(srcdir)/'`stage3.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stage3.Tpo $(DEPDIR)/gsnap_avx512-stage3.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage3.c' object='gsnap_avx512-stage3.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stage3.o `test -f 'stage3.c' || echo '$(srcdir)/'`stage3.c + +gsnap_avx512-stage3.obj: stage3.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stage3.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-stage3.Tpo -c -o gsnap_avx512-stage3.obj `if test -f 
'stage3.c'; then $(CYGPATH_W) 'stage3.c'; else $(CYGPATH_W) '$(srcdir)/stage3.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stage3.Tpo $(DEPDIR)/gsnap_avx512-stage3.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage3.c' object='gsnap_avx512-stage3.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stage3.obj `if test -f 'stage3.c'; then $(CYGPATH_W) 'stage3.c'; else $(CYGPATH_W) '$(srcdir)/stage3.c'; fi` + +gsnap_avx512-splicestringpool.o: splicestringpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-splicestringpool.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-splicestringpool.Tpo -c -o gsnap_avx512-splicestringpool.o `test -f 'splicestringpool.c' || echo '$(srcdir)/'`splicestringpool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-splicestringpool.Tpo $(DEPDIR)/gsnap_avx512-splicestringpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicestringpool.c' object='gsnap_avx512-splicestringpool.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-splicestringpool.o `test -f 'splicestringpool.c' || echo '$(srcdir)/'`splicestringpool.c + +gsnap_avx512-splicestringpool.obj: splicestringpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-splicestringpool.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-splicestringpool.Tpo -c -o 
gsnap_avx512-splicestringpool.obj `if test -f 'splicestringpool.c'; then $(CYGPATH_W) 'splicestringpool.c'; else $(CYGPATH_W) '$(srcdir)/splicestringpool.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-splicestringpool.Tpo $(DEPDIR)/gsnap_avx512-splicestringpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicestringpool.c' object='gsnap_avx512-splicestringpool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-splicestringpool.obj `if test -f 'splicestringpool.c'; then $(CYGPATH_W) 'splicestringpool.c'; else $(CYGPATH_W) '$(srcdir)/splicestringpool.c'; fi` + +gsnap_avx512-splicetrie_build.o: splicetrie_build.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-splicetrie_build.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-splicetrie_build.Tpo -c -o gsnap_avx512-splicetrie_build.o `test -f 'splicetrie_build.c' || echo '$(srcdir)/'`splicetrie_build.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-splicetrie_build.Tpo $(DEPDIR)/gsnap_avx512-splicetrie_build.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicetrie_build.c' object='gsnap_avx512-splicetrie_build.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-splicetrie_build.o `test -f 'splicetrie_build.c' || echo '$(srcdir)/'`splicetrie_build.c + +gsnap_avx512-splicetrie_build.obj: splicetrie_build.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-splicetrie_build.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-splicetrie_build.Tpo -c -o gsnap_avx512-splicetrie_build.obj `if test -f 'splicetrie_build.c'; then $(CYGPATH_W) 'splicetrie_build.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie_build.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-splicetrie_build.Tpo $(DEPDIR)/gsnap_avx512-splicetrie_build.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicetrie_build.c' object='gsnap_avx512-splicetrie_build.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-splicetrie_build.obj `if test -f 'splicetrie_build.c'; then $(CYGPATH_W) 'splicetrie_build.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie_build.c'; fi` + +gsnap_avx512-splicetrie.o: splicetrie.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-splicetrie.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-splicetrie.Tpo -c -o gsnap_avx512-splicetrie.o `test -f 'splicetrie.c' || echo '$(srcdir)/'`splicetrie.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-splicetrie.Tpo $(DEPDIR)/gsnap_avx512-splicetrie.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicetrie.c' object='gsnap_avx512-splicetrie.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-splicetrie.o `test -f 'splicetrie.c' || echo '$(srcdir)/'`splicetrie.c + +gsnap_avx512-splicetrie.obj: splicetrie.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-splicetrie.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-splicetrie.Tpo -c -o gsnap_avx512-splicetrie.obj `if test -f 'splicetrie.c'; then $(CYGPATH_W) 'splicetrie.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-splicetrie.Tpo $(DEPDIR)/gsnap_avx512-splicetrie.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicetrie.c' object='gsnap_avx512-splicetrie.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-splicetrie.obj `if test -f 'splicetrie.c'; then $(CYGPATH_W) 'splicetrie.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie.c'; fi` + +gsnap_avx512-splice.o: splice.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-splice.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-splice.Tpo -c -o gsnap_avx512-splice.o `test -f 'splice.c' || echo '$(srcdir)/'`splice.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-splice.Tpo $(DEPDIR)/gsnap_avx512-splice.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splice.c' object='gsnap_avx512-splice.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-splice.o `test -f 'splice.c' || echo '$(srcdir)/'`splice.c + +gsnap_avx512-splice.obj: splice.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-splice.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-splice.Tpo -c -o gsnap_avx512-splice.obj `if test -f 'splice.c'; then $(CYGPATH_W) 'splice.c'; else $(CYGPATH_W) '$(srcdir)/splice.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-splice.Tpo $(DEPDIR)/gsnap_avx512-splice.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splice.c' object='gsnap_avx512-splice.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-splice.obj `if test -f 'splice.c'; then $(CYGPATH_W) 'splice.c'; else $(CYGPATH_W) '$(srcdir)/splice.c'; fi` + +gsnap_avx512-indel.o: indel.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-indel.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-indel.Tpo -c -o gsnap_avx512-indel.o `test -f 'indel.c' || echo '$(srcdir)/'`indel.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-indel.Tpo $(DEPDIR)/gsnap_avx512-indel.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='indel.c' object='gsnap_avx512-indel.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-indel.o `test -f 'indel.c' || echo '$(srcdir)/'`indel.c + +gsnap_avx512-indel.obj: indel.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-indel.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-indel.Tpo -c -o 
gsnap_avx512-indel.obj `if test -f 'indel.c'; then $(CYGPATH_W) 'indel.c'; else $(CYGPATH_W) '$(srcdir)/indel.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-indel.Tpo $(DEPDIR)/gsnap_avx512-indel.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='indel.c' object='gsnap_avx512-indel.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-indel.obj `if test -f 'indel.c'; then $(CYGPATH_W) 'indel.c'; else $(CYGPATH_W) '$(srcdir)/indel.c'; fi` + +gsnap_avx512-bitpack64-access.o: bitpack64-access.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bitpack64-access.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-bitpack64-access.Tpo -c -o gsnap_avx512-bitpack64-access.o `test -f 'bitpack64-access.c' || echo '$(srcdir)/'`bitpack64-access.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bitpack64-access.Tpo $(DEPDIR)/gsnap_avx512-bitpack64-access.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bitpack64-access.c' object='gsnap_avx512-bitpack64-access.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bitpack64-access.o `test -f 'bitpack64-access.c' || echo '$(srcdir)/'`bitpack64-access.c + +gsnap_avx512-bitpack64-access.obj: bitpack64-access.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bitpack64-access.obj -MD -MP -MF 
$(DEPDIR)/gsnap_avx512-bitpack64-access.Tpo -c -o gsnap_avx512-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bitpack64-access.Tpo $(DEPDIR)/gsnap_avx512-bitpack64-access.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bitpack64-access.c' object='gsnap_avx512-bitpack64-access.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi` + +gsnap_avx512-bytecoding.o: bytecoding.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-bytecoding.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-bytecoding.Tpo -c -o gsnap_avx512-bytecoding.o `test -f 'bytecoding.c' || echo '$(srcdir)/'`bytecoding.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bytecoding.Tpo $(DEPDIR)/gsnap_avx512-bytecoding.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bytecoding.c' object='gsnap_avx512-bytecoding.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bytecoding.o `test -f 'bytecoding.c' || echo '$(srcdir)/'`bytecoding.c + +gsnap_avx512-bytecoding.obj: bytecoding.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) 
$(CFLAGS) -MT gsnap_avx512-bytecoding.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-bytecoding.Tpo -c -o gsnap_avx512-bytecoding.obj `if test -f 'bytecoding.c'; then $(CYGPATH_W) 'bytecoding.c'; else $(CYGPATH_W) '$(srcdir)/bytecoding.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-bytecoding.Tpo $(DEPDIR)/gsnap_avx512-bytecoding.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bytecoding.c' object='gsnap_avx512-bytecoding.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-bytecoding.obj `if test -f 'bytecoding.c'; then $(CYGPATH_W) 'bytecoding.c'; else $(CYGPATH_W) '$(srcdir)/bytecoding.c'; fi` + +gsnap_avx512-univdiag.o: univdiag.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-univdiag.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-univdiag.Tpo -c -o gsnap_avx512-univdiag.o `test -f 'univdiag.c' || echo '$(srcdir)/'`univdiag.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-univdiag.Tpo $(DEPDIR)/gsnap_avx512-univdiag.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='univdiag.c' object='gsnap_avx512-univdiag.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-univdiag.o `test -f 'univdiag.c' || echo '$(srcdir)/'`univdiag.c + +gsnap_avx512-univdiag.obj: univdiag.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-univdiag.obj -MD -MP -MF 
$(DEPDIR)/gsnap_avx512-univdiag.Tpo -c -o gsnap_avx512-univdiag.obj `if test -f 'univdiag.c'; then $(CYGPATH_W) 'univdiag.c'; else $(CYGPATH_W) '$(srcdir)/univdiag.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-univdiag.Tpo $(DEPDIR)/gsnap_avx512-univdiag.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='univdiag.c' object='gsnap_avx512-univdiag.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-univdiag.obj `if test -f 'univdiag.c'; then $(CYGPATH_W) 'univdiag.c'; else $(CYGPATH_W) '$(srcdir)/univdiag.c'; fi` + +gsnap_avx512-sedgesort.o: sedgesort.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-sedgesort.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-sedgesort.Tpo -c -o gsnap_avx512-sedgesort.o `test -f 'sedgesort.c' || echo '$(srcdir)/'`sedgesort.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-sedgesort.Tpo $(DEPDIR)/gsnap_avx512-sedgesort.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sedgesort.c' object='gsnap_avx512-sedgesort.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-sedgesort.o `test -f 'sedgesort.c' || echo '$(srcdir)/'`sedgesort.c + +gsnap_avx512-sedgesort.obj: sedgesort.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-sedgesort.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-sedgesort.Tpo -c -o 
gsnap_avx512-sedgesort.obj `if test -f 'sedgesort.c'; then $(CYGPATH_W) 'sedgesort.c'; else $(CYGPATH_W) '$(srcdir)/sedgesort.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-sedgesort.Tpo $(DEPDIR)/gsnap_avx512-sedgesort.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sedgesort.c' object='gsnap_avx512-sedgesort.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-sedgesort.obj `if test -f 'sedgesort.c'; then $(CYGPATH_W) 'sedgesort.c'; else $(CYGPATH_W) '$(srcdir)/sedgesort.c'; fi` + +gsnap_avx512-sarray-read.o: sarray-read.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-sarray-read.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-sarray-read.Tpo -c -o gsnap_avx512-sarray-read.o `test -f 'sarray-read.c' || echo '$(srcdir)/'`sarray-read.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-sarray-read.Tpo $(DEPDIR)/gsnap_avx512-sarray-read.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sarray-read.c' object='gsnap_avx512-sarray-read.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-sarray-read.o `test -f 'sarray-read.c' || echo '$(srcdir)/'`sarray-read.c + +gsnap_avx512-sarray-read.obj: sarray-read.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-sarray-read.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-sarray-read.Tpo -c -o 
gsnap_avx512-sarray-read.obj `if test -f 'sarray-read.c'; then $(CYGPATH_W) 'sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/sarray-read.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-sarray-read.Tpo $(DEPDIR)/gsnap_avx512-sarray-read.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sarray-read.c' object='gsnap_avx512-sarray-read.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-sarray-read.obj `if test -f 'sarray-read.c'; then $(CYGPATH_W) 'sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/sarray-read.c'; fi` + +gsnap_avx512-merge-heap.o: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-merge-heap.Tpo -c -o gsnap_avx512-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-merge-heap.Tpo $(DEPDIR)/gsnap_avx512-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnap_avx512-merge-heap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c + +gsnap_avx512-merge-heap.obj: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-merge-heap.Tpo -c -o 
gsnap_avx512-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-merge-heap.Tpo $(DEPDIR)/gsnap_avx512-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnap_avx512-merge-heap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` + +gsnap_avx512-stage1hr.o: stage1hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-stage1hr.Tpo -c -o gsnap_avx512-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stage1hr.Tpo $(DEPDIR)/gsnap_avx512-stage1hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage1hr.c' object='gsnap_avx512-stage1hr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c + +gsnap_avx512-stage1hr.obj: stage1hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-stage1hr.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-stage1hr.Tpo -c -o gsnap_avx512-stage1hr.obj `if test -f 'stage1hr.c'; then 
$(CYGPATH_W) 'stage1hr.c'; else $(CYGPATH_W) '$(srcdir)/stage1hr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-stage1hr.Tpo $(DEPDIR)/gsnap_avx512-stage1hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage1hr.c' object='gsnap_avx512-stage1hr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-stage1hr.obj `if test -f 'stage1hr.c'; then $(CYGPATH_W) 'stage1hr.c'; else $(CYGPATH_W) '$(srcdir)/stage1hr.c'; fi` + +gsnap_avx512-request.o: request.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-request.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-request.Tpo -c -o gsnap_avx512-request.o `test -f 'request.c' || echo '$(srcdir)/'`request.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-request.Tpo $(DEPDIR)/gsnap_avx512-request.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='request.c' object='gsnap_avx512-request.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-request.o `test -f 'request.c' || echo '$(srcdir)/'`request.c -gsnap_avx2-resulthr.o: resulthr.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-resulthr.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-resulthr.Tpo -c -o gsnap_avx2-resulthr.o `test -f 'resulthr.c' || echo '$(srcdir)/'`resulthr.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-resulthr.Tpo 
$(DEPDIR)/gsnap_avx2-resulthr.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='resulthr.c' object='gsnap_avx2-resulthr.o' libtool=no @AMDEPBACKSLASH@ +gsnap_avx512-request.obj: request.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-request.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-request.Tpo -c -o gsnap_avx512-request.obj `if test -f 'request.c'; then $(CYGPATH_W) 'request.c'; else $(CYGPATH_W) '$(srcdir)/request.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-request.Tpo $(DEPDIR)/gsnap_avx512-request.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='request.c' object='gsnap_avx512-request.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-resulthr.o `test -f 'resulthr.c' || echo '$(srcdir)/'`resulthr.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-request.obj `if test -f 'request.c'; then $(CYGPATH_W) 'request.c'; else $(CYGPATH_W) '$(srcdir)/request.c'; fi` -gsnap_avx2-resulthr.obj: resulthr.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-resulthr.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-resulthr.Tpo -c -o gsnap_avx2-resulthr.obj `if test -f 'resulthr.c'; then $(CYGPATH_W) 'resulthr.c'; else $(CYGPATH_W) '$(srcdir)/resulthr.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-resulthr.Tpo $(DEPDIR)/gsnap_avx2-resulthr.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='resulthr.c' object='gsnap_avx2-resulthr.obj' libtool=no @AMDEPBACKSLASH@ 
+gsnap_avx512-resulthr.o: resulthr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-resulthr.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-resulthr.Tpo -c -o gsnap_avx512-resulthr.o `test -f 'resulthr.c' || echo '$(srcdir)/'`resulthr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-resulthr.Tpo $(DEPDIR)/gsnap_avx512-resulthr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='resulthr.c' object='gsnap_avx512-resulthr.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-resulthr.obj `if test -f 'resulthr.c'; then $(CYGPATH_W) 'resulthr.c'; else $(CYGPATH_W) '$(srcdir)/resulthr.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-resulthr.o `test -f 'resulthr.c' || echo '$(srcdir)/'`resulthr.c -gsnap_avx2-output.o: output.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-output.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-output.Tpo -c -o gsnap_avx2-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-output.Tpo $(DEPDIR)/gsnap_avx2-output.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='output.c' object='gsnap_avx2-output.o' libtool=no @AMDEPBACKSLASH@ +gsnap_avx512-resulthr.obj: resulthr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-resulthr.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-resulthr.Tpo -c -o 
gsnap_avx512-resulthr.obj `if test -f 'resulthr.c'; then $(CYGPATH_W) 'resulthr.c'; else $(CYGPATH_W) '$(srcdir)/resulthr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-resulthr.Tpo $(DEPDIR)/gsnap_avx512-resulthr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='resulthr.c' object='gsnap_avx512-resulthr.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-resulthr.obj `if test -f 'resulthr.c'; then $(CYGPATH_W) 'resulthr.c'; else $(CYGPATH_W) '$(srcdir)/resulthr.c'; fi` -gsnap_avx2-output.obj: output.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-output.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-output.Tpo -c -o gsnap_avx2-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-output.Tpo $(DEPDIR)/gsnap_avx2-output.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='output.c' object='gsnap_avx2-output.obj' libtool=no @AMDEPBACKSLASH@ +gsnap_avx512-output.o: output.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-output.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-output.Tpo -c -o gsnap_avx512-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-output.Tpo $(DEPDIR)/gsnap_avx512-output.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='output.c' object='gsnap_avx512-output.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c -gsnap_avx2-inbuffer.o: inbuffer.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-inbuffer.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-inbuffer.Tpo -c -o gsnap_avx2-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-inbuffer.Tpo $(DEPDIR)/gsnap_avx2-inbuffer.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='inbuffer.c' object='gsnap_avx2-inbuffer.o' libtool=no @AMDEPBACKSLASH@ +gsnap_avx512-output.obj: output.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-output.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-output.Tpo -c -o gsnap_avx512-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-output.Tpo $(DEPDIR)/gsnap_avx512-output.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='output.c' object='gsnap_avx512-output.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi` -gsnap_avx2-inbuffer.obj: inbuffer.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-inbuffer.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-inbuffer.Tpo -c -o gsnap_avx2-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-inbuffer.Tpo $(DEPDIR)/gsnap_avx2-inbuffer.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='inbuffer.c' object='gsnap_avx2-inbuffer.obj' libtool=no @AMDEPBACKSLASH@ +gsnap_avx512-inbuffer.o: inbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-inbuffer.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-inbuffer.Tpo -c -o gsnap_avx512-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-inbuffer.Tpo $(DEPDIR)/gsnap_avx512-inbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='inbuffer.c' object='gsnap_avx512-inbuffer.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 
'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c -gsnap_avx2-samheader.o: samheader.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-samheader.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-samheader.Tpo -c -o gsnap_avx2-samheader.o `test -f 'samheader.c' || echo '$(srcdir)/'`samheader.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-samheader.Tpo $(DEPDIR)/gsnap_avx2-samheader.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='samheader.c' object='gsnap_avx2-samheader.o' libtool=no @AMDEPBACKSLASH@ +gsnap_avx512-inbuffer.obj: inbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-inbuffer.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-inbuffer.Tpo -c -o gsnap_avx512-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-inbuffer.Tpo $(DEPDIR)/gsnap_avx512-inbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='inbuffer.c' object='gsnap_avx512-inbuffer.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-samheader.o `test -f 'samheader.c' || echo '$(srcdir)/'`samheader.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o 
gsnap_avx512-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi` -gsnap_avx2-samheader.obj: samheader.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-samheader.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-samheader.Tpo -c -o gsnap_avx2-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-samheader.Tpo $(DEPDIR)/gsnap_avx2-samheader.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='samheader.c' object='gsnap_avx2-samheader.obj' libtool=no @AMDEPBACKSLASH@ +gsnap_avx512-samheader.o: samheader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-samheader.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-samheader.Tpo -c -o gsnap_avx512-samheader.o `test -f 'samheader.c' || echo '$(srcdir)/'`samheader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-samheader.Tpo $(DEPDIR)/gsnap_avx512-samheader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='samheader.c' object='gsnap_avx512-samheader.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-samheader.o `test -f 'samheader.c' || echo '$(srcdir)/'`samheader.c -gsnap_avx2-outbuffer.o: outbuffer.c 
-@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-outbuffer.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-outbuffer.Tpo -c -o gsnap_avx2-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-outbuffer.Tpo $(DEPDIR)/gsnap_avx2-outbuffer.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='outbuffer.c' object='gsnap_avx2-outbuffer.o' libtool=no @AMDEPBACKSLASH@ +gsnap_avx512-samheader.obj: samheader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-samheader.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-samheader.Tpo -c -o gsnap_avx512-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-samheader.Tpo $(DEPDIR)/gsnap_avx512-samheader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='samheader.c' object='gsnap_avx512-samheader.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi` -gsnap_avx2-outbuffer.obj: outbuffer.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-outbuffer.obj -MD 
-MP -MF $(DEPDIR)/gsnap_avx2-outbuffer.Tpo -c -o gsnap_avx2-outbuffer.obj `if test -f 'outbuffer.c'; then $(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-outbuffer.Tpo $(DEPDIR)/gsnap_avx2-outbuffer.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='outbuffer.c' object='gsnap_avx2-outbuffer.obj' libtool=no @AMDEPBACKSLASH@ +gsnap_avx512-outbuffer.o: outbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-outbuffer.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-outbuffer.Tpo -c -o gsnap_avx512-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-outbuffer.Tpo $(DEPDIR)/gsnap_avx512-outbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='outbuffer.c' object='gsnap_avx512-outbuffer.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-outbuffer.obj `if test -f 'outbuffer.c'; then $(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c -gsnap_avx2-datadir.o: datadir.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-datadir.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-datadir.Tpo -c -o gsnap_avx2-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gsnap_avx2-datadir.Tpo $(DEPDIR)/gsnap_avx2-datadir.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datadir.c' object='gsnap_avx2-datadir.o' libtool=no @AMDEPBACKSLASH@ +gsnap_avx512-outbuffer.obj: outbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-outbuffer.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-outbuffer.Tpo -c -o gsnap_avx512-outbuffer.obj `if test -f 'outbuffer.c'; then $(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-outbuffer.Tpo $(DEPDIR)/gsnap_avx512-outbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='outbuffer.c' object='gsnap_avx512-outbuffer.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-outbuffer.obj `if test -f 'outbuffer.c'; then $(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi` -gsnap_avx2-datadir.obj: datadir.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-datadir.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-datadir.Tpo -c -o gsnap_avx2-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-datadir.Tpo $(DEPDIR)/gsnap_avx2-datadir.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datadir.c' 
object='gsnap_avx2-datadir.obj' libtool=no @AMDEPBACKSLASH@ +gsnap_avx512-datadir.o: datadir.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-datadir.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-datadir.Tpo -c -o gsnap_avx512-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-datadir.Tpo $(DEPDIR)/gsnap_avx512-datadir.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datadir.c' object='gsnap_avx512-datadir.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c -gsnap_avx2-parserange.o: parserange.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-parserange.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-parserange.Tpo -c -o gsnap_avx2-parserange.o `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-parserange.Tpo $(DEPDIR)/gsnap_avx2-parserange.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parserange.c' object='gsnap_avx2-parserange.o' libtool=no @AMDEPBACKSLASH@ +gsnap_avx512-datadir.obj: datadir.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT 
gsnap_avx512-datadir.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-datadir.Tpo -c -o gsnap_avx512-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-datadir.Tpo $(DEPDIR)/gsnap_avx512-datadir.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datadir.c' object='gsnap_avx512-datadir.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-parserange.o `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi` -gsnap_avx2-parserange.obj: parserange.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-parserange.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-parserange.Tpo -c -o gsnap_avx2-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-parserange.Tpo $(DEPDIR)/gsnap_avx2-parserange.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parserange.c' object='gsnap_avx2-parserange.obj' libtool=no @AMDEPBACKSLASH@ +gsnap_avx512-parserange.o: parserange.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-parserange.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-parserange.Tpo -c -o gsnap_avx512-parserange.o `test -f 
'parserange.c' || echo '$(srcdir)/'`parserange.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-parserange.Tpo $(DEPDIR)/gsnap_avx512-parserange.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parserange.c' object='gsnap_avx512-parserange.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-parserange.o `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c -gsnap_avx2-getopt.o: getopt.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-getopt.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-getopt.Tpo -c -o gsnap_avx2-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-getopt.Tpo $(DEPDIR)/gsnap_avx2-getopt.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt.c' object='gsnap_avx2-getopt.o' libtool=no @AMDEPBACKSLASH@ +gsnap_avx512-parserange.obj: parserange.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-parserange.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-parserange.Tpo -c -o gsnap_avx512-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-parserange.Tpo $(DEPDIR)/gsnap_avx512-parserange.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parserange.c' object='gsnap_avx512-parserange.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi` -gsnap_avx2-getopt.obj: getopt.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-getopt.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-getopt.Tpo -c -o gsnap_avx2-getopt.obj `if test -f 'getopt.c'; then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-getopt.Tpo $(DEPDIR)/gsnap_avx2-getopt.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt.c' object='gsnap_avx2-getopt.obj' libtool=no @AMDEPBACKSLASH@ +gsnap_avx512-getopt.o: getopt.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-getopt.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-getopt.Tpo -c -o gsnap_avx512-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-getopt.Tpo $(DEPDIR)/gsnap_avx512-getopt.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt.c' object='gsnap_avx512-getopt.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
-@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-getopt.obj `if test -f 'getopt.c'; then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c -gsnap_avx2-getopt1.o: getopt1.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-getopt1.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-getopt1.Tpo -c -o gsnap_avx2-getopt1.o `test -f 'getopt1.c' || echo '$(srcdir)/'`getopt1.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-getopt1.Tpo $(DEPDIR)/gsnap_avx2-getopt1.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt1.c' object='gsnap_avx2-getopt1.o' libtool=no @AMDEPBACKSLASH@ +gsnap_avx512-getopt.obj: getopt.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-getopt.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-getopt.Tpo -c -o gsnap_avx512-getopt.obj `if test -f 'getopt.c'; then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-getopt.Tpo $(DEPDIR)/gsnap_avx512-getopt.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt.c' object='gsnap_avx512-getopt.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-getopt1.o `test -f 'getopt1.c' || echo '$(srcdir)/'`getopt1.c +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-getopt.obj `if test -f 'getopt.c'; then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi` -gsnap_avx2-getopt1.obj: getopt1.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-getopt1.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-getopt1.Tpo -c -o gsnap_avx2-getopt1.obj `if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-getopt1.Tpo $(DEPDIR)/gsnap_avx2-getopt1.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt1.c' object='gsnap_avx2-getopt1.obj' libtool=no @AMDEPBACKSLASH@ +gsnap_avx512-getopt1.o: getopt1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-getopt1.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-getopt1.Tpo -c -o gsnap_avx512-getopt1.o `test -f 'getopt1.c' || echo '$(srcdir)/'`getopt1.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-getopt1.Tpo $(DEPDIR)/gsnap_avx512-getopt1.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt1.c' object='gsnap_avx512-getopt1.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-getopt1.obj `if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-getopt1.o `test -f 'getopt1.c' || echo 
'$(srcdir)/'`getopt1.c -gsnap_avx2-gsnap.o: gsnap.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-gsnap.o -MD -MP -MF $(DEPDIR)/gsnap_avx2-gsnap.Tpo -c -o gsnap_avx2-gsnap.o `test -f 'gsnap.c' || echo '$(srcdir)/'`gsnap.c -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-gsnap.Tpo $(DEPDIR)/gsnap_avx2-gsnap.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gsnap.c' object='gsnap_avx2-gsnap.o' libtool=no @AMDEPBACKSLASH@ +gsnap_avx512-getopt1.obj: getopt1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-getopt1.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-getopt1.Tpo -c -o gsnap_avx512-getopt1.obj `if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-getopt1.Tpo $(DEPDIR)/gsnap_avx512-getopt1.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt1.c' object='gsnap_avx512-getopt1.obj' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-gsnap.o `test -f 'gsnap.c' || echo '$(srcdir)/'`gsnap.c +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-getopt1.obj `if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi` -gsnap_avx2-gsnap.obj: gsnap.c -@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -MT gsnap_avx2-gsnap.obj -MD -MP -MF $(DEPDIR)/gsnap_avx2-gsnap.Tpo 
-c -o gsnap_avx2-gsnap.obj `if test -f 'gsnap.c'; then $(CYGPATH_W) 'gsnap.c'; else $(CYGPATH_W) '$(srcdir)/gsnap.c'; fi` -@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx2-gsnap.Tpo $(DEPDIR)/gsnap_avx2-gsnap.Po -@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gsnap.c' object='gsnap_avx2-gsnap.obj' libtool=no @AMDEPBACKSLASH@ +gsnap_avx512-gsnap.o: gsnap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-gsnap.o -MD -MP -MF $(DEPDIR)/gsnap_avx512-gsnap.Tpo -c -o gsnap_avx512-gsnap.o `test -f 'gsnap.c' || echo '$(srcdir)/'`gsnap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-gsnap.Tpo $(DEPDIR)/gsnap_avx512-gsnap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gsnap.c' object='gsnap_avx512-gsnap.o' libtool=no @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx2_CFLAGS) $(CFLAGS) -c -o gsnap_avx2-gsnap.obj `if test -f 'gsnap.c'; then $(CYGPATH_W) 'gsnap.c'; else $(CYGPATH_W) '$(srcdir)/gsnap.c'; fi` +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-gsnap.o `test -f 'gsnap.c' || echo '$(srcdir)/'`gsnap.c + +gsnap_avx512-gsnap.obj: gsnap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -MT gsnap_avx512-gsnap.obj -MD -MP -MF $(DEPDIR)/gsnap_avx512-gsnap.Tpo -c -o gsnap_avx512-gsnap.obj `if test -f 'gsnap.c'; then $(CYGPATH_W) 'gsnap.c'; else $(CYGPATH_W) '$(srcdir)/gsnap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_avx512-gsnap.Tpo $(DEPDIR)/gsnap_avx512-gsnap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='gsnap.c' object='gsnap_avx512-gsnap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_avx512_CFLAGS) $(CFLAGS) -c -o gsnap_avx512-gsnap.obj `if test -f 'gsnap.c'; then $(CYGPATH_W) 'gsnap.c'; else $(CYGPATH_W) '$(srcdir)/gsnap.c'; fi` gsnap_nosimd-except.o: except.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-except.o -MD -MP -MF $(DEPDIR)/gsnap_nosimd-except.Tpo -c -o gsnap_nosimd-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c @@ -24086,6 +28760,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gsnap_nosimd-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-merge.o -MD -MP -MF $(DEPDIR)/gsnap_nosimd-merge.Tpo -c -o gsnap_nosimd-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-merge.Tpo $(DEPDIR)/gsnap_nosimd-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnap_nosimd-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c 
-o gsnap_nosimd-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gsnap_nosimd-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-merge.obj -MD -MP -MF $(DEPDIR)/gsnap_nosimd-merge.Tpo -c -o gsnap_nosimd-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-merge.Tpo $(DEPDIR)/gsnap_nosimd-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnap_nosimd-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gsnap_nosimd-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-indexdb.o -MD -MP -MF $(DEPDIR)/gsnap_nosimd-indexdb.Tpo -c -o gsnap_nosimd-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-indexdb.Tpo $(DEPDIR)/gsnap_nosimd-indexdb.Po @@ -24786,6 +29474,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-sarray-read.obj `if test -f 'sarray-read.c'; then $(CYGPATH_W) 'sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/sarray-read.c'; fi` +gsnap_nosimd-merge-heap.o: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnap_nosimd-merge-heap.Tpo -c -o gsnap_nosimd-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-merge-heap.Tpo $(DEPDIR)/gsnap_nosimd-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnap_nosimd-merge-heap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c + +gsnap_nosimd-merge-heap.obj: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnap_nosimd-merge-heap.Tpo -c -o gsnap_nosimd-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-merge-heap.Tpo $(DEPDIR)/gsnap_nosimd-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnap_nosimd-merge-heap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -c -o gsnap_nosimd-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` + gsnap_nosimd-stage1hr.o: stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) 
$(CPPFLAGS) $(gsnap_nosimd_CFLAGS) $(CFLAGS) -MT gsnap_nosimd-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnap_nosimd-stage1hr.Tpo -c -o gsnap_nosimd-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_nosimd-stage1hr.Tpo $(DEPDIR)/gsnap_nosimd-stage1hr.Po @@ -25346,6 +30048,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gsnap_sse2-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-merge.o -MD -MP -MF $(DEPDIR)/gsnap_sse2-merge.Tpo -c -o gsnap_sse2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse2-merge.Tpo $(DEPDIR)/gsnap_sse2-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnap_sse2-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gsnap_sse2-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-merge.obj -MD -MP -MF $(DEPDIR)/gsnap_sse2-merge.Tpo -c -o gsnap_sse2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gsnap_sse2-merge.Tpo $(DEPDIR)/gsnap_sse2-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnap_sse2-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gsnap_sse2-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-indexdb.o -MD -MP -MF $(DEPDIR)/gsnap_sse2-indexdb.Tpo -c -o gsnap_sse2-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse2-indexdb.Tpo $(DEPDIR)/gsnap_sse2-indexdb.Po @@ -26046,6 +30762,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-sarray-read.obj `if test -f 'sarray-read.c'; then $(CYGPATH_W) 'sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/sarray-read.c'; fi` +gsnap_sse2-merge-heap.o: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnap_sse2-merge-heap.Tpo -c -o gsnap_sse2-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse2-merge-heap.Tpo $(DEPDIR)/gsnap_sse2-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnap_sse2-merge-heap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c + +gsnap_sse2-merge-heap.obj: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnap_sse2-merge-heap.Tpo -c -o gsnap_sse2-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse2-merge-heap.Tpo $(DEPDIR)/gsnap_sse2-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnap_sse2-merge-heap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -c -o gsnap_sse2-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` + gsnap_sse2-stage1hr.o: stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse2_CFLAGS) $(CFLAGS) -MT gsnap_sse2-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnap_sse2-stage1hr.Tpo -c -o gsnap_sse2-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse2-stage1hr.Tpo $(DEPDIR)/gsnap_sse2-stage1hr.Po @@ -26606,6 +31336,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gsnap_sse41-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-merge.o -MD -MP -MF $(DEPDIR)/gsnap_sse41-merge.Tpo -c -o gsnap_sse41-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-merge.Tpo $(DEPDIR)/gsnap_sse41-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnap_sse41-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gsnap_sse41-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-merge.obj -MD -MP -MF $(DEPDIR)/gsnap_sse41-merge.Tpo -c -o gsnap_sse41-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-merge.Tpo $(DEPDIR)/gsnap_sse41-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnap_sse41-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) 
'$(srcdir)/merge.c'; fi` + gsnap_sse41-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-indexdb.o -MD -MP -MF $(DEPDIR)/gsnap_sse41-indexdb.Tpo -c -o gsnap_sse41-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-indexdb.Tpo $(DEPDIR)/gsnap_sse41-indexdb.Po @@ -27306,6 +32050,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-sarray-read.obj `if test -f 'sarray-read.c'; then $(CYGPATH_W) 'sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/sarray-read.c'; fi` +gsnap_sse41-merge-heap.o: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnap_sse41-merge-heap.Tpo -c -o gsnap_sse41-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-merge-heap.Tpo $(DEPDIR)/gsnap_sse41-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnap_sse41-merge-heap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c + +gsnap_sse41-merge-heap.obj: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-merge-heap.obj 
-MD -MP -MF $(DEPDIR)/gsnap_sse41-merge-heap.Tpo -c -o gsnap_sse41-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-merge-heap.Tpo $(DEPDIR)/gsnap_sse41-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnap_sse41-merge-heap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -c -o gsnap_sse41-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` + gsnap_sse41-stage1hr.o: stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse41_CFLAGS) $(CFLAGS) -MT gsnap_sse41-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnap_sse41-stage1hr.Tpo -c -o gsnap_sse41-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse41-stage1hr.Tpo $(DEPDIR)/gsnap_sse41-stage1hr.Po @@ -27866,6 +32624,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gsnap_sse42-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-merge.o -MD -MP -MF $(DEPDIR)/gsnap_sse42-merge.Tpo -c -o gsnap_sse42-merge.o `test -f 'merge.c' || echo 
'$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-merge.Tpo $(DEPDIR)/gsnap_sse42-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnap_sse42-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gsnap_sse42-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-merge.obj -MD -MP -MF $(DEPDIR)/gsnap_sse42-merge.Tpo -c -o gsnap_sse42-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-merge.Tpo $(DEPDIR)/gsnap_sse42-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnap_sse42-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gsnap_sse42-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-indexdb.o -MD -MP -MF $(DEPDIR)/gsnap_sse42-indexdb.Tpo -c -o gsnap_sse42-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-indexdb.Tpo $(DEPDIR)/gsnap_sse42-indexdb.Po @@ -28566,6 +33338,20 @@ 
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-sarray-read.obj `if test -f 'sarray-read.c'; then $(CYGPATH_W) 'sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/sarray-read.c'; fi` +gsnap_sse42-merge-heap.o: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnap_sse42-merge-heap.Tpo -c -o gsnap_sse42-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-merge-heap.Tpo $(DEPDIR)/gsnap_sse42-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnap_sse42-merge-heap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c + +gsnap_sse42-merge-heap.obj: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnap_sse42-merge-heap.Tpo -c -o gsnap_sse42-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-merge-heap.Tpo $(DEPDIR)/gsnap_sse42-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnap_sse42-merge-heap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -c -o gsnap_sse42-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` + gsnap_sse42-stage1hr.o: stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_sse42_CFLAGS) $(CFLAGS) -MT gsnap_sse42-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnap_sse42-stage1hr.Tpo -c -o gsnap_sse42-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_sse42-stage1hr.Tpo $(DEPDIR)/gsnap_sse42-stage1hr.Po @@ -29126,6 +33912,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gsnap_ssse3-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-merge.o -MD -MP -MF $(DEPDIR)/gsnap_ssse3-merge.Tpo -c -o gsnap_ssse3-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-merge.Tpo $(DEPDIR)/gsnap_ssse3-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnap_ssse3-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) 
$(CFLAGS) -c -o gsnap_ssse3-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gsnap_ssse3-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-merge.obj -MD -MP -MF $(DEPDIR)/gsnap_ssse3-merge.Tpo -c -o gsnap_ssse3-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-merge.Tpo $(DEPDIR)/gsnap_ssse3-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnap_ssse3-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gsnap_ssse3-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-indexdb.o -MD -MP -MF $(DEPDIR)/gsnap_ssse3-indexdb.Tpo -c -o gsnap_ssse3-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-indexdb.Tpo $(DEPDIR)/gsnap_ssse3-indexdb.Po @@ -29826,6 +34626,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-sarray-read.obj `if test -f 'sarray-read.c'; then $(CYGPATH_W) 'sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/sarray-read.c'; fi` +gsnap_ssse3-merge-heap.o: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnap_ssse3-merge-heap.Tpo -c -o gsnap_ssse3-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-merge-heap.Tpo $(DEPDIR)/gsnap_ssse3-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnap_ssse3-merge-heap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c + +gsnap_ssse3-merge-heap.obj: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -MT gsnap_ssse3-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnap_ssse3-merge-heap.Tpo -c -o gsnap_ssse3-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-merge-heap.Tpo $(DEPDIR)/gsnap_ssse3-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnap_ssse3-merge-heap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) $(CFLAGS) -c -o gsnap_ssse3-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` + gsnap_ssse3-stage1hr.o: stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnap_ssse3_CFLAGS) 
$(CFLAGS) -MT gsnap_ssse3-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnap_ssse3-stage1hr.Tpo -c -o gsnap_ssse3-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnap_ssse3-stage1hr.Tpo $(DEPDIR)/gsnap_ssse3-stage1hr.Po @@ -30428,6 +35242,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gsnapl_avx2-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -MT gsnapl_avx2-merge.o -MD -MP -MF $(DEPDIR)/gsnapl_avx2-merge.Tpo -c -o gsnapl_avx2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx2-merge.Tpo $(DEPDIR)/gsnapl_avx2-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnapl_avx2-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gsnapl_avx2-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -MT gsnapl_avx2-merge.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx2-merge.Tpo -c -o gsnapl_avx2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx2-merge.Tpo 
$(DEPDIR)/gsnapl_avx2-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnapl_avx2-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gsnapl_avx2-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -MT gsnapl_avx2-indexdb.o -MD -MP -MF $(DEPDIR)/gsnapl_avx2-indexdb.Tpo -c -o gsnapl_avx2-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx2-indexdb.Tpo $(DEPDIR)/gsnapl_avx2-indexdb.Po @@ -31072,6 +35900,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi` +gsnapl_avx2-merge-heap.o: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -MT gsnapl_avx2-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnapl_avx2-merge-heap.Tpo -c -o gsnapl_avx2-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx2-merge-heap.Tpo $(DEPDIR)/gsnapl_avx2-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnapl_avx2-merge-heap.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c + +gsnapl_avx2-merge-heap.obj: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -MT gsnapl_avx2-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx2-merge-heap.Tpo -c -o gsnapl_avx2-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx2-merge-heap.Tpo $(DEPDIR)/gsnapl_avx2-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnapl_avx2-merge-heap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` + gsnapl_avx2-stage1hr.o: stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -MT gsnapl_avx2-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnapl_avx2-stage1hr.Tpo -c -o gsnapl_avx2-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx2-stage1hr.Tpo $(DEPDIR)/gsnapl_avx2-stage1hr.Po @@ -31240,6 +36082,1252 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx2_CFLAGS) $(CFLAGS) -c -o gsnapl_avx2-gsnap.obj `if test -f 'gsnap.c'; then $(CYGPATH_W) 'gsnap.c'; else $(CYGPATH_W) '$(srcdir)/gsnap.c'; fi` +gsnapl_avx512-except.o: except.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-except.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-except.Tpo -c -o gsnapl_avx512-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-except.Tpo $(DEPDIR)/gsnapl_avx512-except.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='except.c' object='gsnapl_avx512-except.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c + +gsnapl_avx512-except.obj: except.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-except.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-except.Tpo -c -o gsnapl_avx512-except.obj `if test -f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) '$(srcdir)/except.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-except.Tpo $(DEPDIR)/gsnapl_avx512-except.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='except.c' object='gsnapl_avx512-except.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-except.obj `if test 
-f 'except.c'; then $(CYGPATH_W) 'except.c'; else $(CYGPATH_W) '$(srcdir)/except.c'; fi` + +gsnapl_avx512-assert.o: assert.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-assert.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-assert.Tpo -c -o gsnapl_avx512-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-assert.Tpo $(DEPDIR)/gsnapl_avx512-assert.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='assert.c' object='gsnapl_avx512-assert.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-assert.o `test -f 'assert.c' || echo '$(srcdir)/'`assert.c + +gsnapl_avx512-assert.obj: assert.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-assert.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-assert.Tpo -c -o gsnapl_avx512-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-assert.Tpo $(DEPDIR)/gsnapl_avx512-assert.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='assert.c' object='gsnapl_avx512-assert.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-assert.obj `if test -f 'assert.c'; then $(CYGPATH_W) 'assert.c'; else $(CYGPATH_W) '$(srcdir)/assert.c'; fi` + +gsnapl_avx512-mem.o: mem.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-mem.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-mem.Tpo -c -o gsnapl_avx512-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-mem.Tpo $(DEPDIR)/gsnapl_avx512-mem.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mem.c' object='gsnapl_avx512-mem.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-mem.o `test -f 'mem.c' || echo '$(srcdir)/'`mem.c + +gsnapl_avx512-mem.obj: mem.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-mem.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-mem.Tpo -c -o gsnapl_avx512-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-mem.Tpo $(DEPDIR)/gsnapl_avx512-mem.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mem.c' object='gsnapl_avx512-mem.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-mem.obj `if test -f 'mem.c'; then $(CYGPATH_W) 'mem.c'; else $(CYGPATH_W) '$(srcdir)/mem.c'; fi` + +gsnapl_avx512-intlist.o: intlist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-intlist.o -MD -MP -MF 
$(DEPDIR)/gsnapl_avx512-intlist.Tpo -c -o gsnapl_avx512-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-intlist.Tpo $(DEPDIR)/gsnapl_avx512-intlist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='intlist.c' object='gsnapl_avx512-intlist.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-intlist.o `test -f 'intlist.c' || echo '$(srcdir)/'`intlist.c + +gsnapl_avx512-intlist.obj: intlist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-intlist.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-intlist.Tpo -c -o gsnapl_avx512-intlist.obj `if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-intlist.Tpo $(DEPDIR)/gsnapl_avx512-intlist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='intlist.c' object='gsnapl_avx512-intlist.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-intlist.obj `if test -f 'intlist.c'; then $(CYGPATH_W) 'intlist.c'; else $(CYGPATH_W) '$(srcdir)/intlist.c'; fi` + +gsnapl_avx512-list.o: list.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-list.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-list.Tpo -c -o gsnapl_avx512-list.o `test -f 'list.c' || echo 
'$(srcdir)/'`list.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-list.Tpo $(DEPDIR)/gsnapl_avx512-list.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='list.c' object='gsnapl_avx512-list.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-list.o `test -f 'list.c' || echo '$(srcdir)/'`list.c + +gsnapl_avx512-list.obj: list.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-list.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-list.Tpo -c -o gsnapl_avx512-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-list.Tpo $(DEPDIR)/gsnapl_avx512-list.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='list.c' object='gsnapl_avx512-list.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-list.obj `if test -f 'list.c'; then $(CYGPATH_W) 'list.c'; else $(CYGPATH_W) '$(srcdir)/list.c'; fi` + +gsnapl_avx512-littleendian.o: littleendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-littleendian.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-littleendian.Tpo -c -o gsnapl_avx512-littleendian.o `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-littleendian.Tpo 
$(DEPDIR)/gsnapl_avx512-littleendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='littleendian.c' object='gsnapl_avx512-littleendian.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-littleendian.o `test -f 'littleendian.c' || echo '$(srcdir)/'`littleendian.c + +gsnapl_avx512-littleendian.obj: littleendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-littleendian.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-littleendian.Tpo -c -o gsnapl_avx512-littleendian.obj `if test -f 'littleendian.c'; then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-littleendian.Tpo $(DEPDIR)/gsnapl_avx512-littleendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='littleendian.c' object='gsnapl_avx512-littleendian.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-littleendian.obj `if test -f 'littleendian.c'; then $(CYGPATH_W) 'littleendian.c'; else $(CYGPATH_W) '$(srcdir)/littleendian.c'; fi` + +gsnapl_avx512-bigendian.o: bigendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bigendian.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bigendian.Tpo -c -o gsnapl_avx512-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gsnapl_avx512-bigendian.Tpo $(DEPDIR)/gsnapl_avx512-bigendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bigendian.c' object='gsnapl_avx512-bigendian.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-bigendian.o `test -f 'bigendian.c' || echo '$(srcdir)/'`bigendian.c + +gsnapl_avx512-bigendian.obj: bigendian.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bigendian.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bigendian.Tpo -c -o gsnapl_avx512-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-bigendian.Tpo $(DEPDIR)/gsnapl_avx512-bigendian.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bigendian.c' object='gsnapl_avx512-bigendian.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-bigendian.obj `if test -f 'bigendian.c'; then $(CYGPATH_W) 'bigendian.c'; else $(CYGPATH_W) '$(srcdir)/bigendian.c'; fi` + +gsnapl_avx512-univinterval.o: univinterval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-univinterval.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-univinterval.Tpo -c -o gsnapl_avx512-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gsnapl_avx512-univinterval.Tpo $(DEPDIR)/gsnapl_avx512-univinterval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='univinterval.c' object='gsnapl_avx512-univinterval.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-univinterval.o `test -f 'univinterval.c' || echo '$(srcdir)/'`univinterval.c + +gsnapl_avx512-univinterval.obj: univinterval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-univinterval.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-univinterval.Tpo -c -o gsnapl_avx512-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-univinterval.Tpo $(DEPDIR)/gsnapl_avx512-univinterval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='univinterval.c' object='gsnapl_avx512-univinterval.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-univinterval.obj `if test -f 'univinterval.c'; then $(CYGPATH_W) 'univinterval.c'; else $(CYGPATH_W) '$(srcdir)/univinterval.c'; fi` + +gsnapl_avx512-interval.o: interval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-interval.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-interval.Tpo -c -o gsnapl_avx512-interval.o `test -f 'interval.c' || echo '$(srcdir)/'`interval.c 
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-interval.Tpo $(DEPDIR)/gsnapl_avx512-interval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='interval.c' object='gsnapl_avx512-interval.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-interval.o `test -f 'interval.c' || echo '$(srcdir)/'`interval.c + +gsnapl_avx512-interval.obj: interval.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-interval.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-interval.Tpo -c -o gsnapl_avx512-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-interval.Tpo $(DEPDIR)/gsnapl_avx512-interval.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='interval.c' object='gsnapl_avx512-interval.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-interval.obj `if test -f 'interval.c'; then $(CYGPATH_W) 'interval.c'; else $(CYGPATH_W) '$(srcdir)/interval.c'; fi` + +gsnapl_avx512-uintlist.o: uintlist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-uintlist.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-uintlist.Tpo -c -o gsnapl_avx512-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gsnapl_avx512-uintlist.Tpo $(DEPDIR)/gsnapl_avx512-uintlist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uintlist.c' object='gsnapl_avx512-uintlist.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-uintlist.o `test -f 'uintlist.c' || echo '$(srcdir)/'`uintlist.c + +gsnapl_avx512-uintlist.obj: uintlist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-uintlist.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-uintlist.Tpo -c -o gsnapl_avx512-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-uintlist.Tpo $(DEPDIR)/gsnapl_avx512-uintlist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uintlist.c' object='gsnapl_avx512-uintlist.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-uintlist.obj `if test -f 'uintlist.c'; then $(CYGPATH_W) 'uintlist.c'; else $(CYGPATH_W) '$(srcdir)/uintlist.c'; fi` + +gsnapl_avx512-uint8list.o: uint8list.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-uint8list.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-uint8list.Tpo -c -o gsnapl_avx512-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-uint8list.Tpo 
$(DEPDIR)/gsnapl_avx512-uint8list.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uint8list.c' object='gsnapl_avx512-uint8list.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-uint8list.o `test -f 'uint8list.c' || echo '$(srcdir)/'`uint8list.c + +gsnapl_avx512-uint8list.obj: uint8list.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-uint8list.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-uint8list.Tpo -c -o gsnapl_avx512-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-uint8list.Tpo $(DEPDIR)/gsnapl_avx512-uint8list.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uint8list.c' object='gsnapl_avx512-uint8list.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-uint8list.obj `if test -f 'uint8list.c'; then $(CYGPATH_W) 'uint8list.c'; else $(CYGPATH_W) '$(srcdir)/uint8list.c'; fi` + +gsnapl_avx512-stopwatch.o: stopwatch.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stopwatch.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stopwatch.Tpo -c -o gsnapl_avx512-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stopwatch.Tpo 
$(DEPDIR)/gsnapl_avx512-stopwatch.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stopwatch.c' object='gsnapl_avx512-stopwatch.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-stopwatch.o `test -f 'stopwatch.c' || echo '$(srcdir)/'`stopwatch.c + +gsnapl_avx512-stopwatch.obj: stopwatch.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stopwatch.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stopwatch.Tpo -c -o gsnapl_avx512-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stopwatch.Tpo $(DEPDIR)/gsnapl_avx512-stopwatch.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stopwatch.c' object='gsnapl_avx512-stopwatch.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-stopwatch.obj `if test -f 'stopwatch.c'; then $(CYGPATH_W) 'stopwatch.c'; else $(CYGPATH_W) '$(srcdir)/stopwatch.c'; fi` + +gsnapl_avx512-semaphore.o: semaphore.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-semaphore.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-semaphore.Tpo -c -o gsnapl_avx512-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-semaphore.Tpo 
$(DEPDIR)/gsnapl_avx512-semaphore.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='semaphore.c' object='gsnapl_avx512-semaphore.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-semaphore.o `test -f 'semaphore.c' || echo '$(srcdir)/'`semaphore.c + +gsnapl_avx512-semaphore.obj: semaphore.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-semaphore.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-semaphore.Tpo -c -o gsnapl_avx512-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-semaphore.Tpo $(DEPDIR)/gsnapl_avx512-semaphore.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='semaphore.c' object='gsnapl_avx512-semaphore.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-semaphore.obj `if test -f 'semaphore.c'; then $(CYGPATH_W) 'semaphore.c'; else $(CYGPATH_W) '$(srcdir)/semaphore.c'; fi` + +gsnapl_avx512-access.o: access.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-access.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-access.Tpo -c -o gsnapl_avx512-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-access.Tpo $(DEPDIR)/gsnapl_avx512-access.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='access.c' object='gsnapl_avx512-access.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-access.o `test -f 'access.c' || echo '$(srcdir)/'`access.c + +gsnapl_avx512-access.obj: access.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-access.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-access.Tpo -c -o gsnapl_avx512-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-access.Tpo $(DEPDIR)/gsnapl_avx512-access.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='access.c' object='gsnapl_avx512-access.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-access.obj `if test -f 'access.c'; then $(CYGPATH_W) 'access.c'; else $(CYGPATH_W) '$(srcdir)/access.c'; fi` + +gsnapl_avx512-filestring.o: filestring.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-filestring.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-filestring.Tpo -c -o gsnapl_avx512-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-filestring.Tpo $(DEPDIR)/gsnapl_avx512-filestring.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='filestring.c' 
object='gsnapl_avx512-filestring.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-filestring.o `test -f 'filestring.c' || echo '$(srcdir)/'`filestring.c + +gsnapl_avx512-filestring.obj: filestring.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-filestring.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-filestring.Tpo -c -o gsnapl_avx512-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-filestring.Tpo $(DEPDIR)/gsnapl_avx512-filestring.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='filestring.c' object='gsnapl_avx512-filestring.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-filestring.obj `if test -f 'filestring.c'; then $(CYGPATH_W) 'filestring.c'; else $(CYGPATH_W) '$(srcdir)/filestring.c'; fi` + +gsnapl_avx512-iit-read-univ.o: iit-read-univ.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-iit-read-univ.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-iit-read-univ.Tpo -c -o gsnapl_avx512-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-iit-read-univ.Tpo $(DEPDIR)/gsnapl_avx512-iit-read-univ.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='iit-read-univ.c' object='gsnapl_avx512-iit-read-univ.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-iit-read-univ.o `test -f 'iit-read-univ.c' || echo '$(srcdir)/'`iit-read-univ.c + +gsnapl_avx512-iit-read-univ.obj: iit-read-univ.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-iit-read-univ.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-iit-read-univ.Tpo -c -o gsnapl_avx512-iit-read-univ.obj `if test -f 'iit-read-univ.c'; then $(CYGPATH_W) 'iit-read-univ.c'; else $(CYGPATH_W) '$(srcdir)/iit-read-univ.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-iit-read-univ.Tpo $(DEPDIR)/gsnapl_avx512-iit-read-univ.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='iit-read-univ.c' object='gsnapl_avx512-iit-read-univ.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-iit-read-univ.obj `if test -f 'iit-read-univ.c'; then $(CYGPATH_W) 'iit-read-univ.c'; else $(CYGPATH_W) '$(srcdir)/iit-read-univ.c'; fi` + +gsnapl_avx512-iit-read.o: iit-read.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-iit-read.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-iit-read.Tpo -c -o gsnapl_avx512-iit-read.o `test -f 'iit-read.c' || echo '$(srcdir)/'`iit-read.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-iit-read.Tpo 
$(DEPDIR)/gsnapl_avx512-iit-read.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='iit-read.c' object='gsnapl_avx512-iit-read.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-iit-read.o `test -f 'iit-read.c' || echo '$(srcdir)/'`iit-read.c + +gsnapl_avx512-iit-read.obj: iit-read.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-iit-read.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-iit-read.Tpo -c -o gsnapl_avx512-iit-read.obj `if test -f 'iit-read.c'; then $(CYGPATH_W) 'iit-read.c'; else $(CYGPATH_W) '$(srcdir)/iit-read.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-iit-read.Tpo $(DEPDIR)/gsnapl_avx512-iit-read.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='iit-read.c' object='gsnapl_avx512-iit-read.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-iit-read.obj `if test -f 'iit-read.c'; then $(CYGPATH_W) 'iit-read.c'; else $(CYGPATH_W) '$(srcdir)/iit-read.c'; fi` + +gsnapl_avx512-md5.o: md5.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-md5.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-md5.Tpo -c -o gsnapl_avx512-md5.o `test -f 'md5.c' || echo '$(srcdir)/'`md5.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-md5.Tpo $(DEPDIR)/gsnapl_avx512-md5.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='md5.c' 
object='gsnapl_avx512-md5.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-md5.o `test -f 'md5.c' || echo '$(srcdir)/'`md5.c + +gsnapl_avx512-md5.obj: md5.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-md5.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-md5.Tpo -c -o gsnapl_avx512-md5.obj `if test -f 'md5.c'; then $(CYGPATH_W) 'md5.c'; else $(CYGPATH_W) '$(srcdir)/md5.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-md5.Tpo $(DEPDIR)/gsnapl_avx512-md5.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='md5.c' object='gsnapl_avx512-md5.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-md5.obj `if test -f 'md5.c'; then $(CYGPATH_W) 'md5.c'; else $(CYGPATH_W) '$(srcdir)/md5.c'; fi` + +gsnapl_avx512-bzip2.o: bzip2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bzip2.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bzip2.Tpo -c -o gsnapl_avx512-bzip2.o `test -f 'bzip2.c' || echo '$(srcdir)/'`bzip2.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-bzip2.Tpo $(DEPDIR)/gsnapl_avx512-bzip2.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bzip2.c' object='gsnapl_avx512-bzip2.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-bzip2.o `test -f 'bzip2.c' || echo '$(srcdir)/'`bzip2.c + +gsnapl_avx512-bzip2.obj: bzip2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bzip2.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bzip2.Tpo -c -o gsnapl_avx512-bzip2.obj `if test -f 'bzip2.c'; then $(CYGPATH_W) 'bzip2.c'; else $(CYGPATH_W) '$(srcdir)/bzip2.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-bzip2.Tpo $(DEPDIR)/gsnapl_avx512-bzip2.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bzip2.c' object='gsnapl_avx512-bzip2.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-bzip2.obj `if test -f 'bzip2.c'; then $(CYGPATH_W) 'bzip2.c'; else $(CYGPATH_W) '$(srcdir)/bzip2.c'; fi` + +gsnapl_avx512-sequence.o: sequence.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-sequence.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-sequence.Tpo -c -o gsnapl_avx512-sequence.o `test -f 'sequence.c' || echo '$(srcdir)/'`sequence.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-sequence.Tpo $(DEPDIR)/gsnapl_avx512-sequence.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sequence.c' object='gsnapl_avx512-sequence.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) 
$(CFLAGS) -c -o gsnapl_avx512-sequence.o `test -f 'sequence.c' || echo '$(srcdir)/'`sequence.c + +gsnapl_avx512-sequence.obj: sequence.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-sequence.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-sequence.Tpo -c -o gsnapl_avx512-sequence.obj `if test -f 'sequence.c'; then $(CYGPATH_W) 'sequence.c'; else $(CYGPATH_W) '$(srcdir)/sequence.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-sequence.Tpo $(DEPDIR)/gsnapl_avx512-sequence.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='sequence.c' object='gsnapl_avx512-sequence.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-sequence.obj `if test -f 'sequence.c'; then $(CYGPATH_W) 'sequence.c'; else $(CYGPATH_W) '$(srcdir)/sequence.c'; fi` + +gsnapl_avx512-reader.o: reader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-reader.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-reader.Tpo -c -o gsnapl_avx512-reader.o `test -f 'reader.c' || echo '$(srcdir)/'`reader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-reader.Tpo $(DEPDIR)/gsnapl_avx512-reader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='reader.c' object='gsnapl_avx512-reader.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-reader.o `test -f 'reader.c' || echo 
'$(srcdir)/'`reader.c + +gsnapl_avx512-reader.obj: reader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-reader.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-reader.Tpo -c -o gsnapl_avx512-reader.obj `if test -f 'reader.c'; then $(CYGPATH_W) 'reader.c'; else $(CYGPATH_W) '$(srcdir)/reader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-reader.Tpo $(DEPDIR)/gsnapl_avx512-reader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='reader.c' object='gsnapl_avx512-reader.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-reader.obj `if test -f 'reader.c'; then $(CYGPATH_W) 'reader.c'; else $(CYGPATH_W) '$(srcdir)/reader.c'; fi` + +gsnapl_avx512-genomicpos.o: genomicpos.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-genomicpos.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-genomicpos.Tpo -c -o gsnapl_avx512-genomicpos.o `test -f 'genomicpos.c' || echo '$(srcdir)/'`genomicpos.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-genomicpos.Tpo $(DEPDIR)/gsnapl_avx512-genomicpos.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genomicpos.c' object='gsnapl_avx512-genomicpos.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-genomicpos.o `test -f 'genomicpos.c' || echo '$(srcdir)/'`genomicpos.c + +gsnapl_avx512-genomicpos.obj: genomicpos.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-genomicpos.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-genomicpos.Tpo -c -o gsnapl_avx512-genomicpos.obj `if test -f 'genomicpos.c'; then $(CYGPATH_W) 'genomicpos.c'; else $(CYGPATH_W) '$(srcdir)/genomicpos.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-genomicpos.Tpo $(DEPDIR)/gsnapl_avx512-genomicpos.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genomicpos.c' object='gsnapl_avx512-genomicpos.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-genomicpos.obj `if test -f 'genomicpos.c'; then $(CYGPATH_W) 'genomicpos.c'; else $(CYGPATH_W) '$(srcdir)/genomicpos.c'; fi` + +gsnapl_avx512-compress.o: compress.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-compress.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-compress.Tpo -c -o gsnapl_avx512-compress.o `test -f 'compress.c' || echo '$(srcdir)/'`compress.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-compress.Tpo $(DEPDIR)/gsnapl_avx512-compress.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compress.c' object='gsnapl_avx512-compress.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-compress.o `test -f 'compress.c' || echo '$(srcdir)/'`compress.c + +gsnapl_avx512-compress.obj: compress.c +@am__fastdepCC_TRUE@ 
$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-compress.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-compress.Tpo -c -o gsnapl_avx512-compress.obj `if test -f 'compress.c'; then $(CYGPATH_W) 'compress.c'; else $(CYGPATH_W) '$(srcdir)/compress.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-compress.Tpo $(DEPDIR)/gsnapl_avx512-compress.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='compress.c' object='gsnapl_avx512-compress.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-compress.obj `if test -f 'compress.c'; then $(CYGPATH_W) 'compress.c'; else $(CYGPATH_W) '$(srcdir)/compress.c'; fi` + +gsnapl_avx512-genome.o: genome.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-genome.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-genome.Tpo -c -o gsnapl_avx512-genome.o `test -f 'genome.c' || echo '$(srcdir)/'`genome.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-genome.Tpo $(DEPDIR)/gsnapl_avx512-genome.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome.c' object='gsnapl_avx512-genome.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-genome.o `test -f 'genome.c' || echo '$(srcdir)/'`genome.c + +gsnapl_avx512-genome.obj: genome.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-genome.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-genome.Tpo -c -o gsnapl_avx512-genome.obj `if test -f 'genome.c'; then $(CYGPATH_W) 'genome.c'; else $(CYGPATH_W) '$(srcdir)/genome.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-genome.Tpo $(DEPDIR)/gsnapl_avx512-genome.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome.c' object='gsnapl_avx512-genome.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-genome.obj `if test -f 'genome.c'; then $(CYGPATH_W) 'genome.c'; else $(CYGPATH_W) '$(srcdir)/genome.c'; fi` + +gsnapl_avx512-popcount.o: popcount.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-popcount.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-popcount.Tpo -c -o gsnapl_avx512-popcount.o `test -f 'popcount.c' || echo '$(srcdir)/'`popcount.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-popcount.Tpo $(DEPDIR)/gsnapl_avx512-popcount.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='popcount.c' object='gsnapl_avx512-popcount.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-popcount.o `test -f 'popcount.c' || echo '$(srcdir)/'`popcount.c + +gsnapl_avx512-popcount.obj: popcount.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-popcount.obj -MD -MP -MF 
$(DEPDIR)/gsnapl_avx512-popcount.Tpo -c -o gsnapl_avx512-popcount.obj `if test -f 'popcount.c'; then $(CYGPATH_W) 'popcount.c'; else $(CYGPATH_W) '$(srcdir)/popcount.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-popcount.Tpo $(DEPDIR)/gsnapl_avx512-popcount.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='popcount.c' object='gsnapl_avx512-popcount.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-popcount.obj `if test -f 'popcount.c'; then $(CYGPATH_W) 'popcount.c'; else $(CYGPATH_W) '$(srcdir)/popcount.c'; fi` + +gsnapl_avx512-genome128_hr.o: genome128_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-genome128_hr.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-genome128_hr.Tpo -c -o gsnapl_avx512-genome128_hr.o `test -f 'genome128_hr.c' || echo '$(srcdir)/'`genome128_hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-genome128_hr.Tpo $(DEPDIR)/gsnapl_avx512-genome128_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome128_hr.c' object='gsnapl_avx512-genome128_hr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-genome128_hr.o `test -f 'genome128_hr.c' || echo '$(srcdir)/'`genome128_hr.c + +gsnapl_avx512-genome128_hr.obj: genome128_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-genome128_hr.obj -MD -MP 
-MF $(DEPDIR)/gsnapl_avx512-genome128_hr.Tpo -c -o gsnapl_avx512-genome128_hr.obj `if test -f 'genome128_hr.c'; then $(CYGPATH_W) 'genome128_hr.c'; else $(CYGPATH_W) '$(srcdir)/genome128_hr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-genome128_hr.Tpo $(DEPDIR)/gsnapl_avx512-genome128_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome128_hr.c' object='gsnapl_avx512-genome128_hr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-genome128_hr.obj `if test -f 'genome128_hr.c'; then $(CYGPATH_W) 'genome128_hr.c'; else $(CYGPATH_W) '$(srcdir)/genome128_hr.c'; fi` + +gsnapl_avx512-genome_sites.o: genome_sites.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-genome_sites.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-genome_sites.Tpo -c -o gsnapl_avx512-genome_sites.o `test -f 'genome_sites.c' || echo '$(srcdir)/'`genome_sites.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-genome_sites.Tpo $(DEPDIR)/gsnapl_avx512-genome_sites.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome_sites.c' object='gsnapl_avx512-genome_sites.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-genome_sites.o `test -f 'genome_sites.c' || echo '$(srcdir)/'`genome_sites.c + +gsnapl_avx512-genome_sites.obj: genome_sites.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-genome_sites.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-genome_sites.Tpo -c -o gsnapl_avx512-genome_sites.obj `if test -f 'genome_sites.c'; then $(CYGPATH_W) 'genome_sites.c'; else $(CYGPATH_W) '$(srcdir)/genome_sites.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-genome_sites.Tpo $(DEPDIR)/gsnapl_avx512-genome_sites.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='genome_sites.c' object='gsnapl_avx512-genome_sites.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-genome_sites.obj `if test -f 'genome_sites.c'; then $(CYGPATH_W) 'genome_sites.c'; else $(CYGPATH_W) '$(srcdir)/genome_sites.c'; fi` + +gsnapl_avx512-bitpack64-read.o: bitpack64-read.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bitpack64-read.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bitpack64-read.Tpo -c -o gsnapl_avx512-bitpack64-read.o `test -f 'bitpack64-read.c' || echo '$(srcdir)/'`bitpack64-read.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-bitpack64-read.Tpo $(DEPDIR)/gsnapl_avx512-bitpack64-read.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bitpack64-read.c' object='gsnapl_avx512-bitpack64-read.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-bitpack64-read.o `test -f 'bitpack64-read.c' || echo '$(srcdir)/'`bitpack64-read.c + +gsnapl_avx512-bitpack64-read.obj: bitpack64-read.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bitpack64-read.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bitpack64-read.Tpo -c -o gsnapl_avx512-bitpack64-read.obj `if test -f 'bitpack64-read.c'; then $(CYGPATH_W) 'bitpack64-read.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-read.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-bitpack64-read.Tpo $(DEPDIR)/gsnapl_avx512-bitpack64-read.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bitpack64-read.c' object='gsnapl_avx512-bitpack64-read.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-bitpack64-read.obj `if test -f 'bitpack64-read.c'; then $(CYGPATH_W) 'bitpack64-read.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-read.c'; fi` + +gsnapl_avx512-bitpack64-readtwo.o: bitpack64-readtwo.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bitpack64-readtwo.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bitpack64-readtwo.Tpo -c -o gsnapl_avx512-bitpack64-readtwo.o `test -f 'bitpack64-readtwo.c' || echo '$(srcdir)/'`bitpack64-readtwo.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-bitpack64-readtwo.Tpo $(DEPDIR)/gsnapl_avx512-bitpack64-readtwo.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bitpack64-readtwo.c' object='gsnapl_avx512-bitpack64-readtwo.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o 
gsnapl_avx512-bitpack64-readtwo.o `test -f 'bitpack64-readtwo.c' || echo '$(srcdir)/'`bitpack64-readtwo.c + +gsnapl_avx512-bitpack64-readtwo.obj: bitpack64-readtwo.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bitpack64-readtwo.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bitpack64-readtwo.Tpo -c -o gsnapl_avx512-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-bitpack64-readtwo.Tpo $(DEPDIR)/gsnapl_avx512-bitpack64-readtwo.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bitpack64-readtwo.c' object='gsnapl_avx512-bitpack64-readtwo.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` + +gsnapl_avx512-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-merge.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-merge.Tpo -c -o gsnapl_avx512-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-merge.Tpo $(DEPDIR)/gsnapl_avx512-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnapl_avx512-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gsnapl_avx512-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-merge.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-merge.Tpo -c -o gsnapl_avx512-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-merge.Tpo $(DEPDIR)/gsnapl_avx512-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnapl_avx512-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + +gsnapl_avx512-indexdb.o: indexdb.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-indexdb.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-indexdb.Tpo -c -o gsnapl_avx512-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-indexdb.Tpo $(DEPDIR)/gsnapl_avx512-indexdb.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='indexdb.c' object='gsnapl_avx512-indexdb.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-indexdb.o `test -f 'indexdb.c' || echo 
'$(srcdir)/'`indexdb.c + +gsnapl_avx512-indexdb.obj: indexdb.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-indexdb.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-indexdb.Tpo -c -o gsnapl_avx512-indexdb.obj `if test -f 'indexdb.c'; then $(CYGPATH_W) 'indexdb.c'; else $(CYGPATH_W) '$(srcdir)/indexdb.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-indexdb.Tpo $(DEPDIR)/gsnapl_avx512-indexdb.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='indexdb.c' object='gsnapl_avx512-indexdb.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-indexdb.obj `if test -f 'indexdb.c'; then $(CYGPATH_W) 'indexdb.c'; else $(CYGPATH_W) '$(srcdir)/indexdb.c'; fi` + +gsnapl_avx512-indexdb_hr.o: indexdb_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-indexdb_hr.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-indexdb_hr.Tpo -c -o gsnapl_avx512-indexdb_hr.o `test -f 'indexdb_hr.c' || echo '$(srcdir)/'`indexdb_hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-indexdb_hr.Tpo $(DEPDIR)/gsnapl_avx512-indexdb_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='indexdb_hr.c' object='gsnapl_avx512-indexdb_hr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-indexdb_hr.o `test -f 'indexdb_hr.c' || echo '$(srcdir)/'`indexdb_hr.c + 
+gsnapl_avx512-indexdb_hr.obj: indexdb_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-indexdb_hr.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-indexdb_hr.Tpo -c -o gsnapl_avx512-indexdb_hr.obj `if test -f 'indexdb_hr.c'; then $(CYGPATH_W) 'indexdb_hr.c'; else $(CYGPATH_W) '$(srcdir)/indexdb_hr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-indexdb_hr.Tpo $(DEPDIR)/gsnapl_avx512-indexdb_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='indexdb_hr.c' object='gsnapl_avx512-indexdb_hr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-indexdb_hr.obj `if test -f 'indexdb_hr.c'; then $(CYGPATH_W) 'indexdb_hr.c'; else $(CYGPATH_W) '$(srcdir)/indexdb_hr.c'; fi` + +gsnapl_avx512-oligo.o: oligo.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-oligo.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-oligo.Tpo -c -o gsnapl_avx512-oligo.o `test -f 'oligo.c' || echo '$(srcdir)/'`oligo.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-oligo.Tpo $(DEPDIR)/gsnapl_avx512-oligo.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='oligo.c' object='gsnapl_avx512-oligo.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-oligo.o `test -f 'oligo.c' || echo '$(srcdir)/'`oligo.c + +gsnapl_avx512-oligo.obj: oligo.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-oligo.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-oligo.Tpo -c -o gsnapl_avx512-oligo.obj `if test -f 'oligo.c'; then $(CYGPATH_W) 'oligo.c'; else $(CYGPATH_W) '$(srcdir)/oligo.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-oligo.Tpo $(DEPDIR)/gsnapl_avx512-oligo.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='oligo.c' object='gsnapl_avx512-oligo.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-oligo.obj `if test -f 'oligo.c'; then $(CYGPATH_W) 'oligo.c'; else $(CYGPATH_W) '$(srcdir)/oligo.c'; fi` + +gsnapl_avx512-chrom.o: chrom.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-chrom.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-chrom.Tpo -c -o gsnapl_avx512-chrom.o `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-chrom.Tpo $(DEPDIR)/gsnapl_avx512-chrom.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chrom.c' object='gsnapl_avx512-chrom.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-chrom.o `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c + +gsnapl_avx512-chrom.obj: chrom.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-chrom.obj -MD -MP -MF 
$(DEPDIR)/gsnapl_avx512-chrom.Tpo -c -o gsnapl_avx512-chrom.obj `if test -f 'chrom.c'; then $(CYGPATH_W) 'chrom.c'; else $(CYGPATH_W) '$(srcdir)/chrom.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-chrom.Tpo $(DEPDIR)/gsnapl_avx512-chrom.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chrom.c' object='gsnapl_avx512-chrom.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-chrom.obj `if test -f 'chrom.c'; then $(CYGPATH_W) 'chrom.c'; else $(CYGPATH_W) '$(srcdir)/chrom.c'; fi` + +gsnapl_avx512-segmentpos.o: segmentpos.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-segmentpos.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-segmentpos.Tpo -c -o gsnapl_avx512-segmentpos.o `test -f 'segmentpos.c' || echo '$(srcdir)/'`segmentpos.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-segmentpos.Tpo $(DEPDIR)/gsnapl_avx512-segmentpos.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='segmentpos.c' object='gsnapl_avx512-segmentpos.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-segmentpos.o `test -f 'segmentpos.c' || echo '$(srcdir)/'`segmentpos.c + +gsnapl_avx512-segmentpos.obj: segmentpos.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-segmentpos.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-segmentpos.Tpo -c -o 
gsnapl_avx512-segmentpos.obj `if test -f 'segmentpos.c'; then $(CYGPATH_W) 'segmentpos.c'; else $(CYGPATH_W) '$(srcdir)/segmentpos.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-segmentpos.Tpo $(DEPDIR)/gsnapl_avx512-segmentpos.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='segmentpos.c' object='gsnapl_avx512-segmentpos.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-segmentpos.obj `if test -f 'segmentpos.c'; then $(CYGPATH_W) 'segmentpos.c'; else $(CYGPATH_W) '$(srcdir)/segmentpos.c'; fi` + +gsnapl_avx512-chrnum.o: chrnum.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-chrnum.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-chrnum.Tpo -c -o gsnapl_avx512-chrnum.o `test -f 'chrnum.c' || echo '$(srcdir)/'`chrnum.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-chrnum.Tpo $(DEPDIR)/gsnapl_avx512-chrnum.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chrnum.c' object='gsnapl_avx512-chrnum.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-chrnum.o `test -f 'chrnum.c' || echo '$(srcdir)/'`chrnum.c + +gsnapl_avx512-chrnum.obj: chrnum.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-chrnum.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-chrnum.Tpo -c -o gsnapl_avx512-chrnum.obj `if test -f 'chrnum.c'; then $(CYGPATH_W) 
'chrnum.c'; else $(CYGPATH_W) '$(srcdir)/chrnum.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-chrnum.Tpo $(DEPDIR)/gsnapl_avx512-chrnum.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chrnum.c' object='gsnapl_avx512-chrnum.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-chrnum.obj `if test -f 'chrnum.c'; then $(CYGPATH_W) 'chrnum.c'; else $(CYGPATH_W) '$(srcdir)/chrnum.c'; fi` + +gsnapl_avx512-maxent_hr.o: maxent_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-maxent_hr.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-maxent_hr.Tpo -c -o gsnapl_avx512-maxent_hr.o `test -f 'maxent_hr.c' || echo '$(srcdir)/'`maxent_hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-maxent_hr.Tpo $(DEPDIR)/gsnapl_avx512-maxent_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='maxent_hr.c' object='gsnapl_avx512-maxent_hr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-maxent_hr.o `test -f 'maxent_hr.c' || echo '$(srcdir)/'`maxent_hr.c + +gsnapl_avx512-maxent_hr.obj: maxent_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-maxent_hr.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-maxent_hr.Tpo -c -o gsnapl_avx512-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi` 
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-maxent_hr.Tpo $(DEPDIR)/gsnapl_avx512-maxent_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='maxent_hr.c' object='gsnapl_avx512-maxent_hr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-maxent_hr.obj `if test -f 'maxent_hr.c'; then $(CYGPATH_W) 'maxent_hr.c'; else $(CYGPATH_W) '$(srcdir)/maxent_hr.c'; fi` + +gsnapl_avx512-samprint.o: samprint.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-samprint.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-samprint.Tpo -c -o gsnapl_avx512-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-samprint.Tpo $(DEPDIR)/gsnapl_avx512-samprint.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='samprint.c' object='gsnapl_avx512-samprint.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-samprint.o `test -f 'samprint.c' || echo '$(srcdir)/'`samprint.c + +gsnapl_avx512-samprint.obj: samprint.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-samprint.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-samprint.Tpo -c -o gsnapl_avx512-samprint.obj `if test -f 'samprint.c'; then $(CYGPATH_W) 'samprint.c'; else $(CYGPATH_W) '$(srcdir)/samprint.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gsnapl_avx512-samprint.Tpo $(DEPDIR)/gsnapl_avx512-samprint.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='samprint.c' object='gsnapl_avx512-samprint.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-samprint.obj `if test -f 'samprint.c'; then $(CYGPATH_W) 'samprint.c'; else $(CYGPATH_W) '$(srcdir)/samprint.c'; fi` + +gsnapl_avx512-mapq.o: mapq.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-mapq.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-mapq.Tpo -c -o gsnapl_avx512-mapq.o `test -f 'mapq.c' || echo '$(srcdir)/'`mapq.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-mapq.Tpo $(DEPDIR)/gsnapl_avx512-mapq.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mapq.c' object='gsnapl_avx512-mapq.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-mapq.o `test -f 'mapq.c' || echo '$(srcdir)/'`mapq.c + +gsnapl_avx512-mapq.obj: mapq.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-mapq.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-mapq.Tpo -c -o gsnapl_avx512-mapq.obj `if test -f 'mapq.c'; then $(CYGPATH_W) 'mapq.c'; else $(CYGPATH_W) '$(srcdir)/mapq.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-mapq.Tpo $(DEPDIR)/gsnapl_avx512-mapq.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='mapq.c' 
object='gsnapl_avx512-mapq.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-mapq.obj `if test -f 'mapq.c'; then $(CYGPATH_W) 'mapq.c'; else $(CYGPATH_W) '$(srcdir)/mapq.c'; fi` + +gsnapl_avx512-shortread.o: shortread.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-shortread.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-shortread.Tpo -c -o gsnapl_avx512-shortread.o `test -f 'shortread.c' || echo '$(srcdir)/'`shortread.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-shortread.Tpo $(DEPDIR)/gsnapl_avx512-shortread.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='shortread.c' object='gsnapl_avx512-shortread.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-shortread.o `test -f 'shortread.c' || echo '$(srcdir)/'`shortread.c + +gsnapl_avx512-shortread.obj: shortread.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-shortread.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-shortread.Tpo -c -o gsnapl_avx512-shortread.obj `if test -f 'shortread.c'; then $(CYGPATH_W) 'shortread.c'; else $(CYGPATH_W) '$(srcdir)/shortread.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-shortread.Tpo $(DEPDIR)/gsnapl_avx512-shortread.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='shortread.c' object='gsnapl_avx512-shortread.obj' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-shortread.obj `if test -f 'shortread.c'; then $(CYGPATH_W) 'shortread.c'; else $(CYGPATH_W) '$(srcdir)/shortread.c'; fi` + +gsnapl_avx512-substring.o: substring.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-substring.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-substring.Tpo -c -o gsnapl_avx512-substring.o `test -f 'substring.c' || echo '$(srcdir)/'`substring.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-substring.Tpo $(DEPDIR)/gsnapl_avx512-substring.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='substring.c' object='gsnapl_avx512-substring.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-substring.o `test -f 'substring.c' || echo '$(srcdir)/'`substring.c + +gsnapl_avx512-substring.obj: substring.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-substring.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-substring.Tpo -c -o gsnapl_avx512-substring.obj `if test -f 'substring.c'; then $(CYGPATH_W) 'substring.c'; else $(CYGPATH_W) '$(srcdir)/substring.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-substring.Tpo $(DEPDIR)/gsnapl_avx512-substring.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='substring.c' object='gsnapl_avx512-substring.obj' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-substring.obj `if test -f 'substring.c'; then $(CYGPATH_W) 'substring.c'; else $(CYGPATH_W) '$(srcdir)/substring.c'; fi` + +gsnapl_avx512-junction.o: junction.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-junction.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-junction.Tpo -c -o gsnapl_avx512-junction.o `test -f 'junction.c' || echo '$(srcdir)/'`junction.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-junction.Tpo $(DEPDIR)/gsnapl_avx512-junction.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='junction.c' object='gsnapl_avx512-junction.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-junction.o `test -f 'junction.c' || echo '$(srcdir)/'`junction.c + +gsnapl_avx512-junction.obj: junction.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-junction.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-junction.Tpo -c -o gsnapl_avx512-junction.obj `if test -f 'junction.c'; then $(CYGPATH_W) 'junction.c'; else $(CYGPATH_W) '$(srcdir)/junction.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-junction.Tpo $(DEPDIR)/gsnapl_avx512-junction.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='junction.c' object='gsnapl_avx512-junction.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) 
$(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-junction.obj `if test -f 'junction.c'; then $(CYGPATH_W) 'junction.c'; else $(CYGPATH_W) '$(srcdir)/junction.c'; fi` + +gsnapl_avx512-stage3hr.o: stage3hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stage3hr.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stage3hr.Tpo -c -o gsnapl_avx512-stage3hr.o `test -f 'stage3hr.c' || echo '$(srcdir)/'`stage3hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stage3hr.Tpo $(DEPDIR)/gsnapl_avx512-stage3hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage3hr.c' object='gsnapl_avx512-stage3hr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-stage3hr.o `test -f 'stage3hr.c' || echo '$(srcdir)/'`stage3hr.c + +gsnapl_avx512-stage3hr.obj: stage3hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stage3hr.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stage3hr.Tpo -c -o gsnapl_avx512-stage3hr.obj `if test -f 'stage3hr.c'; then $(CYGPATH_W) 'stage3hr.c'; else $(CYGPATH_W) '$(srcdir)/stage3hr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stage3hr.Tpo $(DEPDIR)/gsnapl_avx512-stage3hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage3hr.c' object='gsnapl_avx512-stage3hr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-stage3hr.obj `if test -f 'stage3hr.c'; then $(CYGPATH_W) 'stage3hr.c'; else $(CYGPATH_W) '$(srcdir)/stage3hr.c'; fi` + +gsnapl_avx512-spanningelt.o: spanningelt.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-spanningelt.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-spanningelt.Tpo -c -o gsnapl_avx512-spanningelt.o `test -f 'spanningelt.c' || echo '$(srcdir)/'`spanningelt.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-spanningelt.Tpo $(DEPDIR)/gsnapl_avx512-spanningelt.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='spanningelt.c' object='gsnapl_avx512-spanningelt.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-spanningelt.o `test -f 'spanningelt.c' || echo '$(srcdir)/'`spanningelt.c + +gsnapl_avx512-spanningelt.obj: spanningelt.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-spanningelt.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-spanningelt.Tpo -c -o gsnapl_avx512-spanningelt.obj `if test -f 'spanningelt.c'; then $(CYGPATH_W) 'spanningelt.c'; else $(CYGPATH_W) '$(srcdir)/spanningelt.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-spanningelt.Tpo $(DEPDIR)/gsnapl_avx512-spanningelt.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='spanningelt.c' object='gsnapl_avx512-spanningelt.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-spanningelt.obj `if test -f 'spanningelt.c'; then $(CYGPATH_W) 'spanningelt.c'; else $(CYGPATH_W) '$(srcdir)/spanningelt.c'; fi` + +gsnapl_avx512-cmet.o: cmet.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-cmet.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-cmet.Tpo -c -o gsnapl_avx512-cmet.o `test -f 'cmet.c' || echo '$(srcdir)/'`cmet.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-cmet.Tpo $(DEPDIR)/gsnapl_avx512-cmet.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cmet.c' object='gsnapl_avx512-cmet.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-cmet.o `test -f 'cmet.c' || echo '$(srcdir)/'`cmet.c + +gsnapl_avx512-cmet.obj: cmet.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-cmet.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-cmet.Tpo -c -o gsnapl_avx512-cmet.obj `if test -f 'cmet.c'; then $(CYGPATH_W) 'cmet.c'; else $(CYGPATH_W) '$(srcdir)/cmet.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-cmet.Tpo $(DEPDIR)/gsnapl_avx512-cmet.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cmet.c' object='gsnapl_avx512-cmet.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c 
-o gsnapl_avx512-cmet.obj `if test -f 'cmet.c'; then $(CYGPATH_W) 'cmet.c'; else $(CYGPATH_W) '$(srcdir)/cmet.c'; fi` + +gsnapl_avx512-atoi.o: atoi.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-atoi.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-atoi.Tpo -c -o gsnapl_avx512-atoi.o `test -f 'atoi.c' || echo '$(srcdir)/'`atoi.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-atoi.Tpo $(DEPDIR)/gsnapl_avx512-atoi.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='atoi.c' object='gsnapl_avx512-atoi.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-atoi.o `test -f 'atoi.c' || echo '$(srcdir)/'`atoi.c + +gsnapl_avx512-atoi.obj: atoi.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-atoi.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-atoi.Tpo -c -o gsnapl_avx512-atoi.obj `if test -f 'atoi.c'; then $(CYGPATH_W) 'atoi.c'; else $(CYGPATH_W) '$(srcdir)/atoi.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-atoi.Tpo $(DEPDIR)/gsnapl_avx512-atoi.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='atoi.c' object='gsnapl_avx512-atoi.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-atoi.obj `if test -f 'atoi.c'; then $(CYGPATH_W) 'atoi.c'; else $(CYGPATH_W) '$(srcdir)/atoi.c'; fi` + +gsnapl_avx512-maxent.o: maxent.c +@am__fastdepCC_TRUE@ 
$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-maxent.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-maxent.Tpo -c -o gsnapl_avx512-maxent.o `test -f 'maxent.c' || echo '$(srcdir)/'`maxent.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-maxent.Tpo $(DEPDIR)/gsnapl_avx512-maxent.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='maxent.c' object='gsnapl_avx512-maxent.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-maxent.o `test -f 'maxent.c' || echo '$(srcdir)/'`maxent.c + +gsnapl_avx512-maxent.obj: maxent.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-maxent.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-maxent.Tpo -c -o gsnapl_avx512-maxent.obj `if test -f 'maxent.c'; then $(CYGPATH_W) 'maxent.c'; else $(CYGPATH_W) '$(srcdir)/maxent.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-maxent.Tpo $(DEPDIR)/gsnapl_avx512-maxent.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='maxent.c' object='gsnapl_avx512-maxent.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-maxent.obj `if test -f 'maxent.c'; then $(CYGPATH_W) 'maxent.c'; else $(CYGPATH_W) '$(srcdir)/maxent.c'; fi` + +gsnapl_avx512-pair.o: pair.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT 
gsnapl_avx512-pair.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-pair.Tpo -c -o gsnapl_avx512-pair.o `test -f 'pair.c' || echo '$(srcdir)/'`pair.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-pair.Tpo $(DEPDIR)/gsnapl_avx512-pair.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pair.c' object='gsnapl_avx512-pair.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-pair.o `test -f 'pair.c' || echo '$(srcdir)/'`pair.c + +gsnapl_avx512-pair.obj: pair.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-pair.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-pair.Tpo -c -o gsnapl_avx512-pair.obj `if test -f 'pair.c'; then $(CYGPATH_W) 'pair.c'; else $(CYGPATH_W) '$(srcdir)/pair.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-pair.Tpo $(DEPDIR)/gsnapl_avx512-pair.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pair.c' object='gsnapl_avx512-pair.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-pair.obj `if test -f 'pair.c'; then $(CYGPATH_W) 'pair.c'; else $(CYGPATH_W) '$(srcdir)/pair.c'; fi` + +gsnapl_avx512-pairpool.o: pairpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-pairpool.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-pairpool.Tpo -c -o gsnapl_avx512-pairpool.o `test -f 'pairpool.c' || echo '$(srcdir)/'`pairpool.c 
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-pairpool.Tpo $(DEPDIR)/gsnapl_avx512-pairpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pairpool.c' object='gsnapl_avx512-pairpool.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-pairpool.o `test -f 'pairpool.c' || echo '$(srcdir)/'`pairpool.c + +gsnapl_avx512-pairpool.obj: pairpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-pairpool.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-pairpool.Tpo -c -o gsnapl_avx512-pairpool.obj `if test -f 'pairpool.c'; then $(CYGPATH_W) 'pairpool.c'; else $(CYGPATH_W) '$(srcdir)/pairpool.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-pairpool.Tpo $(DEPDIR)/gsnapl_avx512-pairpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pairpool.c' object='gsnapl_avx512-pairpool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-pairpool.obj `if test -f 'pairpool.c'; then $(CYGPATH_W) 'pairpool.c'; else $(CYGPATH_W) '$(srcdir)/pairpool.c'; fi` + +gsnapl_avx512-diag.o: diag.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-diag.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-diag.Tpo -c -o gsnapl_avx512-diag.o `test -f 'diag.c' || echo '$(srcdir)/'`diag.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-diag.Tpo 
$(DEPDIR)/gsnapl_avx512-diag.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='diag.c' object='gsnapl_avx512-diag.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-diag.o `test -f 'diag.c' || echo '$(srcdir)/'`diag.c + +gsnapl_avx512-diag.obj: diag.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-diag.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-diag.Tpo -c -o gsnapl_avx512-diag.obj `if test -f 'diag.c'; then $(CYGPATH_W) 'diag.c'; else $(CYGPATH_W) '$(srcdir)/diag.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-diag.Tpo $(DEPDIR)/gsnapl_avx512-diag.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='diag.c' object='gsnapl_avx512-diag.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-diag.obj `if test -f 'diag.c'; then $(CYGPATH_W) 'diag.c'; else $(CYGPATH_W) '$(srcdir)/diag.c'; fi` + +gsnapl_avx512-diagpool.o: diagpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-diagpool.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-diagpool.Tpo -c -o gsnapl_avx512-diagpool.o `test -f 'diagpool.c' || echo '$(srcdir)/'`diagpool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-diagpool.Tpo $(DEPDIR)/gsnapl_avx512-diagpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='diagpool.c' object='gsnapl_avx512-diagpool.o' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-diagpool.o `test -f 'diagpool.c' || echo '$(srcdir)/'`diagpool.c + +gsnapl_avx512-diagpool.obj: diagpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-diagpool.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-diagpool.Tpo -c -o gsnapl_avx512-diagpool.obj `if test -f 'diagpool.c'; then $(CYGPATH_W) 'diagpool.c'; else $(CYGPATH_W) '$(srcdir)/diagpool.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-diagpool.Tpo $(DEPDIR)/gsnapl_avx512-diagpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='diagpool.c' object='gsnapl_avx512-diagpool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-diagpool.obj `if test -f 'diagpool.c'; then $(CYGPATH_W) 'diagpool.c'; else $(CYGPATH_W) '$(srcdir)/diagpool.c'; fi` + +gsnapl_avx512-orderstat.o: orderstat.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-orderstat.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-orderstat.Tpo -c -o gsnapl_avx512-orderstat.o `test -f 'orderstat.c' || echo '$(srcdir)/'`orderstat.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-orderstat.Tpo $(DEPDIR)/gsnapl_avx512-orderstat.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='orderstat.c' object='gsnapl_avx512-orderstat.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-orderstat.o `test -f 'orderstat.c' || echo '$(srcdir)/'`orderstat.c + +gsnapl_avx512-orderstat.obj: orderstat.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-orderstat.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-orderstat.Tpo -c -o gsnapl_avx512-orderstat.obj `if test -f 'orderstat.c'; then $(CYGPATH_W) 'orderstat.c'; else $(CYGPATH_W) '$(srcdir)/orderstat.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-orderstat.Tpo $(DEPDIR)/gsnapl_avx512-orderstat.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='orderstat.c' object='gsnapl_avx512-orderstat.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-orderstat.obj `if test -f 'orderstat.c'; then $(CYGPATH_W) 'orderstat.c'; else $(CYGPATH_W) '$(srcdir)/orderstat.c'; fi` + +gsnapl_avx512-oligoindex_hr.o: oligoindex_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-oligoindex_hr.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-oligoindex_hr.Tpo -c -o gsnapl_avx512-oligoindex_hr.o `test -f 'oligoindex_hr.c' || echo '$(srcdir)/'`oligoindex_hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-oligoindex_hr.Tpo $(DEPDIR)/gsnapl_avx512-oligoindex_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='oligoindex_hr.c' object='gsnapl_avx512-oligoindex_hr.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-oligoindex_hr.o `test -f 'oligoindex_hr.c' || echo '$(srcdir)/'`oligoindex_hr.c + +gsnapl_avx512-oligoindex_hr.obj: oligoindex_hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-oligoindex_hr.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-oligoindex_hr.Tpo -c -o gsnapl_avx512-oligoindex_hr.obj `if test -f 'oligoindex_hr.c'; then $(CYGPATH_W) 'oligoindex_hr.c'; else $(CYGPATH_W) '$(srcdir)/oligoindex_hr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-oligoindex_hr.Tpo $(DEPDIR)/gsnapl_avx512-oligoindex_hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='oligoindex_hr.c' object='gsnapl_avx512-oligoindex_hr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-oligoindex_hr.obj `if test -f 'oligoindex_hr.c'; then $(CYGPATH_W) 'oligoindex_hr.c'; else $(CYGPATH_W) '$(srcdir)/oligoindex_hr.c'; fi` + +gsnapl_avx512-cellpool.o: cellpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-cellpool.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-cellpool.Tpo -c -o gsnapl_avx512-cellpool.o `test -f 'cellpool.c' || echo '$(srcdir)/'`cellpool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-cellpool.Tpo $(DEPDIR)/gsnapl_avx512-cellpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cellpool.c' 
object='gsnapl_avx512-cellpool.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-cellpool.o `test -f 'cellpool.c' || echo '$(srcdir)/'`cellpool.c + +gsnapl_avx512-cellpool.obj: cellpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-cellpool.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-cellpool.Tpo -c -o gsnapl_avx512-cellpool.obj `if test -f 'cellpool.c'; then $(CYGPATH_W) 'cellpool.c'; else $(CYGPATH_W) '$(srcdir)/cellpool.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-cellpool.Tpo $(DEPDIR)/gsnapl_avx512-cellpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='cellpool.c' object='gsnapl_avx512-cellpool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-cellpool.obj `if test -f 'cellpool.c'; then $(CYGPATH_W) 'cellpool.c'; else $(CYGPATH_W) '$(srcdir)/cellpool.c'; fi` + +gsnapl_avx512-stage2.o: stage2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stage2.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stage2.Tpo -c -o gsnapl_avx512-stage2.o `test -f 'stage2.c' || echo '$(srcdir)/'`stage2.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stage2.Tpo $(DEPDIR)/gsnapl_avx512-stage2.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage2.c' object='gsnapl_avx512-stage2.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-stage2.o `test -f 'stage2.c' || echo '$(srcdir)/'`stage2.c + +gsnapl_avx512-stage2.obj: stage2.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stage2.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stage2.Tpo -c -o gsnapl_avx512-stage2.obj `if test -f 'stage2.c'; then $(CYGPATH_W) 'stage2.c'; else $(CYGPATH_W) '$(srcdir)/stage2.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stage2.Tpo $(DEPDIR)/gsnapl_avx512-stage2.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage2.c' object='gsnapl_avx512-stage2.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-stage2.obj `if test -f 'stage2.c'; then $(CYGPATH_W) 'stage2.c'; else $(CYGPATH_W) '$(srcdir)/stage2.c'; fi` + +gsnapl_avx512-intron.o: intron.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-intron.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-intron.Tpo -c -o gsnapl_avx512-intron.o `test -f 'intron.c' || echo '$(srcdir)/'`intron.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-intron.Tpo $(DEPDIR)/gsnapl_avx512-intron.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='intron.c' object='gsnapl_avx512-intron.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-intron.o `test -f 'intron.c' || echo '$(srcdir)/'`intron.c + +gsnapl_avx512-intron.obj: intron.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-intron.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-intron.Tpo -c -o gsnapl_avx512-intron.obj `if test -f 'intron.c'; then $(CYGPATH_W) 'intron.c'; else $(CYGPATH_W) '$(srcdir)/intron.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-intron.Tpo $(DEPDIR)/gsnapl_avx512-intron.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='intron.c' object='gsnapl_avx512-intron.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-intron.obj `if test -f 'intron.c'; then $(CYGPATH_W) 'intron.c'; else $(CYGPATH_W) '$(srcdir)/intron.c'; fi` + +gsnapl_avx512-boyer-moore.o: boyer-moore.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-boyer-moore.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-boyer-moore.Tpo -c -o gsnapl_avx512-boyer-moore.o `test -f 'boyer-moore.c' || echo '$(srcdir)/'`boyer-moore.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-boyer-moore.Tpo $(DEPDIR)/gsnapl_avx512-boyer-moore.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='boyer-moore.c' object='gsnapl_avx512-boyer-moore.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-boyer-moore.o `test -f 'boyer-moore.c' || echo '$(srcdir)/'`boyer-moore.c + +gsnapl_avx512-boyer-moore.obj: boyer-moore.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-boyer-moore.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-boyer-moore.Tpo -c -o gsnapl_avx512-boyer-moore.obj `if test -f 'boyer-moore.c'; then $(CYGPATH_W) 'boyer-moore.c'; else $(CYGPATH_W) '$(srcdir)/boyer-moore.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-boyer-moore.Tpo $(DEPDIR)/gsnapl_avx512-boyer-moore.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='boyer-moore.c' object='gsnapl_avx512-boyer-moore.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-boyer-moore.obj `if test -f 'boyer-moore.c'; then $(CYGPATH_W) 'boyer-moore.c'; else $(CYGPATH_W) '$(srcdir)/boyer-moore.c'; fi` + +gsnapl_avx512-changepoint.o: changepoint.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-changepoint.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-changepoint.Tpo -c -o gsnapl_avx512-changepoint.o `test -f 'changepoint.c' || echo '$(srcdir)/'`changepoint.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-changepoint.Tpo $(DEPDIR)/gsnapl_avx512-changepoint.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='changepoint.c' object='gsnapl_avx512-changepoint.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-changepoint.o `test -f 'changepoint.c' || echo '$(srcdir)/'`changepoint.c + +gsnapl_avx512-changepoint.obj: changepoint.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-changepoint.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-changepoint.Tpo -c -o gsnapl_avx512-changepoint.obj `if test -f 'changepoint.c'; then $(CYGPATH_W) 'changepoint.c'; else $(CYGPATH_W) '$(srcdir)/changepoint.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-changepoint.Tpo $(DEPDIR)/gsnapl_avx512-changepoint.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='changepoint.c' object='gsnapl_avx512-changepoint.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-changepoint.obj `if test -f 'changepoint.c'; then $(CYGPATH_W) 'changepoint.c'; else $(CYGPATH_W) '$(srcdir)/changepoint.c'; fi` + +gsnapl_avx512-pbinom.o: pbinom.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-pbinom.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-pbinom.Tpo -c -o gsnapl_avx512-pbinom.o `test -f 'pbinom.c' || echo '$(srcdir)/'`pbinom.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-pbinom.Tpo $(DEPDIR)/gsnapl_avx512-pbinom.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pbinom.c' object='gsnapl_avx512-pbinom.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) 
$(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-pbinom.o `test -f 'pbinom.c' || echo '$(srcdir)/'`pbinom.c + +gsnapl_avx512-pbinom.obj: pbinom.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-pbinom.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-pbinom.Tpo -c -o gsnapl_avx512-pbinom.obj `if test -f 'pbinom.c'; then $(CYGPATH_W) 'pbinom.c'; else $(CYGPATH_W) '$(srcdir)/pbinom.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-pbinom.Tpo $(DEPDIR)/gsnapl_avx512-pbinom.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='pbinom.c' object='gsnapl_avx512-pbinom.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-pbinom.obj `if test -f 'pbinom.c'; then $(CYGPATH_W) 'pbinom.c'; else $(CYGPATH_W) '$(srcdir)/pbinom.c'; fi` + +gsnapl_avx512-dynprog.o: dynprog.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog.Tpo -c -o gsnapl_avx512-dynprog.o `test -f 'dynprog.c' || echo '$(srcdir)/'`dynprog.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog.Tpo $(DEPDIR)/gsnapl_avx512-dynprog.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog.c' object='gsnapl_avx512-dynprog.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog.o `test -f 
'dynprog.c' || echo '$(srcdir)/'`dynprog.c + +gsnapl_avx512-dynprog.obj: dynprog.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog.Tpo -c -o gsnapl_avx512-dynprog.obj `if test -f 'dynprog.c'; then $(CYGPATH_W) 'dynprog.c'; else $(CYGPATH_W) '$(srcdir)/dynprog.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog.Tpo $(DEPDIR)/gsnapl_avx512-dynprog.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog.c' object='gsnapl_avx512-dynprog.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog.obj `if test -f 'dynprog.c'; then $(CYGPATH_W) 'dynprog.c'; else $(CYGPATH_W) '$(srcdir)/dynprog.c'; fi` + +gsnapl_avx512-dynprog_simd.o: dynprog_simd.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_simd.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_simd.Tpo -c -o gsnapl_avx512-dynprog_simd.o `test -f 'dynprog_simd.c' || echo '$(srcdir)/'`dynprog_simd.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_simd.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_simd.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_simd.c' object='gsnapl_avx512-dynprog_simd.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog_simd.o `test -f 'dynprog_simd.c' || echo 
'$(srcdir)/'`dynprog_simd.c + +gsnapl_avx512-dynprog_simd.obj: dynprog_simd.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_simd.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_simd.Tpo -c -o gsnapl_avx512-dynprog_simd.obj `if test -f 'dynprog_simd.c'; then $(CYGPATH_W) 'dynprog_simd.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_simd.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_simd.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_simd.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_simd.c' object='gsnapl_avx512-dynprog_simd.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog_simd.obj `if test -f 'dynprog_simd.c'; then $(CYGPATH_W) 'dynprog_simd.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_simd.c'; fi` + +gsnapl_avx512-dynprog_single.o: dynprog_single.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_single.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_single.Tpo -c -o gsnapl_avx512-dynprog_single.o `test -f 'dynprog_single.c' || echo '$(srcdir)/'`dynprog_single.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_single.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_single.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_single.c' object='gsnapl_avx512-dynprog_single.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) 
$(CFLAGS) -c -o gsnapl_avx512-dynprog_single.o `test -f 'dynprog_single.c' || echo '$(srcdir)/'`dynprog_single.c + +gsnapl_avx512-dynprog_single.obj: dynprog_single.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_single.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_single.Tpo -c -o gsnapl_avx512-dynprog_single.obj `if test -f 'dynprog_single.c'; then $(CYGPATH_W) 'dynprog_single.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_single.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_single.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_single.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_single.c' object='gsnapl_avx512-dynprog_single.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog_single.obj `if test -f 'dynprog_single.c'; then $(CYGPATH_W) 'dynprog_single.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_single.c'; fi` + +gsnapl_avx512-dynprog_genome.o: dynprog_genome.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_genome.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_genome.Tpo -c -o gsnapl_avx512-dynprog_genome.o `test -f 'dynprog_genome.c' || echo '$(srcdir)/'`dynprog_genome.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_genome.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_genome.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_genome.c' object='gsnapl_avx512-dynprog_genome.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog_genome.o `test -f 'dynprog_genome.c' || echo '$(srcdir)/'`dynprog_genome.c + +gsnapl_avx512-dynprog_genome.obj: dynprog_genome.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_genome.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_genome.Tpo -c -o gsnapl_avx512-dynprog_genome.obj `if test -f 'dynprog_genome.c'; then $(CYGPATH_W) 'dynprog_genome.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_genome.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_genome.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_genome.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_genome.c' object='gsnapl_avx512-dynprog_genome.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog_genome.obj `if test -f 'dynprog_genome.c'; then $(CYGPATH_W) 'dynprog_genome.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_genome.c'; fi` + +gsnapl_avx512-dynprog_cdna.o: dynprog_cdna.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_cdna.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_cdna.Tpo -c -o gsnapl_avx512-dynprog_cdna.o `test -f 'dynprog_cdna.c' || echo '$(srcdir)/'`dynprog_cdna.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_cdna.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_cdna.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_cdna.c' object='gsnapl_avx512-dynprog_cdna.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog_cdna.o `test -f 'dynprog_cdna.c' || echo '$(srcdir)/'`dynprog_cdna.c + +gsnapl_avx512-dynprog_cdna.obj: dynprog_cdna.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_cdna.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_cdna.Tpo -c -o gsnapl_avx512-dynprog_cdna.obj `if test -f 'dynprog_cdna.c'; then $(CYGPATH_W) 'dynprog_cdna.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_cdna.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_cdna.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_cdna.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_cdna.c' object='gsnapl_avx512-dynprog_cdna.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog_cdna.obj `if test -f 'dynprog_cdna.c'; then $(CYGPATH_W) 'dynprog_cdna.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_cdna.c'; fi` + +gsnapl_avx512-dynprog_end.o: dynprog_end.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_end.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_end.Tpo -c -o gsnapl_avx512-dynprog_end.o `test -f 'dynprog_end.c' || echo '$(srcdir)/'`dynprog_end.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_end.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_end.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_end.c' 
object='gsnapl_avx512-dynprog_end.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog_end.o `test -f 'dynprog_end.c' || echo '$(srcdir)/'`dynprog_end.c + +gsnapl_avx512-dynprog_end.obj: dynprog_end.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-dynprog_end.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-dynprog_end.Tpo -c -o gsnapl_avx512-dynprog_end.obj `if test -f 'dynprog_end.c'; then $(CYGPATH_W) 'dynprog_end.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_end.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-dynprog_end.Tpo $(DEPDIR)/gsnapl_avx512-dynprog_end.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='dynprog_end.c' object='gsnapl_avx512-dynprog_end.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-dynprog_end.obj `if test -f 'dynprog_end.c'; then $(CYGPATH_W) 'dynprog_end.c'; else $(CYGPATH_W) '$(srcdir)/dynprog_end.c'; fi` + +gsnapl_avx512-gbuffer.o: gbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-gbuffer.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-gbuffer.Tpo -c -o gsnapl_avx512-gbuffer.o `test -f 'gbuffer.c' || echo '$(srcdir)/'`gbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-gbuffer.Tpo $(DEPDIR)/gsnapl_avx512-gbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gbuffer.c' 
object='gsnapl_avx512-gbuffer.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-gbuffer.o `test -f 'gbuffer.c' || echo '$(srcdir)/'`gbuffer.c + +gsnapl_avx512-gbuffer.obj: gbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-gbuffer.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-gbuffer.Tpo -c -o gsnapl_avx512-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-gbuffer.Tpo $(DEPDIR)/gsnapl_avx512-gbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gbuffer.c' object='gsnapl_avx512-gbuffer.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-gbuffer.obj `if test -f 'gbuffer.c'; then $(CYGPATH_W) 'gbuffer.c'; else $(CYGPATH_W) '$(srcdir)/gbuffer.c'; fi` + +gsnapl_avx512-doublelist.o: doublelist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-doublelist.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-doublelist.Tpo -c -o gsnapl_avx512-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-doublelist.Tpo $(DEPDIR)/gsnapl_avx512-doublelist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='doublelist.c' object='gsnapl_avx512-doublelist.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-doublelist.o `test -f 'doublelist.c' || echo '$(srcdir)/'`doublelist.c + +gsnapl_avx512-doublelist.obj: doublelist.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-doublelist.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-doublelist.Tpo -c -o gsnapl_avx512-doublelist.obj `if test -f 'doublelist.c'; then $(CYGPATH_W) 'doublelist.c'; else $(CYGPATH_W) '$(srcdir)/doublelist.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-doublelist.Tpo $(DEPDIR)/gsnapl_avx512-doublelist.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='doublelist.c' object='gsnapl_avx512-doublelist.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-doublelist.obj `if test -f 'doublelist.c'; then $(CYGPATH_W) 'doublelist.c'; else $(CYGPATH_W) '$(srcdir)/doublelist.c'; fi` + +gsnapl_avx512-smooth.o: smooth.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-smooth.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-smooth.Tpo -c -o gsnapl_avx512-smooth.o `test -f 'smooth.c' || echo '$(srcdir)/'`smooth.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-smooth.Tpo $(DEPDIR)/gsnapl_avx512-smooth.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='smooth.c' object='gsnapl_avx512-smooth.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-smooth.o `test -f 'smooth.c' || echo '$(srcdir)/'`smooth.c + +gsnapl_avx512-smooth.obj: smooth.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-smooth.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-smooth.Tpo -c -o gsnapl_avx512-smooth.obj `if test -f 'smooth.c'; then $(CYGPATH_W) 'smooth.c'; else $(CYGPATH_W) '$(srcdir)/smooth.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-smooth.Tpo $(DEPDIR)/gsnapl_avx512-smooth.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='smooth.c' object='gsnapl_avx512-smooth.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-smooth.obj `if test -f 'smooth.c'; then $(CYGPATH_W) 'smooth.c'; else $(CYGPATH_W) '$(srcdir)/smooth.c'; fi` + +gsnapl_avx512-chimera.o: chimera.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-chimera.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-chimera.Tpo -c -o gsnapl_avx512-chimera.o `test -f 'chimera.c' || echo '$(srcdir)/'`chimera.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-chimera.Tpo $(DEPDIR)/gsnapl_avx512-chimera.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chimera.c' object='gsnapl_avx512-chimera.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-chimera.o `test -f 'chimera.c' || echo '$(srcdir)/'`chimera.c + +gsnapl_avx512-chimera.obj: chimera.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-chimera.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-chimera.Tpo -c -o gsnapl_avx512-chimera.obj `if test -f 'chimera.c'; then $(CYGPATH_W) 'chimera.c'; else $(CYGPATH_W) '$(srcdir)/chimera.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-chimera.Tpo $(DEPDIR)/gsnapl_avx512-chimera.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='chimera.c' object='gsnapl_avx512-chimera.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-chimera.obj `if test -f 'chimera.c'; then $(CYGPATH_W) 'chimera.c'; else $(CYGPATH_W) '$(srcdir)/chimera.c'; fi` + +gsnapl_avx512-stage3.o: stage3.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stage3.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stage3.Tpo -c -o gsnapl_avx512-stage3.o `test -f 'stage3.c' || echo '$(srcdir)/'`stage3.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stage3.Tpo $(DEPDIR)/gsnapl_avx512-stage3.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage3.c' object='gsnapl_avx512-stage3.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c 
-o gsnapl_avx512-stage3.o `test -f 'stage3.c' || echo '$(srcdir)/'`stage3.c + +gsnapl_avx512-stage3.obj: stage3.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stage3.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stage3.Tpo -c -o gsnapl_avx512-stage3.obj `if test -f 'stage3.c'; then $(CYGPATH_W) 'stage3.c'; else $(CYGPATH_W) '$(srcdir)/stage3.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stage3.Tpo $(DEPDIR)/gsnapl_avx512-stage3.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage3.c' object='gsnapl_avx512-stage3.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-stage3.obj `if test -f 'stage3.c'; then $(CYGPATH_W) 'stage3.c'; else $(CYGPATH_W) '$(srcdir)/stage3.c'; fi` + +gsnapl_avx512-splicestringpool.o: splicestringpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-splicestringpool.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-splicestringpool.Tpo -c -o gsnapl_avx512-splicestringpool.o `test -f 'splicestringpool.c' || echo '$(srcdir)/'`splicestringpool.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-splicestringpool.Tpo $(DEPDIR)/gsnapl_avx512-splicestringpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicestringpool.c' object='gsnapl_avx512-splicestringpool.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o 
gsnapl_avx512-splicestringpool.o `test -f 'splicestringpool.c' || echo '$(srcdir)/'`splicestringpool.c + +gsnapl_avx512-splicestringpool.obj: splicestringpool.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-splicestringpool.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-splicestringpool.Tpo -c -o gsnapl_avx512-splicestringpool.obj `if test -f 'splicestringpool.c'; then $(CYGPATH_W) 'splicestringpool.c'; else $(CYGPATH_W) '$(srcdir)/splicestringpool.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-splicestringpool.Tpo $(DEPDIR)/gsnapl_avx512-splicestringpool.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicestringpool.c' object='gsnapl_avx512-splicestringpool.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-splicestringpool.obj `if test -f 'splicestringpool.c'; then $(CYGPATH_W) 'splicestringpool.c'; else $(CYGPATH_W) '$(srcdir)/splicestringpool.c'; fi` + +gsnapl_avx512-splicetrie_build.o: splicetrie_build.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-splicetrie_build.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-splicetrie_build.Tpo -c -o gsnapl_avx512-splicetrie_build.o `test -f 'splicetrie_build.c' || echo '$(srcdir)/'`splicetrie_build.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-splicetrie_build.Tpo $(DEPDIR)/gsnapl_avx512-splicetrie_build.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicetrie_build.c' object='gsnapl_avx512-splicetrie_build.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-splicetrie_build.o `test -f 'splicetrie_build.c' || echo '$(srcdir)/'`splicetrie_build.c + +gsnapl_avx512-splicetrie_build.obj: splicetrie_build.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-splicetrie_build.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-splicetrie_build.Tpo -c -o gsnapl_avx512-splicetrie_build.obj `if test -f 'splicetrie_build.c'; then $(CYGPATH_W) 'splicetrie_build.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie_build.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-splicetrie_build.Tpo $(DEPDIR)/gsnapl_avx512-splicetrie_build.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicetrie_build.c' object='gsnapl_avx512-splicetrie_build.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-splicetrie_build.obj `if test -f 'splicetrie_build.c'; then $(CYGPATH_W) 'splicetrie_build.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie_build.c'; fi` + +gsnapl_avx512-splicetrie.o: splicetrie.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-splicetrie.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-splicetrie.Tpo -c -o gsnapl_avx512-splicetrie.o `test -f 'splicetrie.c' || echo '$(srcdir)/'`splicetrie.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-splicetrie.Tpo $(DEPDIR)/gsnapl_avx512-splicetrie.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicetrie.c' 
object='gsnapl_avx512-splicetrie.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-splicetrie.o `test -f 'splicetrie.c' || echo '$(srcdir)/'`splicetrie.c + +gsnapl_avx512-splicetrie.obj: splicetrie.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-splicetrie.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-splicetrie.Tpo -c -o gsnapl_avx512-splicetrie.obj `if test -f 'splicetrie.c'; then $(CYGPATH_W) 'splicetrie.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-splicetrie.Tpo $(DEPDIR)/gsnapl_avx512-splicetrie.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splicetrie.c' object='gsnapl_avx512-splicetrie.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-splicetrie.obj `if test -f 'splicetrie.c'; then $(CYGPATH_W) 'splicetrie.c'; else $(CYGPATH_W) '$(srcdir)/splicetrie.c'; fi` + +gsnapl_avx512-splice.o: splice.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-splice.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-splice.Tpo -c -o gsnapl_avx512-splice.o `test -f 'splice.c' || echo '$(srcdir)/'`splice.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-splice.Tpo $(DEPDIR)/gsnapl_avx512-splice.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splice.c' object='gsnapl_avx512-splice.o' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-splice.o `test -f 'splice.c' || echo '$(srcdir)/'`splice.c + +gsnapl_avx512-splice.obj: splice.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-splice.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-splice.Tpo -c -o gsnapl_avx512-splice.obj `if test -f 'splice.c'; then $(CYGPATH_W) 'splice.c'; else $(CYGPATH_W) '$(srcdir)/splice.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-splice.Tpo $(DEPDIR)/gsnapl_avx512-splice.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='splice.c' object='gsnapl_avx512-splice.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-splice.obj `if test -f 'splice.c'; then $(CYGPATH_W) 'splice.c'; else $(CYGPATH_W) '$(srcdir)/splice.c'; fi` + +gsnapl_avx512-indel.o: indel.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-indel.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-indel.Tpo -c -o gsnapl_avx512-indel.o `test -f 'indel.c' || echo '$(srcdir)/'`indel.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-indel.Tpo $(DEPDIR)/gsnapl_avx512-indel.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='indel.c' object='gsnapl_avx512-indel.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-indel.o `test -f 'indel.c' || echo '$(srcdir)/'`indel.c + +gsnapl_avx512-indel.obj: indel.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-indel.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-indel.Tpo -c -o gsnapl_avx512-indel.obj `if test -f 'indel.c'; then $(CYGPATH_W) 'indel.c'; else $(CYGPATH_W) '$(srcdir)/indel.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-indel.Tpo $(DEPDIR)/gsnapl_avx512-indel.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='indel.c' object='gsnapl_avx512-indel.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-indel.obj `if test -f 'indel.c'; then $(CYGPATH_W) 'indel.c'; else $(CYGPATH_W) '$(srcdir)/indel.c'; fi` + +gsnapl_avx512-bitpack64-access.o: bitpack64-access.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bitpack64-access.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bitpack64-access.Tpo -c -o gsnapl_avx512-bitpack64-access.o `test -f 'bitpack64-access.c' || echo '$(srcdir)/'`bitpack64-access.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-bitpack64-access.Tpo $(DEPDIR)/gsnapl_avx512-bitpack64-access.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bitpack64-access.c' object='gsnapl_avx512-bitpack64-access.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-bitpack64-access.o `test -f 'bitpack64-access.c' || echo '$(srcdir)/'`bitpack64-access.c + +gsnapl_avx512-bitpack64-access.obj: bitpack64-access.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-bitpack64-access.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-bitpack64-access.Tpo -c -o gsnapl_avx512-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-bitpack64-access.Tpo $(DEPDIR)/gsnapl_avx512-bitpack64-access.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='bitpack64-access.c' object='gsnapl_avx512-bitpack64-access.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi` + +gsnapl_avx512-merge-heap.o: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-merge-heap.Tpo -c -o gsnapl_avx512-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-merge-heap.Tpo $(DEPDIR)/gsnapl_avx512-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnapl_avx512-merge-heap.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c + +gsnapl_avx512-merge-heap.obj: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-merge-heap.Tpo -c -o gsnapl_avx512-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-merge-heap.Tpo $(DEPDIR)/gsnapl_avx512-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnapl_avx512-merge-heap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` + +gsnapl_avx512-stage1hr.o: stage1hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stage1hr.Tpo -c -o gsnapl_avx512-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stage1hr.Tpo $(DEPDIR)/gsnapl_avx512-stage1hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage1hr.c' object='gsnapl_avx512-stage1hr.o' libtool=no @AMDEPBACKSLASH@ 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c + +gsnapl_avx512-stage1hr.obj: stage1hr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-stage1hr.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-stage1hr.Tpo -c -o gsnapl_avx512-stage1hr.obj `if test -f 'stage1hr.c'; then $(CYGPATH_W) 'stage1hr.c'; else $(CYGPATH_W) '$(srcdir)/stage1hr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-stage1hr.Tpo $(DEPDIR)/gsnapl_avx512-stage1hr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='stage1hr.c' object='gsnapl_avx512-stage1hr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-stage1hr.obj `if test -f 'stage1hr.c'; then $(CYGPATH_W) 'stage1hr.c'; else $(CYGPATH_W) '$(srcdir)/stage1hr.c'; fi` + +gsnapl_avx512-request.o: request.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-request.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-request.Tpo -c -o gsnapl_avx512-request.o `test -f 'request.c' || echo '$(srcdir)/'`request.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-request.Tpo $(DEPDIR)/gsnapl_avx512-request.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='request.c' object='gsnapl_avx512-request.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) 
$(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-request.o `test -f 'request.c' || echo '$(srcdir)/'`request.c + +gsnapl_avx512-request.obj: request.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-request.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-request.Tpo -c -o gsnapl_avx512-request.obj `if test -f 'request.c'; then $(CYGPATH_W) 'request.c'; else $(CYGPATH_W) '$(srcdir)/request.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-request.Tpo $(DEPDIR)/gsnapl_avx512-request.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='request.c' object='gsnapl_avx512-request.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-request.obj `if test -f 'request.c'; then $(CYGPATH_W) 'request.c'; else $(CYGPATH_W) '$(srcdir)/request.c'; fi` + +gsnapl_avx512-resulthr.o: resulthr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-resulthr.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-resulthr.Tpo -c -o gsnapl_avx512-resulthr.o `test -f 'resulthr.c' || echo '$(srcdir)/'`resulthr.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-resulthr.Tpo $(DEPDIR)/gsnapl_avx512-resulthr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='resulthr.c' object='gsnapl_avx512-resulthr.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) 
$(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-resulthr.o `test -f 'resulthr.c' || echo '$(srcdir)/'`resulthr.c + +gsnapl_avx512-resulthr.obj: resulthr.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-resulthr.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-resulthr.Tpo -c -o gsnapl_avx512-resulthr.obj `if test -f 'resulthr.c'; then $(CYGPATH_W) 'resulthr.c'; else $(CYGPATH_W) '$(srcdir)/resulthr.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-resulthr.Tpo $(DEPDIR)/gsnapl_avx512-resulthr.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='resulthr.c' object='gsnapl_avx512-resulthr.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-resulthr.obj `if test -f 'resulthr.c'; then $(CYGPATH_W) 'resulthr.c'; else $(CYGPATH_W) '$(srcdir)/resulthr.c'; fi` + +gsnapl_avx512-output.o: output.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-output.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-output.Tpo -c -o gsnapl_avx512-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-output.Tpo $(DEPDIR)/gsnapl_avx512-output.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='output.c' object='gsnapl_avx512-output.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-output.o `test -f 'output.c' || echo '$(srcdir)/'`output.c + +gsnapl_avx512-output.obj: output.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-output.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-output.Tpo -c -o gsnapl_avx512-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-output.Tpo $(DEPDIR)/gsnapl_avx512-output.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='output.c' object='gsnapl_avx512-output.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-output.obj `if test -f 'output.c'; then $(CYGPATH_W) 'output.c'; else $(CYGPATH_W) '$(srcdir)/output.c'; fi` + +gsnapl_avx512-inbuffer.o: inbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-inbuffer.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-inbuffer.Tpo -c -o gsnapl_avx512-inbuffer.o `test -f 'inbuffer.c' || echo '$(srcdir)/'`inbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-inbuffer.Tpo $(DEPDIR)/gsnapl_avx512-inbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='inbuffer.c' object='gsnapl_avx512-inbuffer.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-inbuffer.o `test -f 'inbuffer.c' || echo 
'$(srcdir)/'`inbuffer.c + +gsnapl_avx512-inbuffer.obj: inbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-inbuffer.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-inbuffer.Tpo -c -o gsnapl_avx512-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-inbuffer.Tpo $(DEPDIR)/gsnapl_avx512-inbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='inbuffer.c' object='gsnapl_avx512-inbuffer.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-inbuffer.obj `if test -f 'inbuffer.c'; then $(CYGPATH_W) 'inbuffer.c'; else $(CYGPATH_W) '$(srcdir)/inbuffer.c'; fi` + +gsnapl_avx512-samheader.o: samheader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-samheader.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-samheader.Tpo -c -o gsnapl_avx512-samheader.o `test -f 'samheader.c' || echo '$(srcdir)/'`samheader.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-samheader.Tpo $(DEPDIR)/gsnapl_avx512-samheader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='samheader.c' object='gsnapl_avx512-samheader.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-samheader.o `test -f 'samheader.c' || echo '$(srcdir)/'`samheader.c + 
+gsnapl_avx512-samheader.obj: samheader.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-samheader.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-samheader.Tpo -c -o gsnapl_avx512-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-samheader.Tpo $(DEPDIR)/gsnapl_avx512-samheader.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='samheader.c' object='gsnapl_avx512-samheader.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-samheader.obj `if test -f 'samheader.c'; then $(CYGPATH_W) 'samheader.c'; else $(CYGPATH_W) '$(srcdir)/samheader.c'; fi` + +gsnapl_avx512-outbuffer.o: outbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-outbuffer.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-outbuffer.Tpo -c -o gsnapl_avx512-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-outbuffer.Tpo $(DEPDIR)/gsnapl_avx512-outbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='outbuffer.c' object='gsnapl_avx512-outbuffer.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-outbuffer.o `test -f 'outbuffer.c' || echo '$(srcdir)/'`outbuffer.c + +gsnapl_avx512-outbuffer.obj: 
outbuffer.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-outbuffer.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-outbuffer.Tpo -c -o gsnapl_avx512-outbuffer.obj `if test -f 'outbuffer.c'; then $(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-outbuffer.Tpo $(DEPDIR)/gsnapl_avx512-outbuffer.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='outbuffer.c' object='gsnapl_avx512-outbuffer.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-outbuffer.obj `if test -f 'outbuffer.c'; then $(CYGPATH_W) 'outbuffer.c'; else $(CYGPATH_W) '$(srcdir)/outbuffer.c'; fi` + +gsnapl_avx512-datadir.o: datadir.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-datadir.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-datadir.Tpo -c -o gsnapl_avx512-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-datadir.Tpo $(DEPDIR)/gsnapl_avx512-datadir.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datadir.c' object='gsnapl_avx512-datadir.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-datadir.o `test -f 'datadir.c' || echo '$(srcdir)/'`datadir.c + +gsnapl_avx512-datadir.obj: datadir.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) 
$(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-datadir.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-datadir.Tpo -c -o gsnapl_avx512-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-datadir.Tpo $(DEPDIR)/gsnapl_avx512-datadir.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='datadir.c' object='gsnapl_avx512-datadir.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-datadir.obj `if test -f 'datadir.c'; then $(CYGPATH_W) 'datadir.c'; else $(CYGPATH_W) '$(srcdir)/datadir.c'; fi` + +gsnapl_avx512-parserange.o: parserange.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-parserange.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-parserange.Tpo -c -o gsnapl_avx512-parserange.o `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-parserange.Tpo $(DEPDIR)/gsnapl_avx512-parserange.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parserange.c' object='gsnapl_avx512-parserange.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-parserange.o `test -f 'parserange.c' || echo '$(srcdir)/'`parserange.c + +gsnapl_avx512-parserange.obj: parserange.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-parserange.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-parserange.Tpo -c -o gsnapl_avx512-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-parserange.Tpo $(DEPDIR)/gsnapl_avx512-parserange.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='parserange.c' object='gsnapl_avx512-parserange.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-parserange.obj `if test -f 'parserange.c'; then $(CYGPATH_W) 'parserange.c'; else $(CYGPATH_W) '$(srcdir)/parserange.c'; fi` + +gsnapl_avx512-getopt.o: getopt.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-getopt.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-getopt.Tpo -c -o gsnapl_avx512-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-getopt.Tpo $(DEPDIR)/gsnapl_avx512-getopt.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt.c' object='gsnapl_avx512-getopt.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-getopt.o `test -f 'getopt.c' || echo '$(srcdir)/'`getopt.c + +gsnapl_avx512-getopt.obj: getopt.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT 
gsnapl_avx512-getopt.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-getopt.Tpo -c -o gsnapl_avx512-getopt.obj `if test -f 'getopt.c'; then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-getopt.Tpo $(DEPDIR)/gsnapl_avx512-getopt.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt.c' object='gsnapl_avx512-getopt.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-getopt.obj `if test -f 'getopt.c'; then $(CYGPATH_W) 'getopt.c'; else $(CYGPATH_W) '$(srcdir)/getopt.c'; fi` + +gsnapl_avx512-getopt1.o: getopt1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-getopt1.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-getopt1.Tpo -c -o gsnapl_avx512-getopt1.o `test -f 'getopt1.c' || echo '$(srcdir)/'`getopt1.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-getopt1.Tpo $(DEPDIR)/gsnapl_avx512-getopt1.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt1.c' object='gsnapl_avx512-getopt1.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-getopt1.o `test -f 'getopt1.c' || echo '$(srcdir)/'`getopt1.c + +gsnapl_avx512-getopt1.obj: getopt1.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-getopt1.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-getopt1.Tpo -c -o gsnapl_avx512-getopt1.obj 
`if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-getopt1.Tpo $(DEPDIR)/gsnapl_avx512-getopt1.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='getopt1.c' object='gsnapl_avx512-getopt1.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-getopt1.obj `if test -f 'getopt1.c'; then $(CYGPATH_W) 'getopt1.c'; else $(CYGPATH_W) '$(srcdir)/getopt1.c'; fi` + +gsnapl_avx512-gsnap.o: gsnap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-gsnap.o -MD -MP -MF $(DEPDIR)/gsnapl_avx512-gsnap.Tpo -c -o gsnapl_avx512-gsnap.o `test -f 'gsnap.c' || echo '$(srcdir)/'`gsnap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-gsnap.Tpo $(DEPDIR)/gsnapl_avx512-gsnap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gsnap.c' object='gsnapl_avx512-gsnap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-gsnap.o `test -f 'gsnap.c' || echo '$(srcdir)/'`gsnap.c + +gsnapl_avx512-gsnap.obj: gsnap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -MT gsnapl_avx512-gsnap.obj -MD -MP -MF $(DEPDIR)/gsnapl_avx512-gsnap.Tpo -c -o gsnapl_avx512-gsnap.obj `if test -f 'gsnap.c'; then $(CYGPATH_W) 'gsnap.c'; else $(CYGPATH_W) '$(srcdir)/gsnap.c'; fi` +@am__fastdepCC_TRUE@ 
$(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_avx512-gsnap.Tpo $(DEPDIR)/gsnapl_avx512-gsnap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='gsnap.c' object='gsnapl_avx512-gsnap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_avx512_CFLAGS) $(CFLAGS) -c -o gsnapl_avx512-gsnap.obj `if test -f 'gsnap.c'; then $(CYGPATH_W) 'gsnap.c'; else $(CYGPATH_W) '$(srcdir)/gsnap.c'; fi` + gsnapl_nosimd-except.o: except.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-except.o -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-except.Tpo -c -o gsnapl_nosimd-except.o `test -f 'except.c' || echo '$(srcdir)/'`except.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_nosimd-except.Tpo $(DEPDIR)/gsnapl_nosimd-except.Po @@ -31646,6 +37734,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -c -o gsnapl_nosimd-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gsnapl_nosimd-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-merge.o -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-merge.Tpo -c -o gsnapl_nosimd-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_nosimd-merge.Tpo $(DEPDIR)/gsnapl_nosimd-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnapl_nosimd-merge.o' libtool=no 
@AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -c -o gsnapl_nosimd-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gsnapl_nosimd-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-merge.obj -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-merge.Tpo -c -o gsnapl_nosimd-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_nosimd-merge.Tpo $(DEPDIR)/gsnapl_nosimd-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnapl_nosimd-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -c -o gsnapl_nosimd-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gsnapl_nosimd-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-indexdb.o -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-indexdb.Tpo -c -o gsnapl_nosimd-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_nosimd-indexdb.Tpo $(DEPDIR)/gsnapl_nosimd-indexdb.Po @@ -32290,6 +38392,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(gsnapl_nosimd_CFLAGS) $(CFLAGS) -c -o gsnapl_nosimd-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi` +gsnapl_nosimd-merge-heap.o: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-merge-heap.Tpo -c -o gsnapl_nosimd-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_nosimd-merge-heap.Tpo $(DEPDIR)/gsnapl_nosimd-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnapl_nosimd-merge-heap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -c -o gsnapl_nosimd-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c + +gsnapl_nosimd-merge-heap.obj: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-merge-heap.Tpo -c -o gsnapl_nosimd-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_nosimd-merge-heap.Tpo $(DEPDIR)/gsnapl_nosimd-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnapl_nosimd-merge-heap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) 
$(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -c -o gsnapl_nosimd-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` + gsnapl_nosimd-stage1hr.o: stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_nosimd_CFLAGS) $(CFLAGS) -MT gsnapl_nosimd-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnapl_nosimd-stage1hr.Tpo -c -o gsnapl_nosimd-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_nosimd-stage1hr.Tpo $(DEPDIR)/gsnapl_nosimd-stage1hr.Po @@ -32864,6 +38980,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -c -o gsnapl_sse2-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gsnapl_sse2-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -MT gsnapl_sse2-merge.o -MD -MP -MF $(DEPDIR)/gsnapl_sse2-merge.Tpo -c -o gsnapl_sse2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse2-merge.Tpo $(DEPDIR)/gsnapl_sse2-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnapl_sse2-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -c -o gsnapl_sse2-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gsnapl_sse2-merge.obj: merge.c 
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -MT gsnapl_sse2-merge.obj -MD -MP -MF $(DEPDIR)/gsnapl_sse2-merge.Tpo -c -o gsnapl_sse2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse2-merge.Tpo $(DEPDIR)/gsnapl_sse2-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnapl_sse2-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -c -o gsnapl_sse2-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gsnapl_sse2-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -MT gsnapl_sse2-indexdb.o -MD -MP -MF $(DEPDIR)/gsnapl_sse2-indexdb.Tpo -c -o gsnapl_sse2-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse2-indexdb.Tpo $(DEPDIR)/gsnapl_sse2-indexdb.Po @@ -33508,6 +39638,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -c -o gsnapl_sse2-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi` +gsnapl_sse2-merge-heap.o: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -MT gsnapl_sse2-merge-heap.o -MD -MP 
-MF $(DEPDIR)/gsnapl_sse2-merge-heap.Tpo -c -o gsnapl_sse2-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse2-merge-heap.Tpo $(DEPDIR)/gsnapl_sse2-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnapl_sse2-merge-heap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -c -o gsnapl_sse2-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c + +gsnapl_sse2-merge-heap.obj: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -MT gsnapl_sse2-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnapl_sse2-merge-heap.Tpo -c -o gsnapl_sse2-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse2-merge-heap.Tpo $(DEPDIR)/gsnapl_sse2-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnapl_sse2-merge-heap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -c -o gsnapl_sse2-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` + gsnapl_sse2-stage1hr.o: stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse2_CFLAGS) $(CFLAGS) -MT gsnapl_sse2-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnapl_sse2-stage1hr.Tpo -c -o 
gsnapl_sse2-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse2-stage1hr.Tpo $(DEPDIR)/gsnapl_sse2-stage1hr.Po @@ -34082,6 +40226,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -c -o gsnapl_sse41-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gsnapl_sse41-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -MT gsnapl_sse41-merge.o -MD -MP -MF $(DEPDIR)/gsnapl_sse41-merge.Tpo -c -o gsnapl_sse41-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse41-merge.Tpo $(DEPDIR)/gsnapl_sse41-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnapl_sse41-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -c -o gsnapl_sse41-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gsnapl_sse41-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -MT gsnapl_sse41-merge.obj -MD -MP -MF $(DEPDIR)/gsnapl_sse41-merge.Tpo -c -o gsnapl_sse41-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse41-merge.Tpo $(DEPDIR)/gsnapl_sse41-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='merge.c' object='gsnapl_sse41-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -c -o gsnapl_sse41-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gsnapl_sse41-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -MT gsnapl_sse41-indexdb.o -MD -MP -MF $(DEPDIR)/gsnapl_sse41-indexdb.Tpo -c -o gsnapl_sse41-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse41-indexdb.Tpo $(DEPDIR)/gsnapl_sse41-indexdb.Po @@ -34726,6 +40884,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -c -o gsnapl_sse41-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi` +gsnapl_sse41-merge-heap.o: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -MT gsnapl_sse41-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnapl_sse41-merge-heap.Tpo -c -o gsnapl_sse41-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse41-merge-heap.Tpo $(DEPDIR)/gsnapl_sse41-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnapl_sse41-merge-heap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) 
@AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -c -o gsnapl_sse41-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c + +gsnapl_sse41-merge-heap.obj: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -MT gsnapl_sse41-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnapl_sse41-merge-heap.Tpo -c -o gsnapl_sse41-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse41-merge-heap.Tpo $(DEPDIR)/gsnapl_sse41-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnapl_sse41-merge-heap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -c -o gsnapl_sse41-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` + gsnapl_sse41-stage1hr.o: stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse41_CFLAGS) $(CFLAGS) -MT gsnapl_sse41-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnapl_sse41-stage1hr.Tpo -c -o gsnapl_sse41-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse41-stage1hr.Tpo $(DEPDIR)/gsnapl_sse41-stage1hr.Po @@ -35300,6 +41472,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(gsnapl_sse42_CFLAGS) $(CFLAGS) -c -o gsnapl_sse42-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gsnapl_sse42-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -MT gsnapl_sse42-merge.o -MD -MP -MF $(DEPDIR)/gsnapl_sse42-merge.Tpo -c -o gsnapl_sse42-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse42-merge.Tpo $(DEPDIR)/gsnapl_sse42-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnapl_sse42-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -c -o gsnapl_sse42-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gsnapl_sse42-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -MT gsnapl_sse42-merge.obj -MD -MP -MF $(DEPDIR)/gsnapl_sse42-merge.Tpo -c -o gsnapl_sse42-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse42-merge.Tpo $(DEPDIR)/gsnapl_sse42-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnapl_sse42-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -c -o gsnapl_sse42-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else 
$(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gsnapl_sse42-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -MT gsnapl_sse42-indexdb.o -MD -MP -MF $(DEPDIR)/gsnapl_sse42-indexdb.Tpo -c -o gsnapl_sse42-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse42-indexdb.Tpo $(DEPDIR)/gsnapl_sse42-indexdb.Po @@ -35944,6 +42130,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -c -o gsnapl_sse42-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi` +gsnapl_sse42-merge-heap.o: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -MT gsnapl_sse42-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnapl_sse42-merge-heap.Tpo -c -o gsnapl_sse42-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse42-merge-heap.Tpo $(DEPDIR)/gsnapl_sse42-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnapl_sse42-merge-heap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -c -o gsnapl_sse42-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c + +gsnapl_sse42-merge-heap.obj: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) 
$(gsnapl_sse42_CFLAGS) $(CFLAGS) -MT gsnapl_sse42-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnapl_sse42-merge-heap.Tpo -c -o gsnapl_sse42-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse42-merge-heap.Tpo $(DEPDIR)/gsnapl_sse42-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnapl_sse42-merge-heap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -c -o gsnapl_sse42-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` + gsnapl_sse42-stage1hr.o: stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_sse42_CFLAGS) $(CFLAGS) -MT gsnapl_sse42-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnapl_sse42-stage1hr.Tpo -c -o gsnapl_sse42-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_sse42-stage1hr.Tpo $(DEPDIR)/gsnapl_sse42-stage1hr.Po @@ -36518,6 +42718,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -c -o gsnapl_ssse3-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +gsnapl_ssse3-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -MT gsnapl_ssse3-merge.o -MD -MP -MF 
$(DEPDIR)/gsnapl_ssse3-merge.Tpo -c -o gsnapl_ssse3-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_ssse3-merge.Tpo $(DEPDIR)/gsnapl_ssse3-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnapl_ssse3-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -c -o gsnapl_ssse3-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +gsnapl_ssse3-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -MT gsnapl_ssse3-merge.obj -MD -MP -MF $(DEPDIR)/gsnapl_ssse3-merge.Tpo -c -o gsnapl_ssse3-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_ssse3-merge.Tpo $(DEPDIR)/gsnapl_ssse3-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='gsnapl_ssse3-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -c -o gsnapl_ssse3-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + gsnapl_ssse3-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -MT gsnapl_ssse3-indexdb.o -MD -MP -MF $(DEPDIR)/gsnapl_ssse3-indexdb.Tpo -c -o gsnapl_ssse3-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) 
$(DEPDIR)/gsnapl_ssse3-indexdb.Tpo $(DEPDIR)/gsnapl_ssse3-indexdb.Po @@ -37162,6 +43376,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -c -o gsnapl_ssse3-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi` +gsnapl_ssse3-merge-heap.o: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -MT gsnapl_ssse3-merge-heap.o -MD -MP -MF $(DEPDIR)/gsnapl_ssse3-merge-heap.Tpo -c -o gsnapl_ssse3-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_ssse3-merge-heap.Tpo $(DEPDIR)/gsnapl_ssse3-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='gsnapl_ssse3-merge-heap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -c -o gsnapl_ssse3-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c + +gsnapl_ssse3-merge-heap.obj: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -MT gsnapl_ssse3-merge-heap.obj -MD -MP -MF $(DEPDIR)/gsnapl_ssse3-merge-heap.Tpo -c -o gsnapl_ssse3-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_ssse3-merge-heap.Tpo $(DEPDIR)/gsnapl_ssse3-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ 
$(AM_V_CC)source='merge-heap.c' object='gsnapl_ssse3-merge-heap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -c -o gsnapl_ssse3-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` + gsnapl_ssse3-stage1hr.o: stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(gsnapl_ssse3_CFLAGS) $(CFLAGS) -MT gsnapl_ssse3-stage1hr.o -MD -MP -MF $(DEPDIR)/gsnapl_ssse3-stage1hr.Tpo -c -o gsnapl_ssse3-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/gsnapl_ssse3-stage1hr.Tpo $(DEPDIR)/gsnapl_ssse3-stage1hr.Po @@ -39444,6 +45672,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +uniqscan-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-merge.o -MD -MP -MF $(DEPDIR)/uniqscan-merge.Tpo -c -o uniqscan-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-merge.Tpo $(DEPDIR)/uniqscan-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='uniqscan-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +uniqscan-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-merge.obj -MD -MP -MF $(DEPDIR)/uniqscan-merge.Tpo -c -o uniqscan-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-merge.Tpo $(DEPDIR)/uniqscan-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='uniqscan-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + uniqscan-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-indexdb.o -MD -MP -MF $(DEPDIR)/uniqscan-indexdb.Tpo -c -o uniqscan-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-indexdb.Tpo $(DEPDIR)/uniqscan-indexdb.Po @@ -40130,6 +46372,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-sarray-read.obj `if test -f 'sarray-read.c'; then $(CYGPATH_W) 'sarray-read.c'; else $(CYGPATH_W) '$(srcdir)/sarray-read.c'; fi` +uniqscan-merge-heap.o: merge-heap.c +@am__fastdepCC_TRUE@ 
$(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-merge-heap.o -MD -MP -MF $(DEPDIR)/uniqscan-merge-heap.Tpo -c -o uniqscan-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-merge-heap.Tpo $(DEPDIR)/uniqscan-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='uniqscan-merge-heap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c + +uniqscan-merge-heap.obj: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -MT uniqscan-merge-heap.obj -MD -MP -MF $(DEPDIR)/uniqscan-merge-heap.Tpo -c -o uniqscan-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-merge-heap.Tpo $(DEPDIR)/uniqscan-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='uniqscan-merge-heap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) $(CFLAGS) -c -o uniqscan-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` + uniqscan-stage1hr.o: stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscan_CFLAGS) 
$(CFLAGS) -MT uniqscan-stage1hr.o -MD -MP -MF $(DEPDIR)/uniqscan-stage1hr.Tpo -c -o uniqscan-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/uniqscan-stage1hr.Tpo $(DEPDIR)/uniqscan-stage1hr.Po @@ -40634,6 +46890,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-bitpack64-readtwo.obj `if test -f 'bitpack64-readtwo.c'; then $(CYGPATH_W) 'bitpack64-readtwo.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-readtwo.c'; fi` +uniqscanl-merge.o: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-merge.o -MD -MP -MF $(DEPDIR)/uniqscanl-merge.Tpo -c -o uniqscanl-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/uniqscanl-merge.Tpo $(DEPDIR)/uniqscanl-merge.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='uniqscanl-merge.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-merge.o `test -f 'merge.c' || echo '$(srcdir)/'`merge.c + +uniqscanl-merge.obj: merge.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-merge.obj -MD -MP -MF $(DEPDIR)/uniqscanl-merge.Tpo -c -o uniqscanl-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/uniqscanl-merge.Tpo $(DEPDIR)/uniqscanl-merge.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge.c' object='uniqscanl-merge.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-merge.obj `if test -f 'merge.c'; then $(CYGPATH_W) 'merge.c'; else $(CYGPATH_W) '$(srcdir)/merge.c'; fi` + uniqscanl-indexdb.o: indexdb.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-indexdb.o -MD -MP -MF $(DEPDIR)/uniqscanl-indexdb.Tpo -c -o uniqscanl-indexdb.o `test -f 'indexdb.c' || echo '$(srcdir)/'`indexdb.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/uniqscanl-indexdb.Tpo $(DEPDIR)/uniqscanl-indexdb.Po @@ -41264,6 +47534,20 @@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-bitpack64-access.obj `if test -f 'bitpack64-access.c'; then $(CYGPATH_W) 'bitpack64-access.c'; else $(CYGPATH_W) '$(srcdir)/bitpack64-access.c'; fi` +uniqscanl-merge-heap.o: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-merge-heap.o -MD -MP -MF $(DEPDIR)/uniqscanl-merge-heap.Tpo -c -o uniqscanl-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/uniqscanl-merge-heap.Tpo $(DEPDIR)/uniqscanl-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='uniqscanl-merge-heap.o' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ 
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-merge-heap.o `test -f 'merge-heap.c' || echo '$(srcdir)/'`merge-heap.c + +uniqscanl-merge-heap.obj: merge-heap.c +@am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-merge-heap.obj -MD -MP -MF $(DEPDIR)/uniqscanl-merge-heap.Tpo -c -o uniqscanl-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/uniqscanl-merge-heap.Tpo $(DEPDIR)/uniqscanl-merge-heap.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='merge-heap.c' object='uniqscanl-merge-heap.obj' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -c -o uniqscanl-merge-heap.obj `if test -f 'merge-heap.c'; then $(CYGPATH_W) 'merge-heap.c'; else $(CYGPATH_W) '$(srcdir)/merge-heap.c'; fi` + uniqscanl-stage1hr.o: stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_CC)$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(uniqscanl_CFLAGS) $(CFLAGS) -MT uniqscanl-stage1hr.o -MD -MP -MF $(DEPDIR)/uniqscanl-stage1hr.Tpo -c -o uniqscanl-stage1hr.o `test -f 'stage1hr.c' || echo '$(srcdir)/'`stage1hr.c @am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/uniqscanl-stage1hr.Tpo $(DEPDIR)/uniqscanl-stage1hr.Po diff -Nru gmap-2016-11-07/src/mem.h gmap-2017-01-14/src/mem.h --- gmap-2016-11-07/src/mem.h 2016-11-07 17:15:30.000000000 +0000 +++ gmap-2017-01-14/src/mem.h 2017-01-13 23:28:23.000000000 +0000 @@ -1,4 +1,4 @@ -/* $Id: mem.h 157223 2015-01-22 18:43:01Z twu $ */ +/* $Id: mem.h 202588 2017-01-13 23:28:23Z twu $ */ #ifndef 
MEM_INCLUDED #define MEM_INCLUDED #ifdef HAVE_CONFIG_H @@ -16,6 +16,11 @@ #include #endif +#ifdef HAVE_SSE4_1 +/* For SIMD merge, which requires SSE4.1 as a minimum */ +#include "xmmintrin.h" /* For MALLOC_ALIGN */ +#endif + #define MAX_QUERYLENGTH_STACK 10000 @@ -86,6 +91,7 @@ #define MTRAP(location) Mem_trap_start((location), __FILE__, __LINE__) #define MCHECK() Mem_trap_check(__FILE__, __LINE__) + #define MALLOC(nbytes) Mem_alloc((nbytes), __FILE__, __LINE__) #define CALLOC(count, nbytes) Mem_calloc((count), (nbytes), __FILE__, __LINE__) #define FREE(ptr) ((void)(Mem_free((ptr),__FILE__, __LINE__), (ptr) = 0)) @@ -111,6 +117,24 @@ #endif +#ifdef HAVE_AVX512 +#define MALLOC_ALIGN(x) _mm_malloc(x,64) +#define FREE_ALIGN(x) _mm_free(x) +#define CHECK_ALIGN(x) assert((unsigned long) x % 64 == 0) +#elif defined(HAVE_AVX2) +#define MALLOC_ALIGN(x) _mm_malloc(x,32) +#define FREE_ALIGN(x) _mm_free(x) +#define CHECK_ALIGN(x) assert((unsigned long) x % 32 == 0) +#elif defined(HAVE_SSE4_1) +#define MALLOC_ALIGN(x) _mm_malloc(x,16) +#define FREE_ALIGN(x) _mm_free(x) +#define CHECK_ALIGN(x) assert((unsigned long) x % 16 == 0) +#else +#define MALLOC_ALIGN(x) MALLOC(x) +#define FREE_ALIGN(x) FREE(x) +#define CHECK_ALIGN(x) +#endif + #ifdef MEMUSAGE #define MALLOC_KEEP(nbytes) Mem_alloc_keep((nbytes), __FILE__, __LINE__) diff -Nru gmap-2016-11-07/src/merge.c gmap-2017-01-14/src/merge.c --- gmap-2016-11-07/src/merge.c 1970-01-01 00:00:00.000000000 +0000 +++ gmap-2017-01-14/src/merge.c 2017-01-13 23:29:03.000000000 +0000 @@ -0,0 +1,1086 @@ +static char rcsid[] = "$Id: merge.c 202589 2017-01-13 23:29:02Z twu $"; +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "merge.h" +#include "assert.h" +#include "mem.h" +#include "popcount.h" + +#include +#include +#include /* For memcpy */ + + +#if defined(HAVE_SSE4_1) +#include +#endif +#if defined(HAVE_AVX2) +#include +#endif +#if defined(HAVE_AVX512) +#include +#endif + + +/* #define PYRAMID_SIZE 4 */ +/* #define KEY_MASK (~0U << 2) */ 
+ +#define PYRAMID_SIZE 32 +#define KEY_MASK (~0U << 5) + +#ifdef DEBUG0 +#define debug0(x) x +#else +#define debug0(x) +#endif + +#ifdef DEBUG +#define debug(x) x +#else +#define debug(x) +#endif + +#ifdef DEBUG2 +#define debug2(x) x +#else +#define debug2(x) +#endif + + +#ifdef DEBUG +#ifdef HAVE_SSE4_1 +static void +print_vector (__m128i x, char *label) { + unsigned int *s = (unsigned int *) &x; + + printf("%s: %u %u %u %u\n",label,s[0],s[1],s[2],s[3]); + return; +} +#endif + +#ifdef HAVE_AVX2 +static void +print_vector_256 (__m256i x, char *label) { + unsigned int *s = (unsigned int *) &x; + + printf("%s: %u %u %u %u %u %u %u %u\n",label,s[0],s[1],s[2],s[3],s[4],s[5],s[6],s[7]); + return; +} +#endif + +#ifdef HAVE_AVX512 +static void +print_vector_512 (__m512i x, char *label) { + unsigned int *s = (unsigned int *) &x; + + printf("%s: %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u\n", + label,s[0],s[1],s[2],s[3],s[4],s[5],s[6],s[7], + s[8],s[9],s[10],s[11],s[12],s[13],s[14],s[15]); + return; +} +#endif +#endif + + + +#ifdef HAVE_SSE4_1 +/* The min and max procedures require SSE4.1, which makes SSE4.1 the minimum requirement for SIMD-based merge */ +static void +merge_4x4 (__m128i *__restrict__ vMergedA, __m128i *__restrict__ vMergedB, __m128i vA, __m128i vB) { + __m128i vTmp, vMin, vMax; + + vMin = _mm_min_epu32(vA, vB); + vMax = _mm_max_epu32(vA, vB); + /* print_vector(vMin,"Min 1"); */ + /* print_vector(vMax,"Max 1"); */ + + vTmp = _mm_alignr_epi8(vMin, vMin, 4); /* Rotate Min by 4 */ + vMin = _mm_min_epu32(vTmp, vMax); + vMax = _mm_max_epu32(vTmp, vMax); + /* print_vector(vTmp,"Tmp 2"); */ + /* print_vector(vMin,"Min 2"); */ + /* print_vector(vMax,"Max 2"); */ + + vTmp = _mm_alignr_epi8(vMin, vMin, 4); + vMin = _mm_min_epu32(vTmp, vMax); + vMax = _mm_max_epu32(vTmp, vMax); + /* print_vector(vTmp,"Tmp 3"); */ + /* print_vector(vMin,"Min 3"); */ + /* print_vector(vMax,"Max 3"); */ + + vTmp = _mm_alignr_epi8(vMin, vMin, 4); + vMin = _mm_min_epu32(vTmp, vMax); + 
/* print_vector(vTmp,"Tmp 4"); */ + /* print_vector(vMin,"Min 4"); */ + + *vMergedB = _mm_max_epu32(vTmp, vMax); + *vMergedA = _mm_alignr_epi8(vMin, vMin, 4); + + return; +} + + +#ifndef HAVE_AVX2 +static void +merge_8x8_network (__m128i *__restrict__ vMergedA, __m128i *__restrict__ vMergedB, + __m128i *__restrict__ vMergedC, __m128i *__restrict__ vMergedD, + __m128i vA0, __m128i vA1, __m128i vB0, __m128i vB1) { + merge_4x4(&(*vMergedA),&(*vMergedB),vA0,vB0); + merge_4x4(&(*vMergedC),&(*vMergedD),vA1,vB1); + + merge_4x4(&(*vMergedB),&(*vMergedC),*vMergedC,*vMergedB); + return; +} +#endif +#endif + + +#ifdef HAVE_AVX2 +/* The problem is that _mm256_alignr_epi8 rotates within 128-bit lanes */ +/* So use _mm256_permutevar8x32_epi32, which shuffles across lanes */ +static void +merge_8x8 (__m256i *__restrict__ vMergedA, __m256i *__restrict__ vMergedB, __m256i vA, __m256i vB) { + __m256i vTmp, vMin, vMax; + __m256i vRot; + + vRot = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0); + + /* print_vector_256(vA,"vA"); */ + /* print_vector_256(*vB,"vB"); */ + + /* 1 */ + vMin = _mm256_min_epu32(vA, vB); + vMax = _mm256_max_epu32(vA, vB); + /* print_vector_256(vMin,"Min 1"); */ + /* print_vector_256(vMax,"Max 1"); */ + + /* 2 */ + vTmp = _mm256_permutevar8x32_epi32(vMin, vRot); /* Rotate Min by ints */ + vMin = _mm256_min_epu32(vTmp, vMax); + vMax = _mm256_max_epu32(vTmp, vMax); + + /* 3 */ + vTmp = _mm256_permutevar8x32_epi32(vMin, vRot); /* Rotate Min by ints */ + vMin = _mm256_min_epu32(vTmp, vMax); + vMax = _mm256_max_epu32(vTmp, vMax); + + /* 4 */ + vTmp = _mm256_permutevar8x32_epi32(vMin, vRot); /* Rotate Min by ints */ + vMin = _mm256_min_epu32(vTmp, vMax); + vMax = _mm256_max_epu32(vTmp, vMax); + + /* 5 */ + vTmp = _mm256_permutevar8x32_epi32(vMin, vRot); /* Rotate Min by ints */ + vMin = _mm256_min_epu32(vTmp, vMax); + vMax = _mm256_max_epu32(vTmp, vMax); + + /* 6 */ + vTmp = _mm256_permutevar8x32_epi32(vMin, vRot); /* Rotate Min by ints */ + vMin = _mm256_min_epu32(vTmp, 
vMax); + vMax = _mm256_max_epu32(vTmp, vMax); + + /* 7 */ + vTmp = _mm256_permutevar8x32_epi32(vMin, vRot); /* Rotate Min by ints */ + vMin = _mm256_min_epu32(vTmp, vMax); + vMax = _mm256_max_epu32(vTmp, vMax); + + /* 8 */ + vTmp = _mm256_permutevar8x32_epi32(vMin, vRot); /* Rotate Min by ints */ + vMin = _mm256_min_epu32(vTmp, vMax); + /* print_vector_256(vTmp,"Tmp 8"); */ + /* print_vector_256(vMin,"Min 8"); */ + + *vMergedB = _mm256_max_epu32(vTmp, vMax); + *vMergedA = _mm256_permutevar8x32_epi32(vMin, vRot); /* Rotate Min by ints */ + /* print_vector_256(*vMergedA,"vMergedA"); */ + /* print_vector_256(*vMergedB,"vMergedB"); */ + /* printf("\n"); */ + + return; +} + +#ifndef HAVE_AVX512 +static void +merge_16x16_network (__m256i *__restrict__ vMergedA, __m256i *__restrict__ vMergedB, + __m256i *__restrict__ vMergedC, __m256i *__restrict__ vMergedD, + __m256i vA0, __m256i vA1, __m256i vB0, __m256i vB1) { + merge_8x8(&(*vMergedA),&(*vMergedB),vA0,vB0); + merge_8x8(&(*vMergedC),&(*vMergedD),vA1,vB1); + + merge_8x8(&(*vMergedB),&(*vMergedC),*vMergedC,*vMergedB); + return; +} +#endif +#endif + + +#ifdef HAVE_AVX512 +static void +merge_16x16 (__m512i *__restrict__ vMergedA, __m512i *__restrict__ vMergedB, __m512i vA, __m512i vB) { + __m512i vTmp, vMin, vMax; + __m512i vRot; + int i; + + vRot = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0); + + /* print_vector_512(vA,"vA"); */ + /* print_vector_512(vB,"vB"); */ + + /* 1 */ + vMin = _mm512_min_epu32(vA, vB); + vMax = _mm512_max_epu32(vA, vB); + /* print_vector_512(vMin,"Min 1"); */ + /* print_vector_512(vMax,"Max 1"); */ + + /* 2..15 */ + for (i = 0; i < 14; i++) { + vTmp = _mm512_permutexvar_epi32(vRot, vMin); /* Rotate Min by ints */ + vMin = _mm512_min_epu32(vTmp, vMax); + vMax = _mm512_max_epu32(vTmp, vMax); + /* print_vector_512(vTmp,"Tmp 2"); */ + /* print_vector_512(vMin,"Min 2"); */ + /* print_vector_512(vMax,"Max 2"); */ + } + + /* 16 */ + vTmp = _mm512_permutexvar_epi32(vRot, vMin); 
/* Rotate Min by ints */ + vMin = _mm512_min_epu32(vTmp, vMax); + /* print_vector_512(vTmp,"Tmp 16"); */ + /* print_vector_512(vMin,"Min 16"); */ + + *vMergedB = _mm512_max_epu32(vTmp, vMax); + *vMergedA = _mm512_permutexvar_epi32(vRot, vMin); /* Rotate Min by ints */ + /* print_vector_512(*vMergedA,"vMergedA"); */ + /* print_vector_512(*vMergedB,"vMergedB"); */ + /* printf("\n"); */ + + return; +} + +static void +merge_32x32_network (__m512i *__restrict__ vMergedA, __m512i *__restrict__ vMergedB, + __m512i *__restrict__ vMergedC, __m512i *__restrict__ vMergedD, + __m512i vA0, __m512i vA1, __m512i vB0, __m512i vB1) { + merge_16x16(&(*vMergedA),&(*vMergedB),vA0,vB0); + merge_16x16(&(*vMergedC),&(*vMergedD),vA1,vB1); + + merge_16x16(&(*vMergedB),&(*vMergedC),*vMergedC,*vMergedB); + return; +} +#endif + + + +/* Assumes padding to nearest 4 uints, and alignment to nearest 16 bytes */ +/* If dest is NULL, then allocates and returns memory. Otherwise, fills in at dest */ +unsigned int * +Merge_uint4 (unsigned int *__restrict__ dest, unsigned int *__restrict__ A, + unsigned int *__restrict__ B, int nA, int nB) { + unsigned int *C0, *C, *Aend, *Bend; + unsigned int nextA, nextB; + int nC; +#ifdef HAVE_AVX512 + __m512i vMerged512, vMerged512_0, vMerged512_1, + vOld512, vNew512, vOld512_0, vOld512_1, vNew512_0, vNew512_1; +#endif +#ifdef HAVE_AVX2 + __m256i vMerged256, vMerged256_0, vMerged256_1, + vOld256, vNew256, vOld256_0, vOld256_1, vNew256_0, vNew256_1; +#endif +#ifdef HAVE_SSE4_1 + __m128i vMerged128, vMerged128_0, vMerged128_1, + vOld128, vNew128, vOld128_0, vOld128_1, vNew128_0, vNew128_1; +#endif + + + if ((nC = nA + nB) == 0) { + return (unsigned int *) NULL; + } else if (dest) { + C0 = C = dest; + } else { +#if defined(HAVE_SSE4_1) + C0 = C = (unsigned int *) MALLOC_ALIGN(nC * sizeof(unsigned int)); +#else + C0 = C = (unsigned int *) MALLOC(nC * sizeof(unsigned int)); +#endif + } + + Aend = &(A[nA]); + Bend = &(B[nB]); + +#ifdef HAVE_AVX512 + if (A < Aend - 32 && 
B < Bend - 32) { + /* 32 ints = 1024 bits */ + if ((nextA = A[32]) < (nextB = B[32])) { + vOld512_0 = _mm512_load_si512((__m512i *) B); B += 16; + vOld512_1 = _mm512_load_si512((__m512i *) B); B += 16; + vNew512_0 = _mm512_load_si512((__m512i *) A); A += 16; + vNew512_1 = _mm512_load_si512((__m512i *) A); A += 16; + } else { + vOld512_0 = _mm512_load_si512((__m512i *) A); A += 16; + vOld512_1 = _mm512_load_si512((__m512i *) A); A += 16; + vNew512_0 = _mm512_load_si512((__m512i *) B); B += 16; + vNew512_1 = _mm512_load_si512((__m512i *) B); B += 16; + } + merge_32x32_network(&vMerged512_0,&vMerged512_1,&vOld512_0,&vOld512_1, + vOld512_0,vOld512_1,vNew512_0,vNew512_1); + _mm512_stream_si512((__m512i *) C,vMerged512_0); C += 16; + _mm512_stream_si512((__m512i *) C,vMerged512_1); C += 16; + + while (A < Aend - 32 && B < Bend - 32) { + if (nextA < nextB) { + vNew512_0 = _mm512_load_si512((__m512i *) A); A += 16; + vNew512_1 = _mm512_load_si512((__m512i *) A); A += 16; + nextA = *A; + } else { + vNew512_0 = _mm512_load_si512((__m512i *) B); B += 16; + vNew512_1 = _mm512_load_si512((__m512i *) B); B += 16; + nextB = *B; + } + merge_32x32_network(&vMerged512_0,&vMerged512_1,&vOld512_0,&vOld512_1, + vOld512_0,vOld512_1,vNew512_0,vNew512_1); + _mm512_stream_si512((__m512i *) C,vMerged512_0); C += 16; + _mm512_stream_si512((__m512i *) C,vMerged512_1); C += 16; + } + + /* Re-insert before largest element */ + if (nextA < nextB) { + B -= 16; _mm512_store_si512((__m512i *) B,vOld512_1); + B -= 16; _mm512_store_si512((__m512i *) B,vOld512_0); + } else { + A -= 16; _mm512_store_si512((__m512i *) A,vOld512_1); + A -= 16; _mm512_store_si512((__m512i *) A,vOld512_0); + } + } +#endif + + +#ifdef HAVE_AVX512 + if (A < Aend - 16 && B < Bend - 16) { + /* 512 bits */ + if ((nextA = A[16]) < (nextB = B[16])) { + vOld512 = _mm512_load_si512((__m512i *) B); B += 16; + vNew512 = _mm512_load_si512((__m512i *) A); A += 16; + } else { + vOld512 = _mm512_load_si512((__m512i *) A); A += 16; + 
vNew512 = _mm512_load_si512((__m512i *) B); B += 16; + } + merge_16x16(&vMerged512,&vOld512,vOld512,vNew512); + _mm512_stream_si512((__m512i *) C,vMerged512); C += 16; + + while (A < Aend - 16 && B < Bend - 16) { + if (nextA < nextB) { + vNew512 = _mm512_load_si512((__m512i *) A); A += 16; nextA = *A; + } else { + vNew512 = _mm512_load_si512((__m512i *) B); B += 16; nextB = *B; + } + merge_16x16(&vMerged512,&vOld512,vOld512,vNew512); + _mm512_stream_si512((__m512i *) C,vMerged512); C += 16; + } + + /* Re-insert before largest element */ + if (nextA < nextB) { + B -= 16; _mm512_store_si512((__m512i *) B,vOld512); + } else { + A -= 16; _mm512_store_si512((__m512i *) A,vOld512); + } + } + +#elif defined(HAVE_AVX2) + if (A < Aend - 16 && B < Bend - 16) { + if ((nextA = A[16]) < (nextB = B[16])) { + vOld256_0 = _mm256_load_si256((__m256i *) B); B += 8; + vOld256_1 = _mm256_load_si256((__m256i *) B); B += 8; + vNew256_0 = _mm256_load_si256((__m256i *) A); A += 8; + vNew256_1 = _mm256_load_si256((__m256i *) A); A += 8; + } else { + vOld256_0 = _mm256_load_si256((__m256i *) A); A += 8; + vOld256_1 = _mm256_load_si256((__m256i *) A); A += 8; + vNew256_0 = _mm256_load_si256((__m256i *) B); B += 8; + vNew256_1 = _mm256_load_si256((__m256i *) B); B += 8; + } + merge_16x16_network(&vMerged256_0,&vMerged256_1,&vOld256_0,&vOld256_1, + vOld256_0,vOld256_1,vNew256_0,vNew256_1); + _mm256_stream_si256((__m256i *) C,vMerged256_0); C += 8; + _mm256_stream_si256((__m256i *) C,vMerged256_1); C += 8; + + while (A < Aend - 16 && B < Bend - 16) { + if (nextA < nextB) { + vNew256_0 = _mm256_load_si256((__m256i *) A); A += 8; + vNew256_1 = _mm256_load_si256((__m256i *) A); A += 8; + nextA = *A; + } else { + vNew256_0 = _mm256_load_si256((__m256i *) B); B += 8; + vNew256_1 = _mm256_load_si256((__m256i *) B); B += 8; + nextB = *B; + } + merge_16x16_network(&vMerged256_0,&vMerged256_1,&vOld256_0,&vOld256_1, + vOld256_0,vOld256_1,vNew256_0,vNew256_1); + _mm256_stream_si256((__m256i *) 
C,vMerged256_0); C += 8; + _mm256_stream_si256((__m256i *) C,vMerged256_1); C += 8; + } + + /* Re-insert before largest element */ + if (nextA < nextB) { + B -= 8; _mm256_store_si256((__m256i *) B,vOld256_1); + B -= 8; _mm256_store_si256((__m256i *) B,vOld256_0); + } else { + A -= 8; _mm256_store_si256((__m256i *) A,vOld256_1); + A -= 8; _mm256_store_si256((__m256i *) A,vOld256_0); + } + } +#endif + + +#ifdef HAVE_AVX2 + if (A < Aend - 8 && B < Bend - 8) { + /* 256 bits */ + if ((nextA = A[8]) < (nextB = B[8])) { + vOld256 = _mm256_load_si256((__m256i *) B); B += 8; + vNew256 = _mm256_load_si256((__m256i *) A); A += 8; + } else { + vOld256 = _mm256_load_si256((__m256i *) A); A += 8; + vNew256 = _mm256_load_si256((__m256i *) B); B += 8; + } + merge_8x8(&vMerged256,&vOld256,vOld256,vNew256); + _mm256_stream_si256((__m256i *) C,vMerged256); C += 8; + + while (A < Aend - 8 && B < Bend - 8) { + if (nextA < nextB) { + vNew256 = _mm256_load_si256((__m256i *) A); A += 8; nextA = *A; + } else { + vNew256 = _mm256_load_si256((__m256i *) B); B += 8; nextB = *B; + } + merge_8x8(&vMerged256,&vOld256,vOld256,vNew256); + _mm256_stream_si256((__m256i *) C,vMerged256); C += 8; + } + + /* Re-insert before largest element */ + if (nextA < nextB) { + B -= 8; _mm256_store_si256((__m256i *) B,vOld256); + } else { + A -= 8; _mm256_store_si256((__m256i *) A,vOld256); + } + } + +#elif defined(HAVE_SSE4_1) + if (A < Aend - 8 && B < Bend - 8) { + if ((nextA = A[8]) < (nextB = B[8])) { + vOld128_0 = _mm_load_si128((__m128i *) B); B += 4; + vOld128_1 = _mm_load_si128((__m128i *) B); B += 4; + vNew128_0 = _mm_load_si128((__m128i *) A); A += 4; + vNew128_1 = _mm_load_si128((__m128i *) A); A += 4; + } else { + vOld128_0 = _mm_load_si128((__m128i *) A); A += 4; + vOld128_1 = _mm_load_si128((__m128i *) A); A += 4; + vNew128_0 = _mm_load_si128((__m128i *) B); B += 4; + vNew128_1 = _mm_load_si128((__m128i *) B); B += 4; + } + merge_8x8_network(&vMerged128_0,&vMerged128_1,&vOld128_0,&vOld128_1, + 
vOld128_0,vOld128_1,vNew128_0,vNew128_1); + _mm_stream_si128((__m128i *) C,vMerged128_0); C += 4; + _mm_stream_si128((__m128i *) C,vMerged128_1); C += 4; + + while (A < Aend - 8 && B < Bend - 8) { + if (nextA < nextB) { + vNew128_0 = _mm_load_si128((__m128i *) A); A += 4; + vNew128_1 = _mm_load_si128((__m128i *) A); A += 4; + nextA = *A; + } else { + vNew128_0 = _mm_load_si128((__m128i *) B); B += 4; + vNew128_1 = _mm_load_si128((__m128i *) B); B += 4; + nextB = *B; + } + merge_8x8_network(&vMerged128_0,&vMerged128_1,&vOld128_0,&vOld128_1, + vOld128_0,vOld128_1,vNew128_0,vNew128_1); + _mm_stream_si128((__m128i *) C,vMerged128_0); C += 4; + _mm_stream_si128((__m128i *) C,vMerged128_1); C += 4; + } + + /* Re-insert before largest element */ + if (nextA < nextB) { + B -= 4; _mm_store_si128((__m128i *) B,vOld128_1); + B -= 4; _mm_store_si128((__m128i *) B,vOld128_0); + } else { + A -= 4; _mm_store_si128((__m128i *) A,vOld128_1); + A -= 4; _mm_store_si128((__m128i *) A,vOld128_0); + } + } +#endif + + +#ifdef HAVE_SSE4_1 + if (A < Aend - 4 && B < Bend - 4) { + /* 128 bits */ + if ((nextA = A[4]) < (nextB = B[4])) { + vOld128 = _mm_load_si128((__m128i *) B); B += 4; + vNew128 = _mm_load_si128((__m128i *) A); A += 4; + } else { + vOld128 = _mm_load_si128((__m128i *) A); A += 4; + vNew128 = _mm_load_si128((__m128i *) B); B += 4; + } + merge_4x4(&vMerged128,&vOld128,vOld128,vNew128); + _mm_stream_si128((__m128i *) C,vMerged128); C += 4; + + while (A < Aend - 4 && B < Bend - 4) { + if (nextA < nextB) { + vNew128 = _mm_load_si128((__m128i *) A); A += 4; nextA = *A; + } else { + vNew128 = _mm_load_si128((__m128i *) B); B += 4; nextB = *B; + } + merge_4x4(&vMerged128,&vOld128,vOld128,vNew128); + _mm_stream_si128((__m128i *) C,vMerged128); C += 4; + } + + /* Re-insert before largest element */ + if (nextA < nextB) { + B -= 4; _mm_store_si128((__m128i *) B,vOld128); + } else { + A -= 4; _mm_store_si128((__m128i *) A,vOld128); + } + } +#endif + + /* Serial */ + while (A < Aend && B 
< Bend) { + if (*A < *B) { + *C++ = *A++; + } else { + *C++ = *B++; + } + } + + /* The serial loop above stops as soon as A reaches Aend or B reaches Bend. C is not advanced between the two copies below, which relies on one of them having length zero */ memcpy(C,A,(Aend - A) * sizeof(unsigned int)); + memcpy(C,B,(Bend - B) * sizeof(unsigned int)); + + return C0; +} + + + +/* Index arithmetic for a binary tree in which node i has children 2i and 2i+1 */ #define PARENT(i) (i >> 1) +#define LEFT(i) (i << 1) +#define RIGHT(i) ((i << 1) | 1) + +/* Merges the lists at heap nodes pyramid_start..pyramid_end pairwise, one level at a time, until the level has collapsed to a single node. Each pair (nodei-1, nodei) is merged by Merge_uint4 with dest NULL, so the result stored at PARENT(nodei) is freshly allocated (or NULL when both lists are empty), and nelts[] is updated to the combined length. When pyramid_end is past heapsize, the level is processed starting from node heapsize instead. Returns the index of the final ancestor node */ static int +pyramid_merge (unsigned int **heap, int nstreams, int heapsize, int *nelts, + int pyramid_start, int pyramid_end) { + int nodei; +#ifdef DEBUG + int i; +#endif + + while (pyramid_end > pyramid_start) { + debug(printf("Merging level: %d..%d for heapsize %d\n",pyramid_start,pyramid_end,heapsize)); + + if (pyramid_end > heapsize) { + nodei = heapsize; + } else { + nodei = pyramid_end; + } + + /* Walk the level right to left, two siblings at a time */ while (nodei >= pyramid_start) { + debug2(printf("Merging nodes %d (%d elts) and %d (%d elts) => %d\n", + nodei-1,nelts[nodei-1],nodei,nelts[nodei],PARENT(nodei))); + heap[PARENT(nodei)] = Merge_uint4(/*dest*/NULL,heap[nodei-1],heap[nodei],nelts[nodei-1],nelts[nodei]); + CHECK_ALIGN(heap[PARENT(nodei)]); + nelts[PARENT(nodei)] = nelts[nodei-1] + nelts[nodei]; + debug2(printf("Created list %p of length %d at node %d\n", + heap[PARENT(nodei)],nelts[PARENT(nodei)],PARENT(nodei))); + +#ifdef DEBUG + for (i = 0; i < nelts[PARENT(nodei)]; i++) { + printf("%u\n",heap[PARENT(nodei)][i]); + } +#endif + + /* Don't free original lists (when nodei >= nstreams). Nodes below nstreams hold lists created by an earlier merge, so those are released here */ + debug(printf("Freeing nodes %d and %d\n",nodei-1,nodei)); + if (nodei < nstreams) { + FREE_ALIGN(heap[nodei]); + } + if (nodei-1 < nstreams) { + FREE_ALIGN(heap[nodei-1]); + } + nodei -= 2; + } + + /* Go up a level */ pyramid_end = PARENT(pyramid_end); + pyramid_start = PARENT(pyramid_start); + } + + debug(printf("Returning ancestor %d\n\n",pyramid_start)); + return pyramid_start; +} + + +/* Assumes heapi < base put into LEFT(heapi) */ +static int +pyramid_merge_prealloc (unsigned int **heap, unsigned int *curr_storage, unsigned int *prev_storage, + int *nelts, int pyramid_start, int pyramid_end) { + unsigned int *temp; + int nodei; + int nalloc; +#ifdef 
HAVE_SSE4_1 + int n; +#endif + + while (pyramid_end > pyramid_start) { + debug2(printf("Merging level: %d..%d\n",pyramid_start,pyramid_end)); + nalloc = 0; + + nodei = pyramid_end; + while (nodei >= pyramid_start) { + debug2(printf("Merging nodes %d (%d elts) and %d (%d elts) => %d\n", + nodei-1,nelts[nodei-1],nodei,nelts[nodei],PARENT(nodei))); + heap[PARENT(nodei)] = Merge_uint4(/*dest*/&(curr_storage[nalloc]),heap[nodei-1],heap[nodei],nelts[nodei-1],nelts[nodei]); + CHECK_ALIGN(heap[PARENT(nodei)]); + /* Have to align start of each entry curr_storage[nalloc], regardless of end padding */ +#ifdef HAVE_SSE4_1 + n = nelts[PARENT(nodei)] = nelts[nodei-1] + nelts[nodei]; + nalloc += PAD_UINT4(n); +#else + nalloc += (nelts[PARENT(nodei)] = nelts[nodei-1] + nelts[nodei]); +#endif + debug2(printf("Created list %p of length %d at node %d\n", + heap[PARENT(nodei)],nelts[PARENT(nodei)],PARENT(nodei))); + +#ifdef DEBUG2 + for (i = 0; i < nelts[PARENT(nodei)]; i++) { + printf("%u\n",heap[PARENT(nodei)][i]); + } +#endif + + /* Freeing memory one row at a time, so don't do it here */ + nodei -= 2; + } + + /* Swap memory spaces */ + debug(printf("Swapping storage spaces\n")); + temp = prev_storage; + prev_storage = curr_storage; + curr_storage = temp; + + /* Go up a level */ + pyramid_end = PARENT(pyramid_end); + pyramid_start = PARENT(pyramid_start); + } + + debug(printf("Returning ancestor %d\n\n",pyramid_start)); + return pyramid_start; +} + + +static UINT4 ** +make_diagonals_heap (int **nelts, List_T stream_list, Intlist_T streamsize_list, int nstreams) { + UINT4 **heap; + int heapsize, heapi; +#ifdef DEBUG + int i; +#endif + + heapsize = 2*nstreams - 1; + + heap = (UINT4 **) CALLOC((heapsize + 1),sizeof(UINT4 *)); + *nelts = (int *) CALLOC((heapsize + 1),sizeof(int)); + + /* Process in reverse order, because stream_list is in reverse order of elts */ + heapi = heapsize; + while (stream_list != NULL) { + stream_list = List_pop(stream_list,(void *) &(heap[heapi])); /* 
already padded */ + CHECK_ALIGN(heap[heapi]); + streamsize_list = Intlist_pop(streamsize_list,&((*nelts)[heapi])); + debug(printf("Assigning node %d with %d elts",heapi,(*nelts)[heapi])); +#ifdef DEBUG + for (i = 0; i < (*nelts)[heapi]; i++) { + printf(" %u",heap[heapi][i]); + } +#endif + debug(printf("\n")); + heapi--; + } + + return heap; +} + + +UINT4 * +Merge_diagonals (int *nelts1, List_T stream_list, Intlist_T streamsize_list) { + UINT4 *result, **heap; + int *nelts; + int nstreams, heapsize, base, ancestori, pyramid_start, pyramid_end; + int bits; +#ifdef DEBUG + int i; +#endif + + + if ((nstreams = List_length(stream_list)) == 0) { + *nelts1 = 0; + return (UINT4 *) NULL; + + } else { + heapsize = 2*nstreams - 1; /* also index of last node */ +#ifdef HAVE_BUILTIN_CLZ + bits = 31 - __builtin_clz((unsigned int) heapsize); +#elif defined(HAVE_ASM_BSR) + asm("bsr %1,%0" : "=r"(bits) : "r"(heapsize)); +#else + bits = 31 - ((heapsize >> 16) ? clz_table[heapsize >> 16] : 16 + clz_table[heapsize]); +#endif + + base = (1 << bits); + heap = make_diagonals_heap(&nelts,stream_list,streamsize_list,nstreams); + debug(printf("nstreams %d, heapsize %d, clz %d, bits %d, base %d\n",nstreams,heapsize,__builtin_clz(heapsize),bits,base)); + } + + /* Middle pyramids */ + while (base > PYRAMID_SIZE) { + for (pyramid_start = 2*base - PYRAMID_SIZE, pyramid_end = 2*base - 1; pyramid_start >= base; + pyramid_start -= PYRAMID_SIZE, pyramid_end -= PYRAMID_SIZE) { + debug(printf("diagonals: pyramid_start %d, pyramid_end %d, nstreams %d\n",pyramid_start,pyramid_end,nstreams)); + ancestori = pyramid_merge(heap,nstreams,heapsize,nelts,pyramid_start,pyramid_end); + } + base = ancestori; + } + + /* Last pyramid */ + pyramid_start = base; + pyramid_end = 2*base - 1; + debug(printf("diagonals: pyramid_start %d, pyramid_end %d, nstreams %d\n",pyramid_start,pyramid_end,nstreams)); + /* base = */ pyramid_merge(heap,nstreams,heapsize,nelts,pyramid_start,pyramid_end); + + *nelts1 = nelts[1]; + 
result = heap[1]; + + FREE(heap); + FREE(nelts); + +#ifdef DEBUG + printf("Merge_diagonals returning result of length %d\n",*nelts1); + for (i = 0; i < *nelts1; i++) { + printf("%u\n",result[i]); + } +#endif + + return result; +} + + +static Record_T ** +make_record_heap (int **nelts, List_T stream_list, Intlist_T streamsize_list, + Intlist_T querypos_list, Intlist_T diagterm_list, int nstreams, + int base, struct Record_T *all_records) { + Record_T **record_heap; + UINT4 *diagonals; + int heapsize, null_pyramid_start, heapi, basei; + int querypos, diagterm; + int i, k; + + heapsize = 2*nstreams - 1; + null_pyramid_start = (heapsize + PYRAMID_SIZE - 1)/PYRAMID_SIZE * PYRAMID_SIZE; /* full or partial pyramid for entries below this */ + + /* Add PYRAMID_SIZE to handle partial pyramid */ + record_heap = (Record_T **) CALLOC(heapsize + PYRAMID_SIZE,sizeof(Record_T *)); + *nelts = (int *) CALLOC(heapsize + PYRAMID_SIZE,sizeof(int)); + + /* Process as (base - 1) downto nstreams, then heapsize downto base, + because stream_list is in reverse order of elts */ + k = 0; + for (heapi = base - 1; heapi >= PARENT(null_pyramid_start); heapi--) { + /* Put all information into penultimate row */ + stream_list = List_pop(stream_list,(void *) &diagonals); /* already padded */ + streamsize_list = Intlist_pop(streamsize_list,&((*nelts)[heapi])); + querypos_list = Intlist_pop(querypos_list,&querypos); + diagterm_list = Intlist_pop(diagterm_list,&diagterm); + record_heap[heapi] = (Record_T *) MALLOC(((*nelts)[heapi]) * sizeof(Record_T)); + debug2(printf("NULL: Assigning node %d with %d elts (%p)",heapi,(*nelts)[heapi],record_heap[heapi])); + + for (i = 0; i < (*nelts)[heapi]; i++) { + /* Process in forward order to keep records in order */ + all_records[k].diagonal = diagonals[i] + diagterm; + all_records[k].querypos = querypos; + record_heap[heapi][i] = &(all_records[k]); + debug2(printf(" %u+%d",diagonals[i],querypos)); + k++; + } + debug2(printf("\n")); + } + + for ( ; heapi >= 
nstreams; heapi--) { + /* Move all information down to left child */ + basei = LEFT(heapi); + stream_list = List_pop(stream_list,(void *) &diagonals); /* already padded */ + streamsize_list = Intlist_pop(streamsize_list,&((*nelts)[basei])); + querypos_list = Intlist_pop(querypos_list,&querypos); + diagterm_list = Intlist_pop(diagterm_list,&diagterm); + record_heap[basei] = (Record_T *) MALLOC(((*nelts)[basei]) * sizeof(Record_T)); + debug2(printf("PART: Assigning node %d => %d with %d elts (%p)",heapi,basei,(*nelts)[basei],record_heap[basei])); + + for (i = 0; i < (*nelts)[basei]; i++) { + /* Process in forward order to keep records in order */ + all_records[k].diagonal = diagonals[i] + diagterm; + all_records[k].querypos = querypos; + record_heap[basei][i] = &(all_records[k]); + debug2(printf(" %u+%d",diagonals[i],querypos)); + k++; + } + debug2(printf("\n")); + } + + for (heapi = heapsize; heapi >= base; heapi--) { + /* Put all information into base row */ + stream_list = List_pop(stream_list,(void *) &diagonals); /* already padded */ + streamsize_list = Intlist_pop(streamsize_list,&((*nelts)[heapi])); + querypos_list = Intlist_pop(querypos_list,&querypos); + diagterm_list = Intlist_pop(diagterm_list,&diagterm); + record_heap[heapi] = (Record_T *) MALLOC(((*nelts)[heapi]) * sizeof(Record_T)); + debug2(printf("FULL: Assigning node %d with %d elts (%p)",heapi,(*nelts)[heapi],record_heap[heapi])); + + for (i = 0; i < (*nelts)[heapi]; i++) { + /* Process in forward order to keep records in order */ + all_records[k].diagonal = diagonals[i] + diagterm; + all_records[k].querypos = querypos; + record_heap[heapi][i] = &(all_records[k]); + debug2(printf(" %u+%d",diagonals[i],querypos)); + k++; + } + debug2(printf("\n")); + } + + return record_heap; +} + + + +/* For initializing heap, there are three categories: + base..(heapsize % PYRAMID_SIZE) + PYRAMID_SIZE: Fill bottom row + straddling heapsize: Pull down some nodes to bottom row + heapsize..(2*base - 1): Fill 
penultimate row */ +Record_T * +Merge_records (int *nelts1, List_T stream_list, Intlist_T streamsize_list, + Intlist_T querypos_list, Intlist_T diagterm_list, + struct Record_T *all_records) { + Record_T *result, **record_heap, curr; + UINT4 **key_heap, *prev_storage, *curr_storage; + int ptrs[PYRAMID_SIZE]; + int *nelts, nalloc; + int nstreams, heapsize, base, ancestori, pyramid_start, pyramid_end, + node_start, node_end; + int bits; + int heapi, streami, i, j, k; + + debug2(printf("Entered Merge_records\n")); + + if ((nstreams = List_length(stream_list)) == 0) { + *nelts1 = 0; + return (Record_T *) NULL; + + } else { + heapsize = 2*nstreams - 1; /* also index of last node */ +#ifdef HAVE_BUILTIN_CLZ + bits = 31 - __builtin_clz(heapsize); +#elif defined(HAVE_ASM_BSR) + asm("bsr %1,%0" : "=r"(bits) : "r"(heapsize)); +#else + bits = 31 - ((heapsize >> 16) ? clz_table[heapsize >> 16] : 16 + clz_table[heapsize]); +#endif + base = (1 << bits); + debug2(printf("nstreams %d, heapsize %d, base %d\n",nstreams,heapsize,base)); + record_heap = make_record_heap(&nelts,stream_list,streamsize_list,querypos_list,diagterm_list, + nstreams,base,all_records); + } + + if (nstreams == 1) { + *nelts1 = nelts[1]; + result = record_heap[1]; + + FREE(nelts); + FREE(record_heap); + +#ifdef DEBUG2 + printf("Merge_records returning result of length %d\n",*nelts1); + for (i = 0; i < *nelts1; i++) { + printf("%u %d\n",result[i]->diagonal,result[i]->querypos); + } +#endif + + return result; + } + + + key_heap = (UINT4 **) CALLOC(heapsize + PYRAMID_SIZE,sizeof(UINT4 *)); + + /* Middle pyramids */ + while (base > PYRAMID_SIZE) { + for (pyramid_start = 2*base - PYRAMID_SIZE, pyramid_end = 2*base - 1; pyramid_start >= base; + pyramid_start -= PYRAMID_SIZE, pyramid_end -= PYRAMID_SIZE) { + debug2(printf("records: pyramid_start %d, pyramid_end %d, nstreams %d",pyramid_start,pyramid_end,nstreams)); + + if (pyramid_start > heapsize) { + node_start = PARENT(pyramid_start); + node_end = 
PARENT(pyramid_end); + debug2(printf(" => node_start %d, node_end %d\n",node_start,node_end)); + } else { + node_start = pyramid_start; + node_end = pyramid_end; + } + debug2(printf("\n")); + + /* Allocate memory for the pyramid */ + nalloc = 0; + /* Have to align start of each entry prev_storage[nalloc] and curr_storage[nalloc], regardless of end padding */ +#ifdef HAVE_SSE4_1 + for (heapi = node_start; heapi <= node_end; heapi++) { + nalloc += PAD_UINT4(nelts[heapi]); + } + prev_storage = (UINT4 *) MALLOC_ALIGN(nalloc * sizeof(UINT4)); + curr_storage = (UINT4 *) MALLOC_ALIGN(nalloc * sizeof(UINT4)); +#else + for (heapi = node_start; heapi <= node_end; heapi++) { + nalloc += nelts[heapi]; + } + prev_storage = (UINT4 *) MALLOC(nalloc * sizeof(UINT4)); + curr_storage = (UINT4 *) MALLOC(nalloc * sizeof(UINT4)); +#endif + + /* Convert structures to integers (key_heap) */ + nalloc = 0; + for (heapi = node_start, streami = 0; heapi <= node_end; heapi++, streami++) { + debug2(printf("Creating key node %d from %p\n",heapi,record_heap[heapi])); + /* key_heap[heapi] = (UINT4 *) MALLOC((npadded + 1) * sizeof(UINT4)); */ + key_heap[heapi] = &(prev_storage[nalloc]); + for (i = 0; i < nelts[heapi]; i++) { + key_heap[heapi][i] = (record_heap[heapi][i]->diagonal & KEY_MASK) + streami; + } + /* Had to align start of each entry prev_storage[nalloc], regardless of end padding */ +#ifdef HAVE_SSE4_1 + nalloc += PAD_UINT4(nelts[heapi]); +#else + nalloc += nelts[heapi]; +#endif + } + + ancestori = pyramid_merge_prealloc(key_heap,curr_storage,prev_storage,nelts, + node_start,node_end); + + /* Convert integers to structures */ + record_heap[ancestori] = (Record_T *) MALLOC(nelts[ancestori] * sizeof(Record_T)); + memset(ptrs,0,PYRAMID_SIZE*sizeof(int)); + k = 0; + for (i = 0; i < nelts[ancestori]; i++) { + streami = key_heap[ancestori][i] & ~KEY_MASK; + record_heap[ancestori][k++] = record_heap[node_start + streami][ptrs[streami]++]; + } + + /* Free base heaps */ + for (heapi = 
node_start; heapi <= node_end; heapi++) { + FREE(record_heap[heapi]); + } + + /* Free key_heap storage */ + FREE_ALIGN(prev_storage); + FREE_ALIGN(curr_storage); + + } + base = ancestori; + } + + /* Last pyramid */ + pyramid_start = base; + pyramid_end = 2*base - 1; + debug2(printf("records: pyramid_start %d, pyramid_end %d, nstreams %d\n",pyramid_start,pyramid_end,nstreams)); + + /* Allocate memory for the pyramid */ + nalloc = 0; + + /* Have to align start of each entry prev_storage[nalloc] and curr_storage[nalloc], regardless of end padding */ +#ifdef HAVE_SSE4_1 + for (heapi = pyramid_start; heapi <= pyramid_end; heapi++) { + nalloc += PAD_UINT4(nelts[heapi]); + } + prev_storage = (UINT4 *) MALLOC_ALIGN(nalloc * sizeof(UINT4)); + curr_storage = (UINT4 *) MALLOC_ALIGN(nalloc * sizeof(UINT4)); +#else + for (heapi = pyramid_start; heapi <= pyramid_end; heapi++) { + nalloc += nelts[heapi]; + } + prev_storage = (UINT4 *) MALLOC(nalloc * sizeof(UINT4)); + curr_storage = (UINT4 *) MALLOC(nalloc * sizeof(UINT4)); +#endif + + /* Convert structures to integers (key_heap) */ + nalloc = 0; + for (heapi = pyramid_start, streami = 0; heapi <= pyramid_end; heapi++, streami++) { + /* key_heap[heapi] = (UINT4 *) MALLOC((npadded + 1) * sizeof(UINT4)); */ + key_heap[heapi] = &(prev_storage[nalloc]); + for (i = 0; i < nelts[heapi]; i++) { + key_heap[heapi][i] = (record_heap[heapi][i]->diagonal & KEY_MASK) + streami; + } + /* Had to align start each entry prev_storage[nalloc], regardless of end padding */ +#ifdef HAVE_SSE4_1 + nalloc += PAD_UINT4(nelts[heapi]); +#else + nalloc += nelts[heapi]; +#endif + } + + ancestori = pyramid_merge_prealloc(key_heap,curr_storage,prev_storage, + nelts,pyramid_start,pyramid_end); + /* ancestori should be 1 */ + + /* Convert integers to structures */ + record_heap[ancestori] = (Record_T *) MALLOC(nelts[ancestori] * sizeof(Record_T)); + memset(ptrs,0,PYRAMID_SIZE*sizeof(int)); + k = 0; + for (i = 0; i < nelts[ancestori]; i++) { + streami = 
key_heap[ancestori][i] & ~KEY_MASK; + record_heap[ancestori][k++] = record_heap[pyramid_start + streami][ptrs[streami]++]; + } + + /* Free base heaps (unless pyramid_start == 1, implying that base == 1) */ + for (heapi = pyramid_start, streami = 0; heapi <= pyramid_end; heapi++, streami++) { + FREE(record_heap[pyramid_start + streami]); + } + + /* Free key_heap storage */ + FREE_ALIGN(prev_storage); + FREE_ALIGN(curr_storage); + + + *nelts1 = nelts[1]; + result = record_heap[1]; + + /* Final insertion sort to correct for truncation of keys */ + for (j = 1; j < *nelts1; j++) { + curr = result[j]; + i = j - 1; + /* For a stable merge sort, is the second condition possible? */ + while (i >= 0 && (result[i]->diagonal > curr->diagonal || + (result[i]->diagonal == curr->diagonal && + result[i]->querypos > curr->querypos))) { + assert(result[i]->diagonal > curr->diagonal); + result[i+1] = result[i]; + i--; + } + result[i+1] = curr; + } + + + FREE(key_heap); + FREE(nelts); + FREE(record_heap); + +#ifdef DEBUG2 + printf("Merge_records returning result of length %d\n",*nelts1); + for (i = 0; i < *nelts1; i++) { + printf("%u %d\n",result[i]->diagonal,result[i]->querypos); + } +#endif + + return result; +} + + diff -Nru gmap-2016-11-07/src/merge.h gmap-2017-01-14/src/merge.h --- gmap-2016-11-07/src/merge.h 1970-01-01 00:00:00.000000000 +0000 +++ gmap-2017-01-14/src/merge.h 2016-12-16 16:43:03.000000000 +0000 @@ -0,0 +1,45 @@ +#ifndef MERGE_INCLUDED +#define MERGE_INCLUDED +#ifdef HAVE_CONFIG_H +#include /* For HAVE_64_BIT */ +#endif + +#include "types.h" +#include "list.h" +#include "intlist.h" + + +/* Pad lengths at end for row-based storage */ +#ifdef HAVE_AVX512 +#define PAD_UINT4(x) (((x + 15)/16) * 16) +#elif defined(HAVE_AVX2) +#define PAD_UINT4(x) (((x + 7)/8) * 8) +#elif defined(HAVE_SSE4_1) +#define PAD_UINT4(x) (((x + 3)/4) * 4) +#else +#define PAD_UINT4(x) (x) +#endif + + +typedef struct Record_T *Record_T; +struct Record_T { + Univcoord_T diagonal; /* Primary sort 
*/ + int querypos; /* Secondary sort */ +}; + + +extern unsigned int * +Merge_uint4 (unsigned int *__restrict__ dest, unsigned int *__restrict__ A, + unsigned int *__restrict__ B, int nA, int nB); + +extern UINT4 * +Merge_diagonals (int *nelts1, List_T stream_list, Intlist_T streamsize_list); + +extern Record_T * +Merge_records (int *nelts1, List_T stream_list, Intlist_T streamsize_list, + Intlist_T querypos_list, Intlist_T diagterm_list, + struct Record_T *all_records); + +#endif + + diff -Nru gmap-2016-11-07/src/merge-heap.c gmap-2017-01-14/src/merge-heap.c --- gmap-2016-11-07/src/merge-heap.c 1970-01-01 00:00:00.000000000 +0000 +++ gmap-2017-01-14/src/merge-heap.c 2016-12-16 16:51:26.000000000 +0000 @@ -0,0 +1,400 @@ +static char rcsid[] = "$Id: merge-heap.c 201745 2016-12-16 16:51:24Z twu $"; +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "merge-heap.h" +#include "assert.h" +#include "mem.h" +#include "popcount.h" + +#include +#include + +#define PYRAMID_SIZE 32 +#define KEY_MASK (~0U << 5) + + +#ifdef DEBUG +#define debug(x) x +#else +#define debug(x) +#endif + +#ifdef DEBUG0 +#define debug0(x) x +#else +#define debug0(x) +#endif + +#ifdef DEBUG6 +#define debug6(x) x +#else +#define debug6(x) +#endif + + +#define PARENT(i) (i >> 1) +#define LEFT(i) (i << 1) +#define RIGHT(i) ((i << 1) | 1) + +static void +min_heap_insert (UINT4 *heap, int *heapsize, UINT4 diagonal) { + int i; + + i = ++(*heapsize); + while (i > 1 && (heap[PARENT(i)] > diagonal)) { + heap[i] = heap[PARENT(i)]; + i = PARENT(i); + } + heap[i] = diagonal; + + return; +} + + +/* Provide ancestori as inserti */ +static void +heapify (unsigned int *heap, unsigned int diagonal, int merge_heap_size) { + int inserti, smallesti, righti; + int i; + + debug6(printf("Starting heapify with %llu\n",(unsigned long long) diagonal)); +#ifdef DEBUG6 + for (i = 1; i <= 2*merge_heap_size + 1; i++) { + printf("%d %u\n",i,heap[i]); + } + printf("\n"); +#endif + + inserti = 1; + smallesti = (heap[3] < heap[2]) ? 
3 : 2; + debug6(printf("Comparing left %d/right %d: %llu and %llu\n", + 2,3,(unsigned long long) heap[2],(unsigned long long)heap[3])); + /* Sift down from the root: while the new value is larger than the smaller child, move that child up and descend. There is no bounds check on the child indices here; this relies on the caller having filled the slots past the live heap with -1U sentinels */ while (diagonal > heap[smallesti]) { + heap[inserti] = heap[smallesti]; + inserti = smallesti; + smallesti = LEFT(inserti); + righti = smallesti+1; + debug6(printf("Comparing left %d/right %d: %llu and %llu\n", + smallesti,righti,(unsigned long long) heap[smallesti], + (unsigned long long) heap[righti])); + if (heap[righti] < heap[smallesti]) { + smallesti = righti; + } + } + heap[inserti] = diagonal; + debug6(printf("Inserting at %d\n\n",inserti)); + return; +} + + +/* Drains merge_heap in key order. Each key taken from merge_heap[1] names its source stream in the bits outside KEY_MASK; the next record pointer of that stream (record_heap[node_start + streami]) is appended to record_heap[ancestori], and the stream's following key is read from key_streams and placed into the heap by heapify. Stops when the top of the heap is -1U. Returns the number of records written */ static int +pyramid_merge_full (Record_T **record_heap, unsigned int **key_streams, unsigned int *merge_heap, + int node_start, int ancestori, int merge_heap_size) { + int nelts = 0; + unsigned int diagonal; + int streami, k; + int ptrs[PYRAMID_SIZE]; + + k = 0; + memset(ptrs,0,PYRAMID_SIZE*sizeof(int)); + while ((diagonal = merge_heap[1]) < -1U) { + /* Convert integer to structure: the bits outside KEY_MASK select the stream, and ptrs[streami] is the read position within it */ + streami = diagonal & ~KEY_MASK; + record_heap[ancestori][k++] = record_heap[node_start + streami][ptrs[streami]]; + debug(printf("Writing %u (stream %d): %u\n",diagonal,streami,record_heap[ancestori][k-1]->diagonal)); + + /* Advance pointer and get next value */ + diagonal = key_streams[streami][++ptrs[streami]]; + heapify(merge_heap,diagonal,merge_heap_size); + nelts += 1; + } + + return nelts; +} + + +static Record_T ** +make_record_heap (int **nelts, List_T stream_list, Intlist_T streamsize_list, + Intlist_T querypos_list, Intlist_T diagterm_list, int nstreams, + int base, struct Record_T *all_records) { + Record_T **record_heap; + UINT4 *diagonals; + int heapsize, null_pyramid_start, heapi, basei; + int querypos, diagterm; + int i, k; + + heapsize = 2*nstreams - 1; + null_pyramid_start = (heapsize + PYRAMID_SIZE - 1)/PYRAMID_SIZE * PYRAMID_SIZE; /* full or partial pyramid for entries below this */ + + /* Allocate PYRAMID_SIZE entries beyond heapsize to handle a partial pyramid */ + record_heap = (Record_T **) 
CALLOC(heapsize + PYRAMID_SIZE,sizeof(Record_T *)); + *nelts = (int *) CALLOC(heapsize + PYRAMID_SIZE,sizeof(int)); + + /* Process as (base - 1) downto nstreams, then heapsize downto base, + because stream_list is in reverse order of elts */ + k = 0; + for (heapi = base - 1; heapi >= PARENT(null_pyramid_start); heapi--) { + /* Put all information into penultimate row */ + stream_list = List_pop(stream_list,(void *) &diagonals); /* already padded */ + streamsize_list = Intlist_pop(streamsize_list,&((*nelts)[heapi])); + querypos_list = Intlist_pop(querypos_list,&querypos); + diagterm_list = Intlist_pop(diagterm_list,&diagterm); + record_heap[heapi] = (Record_T *) MALLOC(((*nelts)[heapi]) * sizeof(Record_T)); + debug(printf("NULL: Assigning node %d with %d elts (%p)",heapi,(*nelts)[heapi],record_heap[heapi])); + + for (i = 0; i < (*nelts)[heapi]; i++) { + /* Process in forward order to keep records in order */ + all_records[k].diagonal = diagonals[i] + diagterm; + all_records[k].querypos = querypos; + record_heap[heapi][i] = &(all_records[k]); + debug(printf(" %u+%d",diagonals[i],querypos)); + k++; + } + debug(printf("\n")); + } + + for ( ; heapi >= nstreams; heapi--) { + /* Move all information down to left child */ + basei = LEFT(heapi); + stream_list = List_pop(stream_list,(void *) &diagonals); /* already padded */ + streamsize_list = Intlist_pop(streamsize_list,&((*nelts)[basei])); + querypos_list = Intlist_pop(querypos_list,&querypos); + diagterm_list = Intlist_pop(diagterm_list,&diagterm); + record_heap[basei] = (Record_T *) MALLOC(((*nelts)[basei]) * sizeof(Record_T)); + debug(printf("PART: Assigning node %d => %d with %d elts (%p)",heapi,basei,(*nelts)[basei],record_heap[basei])); + + for (i = 0; i < (*nelts)[basei]; i++) { + /* Process in forward order to keep records in order */ + all_records[k].diagonal = diagonals[i] + diagterm; + all_records[k].querypos = querypos; + record_heap[basei][i] = &(all_records[k]); + debug(printf(" 
%u+%d",diagonals[i],querypos)); + k++; + } + debug(printf("\n")); + } + + for (heapi = heapsize; heapi >= base; heapi--) { + /* Put all information into base row */ + stream_list = List_pop(stream_list,(void *) &diagonals); /* already padded */ + streamsize_list = Intlist_pop(streamsize_list,&((*nelts)[heapi])); + querypos_list = Intlist_pop(querypos_list,&querypos); + diagterm_list = Intlist_pop(diagterm_list,&diagterm); + record_heap[heapi] = (Record_T *) MALLOC(((*nelts)[heapi]) * sizeof(Record_T)); + debug(printf("FULL: Assigning node %d with %d elts (%p)",heapi,(*nelts)[heapi],record_heap[heapi])); + + for (i = 0; i < (*nelts)[heapi]; i++) { + /* Process in forward order to keep records in order */ + all_records[k].diagonal = diagonals[i] + diagterm; + all_records[k].querypos = querypos; + record_heap[heapi][i] = &(all_records[k]); + debug(printf(" %u+%d",diagonals[i],querypos)); + k++; + } + debug(printf("\n")); + } + + return record_heap; +} + + +/* For initializing heap, there are three categories: + base..(heapsize % PYRAMID_SIZE) + PYRAMID_SIZE: Fill bottom row + straddling heapsize: Pull down some nodes to bottom row + heapsize..(2*base - 1): Fill penultimate row */ +Record_T * +Merge_records_heap (int *nelts1, List_T stream_list, Intlist_T streamsize_list, + Intlist_T querypos_list, Intlist_T diagterm_list, + struct Record_T *all_records) { + Record_T *result, **record_heap, curr; + UINT4 *key_streams[PYRAMID_SIZE]; + UINT4 merge_heap[2*PYRAMID_SIZE+1+1]; /* Add second 1 because top node is at 1 */ + UINT4 *storage; + int *nelts, nalloc; + int nstreams, heapsize, base, ancestori, pyramid_start, pyramid_end, + node_start, node_end, start, end; + int merge_heap_size; + int bits; + int heapi, streami, i, j; + + debug(printf("Entered Merge_records\n")); + + if ((nstreams = List_length(stream_list)) == 0) { + *nelts1 = 0; + return (Record_T *) NULL; + + } else { + heapsize = 2*nstreams - 1; /* also index of last node */ +#ifdef HAVE_BUILTIN_CLZ + bits = 31 - 
__builtin_clz(heapsize); +#elif defined(HAVE_ASM_BSR) + asm("bsr %1,%0" : "=r"(bits) : "r"(heapsize)); +#else + bits = 31 - ((heapsize >> 16) ? clz_table[heapsize >> 16] : 16 + clz_table[heapsize]); +#endif + base = (1 << bits); + debug(printf("nstreams %d, heapsize %d, base %d\n",nstreams,heapsize,base)); + record_heap = make_record_heap(&nelts,stream_list,streamsize_list,querypos_list,diagterm_list, + nstreams,base,all_records); + } + + + while (base > 1) { + if (base < PYRAMID_SIZE) { + pyramid_start = base; + pyramid_end = 2*base - 1; + + ancestori = 1; + debug(printf("records: pyramid_start %d, pyramid_end %d, nstreams %d\n",pyramid_start,pyramid_end,nstreams)); + + /* Allocate memory for the pyramid key_streams */ + nalloc = 0; + for (heapi = pyramid_start; heapi <= pyramid_end; heapi++) { + nalloc += (nelts[heapi] + 1); + } + storage = (UINT4 *) MALLOC(nalloc * sizeof(UINT4)); + + /* Convert structures to integers (key_streams) */ + nalloc = 0; + merge_heap_size = 0; + for (heapi = pyramid_start, streami = 0; heapi <= pyramid_end; heapi++, streami++) { + key_streams[streami] = &(storage[nalloc]); + for (i = 0; i < nelts[heapi]; i++) { + key_streams[streami][i] = (record_heap[heapi][i]->diagonal & KEY_MASK) + streami; + } + key_streams[streami][i] = -1U; + nalloc += (i + 1); /* nelts[heapi] + 1 */ + + min_heap_insert(merge_heap,&merge_heap_size,key_streams[streami][0]); + } + + /* Set up bounds of heap (sentinels) */ + assert(merge_heap_size <= PYRAMID_SIZE); + debug(printf("merge_heap_size is %d\n",merge_heap_size)); + for (i = merge_heap_size+1; i <= 2*merge_heap_size+1; i++) { + merge_heap[i] = -1U; + } + + /* Merge and convert integers to structures */ + record_heap[1] = (Record_T *) MALLOC(nalloc * sizeof(Record_T)); + nelts[1] = pyramid_merge_full(record_heap,key_streams,merge_heap,pyramid_start,ancestori,merge_heap_size); + + /* Free base heaps */ + for (heapi = pyramid_start, streami = 0; heapi <= pyramid_end; heapi++, streami++) { + 
FREE(record_heap[pyramid_start + streami]); + } + + /* Free key_streams storage */ + FREE(storage); + + } else { + for (pyramid_start = 2*base - PYRAMID_SIZE, pyramid_end = 2*base - 1; pyramid_start >= base; + pyramid_start -= PYRAMID_SIZE, pyramid_end -= PYRAMID_SIZE) { + debug(printf("records: pyramid_start %d, pyramid_end %d, nstreams %d",pyramid_start,pyramid_end,nstreams)); + + if (pyramid_start > heapsize) { + node_start = PARENT(pyramid_start); + node_end = PARENT(pyramid_end); + debug(printf(" => node_start %d, node_end %d\n",node_start,node_end)); + } else { + node_start = pyramid_start; + node_end = pyramid_end; + } + debug(printf("\n")); + + /* Determine ancestori */ + start = node_start; + end = node_end; + while ((start = PARENT(start)) < (end = PARENT(end))) ; + ancestori = start; + + /* Allocate memory for the pyramid key_streams */ + nalloc = 0; + for (heapi = node_start; heapi <= node_end; heapi++) { + nalloc += (nelts[heapi] + 1); + } + storage = (UINT4 *) MALLOC(nalloc * sizeof(UINT4)); + + /* Convert structures to integers (key_streams) */ + nalloc = 0; + merge_heap_size = 0; + for (heapi = node_start, streami = 0; heapi <= node_end; heapi++, streami++) { + key_streams[streami] = &(storage[nalloc]); + for (i = 0; i < nelts[heapi]; i++) { + key_streams[streami][i] = (record_heap[heapi][i]->diagonal & KEY_MASK) + streami; + } + key_streams[streami][i] = -1U; + nalloc += (i + 1); /* nelts[heapi] + 1 */ + + min_heap_insert(merge_heap,&merge_heap_size,key_streams[streami][0]); + } + +#ifdef DEBUG + for (heapi = node_start, streami = 0; heapi <= node_end; heapi++, streami++) { + printf("key_stream %d:",streami); + for (i = 0; i <= nelts[heapi]; i++) { + printf(" %u",key_streams[streami][i]); + } + printf("\n"); + } +#endif + + /* Set up bounds of heap (sentinels) */ + assert(merge_heap_size <= PYRAMID_SIZE); + debug(printf("merge_heap_size is %d\n",merge_heap_size)); + for (i = merge_heap_size+1; i <= 2*merge_heap_size+1; i++) { + merge_heap[i] = -1U; 
+ } + + /* Merge and convert integers to structures */ + record_heap[ancestori] = (Record_T *) MALLOC(nalloc * sizeof(Record_T)); + nelts[ancestori] = pyramid_merge_full(record_heap,key_streams,merge_heap,node_start,ancestori,merge_heap_size); + + /* Free base heaps */ + for (heapi = node_start; heapi <= node_end; heapi++) { + FREE(record_heap[heapi]); + } + + /* Free key_streams storage */ + FREE(storage); + } + } + + base = ancestori; + } + + *nelts1 = nelts[1]; + result = record_heap[1]; + + /* Final insertion sort to correct for truncation of keys */ + for (j = 1; j < *nelts1; j++) { + curr = result[j]; + i = j - 1; + /* For a stable merge sort, is the second condition possible? */ + while (i >= 0 && (result[i]->diagonal > curr->diagonal || + (result[i]->diagonal == curr->diagonal && + result[i]->querypos > curr->querypos))) { + assert(result[i]->diagonal > curr->diagonal); + result[i+1] = result[i]; + i--; + } + result[i+1] = curr; + } + + + FREE(nelts); + FREE(record_heap); + +#ifdef DEBUG0 + printf("Merge_records returning result of length %d\n",*nelts1); + for (i = 0; i < *nelts1; i++) { + printf("%u %d\n",result[i]->diagonal,result[i]->querypos); + } +#endif + + return result; +} + + diff -Nru gmap-2016-11-07/src/merge-heap.h gmap-2017-01-14/src/merge-heap.h --- gmap-2016-11-07/src/merge-heap.h 1970-01-01 00:00:00.000000000 +0000 +++ gmap-2017-01-14/src/merge-heap.h 2016-12-16 16:43:03.000000000 +0000 @@ -0,0 +1,19 @@ +#ifndef MERGE_HEAP_INCLUDED +#define MERGE_HEAP_INCLUDED +#ifdef HAVE_CONFIG_H +#include +#endif + + +#include "list.h" +#include "intlist.h" +#include "merge.h" /* For Record_T */ + + +extern Record_T * +Merge_records_heap (int *nelts1, List_T stream_list, Intlist_T streamsize_list, + Intlist_T querypos_list, Intlist_T diagterm_list, + struct Record_T *all_records); + +#endif + diff -Nru gmap-2016-11-07/src/oligoindex_hr.c gmap-2017-01-14/src/oligoindex_hr.c --- gmap-2016-11-07/src/oligoindex_hr.c 2016-05-10 21:50:34.000000000 +0000 +++ 
gmap-2017-01-14/src/oligoindex_hr.c 2017-01-13 23:30:18.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: oligoindex_hr.c 184484 2016-02-18 03:11:53Z twu $"; +static char rcsid[] = "$Id: oligoindex_hr.c 202591 2017-01-13 23:30:18Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -17,6 +17,7 @@ #include "mem.h" #include "orderstat.h" #include "cmet.h" +#include "atoi.h" #ifdef DEBUG14 /* Need to change Makefile.am to include oligoindex_old.c and oligoindex_old.h */ @@ -39,22 +40,72 @@ #ifdef HAVE_AVX2 #include #endif +#ifdef HAVE_AVX512 +#include +#endif -#ifdef HAVE_SSE2 -#define USE_SIMD_FOR_COUNTS 1 -#endif +#ifdef HAVE_AVX512 +/* AVX512 */ +#define EXTRACT(x,i) _mm_extract_epi32(x,i) +#define EXTRACT256(x,i) _mm256_extract_epi32(x,i) + +#elif defined(HAVE_AVX2) +/* AVX2 */ +#define EXTRACT(x,i) _mm_extract_epi32(x,i) +#define EXTRACT256(x,i) _mm256_extract_epi32(x,i) + +#elif defined(HAVE_SSE4_2) +/* SSE4.2 */ +#define USE_UNORDERED_9 1 +#define USE_UNORDERED_8 1 +#define USE_UNORDERED_7 1 +#define USE_UNORDERED_6 1 +#define USE_UNORDERED_5 1 + +#define EXTRACT(x,i) _mm_extract_epi32(x,i) + +#elif defined(HAVE_SSE4_1) +/* SSE4.1 */ +#define USE_UNORDERED_9 1 +#define USE_UNORDERED_8 1 +#define USE_UNORDERED_7 1 +#define USE_UNORDERED_6 1 +#define USE_UNORDERED_5 1 + +#define EXTRACT(x,i) _mm_extract_epi32(x,i) + +#elif defined(HAVE_SSSE3) +/* SSSE3 */ +#define USE_UNORDERED_9 1 +#define USE_UNORDERED_8 1 +#define USE_UNORDERED_7 1 +#define USE_UNORDERED_6 1 +#define USE_UNORDERED_5 1 -#if !defined(HAVE_SSE2) -#define INDIVIDUAL_SHIFTS 1 -#elif !defined(HAVE_SSE4_1) #define SIMD_MASK_THEN_STORE #define EXTRACT(x,i) x[i] -#elif !defined(HAVE_AVX2) -#define EXTRACT(x,i) _mm_extract_epi32(x,i) + +#elif defined(HAVE_SSE2) +/* SSE2 */ +#define USE_UNORDERED_9 1 +#define USE_UNORDERED_8 1 +#define USE_UNORDERED_7 1 +#define USE_UNORDERED_6 1 +#define USE_UNORDERED_5 1 + +#define SIMD_MASK_THEN_STORE +#define EXTRACT(x,i) x[i] + #else -#define EXTRACT(x,i) 
_mm_extract_epi32(x,i) -#define EXTRACT256(x,i) _mm256_extract_epi32(x,i) +/* non-SIMD */ +#define USE_UNORDERED_9 1 +#define USE_UNORDERED_8 1 +#define USE_UNORDERED_7 1 +#define USE_UNORDERED_6 1 +#define USE_UNORDERED_5 1 + +#define INDIVIDUAL_SHIFTS 1 #endif @@ -89,7 +140,10 @@ /* bool query_evaluated_p; */ Oligospace_T oligospace; -#ifdef HAVE_AVX2 +#if defined(HAVE_AVX512) + __m512i *inquery_allocated; + __m512i *counts_allocated; +#elif defined(HAVE_AVX2) __m256i *inquery_allocated; __m256i *counts_allocated; #elif defined(HAVE_SSE2) @@ -101,8 +155,8 @@ Chrpos_T *table; UINT4 *positions; - UINT4 *pointers; - UINT4 *pointers_allocated; + /* UINT4 *pointers; */ + /* UINT4 *pointers_allocated; */ }; struct Oligoindex_array_T { @@ -158,38 +212,27 @@ #endif -#if defined(DEBUG) +#if 1 #ifdef HAVE_SSE2 /* For debugging of SIMD procedures*/ static void print_vector (__m128i x, char *label) { - __m128i a[1]; - unsigned int *s = a; + unsigned int s[4]; - _mm_store_si128(a,x); + _mm_store_si128((__m128i *) s,x); _mm_mfence(); - printf("%s: %08X %u\n",label,s[0],s[0]); - printf("%s: %08X %u\n",label,s[1],s[1]); - printf("%s: %08X %u\n",label,s[2],s[2]); - printf("%s: %08X %u\n",label,s[3],s[3]); + printf("%s: %08X %08X %08X %08X\n",label,s[0],s[1],s[2],s[3]); return; } /* For debugging of SIMD procedures*/ static void print_counts (__m128i x, char *label) { - __m128i a[1]; - Count_T *s = a; + Count_T s[16]; - _mm_store_si128(a,x); + _mm_store_si128((__m128i *) s,x); _mm_mfence(); printf("%s:",label); -#ifdef HAVE_AVX2 - printf(" %u",s[0]); - printf(" %u",s[1]); - printf(" %u",s[2]); - printf(" %u",s[3]); -#else printf(" %hd",s[0]); printf(" %hd",s[1]); printf(" %hd",s[2]); @@ -206,7 +249,6 @@ printf(" %hd",s[13]); printf(" %hd",s[14]); printf(" %hd",s[15]); -#endif printf("\n"); return; } @@ -215,11 +257,79 @@ #ifdef HAVE_AVX2 static void print_counts_256 (__m256i x, char *label) { - __m256i a[1]; - Count_T *s = a; + Count_T s[32]; + + _mm256_store_si256((__m256i *) 
s,x); + _mm_mfence(); + printf("%s:",label); + printf(" %hd",s[0]); + printf(" %hd",s[1]); + printf(" %hd",s[2]); + printf(" %hd",s[3]); + printf(" %hd",s[4]); + printf(" %hd",s[5]); + printf(" %hd",s[6]); + printf(" %hd",s[7]); + printf(" %hd",s[8]); + printf(" %hd",s[9]); + printf(" %hd",s[10]); + printf(" %hd",s[11]); + printf(" %hd",s[12]); + printf(" %hd",s[13]); + printf(" %hd",s[14]); + printf(" %hd",s[15]); + printf(" %hd",s[16]); + printf(" %hd",s[17]); + printf(" %hd",s[18]); + printf(" %hd",s[19]); + printf(" %hd",s[20]); + printf(" %hd",s[21]); + printf(" %hd",s[22]); + printf(" %hd",s[23]); + printf(" %hd",s[24]); + printf(" %hd",s[25]); + printf(" %hd",s[26]); + printf(" %hd",s[27]); + printf(" %hd",s[28]); + printf(" %hd",s[29]); + printf(" %hd",s[30]); + printf(" %hd",s[31]); + printf("\n"); + return; +} + +/* For debugging of SIMD procedures*/ +static void +print_vector_256 (__m256i x, char *label) { + unsigned int s[8]; + + _mm256_store_si256((__m256i *) s,x); + _mm_mfence(); +#if 0 + printf("%s: %08X %u\n",label,s[0],s[0]); + printf("%s: %08X %u\n",label,s[1],s[1]); + printf("%s: %08X %u\n",label,s[2],s[2]); + printf("%s: %08X %u\n",label,s[3],s[3]); + printf("%s: %08X %u\n",label,s[4],s[4]); + printf("%s: %08X %u\n",label,s[5],s[5]); + printf("%s: %08X %u\n",label,s[6],s[6]); + printf("%s: %08X %u\n",label,s[7],s[7]); +#else + printf("%s: %08X %08X %08X %08X %08X %08X %08X %08X\n", + label,s[0],s[1],s[2],s[3],s[4],s[5],s[6],s[7]); +#endif + return; +} +#endif + +#ifdef HAVE_AVX512 +static void +print_counts_512 (__m512i x, char *label) { + Count_T s[64]; - _mm256_store_si256(a,x); + _mm512_store_si512((__m512i *) s,x); _mm_mfence(); +#if 0 printf("%s:",label); printf(" %u",s[0]); printf(" %u",s[1]); @@ -229,18 +339,30 @@ printf(" %u",s[5]); printf(" %u",s[6]); printf(" %u",s[7]); + printf(" %u",s[8]); + printf(" %u",s[9]); + printf(" %u",s[10]); + printf(" %u",s[11]); + printf(" %u",s[12]); + printf(" %u",s[13]); + printf(" %u",s[14]); + 
printf(" %u",s[15]); printf("\n"); +#else + printf("%s: %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X\n", + label,s[0],s[1],s[2],s[3],s[4],s[5],s[6],s[7],s[8],s[9],s[10],s[11],s[12],s[13],s[14],s[15]); +#endif return; } /* For debugging of SIMD procedures*/ static void -print_vector_256 (__m256i x, char *label) { - __m256i a[1]; - unsigned int *s = a; +print_vector_512 (__m512i x, char *label) { + unsigned int s[16]; - _mm256_store_si256(a,x); + _mm512_store_si512((__m512i *) s,x); _mm_mfence(); +#if 0 printf("%s: %08X %u\n",label,s[0],s[0]); printf("%s: %08X %u\n",label,s[1],s[1]); printf("%s: %08X %u\n",label,s[2],s[2]); @@ -249,6 +371,18 @@ printf("%s: %08X %u\n",label,s[5],s[5]); printf("%s: %08X %u\n",label,s[6],s[6]); printf("%s: %08X %u\n",label,s[7],s[7]); + printf("%s: %08X %u\n",label,s[8],s[8]); + printf("%s: %08X %u\n",label,s[9],s[9]); + printf("%s: %08X %u\n",label,s[10],s[10]); + printf("%s: %08X %u\n",label,s[11],s[11]); + printf("%s: %08X %u\n",label,s[12],s[12]); + printf("%s: %08X %u\n",label,s[13],s[13]); + printf("%s: %08X %u\n",label,s[14],s[14]); + printf("%s: %08X %u\n",label,s[15],s[15]); +#else + printf("%s: %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X\n", + label,s[0],s[1],s[2],s[3],s[4],s[5],s[6],s[7],s[8],s[9],s[10],s[11],s[12],s[13],s[14],s[15]); +#endif return; } #endif @@ -8527,9 +8661,7 @@ static Genomecomp_T *ref_blocks; static Mode_T mode; -#define USE_GATHER 1 - -#ifdef USE_SIMD_FOR_COUNTS +#ifdef HAVE_SSE2 static __m128i mask9; static __m128i mask8; static __m128i mask7; @@ -8537,32 +8669,46 @@ static __m128i mask5; #endif -#ifdef HAVE_AVX2 -#ifdef CHECK_FOR_OVERFLOW -static __m128i maxcount128; -static __m256i maxcount256; +#ifdef HAVE_SSE4_1 +static __m128i mask7_epi16; +static __m128i mask6_epi16; +static __m128i mask5_epi16; #endif -static __m256i shift0to14; -/* static __m256i low8; */ -static __m256i low7; -static __m256i low6; -static __m256i low5; 
-static __m256i low4; + +#if defined(HAVE_AVX2) +static __m256i bigshift0to14; static __m256i bigmask9; static __m256i bigmask8; static __m256i bigmask7; static __m256i bigmask6; static __m256i bigmask5; -static __m256i byfours; -static __m256i byeights; +static __m256i bigmask7_epi16; +static __m256i bigmask6_epi16; +static __m256i bigmask5_epi16; +#endif + +#ifdef HAVE_AVX512 +static __m512i hugeshift0to14; +static __m512i hugemask9; +static __m512i hugemask8; +static __m512i hugemask7; +static __m512i hugemask6; +static __m512i hugemask5; +static __m512i highmask8; +static __m512i highmask7; +static __m512i highmask6; +static __m512i highmask5; + #endif + void Oligoindex_hr_setup (Genomecomp_T *ref_blocks_in, Mode_T mode_in) { ref_blocks = ref_blocks_in; mode = mode_in; -#ifdef USE_SIMD_FOR_COUNTS + +#ifdef HAVE_SSE2 mask9 = _mm_set1_epi32(262143U); mask8 = _mm_set1_epi32(65535U); mask7 = _mm_set1_epi32(16383U); @@ -8570,26 +8716,38 @@ mask5 = _mm_set1_epi32(1023U); #endif -#ifdef HAVE_AVX2 -#ifdef CHECK_FOR_OVERFLOW - maxcount128 = _mm_set1_epi32(MAXCOUNT); - maxcount256 = _mm256_set1_epi32(MAXCOUNT); +#ifdef HAVE_SSE4_1 + mask7_epi16 = _mm_set1_epi16(16383U); + mask6_epi16 = _mm_set1_epi16(4095U); + mask5_epi16 = _mm_set1_epi16(1023U); #endif - shift0to14 = _mm256_setr_epi32(0,2,4,6,8,10,12,14); - /* low8 = _mm256_setr_epi32(-1U,-1U,-1U,-1U,-1U,-1U,-1U,-1U); */ - low7 = _mm256_setr_epi32(-1U,-1U,-1U,-1U,-1U,-1U,-1U, 0U); - low6 = _mm256_setr_epi32(-1U,-1U,-1U,-1U,-1U,-1U, 0U, 0U); - low5 = _mm256_setr_epi32(-1U,-1U,-1U,-1U,-1U, 0U, 0U, 0U); - low4 = _mm256_setr_epi32(-1U,-1U,-1U,-1U, 0U, 0U, 0U, 0U); + +#if defined(HAVE_AVX2) + bigshift0to14 = _mm256_setr_epi32(0,2,4,6,8,10,12,14); bigmask9 = _mm256_set1_epi32(262143U); bigmask8 = _mm256_set1_epi32(65535U); bigmask7 = _mm256_set1_epi32(16383U); bigmask6 = _mm256_set1_epi32(4095U); bigmask5 = _mm256_set1_epi32(1023U); - byfours = _mm256_setr_epi32(28,24,20,16,12,8,4,0); - byeights = 
_mm256_setr_epi32(56,48,40,32,24,16,8,0); + bigmask7_epi16 = _mm256_set1_epi16(16383U); + bigmask6_epi16 = _mm256_set1_epi16(4095U); + bigmask5_epi16 = _mm256_set1_epi16(1023U); +#endif + +#ifdef HAVE_AVX512 + hugeshift0to14 = _mm512_setr_epi32(0,2,4,6,8,10,12,14, 0,2,4,6,8,10,12,14); + hugemask9 = _mm512_set1_epi32(262143U); + hugemask8 = _mm512_set1_epi32(65535U); /* 0x0000FFFF */ + hugemask7 = _mm512_set1_epi32(16383U); /* 0x00003FFF */ + hugemask6 = _mm512_set1_epi32(4095U); /* 0x00000FFF */ + hugemask5 = _mm512_set1_epi32(1023U); /* 0x000003FF */ + highmask8 = _mm512_set1_epi32(0xFFFF0000); + highmask7 = _mm512_set1_epi32(0x3FFF0000); + highmask6 = _mm512_set1_epi32(0x0FFF0000); + highmask5 = _mm512_set1_epi32(0x03FF0000); #endif + #ifdef DEBUG14 Oligoindex_old_setup(ref_blocks_in,mode_in); #endif @@ -8627,7 +8785,14 @@ new->suffnconsecutive = suffnconsecutive; /* new->query_evaluated_p = false; */ -#ifdef HAVE_AVX2 +#if defined(HAVE_AVX512) + new->inquery_allocated = (__m512i *) _mm_malloc(new->oligospace * sizeof(Inquery_T),64); + new->counts_allocated = (__m512i *) _mm_malloc(new->oligospace * sizeof(Count_T),64); + assert((long) new->inquery_allocated % 64 == 0); + assert((long) new->counts_allocated % 64 == 0); + new->inquery = (Inquery_T *) new->inquery_allocated; + new->counts = (Count_T *) new->counts_allocated; +#elif defined(HAVE_AVX2) new->inquery_allocated = (__m256i *) _mm_malloc(new->oligospace * sizeof(Inquery_T),32); new->counts_allocated = (__m256i *) _mm_malloc(new->oligospace * sizeof(Count_T),32); assert((long) new->inquery_allocated % 32 == 0); @@ -8649,8 +8814,8 @@ memset((void *) new->inquery,INQUERY_FALSE,new->oligospace*sizeof(Inquery_T)); memset((void *) new->counts,0,new->oligospace*sizeof(Count_T)); - new->pointers_allocated = (UINT4 *) MALLOC((new->oligospace+1) * sizeof(UINT4)); - new->pointers = &(new->pointers_allocated[1]); + /* new->pointers_allocated = (UINT4 *) MALLOC((new->oligospace+1) * sizeof(UINT4)); */ + /* 
new->pointers = &(new->pointers_allocated[1]); */ new->positions = (UINT4 *) MALLOC(new->oligospace * sizeof(UINT4)); new->table = (Chrpos_T *) NULL; @@ -8841,7 +9006,7 @@ printf("Oligo_hr %s (%llu) => %u entries\n", nt,(unsigned long long) i,counts[i]); } else { - printf("Oligo_hr %s (%llu) => %u entries: allocation %p (%d entries)\n", + printf("Oligo_hr %s (%llu) => %u entries: allocation %p (%lu entries)\n", nt,(unsigned long long) i,counts[i],positions[i],positions[i] - lastptr); lastptr = positions[i]; } @@ -8882,31 +9047,40 @@ * Counting and storage procedures. We count the number of * occurrences of each oligomer in the genomic region, modulo 256 * (because Count_T is an unsigned char). The allocate_positions - * procedure then assigns pointers_end (which start at the end of - * each positions block and go backward) and positions - * (which stay fixed) based on those counts, except that oligomers - * not in the query sequence have their counts set to 0, and have no - * space allocated. However, during storage, if a pointer hits the - * beginning of the position block, that must mean that the count cycled - * past 255. We set that count to be 0, so that oligomer is not used by - * Oligomer_get_mappings. A count greater that 255 is overabundant - * and not useful in stage 2. + * procedure then sets counts to 0 when oligomers are not in the + * query sequence, and then assigns positions based on the counts. + * During storage, we decrement count and store at positions + + * count, which could lead to cycling if the count overflowed. 
************************************************************************/ /************************************************************************ + * Use SIMD to process 256 k-mers at a time: + * extract_*mers_{fwd|rev}_simd_256 (AVX512) + * extract_*mers_{fwd|rev}_simd_256_ordered (AVX512) + * + * Use SIMD to process 128 k-mers at a time: + * extract_*mers_{fwd|rev}_simd_128 (AVX2) + * extract_*mers_{fwd|rev}_simd_128_ordered (AVX2) + * * Use SIMD to process 64 k-mers at a time: - * extract_*mers_{fwd|rev}_simd - * count_fwdrev_simd - * store_fwdrev_simd + * extract_*mers_{fwd|rev}_simd_64 (SSE2) + * extract_*mers_{fwd|rev}_simd_64_ordered (SSE2 for 9mers, SSE4.1 for 8mers and smaller) + * + * count_fwdrev_simd_n * - * Now, extract_*mers_{fwd|rev}_simd plus count_fwdrev_simd has been merged - * into count_*mers_{fwd|rev}_simd. However, we retain extract/store. + * store_fwdrev_simd_256 (AVX512) + * store_fwdrev_simd_128 + * store_fwdrev_simd_64 + * + * store_fwdrev_simd_256_ordered + * store_fwdrev_simd_128_ordered + * store_fwdrev_simd_64_ordered (AVX2) * * Use a special procedure to compute an odd block of 32 k-mers - * count_*mers_{fwd|rev} + * count_*mers_{fwd|rev}_32 * This procedure can use SIMD if we compute backwards * - * Use a slow procedure to compute the start and end blocks + * Use a serial procedure to compute the start and end blocks (< 32) * count_*mers_{fwd|rev}_partial ************************************************************************/ @@ -8916,7 +9090,7 @@ ************************************************************************/ static void -count_9mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, +count_9mers_fwd_partial (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev, int startdiscard, int enddiscard) { Genomecomp_T masked; int pos; @@ -8927,16 +9101,16 @@ masked = nexthigh_rev >> ((96 - 2*9) - 2*pos); masked |= low_rev << (2*pos - (64 - 2*9)); 
masked &= MASK9; - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); + INCR_COUNT(counts[masked]); + debug(printf("%d partial Counting masked %04X (%u) => %d\n",pos,masked,masked,counts[masked])); pos--; } while (pos >= startdiscard && pos >= 16) { masked = low_rev >> ((64 - 2*9) - 2*pos); masked &= MASK9; - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); + INCR_COUNT(counts[masked]); + debug(printf("%d partial Counting masked %04X (%u) => %d\n",pos,masked,masked,counts[masked])); pos--; } @@ -8944,16 +9118,16 @@ masked = low_rev >> ((64 - 2*9) - 2*pos); masked |= high_rev << (2*pos - (32 - 2*9)); masked &= MASK9; - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); + INCR_COUNT(counts[masked]); + debug(printf("%d partial Counting masked %04X (%u) => %d\n",pos,masked,masked,counts[masked])); pos--; } while (pos >= startdiscard) { masked = high_rev >> ((32 - 2*9) - 2*pos); masked &= MASK9; - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); + INCR_COUNT(counts[masked]); + debug(printf("%d partial Counting masked %04X (%u) => %d\n",pos,masked,masked,counts[masked])); pos--; } @@ -8961,7 +9135,7 @@ } static int -store_9mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, +store_9mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev, int startdiscard, int enddiscard) { Genomecomp_T masked; @@ -8974,9 +9148,8 @@ masked |= low_rev << (2*pos - (64 - 2*9)); masked &= MASK9; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + 
debug(printf("Storing masked %04X (%u) at %u (partial)\n",masked,masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -8986,9 +9159,8 @@ masked = low_rev >> ((64 - 2*9) - 2*pos); masked &= MASK9; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + debug(printf("Storing masked %04X (%u) at %u (partial)\n",masked,masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -8999,9 +9171,8 @@ masked |= high_rev << (2*pos - (32 - 2*9)); masked &= MASK9; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + debug(printf("Storing masked %04X (%u) at %u (partial)\n",masked,masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -9011,9 +9182,8 @@ masked = high_rev >> ((32 - 2*9) - 2*pos); masked &= MASK9; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + debug(printf("Storing masked %04X (%u) at %u (partial)\n",masked,masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -9024,7 +9194,7 @@ static void -count_8mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, +count_8mers_fwd_partial (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev, int startdiscard, int enddiscard) { Genomecomp_T masked; int pos; @@ -9035,16 +9205,16 @@ masked = nexthigh_rev >> ((96 - 2*8) - 2*pos); masked |= low_rev << (2*pos - (64 - 2*8)); masked &= MASK8; - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); + 
INCR_COUNT(counts[masked]); + debug(printf("%d partial Counting masked %04X (%u) => %d\n",pos,masked,masked,counts[masked])); pos--; } while (pos >= startdiscard && pos >= 16) { masked = low_rev >> ((64 - 2*8) - 2*pos); masked &= MASK8; - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); + INCR_COUNT(counts[masked]); + debug(printf("%d partial Counting masked %04X (%u) => %d\n",pos,masked,masked,counts[masked])); pos--; } @@ -9052,16 +9222,16 @@ masked = low_rev >> ((64 - 2*8) - 2*pos); masked |= high_rev << (2*pos - (32 - 2*8)); masked &= MASK8; - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); + INCR_COUNT(counts[masked]); + debug(printf("%d partial Counting masked %04X (%u) => %d\n",pos,masked,masked,counts[masked])); pos--; } while (pos >= startdiscard) { masked = high_rev >> ((32 - 2*8) - 2*pos); masked &= MASK8; - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); + INCR_COUNT(counts[masked]); + debug(printf("%d partial Counting masked %04X (%u) => %d\n",pos,masked,masked,counts[masked])); pos--; } @@ -9069,7 +9239,7 @@ } static int -store_8mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, +store_8mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev, int startdiscard, int enddiscard) { Genomecomp_T masked; @@ -9082,9 +9252,8 @@ masked |= low_rev << (2*pos - (64 - 2*8)); masked &= MASK8; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -9094,9 +9263,8 @@ masked = low_rev >> ((64 - 2*8) - 2*pos); masked &= MASK8; 
if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -9107,9 +9275,8 @@ masked |= high_rev << (2*pos - (32 - 2*8)); masked &= MASK8; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -9119,9 +9286,8 @@ masked = high_rev >> ((32 - 2*8) - 2*pos); masked &= MASK8; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -9132,7 +9298,7 @@ static void -count_7mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, +count_7mers_fwd_partial (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev, int startdiscard, int enddiscard) { Genomecomp_T masked; int pos; @@ -9143,7 +9309,7 @@ masked = nexthigh_rev >> ((96 - 2*7) - 2*pos); masked |= low_rev << (2*pos - (64 - 2*7)); masked &= MASK7; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos--; } @@ -9151,7 +9317,7 @@ while (pos >= startdiscard && pos >= 16) { masked = low_rev >> ((64 - 2*7) - 2*pos); masked &= MASK7; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos--; } @@ -9160,7 +9326,7 @@ masked = low_rev >> ((64 - 2*7) - 2*pos); masked |= high_rev << (2*pos - (32 - 2*7)); masked &= MASK7; - INCR_COUNT(counts[masked],inquery[masked]); + 
INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos--; } @@ -9168,7 +9334,7 @@ while (pos >= startdiscard) { masked = high_rev >> ((32 - 2*7) - 2*pos); masked &= MASK7; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos--; } @@ -9177,7 +9343,7 @@ } static int -store_7mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, +store_7mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev, int startdiscard, int enddiscard) { Genomecomp_T masked; @@ -9190,9 +9356,8 @@ masked |= low_rev << (2*pos - (64 - 2*7)); masked &= MASK7; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -9202,9 +9367,8 @@ masked = low_rev >> ((64 - 2*7) - 2*pos); masked &= MASK7; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -9215,9 +9379,8 @@ masked |= high_rev << (2*pos - (32 - 2*7)); masked &= MASK7; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -9227,9 +9390,8 @@ masked = high_rev >> ((32 - 2*7) - 2*pos); masked &= MASK7; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = 
chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -9240,7 +9402,7 @@ static void -count_6mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, +count_6mers_fwd_partial (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev, int startdiscard, int enddiscard) { Genomecomp_T masked; int pos; @@ -9251,7 +9413,7 @@ masked = nexthigh_rev >> ((96 - 2*6) - 2*pos); masked |= low_rev << (2*pos - (64 - 2*6)); masked &= MASK6; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos--; } @@ -9259,7 +9421,7 @@ while (pos >= startdiscard && pos >= 16) { masked = low_rev >> ((64 - 2*6) - 2*pos); masked &= MASK6; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos--; } @@ -9268,7 +9430,7 @@ masked = low_rev >> ((64 - 2*6) - 2*pos); masked |= high_rev << (2*pos - (32 - 2*6)); masked &= MASK6; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos--; } @@ -9276,7 +9438,7 @@ while (pos >= startdiscard) { masked = high_rev >> ((32 - 2*6) - 2*pos); masked &= MASK6; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos--; } @@ -9285,7 +9447,7 @@ } static int -store_6mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, +store_6mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev, int startdiscard, int enddiscard) { Genomecomp_T masked; @@ -9298,9 +9460,8 @@ masked |= low_rev << (2*pos - (64 - 2*6)); masked &= MASK6; if (counts[masked]) { - 
assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -9310,9 +9471,8 @@ masked = low_rev >> ((64 - 2*6) - 2*pos); masked &= MASK6; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -9323,9 +9483,8 @@ masked |= high_rev << (2*pos - (32 - 2*6)); masked &= MASK6; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -9335,9 +9494,8 @@ masked = high_rev >> ((32 - 2*6) - 2*pos); masked &= MASK6; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -9348,7 +9506,7 @@ static void -count_5mers_fwd_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, +count_5mers_fwd_partial (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev, int startdiscard, int enddiscard) { Genomecomp_T masked; int pos; @@ -9359,7 +9517,7 @@ masked = nexthigh_rev >> ((96 - 2*5) - 2*pos); masked |= low_rev << (2*pos - (64 - 2*5)); masked &= MASK5; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos--; } @@ -9367,7 +9525,7 @@ while (pos >= startdiscard && pos >= 16) { masked = low_rev >> ((64 - 2*5) - 2*pos); masked &= MASK5; - 
INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos--; } @@ -9376,7 +9534,7 @@ masked = low_rev >> ((64 - 2*5) - 2*pos); masked |= high_rev << (2*pos - (32 - 2*5)); masked &= MASK5; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos--; } @@ -9384,7 +9542,7 @@ while (pos >= startdiscard) { masked = high_rev >> ((32 - 2*5) - 2*pos); masked &= MASK5; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos--; } @@ -9393,7 +9551,7 @@ } static int -store_5mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, +store_5mers_fwd_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev, int startdiscard, int enddiscard) { Genomecomp_T masked; @@ -9406,9 +9564,8 @@ masked |= low_rev << (2*pos - (64- 2*5)); masked &= MASK5; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -9418,9 +9575,8 @@ masked = low_rev >> ((64 - 2*5) - 2*pos); masked &= MASK5; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -9431,9 +9587,8 @@ masked |= high_rev << (2*pos - (32 - 2*5)); masked &= MASK5; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + 
table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -9443,9 +9598,8 @@ masked = high_rev >> ((32 - 2*5) - 2*pos); masked &= MASK5; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos--; @@ -9455,62 +9609,6 @@ } - -#if 0 -/* Note; for AVX2 and AVX512 */ -/* Variable bit shift right logical (VPSRLVD/Q) */ -_varcount is 16, 14, 12, 10, 8, 6, 4 2 in eight 32-bit quantities in __m256i -_high_rev is broadcast in eight 32-bit quantities in __m256i - - _mm256_slrv_epi32(_high_rev,_varcount); - Then need to mask - (Gather in AVX2) - (Scatter in AVX-512) - - - Previously, did - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask9); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - _counts = _mm256_add_epi32(_counts,ones256); - - Problem: Cannot add ones if any oligo repeats itself within the same SIMD register - Need to wait for conflict instructions from AVX512: - - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask9); - _conflict = _mm256_conflict_epi32(_masked); - if (_conflict is zero) then - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - _counts = _mm256_add_epi32(_counts,ones256); - } else { - Increment counts manually, or account for conflict - } - - - - _counts = _mm_i32gather_epi32(counts,_masked,/*scale*/4); - _counts = _mm_add_epi32(_counts,ones); - _mm_i32scatter_epi32(counts,_masked,/*scale*/4); - - /* Need to change pointers and positions to be indices into a set of values */ - _pointers = _mm256_i32gather_epi32(pointers,_masked,/*scale*/4); - _positions = _mm256_i32gather_epi32(positions,_masked,/*scale*/4); - _space = _mm256_sub_epi32(_positions,_pointers); - _pointers = 
_mm256_sub_epi32(_pointers,ones); /* New pointers */ - _chrpos = _mm256_sub_epi32(_mm256_set1_epi32(chrpos),ramp); - - _mm256_mask_i32scatter_epi32(pointers,_masked,_pointers,/*scale*/4); - _mm256_mask_i32scatter_epi32(values,_pointers,_chrpos,/*scale*/4); - - if (EXTRACT256(_space,0)) { - pointer = EXTRACT256(_pointers,0); - *pointer = EXTRACT256(_chrpos,0); - } - -#endif - - #if 0 /* Replaced by individual count_*mer_{fwd|rev}_simd procedures */ /* array is filled by extract_*mers_{fwd|rev}_simd */ @@ -9639,2595 +9737,1760 @@ #endif -#ifdef USE_SIMD_FOR_COUNTS -/* Forward and reverse procedures are identical, because forward has - chrpos ascending from left and reverse has chrpos ascending from - right */ -static Chrpos_T -store_fwdrev_simd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, - UINT4 *array) { -#if defined(HAVE_AVX2) && defined(USE_GATHER) - __m256i _counts, _masked; +#ifdef HAVE_AVX512 +/* Uses gather, conflict detection, and scatter. 
Not worth it for + AVX2, since we don't have conflict detection or scatter */ +static void +count_fwdrev_simd_n (Count_T *counts, UINT4 *array, int n) { + UINT4 *ptr; + __m512i _envelopes, _increment; + __m512i _masked, _conflicts, _blocks, _address_mask; + __m512i _zeroes; + __mmask16 pending_mask, current_mask; + int i; + +#if defined(HAVE_AVX512BW) + __m512i _addresses, _ones; +#elif defined(USE_ROTATE) + __m512i _rotates; #else - Genomecomp_T masked; + __m512i _new_envelopes, _addresses, _add_mask, _byte_mask, _ones; #endif - /* Row 3 */ -#if defined(HAVE_AVX2) && defined(USE_GATHER) - _masked = _mm256_i32gather_epi32((int *) &(array[32+3]),byfours,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[63]]); - assert(EXTRACT256(_counts,1) == counts[array[59]]); - assert(EXTRACT256(_counts,2) == counts[array[55]]); - assert(EXTRACT256(_counts,3) == counts[array[51]]); - assert(EXTRACT256(_counts,4) == counts[array[47]]); - assert(EXTRACT256(_counts,5) == counts[array[43]]); - assert(EXTRACT256(_counts,6) == counts[array[39]]); - assert(EXTRACT256(_counts,7) == counts[array[35]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos,chrpos,0)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 1,chrpos,1)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 1; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 2,chrpos,2)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 2; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 3,chrpos,3)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 3; - } - if (EXTRACT256(_counts,4)) 
{ - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 4,chrpos,4)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 4; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 5,chrpos,5)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 5; + +#ifdef DEBUG + if (n == 64) { + printf("Counting of %d\n",n); + for (i = 0; i < n; i += 4) { + printf("%d: %08X %08X %08X %08X\n",i,array[i],array[i+1],array[i+2],array[i+3]); } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 6,chrpos,6)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 6; + } else if (n == 128) { + printf("Counting of %d\n",n); + for (i = 0; i < n; i += 8) { + printf("%d: %08X %08X %08X %08X %08X %08X %08X %08X\n", + i,array[i],array[i+1],array[i+2],array[i+3],array[i+4],array[i+5],array[i+6],array[i+7]); } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 7,chrpos,7)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 7; + } else if (n == 256) { + printf("Counting of %d\n",n); + for (i = 0; i < n; i += 16) { + printf("%d: %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X\n", + i,array[i],array[i+1],array[i+2],array[i+3],array[i+4],array[i+5],array[i+6],array[i+7], + array[i+8],array[i+9],array[i+10],array[i+11],array[i+12],array[i+13],array[i+14],array[i+15]); + } } +#endif - _masked = _mm256_i32gather_epi32((int *) &(array[3]),byfours,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[31]]); - assert(EXTRACT256(_counts,1) == counts[array[27]]); - assert(EXTRACT256(_counts,2) == counts[array[23]]); - assert(EXTRACT256(_counts,3) == counts[array[19]]); - assert(EXTRACT256(_counts,4) == counts[array[15]]); - assert(EXTRACT256(_counts,5) == counts[array[11]]); - 
assert(EXTRACT256(_counts,6) == counts[array[7]]); - assert(EXTRACT256(_counts,7) == counts[array[3]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 8,chrpos,8)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 8; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 9,chrpos,9)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 9; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 10,chrpos,10)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 10; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 11,chrpos,11)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 11; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 12,chrpos,12)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 12; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 13,chrpos,13)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 13; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 14,chrpos,14)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 14; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 15,chrpos,15)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 15; - } - } -#else - masked = array[63]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos,chrpos,0)); - table[--pointers[masked]] = chrpos; - } + _address_mask = _mm512_set1_epi32(0x3); + _zeroes = _mm512_setzero_si512(); - 
masked = array[59]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 1,chrpos,1)); - table[--pointers[masked]] = chrpos - 1; - } - masked = array[55]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 2,chrpos,2)); - table[--pointers[masked]] = chrpos - 2; - } +#if defined(HAVE_AVX512BW) + _ones = _mm512_set1_epi32(1); +#elif defined(USE_ROTATE) + _increment = _mm512_set1_epi32(0x01000000); /* Add 1 to most significante byte */ +#else + _ones = _mm512_set1_epi32(1); +#endif - masked = array[51]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 3,chrpos,3)); - table[--pointers[masked]] = chrpos - 3; - } - masked = array[47]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 4,chrpos,4)); - table[--pointers[masked]] = chrpos - 4; - } + ptr = &(array[0]); +#ifdef HAVE_AVX512BW + while (ptr < &(array[n])) { + _masked = _mm512_loadu_si512((__m512i *) ptr); + _blocks = _mm512_srli_epi32(_masked,2); /* div by 4 bytes/int */ + + _addresses = _mm512_and_si512(_masked,_address_mask); + _addresses = _mm512_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + + /* Note: Have to check for conflicts in _blocks, not _masked, since we update one address per block */ + _conflicts = _mm512_conflict_epi32(_blocks); + pending_mask = 0xFFFF; + while (pending_mask) { + current_mask = _mm512_cmpeq_epi32_mask(_conflicts,_zeroes); + current_mask = current_mask & pending_mask; +#if 0 + _envelopes = _mm512_mask_i32gather_epi32(_zeroes,current_mask,_blocks,(const void *) counts,/*scale*/4); +#else + _envelopes = _mm512_i32gather_epi32(_blocks,(const void *) counts,/*scale*/4); /* Not using mask */ +#endif + /* _increment = 
_mm512_sllv_epi32(_mm512_mask_set1_epi32(_zeroes,current_mask,1),_addresses); */ + _increment = _mm512_sllv_epi32(_ones,_addresses); /* Puts 1 in correct byte, but not masked */ + _envelopes = _mm512_add_epi8(_envelopes,_increment); - masked = array[43]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 5,chrpos,5)); - table[--pointers[masked]] = chrpos - 5; - } + _mm512_mask_i32scatter_epi32((void *) counts,current_mask,_blocks,_envelopes,/*scale*/4); + _conflicts = _mm512_andnot_si512(_mm512_set1_epi32(current_mask),_conflicts); + pending_mask = pending_mask & (~current_mask); + } - masked = array[39]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 6,chrpos,6)); - table[--pointers[masked]] = chrpos - 6; + ptr += 16; } - masked = array[35]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 7,chrpos,7)); - table[--pointers[masked]] = chrpos - 7; - } +#elif defined(USE_ROTATE) + /* rolv command is slow */ + while (ptr < &(array[n])) { + _masked = _mm512_loadu_si512((__m512i *) ptr); + _blocks = _mm512_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _rotates = _mm512_andnot_si512(_masked,_address_mask); /* Faster way to subtract addresses from 3 */ + _rotates = _mm512_slli_epi32(_rotates,3); /* Multiply by 8 bits/byte */ - masked = array[31]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 8,chrpos,8)); - table[--pointers[masked]] = chrpos - 8; - } + /* Note: Have to check for conflicts in _blocks, not _masked, since we update one address per block */ + _conflicts = _mm512_conflict_epi32(_blocks); + pending_mask = 0xFFFF; + while (pending_mask) { + current_mask = 
_mm512_cmpeq_epi32_mask(_conflicts,_zeroes); + current_mask = current_mask & pending_mask; +#if 0 + _envelopes = _mm512_mask_i32gather_epi32(_zeroes,current_mask,_blocks,(const void *) counts,/*scale*/4); +#else + _envelopes = _mm512_i32gather_epi32(_blocks,(const void *) counts,/*scale*/4); /* Not using mask */ +#endif - masked = array[27]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 9,chrpos,9)); - table[--pointers[masked]] = chrpos - 9; - } + /* rolv command is slow */ + _envelopes = _mm512_rolv_epi32(_envelopes,_rotates); + _envelopes = _mm512_add_epi32(_envelopes,_increment); + _envelopes = _mm512_rorv_epi32(_envelopes,_rotates); - masked = array[23]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 10,chrpos,10)); - table[--pointers[masked]] = chrpos - 10; - } + _mm512_mask_i32scatter_epi32((void *) counts,current_mask,_blocks,_envelopes,/*scale*/4); + _conflicts = _mm512_andnot_si512(_mm512_set1_epi32(current_mask),_conflicts); + pending_mask = pending_mask & (~current_mask); + } - masked = array[19]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 11,chrpos,11)); - table[--pointers[masked]] = chrpos - 11; + ptr += 16; } - masked = array[15]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 12,chrpos,12)); - table[--pointers[masked]] = chrpos - 12; - } +#else + _byte_mask = _mm512_set1_epi32(0xFF); - masked = array[11]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 13,chrpos,13)); - table[--pointers[masked]] = chrpos - 13; - } + while (ptr < &(array[n])) { + _masked = _mm512_loadu_si512((__m512i *) 
ptr); + _blocks = _mm512_srli_epi32(_masked,2); /* div by 4 bytes/int */ - masked = array[7]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 14,chrpos,14)); - table[--pointers[masked]] = chrpos - 14; - } + _addresses = _mm512_and_si512(_masked,_address_mask); + _addresses = _mm512_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ - masked = array[3]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 15,chrpos,15)); - table[--pointers[masked]] = chrpos - 15; - } + /* Note: Have to check for conflicts in _blocks, not _masked, since we update one address per block */ + _conflicts = _mm512_conflict_epi32(_blocks); + pending_mask = 0xFFFF; + while (pending_mask) { + current_mask = _mm512_cmpeq_epi32_mask(_conflicts,_zeroes); + current_mask = current_mask & pending_mask; +#if 0 + _envelopes = _mm512_mask_i32gather_epi32(_zeroes,current_mask,_blocks,(const void *) counts,/*scale*/4); +#else + _envelopes = _mm512_i32gather_epi32(_blocks,(const void *) counts,/*scale*/4); /* Not using mask */ #endif + /* _increment = _mm512_sllv_epi32(_mm512_mask_set1_epi32(_zeroes,current_mask,1),_addresses); */ + _increment = _mm512_sllv_epi32(_ones,_addresses); /* Puts 1 in correct byte, but not masked */ + /* Need to add epi32, mask the carry, and combine previous solution */ + _add_mask = _mm512_sllv_epi32(_byte_mask,_addresses); + _new_envelopes = _mm512_add_epi32(_envelopes,_increment); +#if 0 + _envelopes = _mm512_or_si512(_mm512_andnot_si512(_add_mask,_envelopes),_mm512_and_si512(_add_mask,_new_envelopes)); +#else + _envelopes = _mm512_ternarylogic_epi32(_add_mask,_envelopes,_new_envelopes,0xAC); +#endif - /* Row 2 */ -#if defined(HAVE_AVX2) && defined(USE_GATHER) - _masked = _mm256_i32gather_epi32((int *) &(array[32+2]),byfours,/*scale*/4); - _counts = 
_mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[62]]); - assert(EXTRACT256(_counts,1) == counts[array[58]]); - assert(EXTRACT256(_counts,2) == counts[array[54]]); - assert(EXTRACT256(_counts,3) == counts[array[50]]); - assert(EXTRACT256(_counts,4) == counts[array[46]]); - assert(EXTRACT256(_counts,5) == counts[array[42]]); - assert(EXTRACT256(_counts,6) == counts[array[38]]); - assert(EXTRACT256(_counts,7) == counts[array[34]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 16,chrpos,16)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 16; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 17,chrpos,17)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 17; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 18,chrpos,18)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 18; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 19,chrpos,19)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 19; + _mm512_mask_i32scatter_epi32((void *) counts,current_mask,_blocks,_envelopes,/*scale*/4); + _conflicts = _mm512_andnot_si512(_mm512_set1_epi32(current_mask),_conflicts); + pending_mask = pending_mask & (~current_mask); } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 20,chrpos,20)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 20; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 21,chrpos,21)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 21; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - 
%d)\n",EXTRACT256(_masked,6),chrpos - 22,chrpos,22)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 22; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 23,chrpos,23)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 23; - } - } - _masked = _mm256_i32gather_epi32((int *) &(array[2]),byfours,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[30]]); - assert(EXTRACT256(_counts,1) == counts[array[26]]); - assert(EXTRACT256(_counts,2) == counts[array[22]]); - assert(EXTRACT256(_counts,3) == counts[array[18]]); - assert(EXTRACT256(_counts,4) == counts[array[14]]); - assert(EXTRACT256(_counts,5) == counts[array[10]]); - assert(EXTRACT256(_counts,6) == counts[array[6]]); - assert(EXTRACT256(_counts,7) == counts[array[2]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 24,chrpos,24)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 24; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 25,chrpos,25)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 25; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 26,chrpos,26)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 26; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 27,chrpos,27)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 27; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 28,chrpos,28)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 28; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - 
%d)\n",EXTRACT256(_masked,5),chrpos - 29,chrpos,29)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 29; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 30,chrpos,30)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 30; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 31,chrpos,31)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 31; - } - } -#else - masked = array[62]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 16,chrpos,16)); - table[--pointers[masked]] = chrpos - 16; + ptr += 16; } +#endif - masked = array[58]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 17,chrpos,17)); - table[--pointers[masked]] = chrpos - 17; - } + return; +} - masked = array[54]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 18,chrpos,18)); - table[--pointers[masked]] = chrpos - 18; +#else +/* Serial */ +static void +count_fwdrev_simd_n (Count_T *counts, UINT4 *array, int n) { + UINT4 *ptr; + +#ifdef DEBUG + int i; + printf("Counting of %d\n",n); + for (i = 0; i < n; i += 16) { + printf("%d: %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X\n", + i,array[i],array[i+1],array[i+2],array[i+3],array[i+4],array[i+5],array[i+6],array[i+7], + array[i+8],array[i+9],array[i+10],array[i+11],array[i+12],array[i+13],array[i+14],array[i+15]); } +#endif - masked = array[50]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 19,chrpos,19)); - table[--pointers[masked]] = chrpos - 19; + ptr = &(array[0]); + while (ptr < &(array[n])) { + 
counts[*ptr++] += 1; } - masked = array[46]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 20,chrpos,20)); - table[--pointers[masked]] = chrpos - 20; - } - - masked = array[42]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 21,chrpos,21)); - table[--pointers[masked]] = chrpos - 21; - } - - masked = array[38]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 22,chrpos,22)); - table[--pointers[masked]] = chrpos - 22; - } - - masked = array[34]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 23,chrpos,23)); - table[--pointers[masked]] = chrpos - 23; - } + return; +} +#endif - masked = array[30]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 24,chrpos,24)); - table[--pointers[masked]] = chrpos - 24; - } - masked = array[26]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 25,chrpos,25)); - table[--pointers[masked]] = chrpos - 25; - } +#define nonzero_p_32(diff) diff - masked = array[22]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 26,chrpos,26)); - table[--pointers[masked]] = chrpos - 26; - } +#if !defined(HAVE_SSE4_2) +#define count_trailing_zeroes_32(diff) mod_37_bit_position[(-diff & diff) % 37] +#elif defined(HAVE_TZCNT) +#define count_trailing_zeroes_32(diff) _tzcnt_u32(diff) +#elif defined(HAVE_BUILTIN_CTZ) +#define count_trailing_zeroes_32(diff) __builtin_ctz(diff) +#else +/* lowbit = -diff & diff */ +#define 
count_trailing_zeroes_32(diff) mod_37_bit_position[(-diff & diff) % 37] +#endif - masked = array[18]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 27,chrpos,27)); - table[--pointers[masked]] = chrpos - 27; - } +/* Slower: clear_lowbit(diff,relpos) diff -= (1 << relpos) */ +#define clear_lowbit_32(diff,relpos) (diff & (diff - 1)); - masked = array[14]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 28,chrpos,28)); - table[--pointers[masked]] = chrpos - 28; - } - masked = array[10]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 29,chrpos,29)); - table[--pointers[masked]] = chrpos - 29; - } - masked = array[6]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 30,chrpos,30)); - table[--pointers[masked]] = chrpos - 30; - } +#ifdef HAVE_SSE2 +/* Forward and reverse procedures are identical, because forward has + chrpos ascending from left and reverse has chrpos ascending from + right. Right now using SSE2. For AVX2, can use gather by shifting + bytes. 
*/ +static Chrpos_T +store_fwdrev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + UINT4 *array) { + Genomecomp_T masked; + int relpos; - masked = array[2]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 31,chrpos,31)); - table[--pointers[masked]] = chrpos - 31; +#ifdef DEBUG + int i; + printf("Storing of %d\n",64); + for (i = 0; i < 64; i += 4) { + printf("%d: %08X %08X %08X %08X\n",i,array[i],array[i+1],array[i+2],array[i+3]); } #endif - - /* Row 1 */ -#if defined(HAVE_AVX2) && defined(USE_GATHER) - _masked = _mm256_i32gather_epi32((int *) &(array[32+1]),byfours,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[61]]); - assert(EXTRACT256(_counts,1) == counts[array[57]]); - assert(EXTRACT256(_counts,2) == counts[array[53]]); - assert(EXTRACT256(_counts,3) == counts[array[49]]); - assert(EXTRACT256(_counts,4) == counts[array[45]]); - assert(EXTRACT256(_counts,5) == counts[array[41]]); - assert(EXTRACT256(_counts,6) == counts[array[37]]); - assert(EXTRACT256(_counts,7) == counts[array[33]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 32,chrpos,32)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 32; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 33,chrpos,33)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 33; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 34,chrpos,34)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 34; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 35,chrpos,35)); - 
table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 35; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 36,chrpos,36)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 36; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 37,chrpos,37)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 37; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 38,chrpos,38)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 38; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 39,chrpos,39)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 39; + /* Row 3 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[63 - relpos*4]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; } } + chrpos -= 16; - _masked = _mm256_i32gather_epi32((int *) &(array[1]),byfours,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[29]]); - assert(EXTRACT256(_counts,1) == counts[array[25]]); - assert(EXTRACT256(_counts,2) == counts[array[21]]); - assert(EXTRACT256(_counts,3) == counts[array[17]]); - assert(EXTRACT256(_counts,4) == counts[array[13]]); - assert(EXTRACT256(_counts,5) == counts[array[9]]); - assert(EXTRACT256(_counts,6) == counts[array[5]]); - assert(EXTRACT256(_counts,7) == counts[array[1]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 40,chrpos,40)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 40; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u 
at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 41,chrpos,41)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 41; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 42,chrpos,42)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 42; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 43,chrpos,43)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 43; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 44,chrpos,44)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 44; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 45,chrpos,45)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 45; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 46,chrpos,46)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 46; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 47,chrpos,47)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 47; + /* Row 2 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[62 - relpos*4]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; } } -#else - masked = array[61]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 32,chrpos,32)); - table[--pointers[masked]] = chrpos - 32; - } + chrpos -= 16; - masked = array[57]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 33,chrpos,33)); - 
table[--pointers[masked]] = chrpos - 33; - } - - masked = array[53]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 34,chrpos,34)); - table[--pointers[masked]] = chrpos - 34; + /* Row 1 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[61 - relpos*4]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = array[49]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 35,chrpos,35)); - table[--pointers[masked]] = chrpos - 35; + /* Row 0 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[60 - relpos*4]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = array[45]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 36,chrpos,36)); - table[--pointers[masked]] = chrpos - 36; - } + return chrpos; +} +#endif - masked = array[41]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 37,chrpos,37)); - table[--pointers[masked]] = chrpos - 37; - } +#ifdef HAVE_AVX2 +static Chrpos_T +store_fwdrev_simd_64_ordered (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + UINT4 *array) { + Genomecomp_T masked; + UINT4 *ptr; + __m128i _present, _counts, _zeroes; + __m128i _masked, _blocks, _envelopes, _addresses, _address_mask, _byte_mask; + unsigned int diff_32; + int relpos; + int i; - masked = array[37]; - if (counts[masked]) { - assert(pointers[masked] > 
positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 38,chrpos,38)); - table[--pointers[masked]] = chrpos - 38; +#ifdef DEBUG + printf("Storing of %d\n",64); + for (i = 0; i < 64; i += 4) { + printf("%d: %08X %08X %08X %08X\n",i,array[i],array[i+1],array[i+2],array[i+3]); } +#endif - masked = array[33]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 39,chrpos,39)); - table[--pointers[masked]] = chrpos - 39; - } + _address_mask = _mm_set1_epi32(0x3); + _byte_mask = _mm_set1_epi32(0xFF); + _zeroes = _mm_setzero_si128(); - masked = array[29]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 40,chrpos,40)); - table[--pointers[masked]] = chrpos - 40; - } + ptr = &(array[0]); + for (i = 0; i < 16; i++) { + _masked = _mm_load_si128((__m128i *) ptr); + _blocks = _mm_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm_and_si128(_masked,_address_mask); + _addresses = _mm_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm_and_si128(_counts,_byte_mask); /* Ignore bytes to left */ + + _present = _mm_cmpgt_epi32(_counts,_zeroes); + diff_32 = _mm_movemask_ps(_mm_castsi128_ps(_present)); + + while (nonzero_p_32(diff_32)) { + relpos = count_trailing_zeroes_32(diff_32); + masked = ptr[relpos]; + if (counts[masked]) { + debug(printf("64: Storing masked %u (%08X) at %u (%u - %d) using relpos\n",masked,masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } + diff_32 = clear_lowbit_32(diff_32,relpos); + } - masked = array[25]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked 
%u at %u (%u - %d)\n",masked,chrpos - 41,chrpos,41)); - table[--pointers[masked]] = chrpos - 41; + chrpos -= 4; + ptr += 4; } - masked = array[21]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 42,chrpos,42)); - table[--pointers[masked]] = chrpos - 42; - } + return chrpos; +} +#endif - masked = array[17]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 43,chrpos,43)); - table[--pointers[masked]] = chrpos - 43; - } - masked = array[13]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 44,chrpos,44)); - table[--pointers[masked]] = chrpos - 44; - } +#ifdef HAVE_AVX2 +static Chrpos_T +store_fwdrev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + UINT4 *array) { + Genomecomp_T masked; + int relpos; +#ifdef DEBUG + int i; +#endif - masked = array[9]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 45,chrpos,45)); - table[--pointers[masked]] = chrpos - 45; +#ifdef DEBUG + printf("Storage of 128\n"); + for (i = 0; i < 128; i += 8) { + printf("%d: %08X %08X %08X %08X %08X %08X %08X %08X\n", + i,array[i],array[i+1],array[i+2],array[i+3],array[i+4],array[i+5],array[i+6],array[i+7]); } +#endif - masked = array[5]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 46,chrpos,46)); - table[--pointers[masked]] = chrpos - 46; + /* Row 7 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[127 - relpos*8]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - 
relpos; + } } + chrpos -= 16; - masked = array[1]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 47,chrpos,47)); - table[--pointers[masked]] = chrpos - 47; + /* Row 6 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[126 - relpos*8]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } -#endif + chrpos -= 16; - /* Row 0 */ -#if defined(HAVE_AVX2) && defined(USE_GATHER) - _masked = _mm256_i32gather_epi32((int *) &(array[32+0]),byfours,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[60]]); - assert(EXTRACT256(_counts,1) == counts[array[56]]); - assert(EXTRACT256(_counts,2) == counts[array[52]]); - assert(EXTRACT256(_counts,3) == counts[array[48]]); - assert(EXTRACT256(_counts,4) == counts[array[44]]); - assert(EXTRACT256(_counts,5) == counts[array[40]]); - assert(EXTRACT256(_counts,6) == counts[array[36]]); - assert(EXTRACT256(_counts,7) == counts[array[32]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 48,chrpos,48)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 48; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 49,chrpos,49)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 49; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 50,chrpos,50)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 50; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 51,chrpos,51)); - table[--pointers[EXTRACT256(_masked,3)]] = 
chrpos - 51; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 52,chrpos,52)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 52; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 53,chrpos,53)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 53; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 54,chrpos,54)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 54; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 55,chrpos,55)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 55; + /* Row 5 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[125 - relpos*8]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; } } + chrpos -= 16; - _masked = _mm256_i32gather_epi32((int *) &(array[0]),byfours,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[28]]); - assert(EXTRACT256(_counts,1) == counts[array[24]]); - assert(EXTRACT256(_counts,2) == counts[array[20]]); - assert(EXTRACT256(_counts,3) == counts[array[16]]); - assert(EXTRACT256(_counts,4) == counts[array[12]]); - assert(EXTRACT256(_counts,5) == counts[array[8]]); - assert(EXTRACT256(_counts,6) == counts[array[4]]); - assert(EXTRACT256(_counts,7) == counts[array[0]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 56,chrpos,56)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 56; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - 
%d)\n",EXTRACT256(_masked,1),chrpos - 57,chrpos,57)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 57; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 58,chrpos,58)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 58; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 59,chrpos,59)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 59; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 60,chrpos,60)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 60; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 61,chrpos,61)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 61; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 62,chrpos,62)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 62; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 63,chrpos,63)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 63; - } - } -#else - masked = array[60]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 48,chrpos,48)); - table[--pointers[masked]] = chrpos - 48; - } - masked = array[56]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 49,chrpos,49)); - table[--pointers[masked]] = chrpos - 49; + /* Row 4 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[124 - relpos*8]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos 
- relpos; + } } + chrpos -= 16; - masked = array[52]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 50,chrpos,50)); - table[--pointers[masked]] = chrpos - 50; - } - masked = array[48]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 51,chrpos,51)); - table[--pointers[masked]] = chrpos - 51; + /* Row 3 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[123 - relpos*8]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = array[44]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 52,chrpos,52)); - table[--pointers[masked]] = chrpos - 52; - } - masked = array[40]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 53,chrpos,53)); - table[--pointers[masked]] = chrpos - 53; + /* Row 2 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[122 - relpos*8]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = array[36]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 54,chrpos,54)); - table[--pointers[masked]] = chrpos - 54; - } - masked = array[32]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 55,chrpos,55)); - table[--pointers[masked]] = chrpos - 55; + /* Row 1 */ + for (relpos 
= 0; relpos < 16; relpos++) { + masked = array[121 - relpos*8]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = array[28]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 56,chrpos,56)); - table[--pointers[masked]] = chrpos - 56; - } - masked = array[24]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 57,chrpos,57)); - table[--pointers[masked]] = chrpos - 57; + /* Row 0 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[120 - relpos*8]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = array[20]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 58,chrpos,58)); - table[--pointers[masked]] = chrpos - 58; - } + return chrpos; +} - masked = array[16]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 59,chrpos,59)); - table[--pointers[masked]] = chrpos - 59; - } +static Chrpos_T +store_fwdrev_simd_128_ordered (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + UINT4 *array) { + Genomecomp_T masked; + UINT4 *ptr; + __m256i _present, _counts, _zeroes; + __m256i _masked, _blocks, _envelopes, _addresses, _address_mask, _count_mask; + unsigned int diff_32; + int relpos; + int i; - masked = array[12]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 
60,chrpos,60)); - table[--pointers[masked]] = chrpos - 60; +#ifdef DEBUG + printf("Storage of 128\n"); + for (i = 0; i < 128; i += 8) { + printf("%d: %08X %08X %08X %08X %08X %08X %08X %08X\n", + i,array[i],array[i+1],array[i+2],array[i+3],array[i+4],array[i+5],array[i+6],array[i+7]); } +#endif - masked = array[8]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 61,chrpos,61)); - table[--pointers[masked]] = chrpos - 61; - } + _address_mask = _mm256_set1_epi32(0x3); + _count_mask = _mm256_set1_epi32(0xFF); + _zeroes = _mm256_setzero_si256(); - masked = array[4]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 62,chrpos,62)); - table[--pointers[masked]] = chrpos - 62; - } + ptr = &(array[0]); + for (i = 0; i < 16; i++) { + _masked = _mm256_load_si256((__m256i *) ptr); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); /* Ignore bytes to left */ + + _present = _mm256_cmpgt_epi32(_counts,_zeroes); + diff_32 = _mm256_movemask_ps(_mm256_castsi256_ps(_present)); + + while (nonzero_p_32(diff_32)) { + relpos = count_trailing_zeroes_32(diff_32); + masked = ptr[relpos]; + if (counts[masked]) { + debug(printf("128: Storing masked %u (%08X) at %u (%u - %d) using relpos\n",masked,masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } + diff_32 = clear_lowbit_32(diff_32,relpos); + } - masked = array[0]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - 
debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 63,chrpos,63)); - table[--pointers[masked]] = chrpos - 63; + chrpos -= 8; + ptr += 8; } -#endif - return chrpos - 64; + return chrpos; } +#endif /* HAVE_AVX2 */ -#ifdef HAVE_AVX2 - -/* testz(counts,counts) == 0 implies there is a nonzero count */ +#ifdef HAVE_AVX512 static Chrpos_T -store_fwdrev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, +store_fwdrev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, UINT4 *array) { -#ifdef USE_GATHER - __m256i _counts, _masked; -#else Genomecomp_T masked; + int relpos; +#ifdef DEBUG + int i; #endif #ifdef DEBUG - int i; - for (i = 0; i < 128; i += 8) { - printf("%d: %08X %08X %08X %08X %08X %08X %08X %08X\n", - i,array[i],array[i+1],array[i+2],array[i+3],array[i+4],array[i+5],array[i+6],array[i+7]); + printf("Storage of 256\n"); + for (i = 0; i < 256; i += 16) { + printf("%d: %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X\n", + i,array[i],array[i+1],array[i+2],array[i+3],array[i+4],array[i+5],array[i+6],array[i+7], + array[i+8],array[i+9],array[i+10],array[i+11],array[i+12],array[i+13],array[i+14],array[i+15]); } #endif - /* Row 7 */ -#ifdef USE_GATHER - _masked = _mm256_i32gather_epi32((int *) &(array[64+7]),byeights,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[127]]); - assert(EXTRACT256(_counts,1) == counts[array[119]]); - assert(EXTRACT256(_counts,2) == counts[array[111]]); - assert(EXTRACT256(_counts,3) == counts[array[103]]); - assert(EXTRACT256(_counts,4) == counts[array[95]]); - assert(EXTRACT256(_counts,5) == counts[array[87]]); - assert(EXTRACT256(_counts,6) == counts[array[79]]); - assert(EXTRACT256(_counts,7) == counts[array[71]]); - - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - 
%d)\n",EXTRACT256(_masked,0),chrpos,chrpos,0)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 1,chrpos,1)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 1; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 2,chrpos,2)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 2; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 3,chrpos,3)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 3; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 4,chrpos,4)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 4; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 5,chrpos,5)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 5; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 6,chrpos,6)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 6; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 7,chrpos,7)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 7; + /* Row 15 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[255 - relpos*16]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; } } + chrpos -= 16; - _masked = _mm256_i32gather_epi32((int *) &(array[7]),byeights,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[63]]); - assert(EXTRACT256(_counts,1) == counts[array[55]]); - 
assert(EXTRACT256(_counts,2) == counts[array[47]]); - assert(EXTRACT256(_counts,3) == counts[array[39]]); - assert(EXTRACT256(_counts,4) == counts[array[31]]); - assert(EXTRACT256(_counts,5) == counts[array[23]]); - assert(EXTRACT256(_counts,6) == counts[array[15]]); - assert(EXTRACT256(_counts,7) == counts[array[7]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 8,chrpos,8)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 8; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 9,chrpos,9)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 9; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 10,chrpos,10)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 10; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 11,chrpos,11)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 11; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 12,chrpos,12)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 12; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 13,chrpos,13)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 13; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 14,chrpos,14)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 14; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 15,chrpos,15)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 15; + /* Row 14 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[254 - relpos*16]; 
+ if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; } } -#else - masked = array[127]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos,chrpos,0)); - table[--pointers[masked]] = chrpos; - } + chrpos -= 16; - masked = array[119]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 1,chrpos,1)); - table[--pointers[masked]] = chrpos - 1; + /* Row 13 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[253 - relpos*16]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = array[111]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 2,chrpos,2)); - table[--pointers[masked]] = chrpos - 2; + /* Row 12 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[252 - relpos*16]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = array[103]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 3,chrpos,3)); - table[--pointers[masked]] = chrpos - 3; + /* Row 11 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[251 - relpos*16]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = 
array[95]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 4,chrpos,4)); - table[--pointers[masked]] = chrpos - 4; + /* Row 10 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[250 - relpos*16]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = array[87]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 5,chrpos,5)); - table[--pointers[masked]] = chrpos - 5; + /* Row 9 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[249 - relpos*16]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = array[79]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 6,chrpos,6)); - table[--pointers[masked]] = chrpos - 6; + /* Row 8 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[248 - relpos*16]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = array[71]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 7,chrpos,7)); - table[--pointers[masked]] = chrpos - 7; + /* Row 7 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[247 - relpos*16]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] 
+ (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = array[63]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 8,chrpos,8)); - table[--pointers[masked]] = chrpos - 8; + /* Row 6 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[246 - relpos*16]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = array[55]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 9,chrpos,9)); - table[--pointers[masked]] = chrpos - 9; + /* Row 5 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[245 - relpos*16]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = array[47]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 10,chrpos,10)); - table[--pointers[masked]] = chrpos - 10; + /* Row 4 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[244 - relpos*16]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = array[39]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 11,chrpos,11)); - table[--pointers[masked]] = chrpos - 11; + /* Row 3 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[243 - relpos*16]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u 
(%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = array[31]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 12,chrpos,12)); - table[--pointers[masked]] = chrpos - 12; + /* Row 2 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[242 - relpos*16]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = array[23]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 13,chrpos,13)); - table[--pointers[masked]] = chrpos - 13; + /* Row 1 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[241 - relpos*16]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = array[15]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 14,chrpos,14)); - table[--pointers[masked]] = chrpos - 14; + /* Row 0 */ + for (relpos = 0; relpos < 16; relpos++) { + masked = array[240 - relpos*16]; + if (counts[masked]) { + debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } } + chrpos -= 16; - masked = array[7]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 15,chrpos,15)); - table[--pointers[masked]] = chrpos - 15; + return chrpos; +} + +static Chrpos_T +store_fwdrev_simd_256_ordered 
(Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + UINT4 *array) { + Genomecomp_T masked; + UINT4 *ptr; + __m512i _counts, _zeroes; + __m512i _masked, _blocks, _envelopes, _addresses, _address_mask, _count_mask; + __mmask16 diff_32; + int relpos; + int i; + +#ifdef DEBUG + printf("Storage of 256\n"); + for (i = 0; i < 256; i += 16) { + printf("%d: %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X\n", + i,array[i],array[i+1],array[i+2],array[i+3],array[i+4],array[i+5],array[i+6],array[i+7], + array[i+8],array[i+9],array[i+10],array[i+11],array[i+12],array[i+13],array[i+14],array[i+15]); } #endif + _address_mask = _mm512_set1_epi32(0x3); + _count_mask = _mm512_set1_epi32(0xFF); + _zeroes = _mm512_setzero_si512(); - /* Row 6 */ -#ifdef USE_GATHER - _masked = _mm256_i32gather_epi32((int *) &(array[64+6]),byeights,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[126]]); - assert(EXTRACT256(_counts,1) == counts[array[118]]); - assert(EXTRACT256(_counts,2) == counts[array[110]]); - assert(EXTRACT256(_counts,3) == counts[array[102]]); - assert(EXTRACT256(_counts,4) == counts[array[94]]); - assert(EXTRACT256(_counts,5) == counts[array[86]]); - assert(EXTRACT256(_counts,6) == counts[array[78]]); - assert(EXTRACT256(_counts,7) == counts[array[70]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 16,chrpos,16)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 16; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 17,chrpos,17)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 17; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 18,chrpos,18)); - table[--pointers[EXTRACT256(_masked,2)]] = 
chrpos - 18; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 19,chrpos,19)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 19; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 20,chrpos,20)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 20; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 21,chrpos,21)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 21; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 22,chrpos,22)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 22; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 23,chrpos,23)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 23; + ptr = &(array[0]); + for (i = 0; i < 16; i++) { + _masked = _mm512_load_si512((__m512i *) ptr); + _blocks = _mm512_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm512_and_si512(_masked,_address_mask); + _addresses = _mm512_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm512_i32gather_epi32(_blocks,(const void *) counts,/*scale*/4); + _counts = _mm512_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm512_and_si512(_counts,_count_mask); /* Ignore bytes to left */ + + diff_32 = _mm512_cmpgt_epi32_mask(_counts,_zeroes); + while (nonzero_p_32(diff_32)) { + relpos = count_trailing_zeroes_32(diff_32); + masked = ptr[relpos]; + if (counts[masked]) { + debug(printf("256: Storing masked %u (%08X) at %u (%u - %d) using relpos\n",masked,masked,chrpos - relpos,chrpos,relpos)); + table[positions[masked] + (--counts[masked])] = chrpos - relpos; + } + diff_32 = clear_lowbit_32(diff_32,relpos); } - } - _masked = _mm256_i32gather_epi32((int *) 
&(array[6]),byeights,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[62]]); - assert(EXTRACT256(_counts,1) == counts[array[54]]); - assert(EXTRACT256(_counts,2) == counts[array[46]]); - assert(EXTRACT256(_counts,3) == counts[array[38]]); - assert(EXTRACT256(_counts,4) == counts[array[30]]); - assert(EXTRACT256(_counts,5) == counts[array[22]]); - assert(EXTRACT256(_counts,6) == counts[array[14]]); - assert(EXTRACT256(_counts,7) == counts[array[6]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 24,chrpos,24)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 24; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 25,chrpos,25)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 25; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 26,chrpos,26)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 26; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 27,chrpos,27)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 27; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 28,chrpos,28)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 28; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 29,chrpos,29)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 29; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 30,chrpos,30)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 30; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u 
(%u - %d)\n",EXTRACT256(_masked,7),chrpos - 31,chrpos,31)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 31; - } - } -#else - masked = array[126]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 16,chrpos,16)); - table[--pointers[masked]] = chrpos - 16; + chrpos -= 16; + ptr += 16; } - masked = array[118]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 17,chrpos,17)); - table[--pointers[masked]] = chrpos - 17; - } + return chrpos; +} +#endif /* HAVE_AVX512 */ - masked = array[110]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 18,chrpos,18)); - table[--pointers[masked]] = chrpos - 18; - } - masked = array[102]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 19,chrpos,19)); - table[--pointers[masked]] = chrpos - 19; - } - masked = array[94]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 20,chrpos,20)); - table[--pointers[masked]] = chrpos - 20; - } - masked = array[86]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 21,chrpos,21)); - table[--pointers[masked]] = chrpos - 21; - } +#if !defined(HAVE_AVX2) - masked = array[78]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 22,chrpos,22)); - table[--pointers[masked]] = chrpos - 22; - } +static void +count_9mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { + Genomecomp_T masked, oligo; +#ifdef INDIVIDUAL_SHIFTS 
+#elif defined(SIMD_MASK_THEN_STORE) + UINT4 _masked[4] __attribute__ ((aligned (16))); + __m128i _oligo; +#else + __m128i _oligo, _masked; +#endif - masked = array[70]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 23,chrpos,23)); - table[--pointers[masked]] = chrpos - 23; - } - masked = array[62]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 24,chrpos,24)); - table[--pointers[masked]] = chrpos - 24; - } + oligo = nexthigh_rev >> 16; /* For 31..24 */ + oligo |= low_rev << 16; - masked = array[54]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 25,chrpos,25)); - table[--pointers[masked]] = chrpos - 25; - } +#ifdef INDIVIDUAL_SHIFTS + masked = oligo & MASK9; /* 31 */ + INCR_COUNT(counts[masked]); + debug(printf("31 %04X => %d\n",masked,counts[masked])); - masked = array[46]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 26,chrpos,26)); - table[--pointers[masked]] = chrpos - 26; - } + masked = (oligo >> 2) & MASK9; /* 30 */ + INCR_COUNT(counts[masked]); + debug(printf("30 %04X => %d\n",masked,counts[masked])); - masked = array[38]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 27,chrpos,27)); - table[--pointers[masked]] = chrpos - 27; - } + masked = (oligo >> 4) & MASK9; /* 29 */ + INCR_COUNT(counts[masked]); + debug(printf("29 %04X => %d\n",masked,counts[masked])); - masked = array[30]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 28,chrpos,28)); - table[--pointers[masked]] = chrpos - 28; - } + masked = 
(oligo >> 6) & MASK9; /* 28 */ + INCR_COUNT(counts[masked]); + debug(printf("28 %04X => %d\n",masked,counts[masked])); - masked = array[22]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 29,chrpos,29)); - table[--pointers[masked]] = chrpos - 29; - } + masked = (oligo >> 8) & MASK9; /* 27 */ + INCR_COUNT(counts[masked]); + debug(printf("27 %04X => %d\n",masked,counts[masked])); - masked = array[14]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 30,chrpos,30)); - table[--pointers[masked]] = chrpos - 30; - } + masked = (oligo >> 10) & MASK9; /* 26 */ + INCR_COUNT(counts[masked]); + debug(printf("26 %04X => %d\n",masked,counts[masked])); - masked = array[6]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 31,chrpos,31)); - table[--pointers[masked]] = chrpos - 31; - } -#endif + masked = (oligo >> 12) & MASK9; /* 25 */ + INCR_COUNT(counts[masked]); + debug(printf("25 %04X => %d\n",masked,counts[masked])); - /* Row 5 */ -#ifdef USE_GATHER - _masked = _mm256_i32gather_epi32((int *) &(array[64+5]),byeights,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[125]]); - assert(EXTRACT256(_counts,1) == counts[array[117]]); - assert(EXTRACT256(_counts,2) == counts[array[109]]); - assert(EXTRACT256(_counts,3) == counts[array[101]]); - assert(EXTRACT256(_counts,4) == counts[array[93]]); - assert(EXTRACT256(_counts,5) == counts[array[85]]); - assert(EXTRACT256(_counts,6) == counts[array[77]]); - assert(EXTRACT256(_counts,7) == counts[array[69]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 32,chrpos,32)); - 
table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 32; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 33,chrpos,33)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 33; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 34,chrpos,34)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 34; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 35,chrpos,35)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 35; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 36,chrpos,36)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 36; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 37,chrpos,37)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 37; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 38,chrpos,38)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 38; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 39,chrpos,39)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 39; - } - } + masked = (oligo >> 14) & MASK9; /* 24 */ + INCR_COUNT(counts[masked]); + debug(printf("24 %04X => %d\n",masked,counts[masked])); - _masked = _mm256_i32gather_epi32((int *) &(array[5]),byeights,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[61]]); - assert(EXTRACT256(_counts,1) == counts[array[53]]); - assert(EXTRACT256(_counts,2) == counts[array[45]]); - assert(EXTRACT256(_counts,3) == counts[array[37]]); - assert(EXTRACT256(_counts,4) == counts[array[29]]); - assert(EXTRACT256(_counts,5) == 
counts[array[21]]); - assert(EXTRACT256(_counts,6) == counts[array[13]]); - assert(EXTRACT256(_counts,7) == counts[array[5]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 40,chrpos,40)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 40; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 41,chrpos,41)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 41; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 42,chrpos,42)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 42; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 43,chrpos,43)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 43; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 44,chrpos,44)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 44; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 45,chrpos,45)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 45; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 46,chrpos,46)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 46; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 47,chrpos,47)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 47; - } - } #else - masked = array[125]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 32,chrpos,32)); - table[--pointers[masked]] = chrpos - 32; - } + _oligo = _mm_setr_epi32(oligo, oligo >> 
2, oligo >> 4, oligo >> 6); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9)); +#else + _masked = _mm_and_si128(_oligo, mask9); +#endif - masked = array[117]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 33,chrpos,33)); - table[--pointers[masked]] = chrpos - 33; - } + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("31 %04X => %d\n",masked,counts[masked])); - masked = array[109]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 34,chrpos,34)); - table[--pointers[masked]] = chrpos - 34; - } + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("30 %04X => %d\n",masked,counts[masked])); - masked = array[101]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 35,chrpos,35)); - table[--pointers[masked]] = chrpos - 35; - } + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("29 %04X => %d\n",masked,counts[masked])); - masked = array[93]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 36,chrpos,36)); - table[--pointers[masked]] = chrpos - 36; - } + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("28 %04X => %d\n",masked,counts[masked])); - masked = array[85]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 37,chrpos,37)); - table[--pointers[masked]] = chrpos - 37; - } - masked = array[77]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 38,chrpos,38)); - table[--pointers[masked]] 
= chrpos - 38; - } + _oligo = _mm_srli_epi32(_oligo, 8); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9)); +#else + _masked = _mm_and_si128(_oligo, mask9); +#endif - masked = array[69]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 39,chrpos,39)); - table[--pointers[masked]] = chrpos - 39; - } + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("27 %04X => %d\n",masked,counts[masked])); - masked = array[61]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 40,chrpos,40)); - table[--pointers[masked]] = chrpos - 40; - } + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("26 %04X => %d\n",masked,counts[masked])); - masked = array[53]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 41,chrpos,41)); - table[--pointers[masked]] = chrpos - 41; - } + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("25 %04X => %d\n",masked,counts[masked])); - masked = array[45]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 42,chrpos,42)); - table[--pointers[masked]] = chrpos - 42; - } + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("24 %04X => %d\n",masked,counts[masked])); +#endif - masked = array[37]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 43,chrpos,43)); - table[--pointers[masked]] = chrpos - 43; - } - masked = array[29]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 
44,chrpos,44)); - table[--pointers[masked]] = chrpos - 44; - } +#ifdef INDIVIDUAL_SHIFTS + masked = low_rev & MASK9; /* 23 */ + INCR_COUNT(counts[masked]); + debug(printf("23 %04X => %d\n",masked,counts[masked])); - masked = array[21]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 45,chrpos,45)); - table[--pointers[masked]] = chrpos - 45; - } + masked = (low_rev >> 2) & MASK9; /* 22 */ + INCR_COUNT(counts[masked]); + debug(printf("22 %04X => %d\n",masked,counts[masked])); - masked = array[13]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 46,chrpos,46)); - table[--pointers[masked]] = chrpos - 46; - } + masked = (low_rev >> 4) & MASK9; /* 21 */ + INCR_COUNT(counts[masked]); + debug(printf("21 %04X => %d\n",masked,counts[masked])); - masked = array[5]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 47,chrpos,47)); - table[--pointers[masked]] = chrpos - 47; - } -#endif + masked = (low_rev >> 6) & MASK9; /* 20 */ + INCR_COUNT(counts[masked]); + debug(printf("20 %04X => %d\n",masked,counts[masked])); + masked = (low_rev >> 8) & MASK9; /* 19 */ + INCR_COUNT(counts[masked]); + debug(printf("19 %04X => %d\n",masked,counts[masked])); - /* Row 4 */ -#ifdef USE_GATHER - _masked = _mm256_i32gather_epi32((int *) &(array[64+4]),byeights,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[124]]); - assert(EXTRACT256(_counts,1) == counts[array[116]]); - assert(EXTRACT256(_counts,2) == counts[array[108]]); - assert(EXTRACT256(_counts,3) == counts[array[100]]); - assert(EXTRACT256(_counts,4) == counts[array[92]]); - assert(EXTRACT256(_counts,5) == counts[array[84]]); - assert(EXTRACT256(_counts,6) == counts[array[76]]); - 
assert(EXTRACT256(_counts,7) == counts[array[68]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 48,chrpos,48)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 48; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 49,chrpos,49)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 49; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 50,chrpos,50)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 50; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 51,chrpos,51)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 51; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 52,chrpos,52)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 52; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 53,chrpos,53)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 53; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 54,chrpos,54)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 54; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 55,chrpos,55)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 55; - } - } + masked = (low_rev >> 10) & MASK9; /* 18 */ + INCR_COUNT(counts[masked]); + debug(printf("18 %04X => %d\n",masked,counts[masked])); - _masked = _mm256_i32gather_epi32((int *) &(array[4]),byeights,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[60]]); - 
assert(EXTRACT256(_counts,1) == counts[array[52]]); - assert(EXTRACT256(_counts,2) == counts[array[44]]); - assert(EXTRACT256(_counts,3) == counts[array[36]]); - assert(EXTRACT256(_counts,4) == counts[array[28]]); - assert(EXTRACT256(_counts,5) == counts[array[20]]); - assert(EXTRACT256(_counts,6) == counts[array[12]]); - assert(EXTRACT256(_counts,7) == counts[array[4]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 56,chrpos,56)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 56; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 57,chrpos,57)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 57; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 58,chrpos,58)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 58; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 59,chrpos,59)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 59; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 60,chrpos,60)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 60; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 61,chrpos,61)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 61; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 62,chrpos,62)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 62; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 63,chrpos,63)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 63; - } - } -#else - masked = 
array[124]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 48,chrpos,48)); - table[--pointers[masked]] = chrpos - 48; - } + masked = (low_rev >> 12) & MASK9; /* 17 */ + INCR_COUNT(counts[masked]); + debug(printf("17 %04X => %d\n",masked,counts[masked])); - masked = array[116]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 49,chrpos,49)); - table[--pointers[masked]] = chrpos - 49; - } + masked = low_rev >> 14; /* 16, No mask necessary */ + INCR_COUNT(counts[masked]); + debug(printf("16 %04X => %d\n",masked,counts[masked])); - masked = array[108]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 50,chrpos,50)); - table[--pointers[masked]] = chrpos - 50; - } +#else + _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9)); +#else + _masked = _mm_and_si128(_oligo, mask9); +#endif - masked = array[100]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 51,chrpos,51)); - table[--pointers[masked]] = chrpos - 51; - } + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("23 %04X => %d\n",masked,counts[masked])); - masked = array[92]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 52,chrpos,52)); - table[--pointers[masked]] = chrpos - 52; - } + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("22 %04X => %d\n",masked,counts[masked])); - masked = array[84]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing 
masked %u at %u (%u - %d)\n",masked,chrpos - 53,chrpos,53)); - table[--pointers[masked]] = chrpos - 53; - } + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("21 %04X => %d\n",masked,counts[masked])); - masked = array[76]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 54,chrpos,54)); - table[--pointers[masked]] = chrpos - 54; - } + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("20 %04X => %d\n",masked,counts[masked])); - masked = array[68]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 55,chrpos,55)); - table[--pointers[masked]] = chrpos - 55; - } - masked = array[60]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 56,chrpos,56)); - table[--pointers[masked]] = chrpos - 56; - } + _oligo = _mm_srli_epi32(_oligo, 8); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9)); +#else + _masked = _mm_and_si128(_oligo, mask9); +#endif - masked = array[52]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 57,chrpos,57)); - table[--pointers[masked]] = chrpos - 57; - } + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("19 %04X => %d\n",masked,counts[masked])); - masked = array[44]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 58,chrpos,58)); - table[--pointers[masked]] = chrpos - 58; - } + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("18 %04X => %d\n",masked,counts[masked])); - masked = array[36]; - if (counts[masked]) { - assert(pointers[masked] > 
positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 59,chrpos,59)); - table[--pointers[masked]] = chrpos - 59; - } + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("17 %04X => %d\n",masked,counts[masked])); - masked = array[28]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 60,chrpos,60)); - table[--pointers[masked]] = chrpos - 60; - } + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("16 %04X => %d\n",masked,counts[masked])); +#endif - masked = array[20]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 61,chrpos,61)); - table[--pointers[masked]] = chrpos - 61; - } - masked = array[12]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 62,chrpos,62)); - table[--pointers[masked]] = chrpos - 62; - } + oligo = low_rev >> 16; /* For 15..8 */ + oligo |= high_rev << 16; - masked = array[4]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 63,chrpos,63)); - table[--pointers[masked]] = chrpos - 63; - } -#endif +#ifdef INDIVIDUAL_SHIFTS + masked = oligo & MASK9; /* 15 */ + INCR_COUNT(counts[masked]); + debug(printf("15 %04X => %d\n",masked,counts[masked])); + masked = (oligo >> 2) & MASK9; /* 14 */ + INCR_COUNT(counts[masked]); + debug(printf("14 %04X => %d\n",masked,counts[masked])); - /* Row 3 */ -#ifdef USE_GATHER - _masked = _mm256_i32gather_epi32((int *) &(array[64+3]),byeights,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[123]]); - assert(EXTRACT256(_counts,1) == counts[array[115]]); - assert(EXTRACT256(_counts,2) == counts[array[107]]); - 
assert(EXTRACT256(_counts,3) == counts[array[99]]); - assert(EXTRACT256(_counts,4) == counts[array[91]]); - assert(EXTRACT256(_counts,5) == counts[array[83]]); - assert(EXTRACT256(_counts,6) == counts[array[75]]); - assert(EXTRACT256(_counts,7) == counts[array[67]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 64,chrpos,64)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 64; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 65,chrpos,65)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 65; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 66,chrpos,66)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 66; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 67,chrpos,67)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 67; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 68,chrpos,68)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 68; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 69,chrpos,69)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 69; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 70,chrpos,70)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 70; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 71,chrpos,71)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 71; - } - } + masked = (oligo >> 4) & MASK9; /* 13 */ + INCR_COUNT(counts[masked]); + debug(printf("13 %04X => %d\n",masked,counts[masked])); - 
_masked = _mm256_i32gather_epi32((int *) &(array[3]),byeights,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[59]]); - assert(EXTRACT256(_counts,1) == counts[array[51]]); - assert(EXTRACT256(_counts,2) == counts[array[43]]); - assert(EXTRACT256(_counts,3) == counts[array[35]]); - assert(EXTRACT256(_counts,4) == counts[array[27]]); - assert(EXTRACT256(_counts,5) == counts[array[19]]); - assert(EXTRACT256(_counts,6) == counts[array[11]]); - assert(EXTRACT256(_counts,7) == counts[array[3]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 72,chrpos,72)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 72; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 73,chrpos,73)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 73; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 74,chrpos,74)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 74; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 75,chrpos,75)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 75; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 76,chrpos,76)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 76; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 77,chrpos,77)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 77; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 78,chrpos,78)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 78; - } - if (EXTRACT256(_counts,7)) 
{ - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 79,chrpos,79)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 79; - } - } -#else - masked = array[123]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 64,chrpos,64)); - table[--pointers[masked]] = chrpos - 64; - } + masked = (oligo >> 6) & MASK9; /* 12 */ + INCR_COUNT(counts[masked]); + debug(printf("12 %04X => %d\n",masked,counts[masked])); - masked = array[115]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 65,chrpos,65)); - table[--pointers[masked]] = chrpos - 65; - } + masked = (oligo >> 8) & MASK9; /* 11 */ + INCR_COUNT(counts[masked]); + debug(printf("11 %04X => %d\n",masked,counts[masked])); - masked = array[107]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 66,chrpos,66)); - table[--pointers[masked]] = chrpos - 66; - } + masked = (oligo >> 10) & MASK9; /* 10 */ + INCR_COUNT(counts[masked]); + debug(printf("10 %04X => %d\n",masked,counts[masked])); - masked = array[99]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 67,chrpos,67)); - table[--pointers[masked]] = chrpos - 67; - } + masked = (oligo >> 12) & MASK9; /* 9 */ + INCR_COUNT(counts[masked]); + debug(printf("9 %04X => %d\n",masked,counts[masked])); - masked = array[91]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 68,chrpos,68)); - table[--pointers[masked]] = chrpos - 68; - } + masked = (oligo >> 14) & MASK9; /* 8 */ + INCR_COUNT(counts[masked]); + debug(printf("8 %04X => %d\n",masked,counts[masked])); - masked = array[83]; - if 
(counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 69,chrpos,69)); - table[--pointers[masked]] = chrpos - 69; - } +#else + _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9)); +#else + _masked = _mm_and_si128(_oligo, mask9); +#endif - masked = array[75]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 70,chrpos,70)); - table[--pointers[masked]] = chrpos - 70; - } + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("15 %04X => %d\n",masked,counts[masked])); - masked = array[67]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 71,chrpos,71)); - table[--pointers[masked]] = chrpos - 71; - } + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("14 %04X => %d\n",masked,counts[masked])); - masked = array[59]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 72,chrpos,72)); - table[--pointers[masked]] = chrpos - 72; - } + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("13 %04X => %d\n",masked,counts[masked])); - masked = array[51]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 73,chrpos,73)); - table[--pointers[masked]] = chrpos - 73; - } + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("12 %04X => %d\n",masked,counts[masked])); - masked = array[43]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 74,chrpos,74)); - 
table[--pointers[masked]] = chrpos - 74; - } - masked = array[35]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 75,chrpos,75)); - table[--pointers[masked]] = chrpos - 75; - } + _oligo = _mm_srli_epi32(_oligo, 8); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9)); +#else + _masked = _mm_and_si128(_oligo, mask9); +#endif - masked = array[27]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 76,chrpos,76)); - table[--pointers[masked]] = chrpos - 76; - } + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("11 %04X => %d\n",masked,counts[masked])); - masked = array[19]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 77,chrpos,77)); - table[--pointers[masked]] = chrpos - 77; - } + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("10 %04X => %d\n",masked,counts[masked])); - masked = array[11]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 78,chrpos,78)); - table[--pointers[masked]] = chrpos - 78; - } + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("9 %04X => %d\n",masked,counts[masked])); - masked = array[3]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 79,chrpos,79)); - table[--pointers[masked]] = chrpos - 79; - } + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("8 %04X => %d\n",masked,counts[masked])); #endif - /* Row 2 */ -#ifdef USE_GATHER - _masked = _mm256_i32gather_epi32((int *) &(array[64+2]),byeights,/*scale*/4); - _counts = 
_mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[122]]); - assert(EXTRACT256(_counts,1) == counts[array[114]]); - assert(EXTRACT256(_counts,2) == counts[array[106]]); - assert(EXTRACT256(_counts,3) == counts[array[98]]); - assert(EXTRACT256(_counts,4) == counts[array[90]]); - assert(EXTRACT256(_counts,5) == counts[array[82]]); - assert(EXTRACT256(_counts,6) == counts[array[74]]); - assert(EXTRACT256(_counts,7) == counts[array[66]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 80,chrpos,80)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 80; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 81,chrpos,81)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 81; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 82,chrpos,82)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 82; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 83,chrpos,83)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 83; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 84,chrpos,84)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 84; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 85,chrpos,85)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 85; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 86,chrpos,86)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 86; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos 
- 87,chrpos,87)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 87; - } - } +#ifdef INDIVIDUAL_SHIFTS + masked = high_rev & MASK9; /* 7 */ + INCR_COUNT(counts[masked]); + debug(printf("7 %04X => %d\n",masked,counts[masked])); - _masked = _mm256_i32gather_epi32((int *) &(array[2]),byeights,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[58]]); - assert(EXTRACT256(_counts,1) == counts[array[50]]); - assert(EXTRACT256(_counts,2) == counts[array[42]]); - assert(EXTRACT256(_counts,3) == counts[array[34]]); - assert(EXTRACT256(_counts,4) == counts[array[26]]); - assert(EXTRACT256(_counts,5) == counts[array[18]]); - assert(EXTRACT256(_counts,6) == counts[array[10]]); - assert(EXTRACT256(_counts,7) == counts[array[2]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 88,chrpos,88)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 88; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 89,chrpos,89)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 89; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 90,chrpos,90)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 90; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 91,chrpos,91)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 91; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 92,chrpos,92)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 92; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 93,chrpos,93)); - table[--pointers[EXTRACT256(_masked,5)]] = 
chrpos - 93; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 94,chrpos,94)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 94; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 95,chrpos,95)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 95; - } - } -#else - masked = array[122]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 80,chrpos,80)); - table[--pointers[masked]] = chrpos - 80; - } + masked = (high_rev >> 2) & MASK9; /* 6 */ + INCR_COUNT(counts[masked]); + debug(printf("6 %04X => %d\n",masked,counts[masked])); - masked = array[114]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 81,chrpos,81)); - table[--pointers[masked]] = chrpos - 81; - } + masked = (high_rev >> 4) & MASK9; /* 5 */ + INCR_COUNT(counts[masked]); + debug(printf("5 %04X => %d\n",masked,counts[masked])); - masked = array[106]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 82,chrpos,82)); - table[--pointers[masked]] = chrpos - 82; - } + masked = (high_rev >> 6) & MASK9; /* 4 */ + INCR_COUNT(counts[masked]); + debug(printf("4 %04X => %d\n",masked,counts[masked])); - masked = array[98]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 83,chrpos,83)); - table[--pointers[masked]] = chrpos - 83; - } + masked = (high_rev >> 8) & MASK9; /* 3 */ + INCR_COUNT(counts[masked]); + debug(printf("3 %04X => %d\n",masked,counts[masked])); - masked = array[90]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - 
%d)\n",masked,chrpos - 84,chrpos,84)); - table[--pointers[masked]] = chrpos - 84; - } + masked = (high_rev >> 10) & MASK9; /* 2 */ + INCR_COUNT(counts[masked]); + debug(printf("2 %04X => %d\n",masked,counts[masked])); - masked = array[82]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 85,chrpos,85)); - table[--pointers[masked]] = chrpos - 85; - } + masked = (high_rev >> 12) & MASK9; /* 1 */ + INCR_COUNT(counts[masked]); + debug(printf("1 %04X => %d\n",masked,counts[masked])); - masked = array[74]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 86,chrpos,86)); - table[--pointers[masked]] = chrpos - 86; - } + masked = high_rev >> 14; /* 0, No mask necessary */ + INCR_COUNT(counts[masked]); + debug(printf("0 %04X => %d\n",masked,counts[masked])); - masked = array[66]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 87,chrpos,87)); - table[--pointers[masked]] = chrpos - 87; - } +#else + _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9)); +#else + _masked = _mm_and_si128(_oligo, mask9); +#endif - masked = array[58]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 88,chrpos,88)); - table[--pointers[masked]] = chrpos - 88; - } + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("7 %04X => %d\n",masked,counts[masked])); - masked = array[50]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 89,chrpos,89)); - table[--pointers[masked]] = chrpos - 89; - } + masked = 
EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("6 %04X => %d\n",masked,counts[masked])); - masked = array[42]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 90,chrpos,90)); - table[--pointers[masked]] = chrpos - 90; - } + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("5 %04X => %d\n",masked,counts[masked])); - masked = array[34]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 91,chrpos,91)); - table[--pointers[masked]] = chrpos - 91; - } + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("4 %04X => %d\n",masked,counts[masked])); - masked = array[26]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 92,chrpos,92)); - table[--pointers[masked]] = chrpos - 92; - } - masked = array[18]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 93,chrpos,93)); - table[--pointers[masked]] = chrpos - 93; - } + _oligo = _mm_srli_epi32(_oligo, 8); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9)); +#else + _masked = _mm_and_si128(_oligo, mask9); +#endif - masked = array[10]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 94,chrpos,94)); - table[--pointers[masked]] = chrpos - 94; - } + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("3 %04X => %d\n",masked,counts[masked])); - masked = array[2]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 95,chrpos,95)); - table[--pointers[masked]] = chrpos 
- 95; - } + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("2 %04X => %d\n",masked,counts[masked])); + + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("1 %04X => %d\n",masked,counts[masked])); + + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("0 %04X => %d\n",masked,counts[masked])); #endif + return; +} - /* Row 1 */ -#ifdef USE_GATHER - _masked = _mm256_i32gather_epi32((int *) &(array[64+1]),byeights,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[121]]); - assert(EXTRACT256(_counts,1) == counts[array[113]]); - assert(EXTRACT256(_counts,2) == counts[array[105]]); - assert(EXTRACT256(_counts,3) == counts[array[97]]); - assert(EXTRACT256(_counts,4) == counts[array[89]]); - assert(EXTRACT256(_counts,5) == counts[array[81]]); - assert(EXTRACT256(_counts,6) == counts[array[73]]); - assert(EXTRACT256(_counts,7) == counts[array[65]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 96,chrpos,96)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 96; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 97,chrpos,97)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 97; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 98,chrpos,98)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 98; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 99,chrpos,99)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 99; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 100,chrpos,100)); - table[--pointers[EXTRACT256(_masked,4)]] = 
chrpos - 100; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 101,chrpos,101)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 101; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 102,chrpos,102)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 102; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 103,chrpos,103)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 103; - } - } - - _masked = _mm256_i32gather_epi32((int *) &(array[1]),byeights,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[57]]); - assert(EXTRACT256(_counts,1) == counts[array[49]]); - assert(EXTRACT256(_counts,2) == counts[array[41]]); - assert(EXTRACT256(_counts,3) == counts[array[33]]); - assert(EXTRACT256(_counts,4) == counts[array[25]]); - assert(EXTRACT256(_counts,5) == counts[array[17]]); - assert(EXTRACT256(_counts,6) == counts[array[9]]); - assert(EXTRACT256(_counts,7) == counts[array[1]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 104,chrpos,104)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 104; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 105,chrpos,105)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 105; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 106,chrpos,106)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 106; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 107,chrpos,107)); - table[--pointers[EXTRACT256(_masked,3)]] = 
chrpos - 107; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 108,chrpos,108)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 108; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 109,chrpos,109)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 109; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 110,chrpos,110)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 110; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 111,chrpos,111)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 111; - } - } #else - masked = array[121]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 96,chrpos,96)); - table[--pointers[masked]] = chrpos - 96; - } - masked = array[113]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 97,chrpos,97)); - table[--pointers[masked]] = chrpos - 97; - } +static void +count_9mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { + Genomecomp_T masked, oligo; + __m256i _oligo, _masked; - masked = array[105]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 98,chrpos,98)); - table[--pointers[masked]] = chrpos - 98; - } - masked = array[97]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 99,chrpos,99)); - table[--pointers[masked]] = chrpos - 99; - } + oligo = nexthigh_rev >> 16; /* For 31..24 */ + oligo |= low_rev << 16; - masked = array[89]; - 
if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 100,chrpos,100)); - table[--pointers[masked]] = chrpos - 100; - } + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask9); - masked = array[81]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 101,chrpos,101)); - table[--pointers[masked]] = chrpos - 101; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("31 %04X => %d\n",masked,counts[masked])); - masked = array[73]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 102,chrpos,102)); - table[--pointers[masked]] = chrpos - 102; - } + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("30 %04X => %d\n",masked,counts[masked])); - masked = array[65]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 103,chrpos,103)); - table[--pointers[masked]] = chrpos - 103; - } + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("29 %04X => %d\n",masked,counts[masked])); - masked = array[57]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 104,chrpos,104)); - table[--pointers[masked]] = chrpos - 104; - } + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("28 %04X => %d\n",masked,counts[masked])); - masked = array[49]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 105,chrpos,105)); - table[--pointers[masked]] = chrpos - 105; - } + masked = EXTRACT256(_masked,4); + 
INCR_COUNT(counts[masked]); + debug(printf("27 %04X => %d\n",masked,counts[masked])); - masked = array[41]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 106,chrpos,106)); - table[--pointers[masked]] = chrpos - 106; - } + masked = EXTRACT256(_masked,5); + INCR_COUNT(counts[masked]); + debug(printf("26 %04X => %d\n",masked,counts[masked])); - masked = array[33]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 107,chrpos,107)); - table[--pointers[masked]] = chrpos - 107; - } + masked = EXTRACT256(_masked,6); + INCR_COUNT(counts[masked]); + debug(printf("25 %04X => %d\n",masked,counts[masked])); - masked = array[25]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 108,chrpos,108)); - table[--pointers[masked]] = chrpos - 108; - } + masked = EXTRACT256(_masked,7); + INCR_COUNT(counts[masked]); + debug(printf("24 %04X => %d\n",masked,counts[masked])); - masked = array[17]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 109,chrpos,109)); - table[--pointers[masked]] = chrpos - 109; - } - masked = array[9]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 110,chrpos,110)); - table[--pointers[masked]] = chrpos - 110; - } + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask9); - masked = array[1]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 111,chrpos,111)); - table[--pointers[masked]] = chrpos - 111; - } -#endif + masked = EXTRACT256(_masked,0); + 
INCR_COUNT(counts[masked]); + debug(printf("23 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("22 %04X => %d\n",masked,counts[masked])); - /* Row 0 */ -#ifdef USE_GATHER - _masked = _mm256_i32gather_epi32((int *) &(array[64+0]),byeights,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[120]]); - assert(EXTRACT256(_counts,1) == counts[array[112]]); - assert(EXTRACT256(_counts,2) == counts[array[104]]); - assert(EXTRACT256(_counts,3) == counts[array[96]]); - assert(EXTRACT256(_counts,4) == counts[array[88]]); - assert(EXTRACT256(_counts,5) == counts[array[80]]); - assert(EXTRACT256(_counts,6) == counts[array[72]]); - assert(EXTRACT256(_counts,7) == counts[array[64]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 112,chrpos,112)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 112; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 113,chrpos,113)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 113; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 114,chrpos,114)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 114; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 115,chrpos,115)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 115; - } - if (EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 116,chrpos,116)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 116; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 117,chrpos,117)); - 
table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 117; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 118,chrpos,118)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 118; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 119,chrpos,119)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 119; - } - } + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("21 %04X => %d\n",masked,counts[masked])); - _masked = _mm256_i32gather_epi32((int *) &(array[0]),byeights,/*scale*/4); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[array[56]]); - assert(EXTRACT256(_counts,1) == counts[array[48]]); - assert(EXTRACT256(_counts,2) == counts[array[40]]); - assert(EXTRACT256(_counts,3) == counts[array[32]]); - assert(EXTRACT256(_counts,4) == counts[array[24]]); - assert(EXTRACT256(_counts,5) == counts[array[16]]); - assert(EXTRACT256(_counts,6) == counts[array[8]]); - assert(EXTRACT256(_counts,7) == counts[array[0]]); - if (_mm256_testz_si256(_counts,_counts) == 0) { - if (EXTRACT256(_counts,0)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,0),chrpos - 120,chrpos,120)); - table[--pointers[EXTRACT256(_masked,0)]] = chrpos - 120; - } - if (EXTRACT256(_counts,1)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,1),chrpos - 121,chrpos,121)); - table[--pointers[EXTRACT256(_masked,1)]] = chrpos - 121; - } - if (EXTRACT256(_counts,2)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,2),chrpos - 122,chrpos,122)); - table[--pointers[EXTRACT256(_masked,2)]] = chrpos - 122; - } - if (EXTRACT256(_counts,3)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,3),chrpos - 123,chrpos,123)); - table[--pointers[EXTRACT256(_masked,3)]] = chrpos - 123; - } - if 
(EXTRACT256(_counts,4)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,4),chrpos - 124,chrpos,124)); - table[--pointers[EXTRACT256(_masked,4)]] = chrpos - 124; - } - if (EXTRACT256(_counts,5)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,5),chrpos - 125,chrpos,125)); - table[--pointers[EXTRACT256(_masked,5)]] = chrpos - 125; - } - if (EXTRACT256(_counts,6)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,6),chrpos - 126,chrpos,126)); - table[--pointers[EXTRACT256(_masked,6)]] = chrpos - 126; - } - if (EXTRACT256(_counts,7)) { - debug(printf("Storing masked %u at %u (%u - %d)\n",EXTRACT256(_masked,7),chrpos - 127,chrpos,127)); - table[--pointers[EXTRACT256(_masked,7)]] = chrpos - 127; - } - } -#else - masked = array[120]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 112,chrpos,112)); - table[--pointers[masked]] = chrpos - 112; - } + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("20 %04X => %d\n",masked,counts[masked])); - masked = array[112]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 113,chrpos,113)); - table[--pointers[masked]] = chrpos - 113; - } + masked = EXTRACT256(_masked,4); + INCR_COUNT(counts[masked]); + debug(printf("19 %04X => %d\n",masked,counts[masked])); - masked = array[104]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 114,chrpos,114)); - table[--pointers[masked]] = chrpos - 114; - } + masked = EXTRACT256(_masked,5); + INCR_COUNT(counts[masked]); + debug(printf("18 %04X => %d\n",masked,counts[masked])); - masked = array[96]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - 
%d)\n",masked,chrpos - 115,chrpos,115)); - table[--pointers[masked]] = chrpos - 115; - } + masked = EXTRACT256(_masked,6); + INCR_COUNT(counts[masked]); + debug(printf("17 %04X => %d\n",masked,counts[masked])); - masked = array[88]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 116,chrpos,116)); - table[--pointers[masked]] = chrpos - 116; - } + masked = EXTRACT256(_masked,7); + INCR_COUNT(counts[masked]); + debug(printf("16 %04X => %d\n",masked,counts[masked])); - masked = array[80]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 117,chrpos,117)); - table[--pointers[masked]] = chrpos - 117; - } - masked = array[72]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 118,chrpos,118)); - table[--pointers[masked]] = chrpos - 118; - } + oligo = low_rev >> 16; /* For 15..8 */ + oligo |= high_rev << 16; - masked = array[64]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 119,chrpos,119)); - table[--pointers[masked]] = chrpos - 119; - } + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask9); - masked = array[56]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 120,chrpos,120)); - table[--pointers[masked]] = chrpos - 120; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("15 %04X => %d\n",masked,counts[masked])); - masked = array[48]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 121,chrpos,121)); - table[--pointers[masked]] = 
chrpos - 121; - } + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("14 %04X => %d\n",masked,counts[masked])); - masked = array[40]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 122,chrpos,122)); - table[--pointers[masked]] = chrpos - 122; - } + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("13 %04X => %d\n",masked,counts[masked])); - masked = array[32]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 123,chrpos,123)); - table[--pointers[masked]] = chrpos - 123; - } + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("12 %04X => %d\n",masked,counts[masked])); - masked = array[24]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 124,chrpos,124)); - table[--pointers[masked]] = chrpos - 124; - } + masked = EXTRACT256(_masked,4); + INCR_COUNT(counts[masked]); + debug(printf("11 %04X => %d\n",masked,counts[masked])); - masked = array[16]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 125,chrpos,125)); - table[--pointers[masked]] = chrpos - 125; - } + masked = EXTRACT256(_masked,5); + INCR_COUNT(counts[masked]); + debug(printf("10 %04X => %d\n",masked,counts[masked])); - masked = array[8]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 126,chrpos,126)); - table[--pointers[masked]] = chrpos - 126; - } + masked = EXTRACT256(_masked,6); + INCR_COUNT(counts[masked]); + debug(printf("9 %04X => %d\n",masked,counts[masked])); - masked = array[0]; - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - 
debug(printf("Storing masked %u at %u (%u - %d)\n",masked,chrpos - 127,chrpos,127)); - table[--pointers[masked]] = chrpos - 127; - } -#endif + masked = EXTRACT256(_masked,7); + INCR_COUNT(counts[masked]); + debug(printf("8 %04X => %d\n",masked,counts[masked])); - return chrpos - 128; -} -#endif /* HAVE_AVX2 */ + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask9); -#endif /* USE_SIMD_FOR_COUNTS */ + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("7 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("6 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("5 %04X => %d\n",masked,counts[masked])); -#if !defined(HAVE_AVX2) + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("4 %04X => %d\n",masked,counts[masked])); -static void -count_9mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { - Genomecomp_T masked, oligo; -#ifdef INDIVIDUAL_SHIFTS -#elif defined(SIMD_MASK_THEN_STORE) - UINT4 _masked[4] __attribute__ ((aligned (16))); - __m128i _oligo; -#else - __m128i _oligo, _masked; -#endif + masked = EXTRACT256(_masked,4); + INCR_COUNT(counts[masked]); + debug(printf("3 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,5); + INCR_COUNT(counts[masked]); + debug(printf("2 %04X => %d\n",masked,counts[masked])); - oligo = nexthigh_rev >> 16; /* For 31..24 */ - oligo |= low_rev << 16; + masked = EXTRACT256(_masked,6); + INCR_COUNT(counts[masked]); + debug(printf("1 %04X => %d\n",masked,counts[masked])); -#ifdef INDIVIDUAL_SHIFTS - masked = oligo & MASK9; /* 31 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("31 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,7); + INCR_COUNT(counts[masked]); + 
debug(printf("0 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 2) & MASK9; /* 30 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("30 %04X => %d\n",masked,counts[masked])); + return; +} - masked = (oligo >> 4) & MASK9; /* 29 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("29 %04X => %d\n",masked,counts[masked])); +#endif /* HAVE_AVX2 */ - masked = (oligo >> 6) & MASK9; /* 28 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("28 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 8) & MASK9; /* 27 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("27 %04X => %d\n",masked,counts[masked])); +/* Expecting current to have {high0_rev, low0_rev, high1_rev, + low1_rev}, and next to have {low0_rev, high1_rev, low1_rev, and + high2_rev} */ +#ifdef HAVE_SSE2 +static void +extract_9mers_fwd_simd_64 (__m128i *out, __m128i current, __m128i next) { + __m128i oligo; - masked = (oligo >> 10) & MASK9; /* 26 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("26 %04X => %d\n",masked,counts[masked])); + _mm_store_si128(out++, _mm_srli_epi32(current,14)); /* No mask necessary */ + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask9)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask9)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask9)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask9)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask9)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask9)); + _mm_store_si128(out++, _mm_and_si128( current, mask9)); - masked = (oligo >> 12) & MASK9; /* 25 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("25 %04X => %d\n",masked,counts[masked])); + oligo = _mm_or_si128( _mm_srli_epi32(next,16), _mm_slli_epi32(current,16)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,14), mask9)); + 
_mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,12), mask9)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask9)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask9)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask9)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask9)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask9)); + _mm_store_si128(out++, _mm_and_si128( oligo, mask9)); - masked = (oligo >> 14) & MASK9; /* 24 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("24 %04X => %d\n",masked,counts[masked])); + return; +} + +#ifdef USE_UNORDERED_9 +static Chrpos_T +store_9mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16]; + + extract_9mers_fwd_simd_64(array,current,next); + return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array); +} #else - _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9)); -#else - _masked = _mm_and_si128(_oligo, mask9); -#endif +/* Includes extract_9mers_fwd_simd_64_ordered (__m128i *out, __m128i current, __m128i next) */ +static Chrpos_T +store_9mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16], *out; + __m128i oligo; + __m128i _row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7, + _row8, _row9, _row10, _row11, _row12, _row13, _row14, _row15; + __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; - masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("31 %04X => %d\n",masked,counts[masked])); + out = &(array[0]); - masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("30 %04X => %d\n",masked,counts[masked])); + oligo = _mm_or_si128( 
_mm_srli_epi32(next,16), _mm_slli_epi32(current,16)); + _row0 = _mm_and_si128( oligo, mask9); + _row1 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask9); + _row2 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask9); + _row3 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask9); + _row4 = _mm_and_si128( _mm_srli_epi32(oligo,8), mask9); + _row5 = _mm_and_si128( _mm_srli_epi32(oligo,10), mask9); + _row6 = _mm_and_si128( _mm_srli_epi32(oligo,12), mask9); + _row7 = _mm_and_si128( _mm_srli_epi32(oligo,14), mask9); + + _row8 = _mm_and_si128( current, mask9); + _row9 = _mm_and_si128( _mm_srli_epi32(current,2), mask9); + _row10 = _mm_and_si128( _mm_srli_epi32(current,4), mask9); + _row11 = _mm_and_si128( _mm_srli_epi32(current,6), mask9); + _row12 = _mm_and_si128( _mm_srli_epi32(current,8), mask9); + _row13 = _mm_and_si128( _mm_srli_epi32(current,10), mask9); + _row14 = _mm_and_si128( _mm_srli_epi32(current,12), mask9); + _row15 = _mm_srli_epi32(current,14); /* No mask necessary */ + + + /* Split: top half */ + _t0 = _mm_unpackhi_epi32(_row0,_row1); + _t1 = _mm_unpackhi_epi32(_row2,_row3); + _t2 = _mm_unpackhi_epi32(_row4,_row5); + _t3 = _mm_unpackhi_epi32(_row6,_row7); + _t4 = _mm_unpackhi_epi32(_row8,_row9); + _t5 = _mm_unpackhi_epi32(_row10,_row11); + _t6 = _mm_unpackhi_epi32(_row12,_row13); + _t7 = _mm_unpackhi_epi32(_row14,_row15); + + _mm_store_si128(out++, _mm_unpackhi_epi64(_t0,_t1)); + _mm_store_si128(out++, _mm_unpackhi_epi64(_t2,_t3)); + _mm_store_si128(out++, _mm_unpackhi_epi64(_t4,_t5)); + _mm_store_si128(out++, _mm_unpackhi_epi64(_t6,_t7)); + _mm_store_si128(out++, _mm_unpacklo_epi64(_t0,_t1)); + _mm_store_si128(out++, _mm_unpacklo_epi64(_t2,_t3)); + _mm_store_si128(out++, _mm_unpacklo_epi64(_t4,_t5)); + _mm_store_si128(out++, _mm_unpacklo_epi64(_t6,_t7)); + + + /* Split: bottom half */ + _t0 = _mm_unpacklo_epi32(_row0,_row1); + _t1 = _mm_unpacklo_epi32(_row2,_row3); + _t2 = _mm_unpacklo_epi32(_row4,_row5); + _t3 = _mm_unpacklo_epi32(_row6,_row7); + _t4 = 
_mm_unpacklo_epi32(_row8,_row9); + _t5 = _mm_unpacklo_epi32(_row10,_row11); + _t6 = _mm_unpacklo_epi32(_row12,_row13); + _t7 = _mm_unpacklo_epi32(_row14,_row15); + + _mm_store_si128(out++, _mm_unpackhi_epi64(_t0,_t1)); + _mm_store_si128(out++, _mm_unpackhi_epi64(_t2,_t3)); + _mm_store_si128(out++, _mm_unpackhi_epi64(_t4,_t5)); + _mm_store_si128(out++, _mm_unpackhi_epi64(_t6,_t7)); + _mm_store_si128(out++, _mm_unpacklo_epi64(_t0,_t1)); + _mm_store_si128(out++, _mm_unpacklo_epi64(_t2,_t3)); + _mm_store_si128(out++, _mm_unpacklo_epi64(_t4,_t5)); + _mm_store_si128(out++, _mm_unpacklo_epi64(_t6,_t7)); - masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("29 %04X => %d\n",masked,counts[masked])); + return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif + + +#ifdef HAVE_AVX2 +static void +extract_9mers_fwd_simd_128 (__m256i *out, __m256i current, __m256i next) { + __m256i oligo; + + _mm256_store_si256(out++, _mm256_srli_epi32(current,14)); /* No mask necessary */ + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask9)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask9)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask9)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask9)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask9)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask9)); + _mm256_store_si256(out++, _mm256_and_si256( current, bigmask9)); + + oligo = _mm256_or_si256( _mm256_srli_epi32(next,16), _mm256_slli_epi32(current,16)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,14), bigmask9)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask9)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,10), 
bigmask9)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask9)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask9)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask9)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask9)); + _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask9)); + + return; +} + +#ifdef USE_UNORDERED_9 +static Chrpos_T +store_9mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + __m256i array[16]; + + extract_9mers_fwd_simd_128(array,current,next); + return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array); +} + +#else +/* Includes extract_9mers_fwd_simd_128_ordered (__m256i *out, __m256i current, __m256i next) */ +static Chrpos_T +store_9mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + __m256i array[16], *out; + __m256i oligo; + __m256i _row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7, + _row8, _row9, _row10, _row11, _row12, _row13, _row14, _row15; + __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; + + out = &(array[0]); + + oligo = _mm256_or_si256( _mm256_srli_epi32(next,16), _mm256_slli_epi32(current,16)); + _row0 = _mm256_and_si256( oligo, bigmask9); + _row1 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask9); + _row2 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask9); + _row3 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask9); + _row4 = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask9); + _row5 = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask9); + _row6 = _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask9); + _row7 = _mm256_and_si256( _mm256_srli_epi32(oligo,14), bigmask9); + + _row8 = _mm256_and_si256( current, bigmask9); + _row9 = 
_mm256_and_si256( _mm256_srli_epi32(current,2), bigmask9); + _row10 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask9); + _row11 = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask9); + _row12 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask9); + _row13 = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask9); + _row14 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask9); + _row15 = _mm256_srli_epi32(current,14); + + + /* Split: top half */ + _t0 = _mm256_unpackhi_epi32(_row0,_row1); + _t1 = _mm256_unpackhi_epi32(_row2,_row3); + _t2 = _mm256_unpackhi_epi32(_row4,_row5); + _t3 = _mm256_unpackhi_epi32(_row6,_row7); + _t4 = _mm256_unpackhi_epi32(_row8,_row9); + _t5 = _mm256_unpackhi_epi32(_row10,_row11); + _t6 = _mm256_unpackhi_epi32(_row12,_row13); + _t7 = _mm256_unpackhi_epi32(_row14,_row15); + + _u0 = _mm256_unpackhi_epi64(_t0,_t1); + _u1 = _mm256_unpackhi_epi64(_t2,_t3); + _u2 = _mm256_unpackhi_epi64(_t4,_t5); + _u3 = _mm256_unpackhi_epi64(_t6,_t7); + _u4 = _mm256_unpacklo_epi64(_t0,_t1); + _u5 = _mm256_unpacklo_epi64(_t2,_t3); + _u6 = _mm256_unpacklo_epi64(_t4,_t5); + _u7 = _mm256_unpacklo_epi64(_t6,_t7); + + /* Split: bottom half */ + _t0 = _mm256_unpacklo_epi32(_row0,_row1); + _t1 = _mm256_unpacklo_epi32(_row2,_row3); + _t2 = _mm256_unpacklo_epi32(_row4,_row5); + _t3 = _mm256_unpacklo_epi32(_row6,_row7); + _t4 = _mm256_unpacklo_epi32(_row8,_row9); + _t5 = _mm256_unpacklo_epi32(_row10,_row11); + _t6 = _mm256_unpacklo_epi32(_row12,_row13); + _t7 = _mm256_unpacklo_epi32(_row14,_row15); + + _row8 = _mm256_unpackhi_epi64(_t0,_t1); + _row9 = _mm256_unpackhi_epi64(_t2,_t3); + _row10 = _mm256_unpackhi_epi64(_t4,_t5); + _row11 = _mm256_unpackhi_epi64(_t6,_t7); + _row12 = _mm256_unpacklo_epi64(_t0,_t1); + _row13 = _mm256_unpacklo_epi64(_t2,_t3); + _row14 = _mm256_unpacklo_epi64(_t4,_t5); + _row15 = _mm256_unpacklo_epi64(_t6,_t7); + + + _mm256_store_si256(out++, _mm256_permute2x128_si256(_u0, _u1, 0x31)); + _mm256_store_si256(out++, 
_mm256_permute2x128_si256(_u2, _u3, 0x31)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_u4, _u5, 0x31)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_u6, _u7, 0x31)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_row8, _row9, 0x31)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_row10, _row11, 0x31)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_row12, _row13, 0x31)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_row14, _row15, 0x31)); + + _mm256_store_si256(out++, _mm256_permute2x128_si256(_u0, _u1, 0x20)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_u2, _u3, 0x20)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_u4, _u5, 0x20)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_u6, _u7, 0x20)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_row8, _row9, 0x20)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_row10, _row11, 0x20)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_row12, _row13, 0x20)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_row14, _row15, 0x20)); + + return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif + +#ifdef HAVE_AVX512 +static void +extract_9mers_fwd_simd_256 (__m512i *out, __m512i current, __m512i next) { + __m512i oligo; + + _mm512_store_si512(out++, _mm512_srli_epi32(current,14)); /* No mask necessary */ + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask9)); + _mm512_store_si512(out++, 
_mm512_and_si512( current, hugemask9)); + + oligo = _mm512_or_si512( _mm512_srli_epi32(next,16), _mm512_slli_epi32(current,16)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,14), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,12), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask9)); + + return; +} + +#ifdef USE_UNORDERED_9 +static Chrpos_T +store_9mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i next) { + __m512i array[16]; + + extract_9mers_fwd_simd_256(array,current,next); + return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array); +} + +#else +/* Includes extract_9mers_fwd_simd_256_ordered (__m512i *out, __m512i current, __m512i next) */ +static Chrpos_T +store_9mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i next) { + __m512i array[16], *out; + __m512i oligo, _shuffle0, _shuffle1, _shuffle2; + __m512i _row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7, + _row8, _row9, _row10, _row11, _row12, _row13, _row14, _row15; + __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; + + out = &(array[0]); + + oligo = _mm512_or_si512( _mm512_srli_epi32(next,16), _mm512_slli_epi32(current,16)); + _row0 = _mm512_and_si512( oligo, hugemask9); + _row1 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask9); + _row2 = _mm512_and_si512( _mm512_srli_epi32(oligo,4), 
hugemask9); + _row3 = _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask9); + _row4 = _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask9); + _row5 = _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask9); + _row6 = _mm512_and_si512( _mm512_srli_epi32(oligo,12), hugemask9); + _row7 = _mm512_and_si512( _mm512_srli_epi32(oligo,14), hugemask9); + + _row8 = _mm512_and_si512( current, hugemask9); + _row9 = _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask9); + _row10 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask9); + _row11 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask9); + _row12 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask9); + _row13 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask9); + _row14 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask9); + _row15 = _mm512_srli_epi32(current,14); /* No mask necessary */ + + + /* Split: top half */ + _t0 = _mm512_unpackhi_epi32(_row0,_row1); + _t1 = _mm512_unpackhi_epi32(_row2,_row3); + _t2 = _mm512_unpackhi_epi32(_row4,_row5); + _t3 = _mm512_unpackhi_epi32(_row6,_row7); + _t4 = _mm512_unpackhi_epi32(_row8,_row9); + _t5 = _mm512_unpackhi_epi32(_row10,_row11); + _t6 = _mm512_unpackhi_epi32(_row12,_row13); + _t7 = _mm512_unpackhi_epi32(_row14,_row15); + + _u0 = _mm512_unpackhi_epi64(_t0,_t1); + _u1 = _mm512_unpackhi_epi64(_t2,_t3); + _u2 = _mm512_unpackhi_epi64(_t4,_t5); + _u3 = _mm512_unpackhi_epi64(_t6,_t7); + _u4 = _mm512_unpacklo_epi64(_t0,_t1); + _u5 = _mm512_unpacklo_epi64(_t2,_t3); + _u6 = _mm512_unpacklo_epi64(_t4,_t5); + _u7 = _mm512_unpacklo_epi64(_t6,_t7); + + /* Split: bottom half */ + _t0 = _mm512_unpacklo_epi32(_row0,_row1); + _t1 = _mm512_unpacklo_epi32(_row2,_row3); + _t2 = _mm512_unpacklo_epi32(_row4,_row5); + _t3 = _mm512_unpacklo_epi32(_row6,_row7); + _t4 = _mm512_unpacklo_epi32(_row8,_row9); + _t5 = _mm512_unpacklo_epi32(_row10,_row11); + _t6 = _mm512_unpacklo_epi32(_row12,_row13); + _t7 = _mm512_unpacklo_epi32(_row14,_row15); + + 
_row8 = _mm512_unpackhi_epi64(_t0,_t1); + _row9 = _mm512_unpackhi_epi64(_t2,_t3); + _row10 = _mm512_unpackhi_epi64(_t4,_t5); + _row11 = _mm512_unpackhi_epi64(_t6,_t7); + _row12 = _mm512_unpacklo_epi64(_t0,_t1); + _row13 = _mm512_unpacklo_epi64(_t2,_t3); + _row14 = _mm512_unpacklo_epi64(_t4,_t5); + _row15 = _mm512_unpacklo_epi64(_t6,_t7); + + + /* Split: top half */ + _shuffle0 = _mm512_setr_epi64(6, 7, 8+6, 8+7, 4, 5, 8+4, 8+5); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); + _t4 = _mm512_permutex2var_epi64(_row8, _shuffle0, _row9); + _t5 = _mm512_permutex2var_epi64(_row10, _shuffle0, _row11); + _t6 = _mm512_permutex2var_epi64(_row12, _shuffle0, _row13); + _t7 = _mm512_permutex2var_epi64(_row14, _shuffle0, _row15); + + _shuffle1 = _mm512_setr_epi64(0, 1, 2, 3, 8+0, 8+1, 8+2, 8+3); + _shuffle2 = _mm512_setr_epi64(4, 5, 6, 7, 8+4, 8+5, 8+6, 8+7); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t0, _shuffle1, _t1)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t2, _shuffle1, _t3)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t4, _shuffle1, _t5)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t6, _shuffle1, _t7)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t0, _shuffle2, _t1)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t2, _shuffle2, _t3)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t4, _shuffle2, _t5)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t6, _shuffle2, _t7)); + + /* Split: bottom half */ + _shuffle0 = _mm512_setr_epi64(2, 3, 8+2, 8+3, 0, 1, 8+0, 8+1); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); + _t4 = 
_mm512_permutex2var_epi64(_row8, _shuffle0, _row9); + _t5 = _mm512_permutex2var_epi64(_row10, _shuffle0, _row11); + _t6 = _mm512_permutex2var_epi64(_row12, _shuffle0, _row13); + _t7 = _mm512_permutex2var_epi64(_row14, _shuffle0, _row15); + + /* _shuffle1 = _mm512_setr_epi64(0, 1, 2, 3, 8+0, 8+1, 8+2, 8+3); */ + /* _shuffle2 = _mm512_setr_epi64(4, 5, 6, 7, 8+4, 8+5, 8+6, 8+7); */ + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t0, _shuffle1, _t1)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t2, _shuffle1, _t3)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t4, _shuffle1, _t5)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t6, _shuffle1, _t7)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t0, _shuffle2, _t1)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t2, _shuffle2, _t3)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t4, _shuffle2, _t5)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t6, _shuffle2, _t7)); + + return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif + + +#if !defined(HAVE_AVX2) +static int +store_9mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { + Genomecomp_T masked, oligo; +#ifdef INDIVIDUAL_SHIFTS +#elif defined(SIMD_MASK_THEN_STORE) + UINT4 _masked[4] __attribute__ ((aligned (16))); + __m128i _oligo; +#else + __m128i _oligo, _masked; +#endif + + + oligo = nexthigh_rev >> 16; /* For 31..24 */ + oligo |= low_rev << 16; + +#ifdef INDIVIDUAL_SHIFTS + masked = oligo & MASK9; /* 31 */ + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; + } + + masked = (oligo >> 2) & MASK9; /* 30 */ + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; + 
} + + masked = (oligo >> 4) & MASK9; /* 29 */ + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; + } + + masked = (oligo >> 6) & MASK9; /* 28 */ + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; + } + + masked = (oligo >> 8) & MASK9; /* 27 */ + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; + } + + masked = (oligo >> 10) & MASK9; /* 26 */ + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; + } + + masked = (oligo >> 12) & MASK9; /* 25 */ + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; + } + + masked = (oligo >> 14) & MASK9; /* 24 */ + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; + } + +#else + _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9)); +#else + _masked = _mm_and_si128(_oligo, mask9); +#endif + + masked = EXTRACT(_masked,0); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; + } + + masked = EXTRACT(_masked,1); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; + } + + masked = EXTRACT(_masked,2); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; + } masked = 
EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("28 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -12238,55 +11501,79 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("27 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("26 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("25 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("24 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; + } #endif #ifdef INDIVIDUAL_SHIFTS masked = low_rev & MASK9; /* 23 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("23 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; + } masked = (low_rev >> 2) & MASK9; /* 22 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("22 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + 
debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; + } masked = (low_rev >> 4) & MASK9; /* 21 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("21 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; + } masked = (low_rev >> 6) & MASK9; /* 20 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("20 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; + } masked = (low_rev >> 8) & MASK9; /* 19 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("19 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + (--counts[masked])] = chrpos - 12; + } masked = (low_rev >> 10) & MASK9; /* 18 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("18 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; + } masked = (low_rev >> 12) & MASK9; /* 17 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("17 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; + } masked = low_rev >> 14; /* 16, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("16 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; + } #else _oligo = _mm_setr_epi32(low_rev, low_rev 
>> 2, low_rev >> 4, low_rev >> 6); @@ -12297,20 +11584,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("23 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("22 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("21 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("20 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -12321,20 +11616,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("19 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + (--counts[masked])] = chrpos - 12; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("18 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("17 %04X => %d\n",masked,counts[masked])); 
+ if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("16 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; + } #endif @@ -12343,36 +11646,52 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK9; /* 15 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("15 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; + } masked = (oligo >> 2) & MASK9; /* 14 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("14 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; + } masked = (oligo >> 4) & MASK9; /* 13 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("13 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; + } masked = (oligo >> 6) & MASK9; /* 12 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("12 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = chrpos - 19; + } masked = (oligo >> 8) & MASK9; /* 11 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("11 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; + } 
masked = (oligo >> 10) & MASK9; /* 10 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("10 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; + } masked = (oligo >> 12) & MASK9; /* 9 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("9 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; + } - masked = (oligo >> 14) & MASK9; /* 8 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("8 %04X => %d\n",masked,counts[masked])); + masked = (oligo >> 14) & MASK9; /* 8 */ + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; + } #else _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); @@ -12383,20 +11702,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("15 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("14 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("13 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; + } + + masked = EXTRACT(_masked,2); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; + } masked = EXTRACT(_masked,3); - 
INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("12 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = chrpos - 19; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -12407,55 +11734,79 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("11 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("10 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("9 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("8 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; + } #endif #ifdef INDIVIDUAL_SHIFTS masked = high_rev & MASK9; /* 7 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("7 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; + } masked = (high_rev >> 2) & MASK9; /* 6 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("6 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing 
masked %u at %u\n",masked,chrpos - 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; + } masked = (high_rev >> 4) & MASK9; /* 5 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("5 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; + } masked = (high_rev >> 6) & MASK9; /* 4 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("4 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; + } masked = (high_rev >> 8) & MASK9; /* 3 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("3 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; + } masked = (high_rev >> 10) & MASK9; /* 2 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("2 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; + } masked = (high_rev >> 12) & MASK9; /* 1 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("1 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; + } masked = high_rev >> 14; /* 0, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("0 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; + } #else _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, 
high_rev >> 6); @@ -12466,20 +11817,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("7 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("6 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("5 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("4 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -12490,2736 +11849,1297 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("3 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("2 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("1 %04X => %d\n",masked,counts[masked])); + if 
(counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("0 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; + } #endif - return; + return chrpos - 32; } #else /* HAVE_AVX2 */ -static void -count_9mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { +static int +store_9mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { Genomecomp_T masked, oligo; - __m256i _oligo, _masked; -#ifdef CHECK_FOR_OVERFLOW - __m256i _counts_after, _counts_neg; -#endif + __m256i _oligo, _masked, _counts; + __m256i _blocks, _envelopes, _addresses, _address_mask, _count_mask; + + + _address_mask = _mm256_set1_epi32(0x3); + _count_mask = _mm256_set1_epi32(0xFF); oligo = nexthigh_rev >> 16; /* For 31..24 */ oligo |= low_rev << 16; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask9); - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("31 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("30 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("29 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("28 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("27 %04X => %d\n",masked,counts[masked])); - - masked = 
EXTRACT256(_masked,5); - counts[masked] += 1; - debug(printf("26 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); - masked = EXTRACT256(_masked,6); - counts[masked] += 1; - debug(printf("25 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,7); - counts[masked] += 1; - debug(printf("24 %04X => %d\n",masked,counts[masked])); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; } - if (EXTRACT256(_counts_neg,7)) { - 
inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; } } -#endif /* CHECK_FOR_OVERFLOW */ - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask9); + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; + } + } - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("23 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; + } + } - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("22 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,4)) { + masked = EXTRACT256(_masked,4); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; + } + } - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("21 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,5)) { + masked = EXTRACT256(_masked,5); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; + } + } - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("20 %04X => 
%d\n",masked,counts[masked])); + if (EXTRACT256(_counts,6)) { + masked = EXTRACT256(_masked,6); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; + } + } - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("19 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,7)) { + masked = EXTRACT256(_masked,7); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; + } + } - masked = EXTRACT256(_masked,5); - counts[masked] += 1; - debug(printf("18 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT256(_masked,6); - counts[masked] += 1; - debug(printf("17 %04X => %d\n",masked,counts[masked])); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask9); - masked = EXTRACT256(_masked,7); - counts[masked] += 1; - debug(printf("16 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) 
{ + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,4)) { + masked = EXTRACT256(_masked,4); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + (--counts[masked])] = chrpos - 12; } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,5)) { + masked = EXTRACT256(_masked,5); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = 
chrpos - 13; } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,6)) { + masked = EXTRACT256(_masked,6); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,7)) { + masked = EXTRACT256(_masked,7); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; } } -#endif /* CHECK_FOR_OVERFLOW */ oligo = low_rev >> 16; /* For 15..8 */ oligo |= high_rev << 16; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask9); - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("15 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("14 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("13 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("12 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("11 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,5); - counts[masked] += 1; - debug(printf("10 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,6); - counts[masked] += 1; - debug(printf("9 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,7); - counts[masked] += 1; - debug(printf("8 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 
bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = 
chrpos - 18; } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = chrpos - 19; } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,4)) { + masked = EXTRACT256(_masked,4); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; } } -#endif /* CHECK_FOR_OVERFLOW */ + if (EXTRACT256(_counts,5)) { + masked = EXTRACT256(_masked,5); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; + } + } - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask9); - - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("7 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("6 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("5 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("4 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,6)) { + masked = EXTRACT256(_masked,6); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; + } + } - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("3 %04X => %d\n",masked,counts[masked])); 
+ if (EXTRACT256(_counts,7)) { + masked = EXTRACT256(_masked,7); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; + } + } - masked = EXTRACT256(_masked,5); - counts[masked] += 1; - debug(printf("2 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT256(_masked,6); - counts[masked] += 1; - debug(printf("1 %04X => %d\n",masked,counts[masked])); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask9); - masked = EXTRACT256(_masked,7); - counts[masked] += 1; - debug(printf("0 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { 
/* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,4)) { + masked = EXTRACT256(_masked,4); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,5)) { + masked = EXTRACT256(_masked,5); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,6)) { + masked = EXTRACT256(_masked,6); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; } - if (EXTRACT256(_counts_neg,7)) { - 
inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,7)) { + masked = EXTRACT256(_masked,7); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; } } -#endif /* CHECK_FOR_OVERFLOW */ - return; + return chrpos - 32; } #endif /* HAVE_AVX2 */ -/* Expecting current to have {high0_rev, low0_rev, high1_rev, - low1_rev}, and next to have {low0_rev, high1_rev, low1_rev, and - high2_rev} */ -#ifdef USE_SIMD_FOR_COUNTS +#if !defined(HAVE_AVX2) + static void -extract_9mers_fwd_simd (__m128i *out, __m128i current, __m128i next) { - __m128i oligo; +count_8mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { + Genomecomp_T masked, oligo; +#ifdef INDIVIDUAL_SHIFTS +#elif defined(SIMD_MASK_THEN_STORE) + UINT4 _masked[4] __attribute__ ((aligned (16))); + __m128i _oligo; +#else + __m128i _oligo, _masked; +#endif - _mm_store_si128(out++, _mm_srli_epi32(current,14)); /* No mask necessary */ - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask9)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask9)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask9)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask9)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask9)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask9)); - _mm_store_si128(out++, _mm_and_si128( current, mask9)); - oligo = _mm_or_si128( _mm_srli_epi32(next,16), _mm_slli_epi32(current,16)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,14), mask9)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,12), mask9)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask9)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask9)); 
- _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask9)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask9)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask9)); - _mm_store_si128(out++, _mm_and_si128( oligo, mask9)); + oligo = nexthigh_rev >> 18; /* For 31..25 */ + oligo |= low_rev << 14; - return; -} +#ifdef INDIVIDUAL_SHIFTS + masked = oligo & MASK8; /* 31 */ + INCR_COUNT(counts[masked]); + debug(printf("31 %04X => %d\n",masked,counts[masked])); + masked = (oligo >> 2) & MASK8; /* 30 */ + INCR_COUNT(counts[masked]); + debug(printf("30 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_AVX2 -static void -extract_9mers_fwd_simd_128 (__m256i *out, __m256i current, __m256i next) { - __m256i oligo; + masked = (oligo >> 4) & MASK8; /* 29 */ + INCR_COUNT(counts[masked]); + debug(printf("29 %04X => %d\n",masked,counts[masked])); - _mm256_store_si256(out++, _mm256_srli_epi32(current,14)); /* No mask necessary */ - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask9)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask9)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask9)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask9)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask9)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask9)); - _mm256_store_si256(out++, _mm256_and_si256( current, bigmask9)); + masked = (oligo >> 6) & MASK8; /* 28 */ + INCR_COUNT(counts[masked]); + debug(printf("28 %04X => %d\n",masked,counts[masked])); - oligo = _mm256_or_si256( _mm256_srli_epi32(next,16), _mm256_slli_epi32(current,16)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,14), bigmask9)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask9)); - _mm256_store_si256(out++, 
_mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask9)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask9)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask9)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask9)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask9)); - _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask9)); + masked = (oligo >> 8) & MASK8; /* 27 */ + INCR_COUNT(counts[masked]); + debug(printf("27 %04X => %d\n",masked,counts[masked])); - return; -} -#endif + masked = (oligo >> 10) & MASK8; /* 26 */ + INCR_COUNT(counts[masked]); + debug(printf("26 %04X => %d\n",masked,counts[masked])); + masked = (oligo >> 12) & MASK8; /* 25 */ + INCR_COUNT(counts[masked]); + debug(printf("25 %04X => %d\n",masked,counts[masked])); -static void -count_9mers_fwd_simd (Count_T *counts, Inquery_T *inquery, __m128i current, __m128i next) { - __m128i oligo; -#ifdef HAVE_SSE4_1 - __m128i array; #else - Genomecomp_T array[4]; -#endif -#if defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW) - __m128i _counts_after, _counts_neg; -#endif - -#ifdef HAVE_SSE4_1 - array = _mm_srli_epi32(current,14); /* No mask necessary */ + _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8)); #else - _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,14)); /* No mask necessary */ + _masked = _mm_and_si128(_oligo, mask8); #endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if 
(EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("0 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("16 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("32 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("48 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 0 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 16 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 32 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 48 */ -#endif - debug(printf("0 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("16 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("32 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("48 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,12), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if 
(EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("1 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("17 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("33 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("49 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 1 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 17 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 33 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 49 */ -#endif - debug(printf("1 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("17 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("33 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("49 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("31 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,10), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = 
INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("2 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("18 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("34 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("50 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 2 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 18 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 34 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 50 */ -#endif - debug(printf("2 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("18 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("34 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("50 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("30 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,8), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - 
inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("3 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("19 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("35 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("51 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 3 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 19 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 35 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 51 */ -#endif - debug(printf("3 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("19 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("35 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("51 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("29 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,6), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif 
/* CHECK_FOR_OVERFLOW */ - debug(printf("4 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("20 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("36 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("52 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 4 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 20 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 36 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 52 */ -#endif - debug(printf("4 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("20 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("36 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("52 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("28 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,4), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("5 %04X => 
%d\n",array[0],counts[array[0]])); - debug(printf("21 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("37 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("53 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 5 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 21 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 37 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 53 */ -#endif - debug(printf("5 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("21 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("37 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("53 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,2), mask9); + _oligo = _mm_srli_epi32(_oligo, 8); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8)); #else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask9)); + _masked = _mm_and_si128(_oligo, mask8); #endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("6 %04X => 
%d\n",array[0],counts[array[0]])); - debug(printf("22 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("38 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("54 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 6 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 22 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 38 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 54 */ -#endif - debug(printf("6 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("22 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("38 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("54 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( current, mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("7 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("23 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("39 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("55 %04X => 
%d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 7 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 23 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 39 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 55 */ -#endif - debug(printf("7 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("23 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("39 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("55 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("27 %04X => %d\n",masked,counts[masked])); - oligo = _mm_or_si128( _mm_srli_epi32(next,16), _mm_slli_epi32(current,16)); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,14), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,14), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("8 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("24 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("40 %04X => %d\n",array[2],counts[array[2]])); - 
debug(printf("56 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 8 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 24 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 40 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 56 */ -#endif - debug(printf("8 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("24 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("40 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("56 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("26 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,12), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,12), mask9)); + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("25 %04X => %d\n",masked,counts[masked])); #endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("9 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("25 %04X => %d\n",array[1],counts[array[1]])); - 
debug(printf("41 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("57 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 9 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 25 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 41 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 57 */ -#endif - debug(printf("9 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("25 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("41 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("57 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,10), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,10), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("10 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("26 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("42 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("58 %04X => %d\n",array[3],counts[array[3]])); - -#else - 
INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 10 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 26 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 42 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 58 */ -#endif - debug(printf("10 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("26 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("42 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("58 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,8), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("11 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("27 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("43 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("59 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 11 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 27 */ - 
INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 43 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 59 */ -#endif - debug(printf("11 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("27 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("43 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("59 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); +#ifdef INDIVIDUAL_SHIFTS + masked = low_rev & MASK8; /* 24 */ + INCR_COUNT(counts[masked]); + debug(printf("24 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("12 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("28 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("44 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("60 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 12 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 28 */ - 
INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 44 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 60 */ -#endif - debug(printf("12 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("28 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("44 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("60 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (low_rev >> 2) & MASK8; /* 23 */ + INCR_COUNT(counts[masked]); + debug(printf("23 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("13 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("29 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("45 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("61 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 13 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 29 */ - 
INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 45 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 61 */ -#endif - debug(printf("13 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("29 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("45 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("61 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (low_rev >> 4) & MASK8; /* 22 */ + INCR_COUNT(counts[masked]); + debug(printf("22 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,2), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("14 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("30 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("46 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("62 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 14 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 30 */ - 
INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 46 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 62 */ -#endif - debug(printf("14 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("30 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("46 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("62 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (low_rev >> 6) & MASK8; /* 21 */ + INCR_COUNT(counts[masked]); + debug(printf("21 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( oligo, mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("15 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("31 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("47 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("63 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 15 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 31 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 47 */ - 
INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 63 */ -#endif - debug(printf("15 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("31 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("47 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("63 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (low_rev >> 8) & MASK8; /* 20 */ + INCR_COUNT(counts[masked]); + debug(printf("20 %04X => %d\n",masked,counts[masked])); - return; -} -#endif + masked = (low_rev >> 10) & MASK8; /* 19 */ + INCR_COUNT(counts[masked]); + debug(printf("19 %04X => %d\n",masked,counts[masked])); + masked = (low_rev >> 12) & MASK8; /* 18 */ + INCR_COUNT(counts[masked]); + debug(printf("18 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_AVX2 -static void -count_9mers_fwd_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) { - __m256i oligo; - __m256i array; -#ifdef CHECK_FOR_OVERFLOW - __m256i _counts_after, _counts_neg; + masked = (low_rev >> 14) & MASK8; /* 17 */ + INCR_COUNT(counts[masked]); + debug(printf("17 %04X => %d\n",masked,counts[masked])); + + masked = low_rev >> 16; /* 16, No mask necessary */ + INCR_COUNT(counts[masked]); + debug(printf("16 %04X => %d\n",masked,counts[masked])); + +#else + _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8)); +#else + _masked = _mm_and_si128(_oligo, mask8); #endif - array = _mm256_srli_epi32(current,14); /* No mask necessary */ - counts[EXTRACT256(array,0)] += 1; /* 0 */ - counts[EXTRACT256(array,1)] += 1; /* 16 */ - counts[EXTRACT256(array,2)] += 1; /* 32 */ - counts[EXTRACT256(array,3)] += 1; /* 48 */ - counts[EXTRACT256(array,4)] += 1; /* 64 */ - counts[EXTRACT256(array,5)] += 1; /* 80 */ - counts[EXTRACT256(array,6)] += 1; /* 96 */ - counts[EXTRACT256(array,7)] += 1; /* 112 */ -#ifdef 
CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("0 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("16 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("32 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("48 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("0 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("16 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("32 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("48 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 1 */ - counts[EXTRACT256(array,1)] += 1; /* 17 */ - counts[EXTRACT256(array,2)] += 1; /* 33 */ - counts[EXTRACT256(array,3)] += 1; /* 49 */ - counts[EXTRACT256(array,4)] += 1; /* 65 */ - counts[EXTRACT256(array,5)] += 1; /* 81 */ - 
counts[EXTRACT256(array,6)] += 1; /* 97 */ - counts[EXTRACT256(array,7)] += 1; /* 113 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("1 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("17 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("33 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("49 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("1 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("17 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("33 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("49 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 2 */ - counts[EXTRACT256(array,1)] += 1; /* 18 */ - counts[EXTRACT256(array,2)] += 1; /* 34 */ - counts[EXTRACT256(array,3)] += 1; /* 
50 */ - counts[EXTRACT256(array,4)] += 1; /* 66 */ - counts[EXTRACT256(array,5)] += 1; /* 82 */ - counts[EXTRACT256(array,6)] += 1; /* 98 */ - counts[EXTRACT256(array,7)] += 1; /* 114 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("2 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("50 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("2 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("50 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 3 */ - counts[EXTRACT256(array,1)] += 
1; /* 19 */ - counts[EXTRACT256(array,2)] += 1; /* 35 */ - counts[EXTRACT256(array,3)] += 1; /* 51 */ - counts[EXTRACT256(array,4)] += 1; /* 67 */ - counts[EXTRACT256(array,5)] += 1; /* 83 */ - counts[EXTRACT256(array,6)] += 1; /* 99 */ - counts[EXTRACT256(array,7)] += 1; /* 115 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("3 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("51 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("3 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("51 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( 
_mm256_srli_epi32(current,6), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 4 */ - counts[EXTRACT256(array,1)] += 1; /* 20 */ - counts[EXTRACT256(array,2)] += 1; /* 36 */ - counts[EXTRACT256(array,3)] += 1; /* 52 */ - counts[EXTRACT256(array,4)] += 1; /* 68 */ - counts[EXTRACT256(array,5)] += 1; /* 84 */ - counts[EXTRACT256(array,6)] += 1; /* 100 */ - counts[EXTRACT256(array,7)] += 1; /* 116 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("4 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("20 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("52 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("4 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("20 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - 
debug(printf("52 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 5 */ - counts[EXTRACT256(array,1)] += 1; /* 21 */ - counts[EXTRACT256(array,2)] += 1; /* 37 */ - counts[EXTRACT256(array,3)] += 1; /* 53 */ - counts[EXTRACT256(array,4)] += 1; /* 69 */ - counts[EXTRACT256(array,5)] += 1; /* 85 */ - counts[EXTRACT256(array,6)] += 1; /* 101 */ - counts[EXTRACT256(array,7)] += 1; /* 117 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("5 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("21 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("37 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("53 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("5 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("21 %04X => 
%d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("37 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("53 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 6 */ - counts[EXTRACT256(array,1)] += 1; /* 22 */ - counts[EXTRACT256(array,2)] += 1; /* 38 */ - counts[EXTRACT256(array,3)] += 1; /* 54 */ - counts[EXTRACT256(array,4)] += 1; /* 70 */ - counts[EXTRACT256(array,5)] += 1; /* 86 */ - counts[EXTRACT256(array,6)] += 1; /* 102 */ - counts[EXTRACT256(array,7)] += 1; /* 118 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("6 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("38 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("54 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - 
debug(printf("6 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("38 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("54 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( current, bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 7 */ - counts[EXTRACT256(array,1)] += 1; /* 23 */ - counts[EXTRACT256(array,2)] += 1; /* 39 */ - counts[EXTRACT256(array,3)] += 1; /* 55 */ - counts[EXTRACT256(array,4)] += 1; /* 71 */ - counts[EXTRACT256(array,5)] += 1; /* 87 */ - counts[EXTRACT256(array,6)] += 1; /* 103 */ - counts[EXTRACT256(array,7)] += 1; /* 119 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("7 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - 
debug(printf("55 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("7 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("55 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("24 %04X => %d\n",masked,counts[masked])); - oligo = _mm256_or_si256( _mm256_srli_epi32(next,16), _mm256_slli_epi32(current,16)); - array = _mm256_and_si256( _mm256_srli_epi32(oligo,14), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 8 */ - counts[EXTRACT256(array,1)] += 1; /* 24 */ - counts[EXTRACT256(array,2)] += 1; /* 40 */ - counts[EXTRACT256(array,3)] += 1; /* 56 */ - counts[EXTRACT256(array,4)] += 1; /* 72 */ - counts[EXTRACT256(array,5)] += 1; /* 88 */ - counts[EXTRACT256(array,6)] += 1; /* 104 */ - counts[EXTRACT256(array,7)] += 1; /* 120 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = 
INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("8 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("24 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("56 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("8 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("24 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("56 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 9 */ - counts[EXTRACT256(array,1)] += 1; /* 25 */ - counts[EXTRACT256(array,2)] += 1; /* 41 */ - counts[EXTRACT256(array,3)] += 1; /* 57 */ - counts[EXTRACT256(array,4)] += 1; /* 73 */ - counts[EXTRACT256(array,5)] += 1; /* 89 */ - counts[EXTRACT256(array,6)] += 1; /* 105 */ - counts[EXTRACT256(array,7)] += 1; /* 121 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - 
inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("9 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("25 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("57 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("9 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("25 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("57 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 10 */ - counts[EXTRACT256(array,1)] += 1; /* 26 */ - counts[EXTRACT256(array,2)] += 1; /* 42 */ - counts[EXTRACT256(array,3)] += 1; /* 58 */ - counts[EXTRACT256(array,4)] += 1; /* 74 */ - counts[EXTRACT256(array,5)] += 1; /* 90 */ - counts[EXTRACT256(array,6)] += 1; /* 106 */ - counts[EXTRACT256(array,7)] += 1; /* 122 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("10 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("58 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("10 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("58 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 11 */ - counts[EXTRACT256(array,1)] += 1; /* 27 */ - counts[EXTRACT256(array,2)] += 1; /* 43 */ - counts[EXTRACT256(array,3)] += 1; /* 59 */ - counts[EXTRACT256(array,4)] += 1; /* 75 */ - counts[EXTRACT256(array,5)] += 1; /* 91 */ - counts[EXTRACT256(array,6)] += 1; /* 107 */ - counts[EXTRACT256(array,7)] += 1; /* 123 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = 
INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("11 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("43 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("59 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("11 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("43 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("59 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 12 */ - counts[EXTRACT256(array,1)] += 1; /* 28 */ - counts[EXTRACT256(array,2)] += 1; /* 44 */ - counts[EXTRACT256(array,3)] += 1; /* 60 */ - counts[EXTRACT256(array,4)] += 1; /* 76 */ - counts[EXTRACT256(array,5)] += 1; /* 92 */ - counts[EXTRACT256(array,6)] += 1; /* 108 */ - counts[EXTRACT256(array,7)] += 1; /* 124 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - 
inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("12 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("28 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("60 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("12 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("28 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("60 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 13 */ - counts[EXTRACT256(array,1)] += 1; /* 29 */ - counts[EXTRACT256(array,2)] += 1; /* 45 */ - counts[EXTRACT256(array,3)] += 1; /* 61 */ - counts[EXTRACT256(array,4)] += 1; /* 77 */ - counts[EXTRACT256(array,5)] += 1; /* 93 */ - counts[EXTRACT256(array,6)] += 1; /* 109 */ - counts[EXTRACT256(array,7)] += 1; /* 125 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("13 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("45 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("61 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("13 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("45 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("61 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 14 */ - counts[EXTRACT256(array,1)] += 1; /* 30 */ - counts[EXTRACT256(array,2)] += 1; /* 46 */ - counts[EXTRACT256(array,3)] += 1; /* 62 */ - counts[EXTRACT256(array,4)] += 1; /* 78 */ - counts[EXTRACT256(array,5)] += 1; /* 94 */ - counts[EXTRACT256(array,6)] += 1; /* 110 */ - counts[EXTRACT256(array,7)] += 1; /* 126 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum 
counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("14 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("30 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("46 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("62 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("14 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("30 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("46 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("62 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( oligo, bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 15 */ - counts[EXTRACT256(array,1)] += 1; /* 31 */ - counts[EXTRACT256(array,2)] += 1; /* 47 */ - counts[EXTRACT256(array,3)] += 1; /* 63 */ - counts[EXTRACT256(array,4)] += 1; /* 79 */ - counts[EXTRACT256(array,5)] += 1; /* 95 */ - counts[EXTRACT256(array,6)] += 1; /* 111 */ - counts[EXTRACT256(array,7)] += 1; /* 127 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if 
(_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("15 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("31 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("63 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("15 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("31 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("63 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("23 %04X => %d\n",masked,counts[masked])); - return; -} + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("22 %04X => %d\n",masked,counts[masked])); + + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("21 %04X => %d\n",masked,counts[masked])); + + + _oligo = _mm_srli_epi32(_oligo, 8); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) 
_masked,_mm_and_si128(_oligo, mask8)); +#else + _masked = _mm_and_si128(_oligo, mask8); #endif + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("20 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("19 %04X => %d\n",masked,counts[masked])); -#if !defined(HAVE_AVX2) + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("18 %04X => %d\n",masked,counts[masked])); -static int -store_9mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, - Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { - Genomecomp_T masked, oligo; -#ifdef INDIVIDUAL_SHIFTS -#elif defined(SIMD_MASK_THEN_STORE) - UINT4 _masked[4] __attribute__ ((aligned (16))); - __m128i _oligo; -#else - __m128i _oligo, _masked; + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("17 %04X => %d\n",masked,counts[masked])); + + + masked = low_rev >> 16; /* 16, No mask necessary */ + INCR_COUNT(counts[masked]); + debug(printf("16 %04X => %d\n",masked,counts[masked])); #endif - oligo = nexthigh_rev >> 16; /* For 31..24 */ - oligo |= low_rev << 16; + oligo = low_rev >> 18; /* For 15..9 */ + oligo |= high_rev << 14; #ifdef INDIVIDUAL_SHIFTS - masked = oligo & MASK9; /* 31 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; - } - - masked = (oligo >> 2) & MASK9; /* 30 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; - } + masked = oligo & MASK8; /* 15 */ + INCR_COUNT(counts[masked]); + debug(printf("15 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 4) & MASK9; /* 29 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing 
masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; - } + masked = (oligo >> 2) & MASK8; /* 14 */ + INCR_COUNT(counts[masked]); + debug(printf("14 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 6) & MASK9; /* 28 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; - } + masked = (oligo >> 4) & MASK8; /* 13 */ + INCR_COUNT(counts[masked]); + debug(printf("13 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 8) & MASK9; /* 27 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; - } + masked = (oligo >> 6) & MASK8; /* 12 */ + INCR_COUNT(counts[masked]); + debug(printf("12 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 10) & MASK9; /* 26 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; - } + masked = (oligo >> 8) & MASK8; /* 11 */ + INCR_COUNT(counts[masked]); + debug(printf("11 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 12) & MASK9; /* 25 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; - } + masked = (oligo >> 10) & MASK8; /* 10 */ + INCR_COUNT(counts[masked]); + debug(printf("10 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 14) & MASK9; /* 24 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; - } + masked = (oligo >> 12) & MASK8; /* 9 */ + INCR_COUNT(counts[masked]); + debug(printf("9 %04X => %d\n",masked,counts[masked])); #else 
_oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8)); #else - _masked = _mm_and_si128(_oligo, mask9); + _masked = _mm_and_si128(_oligo, mask8); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; - } + INCR_COUNT(counts[masked]); + debug(printf("15 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; - } + INCR_COUNT(counts[masked]); + debug(printf("14 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; - } + INCR_COUNT(counts[masked]); + debug(printf("13 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; - } + INCR_COUNT(counts[masked]); + debug(printf("12 %04X => %d\n",masked,counts[masked])); _oligo = _mm_srli_epi32(_oligo, 8); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8)); #else - _masked = _mm_and_si128(_oligo, mask9); + _masked = _mm_and_si128(_oligo, mask8); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - 
table[--pointers[masked]] = chrpos - 4; - } + INCR_COUNT(counts[masked]); + debug(printf("11 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; - } + INCR_COUNT(counts[masked]); + debug(printf("10 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; - } - - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; - } + INCR_COUNT(counts[masked]); + debug(printf("9 %04X => %d\n",masked,counts[masked])); #endif #ifdef INDIVIDUAL_SHIFTS - masked = low_rev & MASK9; /* 23 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; - } - - masked = (low_rev >> 2) & MASK9; /* 22 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; - } + masked = high_rev & MASK8; /* 8 */ + INCR_COUNT(counts[masked]); + debug(printf("8 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 4) & MASK9; /* 21 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; - } + masked = (high_rev >> 2) & MASK8; /* 7 */ + INCR_COUNT(counts[masked]); + debug(printf("7 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 6) & MASK9; /* 20 */ - if (counts[masked]) { - assert(pointers[masked] 
> positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; - } + masked = (high_rev >> 4) & MASK8; /* 6 */ + INCR_COUNT(counts[masked]); + debug(printf("6 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 8) & MASK9; /* 19 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; - } + masked = (high_rev >> 6) & MASK8; /* 5 */ + INCR_COUNT(counts[masked]); + debug(printf("5 %04X => %d\n",masked,counts[masked])); + + masked = (high_rev >> 8) & MASK8; /* 4 */ + INCR_COUNT(counts[masked]); + debug(printf("4 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 10) & MASK9; /* 18 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; - } + masked = (high_rev >> 10) & MASK8; /* 3 */ + INCR_COUNT(counts[masked]); + debug(printf("3 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 12) & MASK9; /* 17 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; - } + masked = (high_rev >> 12) & MASK8; /* 2 */ + INCR_COUNT(counts[masked]); + debug(printf("2 %04X => %d\n",masked,counts[masked])); - masked = low_rev >> 14; /* 16, No mask necessary */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; - } + masked = (high_rev >> 14) & MASK8; /* 1 */ + INCR_COUNT(counts[masked]); + debug(printf("1 %04X => %d\n",masked,counts[masked])); + masked = high_rev >> 16; /* 0, No mask necessary */ + INCR_COUNT(counts[masked]); + debug(printf("0 %04X => %d\n",masked,counts[masked])); + #else - 
_oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6); + _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8)); #else - _masked = _mm_and_si128(_oligo, mask9); + _masked = _mm_and_si128(_oligo, mask8); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; - } + INCR_COUNT(counts[masked]); + debug(printf("8 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; - } + INCR_COUNT(counts[masked]); + debug(printf("7 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; - } + INCR_COUNT(counts[masked]); + debug(printf("6 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; - } + INCR_COUNT(counts[masked]); + debug(printf("5 %04X => %d\n",masked,counts[masked])); _oligo = _mm_srli_epi32(_oligo, 8); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8)); #else - _masked = _mm_and_si128(_oligo, mask9); + _masked = _mm_and_si128(_oligo, mask8); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > 
positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; - } + INCR_COUNT(counts[masked]); + debug(printf("4 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; - } + INCR_COUNT(counts[masked]); + debug(printf("3 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; - } + INCR_COUNT(counts[masked]); + debug(printf("2 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; - } -#endif + INCR_COUNT(counts[masked]); + debug(printf("1 %04X => %d\n",masked,counts[masked])); - oligo = low_rev >> 16; /* For 15..8 */ - oligo |= high_rev << 16; + masked = high_rev >> 16; /* 0, No mask necessary */ + INCR_COUNT(counts[masked]); + debug(printf("0 %04X => %d\n",masked,counts[masked])); +#endif -#ifdef INDIVIDUAL_SHIFTS - masked = oligo & MASK9; /* 15 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; - } + return; +} - masked = (oligo >> 2) & MASK9; /* 14 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; - } +#else /* HAVE_AVX2 */ - masked = (oligo >> 4) & MASK9; /* 13 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u 
at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; - } +static void +count_8mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { + Genomecomp_T masked, oligo; + __m256i _oligo, _masked; - masked = (oligo >> 6) & MASK9; /* 12 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; - } - masked = (oligo >> 8) & MASK9; /* 11 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; - } + oligo = nexthigh_rev >> 18; /* For 31..25 */ + oligo |= low_rev << 14; - masked = (oligo >> 10) & MASK9; /* 10 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; - } + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask8); - masked = (oligo >> 12) & MASK9; /* 9 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("31 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 14) & MASK9; /* 9 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; - } + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("30 %04X => %d\n",masked,counts[masked])); -#else - _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, 
mask9)); -#else - _masked = _mm_and_si128(_oligo, mask9); -#endif + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("29 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; - } + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("28 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; - } + masked = EXTRACT256(_masked,4); + INCR_COUNT(counts[masked]); + debug(printf("27 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; - } + masked = EXTRACT256(_masked,5); + INCR_COUNT(counts[masked]); + debug(printf("26 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; - } + masked = EXTRACT256(_masked,6); + INCR_COUNT(counts[masked]); + debug(printf("25 %04X => %d\n",masked,counts[masked])); - _oligo = _mm_srli_epi32(_oligo, 8); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9)); -#else - _masked = _mm_and_si128(_oligo, mask9); -#endif + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask8); - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at 
%u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("24 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; - } + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("23 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; - } + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("22 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; - } -#endif + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("21 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,4); + INCR_COUNT(counts[masked]); + debug(printf("20 %04X => %d\n",masked,counts[masked])); -#ifdef INDIVIDUAL_SHIFTS - masked = high_rev & MASK9; /* 7 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; - } + masked = EXTRACT256(_masked,5); + INCR_COUNT(counts[masked]); + debug(printf("19 %04X => %d\n",masked,counts[masked])); - masked = (high_rev >> 2) & MASK9; /* 6 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; - } + masked = EXTRACT256(_masked,6); + INCR_COUNT(counts[masked]); 
+ debug(printf("18 %04X => %d\n",masked,counts[masked])); - masked = (high_rev >> 4) & MASK9; /* 5 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; - } + masked = EXTRACT256(_masked,7); + INCR_COUNT(counts[masked]); + debug(printf("17 %04X => %d\n",masked,counts[masked])); - masked = (high_rev >> 6) & MASK9; /* 4 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; - } - masked = (high_rev >> 8) & MASK9; /* 3 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; - } + masked = low_rev >> 16; /* 16, No mask necessary */ + INCR_COUNT(counts[masked]); + debug(printf("16 %04X => %d\n",masked,counts[masked])); - masked = (high_rev >> 10) & MASK9; /* 2 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; - } - masked = (high_rev >> 12) & MASK9; /* 1 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; - } + oligo = low_rev >> 18; /* For 15..9 */ + oligo |= high_rev << 14; - masked = high_rev >> 14; /* 0, No mask necessary */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; - } + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask8); -#else - _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6); -#ifdef 
SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9)); -#else - _masked = _mm_and_si128(_oligo, mask9); -#endif + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("15 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; - } + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("14 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; - } + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("13 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; - } + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("12 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; - } + masked = EXTRACT256(_masked,4); + INCR_COUNT(counts[masked]); + debug(printf("11 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,5); + INCR_COUNT(counts[masked]); + debug(printf("10 %04X => %d\n",masked,counts[masked])); - _oligo = _mm_srli_epi32(_oligo, 8); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask9)); -#else - _masked = _mm_and_si128(_oligo, mask9); -#endif + masked = EXTRACT256(_masked,6); + INCR_COUNT(counts[masked]); + 
debug(printf("9 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; - } - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; - } + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask8); - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("8 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; - } -#endif + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("7 %04X => %d\n",masked,counts[masked])); - return chrpos - 32; -} + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("6 %04X => %d\n",masked,counts[masked])); -#else /* HAVE_AVX2 */ + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("5 %04X => %d\n",masked,counts[masked])); -static int -store_9mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, - Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { - Genomecomp_T masked, oligo; - __m256i _oligo, _masked, _counts; + masked = EXTRACT256(_masked,4); + INCR_COUNT(counts[masked]); + debug(printf("4 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,5); + 
INCR_COUNT(counts[masked]); + debug(printf("3 %04X => %d\n",masked,counts[masked])); - oligo = nexthigh_rev >> 16; /* For 31..24 */ - oligo |= low_rev << 16; + masked = EXTRACT256(_masked,6); + INCR_COUNT(counts[masked]); + debug(printf("2 %04X => %d\n",masked,counts[masked])); - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask9); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - - - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; - } - - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; - } - - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; - } - - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; - } - - if (EXTRACT256(_counts,4)) { - masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; - } - - if (EXTRACT256(_counts,5)) { - masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; - } - - if (EXTRACT256(_counts,6)) { - masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - 
table[--pointers[masked]] = chrpos - 6; - } - - if (EXTRACT256(_counts,7)) { - masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; - } - - - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask9); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; - } + masked = EXTRACT256(_masked,7); + INCR_COUNT(counts[masked]); + debug(printf("1 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; - } - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; - } + masked = high_rev >> 16; /* 0, No mask necessary */ + INCR_COUNT(counts[masked]); + debug(printf("0 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; - } + return; +} - if (EXTRACT256(_counts,4)) { - masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; - } +#endif /* HAVE_AVX2 */ - if (EXTRACT256(_counts,5)) { - masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - 
debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; - } - if (EXTRACT256(_counts,6)) { - masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; - } - if (EXTRACT256(_counts,7)) { - masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; - } +/* Expecting current to have {high0_rev, low0_rev, high1_rev, + low1_rev}, and next to have {low0_rev, high1_rev, low1_rev, and + high2_rev} */ +#ifdef HAVE_SSE2 +static void +extract_8mers_fwd_simd_64 (__m128i *out, __m128i current, __m128i next) { + __m128i oligo; + _mm_store_si128(out++, _mm_srli_epi32(current,16)); /* No mask necessary */ + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask8)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask8)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask8)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask8)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask8)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask8)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask8)); + _mm_store_si128(out++, _mm_and_si128( current, mask8)); - oligo = low_rev >> 16; /* For 15..8 */ - oligo |= high_rev << 16; + oligo = _mm_or_si128( _mm_srli_epi32(next,18), _mm_slli_epi32(current,14)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,12), mask8)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask8)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask8)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask8)); + _mm_store_si128(out++, _mm_and_si128( 
_mm_srli_epi32(oligo,4), mask8)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask8)); + _mm_store_si128(out++, _mm_and_si128( oligo, mask8)); - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask9); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + return; +} - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; - } +#ifdef USE_UNORDERED_8 +static Chrpos_T +store_8mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16]; + + extract_8mers_fwd_simd_64(array,current,next); + return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array); +} - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; - } +#else +/* Includes extract_8mers_fwd_simd_64_ordered (__m128i *out, __m128i current, __m128i next) */ +static Chrpos_T +store_8mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16], *out; + __m128i oligo; + __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m128i _u0, _u1, _u2, _u3; - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; - } + out = &(array[0]); - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; - } + /* As a special case, 
8_mers don't need to be masked, since they fill each 16-mer */ - if (EXTRACT256(_counts,4)) { - masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; - } + oligo = _mm_or_si128( _mm_srli_epi32(next,18), _mm_slli_epi32(current,14)); + /* _row0 = _mm_and_si128( oligo, mask8); */ + /* _row1 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask8); */ + _t0 = _mm_blend_epi16(_mm_slli_epi32(oligo,14), oligo, 0x55); + + /* _row2 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask8); */ + /* _row3 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask8); */ + _t1 = _mm_blend_epi16(_mm_slli_epi32(oligo,10), _mm_srli_epi32(oligo,4), 0x55); + + /* _row4 = _mm_and_si128( _mm_srli_epi32(oligo,8), mask8); */ + /* _row5 = _mm_and_si128( _mm_srli_epi32(oligo,10), mask8); */ + _t2 = _mm_blend_epi16(_mm_slli_epi32(oligo,6), _mm_srli_epi32(oligo,8), 0x55); + + + /* _row6 = _mm_and_si128( _mm_srli_epi32(oligo,12), mask8);*/ + /* _row7 = _mm_and_si128( current, mask8); */ + _t3 = _mm_blend_epi16(_mm_slli_epi32(current,16), _mm_srli_epi32(oligo,12), 0x55); + + /* _row8 = _mm_and_si128( _mm_srli_epi32(current,2), mask8); */ + /* _row9 = _mm_and_si128( _mm_srli_epi32(current,4), mask8); */ + _t4 = _mm_blend_epi16(_mm_slli_epi32(current,12), _mm_srli_epi32(current,2), 0x55); + + /* _row10 = _mm_and_si128( _mm_srli_epi32(current,6), mask8); */ + /* _row11 = _mm_and_si128( _mm_srli_epi32(current,8), mask8); */ + _t5 = _mm_blend_epi16(_mm_slli_epi32(current,8), _mm_srli_epi32(current,6), 0x55); + + /* _row12 = _mm_and_si128( _mm_srli_epi32(current,10), mask8); */ + /* _row13 = _mm_and_si128( _mm_srli_epi32(current,12), mask8); */ + _t6 = _mm_blend_epi16(_mm_slli_epi32(current,4), _mm_srli_epi32(current,10), 0x55); + + /* _row14 = _mm_and_si128( _mm_srli_epi32(current,14), mask8); */ + /* _row15 = _mm_srli_epi32(current,16); */ /* No mask necessary */ + _t7 = 
_mm_blend_epi16(current, _mm_srli_epi32(current,14), 0x55); + + + /* Split: top half */ + _u0 = _mm_unpackhi_epi32(_t0,_t1); + _u1 = _mm_unpackhi_epi32(_t2,_t3); + _u2 = _mm_unpackhi_epi32(_t4,_t5); + _u3 = _mm_unpackhi_epi32(_t6,_t7); + + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3)); + + /* Split: bottom half */ + _u0 = _mm_unpacklo_epi32(_t0,_t1); + _u1 = _mm_unpacklo_epi32(_t2,_t3); + _u2 = _mm_unpacklo_epi32(_t4,_t5); + _u3 = _mm_unpacklo_epi32(_t6,_t7); + + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3)); - if (EXTRACT256(_counts,5)) { - masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; - } + return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif - if (EXTRACT256(_counts,6)) { - masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; - } +#ifdef HAVE_AVX2 +static void +extract_8mers_fwd_simd_128 (__m256i 
*out, __m256i current, __m256i next) { + __m256i oligo; - if (EXTRACT256(_counts,7)) { - masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; - } + _mm256_store_si256(out++, _mm256_srli_epi32(current,16)); /* No mask necessary */ + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask8)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask8)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask8)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask8)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask8)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask8)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask8)); + _mm256_store_si256(out++, _mm256_and_si256( current, bigmask8)); + oligo = _mm256_or_si256( _mm256_srli_epi32(next,18), _mm256_slli_epi32(current,14)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask8)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask8)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask8)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask8)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask8)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask8)); + _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask8)); - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask9); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + return; +} - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - 
assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; - } +#ifdef USE_UNORDERED_8 +static Chrpos_T +store_8mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + __m256i array[16]; + + extract_8mers_fwd_simd_128(array,current,next); + return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array); +} - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; - } +#else +/* Includes extract_8mers_fwd_simd_128_ordered (__m256i *out, __m256i current, __m256i next) */ +static Chrpos_T +store_8mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + __m256i array[16], *out; + __m256i oligo; + __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; - } + out = &(array[0]); - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; - } + /* As a special case, 8_mers don't need to be masked, since they fill each 16-mer */ - if (EXTRACT256(_counts,4)) { - masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; - } + oligo = _mm256_or_si256( _mm256_srli_epi32(next,18), _mm256_slli_epi32(current,14)); + /* _row0 = 
_mm256_and_si256( oligo, bigmask8); */ + /* _row1 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask8); */ + _t0 = _mm256_blend_epi16(_mm256_slli_epi32(oligo,14), oligo, 0x55); + + /* _row2 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask8); */ + /* _row3 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask8); */ + _t1 = _mm256_blend_epi16(_mm256_slli_epi32(oligo,10), _mm256_srli_epi32(oligo,4), 0x55); + + /* _row4 = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask8); */ + /* _row5 = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask8); */ + _t2 = _mm256_blend_epi16(_mm256_slli_epi32(oligo,6), _mm256_srli_epi32(oligo,8), 0x55); + + /* _row6 = _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask8); */ + /* _row7 = _mm256_and_si256( current, bigmask8); */ + _t3 = _mm256_blend_epi16(_mm256_slli_epi32(current,16), _mm256_srli_epi32(oligo,12), 0x55); + + /* _row8 = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask8); */ + /* _row9 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask8); */ + _t4 = _mm256_blend_epi16(_mm256_slli_epi32(current,12), _mm256_srli_epi32(current,2), 0x55); + + /* _row10 = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask8); */ + /* _row11 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask8); */ + _t5 = _mm256_blend_epi16(_mm256_slli_epi32(current,8), _mm256_srli_epi32(current,6), 0x55); + + /* _row12 = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask8); */ + /* _row13 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask8); */ + _t6 = _mm256_blend_epi16(_mm256_slli_epi32(current,4), _mm256_srli_epi32(current,10), 0x55); + + /* _row14 = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask8); */ + /* _row15 = _mm256_srli_epi32(current,16); */ /* No mask necessary */ + _t7 = _mm256_blend_epi16(current, _mm256_srli_epi32(current,14), 0x55); + + + _u0 = _mm256_unpackhi_epi32(_t0,_t1); + _u1 = _mm256_unpackhi_epi32(_t2,_t3); + _u2 = _mm256_unpackhi_epi32(_t4,_t5); + _u3 = 
_mm256_unpackhi_epi32(_t6,_t7); + _u4 = _mm256_unpacklo_epi32(_t0,_t1); + _u5 = _mm256_unpacklo_epi32(_t2,_t3); + _u6 = _mm256_unpacklo_epi32(_t4,_t5); + _u7 = _mm256_unpacklo_epi32(_t6,_t7); + + + _t0 = _mm256_unpackhi_epi64(_u0,_u1); + _t1 = _mm256_unpackhi_epi64(_u2,_u3); + _t2 = _mm256_unpacklo_epi64(_u0,_u1); + _t3 = _mm256_unpacklo_epi64(_u2,_u3); + _t4 = _mm256_unpackhi_epi64(_u4,_u5); + _t5 = _mm256_unpackhi_epi64(_u6,_u7); + _t6 = _mm256_unpacklo_epi64(_u4,_u5); + _t7 = _mm256_unpacklo_epi64(_u6,_u7); + + + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,0))); + + return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif + +#ifdef 
HAVE_AVX512 +static void +extract_8mers_fwd_simd_256 (__m512i *out, __m512i current, __m512i next) { + __m512i oligo; + + _mm512_store_si512(out++, _mm512_srli_epi32(current,16)); /* No mask necessary */ + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( current, hugemask8)); + + oligo = _mm512_or_si512( _mm512_srli_epi32(next,18), _mm512_slli_epi32(current,14)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,12), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask8)); - if (EXTRACT256(_counts,5)) { - masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; - } + return; +} - if (EXTRACT256(_counts,6)) { - masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - 
table[--pointers[masked]] = chrpos - 30; - } +#ifdef USE_UNORDERED_8 +static Chrpos_T +store_8mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i next) { + __m512i array[16]; + + extract_8mers_fwd_simd_256(array,current,next); + return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array); +} - if (EXTRACT256(_counts,7)) { - masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; - } +#else +/* Includes extract_8mers_fwd_simd_256_ordered (__m512i *out, __m512i current, __m512i next) */ +static Chrpos_T +store_8mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i next) { + __m512i array[16], *out; + __m512i oligo, _shuffle0, _shuffle1, _shuffle2; + __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; + + out = &(array[0]); + + oligo = _mm512_or_si512( _mm512_srli_epi32(next,18), _mm512_slli_epi32(current,14)); + _u0 = _mm512_and_si512( oligo, hugemask8); + /* _row1 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask8); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,14), highmask8); + _t0 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask8); + /* _row3 = _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask8); */ + _u1 = _mm512_and_si512(_mm512_slli_epi32(oligo,10), highmask8); + _t1 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask8); + /* _row5 = _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask8); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,6), highmask8); + _t2 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,12), hugemask8); + /* _row7 = _mm512_and_si512( current, hugemask8); */ + _u1 = _mm512_and_si512( 
_mm512_slli_epi32(current,16), highmask8); + _t3 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask8); + /* _row9 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask8); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,12), highmask8); + _t4 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask8); + /* _row11 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask8); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,8), highmask8); + _t5 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask8); + /* _row13 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask8); */ + _u1 = _mm512_and_si512(_mm512_slli_epi32(current,4), highmask8); + _t6 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask8); + /* _row15 = _mm512_srli_epi32(current,16); */ /* No mask necessary */ + _u1 = _mm512_and_si512(current, highmask8); + _t7 = _mm512_or_si512(_u0, _u1); + + + _u0 = _mm512_unpackhi_epi32(_t0,_t1); + _u1 = _mm512_unpackhi_epi32(_t2,_t3); + _u2 = _mm512_unpackhi_epi32(_t4,_t5); + _u3 = _mm512_unpackhi_epi32(_t6,_t7); + _u4 = _mm512_unpacklo_epi32(_t0,_t1); + _u5 = _mm512_unpacklo_epi32(_t2,_t3); + _u6 = _mm512_unpacklo_epi32(_t4,_t5); + _u7 = _mm512_unpacklo_epi32(_t6,_t7); + + + /* Split: top half */ + _shuffle0 = _mm512_setr_epi64(7, 8+7, 6, 8+6, 5, 8+5, 4, 8+4); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); + + + _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 
= _mm512_permutex2var_epi64(_t2, _shuffle1, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + + /* Split: bottom half */ + _shuffle0 = _mm512_setr_epi64(3, 8+3, 2, 8+2, 1, 8+1, 0, 8+0); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); + + + /* _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); */ + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + /* _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); */ + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3); + _mm512_store_si512(out++, 
_mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); - return chrpos - 32; + return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array); } - -#endif /* HAVE_AVX2 */ +#endif +#endif #if !defined(HAVE_AVX2) -static void -count_8mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { +static int +store_8mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { Genomecomp_T masked, oligo; #ifdef INDIVIDUAL_SHIFTS #elif defined(SIMD_MASK_THEN_STORE) @@ -15235,33 +13155,47 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK8; /* 31 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("31 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; + } masked = (oligo >> 2) & MASK8; /* 30 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("30 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; + } masked = (oligo >> 4) & MASK8; /* 29 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("29 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; + } masked = (oligo >> 6) & MASK8; /* 28 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("28 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; + } masked = (oligo >> 8) & MASK8; /* 27 */ - 
INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("27 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; + } masked = (oligo >> 10) & MASK8; /* 26 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("26 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; + } masked = (oligo >> 12) & MASK8; /* 25 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("25 %04X => %d\n",masked,counts[masked])); - + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; + } + #else _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); #ifdef SIMD_MASK_THEN_STORE @@ -15271,20 +13205,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("31 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("30 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("29 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("28 %04X => %d\n",masked,counts[masked])); + if 
(counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -15295,56 +13237,80 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("27 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("26 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("25 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; + } #endif #ifdef INDIVIDUAL_SHIFTS masked = low_rev & MASK8; /* 24 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("24 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; + } masked = (low_rev >> 2) & MASK8; /* 23 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("23 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; + } masked = (low_rev >> 4) & MASK8; /* 22 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("22 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; 
+ } masked = (low_rev >> 6) & MASK8; /* 21 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("21 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; + } masked = (low_rev >> 8) & MASK8; /* 20 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("20 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; + } masked = (low_rev >> 10) & MASK8; /* 19 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("19 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + (--counts[masked])] = chrpos - 12; + } masked = (low_rev >> 12) & MASK8; /* 18 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("18 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; + } masked = (low_rev >> 14) & MASK8; /* 17 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("17 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; + } masked = low_rev >> 16; /* 16, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("16 %04X => %d\n",masked,counts[masked])); - + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; + } + #else _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6); #ifdef SIMD_MASK_THEN_STORE @@ -15354,20 +13320,28 @@ #endif masked = 
EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("24 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("23 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("22 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("21 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -15378,59 +13352,83 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("20 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("19 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + (--counts[masked])] = chrpos - 12; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("18 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at 
%u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("17 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; + } masked = low_rev >> 16; /* 16, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("16 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; + } #endif - oligo = low_rev >> 18; /* For 15..9 */ + oligo = low_rev >> 18; /* For 9..15 */ oligo |= high_rev << 14; #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK8; /* 15 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("15 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; + } masked = (oligo >> 2) & MASK8; /* 14 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("14 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; + } masked = (oligo >> 4) & MASK8; /* 13 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("13 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; + } masked = (oligo >> 6) & MASK8; /* 12 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("12 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + 
(--counts[masked])] = chrpos - 19; + } masked = (oligo >> 8) & MASK8; /* 11 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("11 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; + } masked = (oligo >> 10) & MASK8; /* 10 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("10 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; + } masked = (oligo >> 12) & MASK8; /* 9 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("9 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; + } #else _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); @@ -15441,20 +13439,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("15 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("14 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("13 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - 
debug(printf("12 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = chrpos - 19; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -15465,56 +13471,80 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("11 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("10 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("9 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; + } #endif #ifdef INDIVIDUAL_SHIFTS masked = high_rev & MASK8; /* 8 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("8 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; + } masked = (high_rev >> 2) & MASK8; /* 7 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("7 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; + } masked = (high_rev >> 4) & MASK8; /* 6 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("6 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos 
- 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; + } masked = (high_rev >> 6) & MASK8; /* 5 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("5 %04X => %d\n",masked,counts[masked])); - + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; + } + masked = (high_rev >> 8) & MASK8; /* 4 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("4 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; + } masked = (high_rev >> 10) & MASK8; /* 3 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("3 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; + } masked = (high_rev >> 12) & MASK8; /* 2 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("2 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; + } masked = (high_rev >> 14) & MASK8; /* 1 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("1 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; + } masked = high_rev >> 16; /* 0, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("0 %04X => %d\n",masked,counts[masked])); - + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; + } + #else _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6); #ifdef 
SIMD_MASK_THEN_STORE @@ -15524,20 +13554,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("8 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("7 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("6 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("5 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -15548,2739 +13586,1322 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("4 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("3 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("2 %04X => %d\n",masked,counts[masked])); + if 
(counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("1 %04X => %d\n",masked,counts[masked])); - + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; + } masked = high_rev >> 16; /* 0, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("0 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; + } #endif - return; + return chrpos - 32; } #else /* HAVE_AVX2 */ -static void -count_8mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { +static int +store_8mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { Genomecomp_T masked, oligo; - __m256i _oligo, _masked; -#ifdef CHECK_FOR_OVERFLOW - __m256i _counts_after, _counts_neg; -#endif + __m256i _oligo, _masked, _counts; + __m256i _blocks, _envelopes, _addresses, _address_mask, _count_mask; + + + _address_mask = _mm256_set1_epi32(0x3); + _count_mask = _mm256_set1_epi32(0xFF); oligo = nexthigh_rev >> 18; /* For 31..25 */ oligo |= low_rev << 14; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask8); - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("31 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("30 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 
bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("29 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("28 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("27 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,5); - counts[masked] += 1; - debug(printf("26 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,6); - counts[masked] += 1; - debug(printf("25 %04X => %d\n",masked,counts[masked])); - -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low7); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there 
is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; } } -#endif /* CHECK_FOR_OVERFLOW */ - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask8); + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; + } + } - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("24 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; + } + } - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("23 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; + } + } - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("22 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,4)) { + masked = EXTRACT256(_masked,4); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; + } + } - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("21 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,5)) { + masked = EXTRACT256(_masked,5); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at 
%u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; + } + } - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("20 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,6)) { + masked = EXTRACT256(_masked,6); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; + } + } - masked = EXTRACT256(_masked,5); - counts[masked] += 1; - debug(printf("19 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT256(_masked,6); - counts[masked] += 1; - debug(printf("18 %04X => %d\n",masked,counts[masked])); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask8); - masked = EXTRACT256(_masked,7); - counts[masked] += 1; - debug(printf("17 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + 
(--counts[masked])] = chrpos - 7; } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,4)) { + masked = EXTRACT256(_masked,4); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,5)) { + masked = EXTRACT256(_masked,5); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + (--counts[masked])] = chrpos - 12; } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,6)) { + masked = EXTRACT256(_masked,6); + if (counts[masked]) { /* Have 
to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,7)) { + masked = EXTRACT256(_masked,7); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; } } -#endif /* CHECK_FOR_OVERFLOW */ + masked = low_rev >> 16; /* 16, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("16 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; + } - oligo = low_rev >> 18; /* For 15..9 */ + + oligo = low_rev >> 18; /* For 9..15 */ oligo |= high_rev << 14; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask8); - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("15 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("14 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("13 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("12 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("11 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,5); - counts[masked] += 1; - debug(printf("10 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,6); - counts[masked] += 1; - debug(printf("9 %04X => %d\n",masked,counts[masked])); + _blocks = 
_mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low7); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a 
conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = chrpos - 19; } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,4)) { + masked = EXTRACT256(_masked,4); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,5)) { + masked = EXTRACT256(_masked,5); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,6)) { + masked = EXTRACT256(_masked,6); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; } } -#endif /* CHECK_FOR_OVERFLOW */ - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask8); - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("8 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("7 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("6 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("5 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("4 %04X => 
%d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,5); - counts[masked] += 1; - debug(printf("3 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,6); - counts[masked] += 1; - debug(printf("2 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,7); - counts[masked] += 1; - debug(printf("1 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) 
{ /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,4)) { + masked = EXTRACT256(_masked,4); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,5)) { + masked = EXTRACT256(_masked,5); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,6)) { + masked = EXTRACT256(_masked,6); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,7)) { + masked = EXTRACT256(_masked,7); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; } } -#endif /* CHECK_FOR_OVERFLOW */ masked 
= high_rev >> 16; /* 0, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("0 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; + } - return; + return chrpos - 32; } #endif /* HAVE_AVX2 */ -/* Expecting current to have {high0_rev, low0_rev, high1_rev, - low1_rev}, and next to have {low0_rev, high1_rev, low1_rev, and - high2_rev} */ -#ifdef USE_SIMD_FOR_COUNTS -static void -extract_8mers_fwd_simd (__m128i *out, __m128i current, __m128i next) { - __m128i oligo; - _mm_store_si128(out++, _mm_srli_epi32(current,16)); /* No mask necessary */ - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask8)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask8)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask8)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask8)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask8)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask8)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask8)); - _mm_store_si128(out++, _mm_and_si128( current, mask8)); +#if !defined(HAVE_AVX2) - oligo = _mm_or_si128( _mm_srli_epi32(next,18), _mm_slli_epi32(current,14)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,12), mask8)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask8)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask8)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask8)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask8)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask8)); - _mm_store_si128(out++, _mm_and_si128( oligo, mask8)); +static void +count_7mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T 
low_rev, Genomecomp_T nexthigh_rev) { + Genomecomp_T masked, oligo; +#ifdef INDIVIDUAL_SHIFTS +#elif defined(SIMD_MASK_THEN_STORE) + UINT4 _masked[4] __attribute__ ((aligned (16))); + __m128i _oligo; +#else + __m128i _oligo, _masked; +#endif - return; -} -#ifdef HAVE_AVX2 -static void -extract_8mers_fwd_simd_128 (__m256i *out, __m256i current, __m256i next) { - __m256i oligo; + oligo = nexthigh_rev >> 20; /* For 31..26 */ + oligo |= low_rev << 12; - _mm256_store_si256(out++, _mm256_srli_epi32(current,16)); /* No mask necessary */ - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask8)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask8)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask8)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask8)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask8)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask8)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask8)); - _mm256_store_si256(out++, _mm256_and_si256( current, bigmask8)); - - oligo = _mm256_or_si256( _mm256_srli_epi32(next,18), _mm256_slli_epi32(current,14)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask8)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask8)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask8)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask8)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask8)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask8)); - _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask8)); - - return; -} -#endif - - -static void -count_8mers_fwd_simd (Count_T *counts, Inquery_T *inquery, 
__m128i current, __m128i next) { - __m128i oligo; -#ifdef HAVE_SSE4_1 - __m128i array; -#else - Genomecomp_T array[4]; -#endif -#if defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW) - __m128i _counts_after, _counts_neg; -#endif - -#ifdef HAVE_SSE4_1 - array = _mm_srli_epi32(current,16); /* No mask necessary */ -#else - _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,16)); /* No mask necessary */ -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("0 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("16 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("32 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("48 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 0 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 16 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 32 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 48 */ -#endif - debug(printf("0 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("16 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("32 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("48 %04X 
=> %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,14), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("1 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("17 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("33 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("49 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 1 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 17 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 33 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 49 */ -#endif - debug(printf("1 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("17 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("33 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("49 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,12), mask8); -#else - _mm_store_si128((__m128i *) 
array, _mm_and_si128( _mm_srli_epi32(current,12), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("2 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("18 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("34 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("50 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 2 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 18 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 34 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 50 */ -#endif - debug(printf("2 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("18 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("34 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("50 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); +#ifdef INDIVIDUAL_SHIFTS + masked = oligo & MASK7; /* 31 */ + INCR_COUNT(counts[masked]); + debug(printf("31 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,10), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( 
_mm_srli_epi32(current,10), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("3 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("19 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("35 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("51 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 3 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 19 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 35 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 51 */ -#endif - debug(printf("3 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("19 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("35 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("51 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (oligo >> 2) & MASK7; /* 30 */ + INCR_COUNT(counts[masked]); + debug(printf("30 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,8), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask8)); -#endif 
-#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("4 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("20 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("36 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("52 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 4 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 20 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 36 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 52 */ -#endif - debug(printf("4 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("20 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("36 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("52 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (oligo >> 4) & MASK7; /* 29 */ + INCR_COUNT(counts[masked]); + debug(printf("29 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,6), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 
1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("5 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("21 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("37 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("53 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 5 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 21 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 37 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 53 */ -#endif - debug(printf("5 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("21 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("37 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("53 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (oligo >> 6) & MASK7; /* 28 */ + INCR_COUNT(counts[masked]); + debug(printf("28 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,4), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - 
counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("6 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("22 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("38 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("54 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 6 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 22 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 38 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 54 */ -#endif - debug(printf("6 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("22 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("38 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("54 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (oligo >> 8) & MASK7; /* 27 */ + INCR_COUNT(counts[masked]); + debug(printf("27 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,2), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - 
counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("7 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("23 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("39 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("55 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 7 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 23 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 39 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 55 */ -#endif - debug(printf("7 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("23 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("39 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("55 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (oligo >> 10) & MASK7; /* 26 */ + INCR_COUNT(counts[masked]); + debug(printf("26 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( current, mask8); #else - _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = 
_mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("8 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("24 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("40 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("56 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 8 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 24 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 40 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 56 */ -#endif - debug(printf("8 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("24 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("40 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("56 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - - - oligo = _mm_or_si128( _mm_srli_epi32(next,18), _mm_slli_epi32(current,14)); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,12), mask8); + _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); #else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,12), mask8)); + _masked = _mm_and_si128(_oligo, mask7); #endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - 
counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("9 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("25 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("41 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("57 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 9 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 25 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 41 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 57 */ -#endif - debug(printf("9 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("25 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("41 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("57 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,10), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,10), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = 
_mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("10 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("26 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("42 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("58 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 10 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 26 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 42 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 58 */ -#endif - debug(printf("10 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("26 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("42 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("58 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("31 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,8), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if 
(_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("11 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("27 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("43 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("59 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 11 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 27 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 43 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 59 */ -#endif - debug(printf("11 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("27 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("43 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("59 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("30 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - 
debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("12 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("28 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("44 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("60 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 12 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 28 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 44 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 60 */ -#endif - debug(printf("12 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("28 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("44 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("60 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("29 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if 
(EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("13 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("29 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("45 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("61 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 13 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 29 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 45 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 61 */ -#endif - debug(printf("13 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("29 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("45 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("61 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("28 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,2), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = 
INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("14 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("30 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("46 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("62 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 14 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 30 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 46 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 62 */ -#endif - debug(printf("14 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("30 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("46 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("62 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( oligo, mask8); + _oligo = _mm_srli_epi32(_oligo, 8); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); #else - _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = 
INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("15 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("31 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("47 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("63 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 15 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 31 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 47 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 63 */ -#endif - debug(printf("15 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("31 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("47 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("63 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - - return; -} -#endif - - -#ifdef HAVE_AVX2 -static void -count_8mers_fwd_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) { - __m256i oligo; - __m256i array; -#ifdef CHECK_FOR_OVERFLOW - __m256i _counts_after, _counts_neg; + _masked = _mm_and_si128(_oligo, mask7); #endif - array = _mm256_srli_epi32(current,16); /* No mask necessary */ - counts[EXTRACT256(array,0)] += 1; /* 0 */ - counts[EXTRACT256(array,1)] += 1; /* 16 */ - counts[EXTRACT256(array,2)] += 1; /* 32 */ - counts[EXTRACT256(array,3)] += 1; /* 48 */ - counts[EXTRACT256(array,4)] += 1; /* 64 */ - counts[EXTRACT256(array,5)] += 1; /* 80 */ - counts[EXTRACT256(array,6)] += 1; /* 96 */ - counts[EXTRACT256(array,7)] += 1; /* 112 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if 
(_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("0 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("16 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("32 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("48 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("0 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("16 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("32 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("48 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 1 */ - counts[EXTRACT256(array,1)] += 1; /* 17 */ - counts[EXTRACT256(array,2)] += 1; /* 33 */ - counts[EXTRACT256(array,3)] += 1; /* 49 */ - counts[EXTRACT256(array,4)] += 1; /* 65 */ - counts[EXTRACT256(array,5)] += 1; /* 81 */ - counts[EXTRACT256(array,6)] += 1; /* 97 */ - counts[EXTRACT256(array,7)] += 1; /* 113 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = 
_mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("1 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("17 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("33 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("49 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("1 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("17 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("33 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("49 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 2 */ - counts[EXTRACT256(array,1)] += 1; /* 18 */ - counts[EXTRACT256(array,2)] += 1; /* 34 */ - counts[EXTRACT256(array,3)] += 1; /* 50 */ - counts[EXTRACT256(array,4)] += 1; /* 66 */ - counts[EXTRACT256(array,5)] += 1; /* 82 */ - counts[EXTRACT256(array,6)] += 1; /* 
98 */ - counts[EXTRACT256(array,7)] += 1; /* 114 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("2 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("50 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("2 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("50 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 3 */ - counts[EXTRACT256(array,1)] += 1; /* 19 */ - counts[EXTRACT256(array,2)] += 1; /* 35 */ - counts[EXTRACT256(array,3)] += 1; /* 51 */ - counts[EXTRACT256(array,4)] += 
1; /* 67 */ - counts[EXTRACT256(array,5)] += 1; /* 83 */ - counts[EXTRACT256(array,6)] += 1; /* 99 */ - counts[EXTRACT256(array,7)] += 1; /* 115 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("3 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("51 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("3 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("51 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 4 */ - counts[EXTRACT256(array,1)] += 1; /* 20 */ - 
counts[EXTRACT256(array,2)] += 1; /* 36 */ - counts[EXTRACT256(array,3)] += 1; /* 52 */ - counts[EXTRACT256(array,4)] += 1; /* 68 */ - counts[EXTRACT256(array,5)] += 1; /* 84 */ - counts[EXTRACT256(array,6)] += 1; /* 100 */ - counts[EXTRACT256(array,7)] += 1; /* 116 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("4 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("20 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("52 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("4 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("20 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("52 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,6), 
bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 5 */ - counts[EXTRACT256(array,1)] += 1; /* 21 */ - counts[EXTRACT256(array,2)] += 1; /* 37 */ - counts[EXTRACT256(array,3)] += 1; /* 53 */ - counts[EXTRACT256(array,4)] += 1; /* 69 */ - counts[EXTRACT256(array,5)] += 1; /* 85 */ - counts[EXTRACT256(array,6)] += 1; /* 101 */ - counts[EXTRACT256(array,7)] += 1; /* 117 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("5 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("21 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("37 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("53 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("5 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("21 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("37 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("53 %04X => 
%d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 6 */ - counts[EXTRACT256(array,1)] += 1; /* 22 */ - counts[EXTRACT256(array,2)] += 1; /* 38 */ - counts[EXTRACT256(array,3)] += 1; /* 54 */ - counts[EXTRACT256(array,4)] += 1; /* 70 */ - counts[EXTRACT256(array,5)] += 1; /* 86 */ - counts[EXTRACT256(array,6)] += 1; /* 102 */ - counts[EXTRACT256(array,7)] += 1; /* 118 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("6 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("38 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("54 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("6 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - 
debug(printf("38 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("54 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 7 */ - counts[EXTRACT256(array,1)] += 1; /* 23 */ - counts[EXTRACT256(array,2)] += 1; /* 39 */ - counts[EXTRACT256(array,3)] += 1; /* 55 */ - counts[EXTRACT256(array,4)] += 1; /* 71 */ - counts[EXTRACT256(array,5)] += 1; /* 87 */ - counts[EXTRACT256(array,6)] += 1; /* 103 */ - counts[EXTRACT256(array,7)] += 1; /* 119 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("7 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("55 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("7 %04X => 
%d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("55 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( current, bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 8 */ - counts[EXTRACT256(array,1)] += 1; /* 24 */ - counts[EXTRACT256(array,2)] += 1; /* 40 */ - counts[EXTRACT256(array,3)] += 1; /* 56 */ - counts[EXTRACT256(array,4)] += 1; /* 72 */ - counts[EXTRACT256(array,5)] += 1; /* 88 */ - counts[EXTRACT256(array,6)] += 1; /* 104 */ - counts[EXTRACT256(array,7)] += 1; /* 120 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("8 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("24 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("56 %04X => 
%d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("8 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("24 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("56 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - oligo = _mm256_or_si256( _mm256_srli_epi32(next,18), _mm256_slli_epi32(current,14)); - array = _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 9 */ - counts[EXTRACT256(array,1)] += 1; /* 25 */ - counts[EXTRACT256(array,2)] += 1; /* 41 */ - counts[EXTRACT256(array,3)] += 1; /* 57 */ - counts[EXTRACT256(array,4)] += 1; /* 73 */ - counts[EXTRACT256(array,5)] += 1; /* 89 */ - counts[EXTRACT256(array,6)] += 1; /* 105 */ - counts[EXTRACT256(array,7)] += 1; /* 121 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("9 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - 
debug(printf("25 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("57 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("9 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("25 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("57 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 10 */ - counts[EXTRACT256(array,1)] += 1; /* 26 */ - counts[EXTRACT256(array,2)] += 1; /* 42 */ - counts[EXTRACT256(array,3)] += 1; /* 58 */ - counts[EXTRACT256(array,4)] += 1; /* 74 */ - counts[EXTRACT256(array,5)] += 1; /* 90 */ - counts[EXTRACT256(array,6)] += 1; /* 106 */ - counts[EXTRACT256(array,7)] += 1; /* 122 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* 
CHECK_FOR_OVERFLOW */ - debug(printf("10 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("58 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("10 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("58 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 11 */ - counts[EXTRACT256(array,1)] += 1; /* 27 */ - counts[EXTRACT256(array,2)] += 1; /* 43 */ - counts[EXTRACT256(array,3)] += 1; /* 59 */ - counts[EXTRACT256(array,4)] += 1; /* 75 */ - counts[EXTRACT256(array,5)] += 1; /* 91 */ - counts[EXTRACT256(array,6)] += 1; /* 107 */ - counts[EXTRACT256(array,7)] += 1; /* 123 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("11 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("43 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("59 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("11 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("43 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("59 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 12 */ - counts[EXTRACT256(array,1)] += 1; /* 28 */ - counts[EXTRACT256(array,2)] += 1; /* 44 */ - counts[EXTRACT256(array,3)] += 1; /* 60 */ - counts[EXTRACT256(array,4)] += 1; /* 76 */ - counts[EXTRACT256(array,5)] += 1; /* 92 */ - counts[EXTRACT256(array,6)] += 1; /* 108 */ - counts[EXTRACT256(array,7)] += 1; /* 124 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = 
INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("12 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("28 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("60 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("12 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("28 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("60 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 13 */ - counts[EXTRACT256(array,1)] += 1; /* 29 */ - counts[EXTRACT256(array,2)] += 1; /* 45 */ - counts[EXTRACT256(array,3)] += 1; /* 61 */ - counts[EXTRACT256(array,4)] += 1; /* 77 */ - counts[EXTRACT256(array,5)] += 1; /* 93 */ - counts[EXTRACT256(array,6)] += 1; /* 109 */ - counts[EXTRACT256(array,7)] += 1; /* 125 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - 
inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("13 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("45 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("61 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("13 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("45 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("61 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 14 */ - counts[EXTRACT256(array,1)] += 1; /* 30 */ - counts[EXTRACT256(array,2)] += 1; /* 46 */ - counts[EXTRACT256(array,3)] += 1; /* 62 */ - counts[EXTRACT256(array,4)] += 1; /* 78 */ - counts[EXTRACT256(array,5)] += 1; /* 94 */ - counts[EXTRACT256(array,6)] += 1; /* 110 */ - counts[EXTRACT256(array,7)] += 1; /* 126 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("14 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("30 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("46 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("62 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("14 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("30 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("46 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("62 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( oligo, bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 15 */ - counts[EXTRACT256(array,1)] += 1; /* 31 */ - counts[EXTRACT256(array,2)] += 1; /* 47 */ - counts[EXTRACT256(array,3)] += 1; /* 63 */ - counts[EXTRACT256(array,4)] += 1; /* 79 */ - counts[EXTRACT256(array,5)] += 1; /* 95 */ - counts[EXTRACT256(array,6)] += 1; /* 111 */ - counts[EXTRACT256(array,7)] += 1; /* 127 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("15 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("31 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("63 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("15 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("31 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("63 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("27 %04X => %d\n",masked,counts[masked])); - return; -} + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("26 %04X => %d\n",masked,counts[masked])); #endif -#if !defined(HAVE_AVX2) - -static int -store_8mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, - Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { - Genomecomp_T masked, oligo; #ifdef INDIVIDUAL_SHIFTS -#elif defined(SIMD_MASK_THEN_STORE) - UINT4 _masked[4] __attribute__ ((aligned (16))); - __m128i _oligo; -#else - __m128i _oligo, _masked; -#endif + masked = low_rev & MASK7; /* 25 */ + INCR_COUNT(counts[masked]); + debug(printf("25 %04X => 
%d\n",masked,counts[masked])); + masked = (low_rev >> 2) & MASK7; /* 24 */ + INCR_COUNT(counts[masked]); + debug(printf("24 %04X => %d\n",masked,counts[masked])); - oligo = nexthigh_rev >> 18; /* For 31..25 */ - oligo |= low_rev << 14; + masked = (low_rev >> 4) & MASK7; /* 23 */ + INCR_COUNT(counts[masked]); + debug(printf("23 %04X => %d\n",masked,counts[masked])); -#ifdef INDIVIDUAL_SHIFTS - masked = oligo & MASK8; /* 31 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; - } + masked = (low_rev >> 6) & MASK7; /* 22 */ + INCR_COUNT(counts[masked]); + debug(printf("22 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 2) & MASK8; /* 30 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; - } + masked = (low_rev >> 8) & MASK7; /* 21 */ + INCR_COUNT(counts[masked]); + debug(printf("21 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 4) & MASK8; /* 29 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; - } + masked = (low_rev >> 10) & MASK7; /* 20 */ + INCR_COUNT(counts[masked]); + debug(printf("20 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 6) & MASK8; /* 28 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; - } + masked = (low_rev >> 12) & MASK7; /* 19 */ + INCR_COUNT(counts[masked]); + debug(printf("19 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 8) & MASK8; /* 27 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - 
table[--pointers[masked]] = chrpos - 4; - } + masked = (low_rev >> 14) & MASK7; /* 18 */ + INCR_COUNT(counts[masked]); + debug(printf("18 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 10) & MASK8; /* 26 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; - } + masked = (low_rev >> 16) & MASK7; /* 17 */ + INCR_COUNT(counts[masked]); + debug(printf("17 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 12) & MASK8; /* 25 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; - } + masked = low_rev >> 18; /* 16, No mask necessary */ + INCR_COUNT(counts[masked]); + debug(printf("16 %04X => %d\n",masked,counts[masked])); #else - _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); + _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); #else - _masked = _mm_and_si128(_oligo, mask8); + _masked = _mm_and_si128(_oligo, mask7); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; - } + INCR_COUNT(counts[masked]); + debug(printf("25 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; - } + INCR_COUNT(counts[masked]); + debug(printf("24 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > 
positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; - } + INCR_COUNT(counts[masked]); + debug(printf("23 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; - } + INCR_COUNT(counts[masked]); + debug(printf("22 %04X => %d\n",masked,counts[masked])); _oligo = _mm_srli_epi32(_oligo, 8); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); #else - _masked = _mm_and_si128(_oligo, mask8); + _masked = _mm_and_si128(_oligo, mask7); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; - } + INCR_COUNT(counts[masked]); + debug(printf("21 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; - } + INCR_COUNT(counts[masked]); + debug(printf("20 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; - } + INCR_COUNT(counts[masked]); + debug(printf("19 %04X => %d\n",masked,counts[masked])); + + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("18 %04X => %d\n",masked,counts[masked])); + + + _oligo = _mm_srli_epi32(_oligo, 8); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); +#else + _masked = 
_mm_and_si128(_oligo, mask7); #endif + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("17 %04X => %d\n",masked,counts[masked])); -#ifdef INDIVIDUAL_SHIFTS - masked = low_rev & MASK8; /* 24 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; - } + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("16 %04X => %d\n",masked,counts[masked])); +#endif - masked = (low_rev >> 2) & MASK8; /* 23 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; - } - masked = (low_rev >> 4) & MASK8; /* 22 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; - } + oligo = low_rev >> 20; /* For 15..10 */ + oligo |= high_rev << 12; - masked = (low_rev >> 6) & MASK8; /* 21 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; - } +#ifdef INDIVIDUAL_SHIFTS + masked = oligo & MASK7; /* 15 */ + INCR_COUNT(counts[masked]); + debug(printf("15 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 8) & MASK8; /* 20 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; - } + masked = (oligo >> 2) & MASK7; /* 14 */ + INCR_COUNT(counts[masked]); + debug(printf("14 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 10) & MASK8; /* 19 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = 
chrpos - 12; - } + masked = (oligo >> 4) & MASK7; /* 13 */ + INCR_COUNT(counts[masked]); + debug(printf("13 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 12) & MASK8; /* 18 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; - } + masked = (oligo >> 6) & MASK7; /* 12 */ + INCR_COUNT(counts[masked]); + debug(printf("12 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 14) & MASK8; /* 17 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; - } + masked = (oligo >> 8) & MASK7; /* 11 */ + INCR_COUNT(counts[masked]); + debug(printf("11 %04X => %d\n",masked,counts[masked])); - masked = low_rev >> 16; /* 16, No mask necessary */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; - } + masked = (oligo >> 10) & MASK7; /* 10 */ + INCR_COUNT(counts[masked]); + debug(printf("10 %04X => %d\n",masked,counts[masked])); #else - _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6); + _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); #else - _masked = _mm_and_si128(_oligo, mask8); + _masked = _mm_and_si128(_oligo, mask7); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; - } + INCR_COUNT(counts[masked]); + debug(printf("15 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if 
(counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; - } + INCR_COUNT(counts[masked]); + debug(printf("14 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; - } + INCR_COUNT(counts[masked]); + debug(printf("13 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; - } + INCR_COUNT(counts[masked]); + debug(printf("12 %04X => %d\n",masked,counts[masked])); _oligo = _mm_srli_epi32(_oligo, 8); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); #else - _masked = _mm_and_si128(_oligo, mask8); + _masked = _mm_and_si128(_oligo, mask7); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; - } + INCR_COUNT(counts[masked]); + debug(printf("11 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; - } - - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; - } - - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); 
- debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; - } + INCR_COUNT(counts[masked]); + debug(printf("10 %04X => %d\n",masked,counts[masked])); +#endif - masked = low_rev >> 16; /* 16, No mask necessary */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; - } -#endif +#ifdef INDIVIDUAL_SHIFTS + masked = high_rev & MASK7; /* 9 */ + INCR_COUNT(counts[masked]); + debug(printf("9 %04X => %d\n",masked,counts[masked])); + masked = (high_rev >> 2) & MASK7; /* 8 */ + INCR_COUNT(counts[masked]); + debug(printf("8 %04X => %d\n",masked,counts[masked])); - oligo = low_rev >> 18; /* For 9..15 */ - oligo |= high_rev << 14; + masked = (high_rev >> 4) & MASK7; /* 7 */ + INCR_COUNT(counts[masked]); + debug(printf("7 %04X => %d\n",masked,counts[masked])); -#ifdef INDIVIDUAL_SHIFTS - masked = oligo & MASK8; /* 15 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; - } + masked = (high_rev >> 6) & MASK7; /* 6 */ + INCR_COUNT(counts[masked]); + debug(printf("6 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 2) & MASK8; /* 14 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; - } + masked = (high_rev >> 8) & MASK7; /* 5 */ + INCR_COUNT(counts[masked]); + debug(printf("5 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 4) & MASK8; /* 13 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; - } + masked = (high_rev >> 10) & MASK7; /* 4 */ + INCR_COUNT(counts[masked]); + debug(printf("4 %04X => 
%d\n",masked,counts[masked])); - masked = (oligo >> 6) & MASK8; /* 12 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; - } + masked = (high_rev >> 12) & MASK7; /* 3 */ + INCR_COUNT(counts[masked]); + debug(printf("3 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 8) & MASK8; /* 11 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; - } + masked = (high_rev >> 14) & MASK7; /* 2 */ + INCR_COUNT(counts[masked]); + debug(printf("2 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 10) & MASK8; /* 10 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; - } + masked = (high_rev >> 16) & MASK7; /* 1 */ + INCR_COUNT(counts[masked]); + debug(printf("1 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 12) & MASK8; /* 9 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; - } + masked = high_rev >> 18; /* 0, No mask necessary */ + INCR_COUNT(counts[masked]); + debug(printf("0 %04X => %d\n",masked,counts[masked])); #else - _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); + _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); #else - _masked = _mm_and_si128(_oligo, mask8); + _masked = _mm_and_si128(_oligo, mask7); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - 
debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; - } + INCR_COUNT(counts[masked]); + debug(printf("9 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; - } + INCR_COUNT(counts[masked]); + debug(printf("8 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; - } + INCR_COUNT(counts[masked]); + debug(printf("7 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; - } + INCR_COUNT(counts[masked]); + debug(printf("6 %04X => %d\n",masked,counts[masked])); _oligo = _mm_srli_epi32(_oligo, 8); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); #else - _masked = _mm_and_si128(_oligo, mask8); + _masked = _mm_and_si128(_oligo, mask7); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; - } + INCR_COUNT(counts[masked]); + debug(printf("5 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; - } + INCR_COUNT(counts[masked]); + debug(printf("4 %04X => %d\n",masked,counts[masked])); 
masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; - } -#endif + INCR_COUNT(counts[masked]); + debug(printf("3 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("2 %04X => %d\n",masked,counts[masked])); -#ifdef INDIVIDUAL_SHIFTS - masked = high_rev & MASK8; /* 8 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; - } - masked = (high_rev >> 2) & MASK8; /* 7 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; - } + _oligo = _mm_srli_epi32(_oligo, 8); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); +#else + _masked = _mm_and_si128(_oligo, mask7); +#endif - masked = (high_rev >> 4) & MASK8; /* 6 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; - } + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("1 %04X => %d\n",masked,counts[masked])); - masked = (high_rev >> 6) & MASK8; /* 5 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; - } + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("0 %04X => %d\n",masked,counts[masked])); +#endif - masked = (high_rev >> 8) & MASK8; /* 4 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - 
table[--pointers[masked]] = chrpos - 27; - } + return; +} - masked = (high_rev >> 10) & MASK8; /* 3 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; - } +#else /* HAVE_AVX2 */ - masked = (high_rev >> 12) & MASK8; /* 2 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; - } +static void +count_7mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { + Genomecomp_T masked, oligo; + __m256i _oligo, _masked; - masked = (high_rev >> 14) & MASK8; /* 1 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; - } - masked = high_rev >> 16; /* 0, No mask necessary */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; - } + oligo = nexthigh_rev >> 20; /* For 31..26 */ + oligo |= low_rev << 12; -#else - _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8)); -#else - _masked = _mm_and_si128(_oligo, mask8); -#endif + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask7); - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("31 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,1); - if 
(counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; - } + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("30 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; - } + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("29 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; - } + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("28 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,4); + INCR_COUNT(counts[masked]); + debug(printf("27 %04X => %d\n",masked,counts[masked])); - _oligo = _mm_srli_epi32(_oligo, 8); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask8)); -#else - _masked = _mm_and_si128(_oligo, mask8); -#endif + masked = EXTRACT256(_masked,5); + INCR_COUNT(counts[masked]); + debug(printf("26 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; - } - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; - } + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask7); - masked = EXTRACT(_masked,2); - if 
(counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("25 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; - } + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("24 %04X => %d\n",masked,counts[masked])); - masked = high_rev >> 16; /* 0, No mask necessary */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; - } -#endif + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("23 %04X => %d\n",masked,counts[masked])); - return chrpos - 32; -} + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("22 %04X => %d\n",masked,counts[masked])); -#else /* HAVE_AVX2 */ + masked = EXTRACT256(_masked,4); + INCR_COUNT(counts[masked]); + debug(printf("21 %04X => %d\n",masked,counts[masked])); -static int -store_8mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, - Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { - Genomecomp_T masked, oligo; - __m256i _oligo, _masked, _counts; + masked = EXTRACT256(_masked,5); + INCR_COUNT(counts[masked]); + debug(printf("20 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,6); + INCR_COUNT(counts[masked]); + debug(printf("19 %04X => %d\n",masked,counts[masked])); - oligo = nexthigh_rev >> 18; /* For 31..25 */ - oligo |= low_rev << 14; + masked = EXTRACT256(_masked,7); + INCR_COUNT(counts[masked]); + debug(printf("18 %04X => %d\n",masked,counts[masked])); - 
_oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask8); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; - } + _oligo = _mm256_srli_epi32(_oligo, 16); + _masked = _mm256_and_si256(_oligo, bigmask7); - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("17 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; - } + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("16 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; - } - if (EXTRACT256(_counts,4)) { - masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; - } - if (EXTRACT256(_counts,5)) { - masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; - } + oligo = low_rev >> 20; /* For 15..10 */ + oligo |= high_rev << 12; - if (EXTRACT256(_counts,6)) { - masked = EXTRACT256(_masked,6); - assert(pointers[masked] 
> positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; - } + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask7); + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("15 %04X => %d\n",masked,counts[masked])); - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask8); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("14 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; - } + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("13 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; - } + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("12 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; - } + masked = EXTRACT256(_masked,4); + INCR_COUNT(counts[masked]); + debug(printf("11 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; - } + masked = EXTRACT256(_masked,5); + INCR_COUNT(counts[masked]); + 
debug(printf("10 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,4)) { - masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; - } - if (EXTRACT256(_counts,5)) { - masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; - } + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask7); - if (EXTRACT256(_counts,6)) { - masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("9 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,7)) { - masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; - } + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("8 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("7 %04X => %d\n",masked,counts[masked])); - masked = low_rev >> 16; /* 16, No mask necessary */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; - } + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("6 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,4); + INCR_COUNT(counts[masked]); + debug(printf("5 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,5); + INCR_COUNT(counts[masked]); + 
debug(printf("4 %04X => %d\n",masked,counts[masked])); - oligo = low_rev >> 18; /* For 9..15 */ - oligo |= high_rev << 14; + masked = EXTRACT256(_masked,6); + INCR_COUNT(counts[masked]); + debug(printf("3 %04X => %d\n",masked,counts[masked])); - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask8); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + masked = EXTRACT256(_masked,7); + INCR_COUNT(counts[masked]); + debug(printf("2 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; - } - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; - } + _oligo = _mm256_srli_epi32(_oligo, 16); + _masked = _mm256_and_si256(_oligo, bigmask7); - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("1 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; - } + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("0 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,4)) { - masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; - } 
+ return; +} - if (EXTRACT256(_counts,5)) { - masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; - } +#endif /* HAVE_AVX2 */ - if (EXTRACT256(_counts,6)) { - masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; - } - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask8); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); +/* Expecting current to have {high0_rev, low0_rev, high1_rev, + low1_rev}, and next to have {low0_rev, high1_rev, low1_rev, and + high2_rev} */ +#ifdef HAVE_SSE2 +static void +extract_7mers_fwd_simd_64 (__m128i *out, __m128i current, __m128i next) { + __m128i oligo; - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; - } + _mm_store_si128(out++, _mm_srli_epi32(current,18)); /* No mask necessary */ + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask7)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask7)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask7)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask7)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask7)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask7)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask7)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask7)); + _mm_store_si128(out++, _mm_and_si128( current, mask7)); - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] 
> positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; - } + oligo = _mm_or_si128( _mm_srli_epi32(next,20), _mm_slli_epi32(current,12)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask7)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask7)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask7)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask7)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask7)); + _mm_store_si128(out++, _mm_and_si128( oligo, mask7)); - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; - } + return; +} - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; - } +#ifdef USE_UNORDERED_7 +static Chrpos_T +store_7mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16]; + + extract_7mers_fwd_simd_64(array,current,next); + return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array); +} - if (EXTRACT256(_counts,4)) { - masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; - } +#else +/* Includes extract_7mers_fwd_simd_64_ordered (__m128i *out, __m128i current, __m128i next) */ +static Chrpos_T +store_7mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16], *out; + __m128i oligo; + __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + 
__m128i _u0, _u1, _u2, _u3; - if (EXTRACT256(_counts,5)) { - masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; - } + out = &(array[0]); - if (EXTRACT256(_counts,6)) { - masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; - } + oligo = _mm_or_si128( _mm_srli_epi32(next,20), _mm_slli_epi32(current,12)); + /* _row0 = _mm_and_si128( oligo, mask7); */ + /* _row1 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask7); */ + _t0 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,14), oligo, 0x55), mask7_epi16); + + /* _row2 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask7); */ + /* _row3 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask7); */ + _t1 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,10), _mm_srli_epi32(oligo, 4), 0x55), mask7_epi16); + + /* _row4 = _mm_and_si128( _mm_srli_epi32(oligo,8), mask7); */ + /* _row5 = _mm_and_si128( _mm_srli_epi32(oligo,10), mask7); */ + _t2 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,6), _mm_srli_epi32(oligo, 8), 0x55), mask7_epi16); + + + /* _row6 = _mm_and_si128( current, mask7); */ + /* _row7 = _mm_and_si128( _mm_srli_epi32(current,2), mask7); */ + _t3 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,14), current, 0x55), mask7_epi16); + + /* _row8 = _mm_and_si128( _mm_srli_epi32(current,4), mask7); */ + /* _row9 = _mm_and_si128( _mm_srli_epi32(current,6), mask7); */ + _t4 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,10), _mm_srli_epi32(current, 4), 0x55), mask7_epi16); + + /* _row10 = _mm_and_si128( _mm_srli_epi32(current,8), mask7); */ + /* _row11 = _mm_and_si128( _mm_srli_epi32(current,10), mask7); */ + _t5 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,6), _mm_srli_epi32(current, 8), 0x55), mask7_epi16); + + /* _row12 = 
_mm_and_si128( _mm_srli_epi32(current,12), mask7); */ + /* _row13 = _mm_and_si128( _mm_srli_epi32(current,14), mask7); */ + _t6 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,2), _mm_srli_epi32(current, 12), 0x55), mask7_epi16); + + /* _row14 = _mm_and_si128( _mm_srli_epi32(current,16), mask7); */ + /* _row15 = _mm_srli_epi32(current,18); */ /* No mask necessary */ + _t7 = _mm_and_si128(_mm_blend_epi16(_mm_srli_epi32(current,2), _mm_srli_epi32(current, 16), 0x55), mask7_epi16); + + + /* Split: top half */ + _u0 = _mm_unpackhi_epi32(_t0,_t1); + _u1 = _mm_unpackhi_epi32(_t2,_t3); + _u2 = _mm_unpackhi_epi32(_t4,_t5); + _u3 = _mm_unpackhi_epi32(_t6,_t7); + + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3)); + + /* Split: bottom half */ + _u0 = _mm_unpacklo_epi32(_t0,_t1); + _u1 = _mm_unpacklo_epi32(_t2,_t3); + _u2 = _mm_unpacklo_epi32(_t4,_t5); + _u3 = _mm_unpacklo_epi32(_t6,_t7); + + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3)); - if (EXTRACT256(_counts,7)) { - masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - 
table[--pointers[masked]] = chrpos - 30; - } + return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif - masked = high_rev >> 16; /* 0, No mask necessary */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; - } +#ifdef HAVE_AVX2 +static void +extract_7mers_fwd_simd_128 (__m256i *out, __m256i current, __m256i next) { + __m256i oligo; - return chrpos - 32; + _mm256_store_si256(out++, _mm256_srli_epi32(current,18)); /* No mask necessary */ + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask7)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask7)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask7)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask7)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask7)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask7)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask7)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask7)); + _mm256_store_si256(out++, _mm256_and_si256( current, bigmask7)); + + oligo = _mm256_or_si256( _mm256_srli_epi32(next,20), _mm256_slli_epi32(current,12)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask7)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask7)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask7)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask7)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask7)); + _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask7)); + + return; } -#endif 
/* HAVE_AVX2 */ +#ifdef USE_UNORDERED_7 +static Chrpos_T +store_7mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + __m256i array[16]; + + extract_7mers_fwd_simd_128(array,current,next); + return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array); +} + +#else +/* Includes extract_7mers_fwd_simd_128_ordered (__m256i *out, __m256i current, __m256i next) */ +static Chrpos_T +store_7mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + __m256i array[16], *out; + __m256i oligo; + __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; + + out = &(array[0]); + + oligo = _mm256_or_si256( _mm256_srli_epi32(next,20), _mm256_slli_epi32(current,12)); + /* _row0 = _mm256_and_si256( oligo, bigmask7); */ + /* _row1 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask7); */ + _t0 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,14), oligo, 0x55), bigmask7_epi16); + + /* _row2 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask7); */ + /* _row3 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask7); */ + _t1 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,10), _mm256_srli_epi32(oligo,4), 0x55), bigmask7_epi16); + + /* _row4 = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask7); */ + /* _row5 = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask7); */ + _t2 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,6), _mm256_srli_epi32(oligo,8), 0x55), bigmask7_epi16); + + + /* _row6 = _mm256_and_si256( current, bigmask7); */ + /* _row7 = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask7); */ + _t3 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,14), current, 0x55), bigmask7_epi16); + + /* _row8 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask7); */ + /* _row9 = _mm256_and_si256( 
_mm256_srli_epi32(current,6), bigmask7); */ + _t4 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,10), _mm256_srli_epi32(current,4), 0x55), bigmask7_epi16); + + /* _row10 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask7); */ + /* _row11 = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask7); */ + _t5 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,6), _mm256_srli_epi32(current,8), 0x55), bigmask7_epi16); + + /* _row12 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask7); */ + /* _row13 = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask7); */ + _t6 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,2), _mm256_srli_epi32(current,12), 0x55), bigmask7_epi16); + + /* _row14 = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask7); */ + /* _row15 = _mm256_srli_epi32(current,18)); */ /* No mask necessary */ + _t7 = _mm256_and_si256(_mm256_blend_epi16(_mm256_srli_epi32(current,2), _mm256_srli_epi32(current,16), 0x55), bigmask7_epi16); + + + _u0 = _mm256_unpackhi_epi32(_t0,_t1); + _u1 = _mm256_unpackhi_epi32(_t2,_t3); + _u2 = _mm256_unpackhi_epi32(_t4,_t5); + _u3 = _mm256_unpackhi_epi32(_t6,_t7); + _u4 = _mm256_unpacklo_epi32(_t0,_t1); + _u5 = _mm256_unpacklo_epi32(_t2,_t3); + _u6 = _mm256_unpacklo_epi32(_t4,_t5); + _u7 = _mm256_unpacklo_epi32(_t6,_t7); + + + _t0 = _mm256_unpackhi_epi64(_u0,_u1); + _t1 = _mm256_unpackhi_epi64(_u2,_u3); + _t2 = _mm256_unpacklo_epi64(_u0,_u1); + _t3 = _mm256_unpacklo_epi64(_u2,_u3); + _t4 = _mm256_unpackhi_epi64(_u4,_u5); + _t5 = _mm256_unpackhi_epi64(_u6,_u7); + _t6 = _mm256_unpacklo_epi64(_u4,_u5); + _t7 = _mm256_unpacklo_epi64(_u6,_u7); + + + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,1))); + _mm256_store_si256(out++, 
_mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,0))); + + return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif + +#ifdef HAVE_AVX512 +static void +extract_7mers_fwd_simd_256 (__m512i *out, __m512i current, __m512i next) { + __m512i oligo; + + _mm512_store_si512(out++, _mm512_srli_epi32(current,18)); /* No mask necessary */ + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( 
_mm512_srli_epi32(current,4), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( current, hugemask7)); + + oligo = _mm512_or_si512( _mm512_srli_epi32(next,20), _mm512_slli_epi32(current,12)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask7)); + + return; +} + +#ifdef USE_UNORDERED_7 +static Chrpos_T +store_7mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i next) { + __m512i array[16]; + + extract_7mers_fwd_simd_256(array,current,next); + return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array); +} + +#else +/* Includes extract_7mers_fwd_simd_256_ordered (__m512i *out, __m512i current, __m512i next) */ +static Chrpos_T +store_7mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i next) { + __m512i array[16], *out; + __m512i oligo, _shuffle0, _shuffle1, _shuffle2; + __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; + + out = &(array[0]); + + oligo = _mm512_or_si512( _mm512_srli_epi32(next,20), _mm512_slli_epi32(current,12)); + _u0 = _mm512_and_si512( oligo, hugemask7); + /* _row1 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask7); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,14), highmask7); + _t0 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask7); + /* _row3 = _mm512_and_si512( 
_mm512_srli_epi32(oligo,6), hugemask7); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,10), highmask7); + _t1 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask7); + /* _row5 = _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask7); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,6), highmask7); + _t2 = _mm512_or_si512(_u0, _u1); + + + _u0 = _mm512_and_si512( current, hugemask7); + /* _row7 = _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask7); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,14), highmask7); + _t3 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask7); + /* _row9 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask7); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,10), highmask7); + _t4 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask7); + /* _row11 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask7); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,6), highmask7); + _t5 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask7); + /* _row13 = _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask7); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,2), highmask7); + _t6 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask7); + /* _row15 = _mm512_srli_epi32(current,18)); */ /* No mask necessary */ + _u1 = _mm512_and_si512( _mm512_srli_epi32(current,2), highmask7); + _t7 = _mm512_or_si512(_u0, _u1); + + + _u0 = _mm512_unpackhi_epi32(_t0,_t1); + _u1 = _mm512_unpackhi_epi32(_t2,_t3); + _u2 = _mm512_unpackhi_epi32(_t4,_t5); + _u3 = _mm512_unpackhi_epi32(_t6,_t7); + _u4 = _mm512_unpacklo_epi32(_t0,_t1); + _u5 = _mm512_unpacklo_epi32(_t2,_t3); + _u6 = _mm512_unpacklo_epi32(_t4,_t5); + _u7 = _mm512_unpacklo_epi32(_t6,_t7); + + + /* Split: top half */ + _shuffle0 = 
_mm512_setr_epi64(7, 8+7, 6, 8+6, 5, 8+5, 4, 8+4); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); + + + _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + + /* Split: bottom half */ + _shuffle0 = _mm512_setr_epi64(3, 8+3, 2, 8+2, 1, 8+1, 0, 8+0); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); + + + /* _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); */ + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, 
_shuffle1, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + /* _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); */ + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif #if !defined(HAVE_AVX2) -static void -count_7mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { +static int +store_7mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { Genomecomp_T masked, oligo; #ifdef INDIVIDUAL_SHIFTS #elif defined(SIMD_MASK_THEN_STORE) @@ -18290,34 +14911,46 @@ __m128i _oligo, _masked; #endif - + oligo = nexthigh_rev >> 20; /* For 31..26 */ oligo |= low_rev << 12; #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK7; /* 31 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("31 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; + } masked = (oligo >> 2) & MASK7; /* 30 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("30 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; + } 
masked = (oligo >> 4) & MASK7; /* 29 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("29 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; + } masked = (oligo >> 6) & MASK7; /* 28 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("28 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; + } masked = (oligo >> 8) & MASK7; /* 27 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("27 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; + } masked = (oligo >> 10) & MASK7; /* 26 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("26 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; + } #else _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); @@ -18328,20 +14961,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("31 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("30 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("29 %04X => %d\n",masked,counts[masked])); 
+ if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("28 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -18352,55 +14993,79 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("27 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("26 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; + } #endif #ifdef INDIVIDUAL_SHIFTS masked = low_rev & MASK7; /* 25 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("25 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; + } masked = (low_rev >> 2) & MASK7; /* 24 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("24 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; + } masked = (low_rev >> 4) & MASK7; /* 23 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("23 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos 
- 8; + } masked = (low_rev >> 6) & MASK7; /* 22 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("22 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; + } masked = (low_rev >> 8) & MASK7; /* 21 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("21 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; + } masked = (low_rev >> 10) & MASK7; /* 20 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("20 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; + } masked = (low_rev >> 12) & MASK7; /* 19 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("19 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + (--counts[masked])] = chrpos - 12; + } masked = (low_rev >> 14) & MASK7; /* 18 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("18 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; + } masked = (low_rev >> 16) & MASK7; /* 17 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("17 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; + } masked = low_rev >> 18; /* 16, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("16 %04X => %d\n",masked,counts[masked])); + if 
(counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; + } #else _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6); @@ -18411,20 +15076,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("25 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("24 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("23 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("22 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -18435,20 +15108,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("21 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("20 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + 
table[positions[masked] + (--counts[masked])] = chrpos - 11; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("19 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + (--counts[masked])] = chrpos - 12; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("18 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -18459,12 +15140,16 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("17 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("16 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; + } #endif @@ -18473,28 +15158,40 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK7; /* 15 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("15 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; + } masked = (oligo >> 2) & MASK7; /* 14 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("14 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; + } masked = (oligo >> 4) & MASK7; /* 13 */ - 
INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("13 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; + } masked = (oligo >> 6) & MASK7; /* 12 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("12 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = chrpos - 19; + } masked = (oligo >> 8) & MASK7; /* 11 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("11 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; + } masked = (oligo >> 10) & MASK7; /* 10 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("10 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; + } #else _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); @@ -18505,20 +15202,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("15 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("14 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("13 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) 
{ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("12 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = chrpos - 19; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -18529,56 +15234,80 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("11 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("10 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; + } #endif #ifdef INDIVIDUAL_SHIFTS masked = high_rev & MASK7; /* 9 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("9 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; + } masked = (high_rev >> 2) & MASK7; /* 8 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("8 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; + } masked = (high_rev >> 4) & MASK7; /* 7 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("7 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; + } 
masked = (high_rev >> 6) & MASK7; /* 6 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("6 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; + } masked = (high_rev >> 8) & MASK7; /* 5 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("5 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; + } masked = (high_rev >> 10) & MASK7; /* 4 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("4 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; + } masked = (high_rev >> 12) & MASK7; /* 3 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("3 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; + } masked = (high_rev >> 14) & MASK7; /* 2 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("2 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; + } masked = (high_rev >> 16) & MASK7; /* 1 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("1 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; + } masked = high_rev >> 18; /* 0, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("0 %04X => %d\n",masked,counts[masked])); - + if (counts[masked]) { + 
debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; + } + #else _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6); #ifdef SIMD_MASK_THEN_STORE @@ -18588,20 +15317,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("9 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("8 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("7 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("6 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -18612,20 +15349,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("5 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("4 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at 
%u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("3 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("2 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -18636,2859 +15381,1463 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("1 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("0 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; + } #endif - return; + return chrpos - 32; } #else /* HAVE_AVX2 */ -static void -count_7mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { +static int +store_7mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { Genomecomp_T masked, oligo; - __m256i _oligo, _masked; -#ifdef CHECK_FOR_OVERFLOW - __m256i _counts_after, _counts_neg; -#endif + __m256i _oligo, _masked, _counts; + __m256i _blocks, _envelopes, _addresses, _address_mask, _count_mask; + + + _address_mask = _mm256_set1_epi32(0x3); + _count_mask = 
_mm256_set1_epi32(0xFF); oligo = nexthigh_rev >> 20; /* For 31..26 */ oligo |= low_rev << 12; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask7); - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("31 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("30 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("29 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("28 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("27 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,5); - counts[masked] += 1; - debug(printf("26 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low6); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + 
table[positions[masked] + (--counts[masked])] = chrpos; } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,4)) { + masked = EXTRACT256(_masked,4); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,5)) { + masked = EXTRACT256(_masked,5); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; } } -#endif /* CHECK_FOR_OVERFLOW */ - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14); + _oligo = 
_mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask7); - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("25 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("24 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("23 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("22 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("21 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,5); - counts[masked] += 1; - debug(printf("20 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,6); - counts[masked] += 1; - debug(printf("19 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,7); - counts[masked] += 1; - debug(printf("18 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + 
debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,4)) { + masked = EXTRACT256(_masked,4); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,5)) { + masked = EXTRACT256(_masked,5); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; + } + + if 
(EXTRACT256(_counts,6)) { + masked = EXTRACT256(_masked,6); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + (--counts[masked])] = chrpos - 12; } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,7)) { + masked = EXTRACT256(_masked,7); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; } } -#endif /* CHECK_FOR_OVERFLOW */ + _oligo = _mm256_srli_epi32(_oligo, 16); _masked = _mm256_and_si256(_oligo, bigmask7); - masked = EXTRACT256(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("17 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); - masked = EXTRACT256(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("16 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; + } + } + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; + } + } oligo = low_rev >> 20; /* For 
15..10 */ oligo |= high_rev << 12; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask7); - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("15 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("14 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("13 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; + } + } - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("12 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("11 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,5); - counts[masked] += 1; - debug(printf("10 %04X => %d\n",masked,counts[masked])); - -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low6); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = 
INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = chrpos - 19; } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,4)) { + masked = EXTRACT256(_masked,4); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,5)) { + masked = EXTRACT256(_masked,5); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; } } -#endif /* CHECK_FOR_OVERFLOW */ - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14); _masked = 
_mm256_and_si256(_oligo, bigmask7); - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("9 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("8 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("7 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("6 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("5 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,5); - counts[masked] += 1; - debug(printf("4 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,6); - counts[masked] += 1; - debug(printf("3 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,7); - counts[masked] += 1; - debug(printf("2 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + 
table[positions[masked] + (--counts[masked])] = chrpos - 22; } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,4)) { + masked = EXTRACT256(_masked,4); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,5)) { + masked = EXTRACT256(_masked,5); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,6)) { + masked = EXTRACT256(_masked,6); 
+ if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,7)) { + masked = EXTRACT256(_masked,7); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; } } -#endif /* CHECK_FOR_OVERFLOW */ + _oligo = _mm256_srli_epi32(_oligo, 16); _masked = _mm256_and_si256(_oligo, bigmask7); - masked = EXTRACT256(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("1 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); - masked = EXTRACT256(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("0 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; + } + } - return; + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; + } + } + + return chrpos - 32; } #endif /* HAVE_AVX2 */ -/* Expecting current to have 
{high0_rev, low0_rev, high1_rev, - low1_rev}, and next to have {low0_rev, high1_rev, low1_rev, and - high2_rev} */ -#ifdef USE_SIMD_FOR_COUNTS +#if !defined(HAVE_AVX2) + static void -extract_7mers_fwd_simd (__m128i *out, __m128i current, __m128i next) { - __m128i oligo; +count_6mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { + Genomecomp_T masked, oligo; +#ifdef INDIVIDUAL_SHIFTS +#elif defined(SIMD_MASK_THEN_STORE) + UINT4 _masked[4] __attribute__ ((aligned (16))); + __m128i _oligo; +#else + __m128i _oligo, _masked; +#endif - _mm_store_si128(out++, _mm_srli_epi32(current,18)); /* No mask necessary */ - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask7)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask7)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask7)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask7)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask7)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask7)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask7)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask7)); - _mm_store_si128(out++, _mm_and_si128( current, mask7)); - oligo = _mm_or_si128( _mm_srli_epi32(next,20), _mm_slli_epi32(current,12)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,10), mask7)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask7)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask7)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask7)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask7)); - _mm_store_si128(out++, _mm_and_si128( oligo, mask7)); + oligo = nexthigh_rev >> 22; /* For 31..27 */ + oligo |= low_rev << 10; - return; -} +#ifdef INDIVIDUAL_SHIFTS + masked = oligo & MASK6; /* 31 */ + 
INCR_COUNT(counts[masked]); + debug(printf("31 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_AVX2 -static void -extract_7mers_fwd_simd_128 (__m256i *out, __m256i current, __m256i next) { - __m256i oligo; + masked = (oligo >> 2) & MASK6; /* 30 */ + INCR_COUNT(counts[masked]); + debug(printf("30 %04X => %d\n",masked,counts[masked])); - _mm256_store_si256(out++, _mm256_srli_epi32(current,18)); /* No mask necessary */ - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask7)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask7)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask7)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask7)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask7)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask7)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask7)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask7)); - _mm256_store_si256(out++, _mm256_and_si256( current, bigmask7)); + masked = (oligo >> 4) & MASK6; /* 29 */ + INCR_COUNT(counts[masked]); + debug(printf("29 %04X => %d\n",masked,counts[masked])); - oligo = _mm256_or_si256( _mm256_srli_epi32(next,20), _mm256_slli_epi32(current,12)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask7)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask7)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask7)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask7)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask7)); - _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask7)); + masked = (oligo >> 6) & MASK6; /* 28 */ + INCR_COUNT(counts[masked]); + 
debug(printf("28 %04X => %d\n",masked,counts[masked])); - return; -} -#endif + masked = (oligo >> 8) & MASK6; /* 27 */ + INCR_COUNT(counts[masked]); + debug(printf("27 %04X => %d\n",masked,counts[masked])); -static void -count_7mers_fwd_simd (Count_T *counts, Inquery_T *inquery, __m128i current, __m128i next) { - __m128i oligo; -#ifdef HAVE_SSE4_1 - __m128i array; #else - Genomecomp_T array[4]; -#endif -#if defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW) - __m128i _counts_after, _counts_neg; -#endif - -#ifdef HAVE_SSE4_1 - array = _mm_srli_epi32(current,18); /* No mask necessary */ + _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6)); #else - _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,18)); /* No mask necessary */ + _masked = _mm_and_si128(_oligo, mask6); #endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("0 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("16 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("32 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("48 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 0 */ - 
INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 16 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 32 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 48 */ -#endif - debug(printf("0 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("16 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("32 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("48 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,16), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,16), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("1 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("17 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("33 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("49 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 1 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 17 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 33 */ - 
INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 49 */ -#endif - debug(printf("1 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("17 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("33 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("49 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("31 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,14), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("2 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("18 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("34 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("50 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 2 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 18 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 34 */ - 
INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 50 */ -#endif - debug(printf("2 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("18 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("34 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("50 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("30 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,12), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("3 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("19 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("35 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("51 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 3 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 19 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 35 */ - 
INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 51 */ -#endif - debug(printf("3 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("19 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("35 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("51 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("29 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,10), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("4 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("20 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("36 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("52 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 4 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 20 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 36 */ - 
INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 52 */ -#endif - debug(printf("4 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("20 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("36 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("52 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("28 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,8), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("5 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("21 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("37 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("53 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 5 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 21 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 37 */ - 
INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 53 */ -#endif - debug(printf("5 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("21 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("37 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("53 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,6), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask7)); + masked = (oligo >> 8) & MASK6; /* 27 */ + INCR_COUNT(counts[masked]); + debug(printf("27 %04X => %d\n",masked,counts[masked])); #endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("6 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("22 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("38 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("54 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 6 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 22 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 38 */ - 
INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 54 */ -#endif - debug(printf("6 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("22 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("38 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("54 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,4), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("7 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("23 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("39 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("55 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 7 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 23 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 39 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 55 */ -#endif - debug(printf("7 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("23 
%04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("39 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("55 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,2), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("8 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("24 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("40 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("56 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 8 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 24 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 40 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 56 */ -#endif - debug(printf("8 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("24 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("40 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("56 %04X => 
%d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); +#ifdef INDIVIDUAL_SHIFTS + masked = low_rev & MASK6; /* 26 */ + INCR_COUNT(counts[masked]); + debug(printf("26 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( current, mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("9 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("25 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("41 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("57 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 9 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 25 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 41 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 57 */ -#endif - debug(printf("9 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("25 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("41 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("57 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = 
(low_rev >> 2) & MASK6; /* 25 */ + INCR_COUNT(counts[masked]); + debug(printf("25 %04X => %d\n",masked,counts[masked])); + masked = (low_rev >> 4) & MASK6; /* 24 */ + INCR_COUNT(counts[masked]); + debug(printf("24 %04X => %d\n",masked,counts[masked])); - oligo = _mm_or_si128( _mm_srli_epi32(next,20), _mm_slli_epi32(current,12)); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,10), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,10), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("10 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("26 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("42 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("58 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 10 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 26 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 42 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 58 */ -#endif - debug(printf("10 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("26 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - 
debug(printf("42 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("58 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (low_rev >> 6) & MASK6; /* 23 */ + INCR_COUNT(counts[masked]); + debug(printf("23 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,8), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("11 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("27 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("43 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("59 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 11 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 27 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 43 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 59 */ -#endif - debug(printf("11 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("27 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("43 %04X => 
%d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("59 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (low_rev >> 8) & MASK6; /* 22 */ + INCR_COUNT(counts[masked]); + debug(printf("22 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("12 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("28 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("44 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("60 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 12 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 28 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 44 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 60 */ -#endif - debug(printf("12 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("28 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("44 %04X => 
%d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("60 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("13 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("29 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("45 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("61 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 13 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 29 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 45 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 61 */ -#endif - debug(printf("13 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("29 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("45 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("61 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( 
_mm_srli_epi32(oligo,2), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("14 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("30 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("46 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("62 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 14 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 30 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 46 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 62 */ -#endif - debug(printf("14 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("30 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("46 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("62 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( oligo, mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 
1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("15 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("31 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("47 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("63 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 15 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 31 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 47 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 63 */ -#endif - debug(printf("15 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("31 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("47 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("63 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - - return; -} - -#endif - -#ifdef HAVE_AVX2 -static void -count_7mers_fwd_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) { - __m256i oligo; - __m256i array; -#ifdef CHECK_FOR_OVERFLOW - __m256i _counts_after, _counts_neg; -#endif + masked = (low_rev >> 10) & MASK6; /* 21 */ + INCR_COUNT(counts[masked]); + debug(printf("21 %04X => %d\n",masked,counts[masked])); - array = _mm256_srli_epi32(current,18); /* 
No mask necessary */ - counts[EXTRACT256(array,0)] += 1; /* 0 */ - counts[EXTRACT256(array,1)] += 1; /* 16 */ - counts[EXTRACT256(array,2)] += 1; /* 32 */ - counts[EXTRACT256(array,3)] += 1; /* 48 */ - counts[EXTRACT256(array,4)] += 1; /* 64 */ - counts[EXTRACT256(array,5)] += 1; /* 80 */ - counts[EXTRACT256(array,6)] += 1; /* 96 */ - counts[EXTRACT256(array,7)] += 1; /* 112 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("0 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("16 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("32 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("48 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("0 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("16 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("32 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("48 %04X => 
%d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 1 */ - counts[EXTRACT256(array,1)] += 1; /* 17 */ - counts[EXTRACT256(array,2)] += 1; /* 33 */ - counts[EXTRACT256(array,3)] += 1; /* 49 */ - counts[EXTRACT256(array,4)] += 1; /* 65 */ - counts[EXTRACT256(array,5)] += 1; /* 81 */ - counts[EXTRACT256(array,6)] += 1; /* 97 */ - counts[EXTRACT256(array,7)] += 1; /* 113 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("1 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("17 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("33 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("49 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("1 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("17 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - 
debug(printf("33 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("49 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 2 */ - counts[EXTRACT256(array,1)] += 1; /* 18 */ - counts[EXTRACT256(array,2)] += 1; /* 34 */ - counts[EXTRACT256(array,3)] += 1; /* 50 */ - counts[EXTRACT256(array,4)] += 1; /* 66 */ - counts[EXTRACT256(array,5)] += 1; /* 82 */ - counts[EXTRACT256(array,6)] += 1; /* 98 */ - counts[EXTRACT256(array,7)] += 1; /* 114 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("2 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("50 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("2 %04X => 
%d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("50 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 3 */ - counts[EXTRACT256(array,1)] += 1; /* 19 */ - counts[EXTRACT256(array,2)] += 1; /* 35 */ - counts[EXTRACT256(array,3)] += 1; /* 51 */ - counts[EXTRACT256(array,4)] += 1; /* 67 */ - counts[EXTRACT256(array,5)] += 1; /* 83 */ - counts[EXTRACT256(array,6)] += 1; /* 99 */ - counts[EXTRACT256(array,7)] += 1; /* 115 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("3 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - 
debug(printf("51 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("3 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("51 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 4 */ - counts[EXTRACT256(array,1)] += 1; /* 20 */ - counts[EXTRACT256(array,2)] += 1; /* 36 */ - counts[EXTRACT256(array,3)] += 1; /* 52 */ - counts[EXTRACT256(array,4)] += 1; /* 68 */ - counts[EXTRACT256(array,5)] += 1; /* 84 */ - counts[EXTRACT256(array,6)] += 1; /* 100 */ - counts[EXTRACT256(array,7)] += 1; /* 116 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("4 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("20 %04X => 
%d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("52 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("4 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("20 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("52 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 5 */ - counts[EXTRACT256(array,1)] += 1; /* 21 */ - counts[EXTRACT256(array,2)] += 1; /* 37 */ - counts[EXTRACT256(array,3)] += 1; /* 53 */ - counts[EXTRACT256(array,4)] += 1; /* 69 */ - counts[EXTRACT256(array,5)] += 1; /* 85 */ - counts[EXTRACT256(array,6)] += 1; /* 101 */ - counts[EXTRACT256(array,7)] += 1; /* 117 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - 
debug(printf("5 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("21 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("37 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("53 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("5 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("21 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("37 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("53 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 6 */ - counts[EXTRACT256(array,1)] += 1; /* 22 */ - counts[EXTRACT256(array,2)] += 1; /* 38 */ - counts[EXTRACT256(array,3)] += 1; /* 54 */ - counts[EXTRACT256(array,4)] += 1; /* 70 */ - counts[EXTRACT256(array,5)] += 1; /* 86 */ - counts[EXTRACT256(array,6)] += 1; /* 102 */ - counts[EXTRACT256(array,7)] += 1; /* 118 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("6 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("38 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("54 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("6 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("38 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("54 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 7 */ - counts[EXTRACT256(array,1)] += 1; /* 23 */ - counts[EXTRACT256(array,2)] += 1; /* 39 */ - counts[EXTRACT256(array,3)] += 1; /* 55 */ - counts[EXTRACT256(array,4)] += 1; /* 71 */ - counts[EXTRACT256(array,5)] += 1; /* 87 */ - counts[EXTRACT256(array,6)] += 1; /* 103 */ - counts[EXTRACT256(array,7)] += 1; /* 119 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; 
- } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("7 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("55 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("7 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("55 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 8 */ - counts[EXTRACT256(array,1)] += 1; /* 24 */ - counts[EXTRACT256(array,2)] += 1; /* 40 */ - counts[EXTRACT256(array,3)] += 1; /* 56 */ - counts[EXTRACT256(array,4)] += 1; /* 72 */ - counts[EXTRACT256(array,5)] += 1; /* 88 */ - counts[EXTRACT256(array,6)] += 1; /* 104 */ - counts[EXTRACT256(array,7)] += 1; /* 120 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = 
INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("8 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("24 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("56 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("8 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("24 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("56 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( current, bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 9 */ - counts[EXTRACT256(array,1)] += 1; /* 25 */ - counts[EXTRACT256(array,2)] += 1; /* 41 */ - counts[EXTRACT256(array,3)] += 1; /* 57 */ - counts[EXTRACT256(array,4)] += 1; /* 73 */ - counts[EXTRACT256(array,5)] += 1; /* 89 */ - counts[EXTRACT256(array,6)] += 1; /* 105 */ - counts[EXTRACT256(array,7)] += 1; /* 121 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = 
INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("9 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("25 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("57 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("9 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("25 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("57 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); + masked = (low_rev >> 12) & MASK6; /* 20 */ + INCR_COUNT(counts[masked]); + debug(printf("20 %04X => %d\n",masked,counts[masked])); - oligo = _mm256_or_si256( _mm256_srli_epi32(next,20), _mm256_slli_epi32(current,12)); - array = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 10 */ - counts[EXTRACT256(array,1)] += 1; /* 26 */ - counts[EXTRACT256(array,2)] += 1; /* 42 */ - counts[EXTRACT256(array,3)] += 1; /* 58 */ - counts[EXTRACT256(array,4)] += 1; /* 74 */ - counts[EXTRACT256(array,5)] += 1; /* 90 */ - counts[EXTRACT256(array,6)] += 1; /* 106 */ - counts[EXTRACT256(array,7)] += 1; /* 122 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if 
(EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("10 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("58 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("10 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("58 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 11 */ - counts[EXTRACT256(array,1)] += 1; /* 27 */ - counts[EXTRACT256(array,2)] += 1; /* 43 */ - counts[EXTRACT256(array,3)] += 1; /* 59 */ - counts[EXTRACT256(array,4)] += 1; /* 75 */ - counts[EXTRACT256(array,5)] += 1; /* 91 */ - counts[EXTRACT256(array,6)] += 1; /* 107 */ - counts[EXTRACT256(array,7)] += 1; /* 123 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if 
(_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("11 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("43 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("59 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("11 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("43 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("59 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 12 */ - counts[EXTRACT256(array,1)] += 1; /* 28 */ - counts[EXTRACT256(array,2)] += 1; /* 44 */ - counts[EXTRACT256(array,3)] += 1; /* 60 */ - counts[EXTRACT256(array,4)] += 1; /* 76 */ - counts[EXTRACT256(array,5)] += 1; /* 92 */ - counts[EXTRACT256(array,6)] += 1; /* 108 */ - counts[EXTRACT256(array,7)] += 1; /* 124 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = 
_mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("12 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("28 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("60 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("12 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("28 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("60 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 13 */ - counts[EXTRACT256(array,1)] += 1; /* 29 */ - counts[EXTRACT256(array,2)] += 1; /* 45 */ - counts[EXTRACT256(array,3)] += 1; /* 61 */ - counts[EXTRACT256(array,4)] += 1; /* 77 */ - counts[EXTRACT256(array,5)] += 1; /* 93 */ - counts[EXTRACT256(array,6)] += 1; /* 
109 */ - counts[EXTRACT256(array,7)] += 1; /* 125 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("13 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("45 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("61 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("13 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("45 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("61 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 14 */ - counts[EXTRACT256(array,1)] += 1; /* 30 */ - counts[EXTRACT256(array,2)] += 1; /* 46 */ - counts[EXTRACT256(array,3)] += 1; /* 62 */ - counts[EXTRACT256(array,4)] 
+= 1; /* 78 */ - counts[EXTRACT256(array,5)] += 1; /* 94 */ - counts[EXTRACT256(array,6)] += 1; /* 110 */ - counts[EXTRACT256(array,7)] += 1; /* 126 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("14 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("30 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("46 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("62 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("14 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("30 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("46 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("62 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( oligo, bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 15 */ - counts[EXTRACT256(array,1)] += 1; /* 31 */ - counts[EXTRACT256(array,2)] += 1; /* 47 
*/ - counts[EXTRACT256(array,3)] += 1; /* 63 */ - counts[EXTRACT256(array,4)] += 1; /* 79 */ - counts[EXTRACT256(array,5)] += 1; /* 95 */ - counts[EXTRACT256(array,6)] += 1; /* 111 */ - counts[EXTRACT256(array,7)] += 1; /* 127 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("15 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("31 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("63 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("15 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("31 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("63 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); + masked = (low_rev >> 14) & MASK6; /* 19 */ + INCR_COUNT(counts[masked]); + debug(printf("19 %04X => 
%d\n",masked,counts[masked])); - return; -} -#endif + masked = (low_rev >> 16) & MASK6; /* 18 */ + INCR_COUNT(counts[masked]); + debug(printf("18 %04X => %d\n",masked,counts[masked])); + masked = (low_rev >> 18) & MASK6; /* 17 */ + INCR_COUNT(counts[masked]); + debug(printf("17 %04X => %d\n",masked,counts[masked])); -#if !defined(HAVE_AVX2) + masked = low_rev >> 20; /* 16, No mask necessary */ + INCR_COUNT(counts[masked]); + debug(printf("16 %04X => %d\n",masked,counts[masked])); -static int -store_7mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, - Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { - Genomecomp_T masked, oligo; -#ifdef INDIVIDUAL_SHIFTS -#elif defined(SIMD_MASK_THEN_STORE) - UINT4 _masked[4] __attribute__ ((aligned (16))); - __m128i _oligo; #else - __m128i _oligo, _masked; + _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6)); +#else + _masked = _mm_and_si128(_oligo, mask6); #endif - - oligo = nexthigh_rev >> 20; /* For 31..26 */ - oligo |= low_rev << 12; - -#ifdef INDIVIDUAL_SHIFTS - masked = oligo & MASK7; /* 31 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; - } - - masked = (oligo >> 2) & MASK7; /* 30 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; - } + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("26 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 4) & MASK7; /* 29 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; - } + masked = 
EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("25 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 6) & MASK7; /* 28 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; - } + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("24 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 8) & MASK7; /* 27 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; - } + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("23 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 10) & MASK7; /* 26 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; - } -#else - _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); + _oligo = _mm_srli_epi32(_oligo, 8); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6)); #else - _masked = _mm_and_si128(_oligo, mask7); + _masked = _mm_and_si128(_oligo, mask6); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; - } + INCR_COUNT(counts[masked]); + debug(printf("22 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; - } + INCR_COUNT(counts[masked]); + debug(printf("21 %04X => %d\n",masked,counts[masked])); masked = 
EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; - } + INCR_COUNT(counts[masked]); + debug(printf("20 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; - } + INCR_COUNT(counts[masked]); + debug(printf("19 %04X => %d\n",masked,counts[masked])); _oligo = _mm_srli_epi32(_oligo, 8); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6)); #else - _masked = _mm_and_si128(_oligo, mask7); + _masked = _mm_and_si128(_oligo, mask6); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; - } + INCR_COUNT(counts[masked]); + debug(printf("18 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; - } -#endif - - -#ifdef INDIVIDUAL_SHIFTS - masked = low_rev & MASK7; /* 25 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; - } - - masked = (low_rev >> 2) & MASK7; /* 24 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; - } + INCR_COUNT(counts[masked]); + debug(printf("17 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 4) & 
MASK7; /* 23 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; - } + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("16 %04X => %d\n",masked,counts[masked])); +#endif - masked = (low_rev >> 6) & MASK7; /* 22 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; - } - masked = (low_rev >> 8) & MASK7; /* 21 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; - } + oligo = low_rev >> 22; /* For 15..11 */ + oligo |= high_rev << 10; - masked = (low_rev >> 10) & MASK7; /* 20 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; - } +#ifdef INDIVIDUAL_SHIFTS + masked = oligo & MASK6; /* 15 */ + INCR_COUNT(counts[masked]); + debug(printf("15 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 12) & MASK7; /* 19 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; - } + masked = (oligo >> 2) & MASK6; /* 14 */ + INCR_COUNT(counts[masked]); + debug(printf("14 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 14) & MASK7; /* 18 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; - } + masked = (oligo >> 4) & MASK6; /* 13 */ + INCR_COUNT(counts[masked]); + debug(printf("13 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 16) & MASK7; /* 17 */ - if 
(counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; - } + masked = (oligo >> 6) & MASK6; /* 12 */ + INCR_COUNT(counts[masked]); + debug(printf("12 %04X => %d\n",masked,counts[masked])); - masked = low_rev >> 18; /* 16, No mask necessary */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; - } + masked = (oligo >> 8) & MASK6; /* 11 */ + INCR_COUNT(counts[masked]); + debug(printf("11 %04X => %d\n",masked,counts[masked])); #else - _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6); + _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6)); #else - _masked = _mm_and_si128(_oligo, mask7); + _masked = _mm_and_si128(_oligo, mask6); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; - } + INCR_COUNT(counts[masked]); + debug(printf("15 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; - } + INCR_COUNT(counts[masked]); + debug(printf("14 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; - } + INCR_COUNT(counts[masked]); + debug(printf("13 %04X => %d\n",masked,counts[masked])); masked = 
EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; - } + INCR_COUNT(counts[masked]); + debug(printf("12 %04X => %d\n",masked,counts[masked])); - _oligo = _mm_srli_epi32(_oligo, 8); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); -#else - _masked = _mm_and_si128(_oligo, mask7); + masked = (oligo >> 8) & MASK6; /* 11 */ + INCR_COUNT(counts[masked]); + debug(printf("11 %04X => %d\n",masked,counts[masked])); #endif - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; - } - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; - } +#ifdef INDIVIDUAL_SHIFTS + masked = high_rev & MASK6; /* 10 */ + INCR_COUNT(counts[masked]); + debug(printf("10 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; - } - - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; - } - - - _oligo = _mm_srli_epi32(_oligo, 8); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); -#else - _masked = _mm_and_si128(_oligo, mask7); -#endif - - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - 
table[--pointers[masked]] = chrpos - 14; - } + masked = (high_rev >> 2) & MASK6; /* 9 */ + INCR_COUNT(counts[masked]); + debug(printf("9 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; - } -#endif + masked = (high_rev >> 4) & MASK6; /* 8 */ + INCR_COUNT(counts[masked]); + debug(printf("8 %04X => %d\n",masked,counts[masked])); + masked = (high_rev >> 6) & MASK6; /* 7 */ + INCR_COUNT(counts[masked]); + debug(printf("7 %04X => %d\n",masked,counts[masked])); - oligo = low_rev >> 20; /* For 15..10 */ - oligo |= high_rev << 12; + masked = (high_rev >> 8) & MASK6; /* 6 */ + INCR_COUNT(counts[masked]); + debug(printf("6 %04X => %d\n",masked,counts[masked])); -#ifdef INDIVIDUAL_SHIFTS - masked = oligo & MASK7; /* 15 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; - } + masked = (high_rev >> 10) & MASK6; /* 5 */ + INCR_COUNT(counts[masked]); + debug(printf("5 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 2) & MASK7; /* 14 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; - } + masked = (high_rev >> 12) & MASK6; /* 4 */ + INCR_COUNT(counts[masked]); + debug(printf("4 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 4) & MASK7; /* 13 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; - } + masked = (high_rev >> 14) & MASK6; /* 3 */ + INCR_COUNT(counts[masked]); + debug(printf("3 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 6) & MASK7; /* 12 */ - 
if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; - } + masked = (high_rev >> 16) & MASK6; /* 2 */ + INCR_COUNT(counts[masked]); + debug(printf("2 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 8) & MASK7; /* 11 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; - } + masked = (high_rev >> 18) & MASK6; /* 1 */ + INCR_COUNT(counts[masked]); + debug(printf("1 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 10) & MASK7; /* 10 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; - } + masked = high_rev >> 20; /* 0, No mask necessary */ + INCR_COUNT(counts[masked]); + debug(printf("0 %04X => %d\n",masked,counts[masked])); #else - _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); + _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6)); #else - _masked = _mm_and_si128(_oligo, mask7); + _masked = _mm_and_si128(_oligo, mask6); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; - } + INCR_COUNT(counts[masked]); + debug(printf("10 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; - } + 
INCR_COUNT(counts[masked]); + debug(printf("9 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; - } + INCR_COUNT(counts[masked]); + debug(printf("8 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; - } + INCR_COUNT(counts[masked]); + debug(printf("7 %04X => %d\n",masked,counts[masked])); _oligo = _mm_srli_epi32(_oligo, 8); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); -#else - _masked = _mm_and_si128(_oligo, mask7); -#endif - - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; - } - - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; - } -#endif - - -#ifdef INDIVIDUAL_SHIFTS - masked = high_rev & MASK7; /* 9 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; - } - - masked = (high_rev >> 2) & MASK7; /* 8 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; - } - - masked = (high_rev >> 4) & MASK7; /* 7 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - 
table[--pointers[masked]] = chrpos - 24; - } - - masked = (high_rev >> 6) & MASK7; /* 6 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; - } - - masked = (high_rev >> 8) & MASK7; /* 5 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; - } - - masked = (high_rev >> 10) & MASK7; /* 4 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; - } - - masked = (high_rev >> 12) & MASK7; /* 3 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; - } - - masked = (high_rev >> 14) & MASK7; /* 2 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; - } - - masked = (high_rev >> 16) & MASK7; /* 1 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; - } - - masked = high_rev >> 18; /* 0, No mask necessary */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; - } - -#else - _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6)); #else - _masked = _mm_and_si128(_oligo, mask7); + _masked = 
_mm_and_si128(_oligo, mask6); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; - } + INCR_COUNT(counts[masked]); + debug(printf("6 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; - } + INCR_COUNT(counts[masked]); + debug(printf("5 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; - } + INCR_COUNT(counts[masked]); + debug(printf("4 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; - } + INCR_COUNT(counts[masked]); + debug(printf("3 %04X => %d\n",masked,counts[masked])); _oligo = _mm_srli_epi32(_oligo, 8); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6)); #else - _masked = _mm_and_si128(_oligo, mask7); + _masked = _mm_and_si128(_oligo, mask6); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; - } + INCR_COUNT(counts[masked]); + debug(printf("2 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 
27)); - table[--pointers[masked]] = chrpos - 27; - } + INCR_COUNT(counts[masked]); + debug(printf("1 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; - } - - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; - } - - - _oligo = _mm_srli_epi32(_oligo, 8); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask7)); -#else - _masked = _mm_and_si128(_oligo, mask7); -#endif - - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; - } - - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; - } + INCR_COUNT(counts[masked]); + debug(printf("0 %04X => %d\n",masked,counts[masked])); #endif - return chrpos - 32; + return; } #else /* HAVE_AVX2 */ -static int -store_7mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, - Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { +static void +count_6mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { Genomecomp_T masked, oligo; - __m256i _oligo, _masked, _counts; + __m256i _oligo, _masked; - oligo = nexthigh_rev >> 20; /* For 31..26 */ - oligo |= low_rev << 12; + oligo = nexthigh_rev >> 22; /* For 31..27 */ + oligo |= low_rev << 10; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); - _masked = 
_mm256_and_si256(_oligo, bigmask7); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask6); - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("31 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; - } + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("30 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; - } + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("29 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; - } - - if (EXTRACT256(_counts,4)) { - masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; - } + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("28 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,5)) { - masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - 
table[--pointers[masked]] = chrpos - 5; - } + masked = EXTRACT256(_masked,4); + INCR_COUNT(counts[masked]); + debug(printf("27 %04X => %d\n",masked,counts[masked])); - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask7); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask6); - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("26 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; - } + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("25 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; - } + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("24 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; - } + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("23 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,4)) { - masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at 
%u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; - } + masked = EXTRACT256(_masked,4); + INCR_COUNT(counts[masked]); + debug(printf("22 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,5)) { - masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; - } + masked = EXTRACT256(_masked,5); + INCR_COUNT(counts[masked]); + debug(printf("21 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,6)) { - masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; - } + masked = EXTRACT256(_masked,6); + INCR_COUNT(counts[masked]); + debug(printf("20 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,7)) { - masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; - } + masked = EXTRACT256(_masked,7); + INCR_COUNT(counts[masked]); + debug(printf("19 %04X => %d\n",masked,counts[masked])); _oligo = _mm256_srli_epi32(_oligo, 16); - _masked = _mm256_and_si256(_oligo, bigmask7); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + _masked = _mm256_and_si256(_oligo, bigmask6); - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("18 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos 
- 15; - } + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("17 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("16 %04X => %d\n",masked,counts[masked])); - oligo = low_rev >> 20; /* For 15..10 */ - oligo |= high_rev << 12; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask7); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + oligo = low_rev >> 22; /* For 15..11 */ + oligo |= high_rev << 10; - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; - } + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask6); - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("15 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; - } + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("14 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; - } + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("13 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,4)) { - masked = 
EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; - } + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("12 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,5)) { - masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; - } + masked = EXTRACT256(_masked,4); + INCR_COUNT(counts[masked]); + debug(printf("11 %04X => %d\n",masked,counts[masked])); - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask7); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask6); - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("10 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; - } + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("9 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; - } + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("8 %04X => %d\n",masked,counts[masked])); - if 
(EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; - } + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("7 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,4)) { - masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; - } + masked = EXTRACT256(_masked,4); + INCR_COUNT(counts[masked]); + debug(printf("6 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,5)) { - masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; - } + masked = EXTRACT256(_masked,5); + INCR_COUNT(counts[masked]); + debug(printf("5 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,6)) { - masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; - } + masked = EXTRACT256(_masked,6); + INCR_COUNT(counts[masked]); + debug(printf("4 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,7)) { - masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; - } + masked = EXTRACT256(_masked,7); + INCR_COUNT(counts[masked]); + debug(printf("3 %04X => %d\n",masked,counts[masked])); _oligo = _mm256_srli_epi32(_oligo, 16); - _masked = _mm256_and_si256(_oligo, bigmask7); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + _masked = _mm256_and_si256(_oligo, bigmask6); - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - 
assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("2 %04X => %d\n",masked,counts[masked])); - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; - } + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("1 %04X => %d\n",masked,counts[masked])); - return chrpos - 32; + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("0 %04X => %d\n",masked,counts[masked])); + + return; } #endif /* HAVE_AVX2 */ -#if !defined(HAVE_AVX2) - +/* Expecting current to have {high0_rev, low0_rev, high1_rev, + low1_rev}, and next to have {low0_rev, high1_rev, low1_rev, and + high2_rev} */ +#ifdef HAVE_SSE2 static void -count_6mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { - Genomecomp_T masked, oligo; -#ifdef INDIVIDUAL_SHIFTS -#elif defined(SIMD_MASK_THEN_STORE) - UINT4 _masked[4] __attribute__ ((aligned (16))); - __m128i _oligo; -#else - __m128i _oligo, _masked; -#endif +extract_6mers_fwd_simd_64 (__m128i *out, __m128i current, __m128i next) { + __m128i oligo; + _mm_store_si128(out++, _mm_srli_epi32(current,20)); /* No mask necessary */; + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask6)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask6)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask6)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask6)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask6)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask6)); + _mm_store_si128(out++, 
_mm_and_si128( _mm_srli_epi32(current,6), mask6)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask6)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask6)); + _mm_store_si128(out++, _mm_and_si128( current, mask6)); - oligo = nexthigh_rev >> 22; /* For 31..27 */ - oligo |= low_rev << 10; + oligo = _mm_or_si128( _mm_srli_epi32(next,22), _mm_slli_epi32(current,10)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask6)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask6)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask6)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask6)); + _mm_store_si128(out++, _mm_and_si128( oligo, mask6)); -#ifdef INDIVIDUAL_SHIFTS - masked = oligo & MASK6; /* 31 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("31 %04X => %d\n",masked,counts[masked])); + return; +} - masked = (oligo >> 2) & MASK6; /* 30 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("30 %04X => %d\n",masked,counts[masked])); +#ifdef USE_UNORDERED_6 +static Chrpos_T +store_6mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16]; + + extract_6mers_fwd_simd_64(array,current,next); + return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array); +} - masked = (oligo >> 4) & MASK6; /* 29 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("29 %04X => %d\n",masked,counts[masked])); +#else +/* Includes extract_6mers_fwd_simd_64_ordered (__m128i *out, __m128i current, __m128i next) */ +static Chrpos_T +store_6mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16], *out; + __m128i oligo; + __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m128i _u0, _u1, _u2, _u3; - masked = (oligo >> 6) & MASK6; /* 28 */ - 
INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("28 %04X => %d\n",masked,counts[masked])); + out = &(array[0]); - masked = (oligo >> 8) & MASK6; /* 27 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("27 %04X => %d\n",masked,counts[masked])); + oligo = _mm_or_si128( _mm_srli_epi32(next,22), _mm_slli_epi32(current,10)); + /* _row0 = _mm_and_si128( oligo, mask6); */ + /* _row1 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask6); */ + _t0 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,14), oligo, 0x55), mask6_epi16); + + /* _row2 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask6); */ + /* _row3 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask6); */ + _t1 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,10), _mm_srli_epi32(oligo, 4), 0x55), mask6_epi16); + + /* _row4 = _mm_and_si128( _mm_srli_epi32(oligo,8), mask6); */ + /* _row5 = _mm_and_si128( current, mask6); */ + _t2 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,16), _mm_srli_epi32(oligo, 8), 0x55), mask6_epi16); + + + /* _row6 = _mm_and_si128( _mm_srli_epi32(current,2), mask6); */ + /* _row7 = _mm_and_si128( _mm_srli_epi32(current,4), mask6); */ + _t3 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,12), _mm_srli_epi32(current, 2), 0x55), mask6_epi16); + + /* _row8 = _mm_and_si128( _mm_srli_epi32(current,6), mask6); */ + /* _row9 = _mm_and_si128( _mm_srli_epi32(current,8), mask6); */ + _t4 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,8), _mm_srli_epi32(current, 6), 0x55), mask6_epi16); + + /* _row10 = _mm_and_si128( _mm_srli_epi32(current,10), mask6); */ + /* _row11 = _mm_and_si128( _mm_srli_epi32(current,12), mask6); */ + _t5 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,4), _mm_srli_epi32(current, 10), 0x55), mask6_epi16); + + /* _row12 = _mm_and_si128( _mm_srli_epi32(current,14), mask6); */ + /* _row13 = _mm_and_si128( _mm_srli_epi32(current,16), mask6); */ + _t6 = _mm_and_si128(_mm_blend_epi16(current, _mm_srli_epi32(current, 14), 0x55), 
mask6_epi16); + + /* _row14 = _mm_and_si128( _mm_srli_epi32(current,18), mask6); */ + /* _row15 = _mm_srli_epi32(current,20); */ /* No mask necessary */; + _t7 = _mm_and_si128(_mm_blend_epi16(_mm_srli_epi32(current,4), _mm_srli_epi32(current, 18), 0x55), mask6_epi16); + + + /* Split: top half */ + _u0 = _mm_unpackhi_epi32(_t0,_t1); + _u1 = _mm_unpackhi_epi32(_t2,_t3); + _u2 = _mm_unpackhi_epi32(_t4,_t5); + _u3 = _mm_unpackhi_epi32(_t6,_t7); + + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3)); + + /* Split: bottom half */ + _u0 = _mm_unpacklo_epi32(_t0,_t1); + _u1 = _mm_unpacklo_epi32(_t2,_t3); + _u2 = _mm_unpacklo_epi32(_t4,_t5); + _u3 = _mm_unpacklo_epi32(_t6,_t7); + + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3)); -#else - _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6)); -#else - _masked = _mm_and_si128(_oligo, mask6); + return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif #endif - masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - 
debug(printf("31 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("30 %04X => %d\n",masked,counts[masked])); +#ifdef HAVE_AVX2 +static void +extract_6mers_fwd_simd_128 (__m256i *out, __m256i current, __m256i next) { + __m256i oligo; + + _mm256_store_si256(out++, _mm256_srli_epi32(current,20)); /* No mask necessary */; + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask6)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask6)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask6)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask6)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask6)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask6)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask6)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask6)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask6)); + _mm256_store_si256(out++, _mm256_and_si256( current, bigmask6)); + + oligo = _mm256_or_si256( _mm256_srli_epi32(next,22), _mm256_slli_epi32(current,10)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask6)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask6)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask6)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask6)); + _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask6)); + + return; +} + +#ifdef USE_UNORDERED_6 +static Chrpos_T +store_6mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + __m256i array[16]; + + 
extract_6mers_fwd_simd_128(array,current,next); + return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array); +} + +#else +/* Includes extract_6mers_fwd_simd_128_ordered (__m256i *out, __m256i current, __m256i next) */ +static Chrpos_T +store_6mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + __m256i array[16], *out; + __m256i oligo; + __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; + + out = &(array[0]); + + oligo = _mm256_or_si256( _mm256_srli_epi32(next,22), _mm256_slli_epi32(current,10)); + /* _row0 = _mm256_and_si256( oligo, bigmask6); */ + /* _row1 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask6); */ + _t0 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,14), oligo, 0x55), bigmask6_epi16); + + /* _row2 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask6); */ + /* _row3 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask6); */ + _t1 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,10), _mm256_srli_epi32(oligo,4), 0x55), bigmask6_epi16); + + + /* _row4 = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask6); */ + /* _row5 = _mm256_and_si256( current, bigmask6); */ + _t2 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,16), _mm256_srli_epi32(oligo,8), 0x55), bigmask6_epi16); + + /* _row6 = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask6); */ + /* _row7 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask6); */ + _t3 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,12), _mm256_srli_epi32(current,2), 0x55), bigmask6_epi16); + + /* _row8 = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask6); */ + /* _row9 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask6); */ + _t4 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,8), _mm256_srli_epi32(current,6), 0x55), bigmask6_epi16); + + /* _row10 = _mm256_and_si256( 
_mm256_srli_epi32(current,10), bigmask6); */ + /* _row11 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask6); */ + _t5 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,4), _mm256_srli_epi32(current,10), 0x55), bigmask6_epi16); + + /* _row12 = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask6); */ + /* _row13 = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask6); */ + _t6 = _mm256_and_si256(_mm256_blend_epi16(current, _mm256_srli_epi32(current,14), 0x55), bigmask6_epi16); + + /* _row14 = _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask6); */ + /* _row15 = _mm256_srli_epi32(current,20); */ /* No mask necessary */; + _t7 = _mm256_and_si256(_mm256_blend_epi16(_mm256_srli_epi32(current,4), _mm256_srli_epi32(current,18), 0x55), bigmask6_epi16); + + + _u0 = _mm256_unpackhi_epi32(_t0,_t1); + _u1 = _mm256_unpackhi_epi32(_t2,_t3); + _u2 = _mm256_unpackhi_epi32(_t4,_t5); + _u3 = _mm256_unpackhi_epi32(_t6,_t7); + _u4 = _mm256_unpacklo_epi32(_t0,_t1); + _u5 = _mm256_unpacklo_epi32(_t2,_t3); + _u6 = _mm256_unpacklo_epi32(_t4,_t5); + _u7 = _mm256_unpacklo_epi32(_t6,_t7); + + + _t0 = _mm256_unpackhi_epi64(_u0,_u1); + _t1 = _mm256_unpackhi_epi64(_u2,_u3); + _t2 = _mm256_unpacklo_epi64(_u0,_u1); + _t3 = _mm256_unpacklo_epi64(_u2,_u3); + _t4 = _mm256_unpackhi_epi64(_u4,_u5); + _t5 = _mm256_unpackhi_epi64(_u6,_u7); + _t6 = _mm256_unpacklo_epi64(_u4,_u5); + _t7 = _mm256_unpacklo_epi64(_u6,_u7); + + + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,1))); + _mm256_store_si256(out++, 
_mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,0))); + + return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif + +#ifdef HAVE_AVX512 +static void +extract_6mers_fwd_simd_256 (__m512i *out, __m512i current, __m512i next) { + __m512i oligo; + + _mm512_store_si512(out++, _mm512_srli_epi32(current,20)); /* No mask necessary */; + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,18), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( 
current, hugemask6)); + + oligo = _mm512_or_si512( _mm512_srli_epi32(next,22), _mm512_slli_epi32(current,10)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask6)); + + return; +} + +#ifdef USE_UNORDERED_6 +static Chrpos_T +store_6mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i next) { + __m512i array[16]; + + extract_6mers_fwd_simd_256(array,current,next); + return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array); +} + +#else +/* Includes extract_6mers_fwd_simd_256_ordered (__m512i *out, __m512i current, __m512i next) */ +static Chrpos_T +store_6mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i next) { + __m512i array[16], *out; + __m512i oligo, _shuffle0, _shuffle1, _shuffle2; + __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; + + out = &(array[0]); + + oligo = _mm512_or_si512( _mm512_srli_epi32(next,22), _mm512_slli_epi32(current,10)); + _u0 = _mm512_and_si512( oligo, hugemask6); + /* _row1 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask6); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,14), highmask6); + _t0 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask6); + /* _row3 = _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask6); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,10), highmask6); + _t1 = _mm512_or_si512(_u0, _u1); + + + _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask6); + /* _row5 = _mm512_and_si512( current, hugemask6); 
*/ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,16), highmask6); + _t2 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask6); + /* _row7 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask6); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,12), highmask6); + _t3 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask6); + /* _row9 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask6); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,8), highmask6); + _t4 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask6); + /* _row11 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask6); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,4), highmask6); + _t5 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask6); + /* _row13 = _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask6); */ + _u1 = _mm512_and_si512( current, highmask6); + _t6 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,18), hugemask6); + /* _row15 = _mm512_srli_epi32(current,20); */ /* No mask necessary */; + _u1 = _mm512_and_si512( _mm512_srli_epi32(current,4), highmask6); + _t7 = _mm512_or_si512(_u0, _u1); + + + _u0 = _mm512_unpackhi_epi32(_t0,_t1); + _u1 = _mm512_unpackhi_epi32(_t2,_t3); + _u2 = _mm512_unpackhi_epi32(_t4,_t5); + _u3 = _mm512_unpackhi_epi32(_t6,_t7); + _u4 = _mm512_unpacklo_epi32(_t0,_t1); + _u5 = _mm512_unpacklo_epi32(_t2,_t3); + _u6 = _mm512_unpacklo_epi32(_t4,_t5); + _u7 = _mm512_unpacklo_epi32(_t6,_t7); + + + /* Split: top half */ + _shuffle0 = _mm512_setr_epi64(7, 8+7, 6, 8+6, 5, 8+5, 4, 8+4); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); + + + _shuffle1 = 
_mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + + /* Split: bottom half */ + _shuffle0 = _mm512_setr_epi64(3, 8+3, 2, 8+2, 1, 8+1, 0, 8+0); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); + + + /* _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); */ + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + /* _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); */ + _t7 = _mm512_permutex2var_epi64(_t0, 
_shuffle2, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif + + +#if !defined(HAVE_AVX2) + +static int +store_6mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { + Genomecomp_T masked, oligo; +#ifdef INDIVIDUAL_SHIFTS +#elif defined(SIMD_MASK_THEN_STORE) + UINT4 _masked[4] __attribute__ ((aligned (16))); + __m128i _oligo; +#else + __m128i _oligo, _masked; +#endif + + + oligo = nexthigh_rev >> 22; /* For 27..31 */ + oligo |= low_rev << 10; + +#ifdef INDIVIDUAL_SHIFTS + masked = oligo & MASK6; /* 31 */ + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; + } + + masked = (oligo >> 2) & MASK6; /* 30 */ + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; + } + + masked = (oligo >> 4) & MASK6; /* 29 */ + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; + } + + masked = (oligo >> 6) & MASK6; /* 28 */ + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; + } + + masked = (oligo >> 8) & MASK6; /* 27 */ + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] 
= chrpos - 4; + } + +#else + _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6)); +#else + _masked = _mm_and_si128(_oligo, mask6); +#endif + + masked = EXTRACT(_masked,0); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; + } + + masked = EXTRACT(_masked,1); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("29 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("28 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; + } masked = (oligo >> 8) & MASK6; /* 27 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("27 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; + } #endif #ifdef INDIVIDUAL_SHIFTS masked = low_rev & MASK6; /* 26 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("26 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; + } masked = (low_rev >> 2) & MASK6; /* 25 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("25 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + 
debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; + } masked = (low_rev >> 4) & MASK6; /* 24 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("24 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; + } masked = (low_rev >> 6) & MASK6; /* 23 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("23 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; + } masked = (low_rev >> 8) & MASK6; /* 22 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("22 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; + } masked = (low_rev >> 10) & MASK6; /* 21 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("21 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; + } masked = (low_rev >> 12) & MASK6; /* 20 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("20 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; + } masked = (low_rev >> 14) & MASK6; /* 19 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("19 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + (--counts[masked])] = chrpos - 12; + } masked = (low_rev >> 16) & MASK6; /* 18 */ - 
INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("18 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; + } masked = (low_rev >> 18) & MASK6; /* 17 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("17 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; + } masked = low_rev >> 20; /* 16, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("16 %04X => %d\n",masked,counts[masked])); - + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; + } + #else _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6); #ifdef SIMD_MASK_THEN_STORE @@ -21498,20 +16847,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("26 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("25 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("24 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("23 %04X => 
%d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -21522,20 +16879,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("22 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("21 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("20 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("19 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + (--counts[masked])] = chrpos - 12; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -21546,16 +16911,22 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("18 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("17 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + 
table[positions[masked] + (--counts[masked])] = chrpos - 14; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("16 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; + } #endif @@ -21564,24 +16935,34 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK6; /* 15 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("15 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; + } masked = (oligo >> 2) & MASK6; /* 14 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("14 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; + } masked = (oligo >> 4) & MASK6; /* 13 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("13 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; + } masked = (oligo >> 6) & MASK6; /* 12 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("12 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = chrpos - 19; + } masked = (oligo >> 8) & MASK6; /* 11 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("11 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; + } #else _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); @@ -21592,73 
+16973,105 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("15 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("14 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("13 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("12 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = chrpos - 19; + } masked = (oligo >> 8) & MASK6; /* 11 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("11 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; + } #endif #ifdef INDIVIDUAL_SHIFTS masked = high_rev & MASK6; /* 10 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("10 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; + } masked = (high_rev >> 2) & MASK6; /* 9 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("9 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing 
masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; + } masked = (high_rev >> 4) & MASK6; /* 8 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("8 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; + } masked = (high_rev >> 6) & MASK6; /* 7 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("7 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; + } masked = (high_rev >> 8) & MASK6; /* 6 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("6 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; + } masked = (high_rev >> 10) & MASK6; /* 5 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("5 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; + } masked = (high_rev >> 12) & MASK6; /* 4 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("4 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; + } masked = (high_rev >> 14) & MASK6; /* 3 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("3 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; + } masked = (high_rev >> 16) & MASK6; /* 2 */ - 
INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("2 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; + } masked = (high_rev >> 18) & MASK6; /* 1 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("1 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; + } masked = high_rev >> 20; /* 0, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("0 %04X => %d\n",masked,counts[masked])); - + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; + } + #else _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6); #ifdef SIMD_MASK_THEN_STORE @@ -21668,20 +17081,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("10 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("9 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("8 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("7 %04X => 
%d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -21692,20 +17113,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("6 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("5 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("4 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("3 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -21716,2736 +17145,1315 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("2 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("1 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + 
table[positions[masked] + (--counts[masked])] = chrpos - 30; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("0 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; + } #endif - return; + + return chrpos - 32; } -#else /* HAVE_AVX2 */ +#else /* HAVE_AVX2 */ -static void -count_6mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { +static int +store_6mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { Genomecomp_T masked, oligo; - __m256i _oligo, _masked; -#ifdef CHECK_FOR_OVERFLOW - __m256i _counts_after, _counts_neg; -#endif - - - oligo = nexthigh_rev >> 22; /* For 31..27 */ - oligo |= low_rev << 10; + __m256i _oligo, _masked, _counts; + __m256i _blocks, _envelopes, _addresses, _address_mask, _count_mask; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask6); - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("31 %04X => %d\n",masked,counts[masked])); + _address_mask = _mm256_set1_epi32(0x3); + _count_mask = _mm256_set1_epi32(0xFF); - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("30 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("29 %04X => %d\n",masked,counts[masked])); + oligo = nexthigh_rev >> 22; /* For 27..31 */ + oligo |= low_rev << 10; - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("28 %04X => %d\n",masked,counts[masked])); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask6); - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("27 
%04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low5); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; } } -#endif /* CHECK_FOR_OVERFLOW */ - - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask6); - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("26 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; + } 
+ } - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("25 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; + } + } - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("24 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; + } + } - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("23 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("22 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,4)) { + masked = EXTRACT256(_masked,4); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; + } + } - masked = EXTRACT256(_masked,5); - counts[masked] += 1; - debug(printf("21 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT256(_masked,6); - counts[masked] += 1; - debug(printf("20 %04X => %d\n",masked,counts[masked])); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask6); - masked = EXTRACT256(_masked,7); - counts[masked] += 1; - debug(printf("19 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) 
counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; + } + + if 
(EXTRACT256(_counts,4)) { + masked = EXTRACT256(_masked,4); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,5)) { + masked = EXTRACT256(_masked,5); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,6)) { + masked = EXTRACT256(_masked,6); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,7)) { + masked = EXTRACT256(_masked,7); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + (--counts[masked])] = chrpos - 12; } } -#endif /* CHECK_FOR_OVERFLOW */ + _oligo = _mm256_srli_epi32(_oligo, 16); _masked = _mm256_and_si256(_oligo, bigmask6); - masked = EXTRACT256(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("18 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); - 
masked = EXTRACT256(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("17 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; + } + } - masked = EXTRACT256(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("16 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; + } + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; + } + } oligo = low_rev >> 22; /* For 15..11 */ oligo |= high_rev << 10; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask6); - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("15 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("14 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("13 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = 
_mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("12 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("11 %04X => %d\n",masked,counts[masked])); - -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low5); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; } } -#endif /* CHECK_FOR_OVERFLOW */ + if (EXTRACT256(_counts,3)) { + masked = 
EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = chrpos - 19; + } + } - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask6); - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("10 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,4)) { + masked = EXTRACT256(_masked,4); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; + } + } - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("9 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("8 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("7 %04X => %d\n",masked,counts[masked])); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask6); - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("6 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); - masked = EXTRACT256(_masked,5); - counts[masked] += 1; - debug(printf("5 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,6); - counts[masked] += 1; - debug(printf("4 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,7); - 
counts[masked] += 1; - debug(printf("3 %04X => %d\n",masked,counts[masked])); - -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,4)) { + masked = EXTRACT256(_masked,4); + if 
(counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,5)) { + masked = EXTRACT256(_masked,5); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,6)) { + masked = EXTRACT256(_masked,6); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,7)) { + masked = EXTRACT256(_masked,7); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; } } -#endif /* CHECK_FOR_OVERFLOW */ + _oligo = _mm256_srli_epi32(_oligo, 16); _masked = _mm256_and_si256(_oligo, bigmask6); - masked = EXTRACT256(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("2 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); - masked = EXTRACT256(_masked,1); - 
INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("1 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; + } + } - masked = EXTRACT256(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("0 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; + } + } - return; + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; + } + } + + return chrpos - 32; } -#endif /* HAVE_AVX2 */ +#endif /* HAVE_AVX2 */ +#if !defined(HAVE_AVX2) -/* Expecting current to have {high0_rev, low0_rev, high1_rev, - low1_rev}, and next to have {low0_rev, high1_rev, low1_rev, and - high2_rev} */ -#ifdef USE_SIMD_FOR_COUNTS static void -extract_6mers_fwd_simd (__m128i *out, __m128i current, __m128i next) { - __m128i oligo; +count_5mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { + Genomecomp_T masked, oligo; +#ifdef INDIVIDUAL_SHIFTS +#elif defined(SIMD_MASK_THEN_STORE) + UINT4 _masked[4] __attribute__ ((aligned (16))); + __m128i _oligo; +#else + __m128i _oligo, _masked; +#endif - _mm_store_si128(out++, _mm_srli_epi32(current,20)); /* No mask necessary */; - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask6)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask6)); - 
_mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask6)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask6)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask6)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask6)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask6)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask6)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask6)); - _mm_store_si128(out++, _mm_and_si128( current, mask6)); - oligo = _mm_or_si128( _mm_srli_epi32(next,22), _mm_slli_epi32(current,10)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,8), mask6)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask6)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask6)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask6)); - _mm_store_si128(out++, _mm_and_si128( oligo, mask6)); + oligo = nexthigh_rev >> 24; /* For 31..28 */ + oligo |= low_rev << 8; - return; -} +#ifdef INDIVIDUAL_SHIFTS + masked = oligo & MASK5; /* 31 */ + INCR_COUNT(counts[masked]); + debug(printf("31 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_AVX2 -static void -extract_6mers_fwd_simd_128 (__m256i *out, __m256i current, __m256i next) { - __m256i oligo; + masked = (oligo >> 2) & MASK5; /* 30 */ + INCR_COUNT(counts[masked]); + debug(printf("30 %04X => %d\n",masked,counts[masked])); - _mm256_store_si256(out++, _mm256_srli_epi32(current,20)); /* No mask necessary */; - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask6)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask6)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask6)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask6)); - _mm256_store_si256(out++, _mm256_and_si256( 
_mm256_srli_epi32(current,10), bigmask6)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask6)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask6)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask6)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask6)); - _mm256_store_si256(out++, _mm256_and_si256( current, bigmask6)); + masked = (oligo >> 4) & MASK5; /* 29 */ + INCR_COUNT(counts[masked]); + debug(printf("29 %04X => %d\n",masked,counts[masked])); - oligo = _mm256_or_si256( _mm256_srli_epi32(next,22), _mm256_slli_epi32(current,10)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask6)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask6)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask6)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask6)); - _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask6)); + masked = (oligo >> 6) & MASK5; /* 28 */ + INCR_COUNT(counts[masked]); + debug(printf("28 %04X => %d\n",masked,counts[masked])); - return; -} +#else + _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); +#else + _masked = _mm_and_si128(_oligo, mask5); #endif + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("31 %04X => %d\n",masked,counts[masked])); -static void -count_6mers_fwd_simd (Count_T *counts, Inquery_T *inquery, __m128i current, __m128i next) { - __m128i oligo; -#ifdef HAVE_SSE4_1 - __m128i array; -#else - Genomecomp_T array[4]; -#endif -#if defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW) - __m128i _counts_after, _counts_neg; -#endif + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("30 %04X => 
%d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_srli_epi32(current,20); /* No mask necessary */; -#else - _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,20)); /* No mask necessary */; -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("0 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("16 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("32 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("48 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 0 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 16 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 32 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 48 */ -#endif - debug(printf("0 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("16 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("32 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("48 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("29 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - 
array = _mm_and_si128( _mm_srli_epi32(current,18), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,18), mask6)); + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("28 %04X => %d\n",masked,counts[masked])); #endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("1 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("17 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("33 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("49 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 1 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 17 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 33 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 49 */ -#endif - debug(printf("1 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("17 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("33 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("49 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,16), mask6); 
-#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,16), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("2 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("18 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("34 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("50 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 2 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 18 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 34 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 50 */ -#endif - debug(printf("2 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("18 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("34 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("50 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,14), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - 
counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("3 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("19 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("35 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("51 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 3 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 19 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 35 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 51 */ -#endif - debug(printf("3 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("19 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("35 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("51 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); +#ifdef INDIVIDUAL_SHIFTS + masked = low_rev & MASK5; /* 27 */ + INCR_COUNT(counts[masked]); + debug(printf("27 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,12), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] 
+= 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("4 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("20 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("36 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("52 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 4 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 20 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 36 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 52 */ -#endif - debug(printf("4 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("20 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("36 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("52 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (low_rev >> 2) & MASK5; /* 26 */ + INCR_COUNT(counts[masked]); + debug(printf("26 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,10), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - 
counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("5 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("21 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("37 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("53 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 5 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 21 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 37 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 53 */ -#endif - debug(printf("5 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("21 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("37 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("53 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (low_rev >> 4) & MASK5; /* 25 */ + INCR_COUNT(counts[masked]); + debug(printf("25 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,8), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef 
CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("6 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("22 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("38 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("54 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 6 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 22 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 38 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 54 */ -#endif - debug(printf("6 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("22 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("38 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("54 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (low_rev >> 6) & MASK5; /* 24 */ + INCR_COUNT(counts[masked]); + debug(printf("24 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,6), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = 
_mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("7 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("23 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("39 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("55 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 7 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 23 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 39 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 55 */ -#endif - debug(printf("7 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("23 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("39 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("55 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (low_rev >> 8) & MASK5; /* 23 */ + INCR_COUNT(counts[masked]); + debug(printf("23 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,4), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - 
_counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("8 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("24 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("40 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("56 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 8 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 24 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 40 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 56 */ -#endif - debug(printf("8 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("24 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("40 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("56 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (low_rev >> 10) & MASK5; /* 22 */ + INCR_COUNT(counts[masked]); + debug(printf("22 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,2), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = 
_mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("9 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("25 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("41 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("57 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 9 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 25 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 41 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 57 */ -#endif - debug(printf("9 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("25 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("41 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("57 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (low_rev >> 12) & MASK5; /* 21 */ + INCR_COUNT(counts[masked]); + debug(printf("21 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( current, mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if 
(_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("10 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("26 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("42 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("58 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 10 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 26 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 42 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 58 */ -#endif - debug(printf("10 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("26 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("42 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("58 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (low_rev >> 14) & MASK5; /* 20 */ + INCR_COUNT(counts[masked]); + debug(printf("20 %04X => %d\n",masked,counts[masked])); + masked = (low_rev >> 16) & MASK5; /* 19 */ + INCR_COUNT(counts[masked]); + debug(printf("19 %04X => %d\n",masked,counts[masked])); - oligo = _mm_or_si128( _mm_srli_epi32(next,22), _mm_slli_epi32(current,10)); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,8), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - 
counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("11 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("27 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("43 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("59 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 11 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 27 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 43 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 59 */ -#endif - debug(printf("11 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("27 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("43 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("59 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (low_rev >> 18) & MASK5; /* 18 */ + INCR_COUNT(counts[masked]); + debug(printf("18 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef 
CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("12 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("28 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("44 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("60 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 12 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 28 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 44 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 60 */ -#endif - debug(printf("12 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("28 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("44 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("60 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = (low_rev >> 20) & MASK5; /* 17 */ + INCR_COUNT(counts[masked]); + debug(printf("17 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = 
_mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("13 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("29 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("45 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("61 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 13 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 29 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 45 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 61 */ -#endif - debug(printf("13 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("29 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("45 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("61 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = low_rev >> 22; /* 16, No mask necessary */ + INCR_COUNT(counts[masked]); + debug(printf("16 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,2), mask6); #else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = 
_mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("14 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("30 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("46 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("62 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 14 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 30 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 46 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 62 */ -#endif - debug(printf("14 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("30 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("46 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("62 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( oligo, mask6); + _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); #else - _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask6)); + _masked = _mm_and_si128(_oligo, mask5); #endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after 
= _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("15 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("31 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("47 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("63 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 15 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 31 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 47 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 63 */ -#endif - debug(printf("15 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("31 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("47 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("63 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - return; -} + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("27 %04X => %d\n",masked,counts[masked])); -#endif + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("26 %04X => %d\n",masked,counts[masked])); + + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("25 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("24 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_AVX2 -static void 
-count_6mers_fwd_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) { - __m256i oligo; - __m256i array; -#ifdef CHECK_FOR_OVERFLOW - __m256i _counts_after, _counts_neg; + + _oligo = _mm_srli_epi32(_oligo, 8); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); +#else + _masked = _mm_and_si128(_oligo, mask5); #endif - array = _mm256_srli_epi32(current,20); /* No mask necessary */; - counts[EXTRACT256(array,0)] += 1; /* 0 */ - counts[EXTRACT256(array,1)] += 1; /* 16 */ - counts[EXTRACT256(array,2)] += 1; /* 32 */ - counts[EXTRACT256(array,3)] += 1; /* 48 */ - counts[EXTRACT256(array,4)] += 1; /* 64 */ - counts[EXTRACT256(array,5)] += 1; /* 80 */ - counts[EXTRACT256(array,6)] += 1; /* 96 */ - counts[EXTRACT256(array,7)] += 1; /* 112 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("0 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("16 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("32 
%04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("48 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("0 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("16 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("32 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("48 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 1 */ - counts[EXTRACT256(array,1)] += 1; /* 17 */ - counts[EXTRACT256(array,2)] += 1; /* 33 */ - counts[EXTRACT256(array,3)] += 1; /* 49 */ - counts[EXTRACT256(array,4)] += 1; /* 65 */ - counts[EXTRACT256(array,5)] += 1; /* 81 */ - counts[EXTRACT256(array,6)] += 1; /* 97 */ - counts[EXTRACT256(array,7)] += 1; /* 113 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("1 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - 
debug(printf("17 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("33 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("49 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("1 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("17 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("33 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("49 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 2 */ - counts[EXTRACT256(array,1)] += 1; /* 18 */ - counts[EXTRACT256(array,2)] += 1; /* 34 */ - counts[EXTRACT256(array,3)] += 1; /* 50 */ - counts[EXTRACT256(array,4)] += 1; /* 66 */ - counts[EXTRACT256(array,5)] += 1; /* 82 */ - counts[EXTRACT256(array,6)] += 1; /* 98 */ - counts[EXTRACT256(array,7)] += 1; /* 114 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* 
CHECK_FOR_OVERFLOW */ - debug(printf("2 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("50 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("2 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("50 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 3 */ - counts[EXTRACT256(array,1)] += 1; /* 19 */ - counts[EXTRACT256(array,2)] += 1; /* 35 */ - counts[EXTRACT256(array,3)] += 1; /* 51 */ - counts[EXTRACT256(array,4)] += 1; /* 67 */ - counts[EXTRACT256(array,5)] += 1; /* 83 */ - counts[EXTRACT256(array,6)] += 1; /* 99 */ - counts[EXTRACT256(array,7)] += 1; /* 115 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("3 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("51 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("3 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("51 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 4 */ - counts[EXTRACT256(array,1)] += 1; /* 20 */ - counts[EXTRACT256(array,2)] += 1; /* 36 */ - counts[EXTRACT256(array,3)] += 1; /* 52 */ - counts[EXTRACT256(array,4)] += 1; /* 68 */ - counts[EXTRACT256(array,5)] += 1; /* 84 */ - counts[EXTRACT256(array,6)] += 1; /* 100 */ - counts[EXTRACT256(array,7)] += 1; /* 116 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = 
INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("4 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("20 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("52 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("4 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("20 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("52 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 5 */ - counts[EXTRACT256(array,1)] += 1; /* 21 */ - counts[EXTRACT256(array,2)] += 1; /* 37 */ - counts[EXTRACT256(array,3)] += 1; /* 53 */ - counts[EXTRACT256(array,4)] += 1; /* 69 */ - counts[EXTRACT256(array,5)] += 1; /* 85 */ - counts[EXTRACT256(array,6)] += 1; /* 101 */ - counts[EXTRACT256(array,7)] += 1; /* 117 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - 
inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("5 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("21 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("37 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("53 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("5 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("21 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("37 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("53 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 6 */ - counts[EXTRACT256(array,1)] += 1; /* 22 */ - counts[EXTRACT256(array,2)] += 1; /* 38 */ - counts[EXTRACT256(array,3)] += 1; /* 54 */ - counts[EXTRACT256(array,4)] += 1; /* 70 */ - counts[EXTRACT256(array,5)] += 1; /* 86 */ - counts[EXTRACT256(array,6)] += 1; /* 102 */ - counts[EXTRACT256(array,7)] += 1; /* 118 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("6 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("38 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("54 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("6 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("38 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("54 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 7 */ - counts[EXTRACT256(array,1)] += 1; /* 23 */ - counts[EXTRACT256(array,2)] += 1; /* 39 */ - counts[EXTRACT256(array,3)] += 1; /* 55 */ - counts[EXTRACT256(array,4)] += 1; /* 71 */ - counts[EXTRACT256(array,5)] += 1; /* 87 */ - counts[EXTRACT256(array,6)] += 1; /* 103 */ - counts[EXTRACT256(array,7)] += 1; /* 119 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; 
- } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("7 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("55 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("7 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("55 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 8 */ - counts[EXTRACT256(array,1)] += 1; /* 24 */ - counts[EXTRACT256(array,2)] += 1; /* 40 */ - counts[EXTRACT256(array,3)] += 1; /* 56 */ - counts[EXTRACT256(array,4)] += 1; /* 72 */ - counts[EXTRACT256(array,5)] += 1; /* 88 */ - counts[EXTRACT256(array,6)] += 1; /* 104 */ - counts[EXTRACT256(array,7)] += 1; /* 120 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = 
INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("8 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("24 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("56 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("8 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("24 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("56 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 9 */ - counts[EXTRACT256(array,1)] += 1; /* 25 */ - counts[EXTRACT256(array,2)] += 1; /* 41 */ - counts[EXTRACT256(array,3)] += 1; /* 57 */ - counts[EXTRACT256(array,4)] += 1; /* 73 */ - counts[EXTRACT256(array,5)] += 1; /* 89 */ - counts[EXTRACT256(array,6)] += 1; /* 105 */ - counts[EXTRACT256(array,7)] += 1; /* 121 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - 
debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("9 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("25 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("57 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("9 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("25 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("57 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( current, bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 10 */ - counts[EXTRACT256(array,1)] += 1; /* 26 */ - counts[EXTRACT256(array,2)] += 1; /* 42 */ - counts[EXTRACT256(array,3)] += 1; /* 58 */ - counts[EXTRACT256(array,4)] += 1; /* 74 */ - counts[EXTRACT256(array,5)] += 1; /* 90 */ - counts[EXTRACT256(array,6)] += 1; /* 106 */ - counts[EXTRACT256(array,7)] += 1; /* 122 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = 
_mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("10 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("58 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("10 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("58 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("23 %04X => %d\n",masked,counts[masked])); - oligo = _mm256_or_si256( _mm256_srli_epi32(next,22), _mm256_slli_epi32(current,10)); - array = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 11 */ - counts[EXTRACT256(array,1)] += 1; /* 27 */ - counts[EXTRACT256(array,2)] += 1; /* 43 */ - counts[EXTRACT256(array,3)] += 1; 
/* 59 */ - counts[EXTRACT256(array,4)] += 1; /* 75 */ - counts[EXTRACT256(array,5)] += 1; /* 91 */ - counts[EXTRACT256(array,6)] += 1; /* 107 */ - counts[EXTRACT256(array,7)] += 1; /* 123 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("11 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("43 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("59 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("11 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("43 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("59 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 12 */ - 
counts[EXTRACT256(array,1)] += 1; /* 28 */ - counts[EXTRACT256(array,2)] += 1; /* 44 */ - counts[EXTRACT256(array,3)] += 1; /* 60 */ - counts[EXTRACT256(array,4)] += 1; /* 76 */ - counts[EXTRACT256(array,5)] += 1; /* 92 */ - counts[EXTRACT256(array,6)] += 1; /* 108 */ - counts[EXTRACT256(array,7)] += 1; /* 124 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("12 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("28 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("60 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("12 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("28 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("60 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = 
_mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 13 */ - counts[EXTRACT256(array,1)] += 1; /* 29 */ - counts[EXTRACT256(array,2)] += 1; /* 45 */ - counts[EXTRACT256(array,3)] += 1; /* 61 */ - counts[EXTRACT256(array,4)] += 1; /* 77 */ - counts[EXTRACT256(array,5)] += 1; /* 93 */ - counts[EXTRACT256(array,6)] += 1; /* 109 */ - counts[EXTRACT256(array,7)] += 1; /* 125 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("13 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("45 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("61 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("13 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("45 %04X => 
%d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("61 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 14 */ - counts[EXTRACT256(array,1)] += 1; /* 30 */ - counts[EXTRACT256(array,2)] += 1; /* 46 */ - counts[EXTRACT256(array,3)] += 1; /* 62 */ - counts[EXTRACT256(array,4)] += 1; /* 78 */ - counts[EXTRACT256(array,5)] += 1; /* 94 */ - counts[EXTRACT256(array,6)] += 1; /* 110 */ - counts[EXTRACT256(array,7)] += 1; /* 126 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("14 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("30 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("46 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("62 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("14 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - 
debug(printf("30 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("46 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("62 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( oligo, bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 15 */ - counts[EXTRACT256(array,1)] += 1; /* 31 */ - counts[EXTRACT256(array,2)] += 1; /* 47 */ - counts[EXTRACT256(array,3)] += 1; /* 63 */ - counts[EXTRACT256(array,4)] += 1; /* 79 */ - counts[EXTRACT256(array,5)] += 1; /* 95 */ - counts[EXTRACT256(array,6)] += 1; /* 111 */ - counts[EXTRACT256(array,7)] += 1; /* 127 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("15 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("31 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("63 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - 
debug(printf("15 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("31 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("63 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("22 %04X => %d\n",masked,counts[masked])); - return; -} -#endif + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("21 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("20 %04X => %d\n",masked,counts[masked])); -#if !defined(HAVE_AVX2) -static int -store_6mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, - Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { - Genomecomp_T masked, oligo; -#ifdef INDIVIDUAL_SHIFTS -#elif defined(SIMD_MASK_THEN_STORE) - UINT4 _masked[4] __attribute__ ((aligned (16))); - __m128i _oligo; + _oligo = _mm_srli_epi32(_oligo, 8); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); #else - __m128i _oligo, _masked; + _masked = _mm_and_si128(_oligo, mask5); +#endif + + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("19 %04X => %d\n",masked,counts[masked])); + + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("18 %04X => %d\n",masked,counts[masked])); + + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("17 %04X => %d\n",masked,counts[masked])); + + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("16 %04X => %d\n",masked,counts[masked])); #endif - oligo = nexthigh_rev >> 22; /* For 27..31 */ - oligo |= low_rev << 10; + oligo = low_rev >> 24; /* For 15..12 */ + oligo |= high_rev << 8; #ifdef INDIVIDUAL_SHIFTS - masked = oligo & MASK6; /* 31 
*/ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; - } - - masked = (oligo >> 2) & MASK6; /* 30 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; - } + masked = oligo & MASK5; /* 15 */ + INCR_COUNT(counts[masked]); + debug(printf("15 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 4) & MASK6; /* 29 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; - } + masked = (oligo >> 2) & MASK5; /* 14 */ + INCR_COUNT(counts[masked]); + debug(printf("14 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 6) & MASK6; /* 28 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; - } + masked = (oligo >> 4) & MASK5; /* 13 */ + INCR_COUNT(counts[masked]); + debug(printf("13 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 8) & MASK6; /* 27 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; - } + masked = (oligo >> 6) & MASK5; /* 12 */ + INCR_COUNT(counts[masked]); + debug(printf("12 %04X => %d\n",masked,counts[masked])); #else _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); #else - _masked = _mm_and_si128(_oligo, mask6); + _masked = _mm_and_si128(_oligo, mask5); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - 
assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; - } + INCR_COUNT(counts[masked]); + debug(printf("15 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; - } + INCR_COUNT(counts[masked]); + debug(printf("14 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; - } + INCR_COUNT(counts[masked]); + debug(printf("13 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; - } - - - masked = (oligo >> 8) & MASK6; /* 27 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; - } + INCR_COUNT(counts[masked]); + debug(printf("12 %04X => %d\n",masked,counts[masked])); #endif #ifdef INDIVIDUAL_SHIFTS - masked = low_rev & MASK6; /* 26 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; - } + masked = high_rev & MASK5; /* 11 */ + INCR_COUNT(counts[masked]); + debug(printf("11 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 2) & MASK6; /* 25 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; - } + masked = (high_rev >> 2) & MASK5; 
/* 10 */ + INCR_COUNT(counts[masked]); + debug(printf("10 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 4) & MASK6; /* 24 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; - } + masked = (high_rev >> 4) & MASK5; /* 9 */ + INCR_COUNT(counts[masked]); + debug(printf("9 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 6) & MASK6; /* 23 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; - } + masked = (high_rev >> 6) & MASK5; /* 8 */ + INCR_COUNT(counts[masked]); + debug(printf("8 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 8) & MASK6; /* 22 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; - } + masked = (high_rev >> 8) & MASK5; /* 7 */ + INCR_COUNT(counts[masked]); + debug(printf("7 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 10) & MASK6; /* 21 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; - } + masked = (high_rev >> 10) & MASK5; /* 6 */ + INCR_COUNT(counts[masked]); + debug(printf("6 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 12) & MASK6; /* 20 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; - } + masked = (high_rev >> 12) & MASK5; /* 5 */ + INCR_COUNT(counts[masked]); + debug(printf("5 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 14) & MASK6; /* 19 */ - if (counts[masked]) { - assert(pointers[masked] > 
positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; - } + masked = (high_rev >> 14) & MASK5; /* 4 */ + INCR_COUNT(counts[masked]); + debug(printf("4 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 16) & MASK6; /* 18 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; - } + masked = (high_rev >> 16) & MASK5; /* 3 */ + INCR_COUNT(counts[masked]); + debug(printf("3 %04X => %d\n",masked,counts[masked])); - masked = (low_rev >> 18) & MASK6; /* 17 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; - } + masked = (high_rev >> 18) & MASK5; /* 2 */ + INCR_COUNT(counts[masked]); + debug(printf("2 %04X => %d\n",masked,counts[masked])); - masked = low_rev >> 20; /* 16, No mask necessary */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; - } + masked = (high_rev >> 20) & MASK5; /* 1 */ + INCR_COUNT(counts[masked]); + debug(printf("1 %04X => %d\n",masked,counts[masked])); + + masked = high_rev >> 22; /* 0, No mask necessary */ + INCR_COUNT(counts[masked]); + debug(printf("0 %04X => %d\n",masked,counts[masked])); #else - _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6); + _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); #else - _masked = _mm_and_si128(_oligo, mask6); + _masked = _mm_and_si128(_oligo, mask5); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - 
assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; - } + INCR_COUNT(counts[masked]); + debug(printf("11 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; - } + INCR_COUNT(counts[masked]); + debug(printf("10 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; - } + INCR_COUNT(counts[masked]); + debug(printf("9 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; - } + INCR_COUNT(counts[masked]); + debug(printf("8 %04X => %d\n",masked,counts[masked])); _oligo = _mm_srli_epi32(_oligo, 8); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); #else - _masked = _mm_and_si128(_oligo, mask6); + _masked = _mm_and_si128(_oligo, mask5); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; - } + INCR_COUNT(counts[masked]); + debug(printf("7 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; - } + INCR_COUNT(counts[masked]); + debug(printf("6 
%04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; - } + INCR_COUNT(counts[masked]); + debug(printf("5 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; - } + INCR_COUNT(counts[masked]); + debug(printf("4 %04X => %d\n",masked,counts[masked])); _oligo = _mm_srli_epi32(_oligo, 8); #ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6)); + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); #else - _masked = _mm_and_si128(_oligo, mask6); + _masked = _mm_and_si128(_oligo, mask5); #endif masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; - } + INCR_COUNT(counts[masked]); + debug(printf("3 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; - } + INCR_COUNT(counts[masked]); + debug(printf("2 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; - } + INCR_COUNT(counts[masked]); + debug(printf("1 %04X => %d\n",masked,counts[masked])); + + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("0 %04X => %d\n",masked,counts[masked])); #endif + return; +} - oligo = low_rev >> 
22; /* For 15..11 */ - oligo |= high_rev << 10; +#else /* HAVE_AVX2 */ -#ifdef INDIVIDUAL_SHIFTS - masked = oligo & MASK6; /* 15 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; - } +static void +count_5mers_fwd_32 (Count_T *counts, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { + Genomecomp_T masked, oligo; + __m256i _oligo, _masked; - masked = (oligo >> 2) & MASK6; /* 14 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; - } - masked = (oligo >> 4) & MASK6; /* 13 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; - } + oligo = nexthigh_rev >> 24; /* For 31..28 */ + oligo |= low_rev << 8; - masked = (oligo >> 6) & MASK6; /* 12 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; - } + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask5); - masked = (oligo >> 8) & MASK6; /* 11 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("31 %04X => %d\n",masked,counts[masked])); -#else - _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6)); -#else - _masked = _mm_and_si128(_oligo, mask6); -#endif + masked = EXTRACT256(_masked,1); + 
INCR_COUNT(counts[masked]); + debug(printf("30 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; - } + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("29 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; - } + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("28 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; - } - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; - } + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask5); + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("27 %04X => %d\n",masked,counts[masked])); - masked = (oligo >> 8) & MASK6; /* 11 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; - } -#endif + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("26 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("25 %04X => %d\n",masked,counts[masked])); -#ifdef INDIVIDUAL_SHIFTS - masked = high_rev & MASK6; /* 10 */ - if (counts[masked]) { - 
assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; - } + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("24 %04X => %d\n",masked,counts[masked])); - masked = (high_rev >> 2) & MASK6; /* 9 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; - } + masked = EXTRACT256(_masked,4); + INCR_COUNT(counts[masked]); + debug(printf("23 %04X => %d\n",masked,counts[masked])); - masked = (high_rev >> 4) & MASK6; /* 8 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; - } + masked = EXTRACT256(_masked,5); + INCR_COUNT(counts[masked]); + debug(printf("22 %04X => %d\n",masked,counts[masked])); - masked = (high_rev >> 6) & MASK6; /* 7 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; - } + masked = EXTRACT256(_masked,6); + INCR_COUNT(counts[masked]); + debug(printf("21 %04X => %d\n",masked,counts[masked])); - masked = (high_rev >> 8) & MASK6; /* 6 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; - } + masked = EXTRACT256(_masked,7); + INCR_COUNT(counts[masked]); + debug(printf("20 %04X => %d\n",masked,counts[masked])); - masked = (high_rev >> 10) & MASK6; /* 5 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; - } - masked = (high_rev >> 12) & MASK6; /* 4 */ - if (counts[masked]) { - 
assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; - } + _oligo = _mm256_srli_epi32(_oligo, 16); + _masked = _mm256_and_si256(_oligo, bigmask5); - masked = (high_rev >> 14) & MASK6; /* 3 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("19 %04X => %d\n",masked,counts[masked])); - masked = (high_rev >> 16) & MASK6; /* 2 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; - } + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("18 %04X => %d\n",masked,counts[masked])); - masked = (high_rev >> 18) & MASK6; /* 1 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; - } + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("17 %04X => %d\n",masked,counts[masked])); - masked = high_rev >> 20; /* 0, No mask necessary */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; - } - -#else - _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6)); -#else - _masked = _mm_and_si128(_oligo, mask6); -#endif + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("16 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > 
positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; - } - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; - } + oligo = low_rev >> 24; /* For 15..12 */ + oligo |= high_rev << 8; - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; - } + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask5); - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("15 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("14 %04X => %d\n",masked,counts[masked])); - _oligo = _mm_srli_epi32(_oligo, 8); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6)); -#else - _masked = _mm_and_si128(_oligo, mask6); -#endif + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("13 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; - } + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("12 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at 
%u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; - } - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; - } + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask5); - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("11 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("10 %04X => %d\n",masked,counts[masked])); - _oligo = _mm_srli_epi32(_oligo, 8); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask6)); -#else - _masked = _mm_and_si128(_oligo, mask6); -#endif + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("9 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; - } + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("8 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; - } + masked = EXTRACT256(_masked,4); + INCR_COUNT(counts[masked]); + debug(printf("7 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at 
%u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; - } -#endif + masked = EXTRACT256(_masked,5); + INCR_COUNT(counts[masked]); + debug(printf("6 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,6); + INCR_COUNT(counts[masked]); + debug(printf("5 %04X => %d\n",masked,counts[masked])); - return chrpos - 32; -} + masked = EXTRACT256(_masked,7); + INCR_COUNT(counts[masked]); + debug(printf("4 %04X => %d\n",masked,counts[masked])); -#else /* HAVE_AVX2 */ -static int -store_6mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, - Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { - Genomecomp_T masked, oligo; - __m256i _oligo, _masked, _counts; + _oligo = _mm256_srli_epi32(_oligo, 16); + _masked = _mm256_and_si256(_oligo, bigmask5); + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("3 %04X => %d\n",masked,counts[masked])); - oligo = nexthigh_rev >> 22; /* For 27..31 */ - oligo |= low_rev << 10; + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("2 %04X => %d\n",masked,counts[masked])); - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask6); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; - } - - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; - } - - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; - 
} - - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; - } - - - if (EXTRACT256(_counts,4)) { - masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; - } - - - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask6); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; - } - - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; - } - - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; - } - - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; - } - - if (EXTRACT256(_counts,4)) { - masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; - } - - if (EXTRACT256(_counts,5)) { - masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; - } - - if 
(EXTRACT256(_counts,6)) { - masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; - } - - if (EXTRACT256(_counts,7)) { - masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; - } - - - _oligo = _mm256_srli_epi32(_oligo, 16); - _masked = _mm256_and_si256(_oligo, bigmask6); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; - } - - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; - } - - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; - } + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("1 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("0 %04X => %d\n",masked,counts[masked])); - oligo = low_rev >> 22; /* For 15..11 */ - oligo |= high_rev << 10; + return; +} - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask6); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); +#endif /* HAVE_AVX2 */ - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - 
table[--pointers[masked]] = chrpos - 16; - } - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; - } - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; - } +#ifdef HAVE_SSE2 +static void +extract_5mers_fwd_simd_64 (__m128i *out, __m128i current, __m128i next) { + __m128i oligo; - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; - } + _mm_store_si128(out++, _mm_srli_epi32(current,22)); /* No mask necessary */ + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,20), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask5)); + _mm_store_si128(out++, _mm_and_si128( current, mask5)); + oligo = _mm_or_si128( _mm_srli_epi32(next,24), _mm_slli_epi32(current,8)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask5)); + _mm_store_si128(out++, 
_mm_and_si128( _mm_srli_epi32(oligo,2), mask5)); + _mm_store_si128(out++, _mm_and_si128( oligo, mask5)); - if (EXTRACT256(_counts,4)) { - masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; - } + return; +} +#ifdef USE_UNORDERED_5 +static Chrpos_T +store_5mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16]; + + extract_5mers_fwd_simd_64(array,current,next); + return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array); +} +#else +/* Includes extract_5mers_fwd_simd_64_ordered (__m128i *out, __m128i current, __m128i next) */ +static Chrpos_T +store_5mers_fwd_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16], *out; + __m128i oligo; + __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m128i _u0, _u1, _u2, _u3; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask6); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + out = &(array[0]); - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; - } + oligo = _mm_or_si128( _mm_srli_epi32(next,24), _mm_slli_epi32(current,8)); + /* _row0 = _mm_and_si128( oligo, mask5); */ + /* _row1 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask5); */ + _t0 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,14), oligo, 0x55), mask5_epi16); + + /* _row2 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask5); */ + /* _row3 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask5); */ + _t1 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,10), _mm_srli_epi32(oligo, 4), 0x55), mask5_epi16); + + + /* 
_row4 = _mm_and_si128( current, mask5); */ + /* _row5 = _mm_and_si128( _mm_srli_epi32(current,2), mask5); */ + _t2 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,14), current, 0x55), mask5_epi16); + + /* _row6 = _mm_and_si128( _mm_srli_epi32(current,4), mask5); */ + /* _row7 = _mm_and_si128( _mm_srli_epi32(current,6), mask5); */ + _t3 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,10), _mm_srli_epi32(current, 4), 0x55), mask5_epi16); + + /* _row8 = _mm_and_si128( _mm_srli_epi32(current,8), mask5); */ + /* _row9 = _mm_and_si128( _mm_srli_epi32(current,10), mask5); */ + _t4 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,6), _mm_srli_epi32(current, 8), 0x55), mask5_epi16); + + /* _row10 = _mm_and_si128( _mm_srli_epi32(current,12), mask5); */ + /* _row11 = _mm_and_si128( _mm_srli_epi32(current,14), mask5); */ + _t5 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,2), _mm_srli_epi32(current, 12), 0x55), mask5_epi16); + + /* _row12 = _mm_and_si128( _mm_srli_epi32(current,16), mask5); */ + /* _row13 = _mm_and_si128( _mm_srli_epi32(current,18), mask5); */ + _t6 = _mm_and_si128(_mm_blend_epi16(_mm_srli_epi32(current,2), _mm_srli_epi32(current, 16), 0x55), mask5_epi16); + + /* _row14 = _mm_and_si128( _mm_srli_epi32(current,20), mask5); */ + /* _row15 = _mm_srli_epi32(current,22); */ /* No mask necessary */ + _t7 = _mm_and_si128(_mm_blend_epi16(_mm_srli_epi32(current,6), _mm_srli_epi32(current, 20), 0x55), mask5_epi16); + + + /* Split: top half */ + _u0 = _mm_unpackhi_epi32(_t0,_t1); + _u1 = _mm_unpackhi_epi32(_t2,_t3); + _u2 = _mm_unpackhi_epi32(_t4,_t5); + _u3 = _mm_unpackhi_epi32(_t6,_t7); + + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); + _mm_store_si128(out++, 
_mm_cvtepu16_epi32(_u1)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3)); + + /* Split: bottom half */ + _u0 = _mm_unpacklo_epi32(_t0,_t1); + _u1 = _mm_unpacklo_epi32(_t2,_t3); + _u2 = _mm_unpacklo_epi32(_t4,_t5); + _u3 = _mm_unpacklo_epi32(_t6,_t7); + + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3)); - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; - } + return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; - } +#ifdef HAVE_AVX2 +static void +extract_5mers_fwd_simd_128 (__m256i *out, __m256i current, __m256i next) { + __m256i oligo; - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; - } + _mm256_store_si256(out++, _mm256_srli_epi32(current,22)); /* No mask necessary */ + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,20), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( 
_mm256_srli_epi32(current,16), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( current, bigmask5)); - if (EXTRACT256(_counts,4)) { - masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; - } + oligo = _mm256_or_si256( _mm256_srli_epi32(next,24), _mm256_slli_epi32(current,8)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask5)); - if (EXTRACT256(_counts,5)) { - masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; - } + return; +} - if (EXTRACT256(_counts,6)) { - masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; - } +#ifdef USE_UNORDERED_5 +static Chrpos_T +store_5mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + 
__m256i array[16]; + + extract_5mers_fwd_simd_128(array,current,next); + return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array); +} - if (EXTRACT256(_counts,7)) { - masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; - } +#else +/* Includes extract_5mers_fwd_simd_128_ordered (__m256i *out, __m256i current, __m256i next) */ +static Chrpos_T +store_5mers_fwd_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + __m256i array[16], *out; + __m256i oligo; + __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; + out = &(array[0]); - _oligo = _mm256_srli_epi32(_oligo, 16); - _masked = _mm256_and_si256(_oligo, bigmask6); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + oligo = _mm256_or_si256( _mm256_srli_epi32(next,24), _mm256_slli_epi32(current,8)); + /* _row0 = _mm256_and_si256( oligo, bigmask5); */ + /* _row1 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask5); */ + _t0 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,14), oligo, 0x55), bigmask5_epi16); + + /* _row2 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask5); */ + /* _row3 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask5); */ + _t1 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,10), _mm256_srli_epi32(oligo,4), 0x55), bigmask5_epi16); + + + /* _row4 = _mm256_and_si256( current, bigmask5); */ + /* _row5 = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask5); */ + _t2 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,14), current, 0x55), bigmask5_epi16); + + /* _row6 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask5); */ + /* _row7 = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask5); */ + _t3 = 
_mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,10), _mm256_srli_epi32(current,4), 0x55), bigmask5_epi16); + + /* _row8 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask5); */ + /* _row9 = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask5); */ + _t4 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,6), _mm256_srli_epi32(current,8), 0x55), bigmask5_epi16); + + /* _row10 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask5); */ + /* _row11 = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask5); */ + _t5 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,2), _mm256_srli_epi32(current,12), 0x55), bigmask5_epi16); + + /* _row12 = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask5); */ + /* _row13 = _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask5); */ + _t6 = _mm256_and_si256(_mm256_blend_epi16(_mm256_srli_epi32(current,2), _mm256_srli_epi32(current,16), 0x55), bigmask5_epi16); + + /* _row14 = _mm256_and_si256( _mm256_srli_epi32(current,20), bigmask5); */ + /* _row15 = _mm256_srli_epi32(current,22); */ /* No mask necessary */ + _t7 = _mm256_and_si256(_mm256_blend_epi16(_mm256_srli_epi32(current,6), _mm256_srli_epi32(current,20), 0x55), bigmask5_epi16); + + + _u0 = _mm256_unpackhi_epi32(_t0,_t1); + _u1 = _mm256_unpackhi_epi32(_t2,_t3); + _u2 = _mm256_unpackhi_epi32(_t4,_t5); + _u3 = _mm256_unpackhi_epi32(_t6,_t7); + _u4 = _mm256_unpacklo_epi32(_t0,_t1); + _u5 = _mm256_unpacklo_epi32(_t2,_t3); + _u6 = _mm256_unpacklo_epi32(_t4,_t5); + _u7 = _mm256_unpacklo_epi32(_t6,_t7); + + + _t0 = _mm256_unpackhi_epi64(_u0,_u1); + _t1 = _mm256_unpackhi_epi64(_u2,_u3); + _t2 = _mm256_unpacklo_epi64(_u0,_u1); + _t3 = _mm256_unpacklo_epi64(_u2,_u3); + _t4 = _mm256_unpackhi_epi64(_u4,_u5); + _t5 = _mm256_unpackhi_epi64(_u6,_u7); + _t6 = _mm256_unpacklo_epi64(_u4,_u5); + _t7 = _mm256_unpacklo_epi64(_u6,_u7); + + + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,1))); + 
_mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,0))); + + return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif + +#ifdef HAVE_AVX512 +static void +extract_5mers_fwd_simd_256 (__m512i *out, __m512i current, __m512i next) { + __m512i oligo; + + _mm512_store_si512(out++, _mm512_srli_epi32(current,22)); /* No mask necessary */ + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,20), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,18), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask5)); + _mm512_store_si512(out++, 
_mm512_and_si512( _mm512_srli_epi32(current,12), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( current, hugemask5)); + + oligo = _mm512_or_si512( _mm512_srli_epi32(next,24), _mm512_slli_epi32(current,8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask5)); - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; - } + return; +} - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; - } +#ifdef USE_UNORDERED_5 +static Chrpos_T +store_5mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i next) { + __m512i array[16]; + + extract_5mers_fwd_simd_256(array,current,next); + return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array); +} - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 
31; - } +#else +/* Includes extract_5mers_fwd_simd_256_ordered (__m512i *out, __m512i current, __m512i next) */ +static Chrpos_T +store_5mers_fwd_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i next) { + __m512i array[16], *out; + __m512i oligo, _shuffle0, _shuffle1, _shuffle2; + __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; + + out = &(array[0]); + + oligo = _mm512_or_si512( _mm512_srli_epi32(next,24), _mm512_slli_epi32(current,8)); + _u0 = _mm512_and_si512( oligo, hugemask5); + /* _row1 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask5); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,14), highmask5); + _t0 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask5); + /* _row3 = _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask5); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,10), highmask5); + _t1 = _mm512_or_si512(_u0, _u1); + + + _u0 = _mm512_and_si512( current, hugemask5); + /* _row5 = _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask5); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,14), highmask5); + _t2 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask5); + /* _row7 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask5); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,10), highmask5); + _t3 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask5); + /* _row9 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask5); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,6), highmask5); + _t4 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask5); + /* _row11 = _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask5); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,2), highmask5); + _t5 = _mm512_or_si512(_u0, _u1); + + 
_u0 = _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask5); + /* _row13 = _mm512_and_si512( _mm512_srli_epi32(current,18), hugemask5); */ + _u1 = _mm512_and_si512( _mm512_srli_epi32(current,2), highmask5); + _t6 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,20), hugemask5); + /* _row15 = _mm512_srli_epi32(current,22); */ /* No mask necessary */ + _u1 = _mm512_and_si512( _mm512_srli_epi32(current,6), highmask5); + _t7 = _mm512_or_si512(_u0, _u1); + + + _u0 = _mm512_unpackhi_epi32(_t0,_t1); + _u1 = _mm512_unpackhi_epi32(_t2,_t3); + _u2 = _mm512_unpackhi_epi32(_t4,_t5); + _u3 = _mm512_unpackhi_epi32(_t6,_t7); + _u4 = _mm512_unpacklo_epi32(_t0,_t1); + _u5 = _mm512_unpacklo_epi32(_t2,_t3); + _u6 = _mm512_unpacklo_epi32(_t4,_t5); + _u7 = _mm512_unpacklo_epi32(_t6,_t7); + + + /* Split: top half */ + _shuffle0 = _mm512_setr_epi64(7, 8+7, 6, 8+6, 5, 8+5, 4, 8+4); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); + + + _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, 
_shuffle2, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + + /* Split: bottom half */ + _shuffle0 = _mm512_setr_epi64(3, 8+3, 2, 8+2, 1, 8+1, 0, 8+0); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); + + + /* _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); */ + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + /* _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); */ + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); - return chrpos - 32; + return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array); } - -#endif /* HAVE_AVX2 */ +#endif +#endif #if !defined(HAVE_AVX2) -static void -count_5mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { +static int +store_5mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + 
Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { Genomecomp_T masked, oligo; #ifdef INDIVIDUAL_SHIFTS #elif defined(SIMD_MASK_THEN_STORE) @@ -24461,20 +18469,28 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK5; /* 31 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("31 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; + } masked = (oligo >> 2) & MASK5; /* 30 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("30 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; + } masked = (oligo >> 4) & MASK5; /* 29 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("29 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; + } masked = (oligo >> 6) & MASK5; /* 28 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("28 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; + } #else _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); @@ -24485,72 +18501,104 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("31 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("30 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 
1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("29 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("28 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; + } #endif #ifdef INDIVIDUAL_SHIFTS masked = low_rev & MASK5; /* 27 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("27 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; + } masked = (low_rev >> 2) & MASK5; /* 26 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("26 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; + } masked = (low_rev >> 4) & MASK5; /* 25 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("25 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; + } masked = (low_rev >> 6) & MASK5; /* 24 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("24 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; + } masked = (low_rev >> 8) & MASK5; /* 23 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("23 %04X => 
%d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; + } masked = (low_rev >> 10) & MASK5; /* 22 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("22 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; + } masked = (low_rev >> 12) & MASK5; /* 21 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("21 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; + } masked = (low_rev >> 14) & MASK5; /* 20 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("20 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; + } masked = (low_rev >> 16) & MASK5; /* 19 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("19 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + (--counts[masked])] = chrpos - 12; + } masked = (low_rev >> 18) & MASK5; /* 18 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("18 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; + } masked = (low_rev >> 20) & MASK5; /* 17 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("17 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; + 
} masked = low_rev >> 22; /* 16, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("16 %04X => %d\n",masked,counts[masked])); - + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; + } + #else _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6); #ifdef SIMD_MASK_THEN_STORE @@ -24560,20 +18608,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("27 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("26 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("25 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("24 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -24584,20 +18640,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("23 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; + } masked = EXTRACT(_masked,1); - 
INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("22 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("21 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("20 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -24608,20 +18672,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("19 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + (--counts[masked])] = chrpos - 12; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("18 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("17 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("16 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + 
table[positions[masked] + (--counts[masked])] = chrpos - 15; + } #endif @@ -24630,20 +18702,28 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK5; /* 15 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("15 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; + } masked = (oligo >> 2) & MASK5; /* 14 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("14 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; + } masked = (oligo >> 4) & MASK5; /* 13 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("13 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; + } masked = (oligo >> 6) & MASK5; /* 12 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("12 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = chrpos - 19; + } #else _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); @@ -24654,71 +18734,103 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("15 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("14 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; + 
} masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("13 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("12 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = chrpos - 19; + } #endif #ifdef INDIVIDUAL_SHIFTS masked = high_rev & MASK5; /* 11 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("11 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; + } masked = (high_rev >> 2) & MASK5; /* 10 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("10 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; + } masked = (high_rev >> 4) & MASK5; /* 9 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("9 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; + } masked = (high_rev >> 6) & MASK5; /* 8 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("8 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; + } masked = (high_rev >> 8) & MASK5; /* 7 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("7 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + 
debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; + } masked = (high_rev >> 10) & MASK5; /* 6 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("6 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; + } masked = (high_rev >> 12) & MASK5; /* 5 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("5 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; + } masked = (high_rev >> 14) & MASK5; /* 4 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("4 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; + } masked = (high_rev >> 16) & MASK5; /* 3 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("3 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; + } masked = (high_rev >> 18) & MASK5; /* 2 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("2 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; + } masked = (high_rev >> 20) & MASK5; /* 1 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("1 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; + } masked = high_rev >> 22; /* 0, No mask necessary */ - 
INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("0 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; + } #else _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6); @@ -24729,20 +18841,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("11 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("10 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("9 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("8 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -24753,20 +18873,28 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("7 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("6 %04X => 
%d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("5 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("4 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; + } _oligo = _mm_srli_epi32(_oligo, 8); @@ -24777,4582 +18905,878 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("3 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; + } masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("2 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; + } masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("1 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; + } masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("0 %04X => %d\n",masked,counts[masked])); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; + } #endif - 
return; + return chrpos - 32; } #else /* HAVE_AVX2 */ -static void -count_5mers_fwd (Count_T *counts, Inquery_T *inquery, Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { +static int +store_5mers_fwd_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { Genomecomp_T masked, oligo; - __m256i _oligo, _masked; -#ifdef CHECK_FOR_OVERFLOW - __m256i _counts_after, _counts_neg; -#endif + __m256i _oligo, _masked, _counts; + __m256i _blocks, _envelopes, _addresses, _address_mask, _count_mask; + + + _address_mask = _mm256_set1_epi32(0x3); + _count_mask = _mm256_set1_epi32(0xFF); oligo = nexthigh_rev >> 24; /* For 31..28 */ oligo |= low_rev << 8; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask5); - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("31 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("30 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("29 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("28 %04X => %d\n",masked,counts[masked])); - -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low4); - _counts_neg = 
_mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; } } -#endif /* CHECK_FOR_OVERFLOW */ - - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask5); - - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("27 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("26 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("25 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; + } + } - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("24 %04X => %d\n",masked,counts[masked])); + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; + } + } - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("23 %04X => 
%d\n",masked,counts[masked])); + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; + } + } - masked = EXTRACT256(_masked,5); - counts[masked] += 1; - debug(printf("22 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT256(_masked,6); - counts[masked] += 1; - debug(printf("21 %04X => %d\n",masked,counts[masked])); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask5); - masked = EXTRACT256(_masked,7); - counts[masked] += 1; - debug(printf("20 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,1)) { + masked = 
EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,4)) { + masked = EXTRACT256(_masked,4); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,5)) { + masked = EXTRACT256(_masked,5); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,6)) { + masked = EXTRACT256(_masked,6); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; } - if 
(EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,7)) { + masked = EXTRACT256(_masked,7); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; } } -#endif /* CHECK_FOR_OVERFLOW */ _oligo = _mm256_srli_epi32(_oligo, 16); _masked = _mm256_and_si256(_oligo, bigmask5); - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("19 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("18 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("17 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("16 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + 
(--counts[masked])] = chrpos - 12; } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; } } -#endif /* CHECK_FOR_OVERFLOW */ oligo = low_rev >> 24; /* For 15..12 */ oligo |= high_rev << 8; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask5); - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("15 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("14 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("13 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("12 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = 
_mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = 
chrpos - 19; } } -#endif /* CHECK_FOR_OVERFLOW */ - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask5); - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("11 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("10 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("9 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("8 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("7 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,5); - counts[masked] += 1; - debug(printf("6 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,6); - counts[masked] += 1; - debug(printf("5 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); - masked = EXTRACT256(_masked,7); - counts[masked] += 1; - debug(printf("4 %04X => %d\n",masked,counts[masked])); - -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; + if 
(EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,4)) { + masked = EXTRACT256(_masked,4); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,5)) { + masked = EXTRACT256(_masked,5); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); + 
table[positions[masked] + (--counts[masked])] = chrpos - 25; } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,6)) { + masked = EXTRACT256(_masked,6); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,7)) { + masked = EXTRACT256(_masked,7); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; } } -#endif /* CHECK_FOR_OVERFLOW */ _oligo = _mm256_srli_epi32(_oligo, 16); _masked = _mm256_and_si256(_oligo, bigmask5); - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("3 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("2 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("1 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("0 %04X => %d\n",masked,counts[masked])); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if 
(_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; + } + + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; } } -#endif /* CHECK_FOR_OVERFLOW */ - return; + return chrpos - 32; } #endif /* HAVE_AVX2 */ -#ifdef USE_SIMD_FOR_COUNTS -static void -extract_5mers_fwd_simd (__m128i *out, __m128i current, __m128i next) { - __m128i oligo; +#ifndef HAVE_SSE2 - _mm_store_si128(out++, _mm_srli_epi32(current,22)); /* No mask necessary */ - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,20), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask5)); - 
_mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask5)); - _mm_store_si128(out++, _mm_and_si128( current, mask5)); +static void +count_positions_fwd_std (Count_T *counts, int indexsize, Univcoord_T left, Univcoord_T left_plus_length, + int genestrand) { + int startdiscard, enddiscard; + Genomecomp_T ptr, startptr, endptr, high_rev, low_rev, nexthigh_rev, + low, high, nextlow; - oligo = _mm_or_si128( _mm_srli_epi32(next,24), _mm_slli_epi32(current,8)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask5)); - _mm_store_si128(out++, _mm_and_si128( oligo, mask5)); + debug(printf("Starting count_positions_fwd_std\n")); - return; -} + if (left_plus_length < (Univcoord_T) indexsize) { + left_plus_length = 0; + } else { + left_plus_length -= indexsize; + } -#ifdef HAVE_AVX2 -static void -extract_5mers_fwd_simd_128 (__m256i *out, __m256i current, __m256i next) { - __m256i oligo; + startptr = left/32U*3; + ptr = endptr = left_plus_length/32U*3; + startdiscard = left % 32; /* (left+pos5) % 32 */ + enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */ + + if (left_plus_length <= left) { + /* Skip */ - _mm256_store_si256(out++, _mm256_srli_epi32(current,22)); /* No mask necessary */ - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,20), 
bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( current, bigmask5)); + } else if (startptr == endptr) { +#ifdef WORDS_BIGENDIAN + high = Bigendian_convert_uint(ref_blocks[ptr]); + low = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); +#else + high = ref_blocks[ptr]; + low = ref_blocks[ptr+1]; + nextlow = ref_blocks[ptr+4]; +#endif - oligo = _mm256_or_si256( _mm256_srli_epi32(next,24), _mm256_slli_epi32(current,8)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask5)); + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); 
nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } + } - return; -} -#endif + high_rev = reverse_nt[low >> 16]; + high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); + low_rev = reverse_nt[high >> 16]; + low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); + nexthigh_rev = reverse_nt[nextlow >> 16]; + nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); + if (indexsize == 9) { + count_9mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); + } else if (indexsize == 8) { + count_8mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); + } else if (indexsize == 7) { + count_7mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); + } else if (indexsize == 6) { + count_6mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); + } else if (indexsize == 5) { + count_5mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); + } else { + fprintf(stderr,"indexsize %d not supported\n",indexsize); + abort(); + } -static void -count_5mers_fwd_simd (Count_T *counts, Inquery_T *inquery, __m128i current, __m128i next) { - __m128i oligo; -#ifdef HAVE_SSE4_1 - __m128i array; -#else - Genomecomp_T 
array[4]; -#endif -#if defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW) - __m128i _counts_after, _counts_neg; -#endif + } else { + /* Genome_print_blocks(ref_blocks,left,left+16); */ -#ifdef HAVE_SSE4_1 - array = _mm_srli_epi32(current,22); /* No mask necessary */ + /* End block */ +#ifdef WORDS_BIGENDIAN + high = Bigendian_convert_uint(ref_blocks[ptr]); + low = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,22)); /* No mask necessary */ + high = ref_blocks[ptr]; + low = ref_blocks[ptr+1]; + nextlow = ref_blocks[ptr+4]; #endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("0 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("16 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("32 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("48 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 0 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 16 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 32 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 48 */ -#endif - debug(printf("0 %04X 
=> %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("16 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("32 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("48 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,20), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,20), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("1 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("17 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("33 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("49 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 1 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 17 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 33 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 49 */ -#endif - debug(printf("1 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("17 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("33 %04X => 
%d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("49 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } + } -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,18), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,18), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - 
inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("2 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("18 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("34 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("50 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 2 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 18 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 34 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 50 */ -#endif - debug(printf("2 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("18 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("34 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("50 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + high_rev = reverse_nt[low >> 16]; + high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); + low_rev = reverse_nt[high >> 16]; + low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); + nexthigh_rev = reverse_nt[nextlow >> 16]; + nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,16), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,16), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if 
(_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("3 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("19 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("35 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("51 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 3 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 19 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 35 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 51 */ -#endif - debug(printf("3 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("19 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("35 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("51 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + if (indexsize == 9) { + count_9mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + } else if (indexsize == 8) { + count_8mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + } else if (indexsize == 7) { + count_7mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + } else if (indexsize == 6) { + count_6mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + } else if (indexsize == 5) { + count_5mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + } else { + abort(); + } -#ifdef HAVE_SSE4_1 - 
array = _mm_and_si128( _mm_srli_epi32(current,14), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("4 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("20 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("36 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("52 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 4 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 20 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 36 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 52 */ -#endif - debug(printf("4 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("20 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("36 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("52 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + /* Middle blocks */ + while (ptr > startptr + 3) { + ptr -= 3; -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,12), mask5); +#ifdef WORDS_BIGENDIAN + high = 
Bigendian_convert_uint(ref_blocks[ptr]); + low = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask5)); + high = ref_blocks[ptr]; + low = ref_blocks[ptr+1]; + nextlow = ref_blocks[ptr+4]; #endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("5 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("21 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("37 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("53 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 5 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 21 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 37 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 53 */ -#endif - debug(printf("5 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("21 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("37 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("53 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = 
_mm_and_si128( _mm_srli_epi32(current,10), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("6 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("22 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("38 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("54 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 6 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 22 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 38 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 54 */ -#endif - debug(printf("6 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("22 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("38 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("54 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else if (mode == 
CMET_NONSTRANDED) { + if (genestrand > 0) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } + } -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,8), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - 
debug(printf("7 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("23 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("39 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("55 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 7 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 23 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 39 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 55 */ -#endif - debug(printf("7 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("23 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("39 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("55 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + high_rev = reverse_nt[low >> 16]; + high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); + low_rev = reverse_nt[high >> 16]; + low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); + nexthigh_rev = reverse_nt[nextlow >> 16]; + nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); + + if (indexsize == 9) { + count_9mers_fwd_32(counts,high_rev,low_rev,nexthigh_rev); + } else if (indexsize == 8) { + count_8mers_fwd_32(counts,high_rev,low_rev,nexthigh_rev); + } else if (indexsize == 7) { + count_7mers_fwd_32(counts,high_rev,low_rev,nexthigh_rev); + } else if (indexsize == 6) { + count_6mers_fwd_32(counts,high_rev,low_rev,nexthigh_rev); + } else if (indexsize == 5) { + count_5mers_fwd_32(counts,high_rev,low_rev,nexthigh_rev); + } else { + abort(); + } + } -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,6), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - 
_counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("8 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("24 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("40 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("56 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 8 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 24 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 40 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 56 */ -#endif - debug(printf("8 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("24 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("40 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("56 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + ptr -= 3; -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,4), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if 
(_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("9 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("25 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("41 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("57 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 9 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 25 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 41 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 57 */ -#endif - debug(printf("9 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("25 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("41 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("57 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + /* Start block */ + assert(ptr == startptr); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,2), mask5); +#ifdef WORDS_BIGENDIAN + high = Bigendian_convert_uint(ref_blocks[ptr]); + low = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask5)); + high = ref_blocks[ptr]; + low = ref_blocks[ptr+1]; + nextlow = ref_blocks[ptr+4]; #endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW 
- _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("10 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("26 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("42 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("58 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 10 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 26 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 42 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 58 */ -#endif - debug(printf("10 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("26 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("42 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("58 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( current, mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum 
counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("11 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("27 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("43 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("59 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 11 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 27 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 43 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 59 */ -#endif - debug(printf("11 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("27 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("43 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("59 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else { + high = 
Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } + } + high_rev = reverse_nt[low >> 16]; + high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); + low_rev = reverse_nt[high >> 16]; + low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); + nexthigh_rev = reverse_nt[nextlow >> 16]; + nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); - oligo = _mm_or_si128( _mm_srli_epi32(next,24), _mm_slli_epi32(current,8)); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("12 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("28 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("44 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("60 %04X 
=> %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 12 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 28 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 44 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 60 */ -#endif - debug(printf("12 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("28 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("44 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("60 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + if (indexsize == 9) { + count_9mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + } else if (indexsize == 8) { + count_8mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + } else if (indexsize == 7) { + count_7mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + } else if (indexsize == 6) { + count_6mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + } else if (indexsize == 5) { + count_5mers_fwd_partial(counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + } else { + fprintf(stderr,"indexsize %d not supported\n",indexsize); + abort(); + } -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - 
inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("13 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("29 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("45 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("61 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 13 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 29 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 45 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 61 */ -#endif - debug(printf("13 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("29 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("45 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("61 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + } -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,2), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask5)); + return; +} #endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - 
inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("14 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("30 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("46 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("62 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 14 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 30 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 46 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 62 */ -#endif - debug(printf("14 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("30 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("46 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("62 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( oligo, mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask5)); -#endif + #ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("15 %04X => 
%d\n",array[0],counts[array[0]]));
-  debug(printf("31 %04X => %d\n",array[1],counts[array[1]]));
-  debug(printf("47 %04X => %d\n",array[2],counts[array[2]]));
-  debug(printf("63 %04X => %d\n",array[3],counts[array[3]]));
-
-#else
-  INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 15 */
-  INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 31 */
-  INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 47 */
-  INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 63 */
-#endif
-  debug(printf("15 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)]));
-  debug(printf("31 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)]));
-  debug(printf("47 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)]));
-  debug(printf("63 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)]));
+/* Reads the high and low words of four consecutive genome blocks from
+   block_ptr (word offsets 0/1, 3/4, 6/7 and 9/10), applies the reduction
+   function selected by mode and genestrand to each word, and packs the
+   eight words into a 256-bit vector: low0 in 32-bit element 0, high0 in
+   element 1, ..., high3 in element 7.  If mode matches none of the cases
+   below, the words are packed unmodified.
+   NOTE(review): nextlow (block_ptr[13]) is loaded and reduced but never
+   contributes to the result -- confirm whether it can be dropped. */
+static __m256i
+apply_mode_fwd_256 (Genomecomp_T *block_ptr, Mode_T mode, int genestrand) {
+  Genomecomp_T low0, high0, low1, high1, low2, high2, low3, high3, nextlow;
+
+  high0 = block_ptr[0]; low0 = block_ptr[1];
+  high1 = block_ptr[3]; low1 = block_ptr[4];
+  high2 = block_ptr[6]; low2 = block_ptr[7];
+  high3 = block_ptr[9]; low3 = block_ptr[10];
+  nextlow = block_ptr[13];
+
+  if (mode == CMET_STRANDED) {
+    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
+    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
+    nextlow = Cmet_reduce_ct(nextlow);
+  } else if (mode == CMET_NONSTRANDED) {
+    if (genestrand > 0) {
+      high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+      high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+      high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
+      high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
+      nextlow = Cmet_reduce_ct(nextlow);
+    } else {
+      high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+      high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+      high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
+      high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
+      nextlow = Cmet_reduce_ga(nextlow);
+    }
+
+  } else if (mode == ATOI_STRANDED) {
+    high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+    high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+    high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2);
+    high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3);
+    nextlow = Atoi_reduce_tc(nextlow);
+  } else if (mode == ATOI_NONSTRANDED) {
+    if (genestrand > 0) {
+      high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+      high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+      high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2);
+      high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3);
+      nextlow = Atoi_reduce_tc(nextlow);
+    } else {
+      high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+      high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+      high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2);
+      high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3);
+      nextlow = Atoi_reduce_ag(nextlow);
+    }
+
+  } else if (mode == TTOC_STRANDED) {
+    high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+    high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+    high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2);
+    high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3);
+    nextlow = Atoi_reduce_ag(nextlow);
+  } else if (mode == TTOC_NONSTRANDED) {
+    if (genestrand > 0) {
+      high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+      high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+      high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2);
+      high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3);
+      nextlow = Atoi_reduce_ag(nextlow);
+    } else {
+      high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+      high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+      high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2);
+      high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3);
+      nextlow = Atoi_reduce_tc(nextlow);
+    }
+  }
-  return;
+  return _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0);
 }
-
 #endif
-#ifdef HAVE_AVX2
-static void
-count_5mers_fwd_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) {
-  __m256i oligo;
-  __m256i array;
-#ifdef CHECK_FOR_OVERFLOW
-  __m256i _counts_after, _counts_neg;
+
+#ifdef HAVE_AVX512
+/* 512-bit counterpart of apply_mode_fwd_256: reads the high and low words
+   of eight consecutive genome blocks from block_ptr (word offsets 0/1, 3/4,
+   ..., 21/22), applies the reduction function selected by mode and
+   genestrand to each word, and packs the sixteen words into a 512-bit
+   vector: low0 in 32-bit element 0, high0 in element 1, ..., high7 in
+   element 15.  If mode matches none of the cases below, the words are
+   packed unmodified.
+   NOTE(review): nextlow (block_ptr[25]) is loaded and reduced but never
+   contributes to the result -- confirm whether it can be dropped. */
+static __m512i
+apply_mode_fwd_512 (Genomecomp_T *block_ptr, Mode_T mode, int genestrand) {
+  Genomecomp_T low0, high0, low1, high1, low2, high2, low3, high3,
+    low4, high4, low5, high5, low6, high6, low7, high7, nextlow;
+
+  high0 = block_ptr[0]; low0 = block_ptr[1];
+  high1 = block_ptr[3]; low1 = block_ptr[4];
+  high2 = block_ptr[6]; low2 = block_ptr[7];
+  high3 = block_ptr[9]; low3 = block_ptr[10];
+
+  high4 = block_ptr[12]; low4 = block_ptr[13];
+  high5 = block_ptr[15]; low5 = block_ptr[16];
+  high6 = block_ptr[18]; low6 = block_ptr[19];
+  high7 = block_ptr[21]; low7 = block_ptr[22];
+  nextlow = block_ptr[25];
+
+  if (mode == CMET_STRANDED) {
+    high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+    high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+    high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
+    high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
+    high4 = Cmet_reduce_ct(high4); low4 = Cmet_reduce_ct(low4);
+    high5 = Cmet_reduce_ct(high5); low5 = Cmet_reduce_ct(low5);
+    high6 = Cmet_reduce_ct(high6); low6 = Cmet_reduce_ct(low6);
+    high7 = Cmet_reduce_ct(high7); low7 = Cmet_reduce_ct(low7);
+    nextlow = Cmet_reduce_ct(nextlow);
+  } else if (mode == CMET_NONSTRANDED) {
+    if (genestrand > 0) {
+      high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0);
+      high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1);
+      high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2);
+      high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3);
+      high4 = Cmet_reduce_ct(high4); low4 = Cmet_reduce_ct(low4);
+      high5 = Cmet_reduce_ct(high5); low5 = Cmet_reduce_ct(low5);
+      high6 = Cmet_reduce_ct(high6); low6 = Cmet_reduce_ct(low6);
+      high7 = Cmet_reduce_ct(high7); low7 = Cmet_reduce_ct(low7);
+      nextlow = Cmet_reduce_ct(nextlow);
+    } else {
+      high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0);
+      high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1);
+      high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2);
+      high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3);
+      /* Lanes 4-7 must be written back to low4..low7, not low0..low3 */
+      high4 = Cmet_reduce_ga(high4); low4 = Cmet_reduce_ga(low4);
+      high5 = Cmet_reduce_ga(high5); low5 = Cmet_reduce_ga(low5);
+      high6 = Cmet_reduce_ga(high6); low6 = Cmet_reduce_ga(low6);
+      high7 = Cmet_reduce_ga(high7); low7 = Cmet_reduce_ga(low7);
+      nextlow = Cmet_reduce_ga(nextlow);
+    }
+
+  } else if (mode == ATOI_STRANDED) {
+    high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+    high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+    high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2);
+    high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3);
+    high4 = Atoi_reduce_tc(high4); low4 = Atoi_reduce_tc(low4);
+    high5 = Atoi_reduce_tc(high5); low5 = Atoi_reduce_tc(low5);
+    high6 = Atoi_reduce_tc(high6); low6 = Atoi_reduce_tc(low6);
+    high7 = Atoi_reduce_tc(high7); low7 = Atoi_reduce_tc(low7);
+    nextlow = Atoi_reduce_tc(nextlow);
+  } else if (mode == ATOI_NONSTRANDED) {
+    if (genestrand > 0) {
+      high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+      high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+      high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2);
+      high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3);
+      high4 = Atoi_reduce_tc(high4); low4 = Atoi_reduce_tc(low4);
+      high5 = Atoi_reduce_tc(high5); low5 = Atoi_reduce_tc(low5);
+      high6 = Atoi_reduce_tc(high6); low6 = Atoi_reduce_tc(low6);
+      high7 = Atoi_reduce_tc(high7); low7 = Atoi_reduce_tc(low7);
+      nextlow = Atoi_reduce_tc(nextlow);
+    } else {
+      high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+      high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+      high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2);
+      high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3);
+      /* Lanes 4-7 must be written back to low4..low7, not low0..low3 */
+      high4 = Atoi_reduce_ag(high4); low4 = Atoi_reduce_ag(low4);
+      high5 = Atoi_reduce_ag(high5); low5 = Atoi_reduce_ag(low5);
+      high6 = Atoi_reduce_ag(high6); low6 = Atoi_reduce_ag(low6);
+      high7 = Atoi_reduce_ag(high7); low7 = Atoi_reduce_ag(low7);
+      nextlow = Atoi_reduce_ag(nextlow);
+    }
+
+  } else if (mode == TTOC_STRANDED) {
+    high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+    high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+    high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2);
+    high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3);
+    high4 = Atoi_reduce_ag(high4); low4 = Atoi_reduce_ag(low4);
+    high5 = Atoi_reduce_ag(high5); low5 = Atoi_reduce_ag(low5);
+    high6 = Atoi_reduce_ag(high6); low6 = Atoi_reduce_ag(low6);
+    high7 = Atoi_reduce_ag(high7); low7 = Atoi_reduce_ag(low7);
+    nextlow = Atoi_reduce_ag(nextlow);
+  } else if (mode == TTOC_NONSTRANDED) {
+    /* TTOC, not ATOI: ATOI_NONSTRANDED is already handled above, so testing
+       it again here would leave this branch unreachable */
+    if (genestrand > 0) {
+      high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0);
+      high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1);
+      high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2);
+      high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3);
+      high4 = Atoi_reduce_ag(high4); low4 = Atoi_reduce_ag(low4);
+      high5 = Atoi_reduce_ag(high5); low5 = Atoi_reduce_ag(low5);
+      high6 = Atoi_reduce_ag(high6); low6 = Atoi_reduce_ag(low6);
+      high7 = Atoi_reduce_ag(high7); low7 = Atoi_reduce_ag(low7);
+      nextlow = Atoi_reduce_ag(nextlow);
+    } else {
+      high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0);
+      high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1);
+      high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2);
+      high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3);
+      /* Lanes 4-7 must be written back to low4..low7, not low0..low3 */
+      high4 = Atoi_reduce_tc(high4); low4 = Atoi_reduce_tc(low4);
+      high5 = Atoi_reduce_tc(high5); low5 = Atoi_reduce_tc(low5);
+      high6 = Atoi_reduce_tc(high6); low6 = Atoi_reduce_tc(low6);
+      high7 = Atoi_reduce_tc(high7); low7 = Atoi_reduce_tc(low7);
+      nextlow = Atoi_reduce_tc(nextlow);
+    }
+  }
+
+  return _mm512_set_epi32(high7,low7,high6,low6,high5,low5,high4,low4,
+			  high3,low3,high2,low2,high1,low1,high0,low0);
+}
 #endif
-  array = _mm256_srli_epi32(current,22); /* No mask necessary */
-  counts[EXTRACT256(array,0)] += 1; /* 0 */
-  counts[EXTRACT256(array,1)] += 1; /* 16 */
-  counts[EXTRACT256(array,2)] += 1; /* 32 */
-  counts[EXTRACT256(array,3)] += 1; /* 48 */
-  counts[EXTRACT256(array,4)] += 1; /* 64 */
-  counts[EXTRACT256(array,5)] += 1; /* 80 */
-  counts[EXTRACT256(array,6)] += 1; /* 96 */
-  counts[EXTRACT256(array,7)] += 1; /* 112 */
-#ifdef CHECK_FOR_OVERFLOW
-  _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4);
-  _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256);
-  if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) {
-    debug(printf("Exceeded maximum counts\n"));
-    if (EXTRACT256(_counts_neg,0)) {
-      inquery[EXTRACT256(array,0)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,1)) {
-      inquery[EXTRACT256(array,1)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,2)) {
-      inquery[EXTRACT256(array,2)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,3)) {
-      inquery[EXTRACT256(array,3)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,4)) {
-      inquery[EXTRACT256(array,4)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,5)) {
-      inquery[EXTRACT256(array,5)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,6)) {
-      inquery[EXTRACT256(array,6)] = INQUERY_FALSE;
-    }
-    if (EXTRACT256(_counts_neg,7)) {
-      inquery[EXTRACT256(array,7)] = INQUERY_FALSE;
-    }
-  }
-#endif /* CHECK_FOR_OVERFLOW */
-  debug(printf("0 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)]));
-  debug(printf("16
%04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("32 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("48 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("0 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("16 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("32 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("48 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,20), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 1 */ - counts[EXTRACT256(array,1)] += 1; /* 17 */ - counts[EXTRACT256(array,2)] += 1; /* 33 */ - counts[EXTRACT256(array,3)] += 1; /* 49 */ - counts[EXTRACT256(array,4)] += 1; /* 65 */ - counts[EXTRACT256(array,5)] += 1; /* 81 */ - counts[EXTRACT256(array,6)] += 1; /* 97 */ - counts[EXTRACT256(array,7)] += 1; /* 113 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - 
debug(printf("1 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("17 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("33 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("49 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("1 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("17 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("33 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("49 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 2 */ - counts[EXTRACT256(array,1)] += 1; /* 18 */ - counts[EXTRACT256(array,2)] += 1; /* 34 */ - counts[EXTRACT256(array,3)] += 1; /* 50 */ - counts[EXTRACT256(array,4)] += 1; /* 66 */ - counts[EXTRACT256(array,5)] += 1; /* 82 */ - counts[EXTRACT256(array,6)] += 1; /* 98 */ - counts[EXTRACT256(array,7)] += 1; /* 114 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("2 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("50 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("2 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("50 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 3 */ - counts[EXTRACT256(array,1)] += 1; /* 19 */ - counts[EXTRACT256(array,2)] += 1; /* 35 */ - counts[EXTRACT256(array,3)] += 1; /* 51 */ - counts[EXTRACT256(array,4)] += 1; /* 67 */ - counts[EXTRACT256(array,5)] += 1; /* 83 */ - counts[EXTRACT256(array,6)] += 1; /* 99 */ - counts[EXTRACT256(array,7)] += 1; /* 115 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; 
- } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("3 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("51 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("3 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("51 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 4 */ - counts[EXTRACT256(array,1)] += 1; /* 20 */ - counts[EXTRACT256(array,2)] += 1; /* 36 */ - counts[EXTRACT256(array,3)] += 1; /* 52 */ - counts[EXTRACT256(array,4)] += 1; /* 68 */ - counts[EXTRACT256(array,5)] += 1; /* 84 */ - counts[EXTRACT256(array,6)] += 1; /* 100 */ - counts[EXTRACT256(array,7)] += 1; /* 116 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = 
INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("4 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("20 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("52 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("4 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("20 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("52 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 5 */ - counts[EXTRACT256(array,1)] += 1; /* 21 */ - counts[EXTRACT256(array,2)] += 1; /* 37 */ - counts[EXTRACT256(array,3)] += 1; /* 53 */ - counts[EXTRACT256(array,4)] += 1; /* 69 */ - counts[EXTRACT256(array,5)] += 1; /* 85 */ - counts[EXTRACT256(array,6)] += 1; /* 101 */ - counts[EXTRACT256(array,7)] += 1; /* 117 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - 
inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("5 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("21 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("37 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("53 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("5 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("21 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("37 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("53 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 6 */ - counts[EXTRACT256(array,1)] += 1; /* 22 */ - counts[EXTRACT256(array,2)] += 1; /* 38 */ - counts[EXTRACT256(array,3)] += 1; /* 54 */ - counts[EXTRACT256(array,4)] += 1; /* 70 */ - counts[EXTRACT256(array,5)] += 1; /* 86 */ - counts[EXTRACT256(array,6)] += 1; /* 102 */ - counts[EXTRACT256(array,7)] += 1; /* 118 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("6 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("38 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("54 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("6 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("38 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("54 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 7 */ - counts[EXTRACT256(array,1)] += 1; /* 23 */ - counts[EXTRACT256(array,2)] += 1; /* 39 */ - counts[EXTRACT256(array,3)] += 1; /* 55 */ - counts[EXTRACT256(array,4)] += 1; /* 71 */ - counts[EXTRACT256(array,5)] += 1; /* 87 */ - counts[EXTRACT256(array,6)] += 1; /* 103 */ - counts[EXTRACT256(array,7)] += 1; /* 119 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; 
- } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("7 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("55 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("7 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("55 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 8 */ - counts[EXTRACT256(array,1)] += 1; /* 24 */ - counts[EXTRACT256(array,2)] += 1; /* 40 */ - counts[EXTRACT256(array,3)] += 1; /* 56 */ - counts[EXTRACT256(array,4)] += 1; /* 72 */ - counts[EXTRACT256(array,5)] += 1; /* 88 */ - counts[EXTRACT256(array,6)] += 1; /* 104 */ - counts[EXTRACT256(array,7)] += 1; /* 120 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum 
counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("8 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("24 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("56 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("8 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("24 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("56 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 9 */ - counts[EXTRACT256(array,1)] += 1; /* 25 */ - counts[EXTRACT256(array,2)] += 1; /* 41 */ - counts[EXTRACT256(array,3)] += 1; /* 57 */ - counts[EXTRACT256(array,4)] += 1; /* 73 */ - counts[EXTRACT256(array,5)] += 1; /* 89 */ - counts[EXTRACT256(array,6)] += 1; /* 105 */ - counts[EXTRACT256(array,7)] += 1; /* 121 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = 
_mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("9 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("25 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("57 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("9 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("25 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("57 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 10 */ - counts[EXTRACT256(array,1)] += 1; /* 26 */ - counts[EXTRACT256(array,2)] += 1; /* 42 */ - counts[EXTRACT256(array,3)] += 1; /* 58 */ - counts[EXTRACT256(array,4)] += 1; /* 74 */ - counts[EXTRACT256(array,5)] += 1; /* 90 */ - counts[EXTRACT256(array,6)] += 1; /* 106 */ - counts[EXTRACT256(array,7)] += 1; /* 122 */ -#ifdef 
CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("10 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("58 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("10 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("58 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( current, bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 11 */ - counts[EXTRACT256(array,1)] += 1; /* 27 */ - counts[EXTRACT256(array,2)] += 1; /* 43 */ - counts[EXTRACT256(array,3)] += 1; /* 59 */ - counts[EXTRACT256(array,4)] += 1; /* 75 */ - counts[EXTRACT256(array,5)] += 1; /* 91 */ - 
counts[EXTRACT256(array,6)] += 1; /* 107 */ - counts[EXTRACT256(array,7)] += 1; /* 123 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("11 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("43 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("59 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("11 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("43 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("59 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - oligo = _mm256_or_si256( _mm256_srli_epi32(next,24), _mm256_slli_epi32(current,8)); - array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 12 */ - counts[EXTRACT256(array,1)] += 1; /* 28 
*/ - counts[EXTRACT256(array,2)] += 1; /* 44 */ - counts[EXTRACT256(array,3)] += 1; /* 60 */ - counts[EXTRACT256(array,4)] += 1; /* 76 */ - counts[EXTRACT256(array,5)] += 1; /* 92 */ - counts[EXTRACT256(array,6)] += 1; /* 108 */ - counts[EXTRACT256(array,7)] += 1; /* 124 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("12 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("28 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("60 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("12 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("28 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("60 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,4), 
bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 13 */ - counts[EXTRACT256(array,1)] += 1; /* 29 */ - counts[EXTRACT256(array,2)] += 1; /* 45 */ - counts[EXTRACT256(array,3)] += 1; /* 61 */ - counts[EXTRACT256(array,4)] += 1; /* 77 */ - counts[EXTRACT256(array,5)] += 1; /* 93 */ - counts[EXTRACT256(array,6)] += 1; /* 109 */ - counts[EXTRACT256(array,7)] += 1; /* 125 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("13 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("45 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("61 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("13 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("45 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("61 %04X => 
%d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 14 */ - counts[EXTRACT256(array,1)] += 1; /* 30 */ - counts[EXTRACT256(array,2)] += 1; /* 46 */ - counts[EXTRACT256(array,3)] += 1; /* 62 */ - counts[EXTRACT256(array,4)] += 1; /* 78 */ - counts[EXTRACT256(array,5)] += 1; /* 94 */ - counts[EXTRACT256(array,6)] += 1; /* 110 */ - counts[EXTRACT256(array,7)] += 1; /* 126 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("14 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("30 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("46 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("62 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("14 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("30 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - 
debug(printf("46 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("62 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( oligo, bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 15 */ - counts[EXTRACT256(array,1)] += 1; /* 31 */ - counts[EXTRACT256(array,2)] += 1; /* 47 */ - counts[EXTRACT256(array,3)] += 1; /* 63 */ - counts[EXTRACT256(array,4)] += 1; /* 79 */ - counts[EXTRACT256(array,5)] += 1; /* 95 */ - counts[EXTRACT256(array,6)] += 1; /* 111 */ - counts[EXTRACT256(array,7)] += 1; /* 127 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("15 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("31 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("63 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("15 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - 
debug(printf("31 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("63 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - return; -} -#endif - - -#if !defined(HAVE_AVX2) - -static int -store_5mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, - Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { - Genomecomp_T masked, oligo; -#ifdef INDIVIDUAL_SHIFTS -#elif defined(SIMD_MASK_THEN_STORE) - UINT4 _masked[4] __attribute__ ((aligned (16))); - __m128i _oligo; -#else - __m128i _oligo, _masked; -#endif - - - oligo = nexthigh_rev >> 24; /* For 31..28 */ - oligo |= low_rev << 8; - -#ifdef INDIVIDUAL_SHIFTS - masked = oligo & MASK5; /* 31 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; - } - - masked = (oligo >> 2) & MASK5; /* 30 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; - } - - masked = (oligo >> 4) & MASK5; /* 29 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; - } - - masked = (oligo >> 6) & MASK5; /* 28 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; - } - -#else - _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); -#else - _masked = _mm_and_si128(_oligo, mask5); -#endif - - masked = EXTRACT(_masked,0); - if (counts[masked]) { - 
assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos; - } - - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; - } - - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; - } - - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; - } -#endif - - -#ifdef INDIVIDUAL_SHIFTS - masked = low_rev & MASK5; /* 27 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; - } - - masked = (low_rev >> 2) & MASK5; /* 26 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; - } - - masked = (low_rev >> 4) & MASK5; /* 25 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; - } - - masked = (low_rev >> 6) & MASK5; /* 24 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; - } - - masked = (low_rev >> 8) & MASK5; /* 23 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; - } - - masked = (low_rev >> 10) & 
MASK5; /* 22 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; - } - - masked = (low_rev >> 12) & MASK5; /* 21 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; - } - - masked = (low_rev >> 14) & MASK5; /* 20 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; - } - - masked = (low_rev >> 16) & MASK5; /* 19 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; - } - - masked = (low_rev >> 18) & MASK5; /* 18 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; - } - - masked = (low_rev >> 20) & MASK5; /* 17 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; - } - - masked = low_rev >> 22; /* 16, No mask necessary */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; - } - -#else - _oligo = _mm_setr_epi32(low_rev, low_rev >> 2, low_rev >> 4, low_rev >> 6); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); -#else - _masked = _mm_and_si128(_oligo, mask5); -#endif - - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 
4)); - table[--pointers[masked]] = chrpos - 4; - } - - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; - } - - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; - } - - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; - } - - - _oligo = _mm_srli_epi32(_oligo, 8); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); -#else - _masked = _mm_and_si128(_oligo, mask5); -#endif - - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; - } - - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; - } - - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; - } - - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; - } - - - _oligo = _mm_srli_epi32(_oligo, 8); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); -#else - _masked = _mm_and_si128(_oligo, mask5); -#endif - - masked = EXTRACT(_masked,0); - if 
(counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; - } - - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; - } - - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; - } - - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; - } -#endif - - - oligo = low_rev >> 24; /* For 15..12 */ - oligo |= high_rev << 8; - -#ifdef INDIVIDUAL_SHIFTS - masked = oligo & MASK5; /* 15 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; - } - - masked = (oligo >> 2) & MASK5; /* 14 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; - } - - masked = (oligo >> 4) & MASK5; /* 13 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; - } - - masked = (oligo >> 6) & MASK5; /* 12 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; - } - -#else - _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) 
_masked,_mm_and_si128(_oligo, mask5)); -#else - _masked = _mm_and_si128(_oligo, mask5); -#endif - - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; - } - - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; - } - - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; - } - - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; - } -#endif - - -#ifdef INDIVIDUAL_SHIFTS - masked = high_rev & MASK5; /* 11 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; - } - - masked = (high_rev >> 2) & MASK5; /* 10 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; - } - - masked = (high_rev >> 4) & MASK5; /* 9 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; - } - - masked = (high_rev >> 6) & MASK5; /* 8 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; - } - - masked = (high_rev >> 8) & MASK5; /* 7 */ - if (counts[masked]) { - 
assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; - } - - masked = (high_rev >> 10) & MASK5; /* 6 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; - } - - masked = (high_rev >> 12) & MASK5; /* 5 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; - } - - masked = (high_rev >> 14) & MASK5; /* 4 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; - } - - masked = (high_rev >> 16) & MASK5; /* 3 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; - } - - masked = (high_rev >> 18) & MASK5; /* 2 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; - } - - masked = (high_rev >> 20) & MASK5; /* 1 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; - } - - masked = high_rev >> 22; /* 0, No mask necessary */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; - } - -#else - _oligo = _mm_setr_epi32(high_rev, high_rev >> 2, high_rev >> 4, high_rev >> 6); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); -#else - _masked = 
_mm_and_si128(_oligo, mask5); -#endif - - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; - } - - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; - } - - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; - } - - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; - } - - - _oligo = _mm_srli_epi32(_oligo, 8); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); -#else - _masked = _mm_and_si128(_oligo, mask5); -#endif - - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; - } - - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; - } - - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; - } - - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; - } - - - _oligo = 
_mm_srli_epi32(_oligo, 8); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); -#else - _masked = _mm_and_si128(_oligo, mask5); -#endif - - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; - } - - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; - } - - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; - } - - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; - } -#endif - - return chrpos - 32; -} - -#else /* HAVE_AVX2 */ - -static int -store_5mers_fwd (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, - Genomecomp_T high_rev, Genomecomp_T low_rev, Genomecomp_T nexthigh_rev) { - Genomecomp_T masked, oligo; - __m256i _oligo, _masked, _counts; - - - oligo = nexthigh_rev >> 24; /* For 31..28 */ - oligo |= low_rev << 8; - - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask5); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; - } - - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at 
%u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; - } - - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; - } - - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; - } - - - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rev),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask5); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; - } - - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; - } - - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; - } - - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; - } - - if (EXTRACT256(_counts,4)) { - masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; - } - - if (EXTRACT256(_counts,5)) { - masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at 
%u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; - } - - if (EXTRACT256(_counts,6)) { - masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; - } - - if (EXTRACT256(_counts,7)) { - masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; - } - - - _oligo = _mm256_srli_epi32(_oligo, 16); - _masked = _mm256_and_si256(_oligo, bigmask5); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; - } - - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; - } - - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; - } - - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; - } - - - oligo = low_rev >> 24; /* For 15..12 */ - oligo |= high_rev << 8; - - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask5); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at 
%u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; - } - - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; - } - - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; - } - - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; - } - - - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rev),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask5); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; - } - - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; - } - - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; - } - - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; - } - - if (EXTRACT256(_counts,4)) { - masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u 
at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; - } - - if (EXTRACT256(_counts,5)) { - masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; - } - - if (EXTRACT256(_counts,6)) { - masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; - } - - if (EXTRACT256(_counts,7)) { - masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; - } - - - _oligo = _mm256_srli_epi32(_oligo, 16); - _masked = _mm256_and_si256(_oligo, bigmask5); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; - } - - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; - } - - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; - } - - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; - } - - return chrpos - 32; -} - -#endif /* HAVE_AVX2 */ - - - -#ifndef USE_SIMD_FOR_COUNTS - -static void -count_positions_fwd_std (Count_T *counts, Inquery_T *inquery, int 
indexsize, Univcoord_T left, Univcoord_T left_plus_length, - int genestrand) { - int startdiscard, enddiscard; - Genomecomp_T ptr, startptr, endptr, high_rev, low_rev, nexthigh_rev, - low, high, nextlow; - - debug(printf("Starting count_positions_fwd_std\n")); - - if (left_plus_length < (Univcoord_T) indexsize) { - left_plus_length = 0; - } else { - left_plus_length -= indexsize; - } - - startptr = left/32U*3; - ptr = endptr = left_plus_length/32U*3; - startdiscard = left % 32; /* (left+pos5) % 32 */ - enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */ - - if (left_plus_length <= left) { - /* Skip */ - - } else if (startptr == endptr) { -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - high_rev = reverse_nt[low >> 16]; - high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); - low_rev = reverse_nt[high >> 16]; - low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); - nexthigh_rev = reverse_nt[nextlow >> 16]; - nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); - - if (indexsize == 9) { - count_9mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); - } else if (indexsize == 8) { - count_8mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); - } else if (indexsize == 7) { - count_7mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); - } else if 
(indexsize == 6) { - count_6mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); - } else if (indexsize == 5) { - count_5mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); - } else { - fprintf(stderr,"indexsize %d not supported\n",indexsize); - abort(); - } - - } else { - /* Genome_print_blocks(ref_blocks,left,left+16); */ - - /* End block */ -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - high_rev = reverse_nt[low >> 16]; - high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); - low_rev = reverse_nt[high >> 16]; - low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); - nexthigh_rev = reverse_nt[nextlow >> 16]; - nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); - - if (indexsize == 9) { - count_9mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); - } else if (indexsize == 8) { - count_8mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); - } else if (indexsize == 7) { - count_7mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); - } else if (indexsize == 6) { - count_6mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); - } else if (indexsize == 5) { - 
count_5mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); - } else { - abort(); - } - - /* Middle blocks */ - if (indexsize == 9) { - while (ptr > startptr + 3) { - ptr -= 3; - -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - high_rev = reverse_nt[low >> 16]; - high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); - low_rev = reverse_nt[high >> 16]; - low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); - nexthigh_rev = reverse_nt[nextlow >> 16]; - nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); - - count_9mers_fwd(counts,inquery,high_rev,low_rev,nexthigh_rev); - } - - } else if (indexsize == 8) { - while (ptr > startptr + 3) { - ptr -= 3; - -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - 
high_rev = reverse_nt[low >> 16]; - high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); - low_rev = reverse_nt[high >> 16]; - low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); - nexthigh_rev = reverse_nt[nextlow >> 16]; - nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); - - count_8mers_fwd(counts,inquery,high_rev,low_rev,nexthigh_rev); - } - - } else if (indexsize == 7) { - while (ptr > startptr + 3) { - ptr -= 3; - -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - high_rev = reverse_nt[low >> 16]; - high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); - low_rev = reverse_nt[high >> 16]; - low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); - nexthigh_rev = reverse_nt[nextlow >> 16]; - nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); - - count_7mers_fwd(counts,inquery,high_rev,low_rev,nexthigh_rev); - } - - } else if (indexsize == 6) { - while (ptr > startptr + 3) { - ptr -= 3; - -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { 
- high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - high_rev = reverse_nt[low >> 16]; - high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); - low_rev = reverse_nt[high >> 16]; - low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); - nexthigh_rev = reverse_nt[nextlow >> 16]; - nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); - - count_6mers_fwd(counts,inquery,high_rev,low_rev,nexthigh_rev); - } - - } else if (indexsize == 5) { - while (ptr > startptr + 3) { - ptr -= 3; - -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - high_rev = reverse_nt[low >> 16]; - high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); - low_rev = reverse_nt[high >> 16]; - low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); - nexthigh_rev = reverse_nt[nextlow >> 16]; - nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); - - count_5mers_fwd(counts,inquery,high_rev,low_rev,nexthigh_rev); - } - - } else { - abort(); - } - - ptr -= 3; - - /* Start block */ - assert(ptr == startptr); - -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - 
nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - high_rev = reverse_nt[low >> 16]; - high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); - low_rev = reverse_nt[high >> 16]; - low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); - nexthigh_rev = reverse_nt[nextlow >> 16]; - nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); - - if (indexsize == 9) { - count_9mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); - } else if (indexsize == 8) { - count_8mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); - } else if (indexsize == 7) { - count_7mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); - } else if (indexsize == 6) { - count_6mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); - } else if (indexsize == 5) { - count_5mers_fwd_partial(counts,inquery,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); - } else { - fprintf(stderr,"indexsize %d not supported\n",indexsize); - abort(); - } - - } - - return; -} -#endif - - - -#ifdef USE_SIMD_FOR_COUNTS - -static void -count_positions_fwd_simd (Count_T *counts, Inquery_T *inquery, int indexsize, - Univcoord_T left, Univcoord_T left_plus_length, int genestrand) { - int startdiscard, enddiscard; - Genomecomp_T ptr, startptr, endptr, nexthigh_rev, nextlow; - Genomecomp_T high0_rev, low0_rev, low0, high0, low1, high1; - __m128i current, next, mask2, mask4; - /* __m128i array[16]; */ -#ifdef HAVE_SSSE3 - __m128i reverse8; -#else - __m128i mask8; -#endif -#ifdef 
HAVE_SSE4_1 - __m128i temp; -#else - Genomecomp_T high1_rev, low1_rev; -#endif -#ifdef HAVE_AVX2 - Genomecomp_T low2, high2, low3, high3; - __m256i current256, next256, temp256, bigmask2, bigmask4, bigreverse8; - __m256i shift256; -#endif - - - debug(printf("Starting count_positions_fwd_simd\n")); - - if (left_plus_length < (Univcoord_T) indexsize) { - left_plus_length = 0; - } else { - left_plus_length -= indexsize; - } - - startptr = left/32U*3; - ptr = endptr = left_plus_length/32U*3; - startdiscard = left % 32; /* (left+pos5) % 32 */ - enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */ - - mask2 = _mm_set1_epi32(0x33333333); - mask4 = _mm_set1_epi32(0x0F0F0F0F); -#ifdef HAVE_SSSE3 - reverse8 = _mm_set_epi8(0x0C,0x0D,0x0E,0x0F, 0x08,0x09,0x0A,0x0B, 0x04,0x05,0x06,0x07, 0x00,0x01,0x02,0x03); -#else - mask8 = _mm_set1_epi32(0x00FF00FF); -#endif -#ifdef HAVE_AVX2 - bigmask2 = _mm256_set1_epi32(0x33333333); - bigmask4 = _mm256_set1_epi32(0x0F0F0F0F); - bigreverse8 = _mm256_set_epi8(0x0C,0x0D,0x0E,0x0F, 0x08,0x09,0x0A,0x0B, 0x04,0x05,0x06,0x07, 0x00,0x01,0x02,0x03, - 0x0C,0x0D,0x0E,0x0F, 0x08,0x09,0x0A,0x0B, 0x04,0x05,0x06,0x07, 0x00,0x01,0x02,0x03); - shift256 = _mm256_setr_epi32(1,2,3,4,5,6,7,0); -#endif - - if (left_plus_length <= left) { - /* Skip */ - - } else if (startptr == endptr) { -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow); 
- } - } - - current = _mm_set_epi32(0,nextlow,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ -#ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ -#else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ -#endif - -#ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - low0_rev = (unsigned int) _mm_extract_epi32(current,1); - nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2); -#else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); - nexthigh_rev = (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16); -#endif - - if (indexsize == 9) { - count_9mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); - } else if (indexsize == 8) { - count_8mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); - } else if (indexsize == 7) { - count_7mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); - } else if (indexsize == 6) { - count_6mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); - } else if (indexsize == 5) { - count_5mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); - } else { - fprintf(stderr,"indexsize %d not supported\n",indexsize); - abort(); - } - - } else { - /* Genome_print_blocks(ref_blocks,left,left+16); */ - - /* End block */ -#ifdef WORDS_BIGENDIAN - high0 = 
Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow); - } - } - - current = _mm_set_epi32(0,nextlow,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ -#ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ -#else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ -#endif - -#ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - low0_rev = (unsigned int) _mm_extract_epi32(current,1); - nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2); -#else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); - nexthigh_rev = (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16); -#endif - - if (indexsize == 9) { - count_9mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); - } else if (indexsize == 8) { - 
count_8mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); - } else if (indexsize == 7) { - count_7mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); - } else if (indexsize == 6) { - count_6mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); - } else if (indexsize == 5) { - count_5mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); - } else { - abort(); - } - - /* Middle blocks */ - if (indexsize == 9) { -#ifdef HAVE_AVX2 - while (ptr > startptr + 12) { - ptr -= 12; - -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 = Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; - /* nextlow = ref_blocks[ptr+13]; */ -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = 
Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - - current256 = _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0); - current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */ - current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */ - current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes*/ - - nexthigh_rev = high0_rev; - - high0_rev = (unsigned int) _mm256_extract_epi32(current256,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */ - /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */ - - temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); - - count_9mers_fwd_simd_128(counts,inquery,current256,next256); - } -#endif - - while (ptr > startptr + 6) { - ptr -= 6; - -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - /* nextlow = ref_blocks[ptr+7]; */ -#endif - if (mode == CMET_STRANDED) { - high0 = 
Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - - current = _mm_set_epi32(high1,low1,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ -#ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ -#else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ -#endif - - nexthigh_rev = high0_rev; - -#ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */ - /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */ - - temp = _mm_insert_epi32(current,nexthigh_rev,0x00); - next = _mm_shuffle_epi32(temp,0x39); -#else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); - high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16); - low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16); - - next = 
_mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev); -#endif - -#if 0 - extract_9mers_fwd_simd(array,current,next); - count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array); -#else - count_9mers_fwd_simd(counts,inquery,current,next); -#endif - } - - if (ptr > startptr + 3) { - ptr -= 3; - -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - /* nextlow = ref_blocks[ptr+4]; */ -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - - current = _mm_set_epi32(0,0,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ -#ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ -#else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ -#endif - - nexthigh_rev = high0_rev; -#ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - low0_rev = (unsigned int) _mm_extract_epi32(current,1); -#else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 
0x0000FFFF] << 16); -#endif - - count_9mers_fwd(counts,inquery,high0_rev,low0_rev,nexthigh_rev); - } - - } else if (indexsize == 8) { -#ifdef HAVE_AVX2 - while (ptr > startptr + 12) { - ptr -= 12; - -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 = Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; - /* nextlow = ref_blocks[ptr+13]; */ -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - - current256 = 
_mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0); - current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */ - current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */ - current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */ - - nexthigh_rev = high0_rev; - - high0_rev = (unsigned int) _mm256_extract_epi32(current256,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */ - /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */ - - temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); - - count_8mers_fwd_simd_128(counts,inquery,current256,next256); - } -#endif - - while (ptr > startptr + 6) { - ptr -= 6; - -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - /* nextlow = ref_blocks[ptr+7]; */ -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); 
- high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - - current = _mm_set_epi32(high1,low1,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ -#ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ -#else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ -#endif - - nexthigh_rev = high0_rev; - -#ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) _mm_extract_epi32(current,2);*/ - /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */ - - temp = _mm_insert_epi32(current,nexthigh_rev,0x00); - next = _mm_shuffle_epi32(temp,0x39); -#else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); - high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16); - low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16); - - next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev); -#endif - -#if 0 - extract_8mers_fwd_simd(array,current,next); - count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array); -#else - count_8mers_fwd_simd(counts,inquery,current,next); -#endif - } - - if (ptr > startptr + 3) { - ptr -= 3; - -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - /* nextlow = 
Bigendian_convert_uint(ref_blocks[ptr+4]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - /* nextlow = ref_blocks[ptr+4]; */ -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - - current = _mm_set_epi32(0,0,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ -#ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ -#else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ -#endif - - nexthigh_rev = high0_rev; -#ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - low0_rev = (unsigned int) _mm_extract_epi32(current,1); -#else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); -#endif - - count_8mers_fwd(counts,inquery,high0_rev,low0_rev,nexthigh_rev); - } - - } else if (indexsize == 7) { -#ifdef HAVE_AVX2 - while (ptr > startptr + 12) { - ptr -= 12; - -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 
= Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; - /* nextlow = ref_blocks[ptr+13]; */ -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - - current256 = _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0); - current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */ - current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */ - current256 = 
_mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */ - - nexthigh_rev = high0_rev; - - high0_rev = (unsigned int) _mm256_extract_epi32(current256,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */ - /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */ - - temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); - - count_7mers_fwd_simd_128(counts,inquery,current256,next256); - } -#endif - - while (ptr > startptr + 6) { - ptr -= 6; - -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - /* nextlow = ref_blocks[ptr+7]; */ -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - - current = _mm_set_epi32(high1,low1,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = 
_mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ -#ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ -#else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ -#endif - - nexthigh_rev = high0_rev; - -#ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */ - /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */ - - temp = _mm_insert_epi32(current,nexthigh_rev,0x00); - next = _mm_shuffle_epi32(temp,0x39); -#else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); - high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16); - low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16); - - next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev); -#endif - -#if 0 - extract_7mers_fwd_simd(array,current,next); - count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array); -#else - count_7mers_fwd_simd(counts,inquery,current,next); -#endif - } - - if (ptr > startptr + 3) { - ptr -= 3; - -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - /* nextlow = ref_blocks[ptr+4]; */ -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { 
- high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - - current = _mm_set_epi32(0,0,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ -#ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ -#else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ -#endif - - nexthigh_rev = high0_rev; -#ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - low0_rev = (unsigned int) _mm_extract_epi32(current,1); -#else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); -#endif - - count_7mers_fwd(counts,inquery,high0_rev,low0_rev,nexthigh_rev); - } - - } else if (indexsize == 6) { -#ifdef HAVE_AVX2 - while (ptr > startptr + 12) { - ptr -= 12; - -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 = Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - high1 = 
ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; - /* nextlow = ref_blocks[ptr+13]; */ -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - - current256 = _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0); - current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */ - current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */ - current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */ - - nexthigh_rev = high0_rev; - - high0_rev = (unsigned int) _mm256_extract_epi32(current256,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */ - /* low1_rev = (unsigned int) 
_mm_extract_epi32(current,3); */ - - temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); - - count_6mers_fwd_simd_128(counts,inquery,current256,next256); - } -#endif - - while (ptr > startptr + 6) { - ptr -= 6; - -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - /* nextlow = ref_blocks[ptr+7]; */ -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - - current = _mm_set_epi32(high1,low1,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ -#ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ -#else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* 
Swap 16-bit quantities */ -#endif - - nexthigh_rev = high0_rev; - -#ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */ - /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */ - - temp = _mm_insert_epi32(current,nexthigh_rev,0x00); - next = _mm_shuffle_epi32(temp,0x39); -#else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); - high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16); - low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16); - - next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev); -#endif - -#if 0 - extract_6mers_fwd_simd(array,current,next); - count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array); -#else - count_6mers_fwd_simd(counts,inquery,current,next); -#endif - } - - if (ptr > startptr + 3) { - ptr -= 3; - -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - /* nextlow = ref_blocks[ptr+4]; */ -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - - current = _mm_set_epi32(0,0,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = 
_mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ -#ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ -#else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ -#endif - - nexthigh_rev = high0_rev; -#ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - low0_rev = (unsigned int) _mm_extract_epi32(current,1); -#else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); -#endif - - count_6mers_fwd(counts,inquery,high0_rev,low0_rev,nexthigh_rev); - } - - } else if (indexsize == 5) { -#ifdef HAVE_AVX2 - while (ptr > startptr + 12) { - ptr -= 12; - -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 = Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; - /* nextlow = ref_blocks[ptr+13]; */ -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = 
Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - - current256 = _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0); - current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */ - current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */ - current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */ - - nexthigh_rev = high0_rev; - - high0_rev = (unsigned int) _mm256_extract_epi32(current256,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */ - /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */ - - temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); - - count_5mers_fwd_simd_128(counts,inquery,current256,next256); - } -#endif - - while (ptr > startptr + 6) { - ptr -= 6; - -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - 
low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - /* nextlow = ref_blocks[ptr+7]; */ -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - - current = _mm_set_epi32(high1,low1,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ -#ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ -#else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ -#endif - - nexthigh_rev = high0_rev; -#ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */ - /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */ - - temp = _mm_insert_epi32(current,nexthigh_rev,0x00); - next = _mm_shuffle_epi32(temp,0x39); -#else - high0_rev = 
(reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); - high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16); - low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16); - - next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev); -#endif - -#if 0 - extract_5mers_fwd_simd(array,current,next); - count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array); -#else - count_5mers_fwd_simd(counts,inquery,current,next); -#endif - } - - if (ptr > startptr + 3) { - ptr -= 3; - -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - /* nextlow = ref_blocks[ptr+4]; */ -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - - current = _mm_set_epi32(0,0,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ -#ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ -#else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ -#endif 
- - nexthigh_rev = high0_rev; -#ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - low0_rev = (unsigned int) _mm_extract_epi32(current,1); -#else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); -#endif - - count_5mers_fwd(counts,inquery,high0_rev,low0_rev,nexthigh_rev); - } - - } else { - abort(); - } - - ptr -= 3; - - /* Start block */ - assert(ptr == startptr); - -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - /* nextlow = ref_blocks[ptr+4]; */ -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - - current = _mm_set_epi32(0,0,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ -#ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ -#else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ -#endif - - nexthigh_rev = high0_rev; -#ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) 
_mm_extract_epi32(current,0); - low0_rev = (unsigned int) _mm_extract_epi32(current,1); -#else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); -#endif - - if (indexsize == 9) { - count_9mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); - } else if (indexsize == 8) { - count_8mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); - } else if (indexsize == 7) { - count_7mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); - } else if (indexsize == 6) { - count_6mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); - } else if (indexsize == 5) { - count_5mers_fwd_partial(counts,inquery,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); - } else { - fprintf(stderr,"indexsize %d not supported\n",indexsize); - abort(); - } - } - - return; -} - -#endif - - -#ifndef USE_SIMD_FOR_COUNTS -static void -store_positions_fwd_std (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, int indexsize, - Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos, - int genestrand) { - int startdiscard, enddiscard; - Genomecomp_T ptr, startptr, endptr, high_rev, low_rev, nexthigh_rev, - low, high, nextlow; - - - if (left_plus_length < (Univcoord_T) indexsize) { - left_plus_length = 0; - } else { - left_plus_length -= indexsize; - } - chrpos += (left_plus_length - left); /* We are starting from the right */ - - startptr = left/32U*3; - ptr = endptr = left_plus_length/32U*3; - startdiscard = left % 32; /* (left+pos5) % 32 */ - enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */ - - if (left_plus_length <= left) { - /* Skip */ - - } else if (startptr == endptr) { -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = 
Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - high_rev = reverse_nt[low >> 16]; - high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); - low_rev = reverse_nt[high >> 16]; - low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); - nexthigh_rev = reverse_nt[nextlow >> 16]; - nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); - - if (indexsize == 9) { - chrpos = store_9mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); - } else if (indexsize == 8) { - chrpos = store_8mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); - } else if (indexsize == 7) { - chrpos = store_7mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); - } else if (indexsize == 6) { - chrpos = store_6mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); - } else if (indexsize == 5) { - chrpos = store_5mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); - } else { - fprintf(stderr,"indexsize %d not supported\n",indexsize); - abort(); - } - - } else { - /* Genome_print_blocks(ref_blocks,left,left+16); */ - - /* End block */ -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = 
Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - high_rev = reverse_nt[low >> 16]; - high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); - low_rev = reverse_nt[high >> 16]; - low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); - nexthigh_rev = reverse_nt[nextlow >> 16]; - nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); - - if (indexsize == 9) { - chrpos = store_9mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); - } else if (indexsize == 8) { - chrpos = store_8mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); - } else if (indexsize == 7) { - chrpos = store_7mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); - } else if (indexsize == 6) { - chrpos = store_6mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); - } else if (indexsize == 5) { - chrpos = store_5mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); - } else { - abort(); - } - - if (indexsize == 9) { - while (ptr > startptr + 3) { - ptr -= 3; - -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = 
ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - high_rev = reverse_nt[low >> 16]; - high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); - low_rev = reverse_nt[high >> 16]; - low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); - nexthigh_rev = reverse_nt[nextlow >> 16]; - nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); - - chrpos = store_9mers_fwd(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev); - } - - } else if (indexsize == 8) { - while (ptr > startptr + 3) { - ptr -= 3; - -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - high_rev = reverse_nt[low >> 16]; - high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); - low_rev = reverse_nt[high >> 16]; - low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); - nexthigh_rev = reverse_nt[nextlow >> 16]; - nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); - - chrpos = store_8mers_fwd(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev); - } - - } else if (indexsize == 7) { - while (ptr > 
startptr + 3) { - ptr -= 3; - -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - high_rev = reverse_nt[low >> 16]; - high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); - low_rev = reverse_nt[high >> 16]; - low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); - nexthigh_rev = reverse_nt[nextlow >> 16]; - nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); - - chrpos = store_7mers_fwd(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev); - } - - } else if (indexsize == 6) { - while (ptr > startptr + 3) { - ptr -= 3; - -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - high_rev = reverse_nt[low >> 16]; - high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); - low_rev = reverse_nt[high >> 16]; - low_rev |= (reverse_nt[high & 
0x0000FFFF] << 16); - nexthigh_rev = reverse_nt[nextlow >> 16]; - nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); - - chrpos = store_6mers_fwd(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev); - } - - } else if (indexsize == 5) { - while (ptr > startptr + 3) { - ptr -= 3; - -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - high_rev = reverse_nt[low >> 16]; - high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); - low_rev = reverse_nt[high >> 16]; - low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); - nexthigh_rev = reverse_nt[nextlow >> 16]; - nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); - - chrpos = store_5mers_fwd(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev); - } - } else { - abort(); - } - - ptr -= 3; - - /* Start block */ - assert(ptr == startptr); - -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = 
Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - high_rev = reverse_nt[low >> 16]; - high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); - low_rev = reverse_nt[high >> 16]; - low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); - nexthigh_rev = reverse_nt[nextlow >> 16]; - nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); - - if (indexsize == 9) { - chrpos = store_9mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); - } else if (indexsize == 8) { - chrpos = store_8mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); - } else if (indexsize == 7) { - chrpos = store_7mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); - } else if (indexsize == 6) { - chrpos = store_6mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); - } else if (indexsize == 5) { - chrpos = store_5mers_fwd_partial(chrpos,table,pointers,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); - } else { - fprintf(stderr,"indexsize %d not supported\n",indexsize); - abort(); - } - - } - - return; -} -#endif - - -#ifdef USE_SIMD_FOR_COUNTS +#ifdef HAVE_SSE2 static void -store_positions_fwd_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, int indexsize, - Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos, - int genestrand) { +count_positions_fwd_simd (Count_T *counts, int indexsize, + Univcoord_T left, Univcoord_T left_plus_length, int genestrand) { int startdiscard, enddiscard; Genomecomp_T ptr, startptr, endptr, nexthigh_rev, nextlow; Genomecomp_T high0_rev, low0_rev, low0, high0, low1, high1; - __m128i current, next, mask2, mask4; + __m128i current, a, b, next, mask2, mask4; __m128i 
array[16]; #ifdef HAVE_SSSE3 __m128i reverse8; @@ -29367,19 +19791,24 @@ #ifdef HAVE_AVX2 __m256i array256[16]; Genomecomp_T low2, high2, low3, high3; - __m256i current256, next256, temp256, bigmask2, bigmask4, bigreverse8; + __m256i current256, a256, b256, c256, d256, next256, temp256, bigmask2, bigmask4, bigreverse8; __m256i shift256; #endif +#ifdef HAVE_AVX512 + __m512i array512[16]; + Genomecomp_T low4, high4, low5, high5, low6, high6, low7, high7; + __m512i current512, a512, b512, next512, temp512, hugemask2, hugemask4; + __m512i shift512; +#endif - debug(printf("Starting store_positions_fwd_simd\n")); + debug(printf("Starting count_positions_fwd_simd\n")); if (left_plus_length < (Univcoord_T) indexsize) { left_plus_length = 0; } else { left_plus_length -= indexsize; } - chrpos += (left_plus_length - left); /* We are starting from the right */ startptr = left/32U*3; ptr = endptr = left_plus_length/32U*3; @@ -29400,6 +19829,12 @@ 0x0C,0x0D,0x0E,0x0F, 0x08,0x09,0x0A,0x0B, 0x04,0x05,0x06,0x07, 0x00,0x01,0x02,0x03); shift256 = _mm256_setr_epi32(1,2,3,4,5,6,7,0); #endif +#ifdef HAVE_AVX512 + hugemask2 = _mm512_set1_epi32(0x33333333); + hugemask4 = _mm512_set1_epi32(0x0F0F0F0F); + /* hugereverse8 = _mm512_broadcast_i64x4(bigreverse8); */ + shift512 = _mm512_setr_epi32(1,2,3,4,5,6,7,8, 9,10,11,12,13,14,15,0); +#endif if (left_plus_length <= left) { /* Skip */ @@ -29414,7 +19849,10 @@ low0 = ref_blocks[ptr+1]; nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { + + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow); } else if (mode == CMET_NONSTRANDED) { if (genestrand > 0) { @@ -29422,6 +19860,22 @@ } else { high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow); } + } else if (mode == ATOI_STRANDED) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); 
+ } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } else { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } else { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } } current = _mm_set_epi32(0,nextlow,high0,low0); @@ -29434,6 +19888,7 @@ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ #endif + /* nexthigh_rev = high0_rev; */ #ifdef HAVE_SSE4_1 high0_rev = (unsigned int) _mm_extract_epi32(current,0); low0_rev = (unsigned int) _mm_extract_epi32(current,1); @@ -29445,15 +19900,15 @@ #endif if (indexsize == 9) { - chrpos = store_9mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); + count_9mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); } else if (indexsize == 8) { - chrpos = store_8mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); + count_8mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); } else if (indexsize == 7) { - chrpos = store_7mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); + count_7mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); } else if (indexsize == 6) { - chrpos = store_6mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); + 
count_6mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); } else if (indexsize == 5) { - chrpos = store_5mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); + count_5mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); } else { fprintf(stderr,"indexsize %d not supported\n",indexsize); abort(); @@ -29472,7 +19927,10 @@ low0 = ref_blocks[ptr+1]; nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { + + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow); } else if (mode == CMET_NONSTRANDED) { if (genestrand > 0) { @@ -29480,6 +19938,22 @@ } else { high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow); } + } else if (mode == ATOI_STRANDED) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } else { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } else { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } } current = _mm_set_epi32(0,nextlow,high0,low0); @@ -29492,6 +19966,7 @@ current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ #endif + /* nexthigh_rev = high0_rev; */ #ifdef HAVE_SSE4_1 high0_rev = (unsigned int) 
_mm_extract_epi32(current,0); low0_rev = (unsigned int) _mm_extract_epi32(current,1); @@ -29503,902 +19978,1322 @@ #endif if (indexsize == 9) { - chrpos = store_9mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + count_9mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); } else if (indexsize == 8) { - chrpos = store_8mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + count_8mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); } else if (indexsize == 7) { - chrpos = store_7mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + count_7mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); } else if (indexsize == 6) { - chrpos = store_6mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + count_6mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); } else if (indexsize == 5) { - chrpos = store_5mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + count_5mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); } else { abort(); } /* Middle blocks */ - if (indexsize == 9) { +#ifdef HAVE_AVX512 + while (ptr > startptr + 24) { + ptr -= 24; + + if (mode == STANDARD) { + a512 = _mm512_loadu_si512((__m512i *) &(ref_blocks[ptr])); + b512 = _mm512_loadu_si512((__m512i *) &(ref_blocks[ptr+7])); + current512 = _mm512_permutex2var_epi32(a512,_mm512_setr_epi32(1, 0, 4, 3, 7, 6, 10, 9, 13, 12, 16+9, 16+8, 16+12, 16+11, 16+15, 16+14), b512); + } else { + current512 = apply_mode_fwd_512(&(ref_blocks[ptr]),mode,genestrand); + } + + current512 = 
_mm512_or_si512(_mm512_and_si512(_mm512_srli_epi32(current512,2),hugemask2),_mm512_slli_epi32(_mm512_and_si512(current512,hugemask2),2)); /* Swap pairs */ + current512 = _mm512_or_si512(_mm512_and_si512(_mm512_srli_epi32(current512,4),hugemask4),_mm512_slli_epi32(_mm512_and_si512(current512,hugemask4),4)); /* Swap nibbles */ +#ifdef HAVE_AVX512BW + current512 = _mm512_shuffle_epi8(current512,hugereverse8); /* Reverse bytes within 128-bit lanes*/ +#else + /* Reverse bytes within 128-bit lanes*/ + current256 = _mm256_shuffle_epi8(_mm512_extracti64x4_epi64(current512,0x0),bigreverse8); + next256 = _mm256_shuffle_epi8(_mm512_extracti64x4_epi64(current512,0x1),bigreverse8); + current512 = _mm512_broadcast_i64x4(next256); + current512 = _mm512_inserti64x4(current512,current256,0x0); +#endif + + nexthigh_rev = high0_rev; /* Take from previous loop */ + + current = _mm512_extracti32x4_epi32(current512,0); + high0_rev = (unsigned int) _mm_extract_epi32(current, 0); /* Generate for next loop */ + + temp = _mm_insert_epi32(current,nexthigh_rev,0x00); + temp512 = _mm512_inserti32x4(current512,temp,0x00); + next512 = _mm512_permutexvar_epi32(shift512,temp512); /* shift goes first! 
*/ + + if (indexsize == 9) { + extract_9mers_fwd_simd_256(array512,current512,next512); + } else if (indexsize == 8) { + extract_8mers_fwd_simd_256(array512,current512,next512); + } else if (indexsize == 7) { + extract_7mers_fwd_simd_256(array512,current512,next512); + } else if (indexsize == 6) { + extract_6mers_fwd_simd_256(array512,current512,next512); + } else if (indexsize == 5) { + extract_5mers_fwd_simd_256(array512,current512,next512); + } else { + abort(); + } + count_fwdrev_simd_n(counts,(Genomecomp_T *) array512,256); + } +#endif + + #ifdef HAVE_AVX2 - while (ptr > startptr + 12) { - ptr -= 12; + while (ptr > startptr + 12) { + ptr -= 12; + + if (mode == STANDARD) { + a256 = _mm256_loadu_si256((__m256i *) &(ref_blocks[ptr])); + b256 = _mm256_loadu_si256((__m256i *) &(ref_blocks[ptr+3])); + c256 = _mm256_unpacklo_epi64(a256,b256); + d256 = _mm256_unpackhi_epi64(a256,b256); + current256 = _mm256_permute2x128_si256(c256, d256, 0x30); + current256 = _mm256_shuffle_epi32(current256, 0xB1); /* 0b10110001 */ + } else { + current256 = apply_mode_fwd_256(&(ref_blocks[ptr]),mode,genestrand); + } + + current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */ + current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */ + current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes*/ + + nexthigh_rev = high0_rev; /* Take from previous loop */ + high0_rev = (unsigned int) _mm256_extract_epi32(current256,0); /* Generate for next loop */ + +#if 0 + /* Doesn't work, because performs shift within 128-bit lanes */ + next256 = _mm256_alignr_epi8(_mm256_set1_epi32(nexthigh_rev),current256,4); +#else + temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00); + next256 = _mm256_permutevar8x32_epi32(temp256,shift256); +#endif + 
+ if (indexsize == 9) { + extract_9mers_fwd_simd_128(array256,current256,next256); + } else if (indexsize == 8) { + extract_8mers_fwd_simd_128(array256,current256,next256); + } else if (indexsize == 7) { + extract_7mers_fwd_simd_128(array256,current256,next256); + } else if (indexsize == 6) { + extract_6mers_fwd_simd_128(array256,current256,next256); + } else if (indexsize == 5) { + extract_5mers_fwd_simd_128(array256,current256,next256); + } else { + abort(); + } + count_fwdrev_simd_n(counts,(Genomecomp_T *) array256,128); + } +#endif + + while (ptr > startptr + 6) { + ptr -= 6; +#ifdef HAVE_SSSE3 + if (mode == STANDARD) { + a = _mm_loadu_si128((__m128i *) &(ref_blocks[ptr])); + b = _mm_loadu_si128((__m128i *) &(ref_blocks[ptr+3])); + current = _mm_unpacklo_epi64(a,b); + current = _mm_shuffle_epi32(current, 0xB1); /* 0b10110001 */ +#ifndef HAVE_SSE4_1 #ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 = Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */ #else - high0 = ref_blocks[ptr]; low0 = ref_blocks[ptr+1]; - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; - /* nextlow = ref_blocks[ptr+13]; */ #endif - if (mode == CMET_STRANDED) { +#endif + + } else { +#ifdef WORDS_BIGENDIAN + high0 = Bigendian_convert_uint(ref_blocks[ptr]); low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); + high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); +#else + high0 = 
ref_blocks[ptr]; low0 = ref_blocks[ptr+1]; + high1 = ref_blocks[ptr+3]; low1 = ref_blocks[ptr+4]; + nextlow = ref_blocks[ptr+7]; +#endif + + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ + nextlow = Cmet_reduce_ct(nextlow); } else if (mode == CMET_NONSTRANDED) { if (genestrand > 0) { high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ + nextlow = Cmet_reduce_ct(nextlow); } else { high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - /* nextlow = Cmet_reduce_ga(nextlow); */ + nextlow = Cmet_reduce_ga(nextlow); } - } - current256 = _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0); - current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */ - current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */ - current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */ - - nexthigh_rev = high0_rev; - - high0_rev = (unsigned int) _mm256_extract_epi32(current256,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) 
_mm_extract_epi32(current,2); */ - /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */ + } else if (mode == ATOI_STRANDED) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + nextlow = Atoi_reduce_tc(nextlow); + } else { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + nextlow = Atoi_reduce_ag(nextlow); + } - temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); + } else if (mode == TTOC_STRANDED) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + nextlow = Atoi_reduce_ag(nextlow); + } else { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + nextlow = Atoi_reduce_tc(nextlow); + } + } - extract_9mers_fwd_simd_128(array256,current256,next256); - chrpos = store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256); + current = _mm_set_epi32(high1,low1,high0,low0); } + + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ + current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ + + 
nexthigh_rev = high0_rev; /* Take from previous loop */ +#ifdef HAVE_SSE4_1 + high0_rev = (unsigned int) _mm_extract_epi32(current,0); /* Generate for next loop (SSE4.1 and higher) */ +#else + high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); /* Generate for next loop (SSSE3) */ #endif - while (ptr > startptr + 6) { - ptr -= 6; +#if 1 + next = _mm_alignr_epi8(_mm_set1_epi32(nexthigh_rev),current,4); +#else + /* Previous solution for SSE4.1 */ + temp = _mm_insert_epi32(current,nexthigh_rev,0x00); + next = _mm_shuffle_epi32(temp,0x39); +#endif + +#else + /* Non-SSSE3 */ #ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */ + high0 = Bigendian_convert_uint(ref_blocks[ptr]); low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); + high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); #else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - /* nextlow = ref_blocks[ptr+7]; */ + high0 = ref_blocks[ptr]; low0 = ref_blocks[ptr+1]; + high1 = ref_blocks[ptr+3]; low1 = ref_blocks[ptr+4]; + nextlow = ref_blocks[ptr+7]; #endif - if (mode == CMET_STRANDED) { + + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); + high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); + nextlow = Cmet_reduce_ct(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if 
(genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - /* nextlow = Cmet_reduce_ga(nextlow); */ - } + nextlow = Cmet_reduce_ct(nextlow); + } else { + high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); + high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); + nextlow = Cmet_reduce_ga(nextlow); } - current = _mm_set_epi32(high1,low1,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ -#ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ -#else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ + } else if (mode == ATOI_STRANDED) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + nextlow = Atoi_reduce_tc(nextlow); + } else { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + nextlow = Atoi_reduce_ag(nextlow); + } + + } else if (mode == TTOC_STRANDED) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); + high1 = Atoi_reduce_ag(high1); low1 = 
Atoi_reduce_ag(low1); + nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + nextlow = Atoi_reduce_ag(nextlow); + } else { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + nextlow = Atoi_reduce_tc(nextlow); + } + } + + current = _mm_set_epi32(high1,low1,high0,low0); + + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ + current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ + + nexthigh_rev = high0_rev; /* Take from previous loop */ + + high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); /* Generate for next loop */ + low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); + high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16); + low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16); + + next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev); #endif - nexthigh_rev = high0_rev; -#ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */ - /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */ + if (indexsize == 9) { + extract_9mers_fwd_simd_64(array,current,next); + } else if (indexsize == 8) { + extract_8mers_fwd_simd_64(array,current,next); + } else if 
(indexsize == 7) { + extract_7mers_fwd_simd_64(array,current,next); + } else if (indexsize == 6) { + extract_6mers_fwd_simd_64(array,current,next); + } else if (indexsize == 5) { + extract_5mers_fwd_simd_64(array,current,next); + } else { + abort(); + } + count_fwdrev_simd_n(counts,(Genomecomp_T *) array,64); + } + + if (ptr > startptr + 3) { + ptr -= 3; - temp = _mm_insert_epi32(current,nexthigh_rev,0x00); - next = _mm_shuffle_epi32(temp,0x39); +#ifdef WORDS_BIGENDIAN + high0 = Bigendian_convert_uint(ref_blocks[ptr]); + low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); - high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16); - low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16); + high0 = ref_blocks[ptr]; + low0 = ref_blocks[ptr+1]; + nextlow = ref_blocks[ptr+4]; +#endif + + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow); + } else { + high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } else { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high0 = Atoi_reduce_ag(high0); low0 = 
Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } else { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } + } - next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev); + current = _mm_set_epi32(0,nextlow,high0,low0); + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ +#ifdef HAVE_SSSE3 + current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ +#else + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ + current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ #endif - extract_9mers_fwd_simd(array,current,next); - chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array); + /* nexthigh_rev = high0_rev; */ +#ifdef HAVE_SSE4_1 + high0_rev = (unsigned int) _mm_extract_epi32(current,0); + low0_rev = (unsigned int) _mm_extract_epi32(current,1); + nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2); +#else + high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); + low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); + nexthigh_rev = (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16); +#endif + + if (indexsize == 9) { + count_9mers_fwd_32(counts,high0_rev,low0_rev,nexthigh_rev); + } else if (indexsize == 8) { + count_8mers_fwd_32(counts,high0_rev,low0_rev,nexthigh_rev); + } else if (indexsize == 7) { + count_7mers_fwd_32(counts,high0_rev,low0_rev,nexthigh_rev); + } else if 
(indexsize == 6) { + count_6mers_fwd_32(counts,high0_rev,low0_rev,nexthigh_rev); + } else if (indexsize == 5) { + count_5mers_fwd_32(counts,high0_rev,low0_rev,nexthigh_rev); + } else { + abort(); } + } + + ptr -= 3; - if (ptr > startptr + 3) { - ptr -= 3; + /* Start block */ + assert(ptr == startptr); #ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */ + high0 = Bigendian_convert_uint(ref_blocks[ptr]); + low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - /* nextlow = ref_blocks[ptr+4]; */ + high0 = ref_blocks[ptr]; + low0 = ref_blocks[ptr+1]; + nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - current = _mm_set_epi32(0,0,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow); + } else { + high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); 
nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } else { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } else { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } + } + + current = _mm_set_epi32(0,nextlow,high0,low0); + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ #ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ + current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ #else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ + current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ #endif - nexthigh_rev = high0_rev; + /* nexthigh_rev = high0_rev; */ #ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - 
low0_rev = (unsigned int) _mm_extract_epi32(current,1); + high0_rev = (unsigned int) _mm_extract_epi32(current,0); + low0_rev = (unsigned int) _mm_extract_epi32(current,1); + nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2); +#else + high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); + low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); + nexthigh_rev = (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16); +#endif + + if (indexsize == 9) { + count_9mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + } else if (indexsize == 8) { + count_8mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + } else if (indexsize == 7) { + count_7mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + } else if (indexsize == 6) { + count_6mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + } else if (indexsize == 5) { + count_5mers_fwd_partial(counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + } else { + fprintf(stderr,"indexsize %d not supported\n",indexsize); + abort(); + } + } + + return; +} + +#endif + + +#ifndef HAVE_SSE2 +static void +store_positions_fwd_std (Chrpos_T *table, UINT4 *positions, Count_T *counts, int indexsize, + Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos, + int genestrand) { + int startdiscard, enddiscard; + Genomecomp_T ptr, startptr, endptr, high_rev, low_rev, nexthigh_rev, + low, high, nextlow; + + + if (left_plus_length < (Univcoord_T) indexsize) { + left_plus_length = 0; + } else { + left_plus_length -= indexsize; + } + chrpos += (left_plus_length - left); /* We are starting from the right */ + + startptr = left/32U*3; + ptr = endptr = left_plus_length/32U*3; + startdiscard = left % 32; /* (left+pos5) % 32 */ + enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */ + + if (left_plus_length <= 
left) { + /* Skip */ + + } else if (startptr == endptr) { +#ifdef WORDS_BIGENDIAN + high = Bigendian_convert_uint(ref_blocks[ptr]); + low = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); + high = ref_blocks[ptr]; + low = ref_blocks[ptr+1]; + nextlow = ref_blocks[ptr+4]; #endif - chrpos = store_9mers_fwd(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev); + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); } + } + + high_rev = reverse_nt[low >> 16]; + high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); + low_rev = reverse_nt[high >> 16]; + low_rev |= (reverse_nt[high & 0x0000FFFF] << 
16); + nexthigh_rev = reverse_nt[nextlow >> 16]; + nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); + if (indexsize == 9) { + chrpos = store_9mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); } else if (indexsize == 8) { -#ifdef HAVE_AVX2 - while (ptr > startptr + 12) { - ptr -= 12; + chrpos = store_8mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); + } else if (indexsize == 7) { + chrpos = store_7mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); + } else if (indexsize == 6) { + chrpos = store_6mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); + } else if (indexsize == 5) { + chrpos = store_5mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,enddiscard); + } else { + fprintf(stderr,"indexsize %d not supported\n",indexsize); + abort(); + } + + } else { + /* Genome_print_blocks(ref_blocks,left,left+16); */ + /* End block */ #ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 = Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */ + high = Bigendian_convert_uint(ref_blocks[ptr]); + low = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; - /* nextlow 
= ref_blocks[ptr+13]; */ + high = ref_blocks[ptr]; + low = ref_blocks[ptr+1]; + nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - current256 = _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0); - current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */ - current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */ - current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */ - - nexthigh_rev = high0_rev; - - high0_rev = (unsigned int) _mm256_extract_epi32(current256,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */ - /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */ + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high = 
Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } + } - temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); + high_rev = reverse_nt[low >> 16]; + high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); + low_rev = reverse_nt[high >> 16]; + low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); + nexthigh_rev = reverse_nt[nextlow >> 16]; + nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); - extract_8mers_fwd_simd_128(array256,current256,next256); - chrpos = store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256); - } -#endif + if (indexsize == 9) { + chrpos = store_9mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + } else if (indexsize == 8) { + chrpos = 
store_8mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + } else if (indexsize == 7) { + chrpos = store_7mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + } else if (indexsize == 6) { + chrpos = store_6mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + } else if (indexsize == 5) { + chrpos = store_5mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + } else { + abort(); + } - while (ptr > startptr + 6) { - ptr -= 6; + while (ptr > startptr + 3) { + ptr -= 3; #ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */ + high = Bigendian_convert_uint(ref_blocks[ptr]); + low = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - /* nextlow = ref_blocks[ptr+7]; */ + high = ref_blocks[ptr]; + low = ref_blocks[ptr+1]; + nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - /* nextlow = Cmet_reduce_ga(nextlow); */ 
- } + + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); } + } - current = _mm_set_epi32(high1,low1,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ -#ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ -#else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ -#endif + high_rev = reverse_nt[low >> 16]; + high_rev |= (reverse_nt[low & 0x0000FFFF] << 
16); + low_rev = reverse_nt[high >> 16]; + low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); + nexthigh_rev = reverse_nt[nextlow >> 16]; + nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); + + if (indexsize == 9) { + chrpos = store_9mers_fwd_32(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev); + } else if (indexsize == 8) { + chrpos = store_8mers_fwd_32(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev); + } else if (indexsize == 7) { + chrpos = store_7mers_fwd_32(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev); + } else if (indexsize == 6) { + chrpos = store_6mers_fwd_32(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev); + } else if (indexsize == 5) { + chrpos = store_5mers_fwd_32(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev); + } else { + abort(); + } + } - nexthigh_rev = high0_rev; -#ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */ - /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */ + ptr -= 3; - temp = _mm_insert_epi32(current,nexthigh_rev,0x00); - next = _mm_shuffle_epi32(temp,0x39); -#else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); - high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16); - low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16); + /* Start block */ + assert(ptr == startptr); - next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev); +#ifdef WORDS_BIGENDIAN + high = Bigendian_convert_uint(ref_blocks[ptr]); + low = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); +#else + high = ref_blocks[ptr]; + low = ref_blocks[ptr+1]; + nextlow = ref_blocks[ptr+4]; #endif - 
extract_8mers_fwd_simd(array,current,next); - chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array); + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); } + } - if (ptr > startptr + 3) { - ptr -= 3; + high_rev = reverse_nt[low >> 16]; + high_rev |= (reverse_nt[low & 0x0000FFFF] << 16); + low_rev = reverse_nt[high >> 16]; + low_rev |= (reverse_nt[high & 0x0000FFFF] << 16); + nexthigh_rev = reverse_nt[nextlow >> 16]; + nexthigh_rev |= (reverse_nt[nextlow & 0x0000FFFF] << 16); -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - /* nextlow = ref_blocks[ptr+4]; */ + if (indexsize == 9) { 
+ chrpos = store_9mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + } else if (indexsize == 8) { + chrpos = store_8mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + } else if (indexsize == 7) { + chrpos = store_7mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + } else if (indexsize == 6) { + chrpos = store_6mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + } else if (indexsize == 5) { + chrpos = store_5mers_fwd_partial(chrpos,table,positions,counts,high_rev,low_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + } else { + fprintf(stderr,"indexsize %d not supported\n",indexsize); + abort(); + } + + } + + return; +} #endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - current = _mm_set_epi32(0,0,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ + +#ifdef HAVE_SSE2 +static void +store_positions_fwd_simd (Chrpos_T *table, UINT4 *positions, Count_T *counts, int indexsize, + Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos, + int genestrand) { + int startdiscard, enddiscard; + Genomecomp_T ptr, startptr, endptr, nexthigh_rev, nextlow; + Genomecomp_T high0_rev, low0_rev, low0, high0, low1, high1; + __m128i 
current, a, b, next, mask2, mask4; #ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ + __m128i reverse8; #else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ + __m128i mask8; #endif - - nexthigh_rev = high0_rev; #ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - low0_rev = (unsigned int) _mm_extract_epi32(current,1); + __m128i temp; #else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); + Genomecomp_T high1_rev, low1_rev; #endif - - chrpos = store_8mers_fwd(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev); - } - - } else if (indexsize == 7) { #ifdef HAVE_AVX2 - while (ptr > startptr + 12) { - ptr -= 12; - -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 = Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; - /* nextlow = ref_blocks[ptr+13]; */ + Genomecomp_T low2, high2, low3, high3; + __m256i current256, a256, b256, c256, d256, next256, temp256, bigmask2, bigmask4, bigreverse8; + __m256i shift256; #endif - if (mode == CMET_STRANDED) { - high0 = 
Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - - current256 = _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0); - current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */ - current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */ - current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */ - - nexthigh_rev = high0_rev; - - high0_rev = (unsigned int) _mm256_extract_epi32(current256,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */ - /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */ - - temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); - - extract_7mers_fwd_simd_128(array256,current256,next256); - chrpos = 
store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256); - } +#ifdef HAVE_AVX512 + Genomecomp_T low4, high4, low5, high5, low6, high6, low7, high7; + __m512i current512, a512, b512, next512, temp512, hugemask2, hugemask4; + __m512i shift512; #endif - while (ptr > startptr + 6) { - ptr -= 6; -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - /* nextlow = ref_blocks[ptr+7]; */ -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } + debug(printf("Starting store_positions_fwd_simd\n")); - current = _mm_set_epi32(high1,low1,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ + if (left_plus_length < (Univcoord_T) indexsize) { + left_plus_length = 0; + } else { + left_plus_length -= indexsize; + } + chrpos += (left_plus_length - left); /* We are starting from the right */ + + startptr = 
left/32U*3; + ptr = endptr = left_plus_length/32U*3; + startdiscard = left % 32; /* (left+pos5) % 32 */ + enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */ + + mask2 = _mm_set1_epi32(0x33333333); + mask4 = _mm_set1_epi32(0x0F0F0F0F); #ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ + reverse8 = _mm_set_epi8(0x0C,0x0D,0x0E,0x0F, 0x08,0x09,0x0A,0x0B, 0x04,0x05,0x06,0x07, 0x00,0x01,0x02,0x03); #else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ + mask8 = _mm_set1_epi32(0x00FF00FF); +#endif +#ifdef HAVE_AVX2 + bigmask2 = _mm256_set1_epi32(0x33333333); + bigmask4 = _mm256_set1_epi32(0x0F0F0F0F); + bigreverse8 = _mm256_set_epi8(0x0C,0x0D,0x0E,0x0F, 0x08,0x09,0x0A,0x0B, 0x04,0x05,0x06,0x07, 0x00,0x01,0x02,0x03, + 0x0C,0x0D,0x0E,0x0F, 0x08,0x09,0x0A,0x0B, 0x04,0x05,0x06,0x07, 0x00,0x01,0x02,0x03); + shift256 = _mm256_setr_epi32(1,2,3,4,5,6,7,0); #endif - - nexthigh_rev = high0_rev; -#ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */ - /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */ - - temp = _mm_insert_epi32(current,nexthigh_rev,0x00); - next = _mm_shuffle_epi32(temp,0x39); -#else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); - high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16); - low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16); - - next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev); +#ifdef HAVE_AVX512 + hugemask2 = _mm512_set1_epi32(0x33333333); + hugemask4 = 
_mm512_set1_epi32(0x0F0F0F0F); + shift512 = _mm512_setr_epi32(1,2,3,4,5,6,7,8, 9,10,11,12,13,14,15,0); #endif - extract_7mers_fwd_simd(array,current,next); - chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array); - } - - if (ptr > startptr + 3) { - ptr -= 3; + if (left_plus_length <= left) { + /* Skip */ + } else if (startptr == endptr) { #ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */ + high0 = Bigendian_convert_uint(ref_blocks[ptr]); + low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - /* nextlow = ref_blocks[ptr+4]; */ + high0 = ref_blocks[ptr]; + low0 = ref_blocks[ptr+1]; + nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - current = _mm_set_epi32(0,0,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow); + } 
else { + high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } else { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } else { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } + } + + current = _mm_set_epi32(0,nextlow,high0,low0); + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ #ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ + current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ #else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ + current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ #endif - nexthigh_rev = high0_rev; + /* nexthigh_rev = high0_rev; */ #ifdef 
HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - low0_rev = (unsigned int) _mm_extract_epi32(current,1); + high0_rev = (unsigned int) _mm_extract_epi32(current,0); + low0_rev = (unsigned int) _mm_extract_epi32(current,1); + nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2); #else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); + high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); + low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); + nexthigh_rev = (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16); #endif - chrpos = store_7mers_fwd(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev); - } - + if (indexsize == 9) { + /* chrpos = */ store_9mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); + } else if (indexsize == 8) { + /* chrpos = */ store_8mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); + } else if (indexsize == 7) { + /* chrpos = */ store_7mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); } else if (indexsize == 6) { -#ifdef HAVE_AVX2 - while (ptr > startptr + 12) { - ptr -= 12; + /* chrpos = */ store_6mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); + } else if (indexsize == 5) { + /* chrpos = */ store_5mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,enddiscard); + } else { + fprintf(stderr,"indexsize %d not supported\n",indexsize); + abort(); + } + + } else { + /* Genome_print_blocks(ref_blocks,left,left+16); */ + /* End block */ #ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = 
Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 = Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */ + high0 = Bigendian_convert_uint(ref_blocks[ptr]); + low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; - /* nextlow = ref_blocks[ptr+13]; */ + high0 = ref_blocks[ptr]; + low0 = ref_blocks[ptr+1]; + nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - current256 = _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0); - current256 = 
_mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */ - current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */ - current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */ - - nexthigh_rev = high0_rev; - - high0_rev = (unsigned int) _mm256_extract_epi32(current256,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */ - /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */ - - temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); - - extract_6mers_fwd_simd_128(array256,current256,next256); - chrpos = store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256); + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow); + } else { + high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow); } -#endif - - while (ptr > startptr + 6) { - ptr -= 6; - -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - /* nextlow = ref_blocks[ptr+7]; */ -#endif - if (mode == CMET_STRANDED) { - high0 = 
Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } + } else if (mode == ATOI_STRANDED) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } else { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } else { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } + } - current = _mm_set_epi32(high1,low1,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ + current = _mm_set_epi32(0,nextlow,high0,low0); + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ + current = 
_mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ #ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ + current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ #else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ + current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ #endif - nexthigh_rev = high0_rev; + /* nexthigh_rev = high0_rev; */ #ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */ - /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */ - - temp = _mm_insert_epi32(current,nexthigh_rev,0x00); - next = _mm_shuffle_epi32(temp,0x39); + high0_rev = (unsigned int) _mm_extract_epi32(current,0); + low0_rev = (unsigned int) _mm_extract_epi32(current,1); + nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2); #else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); - high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16); - low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16); - - next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev); + high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); + low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); + nexthigh_rev = (reverse_nt[nextlow >> 16] | 
reverse_nt[nextlow & 0x0000FFFF] << 16); #endif - extract_6mers_fwd_simd(array,current,next); - chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array); - } + if (indexsize == 9) { + chrpos = store_9mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + } else if (indexsize == 8) { + chrpos = store_8mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + } else if (indexsize == 7) { + chrpos = store_7mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + } else if (indexsize == 6) { + chrpos = store_6mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + } else if (indexsize == 5) { + chrpos = store_5mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,/*startdiscard*/0,enddiscard); + } else { + abort(); + } - if (ptr > startptr + 3) { - ptr -= 3; + /* Middle blocks */ +#ifdef HAVE_AVX512 + while (ptr > startptr + 24) { + ptr -= 24; + + if (mode == STANDARD) { + a512 = _mm512_loadu_si512((__m512i *) &(ref_blocks[ptr])); + b512 = _mm512_loadu_si512((__m512i *) &(ref_blocks[ptr+7])); + current512 = _mm512_permutex2var_epi32(a512,_mm512_setr_epi32(1, 0, 4, 3, 7, 6, 10, 9, 13, 12, 16+9, 16+8, 16+12, 16+11, 16+15, 16+14), b512); + } else { + current512 = apply_mode_fwd_512(&(ref_blocks[ptr]),mode,genestrand); + } -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */ -#else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - /* nextlow = ref_blocks[ptr+4]; */ + current512 = _mm512_or_si512(_mm512_and_si512(_mm512_srli_epi32(current512,2),hugemask2),_mm512_slli_epi32(_mm512_and_si512(current512,hugemask2),2)); /* Swap pairs */ + current512 = 
_mm512_or_si512(_mm512_and_si512(_mm512_srli_epi32(current512,4),hugemask4),_mm512_slli_epi32(_mm512_and_si512(current512,hugemask4),4)); /* Swap nibbles */ +#ifdef HAVE_AVX512BW + current512 = _mm512_shuffle_epi8(current512,hugereverse8); /* Reverse bytes within 128-bit lanes*/ +#else + /* Reverse bytes within 128-bit lanes*/ + current256 = _mm256_shuffle_epi8(_mm512_extracti64x4_epi64(current512,0x0),bigreverse8); + next256 = _mm256_shuffle_epi8(_mm512_extracti64x4_epi64(current512,0x1),bigreverse8); + current512 = _mm512_broadcast_i64x4(next256); + current512 = _mm512_inserti64x4(current512,current256,0x0); +#endif + + nexthigh_rev = high0_rev; /* Take from previous loop */ + + current = _mm512_extracti32x4_epi32(current512,0); + high0_rev = (unsigned int) _mm_extract_epi32(current, 0); /* Generate for next loop */ + + temp = _mm_insert_epi32(current,nexthigh_rev,0x00); + temp512 = _mm512_inserti32x4(current512,temp,0x00); + next512 = _mm512_permutexvar_epi32(shift512,temp512); /* shift goes first! 
*/ + + if (indexsize == 9) { + chrpos = store_9mers_fwd_simd_256(chrpos,table,positions,counts,current512,next512); + } else if (indexsize == 8) { + chrpos = store_8mers_fwd_simd_256(chrpos,table,positions,counts,current512,next512); + } else if (indexsize == 7) { + chrpos = store_7mers_fwd_simd_256(chrpos,table,positions,counts,current512,next512); + } else if (indexsize == 6) { + chrpos = store_6mers_fwd_simd_256(chrpos,table,positions,counts,current512,next512); + } else if (indexsize == 5) { + chrpos = store_5mers_fwd_simd_256(chrpos,table,positions,counts,current512,next512); + } else { + abort(); + } + } #endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - current = _mm_set_epi32(0,0,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ -#ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ -#else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ -#endif - nexthigh_rev = high0_rev; -#ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - low0_rev = (unsigned int) _mm_extract_epi32(current,1); +#ifdef HAVE_AVX2 + while (ptr > startptr + 12) { + ptr -= 12; + + if (mode == STANDARD) { + a256 = 
_mm256_loadu_si256((__m256i *) &(ref_blocks[ptr])); + b256 = _mm256_loadu_si256((__m256i *) &(ref_blocks[ptr+3])); + c256 = _mm256_unpacklo_epi64(a256,b256); + d256 = _mm256_unpackhi_epi64(a256,b256); + current256 = _mm256_permute2x128_si256(c256, d256, 0x30); + current256 = _mm256_shuffle_epi32(current256, 0xB1); /* 0b10110001 */ + } else { + current256 = apply_mode_fwd_256(&(ref_blocks[ptr]),mode,genestrand); + } + + current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */ + current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */ + current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */ + + nexthigh_rev = high0_rev; /* Take from previous loop */ + high0_rev = (unsigned int) _mm256_extract_epi32(current256,0); /* Generate for next loop */ + +#if 0 + /* Doesn't work, because performs shift within 128-bit lanes */ + next256 = _mm256_alignr_epi8(_mm256_set1_epi32(nexthigh_rev),current256,4); #else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); + temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00); + next256 = _mm256_permutevar8x32_epi32(temp256,shift256); #endif - - chrpos = store_6mers_fwd(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev); + + if (indexsize == 9) { + chrpos = store_9mers_fwd_simd_128(chrpos,table,positions,counts,current256,next256); + } else if (indexsize == 8) { + chrpos = store_8mers_fwd_simd_128(chrpos,table,positions,counts,current256,next256); + } else if (indexsize == 7) { + chrpos = store_7mers_fwd_simd_128(chrpos,table,positions,counts,current256,next256); + } else if (indexsize == 6) { + chrpos = 
store_6mers_fwd_simd_128(chrpos,table,positions,counts,current256,next256); + } else if (indexsize == 5) { + chrpos = store_5mers_fwd_simd_128(chrpos,table,positions,counts,current256,next256); + } else { + abort(); } + } +#endif - } else if (indexsize == 5) { -#ifdef HAVE_AVX2 - while (ptr > startptr + 12) { - ptr -= 12; + while (ptr > startptr + 6) { + ptr -= 6; +#ifdef HAVE_SSSE3 + if (mode == STANDARD) { + a = _mm_loadu_si128((__m128i *) &(ref_blocks[ptr])); + b = _mm_loadu_si128((__m128i *) &(ref_blocks[ptr+3])); + current = _mm_unpacklo_epi64(a,b); + current = _mm_shuffle_epi32(current, 0xB1); /* 0b10110001 */ +#ifndef HAVE_SSE4_1 #ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 = Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); */ #else - high0 = ref_blocks[ptr]; low0 = ref_blocks[ptr+1]; - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; - /* nextlow = ref_blocks[ptr+13]; */ #endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - 
high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - /* nextlow = Cmet_reduce_ga(nextlow); */ - } - } - - current256 = _mm256_set_epi32(high3,low3,high2,low2,high1,low1,high0,low0); - current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,2),bigmask2),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask2),2)); /* Swap pairs */ - current256 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi32(current256,4),bigmask4),_mm256_slli_epi32(_mm256_and_si256(current256,bigmask4),4)); /* Swap nibbles */ - current256 = _mm256_shuffle_epi8(current256,bigreverse8); /* Reverse bytes within 128-bit lanes */ - - nexthigh_rev = high0_rev; - - high0_rev = (unsigned int) _mm256_extract_epi32(current256,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */ - /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */ - - temp256 = _mm256_insert_epi32(current256,nexthigh_rev,0x00); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); - - extract_5mers_fwd_simd_128(array256,current256,next256); - chrpos = store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256); - } #endif - while (ptr > startptr + 6) { - ptr -= 6; - + } else { #ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); */ + high0 = Bigendian_convert_uint(ref_blocks[ptr]); low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); + high1 = 
Bigendian_convert_uint(ref_blocks[ptr+3]); low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); #else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - /* nextlow = ref_blocks[ptr+7]; */ + high0 = ref_blocks[ptr]; low0 = ref_blocks[ptr+1]; + high1 = ref_blocks[ptr+3]; low1 = ref_blocks[ptr+4]; + nextlow = ref_blocks[ptr+7]; #endif - if (mode == CMET_STRANDED) { + + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ + nextlow = Cmet_reduce_ct(nextlow); } else if (mode == CMET_NONSTRANDED) { if (genestrand > 0) { high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - /* nextlow = Cmet_reduce_ct(nextlow); */ + nextlow = Cmet_reduce_ct(nextlow); } else { high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - /* nextlow = Cmet_reduce_ga(nextlow); */ + nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + nextlow = Atoi_reduce_tc(nextlow); + } else { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + 
nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + nextlow = Atoi_reduce_ag(nextlow); + } else { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + nextlow = Atoi_reduce_tc(nextlow); } } current = _mm_set_epi32(high1,low1,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ -#ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ -#else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ -#endif + } - nexthigh_rev = high0_rev; + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ + current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ + + nexthigh_rev = high0_rev; /* Take from previous loop */ #ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - /* low0_rev = (unsigned int) _mm_extract_epi32(current,1); */ - /* high1_rev = (unsigned int) _mm_extract_epi32(current,2); */ - /* low1_rev = (unsigned int) _mm_extract_epi32(current,3); */ - - temp = _mm_insert_epi32(current,nexthigh_rev,0x00); - next = _mm_shuffle_epi32(temp,0x39); + high0_rev = (unsigned int) _mm_extract_epi32(current,0); /* Generate for next 
loop (SSE4.1 and higher) */ #else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); - high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16); - low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16); - - next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev); + high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); /* Generate for next loop (SSSE3) */ #endif - extract_5mers_fwd_simd(array,current,next); - chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array); - } +#if 1 + next = _mm_alignr_epi8(_mm_set1_epi32(nexthigh_rev),current,4); +#else + /* Previous solution for SSE4.1 */ + temp = _mm_insert_epi32(current,nexthigh_rev,0x00); + next = _mm_shuffle_epi32(temp,0x39); +#endif - if (ptr > startptr + 3) { - ptr -= 3; +#else + /* Non-SSSE3 */ #ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */ + high0 = Bigendian_convert_uint(ref_blocks[ptr]); low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); + high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); ow1 = Bigendian_convert_uint(ref_blocks[ptr+4]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); #else - high0 = ref_blocks[ptr]; - low0 = ref_blocks[ptr+1]; - /* nextlow = ref_blocks[ptr+4]; */ + high0 = ref_blocks[ptr]; low0 = ref_blocks[ptr+1]; + high1 = ref_blocks[ptr+3]; low1 = ref_blocks[ptr+4]; + nextlow = ref_blocks[ptr+7]; #endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ - } else { - high0 = Cmet_reduce_ga(high0); 
low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */ - } + + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); + high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); + nextlow = Cmet_reduce_ct(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); + high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); + nextlow = Cmet_reduce_ct(nextlow); + } else { + high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); + high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); + nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high0 = Atoi_reduce_ct(high0); low0 = Atoi_reduce_ct(low0); + high1 = Atoi_reduce_ct(high1); low1 = Atoi_reduce_ct(low1); + nextlow = Atoi_reduce_ct(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_ct(high0); low0 = Atoi_reduce_ct(low0); + high1 = Atoi_reduce_ct(high1); low1 = Atoi_reduce_ct(low1); + nextlow = Atoi_reduce_ct(nextlow); + } else { + high0 = Atoi_reduce_ga(high0); low0 = Atoi_reduce_ga(low0); + high1 = Atoi_reduce_ga(high1); low1 = Atoi_reduce_ga(low1); + nextlow = Atoi_reduce_ga(nextlow); } + } - current = _mm_set_epi32(0,0,high0,low0); - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ -#ifdef HAVE_SSSE3 - current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ + current = _mm_set_epi32(high1,low1,high0,low0); + + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ + current = 
_mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ + current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ + + nexthigh_rev = high0_rev; /* Take from previous loop */ + + high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); /* Generate for next loop */ + low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); + high1_rev = (reverse_nt[low1 >> 16] | reverse_nt[low1 & 0x0000FFFF] << 16); + low1_rev = (reverse_nt[high1 >> 16] | reverse_nt[high1 & 0x0000FFFF] << 16); + + next = _mm_setr_epi32(low0_rev,high1_rev,low1_rev,nexthigh_rev); +#endif + + if (indexsize == 9) { + chrpos = store_9mers_fwd_simd_64(chrpos,table,positions,counts,current,next); + } else if (indexsize == 8) { + chrpos = store_8mers_fwd_simd_64(chrpos,table,positions,counts,current,next); + } else if (indexsize == 7) { + chrpos = store_7mers_fwd_simd_64(chrpos,table,positions,counts,current,next); + } else if (indexsize == 6) { + chrpos = store_6mers_fwd_simd_64(chrpos,table,positions,counts,current,next); + } else if (indexsize == 5) { + chrpos = store_5mers_fwd_simd_64(chrpos,table,positions,counts,current,next); + } else { + abort(); + } + } + + if (ptr > startptr + 3) { + ptr -= 3; + +#ifdef WORDS_BIGENDIAN + high0 = Bigendian_convert_uint(ref_blocks[ptr]); + low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ - current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ + high0 = ref_blocks[ptr]; + low0 = ref_blocks[ptr+1]; + nextlow = 
ref_blocks[ptr+4]; #endif - nexthigh_rev = high0_rev; -#ifdef HAVE_SSE4_1 - high0_rev = (unsigned int) _mm_extract_epi32(current,0); - low0_rev = (unsigned int) _mm_extract_epi32(current,1); + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow); + } else { + high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } else { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } else { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } + } + + current = _mm_set_epi32(0,nextlow,high0,low0); + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ +#ifdef HAVE_SSSE3 + current = _mm_shuffle_epi8(current,reverse8); /* Reverse bytes */ #else - high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); - low0_rev = (reverse_nt[high0 >> 16] 
| reverse_nt[high0 & 0x0000FFFF] << 16); + current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,8),mask8),_mm_slli_epi32(_mm_and_si128(current,mask8),8)); /* Swap bytes */ + current = _mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ #endif - - chrpos = store_5mers_fwd(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev); + + /* nexthigh_rev = high0_rev; */ +#ifdef HAVE_SSE4_1 + high0_rev = (unsigned int) _mm_extract_epi32(current,0); + low0_rev = (unsigned int) _mm_extract_epi32(current,1); + nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2); +#else + high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); + low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); + nexthigh_rev = (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16); +#endif + + if (indexsize == 9) { + chrpos = store_9mers_fwd_32(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev); + } else if (indexsize == 8) { + chrpos = store_8mers_fwd_32(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev); + } else if (indexsize == 7) { + chrpos = store_7mers_fwd_32(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev); + } else if (indexsize == 6) { + chrpos = store_6mers_fwd_32(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev); + } else if (indexsize == 5) { + chrpos = store_5mers_fwd_32(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev); + } else { + abort(); } - - } else { - abort(); } ptr -= 3; @@ -30409,23 +21304,42 @@ #ifdef WORDS_BIGENDIAN high0 = Bigendian_convert_uint(ref_blocks[ptr]); low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); - /* nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); */ + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else high0 = ref_blocks[ptr]; low0 = ref_blocks[ptr+1]; - /* nextlow = ref_blocks[ptr+4]; */ + nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { 
- high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ + + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow); } else if (mode == CMET_NONSTRANDED) { if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); /* nextlow = Cmet_reduce_ct(nextlow); */ + high0 = Cmet_reduce_ct(high0); low0 = Cmet_reduce_ct(low0); nextlow = Cmet_reduce_ct(nextlow); + } else { + high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); + } else { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_ag(high0); low0 = Atoi_reduce_ag(low0); nextlow = Atoi_reduce_ag(nextlow); } else { - high0 = Cmet_reduce_ga(high0); low0 = Cmet_reduce_ga(low0); /* nextlow = Cmet_reduce_ga(nextlow); */ + high0 = Atoi_reduce_tc(high0); low0 = Atoi_reduce_tc(low0); nextlow = Atoi_reduce_tc(nextlow); } } - current = _mm_set_epi32(0,0,high0,low0); + current = _mm_set_epi32(0,nextlow,high0,low0); current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,2),mask2),_mm_slli_epi32(_mm_and_si128(current,mask2),2)); /* Swap pairs */ current = _mm_or_si128(_mm_and_si128(_mm_srli_epi32(current,4),mask4),_mm_slli_epi32(_mm_and_si128(current,mask4),4)); /* Swap nibbles */ #ifdef HAVE_SSSE3 @@ -30435,25 +21349,27 @@ current = 
_mm_or_si128(_mm_srli_epi32(current,16),_mm_slli_epi32(current,16)); /* Swap 16-bit quantities */ #endif - nexthigh_rev = high0_rev; + /* nexthigh_rev = high0_rev; */ #ifdef HAVE_SSE4_1 high0_rev = (unsigned int) _mm_extract_epi32(current,0); low0_rev = (unsigned int) _mm_extract_epi32(current,1); + nexthigh_rev = (unsigned int) _mm_extract_epi32(current,2); #else high0_rev = (reverse_nt[low0 >> 16] | reverse_nt[low0 & 0x0000FFFF] << 16); low0_rev = (reverse_nt[high0 >> 16] | reverse_nt[high0 & 0x0000FFFF] << 16); + nexthigh_rev = (reverse_nt[nextlow >> 16] | reverse_nt[nextlow & 0x0000FFFF] << 16); #endif if (indexsize == 9) { - chrpos = store_9mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + chrpos = store_9mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); } else if (indexsize == 8) { - chrpos = store_8mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + chrpos = store_8mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); } else if (indexsize == 7) { - chrpos = store_7mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + chrpos = store_7mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); } else if (indexsize == 6) { - chrpos = store_6mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + chrpos = store_6mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); } else if (indexsize == 5) { - chrpos = store_5mers_fwd_partial(chrpos,table,pointers,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); + chrpos = 
store_5mers_fwd_partial(chrpos,table,positions,counts,high0_rev,low0_rev,nexthigh_rev,startdiscard,/*enddiscard*/31); } else { fprintf(stderr,"indexsize %d not supported\n",indexsize); abort(); @@ -30470,7 +21386,7 @@ ************************************************************************/ static void -count_9mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, +count_9mers_rev_partial (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc, int startdiscard, int enddiscard) { Genomecomp_T masked; int pos; @@ -30480,7 +21396,7 @@ while (pos <= enddiscard && pos <= 7) { masked = low_rc >> 2*pos; masked &= MASK9; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos++; } @@ -30489,7 +21405,7 @@ masked = low_rc >> 2*pos; masked |= high_rc << (32 - 2*pos); masked &= MASK9; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos++; } @@ -30497,7 +21413,7 @@ while (pos <= enddiscard && pos <= 23) { masked = high_rc >> (2*pos - 32); masked &= MASK9; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos++; } @@ -30506,7 +21422,7 @@ masked = high_rc >> (2*pos - 32); masked |= nextlow_rc << (64 - 2*pos); masked &= MASK9; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos++; } @@ -30515,7 +21431,7 @@ } static int -store_9mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, +store_9mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc, int startdiscard, int enddiscard) { Genomecomp_T 
masked; @@ -30527,9 +21443,8 @@ masked = low_rc >> 2*pos; masked &= MASK9; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos++; @@ -30540,9 +21455,8 @@ masked |= high_rc << (32 - 2*pos); masked &= MASK9; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos++; @@ -30552,9 +21466,8 @@ masked = high_rc >> (2*pos - 32); masked &= MASK9; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos++; @@ -30565,9 +21478,8 @@ masked |= nextlow_rc << (64 - 2*pos); masked &= MASK9; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos++; @@ -30578,7 +21490,7 @@ static void -count_8mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, +count_8mers_rev_partial (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc, int startdiscard, int enddiscard) { Genomecomp_T masked; int pos; @@ -30588,7 +21500,7 @@ while (pos <= enddiscard && pos <= 8) { masked = low_rc >> 2*pos; masked &= MASK8; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos++; } @@ -30597,7 +21509,7 @@ masked = low_rc >> 2*pos; masked |= high_rc << (32 - 2*pos); masked &= MASK8; - 
INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos++; } @@ -30605,7 +21517,7 @@ while (pos <= enddiscard && pos <= 24) { masked = high_rc >> (2*pos - 32); masked &= MASK8; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos++; } @@ -30614,7 +21526,7 @@ masked = high_rc >> (2*pos - 32); masked |= nextlow_rc << (64 - 2*pos); masked &= MASK8; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos++; } @@ -30623,7 +21535,7 @@ } static int -store_8mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, +store_8mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc, int startdiscard, int enddiscard) { Genomecomp_T masked; @@ -30635,9 +21547,8 @@ masked = low_rc >> 2*pos; masked &= MASK8; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos++; @@ -30648,9 +21559,8 @@ masked |= high_rc << (32 - 2*pos); masked &= MASK8; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos++; @@ -30660,9 +21570,8 @@ masked = high_rc >> (2*pos - 32); masked &= MASK8; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + 
(--counts[masked])] = chrpos; } chrpos--; pos++; @@ -30673,9 +21582,8 @@ masked |= nextlow_rc << (64 - 2*pos); masked &= MASK8; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos++; @@ -30685,7 +21593,7 @@ } static void -count_7mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, +count_7mers_rev_partial (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc, int startdiscard, int enddiscard) { Genomecomp_T masked; int pos; @@ -30695,7 +21603,7 @@ while (pos <= enddiscard && pos <= 9) { masked = low_rc >> 2*pos; masked &= MASK7; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos++; } @@ -30704,7 +21612,7 @@ masked = low_rc >> 2*pos; masked |= high_rc << (32 - 2*pos); masked &= MASK7; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos++; } @@ -30712,7 +21620,7 @@ while (pos <= enddiscard && pos <= 25) { masked = high_rc >> (2*pos - 32); masked &= MASK7; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos++; } @@ -30721,7 +21629,7 @@ masked = high_rc >> (2*pos - 32); masked |= nextlow_rc << (64 - 2*pos); masked &= MASK7; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos++; } @@ -30730,7 +21638,7 @@ } static int -store_7mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, +store_7mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, 
Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc, int startdiscard, int enddiscard) { Genomecomp_T masked; @@ -30742,9 +21650,8 @@ masked = low_rc >> 2*pos; masked &= MASK7; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos++; @@ -30755,9 +21662,8 @@ masked |= high_rc << (32 - 2*pos); masked &= MASK7; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos++; @@ -30767,9 +21673,8 @@ masked = high_rc >> (2*pos - 32); masked &= MASK7; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos++; @@ -30780,9 +21685,8 @@ masked |= nextlow_rc << (64 - 2*pos); masked &= MASK7; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos++; @@ -30793,7 +21697,7 @@ static void -count_6mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, +count_6mers_rev_partial (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc, int startdiscard, int enddiscard) { Genomecomp_T masked; int pos; @@ -30803,7 +21707,7 @@ while (pos <= enddiscard && pos <= 10) { masked = low_rc >> 2*pos; masked &= MASK6; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => 
%d\n",pos,masked,counts[masked])); pos++; } @@ -30812,7 +21716,7 @@ masked = low_rc >> 2*pos; masked |= high_rc << (32 - 2*pos); masked &= MASK6; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos++; } @@ -30820,7 +21724,7 @@ while (pos <= enddiscard && pos <= 26) { masked = high_rc >> (2*pos - 32); masked &= MASK6; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos++; } @@ -30829,7 +21733,7 @@ masked = high_rc >> (2*pos - 32); masked |= nextlow_rc << (64 - 2*pos); masked &= MASK6; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos++; } @@ -30838,7 +21742,7 @@ } static int -store_6mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, +store_6mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc, int startdiscard, int enddiscard) { Genomecomp_T masked; @@ -30850,9 +21754,8 @@ masked = low_rc >> 2*pos; masked &= MASK6; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos++; @@ -30863,9 +21766,8 @@ masked |= high_rc << (32 - 2*pos); masked &= MASK6; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos++; @@ -30875,9 +21777,8 @@ masked = high_rc >> (2*pos - 32); masked &= MASK6; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); 
debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos++; @@ -30888,9 +21789,8 @@ masked |= nextlow_rc << (64 - 2*pos); masked &= MASK6; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos++; @@ -30900,7 +21800,7 @@ } static void -count_5mers_rev_partial (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, +count_5mers_rev_partial (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc, int startdiscard, int enddiscard) { Genomecomp_T masked; int pos; @@ -30910,7 +21810,7 @@ while (pos <= enddiscard && pos <= 11) { masked = low_rc >> 2*pos; masked &= MASK5; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos++; } @@ -30919,7 +21819,7 @@ masked = low_rc >> 2*pos; masked |= high_rc << (32 - 2*pos); masked &= MASK5; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos++; } @@ -30927,7 +21827,7 @@ while (pos <= enddiscard && pos <= 27) { masked = high_rc >> (2*pos - 32); masked &= MASK5; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos++; } @@ -30936,7 +21836,7 @@ masked = high_rc >> (2*pos - 32); masked |= nextlow_rc << (64 - 2*pos); masked &= MASK5; - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("%d partial %04X => %d\n",pos,masked,counts[masked])); pos++; } @@ -30945,7 +21845,7 @@ } static int -store_5mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 
*pointers, UINT4 *positions, Count_T *counts, +store_5mers_rev_partial (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc, int startdiscard, int enddiscard) { Genomecomp_T masked; @@ -30957,9 +21857,8 @@ masked = low_rc >> 2*pos; masked &= MASK5; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos++; @@ -30970,9 +21869,8 @@ masked |= high_rc << (32 - 2*pos); masked &= MASK5; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos++; @@ -30982,9 +21880,8 @@ masked = high_rc >> (2*pos - 32); masked &= MASK5; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos++; @@ -30995,9 +21892,8 @@ masked |= nextlow_rc << (64 - 2*pos); masked &= MASK5; if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u (partial)\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } chrpos--; pos++; @@ -31011,7 +21907,7 @@ #if !defined(HAVE_AVX2) static void -count_9mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { +count_9mers_rev_32 (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { Genomecomp_T masked, oligo; #ifdef INDIVIDUAL_SHIFTS #elif defined(SIMD_MASK_THEN_STORE) @@ -31024,35 +21920,35 @@ #ifdef 
INDIVIDUAL_SHIFTS masked = low_rc & MASK9; /* 0 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("0 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 2) & MASK9; /* 1 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("1 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 4) & MASK9; /* 2 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("2 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 6) & MASK9; /* 3 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("3 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 8) & MASK9; /* 4 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("4 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 10) & MASK9; /* 5 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("5 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 12) & MASK9; /* 6 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("6 %04X => %d\n",masked,counts[masked])); masked = low_rc >> 14; /* 7, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("7 %04X => %d\n",masked,counts[masked])); #else @@ -31064,19 +21960,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("0 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("1 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("2 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + 
INCR_COUNT(counts[masked]); debug(printf("3 %04X => %d\n",masked,counts[masked])); @@ -31088,19 +21984,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("4 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("5 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("6 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("7 %04X => %d\n",masked,counts[masked])); #endif @@ -31110,35 +22006,35 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK9; /* 8 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("8 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 2) & MASK9; /* 9 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("9 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 4) & MASK9; /* 10 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("10 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 6) & MASK9; /* 11 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("11 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 8) & MASK9; /* 12 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("12 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 10) & MASK9; /* 13 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("13 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 12) & MASK9; /* 14 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("14 %04X => 
%d\n",masked,counts[masked])); masked = (oligo >> 14) & MASK9; /* 15 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("15 %04X => %d\n",masked,counts[masked])); #else @@ -31150,19 +22046,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("8 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("9 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("10 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("11 %04X => %d\n",masked,counts[masked])); @@ -31174,54 +22070,54 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("12 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("13 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("14 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("15 %04X => %d\n",masked,counts[masked])); #endif #ifdef INDIVIDUAL_SHIFTS masked = high_rc & MASK9; /* 16 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("16 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 2) & MASK9; /* 17 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("17 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 4) & MASK9; /* 18 */ - 
INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("18 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 6) & MASK9; /* 19 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("19 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 8) & MASK9; /* 20 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("20 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 10) & MASK9; /* 21 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("21 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 12) & MASK9; /* 22 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("22 %04X => %d\n",masked,counts[masked])); masked = high_rc >> 14; /* 23, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("23 %04X => %d\n",masked,counts[masked])); #else @@ -31233,19 +22129,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("16 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("17 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("18 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("19 %04X => %d\n",masked,counts[masked])); @@ -31257,19 +22153,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("20 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); 
debug(printf("21 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("22 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("23 %04X => %d\n",masked,counts[masked])); #endif @@ -31279,35 +22175,35 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK9; /* 24 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("24 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 2) & MASK9; /* 25 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("25 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 4) & MASK9; /* 26 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("26 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 6) & MASK9; /* 27 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("27 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 8) & MASK9; /* 28 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("28 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 10) & MASK9; /* 29 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("29 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 12) & MASK9; /* 30 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("30 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 14) & MASK9; /* 31 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("31 %04X => %d\n",masked,counts[masked])); #else @@ -31319,19 +22215,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("24 %04X => 
%d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("25 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("26 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("27 %04X => %d\n",masked,counts[masked])); @@ -31343,19 +22239,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("28 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("29 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("30 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("31 %04X => %d\n",masked,counts[masked])); #endif @@ -31365,295 +22261,160 @@ #else /* HAVE_AVX2 */ static void -count_9mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { +count_9mers_rev_32 (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { Genomecomp_T masked, oligo; __m256i _oligo, _masked; -#ifdef CHECK_FOR_OVERFLOW - __m256i _counts_after, _counts_neg; -#endif - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask9); masked = EXTRACT256(_masked,0); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("0 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - counts[masked] += 1; 
+ INCR_COUNT(counts[masked]); debug(printf("1 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,2); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("2 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,3); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("3 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,4); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("4 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,5); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("5 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,6); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("6 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,7); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("7 %04X => %d\n",masked,counts[masked])); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ 
- oligo = low_rc >> 16; /* For 15..8 */ oligo |= high_rc << 16; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask9); masked = EXTRACT256(_masked,0); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("8 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("9 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,2); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("10 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,3); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("11 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,4); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("12 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,5); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("13 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,6); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("14 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,7); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("15 %04X => %d\n",masked,counts[masked])); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { 
- inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask9); masked = EXTRACT256(_masked,0); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("16 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("17 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,2); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("18 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,3); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("19 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,4); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("20 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,5); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("21 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,6); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("22 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,7); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("23 %04X => %d\n",masked,counts[masked])); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if 
(_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - oligo = high_rc >> 16; /* For 31..24 */ oligo |= nextlow_rc << 16; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask9); masked = EXTRACT256(_masked,0); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("24 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("25 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,2); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("26 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,3); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("27 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,4); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("28 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,5); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("29 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,6); - counts[masked] += 
1; + INCR_COUNT(counts[masked]); debug(printf("30 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,7); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("31 %04X => %d\n",masked,counts[masked])); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - return; } @@ -31663,9 +22424,9 @@ /* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc}, and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */ -#ifdef USE_SIMD_FOR_COUNTS +#ifdef HAVE_SSE2 static void -extract_9mers_rev_simd (__m128i *out, __m128i current, __m128i next) { +extract_9mers_rev_simd_64 (__m128i *out, __m128i current, __m128i next) { __m128i oligo; oligo = _mm_or_si128( _mm_srli_epi32(current,16), _mm_slli_epi32(next,16)); @@ -31690,6 +22451,93 @@ return; } +#ifdef USE_UNORDERED_9 +static Chrpos_T +store_9mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16]; + + 
extract_9mers_rev_simd_64(array,current,next); + return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array); +} + +#else +/* Includes extract_9mers_rev_simd_64_ordered (__m128i *out, __m128i current, __m128i next) */ +static Chrpos_T +store_9mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16], *out; + __m128i oligo; + __m128i _row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7, + _row8, _row9, _row10, _row11, _row12, _row13, _row14, _row15; + __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + + out = &(array[0]); + + _row0 = _mm_and_si128( current, mask9); + _row1 = _mm_and_si128( _mm_srli_epi32(current,2), mask9); + _row2 = _mm_and_si128( _mm_srli_epi32(current,4), mask9); + _row3 = _mm_and_si128( _mm_srli_epi32(current,6), mask9); + _row4 = _mm_and_si128( _mm_srli_epi32(current,8), mask9); + _row5 = _mm_and_si128( _mm_srli_epi32(current,10), mask9); + _row6 = _mm_and_si128( _mm_srli_epi32(current,12), mask9); + _row7 = _mm_srli_epi32(current,14); /* No mask necessary */; + + oligo = _mm_or_si128( _mm_srli_epi32(current,16), _mm_slli_epi32(next,16)); + _row8 = _mm_and_si128( oligo, mask9); + _row9 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask9); + _row10 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask9); + _row11 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask9); + _row12 = _mm_and_si128( _mm_srli_epi32(oligo,8), mask9); + _row13 = _mm_and_si128( _mm_srli_epi32(oligo,10), mask9); + _row14 = _mm_and_si128( _mm_srli_epi32(oligo,12), mask9); + _row15 = _mm_and_si128( _mm_srli_epi32(oligo,14), mask9); + + + /* Split: top half */ + _t0 = _mm_unpackhi_epi32(_row0,_row1); + _t1 = _mm_unpackhi_epi32(_row2,_row3); + _t2 = _mm_unpackhi_epi32(_row4,_row5); + _t3 = _mm_unpackhi_epi32(_row6,_row7); + _t4 = _mm_unpackhi_epi32(_row8,_row9); + _t5 = _mm_unpackhi_epi32(_row10,_row11); + _t6 = _mm_unpackhi_epi32(_row12,_row13); + _t7 = _mm_unpackhi_epi32(_row14,_row15); + + 
_mm_store_si128(out++, _mm_unpackhi_epi64(_t0,_t1)); + _mm_store_si128(out++, _mm_unpackhi_epi64(_t2,_t3)); + _mm_store_si128(out++, _mm_unpackhi_epi64(_t4,_t5)); + _mm_store_si128(out++, _mm_unpackhi_epi64(_t6,_t7)); + _mm_store_si128(out++, _mm_unpacklo_epi64(_t0,_t1)); + _mm_store_si128(out++, _mm_unpacklo_epi64(_t2,_t3)); + _mm_store_si128(out++, _mm_unpacklo_epi64(_t4,_t5)); + _mm_store_si128(out++, _mm_unpacklo_epi64(_t6,_t7)); + + + /* Split: bottom half */ + _t0 = _mm_unpacklo_epi32(_row0,_row1); + _t1 = _mm_unpacklo_epi32(_row2,_row3); + _t2 = _mm_unpacklo_epi32(_row4,_row5); + _t3 = _mm_unpacklo_epi32(_row6,_row7); + _t4 = _mm_unpacklo_epi32(_row8,_row9); + _t5 = _mm_unpacklo_epi32(_row10,_row11); + _t6 = _mm_unpacklo_epi32(_row12,_row13); + _t7 = _mm_unpacklo_epi32(_row14,_row15); + + _mm_store_si128(out++, _mm_unpackhi_epi64(_t0,_t1)); + _mm_store_si128(out++, _mm_unpackhi_epi64(_t2,_t3)); + _mm_store_si128(out++, _mm_unpackhi_epi64(_t4,_t5)); + _mm_store_si128(out++, _mm_unpackhi_epi64(_t6,_t7)); + _mm_store_si128(out++, _mm_unpacklo_epi64(_t0,_t1)); + _mm_store_si128(out++, _mm_unpacklo_epi64(_t2,_t3)); + _mm_store_si128(out++, _mm_unpacklo_epi64(_t4,_t5)); + _mm_store_si128(out++, _mm_unpacklo_epi64(_t6,_t7)); + + return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif + #ifdef HAVE_AVX2 static void extract_9mers_rev_simd_128 (__m256i *out, __m256i current, __m256i next) { @@ -31716,1553 +22564,279 @@ return; } -#endif - - -static void -count_9mers_rev_simd (Count_T *counts, Inquery_T *inquery, __m128i current, __m128i next) { - __m128i oligo; -#ifdef HAVE_SSE4_1 - __m128i array; -#else - Genomecomp_T array[4]; -#endif -#if defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW) - __m128i _counts_after, _counts_neg; -#endif - - oligo = _mm_or_si128( _mm_srli_epi32(current,16), _mm_slli_epi32(next,16)); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,14), mask9); -#else - 
_mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,14), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("63 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("47 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("31 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("15 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 63 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 47 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 31 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 15 */ -#endif - debug(printf("63 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("47 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("31 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("15 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,12), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,12), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; 
- counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("62 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("46 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("30 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("14 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 62 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 46 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 30 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 14 */ -#endif - debug(printf("62 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("46 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("30 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("14 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,10), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,10), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = 
_mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("61 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("45 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("29 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("13 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 61 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 45 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 29 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 13 */ -#endif - debug(printf("61 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("45 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("29 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("13 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,8), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - 
inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("60 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("44 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("28 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("12 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 60 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 44 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 28 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 12 */ -#endif - debug(printf("60 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("44 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("28 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("12 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - 
inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("59 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("43 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("27 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("11 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 59 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 43 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 27 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 11 */ -#endif - debug(printf("59 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("43 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("27 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("11 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - 
debug(printf("58 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("42 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("26 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("10 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 58 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 42 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 26 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 10 */ -#endif - debug(printf("58 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("42 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("26 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("10 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,2), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("57 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("41 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("25 %04X => 
%d\n",array[2],counts[array[2]])); - debug(printf("9 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 57 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 41 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 25 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 9 */ -#endif - debug(printf("57 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("41 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("25 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("9 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( oligo, mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("56 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("40 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("24 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("8 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 56 */ - 
INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 40 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 24 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 8 */ -#endif - debug(printf("56 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("40 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("24 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("8 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - - -#ifdef HAVE_SSE4_1 - array = _mm_srli_epi32(current,14); /* No mask necessary */; -#else - _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,14)); /* No mask necessary */; -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("55 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("39 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("23 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("7 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 55 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 39 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 23 */ - 
INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 7 */ -#endif - debug(printf("55 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("39 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("23 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("7 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,12), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("54 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("38 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("22 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("6 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 54 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 38 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 22 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 6 */ -#endif - debug(printf("54 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - 
debug(printf("38 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("22 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("6 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,10), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("53 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("37 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("21 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("5 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 53 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 37 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 21 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 5 */ -#endif - debug(printf("53 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("37 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("21 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("5 
%04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,8), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("52 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("36 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("20 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("4 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 52 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 36 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 20 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 4 */ -#endif - debug(printf("52 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("36 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("20 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("4 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,6), mask9); -#else - _mm_store_si128((__m128i *) 
array, _mm_and_si128( _mm_srli_epi32(current,6), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("51 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("35 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("19 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("3 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 51 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 35 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 19 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 3 */ -#endif - debug(printf("51 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("35 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("19 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("3 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,4), mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 
1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("50 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("34 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("18 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("2 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 50 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 34 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 18 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 2 */ -#endif - debug(printf("50 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("34 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("18 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("2 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); +#ifdef USE_UNORDERED_9 +static Chrpos_T +store_9mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + __m256i array[16]; + + extract_9mers_rev_simd_128(array,current,next); + return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array); +} -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,2), mask9); #else - _mm_store_si128((__m128i *) array, _mm_and_si128( 
_mm_srli_epi32(current,2), mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("49 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("33 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("17 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("1 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 49 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 33 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 17 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 1 */ -#endif - debug(printf("49 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("33 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("17 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("1 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); +/* Includes extract_9mers_rev_simd_128_ordered (__m256i *out, __m256i current, __m256i next) */ +static Chrpos_T +store_9mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + __m256i array[16], *out; + __m256i oligo; + __m256i _row0, _row1, 
_row2, _row3, _row4, _row5, _row6, _row7, + _row8, _row9, _row10, _row11, _row12, _row13, _row14, _row15; + __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; + + out = &(array[0]); + + _row0 = _mm256_and_si256( current, bigmask9); + _row1 = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask9); + _row2 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask9); + _row3 = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask9); + _row4 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask9); + _row5 = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask9); + _row6 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask9); + _row7 = _mm256_srli_epi32(current,14); /* No mask necessary */; -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( current, mask9); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask9)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("48 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("32 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("16 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("0 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 48 
*/ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 32 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 16 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 0 */ -#endif - debug(printf("48 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("32 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("16 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("0 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + oligo = _mm256_or_si256( _mm256_srli_epi32(current,16), _mm256_slli_epi32(next,16)); + _row8 = _mm256_and_si256( oligo, bigmask9); + _row9 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask9); + _row10 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask9); + _row11 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask9); + _row12 = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask9); + _row13 = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask9); + _row14 = _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask9); + _row15 = _mm256_and_si256( _mm256_srli_epi32(oligo,14), bigmask9); + + + /* Split: top half */ + _t0 = _mm256_unpackhi_epi32(_row0,_row1); + _t1 = _mm256_unpackhi_epi32(_row2,_row3); + _t2 = _mm256_unpackhi_epi32(_row4,_row5); + _t3 = _mm256_unpackhi_epi32(_row6,_row7); + _t4 = _mm256_unpackhi_epi32(_row8,_row9); + _t5 = _mm256_unpackhi_epi32(_row10,_row11); + _t6 = _mm256_unpackhi_epi32(_row12,_row13); + _t7 = _mm256_unpackhi_epi32(_row14,_row15); + + _u0 = _mm256_unpackhi_epi64(_t0,_t1); + _u1 = _mm256_unpackhi_epi64(_t2,_t3); + _u2 = _mm256_unpackhi_epi64(_t4,_t5); + _u3 = _mm256_unpackhi_epi64(_t6,_t7); + _u4 = _mm256_unpacklo_epi64(_t0,_t1); + _u5 = _mm256_unpacklo_epi64(_t2,_t3); + _u6 = _mm256_unpacklo_epi64(_t4,_t5); + _u7 = _mm256_unpacklo_epi64(_t6,_t7); + + + /* Split: bottom half */ + _t0 = _mm256_unpacklo_epi32(_row0,_row1); + _t1 = _mm256_unpacklo_epi32(_row2,_row3); + _t2 = 
_mm256_unpacklo_epi32(_row4,_row5); + _t3 = _mm256_unpacklo_epi32(_row6,_row7); + _t4 = _mm256_unpacklo_epi32(_row8,_row9); + _t5 = _mm256_unpacklo_epi32(_row10,_row11); + _t6 = _mm256_unpacklo_epi32(_row12,_row13); + _t7 = _mm256_unpacklo_epi32(_row14,_row15); + + _row8 = _mm256_unpackhi_epi64(_t0,_t1); + _row9 = _mm256_unpackhi_epi64(_t2,_t3); + _row10 = _mm256_unpackhi_epi64(_t4,_t5); + _row11 = _mm256_unpackhi_epi64(_t6,_t7); + _row12 = _mm256_unpacklo_epi64(_t0,_t1); + _row13 = _mm256_unpacklo_epi64(_t2,_t3); + _row14 = _mm256_unpacklo_epi64(_t4,_t5); + _row15 = _mm256_unpacklo_epi64(_t6,_t7); + + + _mm256_store_si256(out++, _mm256_permute2x128_si256(_u0, _u1, 0x31)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_u2, _u3, 0x31)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_u4, _u5, 0x31)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_u6, _u7, 0x31)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_row8, _row9, 0x31)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_row10, _row11, 0x31)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_row12, _row13, 0x31)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_row14, _row15, 0x31)); + + _mm256_store_si256(out++, _mm256_permute2x128_si256(_u0, _u1, 0x20)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_u2, _u3, 0x20)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_u4, _u5, 0x20)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_u6, _u7, 0x20)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_row8, _row9, 0x20)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_row10, _row11, 0x20)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_row12, _row13, 0x20)); + _mm256_store_si256(out++, _mm256_permute2x128_si256(_row14, _row15, 0x20)); + + return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif + +#ifdef HAVE_AVX512 +static void +extract_9mers_rev_simd_256 (__m512i *out, 
__m512i current, __m512i next) { + __m512i oligo; + + oligo = _mm512_or_si512( _mm512_srli_epi32(current,16), _mm512_slli_epi32(next,16)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,14), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,12), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask9)); + + _mm512_store_si512(out++, _mm512_srli_epi32(current,14)); /* No mask necessary */; + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask9)); + _mm512_store_si512(out++, _mm512_and_si512( current, hugemask9)); return; } -#endif - - -#ifdef HAVE_AVX2 -static void -count_9mers_rev_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) { - __m256i oligo; - __m256i array; -#ifdef CHECK_FOR_OVERFLOW - __m256i _counts_after, _counts_neg; -#endif +#ifdef USE_UNORDERED_9 +static Chrpos_T +store_9mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i next) { + __m512i array[16]; + + 
extract_9mers_rev_simd_256(array,current,next); + return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array); +} - oligo = _mm256_or_si256( _mm256_srli_epi32(current,16), _mm256_slli_epi32(next,16)); - array = _mm256_and_si256( _mm256_srli_epi32(oligo,14), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 127 */ - counts[EXTRACT256(array,1)] += 1; /* 111 */ - counts[EXTRACT256(array,2)] += 1; /* 95 */ - counts[EXTRACT256(array,3)] += 1; /* 79 */ - counts[EXTRACT256(array,4)] += 1; /* 63 */ - counts[EXTRACT256(array,5)] += 1; /* 47 */ - counts[EXTRACT256(array,6)] += 1; /* 31 */ - counts[EXTRACT256(array,7)] += 1; /* 15 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("63 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("31 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("15 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("63 
%04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("31 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("15 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 126 */ - counts[EXTRACT256(array,1)] += 1; /* 110 */ - counts[EXTRACT256(array,2)] += 1; /* 94 */ - counts[EXTRACT256(array,3)] += 1; /* 78 */ - counts[EXTRACT256(array,4)] += 1; /* 62 */ - counts[EXTRACT256(array,5)] += 1; /* 46 */ - counts[EXTRACT256(array,6)] += 1; /* 30 */ - counts[EXTRACT256(array,7)] += 1; /* 14 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("62 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("46 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("30 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - 
debug(printf("14 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("62 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("46 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("30 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("14 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 125 */ - counts[EXTRACT256(array,1)] += 1; /* 109 */ - counts[EXTRACT256(array,2)] += 1; /* 93 */ - counts[EXTRACT256(array,3)] += 1; /* 77 */ - counts[EXTRACT256(array,4)] += 1; /* 61 */ - counts[EXTRACT256(array,5)] += 1; /* 45 */ - counts[EXTRACT256(array,6)] += 1; /* 29 */ - counts[EXTRACT256(array,7)] += 1; /* 13 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("61 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("45 %04X => 
%d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("13 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("61 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("45 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("13 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 124 */ - counts[EXTRACT256(array,1)] += 1; /* 108 */ - counts[EXTRACT256(array,2)] += 1; /* 92 */ - counts[EXTRACT256(array,3)] += 1; /* 76 */ - counts[EXTRACT256(array,4)] += 1; /* 60 */ - counts[EXTRACT256(array,5)] += 1; /* 44 */ - counts[EXTRACT256(array,6)] += 1; /* 28 */ - counts[EXTRACT256(array,7)] += 1; /* 12 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - 
debug(printf("60 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("28 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("12 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("60 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("28 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("12 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 123 */ - counts[EXTRACT256(array,1)] += 1; /* 107 */ - counts[EXTRACT256(array,2)] += 1; /* 91 */ - counts[EXTRACT256(array,3)] += 1; /* 75 */ - counts[EXTRACT256(array,4)] += 1; /* 59 */ - counts[EXTRACT256(array,5)] += 1; /* 43 */ - counts[EXTRACT256(array,6)] += 1; /* 27 */ - counts[EXTRACT256(array,7)] += 1; /* 11 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("59 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("43 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("11 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("59 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("43 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("11 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 122 */ - counts[EXTRACT256(array,1)] += 1; /* 106 */ - counts[EXTRACT256(array,2)] += 1; /* 90 */ - counts[EXTRACT256(array,3)] += 1; /* 74 */ - counts[EXTRACT256(array,4)] += 1; /* 58 */ - counts[EXTRACT256(array,5)] += 1; /* 42 */ - counts[EXTRACT256(array,6)] += 1; /* 26 */ - counts[EXTRACT256(array,7)] += 1; /* 10 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = 
INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("58 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("10 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("58 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("10 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 121 */ - counts[EXTRACT256(array,1)] += 1; /* 105 */ - counts[EXTRACT256(array,2)] += 1; /* 89 */ - counts[EXTRACT256(array,3)] += 1; /* 73 */ - counts[EXTRACT256(array,4)] += 1; /* 57 */ - counts[EXTRACT256(array,5)] += 1; /* 41 */ - counts[EXTRACT256(array,6)] += 1; /* 25 */ - counts[EXTRACT256(array,7)] += 1; /* 9 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - 
inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("57 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("25 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("9 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("57 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("25 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("9 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( oligo, bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 120 */ - counts[EXTRACT256(array,1)] += 1; /* 104 */ - counts[EXTRACT256(array,2)] += 1; /* 88 */ - counts[EXTRACT256(array,3)] += 1; /* 72 */ - counts[EXTRACT256(array,4)] += 1; /* 56 */ - counts[EXTRACT256(array,5)] += 1; /* 40 */ - counts[EXTRACT256(array,6)] += 1; /* 24 */ - counts[EXTRACT256(array,7)] += 1; /* 8 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - 
inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("56 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("24 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("8 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("56 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("24 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("8 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_srli_epi32(current,14); /* No mask necessary */; - counts[EXTRACT256(array,0)] += 1; /* 119 */ - counts[EXTRACT256(array,1)] += 1; /* 103 */ - counts[EXTRACT256(array,2)] += 1; /* 87 */ - counts[EXTRACT256(array,3)] += 1; /* 71 */ - counts[EXTRACT256(array,4)] += 1; /* 55 */ - counts[EXTRACT256(array,5)] += 1; /* 39 */ - counts[EXTRACT256(array,6)] += 1; /* 23 */ - counts[EXTRACT256(array,7)] += 1; /* 7 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("55 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("7 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("55 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("7 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 118 */ - counts[EXTRACT256(array,1)] += 1; /* 102 */ - counts[EXTRACT256(array,2)] += 1; /* 86 */ - counts[EXTRACT256(array,3)] += 1; /* 70 */ - counts[EXTRACT256(array,4)] += 1; /* 54 */ - counts[EXTRACT256(array,5)] += 1; /* 38 */ - counts[EXTRACT256(array,6)] += 1; /* 22 */ - counts[EXTRACT256(array,7)] += 1; /* 6 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = 
INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("54 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("38 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("6 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("54 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("38 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("6 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 117 */ - counts[EXTRACT256(array,1)] += 1; /* 101 */ - counts[EXTRACT256(array,2)] += 1; /* 85 */ - counts[EXTRACT256(array,3)] += 1; /* 69 */ - counts[EXTRACT256(array,4)] += 1; /* 53 */ - counts[EXTRACT256(array,5)] += 1; /* 37 */ - counts[EXTRACT256(array,6)] += 1; /* 21 */ - counts[EXTRACT256(array,7)] += 1; /* 5 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - 
debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("53 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("37 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("21 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("5 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("53 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("37 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("21 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("5 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 116 */ - counts[EXTRACT256(array,1)] += 1; /* 100 */ - counts[EXTRACT256(array,2)] += 1; /* 84 */ - counts[EXTRACT256(array,3)] += 1; /* 68 */ - counts[EXTRACT256(array,4)] += 1; /* 52 */ - counts[EXTRACT256(array,5)] += 1; /* 36 */ - counts[EXTRACT256(array,6)] += 1; /* 20 */ - counts[EXTRACT256(array,7)] += 1; /* 4 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = 
_mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("52 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("20 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("4 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("52 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("20 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("4 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 115 */ - counts[EXTRACT256(array,1)] += 1; /* 99 */ - counts[EXTRACT256(array,2)] += 1; /* 83 */ - counts[EXTRACT256(array,3)] += 1; /* 67 */ - counts[EXTRACT256(array,4)] += 1; /* 51 */ - counts[EXTRACT256(array,5)] += 1; /* 35 */ - counts[EXTRACT256(array,6)] += 1; /* 19 */ - counts[EXTRACT256(array,7)] += 1; /* 3 */ -#ifdef 
CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("51 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("3 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("51 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("3 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 114 */ - counts[EXTRACT256(array,1)] += 1; /* 98 */ - counts[EXTRACT256(array,2)] += 1; /* 82 */ - counts[EXTRACT256(array,3)] += 1; /* 66 */ - counts[EXTRACT256(array,4)] += 1; /* 50 */ - counts[EXTRACT256(array,5)] += 1; /* 34 */ - 
counts[EXTRACT256(array,6)] += 1; /* 18 */ - counts[EXTRACT256(array,7)] += 1; /* 2 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("50 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("2 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("50 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("2 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 113 */ - counts[EXTRACT256(array,1)] += 1; /* 97 */ - counts[EXTRACT256(array,2)] += 1; /* 81 */ - counts[EXTRACT256(array,3)] += 1; /* 65 
*/ - counts[EXTRACT256(array,4)] += 1; /* 49 */ - counts[EXTRACT256(array,5)] += 1; /* 33 */ - counts[EXTRACT256(array,6)] += 1; /* 17 */ - counts[EXTRACT256(array,7)] += 1; /* 1 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("49 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("33 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("17 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("1 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("49 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("33 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("17 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("1 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( current, bigmask9); - counts[EXTRACT256(array,0)] += 1; /* 112 */ - counts[EXTRACT256(array,1)] += 1; /* 96 */ - 
counts[EXTRACT256(array,2)] += 1; /* 80 */ - counts[EXTRACT256(array,3)] += 1; /* 64 */ - counts[EXTRACT256(array,4)] += 1; /* 48 */ - counts[EXTRACT256(array,5)] += 1; /* 32 */ - counts[EXTRACT256(array,6)] += 1; /* 16 */ - counts[EXTRACT256(array,7)] += 1; /* 0 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("48 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("32 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("16 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("0 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("48 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("32 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("16 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("0 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); +#else +/* Includes extract_9mers_rev_simd_256_ordered (__m512i 
*out, __m512i current, __m512i next) */ +static Chrpos_T +store_9mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i next) { + __m512i array[16], *out; + __m512i oligo, _shuffle0, _shuffle1, _shuffle2; + __m512i _row0, _row1, _row2, _row3, _row4, _row5, _row6, _row7, + _row8, _row9, _row10, _row11, _row12, _row13, _row14, _row15; + __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; + + out = &(array[0]); + + _row0 = _mm512_and_si512( current, hugemask9); + _row1 = _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask9); + _row2 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask9); + _row3 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask9); + _row4 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask9); + _row5 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask9); + _row6 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask9); + _row7 = _mm512_srli_epi32(current,14); /* No mask necessary */; + + oligo = _mm512_or_si512( _mm512_srli_epi32(current,16), _mm512_slli_epi32(next,16)); + _row8 = _mm512_and_si512( oligo, hugemask9); + _row9 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask9); + _row10 = _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask9); + _row11 = _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask9); + _row12 = _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask9); + _row13 = _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask9); + _row14 = _mm512_and_si512( _mm512_srli_epi32(oligo,12), hugemask9); + _row15 = _mm512_and_si512( _mm512_srli_epi32(oligo,14), hugemask9); + + + /* Split: top half */ + _t0 = _mm512_unpackhi_epi32(_row0,_row1); + _t1 = _mm512_unpackhi_epi32(_row2,_row3); + _t2 = _mm512_unpackhi_epi32(_row4,_row5); + _t3 = _mm512_unpackhi_epi32(_row6,_row7); + _t4 = _mm512_unpackhi_epi32(_row8,_row9); + _t5 = _mm512_unpackhi_epi32(_row10,_row11); + _t6 = 
_mm512_unpackhi_epi32(_row12,_row13); + _t7 = _mm512_unpackhi_epi32(_row14,_row15); + + _u0 = _mm512_unpackhi_epi64(_t0,_t1); + _u1 = _mm512_unpackhi_epi64(_t2,_t3); + _u2 = _mm512_unpackhi_epi64(_t4,_t5); + _u3 = _mm512_unpackhi_epi64(_t6,_t7); + _u4 = _mm512_unpacklo_epi64(_t0,_t1); + _u5 = _mm512_unpacklo_epi64(_t2,_t3); + _u6 = _mm512_unpacklo_epi64(_t4,_t5); + _u7 = _mm512_unpacklo_epi64(_t6,_t7); + + /* Split: bottom half */ + _t0 = _mm512_unpacklo_epi32(_row0,_row1); + _t1 = _mm512_unpacklo_epi32(_row2,_row3); + _t2 = _mm512_unpacklo_epi32(_row4,_row5); + _t3 = _mm512_unpacklo_epi32(_row6,_row7); + _t4 = _mm512_unpacklo_epi32(_row8,_row9); + _t5 = _mm512_unpacklo_epi32(_row10,_row11); + _t6 = _mm512_unpacklo_epi32(_row12,_row13); + _t7 = _mm512_unpacklo_epi32(_row14,_row15); + + _row8 = _mm512_unpackhi_epi64(_t0,_t1); + _row9 = _mm512_unpackhi_epi64(_t2,_t3); + _row10 = _mm512_unpackhi_epi64(_t4,_t5); + _row11 = _mm512_unpackhi_epi64(_t6,_t7); + _row12 = _mm512_unpacklo_epi64(_t0,_t1); + _row13 = _mm512_unpacklo_epi64(_t2,_t3); + _row14 = _mm512_unpacklo_epi64(_t4,_t5); + _row15 = _mm512_unpacklo_epi64(_t6,_t7); + + + /* Split: top half */ + _shuffle0 = _mm512_setr_epi64(6, 7, 8+6, 8+7, 4, 5, 8+4, 8+5); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); + _t4 = _mm512_permutex2var_epi64(_row8, _shuffle0, _row9); + _t5 = _mm512_permutex2var_epi64(_row10, _shuffle0, _row11); + _t6 = _mm512_permutex2var_epi64(_row12, _shuffle0, _row13); + _t7 = _mm512_permutex2var_epi64(_row14, _shuffle0, _row15); + + _shuffle1 = _mm512_setr_epi64(0, 1, 2, 3, 8+0, 8+1, 8+2, 8+3); + _shuffle2 = _mm512_setr_epi64(4, 5, 6, 7, 8+4, 8+5, 8+6, 8+7); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t0, _shuffle1, _t1)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t2, _shuffle1, _t3)); + 
_mm512_store_si512(out++, _mm512_permutex2var_epi64(_t4, _shuffle1, _t5)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t6, _shuffle1, _t7)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t0, _shuffle2, _t1)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t2, _shuffle2, _t3)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t4, _shuffle2, _t5)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t6, _shuffle2, _t7)); + + /* Split: bottom half */ + _shuffle0 = _mm512_setr_epi64(2, 3, 8+2, 8+3, 0, 1, 8+0, 8+1); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); + _t4 = _mm512_permutex2var_epi64(_row8, _shuffle0, _row9); + _t5 = _mm512_permutex2var_epi64(_row10, _shuffle0, _row11); + _t6 = _mm512_permutex2var_epi64(_row12, _shuffle0, _row13); + _t7 = _mm512_permutex2var_epi64(_row14, _shuffle0, _row15); + + /* _shuffle1 = _mm512_setr_epi64(0, 1, 2, 3, 8+0, 8+1, 8+2, 8+3); */ + /* _shuffle2 = _mm512_setr_epi64(4, 5, 6, 7, 8+4, 8+5, 8+6, 8+7); */ + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t0, _shuffle1, _t1)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t2, _shuffle1, _t3)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t4, _shuffle1, _t5)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t6, _shuffle1, _t7)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t0, _shuffle2, _t1)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t2, _shuffle2, _t3)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t4, _shuffle2, _t5)); + _mm512_store_si512(out++, _mm512_permutex2var_epi64(_t6, _shuffle2, _t7)); - return; + return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array); } #endif +#endif #if !defined(HAVE_AVX2) static int -store_9mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 
*pointers, UINT4 *positions, Count_T *counts, - Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { +store_9mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { Genomecomp_T masked, oligo; #ifdef INDIVIDUAL_SHIFTS #elif defined(SIMD_MASK_THEN_STORE) @@ -33276,58 +22850,50 @@ #ifdef INDIVIDUAL_SHIFTS masked = low_rc & MASK9; /* 0 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } masked = (low_rc >> 2) & MASK9; /* 1 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; + table[positions[masked] + (--counts[masked])] = chrpos - 1; } masked = (low_rc >> 4) & MASK9; /* 2 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; + table[positions[masked] + (--counts[masked])] = chrpos - 2; } masked = (low_rc >> 6) & MASK9; /* 3 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; + table[positions[masked] + (--counts[masked])] = chrpos - 3; } masked = (low_rc >> 8) & MASK9; /* 4 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; + table[positions[masked] + (--counts[masked])] = chrpos - 4; } masked = (low_rc >> 10) & MASK9; /* 5 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = 
chrpos - 5; + table[positions[masked] + (--counts[masked])] = chrpos - 5; } masked = (low_rc >> 12) & MASK9; /* 6 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; + table[positions[masked] + (--counts[masked])] = chrpos - 6; } masked = low_rc >> 14; /* 7, No mask necessary */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; + table[positions[masked] + (--counts[masked])] = chrpos - 7; } #else @@ -33340,30 +22906,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; + table[positions[masked] + (--counts[masked])] = chrpos - 1; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; + table[positions[masked] + (--counts[masked])] = chrpos - 2; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; + table[positions[masked] + (--counts[masked])] = chrpos - 3; } @@ -33376,30 +22938,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; + table[positions[masked] + 
(--counts[masked])] = chrpos - 4; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; + table[positions[masked] + (--counts[masked])] = chrpos - 5; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; + table[positions[masked] + (--counts[masked])] = chrpos - 6; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; + table[positions[masked] + (--counts[masked])] = chrpos - 7; } #endif @@ -33410,58 +22968,50 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK9; /* 8 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; + table[positions[masked] + (--counts[masked])] = chrpos - 8; } masked = (oligo >> 2) & MASK9; /* 9 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; + table[positions[masked] + (--counts[masked])] = chrpos - 9; } masked = (oligo >> 4) & MASK9; /* 10 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; + table[positions[masked] + (--counts[masked])] = chrpos - 10; } masked = (oligo >> 6) & MASK9; /* 11 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; + table[positions[masked] + (--counts[masked])] = chrpos - 
11; } masked = (oligo >> 8) & MASK9; /* 12 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; + table[positions[masked] + (--counts[masked])] = chrpos - 12; } masked = (oligo >> 10) & MASK9; /* 13 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; + table[positions[masked] + (--counts[masked])] = chrpos - 13; } masked = (oligo >> 12) & MASK9; /* 14 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; + table[positions[masked] + (--counts[masked])] = chrpos - 14; } masked = (oligo >> 14) & MASK9; /* 15 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; + table[positions[masked] + (--counts[masked])] = chrpos - 15; } #else @@ -33474,30 +23024,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; + table[positions[masked] + (--counts[masked])] = chrpos - 8; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; + table[positions[masked] + (--counts[masked])] = chrpos - 9; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; + table[positions[masked] + (--counts[masked])] = chrpos - 10; } masked = EXTRACT(_masked,3); 
if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; + table[positions[masked] + (--counts[masked])] = chrpos - 11; } @@ -33510,30 +23056,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; + table[positions[masked] + (--counts[masked])] = chrpos - 12; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; + table[positions[masked] + (--counts[masked])] = chrpos - 13; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; + table[positions[masked] + (--counts[masked])] = chrpos - 14; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; + table[positions[masked] + (--counts[masked])] = chrpos - 15; } #endif @@ -33541,58 +23083,50 @@ #ifdef INDIVIDUAL_SHIFTS masked = high_rc & MASK9; /* 16 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; + table[positions[masked] + (--counts[masked])] = chrpos - 16; } masked = (high_rc >> 2) & MASK9; /* 17 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; + table[positions[masked] + (--counts[masked])] = chrpos - 17; } masked = (high_rc >> 4) & MASK9; /* 
18 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; + table[positions[masked] + (--counts[masked])] = chrpos - 18; } masked = (high_rc >> 6) & MASK9; /* 19 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; + table[positions[masked] + (--counts[masked])] = chrpos - 19; } masked = (high_rc >> 8) & MASK9; /* 20 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; + table[positions[masked] + (--counts[masked])] = chrpos - 20; } masked = (high_rc >> 10) & MASK9; /* 21 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; + table[positions[masked] + (--counts[masked])] = chrpos - 21; } masked = (high_rc >> 12) & MASK9; /* 22 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; + table[positions[masked] + (--counts[masked])] = chrpos - 22; } masked = high_rc >> 14; /* 23, No mask necessary */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; + table[positions[masked] + (--counts[masked])] = chrpos - 23; } #else @@ -33605,30 +23139,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; + table[positions[masked] + (--counts[masked])] = chrpos - 16; } masked = 
EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; + table[positions[masked] + (--counts[masked])] = chrpos - 17; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; + table[positions[masked] + (--counts[masked])] = chrpos - 18; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; + table[positions[masked] + (--counts[masked])] = chrpos - 19; } @@ -33641,30 +23171,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; + table[positions[masked] + (--counts[masked])] = chrpos - 20; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; + table[positions[masked] + (--counts[masked])] = chrpos - 21; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; + table[positions[masked] + (--counts[masked])] = chrpos - 22; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; + table[positions[masked] + (--counts[masked])] = chrpos - 23; } #endif @@ -33675,58 +23201,50 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK9; /* 24 */ 
if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; + table[positions[masked] + (--counts[masked])] = chrpos - 24; } masked = (oligo >> 2) & MASK9; /* 25 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; + table[positions[masked] + (--counts[masked])] = chrpos - 25; } masked = (oligo >> 4) & MASK9; /* 26 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; + table[positions[masked] + (--counts[masked])] = chrpos - 26; } masked = (oligo >> 6) & MASK9; /* 27 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; + table[positions[masked] + (--counts[masked])] = chrpos - 27; } masked = (oligo >> 8) & MASK9; /* 28 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; + table[positions[masked] + (--counts[masked])] = chrpos - 28; } masked = (oligo >> 10) & MASK9; /* 29 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; + table[positions[masked] + (--counts[masked])] = chrpos - 29; } masked = (oligo >> 12) & MASK9; /* 30 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; + table[positions[masked] + (--counts[masked])] = chrpos - 30; } masked = (oligo >> 14) & MASK9; /* 31 */ if (counts[masked]) { - 
assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; + table[positions[masked] + (--counts[masked])] = chrpos - 31; } #else @@ -33739,30 +23257,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; + table[positions[masked] + (--counts[masked])] = chrpos - 24; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; + table[positions[masked] + (--counts[masked])] = chrpos - 25; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; + table[positions[masked] + (--counts[masked])] = chrpos - 26; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; + table[positions[masked] + (--counts[masked])] = chrpos - 27; } @@ -33775,30 +23289,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; + table[positions[masked] + (--counts[masked])] = chrpos - 28; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; + table[positions[masked] + (--counts[masked])] = chrpos - 29; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); 
debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; + table[positions[masked] + (--counts[masked])] = chrpos - 30; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; + table[positions[masked] + (--counts[masked])] = chrpos - 31; } #endif @@ -33808,259 +23318,320 @@ #else /* HAVE_AVX2 */ static int -store_9mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, - Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { +store_9mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { Genomecomp_T masked, oligo; __m256i _oligo, _masked, _counts; + __m256i _blocks, _envelopes, _addresses, _address_mask, _count_mask; + + _address_mask = _mm256_set1_epi32(0x3); + _count_mask = _mm256_set1_epi32(0xFF); - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14); + + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask9); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); if (EXTRACT256(_counts,0)) { masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + if (counts[masked]) { /* Have to re-check if there is a 
conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; + } } if (EXTRACT256(_counts,1)) { masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; + } } if (EXTRACT256(_counts,2)) { masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; + } } if (EXTRACT256(_counts,3)) { masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; + } } if (EXTRACT256(_counts,4)) { masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; + } } if (EXTRACT256(_counts,5)) { masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - 
table[--pointers[masked]] = chrpos - 5; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; + } } if (EXTRACT256(_counts,6)) { masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; + } } if (EXTRACT256(_counts,7)) { masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; + } } oligo = low_rc >> 16; /* For 15..8 */ oligo |= high_rc << 16; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask9); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); if (EXTRACT256(_counts,0)) { masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = 
chrpos - 8; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; + } } if (EXTRACT256(_counts,1)) { masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; + } } if (EXTRACT256(_counts,2)) { masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; + } } if (EXTRACT256(_counts,3)) { masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; + } } if (EXTRACT256(_counts,4)) { masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + (--counts[masked])] = chrpos - 12; + } } if (EXTRACT256(_counts,5)) { masked = EXTRACT256(_masked,5); - assert(pointers[masked] > 
positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; + } } if (EXTRACT256(_counts,6)) { masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; + } } if (EXTRACT256(_counts,7)) { masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; + } } - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask9); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); if (EXTRACT256(_counts,0)) { masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at 
%u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; + } } if (EXTRACT256(_counts,1)) { masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; + } } if (EXTRACT256(_counts,2)) { masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; + } } if (EXTRACT256(_counts,3)) { masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = chrpos - 19; + } } if (EXTRACT256(_counts,4)) { masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; + } } if (EXTRACT256(_counts,5)) { masked = 
EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; + } } if (EXTRACT256(_counts,6)) { masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; + } } if (EXTRACT256(_counts,7)) { masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; + } } oligo = high_rc >> 16; /* For 31..24 */ oligo |= nextlow_rc << 16; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask9); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); if (EXTRACT256(_counts,0)) { masked = 
EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; + } } if (EXTRACT256(_counts,1)) { masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; + } } if (EXTRACT256(_counts,2)) { masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; + } } if (EXTRACT256(_counts,3)) { masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; + } } if (EXTRACT256(_counts,4)) { masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + 
table[positions[masked] + (--counts[masked])] = chrpos - 28; + } } if (EXTRACT256(_counts,5)) { masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; + } } if (EXTRACT256(_counts,6)) { masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; + } } if (EXTRACT256(_counts,7)) { masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; + } } return chrpos - 32; @@ -34074,7 +23645,7 @@ #if !defined(HAVE_AVX2) static void -count_8mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { +count_8mers_rev_32 (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { Genomecomp_T masked, oligo; #ifdef INDIVIDUAL_SHIFTS #elif defined(SIMD_MASK_THEN_STORE) @@ -34087,39 +23658,39 @@ #ifdef INDIVIDUAL_SHIFTS masked = low_rc & MASK8; /* 0 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("0 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 2) & MASK8; /* 1 */ - 
INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("1 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 4) & MASK8; /* 2 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("2 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 6) & MASK8; /* 3 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("3 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 8) & MASK8; /* 4 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("4 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 10) & MASK8; /* 5 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("5 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 12) & MASK8; /* 6 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("6 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 14) & MASK8; /* 7 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("7 %04X => %d\n",masked,counts[masked])); masked = low_rc >> 16; /* 8, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("8 %04X => %d\n",masked,counts[masked])); #else @@ -34132,22 +23703,22 @@ masked = EXTRACT(_masked,0); assert(masked == (low_rc & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("0 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); assert(masked == ((low_rc >> 2) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("1 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); assert(masked == ((low_rc >> 4) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("2 %04X => %d\n",masked,counts[masked])); masked = 
EXTRACT(_masked,3); assert(masked == ((low_rc >> 6) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("3 %04X => %d\n",masked,counts[masked])); @@ -34160,27 +23731,27 @@ masked = EXTRACT(_masked,0); assert(masked == ((low_rc >> 8) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("4 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); assert(masked == ((low_rc >> 10) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("5 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); assert(masked == ((low_rc >> 12) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("6 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); assert(masked == ((low_rc >> 14) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("7 %04X => %d\n",masked,counts[masked])); masked = low_rc >> 16; /* 8, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("8 %04X => %d\n",masked,counts[masked])); #endif @@ -34190,31 +23761,31 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK8; /* 9 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("9 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 2) & MASK8; /* 10 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("10 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 4) & MASK8; /* 11 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("11 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 6) & MASK8; /* 12 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("12 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 8) & MASK8; /* 13 */ - 
INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("13 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 10) & MASK8; /* 14 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("14 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 12) & MASK8; /* 15 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("15 %04X => %d\n",masked,counts[masked])); #else @@ -34227,22 +23798,22 @@ masked = EXTRACT(_masked,0); assert(masked == (oligo & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("9 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); assert(masked == ((oligo >> 2) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("10 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); assert(masked == ((oligo >> 4) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("11 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); assert(masked == ((oligo >> 6) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("12 %04X => %d\n",masked,counts[masked])); @@ -34255,56 +23826,56 @@ masked = EXTRACT(_masked,0); assert(masked == ((oligo >> 8) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("13 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); assert(masked == ((oligo >> 10) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("14 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); assert(masked == ((oligo >> 12) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("15 %04X => %d\n",masked,counts[masked])); #endif #ifdef INDIVIDUAL_SHIFTS masked = high_rc & 
MASK8; /* 16 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("16 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 2) & MASK8; /* 17 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("17 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 4) & MASK8; /* 18 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("18 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 6) & MASK8; /* 19 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("19 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 8) & MASK8; /* 20 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("20 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 10) & MASK8; /* 21 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("21 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 12) & MASK8; /* 22 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("22 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 14) & MASK8; /* 23 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("23 %04X => %d\n",masked,counts[masked])); masked = high_rc >> 16; /* 24, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("24 %04X => %d\n",masked,counts[masked])); #else @@ -34317,22 +23888,22 @@ masked = EXTRACT(_masked,0); assert(masked == (high_rc & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("16 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); assert(masked == ((high_rc >> 2) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("17 %04X => %d\n",masked,counts[masked])); masked = 
EXTRACT(_masked,2); assert(masked == ((high_rc >> 4) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("18 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); assert(masked == ((high_rc >> 6) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("19 %04X => %d\n",masked,counts[masked])); @@ -34345,27 +23916,27 @@ masked = EXTRACT(_masked,0); assert(masked == ((high_rc >> 8) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("20 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); assert(masked == ((high_rc >> 10) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("21 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); assert(masked == ((high_rc >> 12) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("22 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); assert(masked == ((high_rc >> 14) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("23 %04X => %d\n",masked,counts[masked])); masked = high_rc >> 16; /* 24, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("24 %04X => %d\n",masked,counts[masked])); #endif @@ -34375,31 +23946,31 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK8; /* 25 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("25 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 2) & MASK8; /* 26 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("26 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 4) & MASK8; /* 27 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("27 %04X => 
%d\n",masked,counts[masked])); masked = (oligo >> 6) & MASK8; /* 28 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("28 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 8) & MASK8; /* 29 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("29 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 10) & MASK8; /* 30 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("30 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 12) & MASK8; /* 31 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("31 %04X => %d\n",masked,counts[masked])); #else @@ -34412,22 +23983,22 @@ masked = EXTRACT(_masked,0); assert(masked == (oligo & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("25 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); assert(masked == ((oligo >> 2) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("26 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); assert(masked == ((oligo >> 4) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("27 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); assert(masked == ((oligo >> 6) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("28 %04X => %d\n",masked,counts[masked])); @@ -34440,17 +24011,17 @@ masked = EXTRACT(_masked,0); assert(masked == ((oligo >> 8) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("29 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); assert(masked == ((oligo >> 10) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("30 %04X => %d\n",masked,counts[masked])); masked = 
EXTRACT(_masked,2); assert(masked == ((oligo >> 12) & MASK8)); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("31 %04X => %d\n",masked,counts[masked])); #endif @@ -34460,289 +24031,162 @@ #else /* HAVE_AVX2 */ static void -count_8mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { +count_8mers_rev_32 (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { Genomecomp_T masked, oligo; __m256i _oligo, _masked; -#ifdef CHECK_FOR_OVERFLOW - __m256i _counts_after, _counts_neg; -#endif - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask8); masked = EXTRACT256(_masked,0); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("0 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("1 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,2); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("2 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,3); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("3 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,4); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("4 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,5); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("5 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,6); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("6 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,7); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("7 %04X => %d\n",masked,counts[masked])); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = 
_mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ masked = low_rc >> 16; /* 8, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("8 %04X => %d\n",masked,counts[masked])); oligo = low_rc >> 18; /* For 15..9 */ oligo |= high_rc << 14; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask8); masked = EXTRACT256(_masked,0); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("9 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("10 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,2); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("11 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,3); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("12 %04X => 
%d\n",masked,counts[masked])); masked = EXTRACT256(_masked,4); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("13 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,5); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("14 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,6); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("15 %04X => %d\n",masked,counts[masked])); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low7); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask8); masked = EXTRACT256(_masked,0); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("16 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("17 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,2); - counts[masked] += 1; + INCR_COUNT(counts[masked]); 
debug(printf("18 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,3); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("19 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,4); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("20 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,5); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("21 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,6); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("22 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,7); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("23 %04X => %d\n",masked,counts[masked])); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ masked = high_rc >> 16; /* 24, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("24 %04X => 
%d\n",masked,counts[masked])); oligo = high_rc >> 18; /* For 31..25 */ oligo |= nextlow_rc << 14; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask8); masked = EXTRACT256(_masked,0); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("25 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("26 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,2); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("27 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,3); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("28 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,4); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("29 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,5); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("30 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,6); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("31 %04X => %d\n",masked,counts[masked])); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low7); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - 
inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - return; } @@ -34752,9 +24196,9 @@ /* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc}, and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */ -#ifdef USE_SIMD_FOR_COUNTS +#ifdef HAVE_SSE2 static void -extract_8mers_rev_simd (__m128i *out, __m128i current, __m128i next) { +extract_8mers_rev_simd_64 (__m128i *out, __m128i current, __m128i next) { __m128i oligo; oligo = _mm_or_si128( _mm_srli_epi32(current,18), _mm_slli_epi32(next,14)); @@ -34779,6 +24223,100 @@ return; } +#ifdef USE_UNORDERED_8 +static Chrpos_T +store_8mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16]; + + extract_8mers_rev_simd_64(array,current,next); + return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array); +} + +#else +/* Includes extract_8mers_rev_simd_64_ordered (__m128i *out, __m128i current, __m128i next) */ +static Chrpos_T +store_8mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16], *out; + __m128i oligo; + __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m128i _u0, _u1, _u2, _u3; + + out = &(array[0]); + + /* As a special case, 8_mers don't need to be masked, since they fill each 16-mer */ + + /* _row0 = _mm_and_si128( current, mask8); */ + /* _row1 = _mm_and_si128( _mm_srli_epi32(current,2), mask8); */ + _t0 = _mm_blend_epi16(_mm_slli_epi32(current,14), current, 0x55); + + /* _row2 = _mm_and_si128( _mm_srli_epi32(current,4), mask8); */ + /* _row3 = _mm_and_si128( _mm_srli_epi32(current,6), mask8); */ + _t1 = _mm_blend_epi16(_mm_slli_epi32(current,10), _mm_srli_epi32(current,4), 0x55); + + /* _row4 = 
_mm_and_si128( _mm_srli_epi32(current,8), mask8); */ + /* _row5 = _mm_and_si128( _mm_srli_epi32(current,10), mask8); */ + _t2 = _mm_blend_epi16(_mm_slli_epi32(current,6), _mm_srli_epi32(current,8), 0x55); + + /* _row6 = _mm_and_si128( _mm_srli_epi32(current,12), mask8); */ + /* _row7 = _mm_and_si128( _mm_srli_epi32(current,14), mask8); */ + _t3 = _mm_blend_epi16(_mm_slli_epi32(current,2), _mm_srli_epi32(current,12), 0x55); + + + oligo = _mm_or_si128( _mm_srli_epi32(current,18), _mm_slli_epi32(next,14)); + /* _row8 = _mm_srli_epi32(current,16); */ /* No mask necessary */; + /* _row9 = _mm_and_si128( oligo, mask8); */ + _t4 = _mm_blend_epi16(_mm_slli_epi32(oligo,16), _mm_srli_epi32(current,16), 0x55); + + /* _row10 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask8); */ + /* _row11 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask8); */ + _t5 = _mm_blend_epi16(_mm_slli_epi32(oligo,12), _mm_srli_epi32(oligo,2), 0x55); + + /* _row12 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask8); */ + /* _row13 = _mm_and_si128( _mm_srli_epi32(oligo,8), mask8); */ + _t6 = _mm_blend_epi16(_mm_slli_epi32(oligo,8), _mm_srli_epi32(oligo,6), 0x55); + + /* _row14 = _mm_and_si128( _mm_srli_epi32(oligo,10), mask8); */ + /* _row15 = _mm_and_si128( _mm_srli_epi32(oligo,12), mask8); */ + _t7 = _mm_blend_epi16(_mm_slli_epi32(oligo,4), _mm_srli_epi32(oligo,10), 0x55); + + + /* Split: top half */ + _u0 = _mm_unpackhi_epi32(_t0,_t1); + _u1 = _mm_unpackhi_epi32(_t2,_t3); + _u2 = _mm_unpackhi_epi32(_t4,_t5); + _u3 = _mm_unpackhi_epi32(_t6,_t7); + + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); + _mm_store_si128(out++, 
_mm_cvtepu16_epi32(_u3)); + + /* Split: bottom half */ + _u0 = _mm_unpacklo_epi32(_t0,_t1); + _u1 = _mm_unpacklo_epi32(_t2,_t3); + _u2 = _mm_unpacklo_epi32(_t4,_t5); + _u3 = _mm_unpacklo_epi32(_t6,_t7); + + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3)); + + return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif + #ifdef HAVE_AVX2 static void extract_8mers_rev_simd_128 (__m256i *out, __m256i current, __m256i next) { @@ -34805,1553 +24343,274 @@ return; } -#endif - - -static void -count_8mers_rev_simd (Count_T *counts, Inquery_T *inquery, __m128i current, __m128i next) { - __m128i oligo; -#ifdef HAVE_SSE4_1 - __m128i array; -#else - Genomecomp_T array[4]; -#endif -#if defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW) - __m128i _counts_after, _counts_neg; -#endif - - oligo = _mm_or_si128( _mm_srli_epi32(current,18), _mm_slli_epi32(next,14)); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,12), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,12), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if 
(EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("63 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("47 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("31 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("15 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 63 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 47 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 31 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 15 */ -#endif - debug(printf("63 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("47 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("31 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("15 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,10), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,10), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if 
(EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("62 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("46 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("30 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("14 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 62 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 46 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 30 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 14 */ -#endif - debug(printf("62 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("46 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("30 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("14 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,8), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("61 %04X => %d\n",array[0],counts[array[0]])); - 
debug(printf("45 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("29 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("13 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 61 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 45 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 29 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 13 */ -#endif - debug(printf("61 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("45 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("29 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("13 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("60 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("44 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("28 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("12 %04X => 
%d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 60 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 44 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 28 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 12 */ -#endif - debug(printf("60 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("44 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("28 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("12 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("59 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("43 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("27 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("11 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 59 */ - 
INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 43 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 27 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 11 */ -#endif - debug(printf("59 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("43 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("27 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("11 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,2), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("58 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("42 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("26 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("10 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 58 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 42 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 26 */ - 
INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 10 */ -#endif - debug(printf("58 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("42 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("26 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("10 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( oligo, mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("57 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("41 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("25 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("9 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 57 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 41 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 25 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 9 */ -#endif - debug(printf("57 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("41 %04X => 
%d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("25 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("9 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); +#ifdef USE_UNORDERED_8 +static Chrpos_T +store_8mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + __m256i array[16]; + + extract_8mers_rev_simd_128(array,current,next); + return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array); +} -#ifdef HAVE_SSE4_1 - array = _mm_srli_epi32(current,16); /* No mask necessary */; #else - _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,16)); /* No mask necessary */; -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("56 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("40 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("24 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("8 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 56 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 40 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 24 */ - 
INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 8 */ -#endif - debug(printf("56 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("40 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("24 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("8 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); +/* Includes extract_8mers_rev_simd_128_ordered (__m256i *out, __m256i current, __m256i next) */ +static Chrpos_T +store_8mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + __m256i array[16], *out; + __m256i oligo; + __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,14), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("55 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("39 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("23 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("7 %04X => %d\n",array[3],counts[array[3]])); - -#else - 
INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 55 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 39 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 23 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 7 */ -#endif - debug(printf("55 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("39 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("23 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("7 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + out = &(array[0]); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,12), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("54 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("38 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("22 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("6 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 54 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 38 */ - 
INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 22 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 6 */ -#endif - debug(printf("54 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("38 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("22 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("6 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + /* As a special case, 8_mers don't need to be masked, since they fill each 16-mer */ -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,10), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("53 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("37 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("21 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("5 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 53 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 37 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 21 */ - 
INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 5 */ -#endif - debug(printf("53 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("37 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("21 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("5 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + /* _row0 = _mm256_and_si256( current, bigmask8); */ + /* _row1 = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask8); */ + _t0 = _mm256_blend_epi16(_mm256_slli_epi32(current,14), current, 0x55); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,8), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("52 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("36 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("20 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("4 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 52 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 36 */ - 
INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 20 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 4 */ -#endif - debug(printf("52 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("36 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("20 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("4 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + /* _row2 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask8); */ + /* _row3 = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask8); */ + _t1 = _mm256_blend_epi16(_mm256_slli_epi32(current,10), _mm256_srli_epi32(current,4), 0x55); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,6), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("51 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("35 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("19 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("3 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 51 */ - 
INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 35 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 19 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 3 */ -#endif - debug(printf("51 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("35 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("19 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("3 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + /* _row4 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask8); */ + /* _row5 = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask8); */ + _t2 = _mm256_blend_epi16(_mm256_slli_epi32(current,6), _mm256_srli_epi32(current,8), 0x55); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,4), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("50 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("34 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("18 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("2 %04X => %d\n",array[3],counts[array[3]])); - -#else - 
INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 50 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 34 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 18 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 2 */ -#endif - debug(printf("50 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("34 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("18 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("2 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + /* _row6 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask8); */ + /* _row7 = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask8); */ + _t3 = _mm256_blend_epi16(_mm256_slli_epi32(current,2), _mm256_srli_epi32(current,12), 0x55); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,2), mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("49 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("33 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("17 %04X => %d\n",array[2],counts[array[2]])); - 
debug(printf("1 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 49 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 33 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 17 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 1 */ -#endif - debug(printf("49 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("33 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("17 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("1 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( current, mask8); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask8)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("48 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("32 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("16 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("0 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 48 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 32 
*/ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 16 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 0 */ -#endif - debug(printf("48 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("32 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("16 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("0 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + oligo = _mm256_or_si256( _mm256_srli_epi32(current,18), _mm256_slli_epi32(next,14)); + /* _row8 = _mm256_srli_epi32(current,16); */ /* No mask necessary */; + /* _row9 = _mm256_and_si256( oligo, bigmask8); */ + _t4 = _mm256_blend_epi16(_mm256_slli_epi32(oligo,16), _mm256_srli_epi32(current,16), 0x55); + + /* _row10 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask8); */ + /* _row11 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask8); */ + _t5 = _mm256_blend_epi16(_mm256_slli_epi32(oligo,12), _mm256_srli_epi32(oligo,2), 0x55); + + /* _row12 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask8); */ + /* _row13 = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask8); */ + _t6 = _mm256_blend_epi16(_mm256_slli_epi32(oligo,8), _mm256_srli_epi32(oligo,6), 0x55); + + /* _row14 = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask8); */ + /* _row15 = _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask8); */ + _t7 = _mm256_blend_epi16(_mm256_slli_epi32(oligo,4), _mm256_srli_epi32(oligo,10), 0x55); + + + _u0 = _mm256_unpackhi_epi32(_t0,_t1); + _u1 = _mm256_unpackhi_epi32(_t2,_t3); + _u2 = _mm256_unpackhi_epi32(_t4,_t5); + _u3 = _mm256_unpackhi_epi32(_t6,_t7); + _u4 = _mm256_unpacklo_epi32(_t0,_t1); + _u5 = _mm256_unpacklo_epi32(_t2,_t3); + _u6 = _mm256_unpacklo_epi32(_t4,_t5); + _u7 = _mm256_unpacklo_epi32(_t6,_t7); + + + _t0 = _mm256_unpackhi_epi64(_u0,_u1); + _t1 = _mm256_unpackhi_epi64(_u2,_u3); + _t2 = _mm256_unpacklo_epi64(_u0,_u1); + _t3 = _mm256_unpacklo_epi64(_u2,_u3); + _t4 = 
_mm256_unpackhi_epi64(_u4,_u5); + _t5 = _mm256_unpackhi_epi64(_u6,_u7); + _t6 = _mm256_unpacklo_epi64(_u4,_u5); + _t7 = _mm256_unpacklo_epi64(_u6,_u7); + + + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,0))); + + return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif + +#ifdef HAVE_AVX512 +static void +extract_8mers_rev_simd_256 (__m512i *out, __m512i current, __m512i next) { + __m512i oligo; + + oligo = _mm512_or_si512( _mm512_srli_epi32(current,18), _mm512_slli_epi32(next,14)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,12), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( 
_mm512_srli_epi32(oligo,10), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask8)); + + _mm512_store_si512(out++, _mm512_srli_epi32(current,16)); /* No mask necessary */; + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask8)); + _mm512_store_si512(out++, _mm512_and_si512( current, hugemask8)); return; } -#endif - - -#ifdef HAVE_AVX2 -static void -count_8mers_rev_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) { - __m256i oligo; - __m256i array; -#ifdef CHECK_FOR_OVERFLOW - __m256i _counts_after, _counts_neg; -#endif +#ifdef USE_UNORDERED_8 +static Chrpos_T +store_8mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i next) { + __m512i array[16]; + + extract_8mers_rev_simd_256(array,current,next); + return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array); +} - oligo = _mm256_or_si256( _mm256_srli_epi32(current,18), _mm256_slli_epi32(next,14)); - array = _mm256_and_si256( _mm256_srli_epi32(oligo,12), bigmask8); - 
counts[EXTRACT256(array,0)] += 1; /* 127 */ - counts[EXTRACT256(array,1)] += 1; /* 111 */ - counts[EXTRACT256(array,2)] += 1; /* 95 */ - counts[EXTRACT256(array,3)] += 1; /* 79 */ - counts[EXTRACT256(array,4)] += 1; /* 63 */ - counts[EXTRACT256(array,5)] += 1; /* 47 */ - counts[EXTRACT256(array,6)] += 1; /* 31 */ - counts[EXTRACT256(array,7)] += 1; /* 15 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("63 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("31 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("15 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("63 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("31 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("15 %04X => 
%d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 126 */ - counts[EXTRACT256(array,1)] += 1; /* 110 */ - counts[EXTRACT256(array,2)] += 1; /* 94 */ - counts[EXTRACT256(array,3)] += 1; /* 78 */ - counts[EXTRACT256(array,4)] += 1; /* 62 */ - counts[EXTRACT256(array,5)] += 1; /* 46 */ - counts[EXTRACT256(array,6)] += 1; /* 30 */ - counts[EXTRACT256(array,7)] += 1; /* 14 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("62 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("46 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("30 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("14 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("62 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("46 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - 
debug(printf("30 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("14 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 125 */ - counts[EXTRACT256(array,1)] += 1; /* 109 */ - counts[EXTRACT256(array,2)] += 1; /* 93 */ - counts[EXTRACT256(array,3)] += 1; /* 77 */ - counts[EXTRACT256(array,4)] += 1; /* 61 */ - counts[EXTRACT256(array,5)] += 1; /* 45 */ - counts[EXTRACT256(array,6)] += 1; /* 29 */ - counts[EXTRACT256(array,7)] += 1; /* 13 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("61 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("45 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("13 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("61 %04X => 
%d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("45 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("13 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 124 */ - counts[EXTRACT256(array,1)] += 1; /* 108 */ - counts[EXTRACT256(array,2)] += 1; /* 92 */ - counts[EXTRACT256(array,3)] += 1; /* 76 */ - counts[EXTRACT256(array,4)] += 1; /* 60 */ - counts[EXTRACT256(array,5)] += 1; /* 44 */ - counts[EXTRACT256(array,6)] += 1; /* 28 */ - counts[EXTRACT256(array,7)] += 1; /* 12 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("60 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("28 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - 
debug(printf("12 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("60 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("28 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("12 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 123 */ - counts[EXTRACT256(array,1)] += 1; /* 107 */ - counts[EXTRACT256(array,2)] += 1; /* 91 */ - counts[EXTRACT256(array,3)] += 1; /* 75 */ - counts[EXTRACT256(array,4)] += 1; /* 59 */ - counts[EXTRACT256(array,5)] += 1; /* 43 */ - counts[EXTRACT256(array,6)] += 1; /* 27 */ - counts[EXTRACT256(array,7)] += 1; /* 11 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("59 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("43 %04X => 
%d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("11 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("59 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("43 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("11 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 122 */ - counts[EXTRACT256(array,1)] += 1; /* 106 */ - counts[EXTRACT256(array,2)] += 1; /* 90 */ - counts[EXTRACT256(array,3)] += 1; /* 74 */ - counts[EXTRACT256(array,4)] += 1; /* 58 */ - counts[EXTRACT256(array,5)] += 1; /* 42 */ - counts[EXTRACT256(array,6)] += 1; /* 26 */ - counts[EXTRACT256(array,7)] += 1; /* 10 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - 
debug(printf("58 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("10 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("58 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("10 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( oligo, bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 121 */ - counts[EXTRACT256(array,1)] += 1; /* 105 */ - counts[EXTRACT256(array,2)] += 1; /* 89 */ - counts[EXTRACT256(array,3)] += 1; /* 73 */ - counts[EXTRACT256(array,4)] += 1; /* 57 */ - counts[EXTRACT256(array,5)] += 1; /* 41 */ - counts[EXTRACT256(array,6)] += 1; /* 25 */ - counts[EXTRACT256(array,7)] += 1; /* 9 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - 
inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("57 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("25 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("9 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("57 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("25 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("9 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_srli_epi32(current,16); /* No mask necessary */; - counts[EXTRACT256(array,0)] += 1; /* 120 */ - counts[EXTRACT256(array,1)] += 1; /* 104 */ - counts[EXTRACT256(array,2)] += 1; /* 88 */ - counts[EXTRACT256(array,3)] += 1; /* 72 */ - counts[EXTRACT256(array,4)] += 1; /* 56 */ - counts[EXTRACT256(array,5)] += 1; /* 40 */ - counts[EXTRACT256(array,6)] += 1; /* 24 */ - counts[EXTRACT256(array,7)] += 1; /* 8 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("56 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("24 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("8 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("56 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("24 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("8 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 119 */ - counts[EXTRACT256(array,1)] += 1; /* 103 */ - counts[EXTRACT256(array,2)] += 1; /* 87 */ - counts[EXTRACT256(array,3)] += 1; /* 71 */ - counts[EXTRACT256(array,4)] += 1; /* 55 */ - counts[EXTRACT256(array,5)] += 1; /* 39 */ - counts[EXTRACT256(array,6)] += 1; /* 23 */ - counts[EXTRACT256(array,7)] += 1; /* 7 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = 
INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("55 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("7 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("55 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("7 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 118 */ - counts[EXTRACT256(array,1)] += 1; /* 102 */ - counts[EXTRACT256(array,2)] += 1; /* 86 */ - counts[EXTRACT256(array,3)] += 1; /* 70 */ - counts[EXTRACT256(array,4)] += 1; /* 54 */ - counts[EXTRACT256(array,5)] += 1; /* 38 */ - counts[EXTRACT256(array,6)] += 1; /* 22 */ - counts[EXTRACT256(array,7)] += 1; /* 6 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - 
inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("54 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("38 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("6 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("54 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("38 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("6 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 117 */ - counts[EXTRACT256(array,1)] += 1; /* 101 */ - counts[EXTRACT256(array,2)] += 1; /* 85 */ - counts[EXTRACT256(array,3)] += 1; /* 69 */ - counts[EXTRACT256(array,4)] += 1; /* 53 */ - counts[EXTRACT256(array,5)] += 1; /* 37 */ - counts[EXTRACT256(array,6)] += 1; /* 21 */ - counts[EXTRACT256(array,7)] += 1; /* 5 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("53 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("37 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("21 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("5 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("53 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("37 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("21 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("5 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 116 */ - counts[EXTRACT256(array,1)] += 1; /* 100 */ - counts[EXTRACT256(array,2)] += 1; /* 84 */ - counts[EXTRACT256(array,3)] += 1; /* 68 */ - counts[EXTRACT256(array,4)] += 1; /* 52 */ - counts[EXTRACT256(array,5)] += 1; /* 36 */ - counts[EXTRACT256(array,6)] += 1; /* 20 */ - counts[EXTRACT256(array,7)] += 1; /* 4 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; 
- } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("52 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("20 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("4 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("52 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("20 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("4 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 115 */ - counts[EXTRACT256(array,1)] += 1; /* 99 */ - counts[EXTRACT256(array,2)] += 1; /* 83 */ - counts[EXTRACT256(array,3)] += 1; /* 67 */ - counts[EXTRACT256(array,4)] += 1; /* 51 */ - counts[EXTRACT256(array,5)] += 1; /* 35 */ - counts[EXTRACT256(array,6)] += 1; /* 19 */ - counts[EXTRACT256(array,7)] += 1; /* 3 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum 
counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("51 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("3 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("51 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("3 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 114 */ - counts[EXTRACT256(array,1)] += 1; /* 98 */ - counts[EXTRACT256(array,2)] += 1; /* 82 */ - counts[EXTRACT256(array,3)] += 1; /* 66 */ - counts[EXTRACT256(array,4)] += 1; /* 50 */ - counts[EXTRACT256(array,5)] += 1; /* 34 */ - counts[EXTRACT256(array,6)] += 1; /* 18 */ - counts[EXTRACT256(array,7)] += 1; /* 2 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = 
_mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("50 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("2 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("50 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("2 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 113 */ - counts[EXTRACT256(array,1)] += 1; /* 97 */ - counts[EXTRACT256(array,2)] += 1; /* 81 */ - counts[EXTRACT256(array,3)] += 1; /* 65 */ - counts[EXTRACT256(array,4)] += 1; /* 49 */ - counts[EXTRACT256(array,5)] += 1; /* 33 */ - counts[EXTRACT256(array,6)] += 1; /* 17 */ - counts[EXTRACT256(array,7)] += 1; /* 1 */ -#ifdef 
CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("49 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("33 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("17 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("1 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("49 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("33 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("17 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("1 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( current, bigmask8); - counts[EXTRACT256(array,0)] += 1; /* 112 */ - counts[EXTRACT256(array,1)] += 1; /* 96 */ - counts[EXTRACT256(array,2)] += 1; /* 80 */ - counts[EXTRACT256(array,3)] += 1; /* 64 */ - counts[EXTRACT256(array,4)] += 1; /* 48 */ - counts[EXTRACT256(array,5)] += 1; /* 32 */ - 
counts[EXTRACT256(array,6)] += 1; /* 16 */ - counts[EXTRACT256(array,7)] += 1; /* 0 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("48 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("32 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("16 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("0 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("48 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("32 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("16 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("0 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); +#else +/* Includes extract_8mers_rev_simd_256_ordered (__m512i *out, __m512i current, __m512i next) */ +static Chrpos_T +store_8mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i 
next) { + __m512i array[16], *out; + __m512i oligo, _shuffle0, _shuffle1, _shuffle2; + __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; + + out = &(array[0]); + + _u0 = _mm512_and_si512( current, hugemask8); + /* _row1 = _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask8); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,14), highmask8); + _t0 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask8); + /* _row3 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask8); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,10), highmask8); + _t1 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask8); + /* _row5 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask8); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,6), highmask8); + _t2 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask8); + /* _row7 = _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask8); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,2), highmask8); + _t3 = _mm512_or_si512(_u0, _u1); + + + oligo = _mm512_or_si512( _mm512_srli_epi32(current,18), _mm512_slli_epi32(next,14)); + _u0 = _mm512_srli_epi32(current,16); /* No mask necessary */; + /* _row9 = _mm512_and_si512( oligo, hugemask8); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,16), highmask8); + _t4 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask8); + /* _row11 = _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask8); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,12), highmask8); + _t5 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask8); + /* _row13 = _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask8); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,8), highmask8); + _t6 = _mm512_or_si512(_u0, _u1); + + _u0 = 
_mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask8); + /* _row15 = _mm512_and_si512( _mm512_srli_epi32(oligo,12), hugemask8); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,4), highmask8); + _t7 = _mm512_or_si512(_u0, _u1); + + + _u0 = _mm512_unpackhi_epi32(_t0,_t1); + _u1 = _mm512_unpackhi_epi32(_t2,_t3); + _u2 = _mm512_unpackhi_epi32(_t4,_t5); + _u3 = _mm512_unpackhi_epi32(_t6,_t7); + _u4 = _mm512_unpacklo_epi32(_t0,_t1); + _u5 = _mm512_unpacklo_epi32(_t2,_t3); + _u6 = _mm512_unpacklo_epi32(_t4,_t5); + _u7 = _mm512_unpacklo_epi32(_t6,_t7); + + + /* Split: top half */ + _shuffle0 = _mm512_setr_epi64(7, 8+7, 6, 8+6, 5, 8+5, 4, 8+4); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); + + + _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + + /* Split: bottom half */ + _shuffle0 = _mm512_setr_epi64(3, 8+3, 
2, 8+2, 1, 8+1, 0, 8+0); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); + + + /* _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); */ + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + /* _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); */ + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); - return; + return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array); } #endif +#endif #if !defined(HAVE_AVX2) static int -store_8mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, - Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { +store_8mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { Genomecomp_T masked, oligo; #ifdef INDIVIDUAL_SHIFTS #elif defined(SIMD_MASK_THEN_STORE) @@ -36365,65 +24624,56 @@ #ifdef INDIVIDUAL_SHIFTS masked = low_rc & MASK8; /* 0 */ if (counts[masked]) { - 
assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } masked = (low_rc >> 2) & MASK8; /* 1 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; + table[positions[masked] + (--counts[masked])] = chrpos - 1; } masked = (low_rc >> 4) & MASK8; /* 2 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; + table[positions[masked] + (--counts[masked])] = chrpos - 2; } masked = (low_rc >> 6) & MASK8; /* 3 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; + table[positions[masked] + (--counts[masked])] = chrpos - 3; } masked = (low_rc >> 8) & MASK8; /* 4 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; + table[positions[masked] + (--counts[masked])] = chrpos - 4; } masked = (low_rc >> 10) & MASK8; /* 5 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; + table[positions[masked] + (--counts[masked])] = chrpos - 5; } masked = (low_rc >> 12) & MASK8; /* 6 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; + table[positions[masked] + (--counts[masked])] = chrpos - 6; } masked = (low_rc >> 14) & MASK8; /* 7 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); 
debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; + table[positions[masked] + (--counts[masked])] = chrpos - 7; } masked = low_rc >> 16; /* 8, No mask necessary */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; + table[positions[masked] + (--counts[masked])] = chrpos - 8; } #else @@ -36437,33 +24687,29 @@ masked = EXTRACT(_masked,0); assert(masked == (low_rc & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } masked = EXTRACT(_masked,1); assert(masked == ((low_rc >> 2) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; + table[positions[masked] + (--counts[masked])] = chrpos - 1; } masked = EXTRACT(_masked,2); assert(masked == ((low_rc >> 4) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; + table[positions[masked] + (--counts[masked])] = chrpos - 2; } masked = EXTRACT(_masked,3); assert(masked == ((low_rc >> 6) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; + table[positions[masked] + (--counts[masked])] = chrpos - 3; } @@ -36477,41 +24723,36 @@ masked = EXTRACT(_masked,0); assert(masked == ((low_rc >> 8) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; + 
table[positions[masked] + (--counts[masked])] = chrpos - 4; } masked = EXTRACT(_masked,1); assert(masked == ((low_rc >> 10) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; + table[positions[masked] + (--counts[masked])] = chrpos - 5; } masked = EXTRACT(_masked,2); assert(masked == ((low_rc >> 12) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; + table[positions[masked] + (--counts[masked])] = chrpos - 6; } masked = EXTRACT(_masked,3); assert(masked == ((low_rc >> 14) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; + table[positions[masked] + (--counts[masked])] = chrpos - 7; } masked = low_rc >> 16; /* 8, No mask necessary */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; + table[positions[masked] + (--counts[masked])] = chrpos - 8; } #endif @@ -36522,51 +24763,44 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK8; /* 9 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; + table[positions[masked] + (--counts[masked])] = chrpos - 9; } masked = (oligo >> 2) & MASK8; /* 10 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; + table[positions[masked] + (--counts[masked])] = chrpos - 10; } masked = (oligo >> 4) & MASK8; /* 11 */ if (counts[masked]) { - assert(pointers[masked] > 
positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; + table[positions[masked] + (--counts[masked])] = chrpos - 11; } masked = (oligo >> 6) & MASK8; /* 12 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; + table[positions[masked] + (--counts[masked])] = chrpos - 12; } masked = (oligo >> 8) & MASK8; /* 13 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; + table[positions[masked] + (--counts[masked])] = chrpos - 13; } masked = (oligo >> 10) & MASK8; /* 14 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; + table[positions[masked] + (--counts[masked])] = chrpos - 14; } masked = (oligo >> 12) & MASK8; /* 15 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; + table[positions[masked] + (--counts[masked])] = chrpos - 15; } #else @@ -36580,33 +24814,29 @@ masked = EXTRACT(_masked,0); assert(masked == (oligo & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; + table[positions[masked] + (--counts[masked])] = chrpos - 9; } masked = EXTRACT(_masked,1); assert(masked == ((oligo >> 2) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; + table[positions[masked] + (--counts[masked])] = chrpos - 10; } masked = EXTRACT(_masked,2); 
assert(masked == ((oligo >> 4) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; + table[positions[masked] + (--counts[masked])] = chrpos - 11; } masked = EXTRACT(_masked,3); assert(masked == ((oligo >> 6) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; + table[positions[masked] + (--counts[masked])] = chrpos - 12; } @@ -36620,25 +24850,22 @@ masked = EXTRACT(_masked,0); assert(masked == ((oligo >> 8) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; + table[positions[masked] + (--counts[masked])] = chrpos - 13; } masked = EXTRACT(_masked,1); assert(masked == ((oligo >> 10) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; + table[positions[masked] + (--counts[masked])] = chrpos - 14; } masked = EXTRACT(_masked,2); assert(masked == ((oligo >> 12) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; + table[positions[masked] + (--counts[masked])] = chrpos - 15; } #endif @@ -36646,65 +24873,56 @@ #ifdef INDIVIDUAL_SHIFTS masked = high_rc & MASK8; /* 16 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; + table[positions[masked] + (--counts[masked])] = chrpos - 16; } masked = (high_rc >> 2) & MASK8; /* 17 */ if (counts[masked]) { - assert(pointers[masked] > 
positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; + table[positions[masked] + (--counts[masked])] = chrpos - 17; } masked = (high_rc >> 4) & MASK8; /* 18 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; + table[positions[masked] + (--counts[masked])] = chrpos - 18; } masked = (high_rc >> 6) & MASK8; /* 19 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; + table[positions[masked] + (--counts[masked])] = chrpos - 19; } masked = (high_rc >> 8) & MASK8; /* 20 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; + table[positions[masked] + (--counts[masked])] = chrpos - 20; } masked = (high_rc >> 10) & MASK8; /* 21 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; + table[positions[masked] + (--counts[masked])] = chrpos - 21; } masked = (high_rc >> 12) & MASK8; /* 22 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; + table[positions[masked] + (--counts[masked])] = chrpos - 22; } masked = (high_rc >> 14) & MASK8; /* 23 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; + table[positions[masked] + (--counts[masked])] = chrpos - 23; } masked = high_rc >> 16; /* 24, No mask necessary */ if (counts[masked]) { - assert(pointers[masked] > 
positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; + table[positions[masked] + (--counts[masked])] = chrpos - 24; } #else @@ -36718,33 +24936,29 @@ masked = EXTRACT(_masked,0); assert(masked == (high_rc & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; + table[positions[masked] + (--counts[masked])] = chrpos - 16; } masked = EXTRACT(_masked,1); assert(masked == ((high_rc >> 2) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; + table[positions[masked] + (--counts[masked])] = chrpos - 17; } masked = EXTRACT(_masked,2); assert(masked == ((high_rc >> 4) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; + table[positions[masked] + (--counts[masked])] = chrpos - 18; } masked = EXTRACT(_masked,3); assert(masked == ((high_rc >> 6) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; + table[positions[masked] + (--counts[masked])] = chrpos - 19; } @@ -36758,41 +24972,36 @@ masked = EXTRACT(_masked,0); assert(masked == ((high_rc >> 8) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; + table[positions[masked] + (--counts[masked])] = chrpos - 20; } masked = EXTRACT(_masked,1); assert(masked == ((high_rc >> 10) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at 
%u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; + table[positions[masked] + (--counts[masked])] = chrpos - 21; } masked = EXTRACT(_masked,2); assert(masked == ((high_rc >> 12) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; + table[positions[masked] + (--counts[masked])] = chrpos - 22; } masked = EXTRACT(_masked,3); assert(masked == ((high_rc >> 14) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; + table[positions[masked] + (--counts[masked])] = chrpos - 23; } masked = high_rc >> 16; /* 24, No mask necessary */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; + table[positions[masked] + (--counts[masked])] = chrpos - 24; } #endif @@ -36803,51 +25012,44 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK8; /* 25 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; + table[positions[masked] + (--counts[masked])] = chrpos - 25; } masked = (oligo >> 2) & MASK8; /* 26 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; + table[positions[masked] + (--counts[masked])] = chrpos - 26; } masked = (oligo >> 4) & MASK8; /* 27 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; + table[positions[masked] + (--counts[masked])] = chrpos - 27; } masked = (oligo >> 6) & MASK8; /* 28 */ if 
(counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; + table[positions[masked] + (--counts[masked])] = chrpos - 28; } masked = (oligo >> 8) & MASK8; /* 29 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; + table[positions[masked] + (--counts[masked])] = chrpos - 29; } masked = (oligo >> 10) & MASK8; /* 30 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; + table[positions[masked] + (--counts[masked])] = chrpos - 30; } masked = (oligo >> 12) & MASK8; /* 31 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; + table[positions[masked] + (--counts[masked])] = chrpos - 31; } #else @@ -36861,33 +25063,29 @@ masked = EXTRACT(_masked,0); assert(masked == (oligo & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; + table[positions[masked] + (--counts[masked])] = chrpos - 25; } masked = EXTRACT(_masked,1); assert(masked == ((oligo >> 2) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; + table[positions[masked] + (--counts[masked])] = chrpos - 26; } masked = EXTRACT(_masked,2); assert(masked == ((oligo >> 4) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; + 
table[positions[masked] + (--counts[masked])] = chrpos - 27; } masked = EXTRACT(_masked,3); assert(masked == ((oligo >> 6) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; + table[positions[masked] + (--counts[masked])] = chrpos - 28; } @@ -36901,25 +25099,22 @@ masked = EXTRACT(_masked,0); assert(masked == ((oligo >> 8) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; + table[positions[masked] + (--counts[masked])] = chrpos - 29; } masked = EXTRACT(_masked,1); assert(masked == ((oligo >> 10) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; + table[positions[masked] + (--counts[masked])] = chrpos - 30; } masked = EXTRACT(_masked,2); assert(masked == ((oligo >> 12) & MASK8)); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; + table[positions[masked] + (--counts[masked])] = chrpos - 31; } #endif @@ -36929,261 +25124,319 @@ #else /* HAVE_AVX2 */ static int -store_8mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, - Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { +store_8mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { Genomecomp_T masked, oligo; __m256i _oligo, _masked, _counts; + __m256i _blocks, _envelopes, _addresses, _address_mask, _count_mask; + + _address_mask = _mm256_set1_epi32(0x3); + _count_mask = _mm256_set1_epi32(0xFF); - _oligo = 
_mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14); + + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask8); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); if (EXTRACT256(_counts,0)) { masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + } + table[positions[masked] + (--counts[masked])] = chrpos; } if (EXTRACT256(_counts,1)) { masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; + } } if (EXTRACT256(_counts,2)) { masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; + } } if (EXTRACT256(_counts,3)) { masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - 
debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; + } } if (EXTRACT256(_counts,4)) { masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; + } } if (EXTRACT256(_counts,5)) { masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; + } } if (EXTRACT256(_counts,6)) { masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; + } } if (EXTRACT256(_counts,7)) { masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; + } } masked = low_rc >> 16; /* 
8, No mask necessary */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; + table[positions[masked] + (--counts[masked])] = chrpos - 8; } oligo = low_rc >> 18; /* For 15..9 */ oligo |= high_rc << 14; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask8); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); if (EXTRACT256(_counts,0)) { masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; + } } if (EXTRACT256(_counts,1)) { masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; + } } if (EXTRACT256(_counts,2)) { masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - 
table[--pointers[masked]] = chrpos - 11; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; + } } if (EXTRACT256(_counts,3)) { masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; - } + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + (--counts[masked])] = chrpos - 12; + }} + if (EXTRACT256(_counts,4)) { masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; + } } if (EXTRACT256(_counts,5)) { masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; + } } if (EXTRACT256(_counts,6)) { masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; + } } - _oligo = 
_mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14); + + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask8); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); if (EXTRACT256(_counts,0)) { masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; + } } if (EXTRACT256(_counts,1)) { masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; + } } if (EXTRACT256(_counts,2)) { masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; + } } if (EXTRACT256(_counts,3)) { masked = EXTRACT256(_masked,3); - 
assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = chrpos - 19; + } } if (EXTRACT256(_counts,4)) { masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; + } } if (EXTRACT256(_counts,5)) { masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; - } + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; + }} + if (EXTRACT256(_counts,6)) { masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; + } } if (EXTRACT256(_counts,7)) { masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + 
table[positions[masked] + (--counts[masked])] = chrpos - 23; + } } masked = high_rc >> 16; /* 24, No mask necessary */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; + table[positions[masked] + (--counts[masked])] = chrpos - 24; } oligo = high_rc >> 18; /* For 31..25 */ oligo |= nextlow_rc << 14; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask8); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); if (EXTRACT256(_counts,0)) { masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; + } } if (EXTRACT256(_counts,1)) { masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; + } } if (EXTRACT256(_counts,2)) { masked = EXTRACT256(_masked,2); - 
assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; + } } if (EXTRACT256(_counts,3)) { masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; - } + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; + }} + if (EXTRACT256(_counts,4)) { masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; + } } if (EXTRACT256(_counts,5)) { masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; + } } if (EXTRACT256(_counts,6)) { masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + 
table[positions[masked] + (--counts[masked])] = chrpos - 31; + } } return chrpos - 32; @@ -37196,7 +25449,7 @@ #if !defined(HAVE_AVX2) static void -count_7mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { +count_7mers_rev_32 (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { Genomecomp_T masked, oligo; #ifdef INDIVIDUAL_SHIFTS #elif defined(SIMD_MASK_THEN_STORE) @@ -37209,43 +25462,43 @@ #ifdef INDIVIDUAL_SHIFTS masked = low_rc & MASK7; /* 0 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("0 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 2) & MASK7; /* 1 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("1 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 4) & MASK7; /* 2 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("2 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 6) & MASK7; /* 3 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("3 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 8) & MASK7; /* 4 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("4 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 10) & MASK7; /* 5 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("5 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 12) & MASK7; /* 6 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("6 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 14) & MASK7; /* 7 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("7 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 16) & MASK7; /* 8 */ - INCR_COUNT(counts[masked],inquery[masked]); + 
INCR_COUNT(counts[masked]); debug(printf("8 %04X => %d\n",masked,counts[masked])); masked = low_rc >> 18; /* 9, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("9 %04X => %d\n",masked,counts[masked])); #else @@ -37257,19 +25510,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("0 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("1 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("2 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("3 %04X => %d\n",masked,counts[masked])); @@ -37281,19 +25534,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("4 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("5 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("6 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("7 %04X => %d\n",masked,counts[masked])); @@ -37305,11 +25558,11 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("8 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("9 %04X => %d\n",masked,counts[masked])); #endif @@ -37319,27 +25572,27 
@@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK7; /* 10 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("10 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 2) & MASK7; /* 11 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("11 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 4) & MASK7; /* 12 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("12 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 6) & MASK7; /* 13 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("13 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 8) & MASK7; /* 14 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("14 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 10) & MASK7; /* 15 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("15 %04X => %d\n",masked,counts[masked])); #else @@ -37351,19 +25604,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("10 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("11 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("12 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("13 %04X => %d\n",masked,counts[masked])); @@ -37375,54 +25628,54 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("14 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - 
INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("15 %04X => %d\n",masked,counts[masked])); #endif #ifdef INDIVIDUAL_SHIFTS masked = high_rc & MASK7; /* 16 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("16 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 2) & MASK7; /* 17 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("17 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 4) & MASK7; /* 18 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("18 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 6) & MASK7; /* 19 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("19 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 8) & MASK7; /* 20 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("20 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 10) & MASK7; /* 21 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("21 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 12) & MASK7; /* 22 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("22 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 14) & MASK7; /* 23 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("23 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 16) & MASK7; /* 24 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("24 %04X => %d\n",masked,counts[masked])); masked = high_rc >> 18; /* 25, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("25 %04X => %d\n",masked,counts[masked])); #else @@ -37434,19 +25687,19 @@ #endif masked = EXTRACT(_masked,0); - 
INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("16 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("17 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("18 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("19 %04X => %d\n",masked,counts[masked])); @@ -37458,19 +25711,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("20 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("21 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("22 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("23 %04X => %d\n",masked,counts[masked])); @@ -37482,11 +25735,11 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("24 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("25 %04X => %d\n",masked,counts[masked])); #endif @@ -37496,27 +25749,27 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK7; /* 26 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("26 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 2) & MASK7; /* 27 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); 
debug(printf("27 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 4) & MASK7; /* 28 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("28 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 6) & MASK7; /* 29 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("29 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 8) & MASK7; /* 30 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("30 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 10) & MASK7; /* 31 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("31 %04X => %d\n",masked,counts[masked])); #else @@ -37528,19 +25781,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("26 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("27 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("28 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("29 %04X => %d\n",masked,counts[masked])); @@ -37552,11 +25805,11 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("30 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("31 %04X => %d\n",masked,counts[masked])); #endif @@ -37566,291 +25819,169 @@ #else /* HAVE_AVX2 */ static void -count_7mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { +count_7mers_rev_32 
(Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { Genomecomp_T masked, oligo; __m256i _oligo, _masked; -#ifdef CHECK_FOR_OVERFLOW - __m256i _counts_after, _counts_neg; -#endif - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask7); masked = EXTRACT256(_masked,0); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("0 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("1 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,2); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("2 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,3); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("3 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,4); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("4 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,5); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("5 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,6); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("6 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,7); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("7 %04X => %d\n",masked,counts[masked])); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - 
inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - _oligo = _mm256_srli_epi32(_oligo, 16); _masked = _mm256_and_si256(_oligo, bigmask7); masked = EXTRACT256(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("8 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("9 %04X => %d\n",masked,counts[masked])); oligo = low_rc >> 20; /* For 15..10 */ oligo |= high_rc << 12; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask7); masked = EXTRACT256(_masked,0); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("10 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("11 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,2); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("12 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,3); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("13 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,4); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("14 %04X => %d\n",masked,counts[masked])); masked = 
EXTRACT256(_masked,5); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("15 %04X => %d\n",masked,counts[masked])); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low6); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask7); masked = EXTRACT256(_masked,0); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("16 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("17 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,2); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("18 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,3); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("19 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,4); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("20 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,5); - counts[masked] += 1; + 
INCR_COUNT(counts[masked]); debug(printf("21 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,6); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("22 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,7); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("23 %04X => %d\n",masked,counts[masked])); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ _oligo = _mm256_srli_epi32(_oligo, 16); _masked = _mm256_and_si256(_oligo, bigmask7); masked = EXTRACT256(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("24 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("25 %04X => %d\n",masked,counts[masked])); oligo = high_rc >> 20; /* For 31..26 */ oligo |= nextlow_rc << 12; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = 
_mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask7); masked = EXTRACT256(_masked,0); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("26 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("27 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,2); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("28 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,3); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("29 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,4); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("30 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,5); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("31 %04X => %d\n",masked,counts[masked])); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low6); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - return; } @@ -37860,9 +25991,9 @@ /* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc}, and next to have {high0_rc, low1_rc, high1_rc, 
nextlow_rc} */ -#ifdef USE_SIMD_FOR_COUNTS +#ifdef HAVE_SSE2 static void -extract_7mers_rev_simd (__m128i *out, __m128i current, __m128i next) { +extract_7mers_rev_simd_64 (__m128i *out, __m128i current, __m128i next) { __m128i oligo; oligo = _mm_or_si128( _mm_srli_epi32(current,20), _mm_slli_epi32(next,12)); @@ -37887,6 +26018,98 @@ return; } +#ifdef USE_UNORDERED_7 +static Chrpos_T +store_7mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16]; + + extract_7mers_rev_simd_64(array,current,next); + return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array); +} + +#else +/* Includes extract_7mers_rev_simd_64_ordered (__m128i *out, __m128i current, __m128i next) */ +static Chrpos_T +store_7mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16], *out; + __m128i oligo; + __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m128i _u0, _u1, _u2, _u3; + + out = &(array[0]); + + /* _row0 = _mm_and_si128( current, mask7); */ + /* _row1 = _mm_and_si128( _mm_srli_epi32(current,2), mask7);*/ + _t0 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,14), current, 0x55), mask7_epi16); + + /* _row2 = _mm_and_si128( _mm_srli_epi32(current,4), mask7); */ + /* _row3 = _mm_and_si128( _mm_srli_epi32(current,6), mask7); */ + _t1 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,10), _mm_srli_epi32(current, 4), 0x55), mask7_epi16); + + /* _row4 = _mm_and_si128( _mm_srli_epi32(current,8), mask7); */ + /* _row5 = _mm_and_si128( _mm_srli_epi32(current,10), mask7); */ + _t2 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,6), _mm_srli_epi32(current, 8), 0x55), mask7_epi16); + + /* _row6 = _mm_and_si128( _mm_srli_epi32(current,12), mask7); */ + /* _row7 = _mm_and_si128( _mm_srli_epi32(current,14), mask7); */ + _t3 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,2), 
_mm_srli_epi32(current, 12), 0x55), mask7_epi16); + + /* _row8 = _mm_and_si128( _mm_srli_epi32(current,16), mask7); */ + /* _row9 = _mm_srli_epi32(current,18); */ /* No mask necessary */ + _t4 = _mm_and_si128(_mm_blend_epi16(_mm_srli_epi32(current,2), _mm_srli_epi32(current, 16), 0x55), mask7_epi16); + + + oligo = _mm_or_si128( _mm_srli_epi32(current,20), _mm_slli_epi32(next,12)); + /* _row10 = _mm_and_si128( oligo, mask7); */ + /* _row11 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask7); */ + _t5 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo, 14), oligo, 0x55), mask7_epi16); + + /* _row12 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask7); */ + /* _row13 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask7); */ + _t6 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,10), _mm_srli_epi32(oligo, 4), 0x55), mask7_epi16); + + /* _row14 = _mm_and_si128( _mm_srli_epi32(oligo,8), mask7); */ + /* _row15 = _mm_and_si128( _mm_srli_epi32(oligo,10), mask7); */ + _t7 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,6), _mm_srli_epi32(oligo, 8), 0x55), mask7_epi16); + + + /* Split: top half */ + _u0 = _mm_unpackhi_epi32(_t0,_t1); + _u1 = _mm_unpackhi_epi32(_t2,_t3); + _u2 = _mm_unpackhi_epi32(_t4,_t5); + _u3 = _mm_unpackhi_epi32(_t6,_t7); + + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3)); + + /* Split: bottom half */ + _u0 = _mm_unpacklo_epi32(_t0,_t1); + _u1 = _mm_unpacklo_epi32(_t2,_t3); + _u2 = _mm_unpacklo_epi32(_t4,_t5); + _u3 = _mm_unpacklo_epi32(_t6,_t7); + + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); + 
_mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3)); + + return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif + #ifdef HAVE_AVX2 static void extract_7mers_rev_simd_128 (__m256i *out, __m256i current, __m256i next) { @@ -37913,1553 +26136,272 @@ return; } -#endif - - -static void -count_7mers_rev_simd (Count_T *counts, Inquery_T *inquery, __m128i current, __m128i next) { - __m128i oligo; -#ifdef HAVE_SSE4_1 - __m128i array; -#else - Genomecomp_T array[4]; -#endif -#if defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW) - __m128i _counts_after, _counts_neg; -#endif - - oligo = _mm_or_si128( _mm_srli_epi32(current,20), _mm_slli_epi32(next,12)); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,10), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,10), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - 
debug(printf("63 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("47 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("31 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("15 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 63 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 47 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 31 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 15 */ -#endif - debug(printf("63 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("47 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("31 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("15 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,8), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("62 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("46 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("30 %04X => 
%d\n",array[2],counts[array[2]])); - debug(printf("14 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 62 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 46 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 30 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 14 */ -#endif - debug(printf("62 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("46 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("30 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("14 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("61 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("45 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("29 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("13 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 61 */ 
- INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 45 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 29 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 13 */ -#endif - debug(printf("61 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("45 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("29 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("13 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("60 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("44 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("28 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("12 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 60 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 44 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 28 */ - 
INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 12 */ -#endif - debug(printf("60 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("44 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("28 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("12 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,2), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("59 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("43 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("27 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("11 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 59 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 43 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 27 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 11 */ -#endif - debug(printf("59 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("43 
%04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("27 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("11 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( oligo, mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("58 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("42 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("26 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("10 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 58 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 42 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 26 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 10 */ -#endif - debug(printf("58 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("42 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("26 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("10 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); 
- - -#ifdef HAVE_SSE4_1 - array = _mm_srli_epi32(current,18); /* No mask necessary */ -#else - _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,18)); /* No mask necessary */ -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("57 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("41 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("25 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("9 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 57 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 41 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 25 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 9 */ -#endif - debug(printf("57 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("41 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("25 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("9 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,16), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,16), 
mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("56 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("40 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("24 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("8 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 56 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 40 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 24 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 8 */ -#endif - debug(printf("56 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("40 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("24 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("8 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,14), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef 
CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("55 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("39 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("23 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("7 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 55 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 39 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 23 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 7 */ -#endif - debug(printf("55 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("39 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("23 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("7 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,12), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if 
(_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("54 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("38 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("22 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("6 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 54 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 38 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 22 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 6 */ -#endif - debug(printf("54 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("38 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("22 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("6 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,10), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - 
} - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("53 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("37 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("21 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("5 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 53 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 37 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 21 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 5 */ -#endif - debug(printf("53 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("37 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("21 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("5 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); +#ifdef USE_UNORDERED_7 +static Chrpos_T +store_7mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + __m256i array[16]; + + extract_7mers_rev_simd_128(array,current,next); + return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array); +} -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,8), mask7); #else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if 
(_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("52 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("36 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("20 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("4 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 52 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 36 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 20 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 4 */ -#endif - debug(printf("52 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("36 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("20 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("4 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); +/* Includes extract_7mers_rev_simd_128_ordered (__m256i *out, __m256i current, __m256i next) */ +static Chrpos_T +store_7mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + __m256i array[16], *out; + __m256i oligo; + __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,6), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - 
counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("51 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("35 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("19 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("3 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 51 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 35 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 19 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 3 */ -#endif - debug(printf("51 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("35 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("19 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("3 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + out = &(array[0]); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,4), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - 
_counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("50 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("34 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("18 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("2 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 50 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 34 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 18 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 2 */ -#endif - debug(printf("50 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("34 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("18 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("2 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + /* _row0 = _mm256_and_si256( current, bigmask7); */ + /* _row1 = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask7); */ + _t0 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,14), current, 0x55), bigmask7_epi16); + + /* _row2 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask7); */ + /* _row3 = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask7); */ + _t1 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,10), _mm256_srli_epi32(current,4), 0x55), bigmask7_epi16); + + /* _row4 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask7); */ + /* _row5 
= _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask7); */ + _t2 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,6), _mm256_srli_epi32(current,8), 0x55), bigmask7_epi16); + + /* _row6 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask7); */ + /* _row7 = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask7); */ + _t3 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,2), _mm256_srli_epi32(current,12), 0x55), bigmask7_epi16); + + /* _row8 = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask7); */ + /* _row9 = _mm256_srli_epi32(current,18); */ /* No mask necessary */ + _t4 = _mm256_and_si256(_mm256_blend_epi16(_mm256_srli_epi32(current,2), _mm256_srli_epi32(current,16), 0x55), bigmask7_epi16); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,2), mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("49 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("33 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("17 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("1 %04X => %d\n",array[3],counts[array[3]])); - -#else - 
INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 49 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 33 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 17 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 1 */ -#endif - debug(printf("49 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("33 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("17 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("1 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( current, mask7); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask7)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("48 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("32 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("16 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("0 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 48 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 32 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); 
/* 16 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 0 */ -#endif - debug(printf("48 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("32 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("16 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("0 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + oligo = _mm256_or_si256( _mm256_srli_epi32(current,20), _mm256_slli_epi32(next,12)); + /* _row10 = _mm256_and_si256( oligo, bigmask7); */ + /* _row11 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask7); */ + _t5 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,14), oligo, 0x55), bigmask7_epi16); + + /* _row12 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask7); */ + /* _row13 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask7); */ + _t6 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,10), _mm256_srli_epi32(oligo,4), 0x55), bigmask7_epi16); + + /* _row14 = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask7); */ + /* _row15 = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask7); */ + _t7 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,6), _mm256_srli_epi32(oligo,8), 0x55), bigmask7_epi16); + + + _u0 = _mm256_unpackhi_epi32(_t0,_t1); + _u1 = _mm256_unpackhi_epi32(_t2,_t3); + _u2 = _mm256_unpackhi_epi32(_t4,_t5); + _u3 = _mm256_unpackhi_epi32(_t6,_t7); + _u4 = _mm256_unpacklo_epi32(_t0,_t1); + _u5 = _mm256_unpacklo_epi32(_t2,_t3); + _u6 = _mm256_unpacklo_epi32(_t4,_t5); + _u7 = _mm256_unpacklo_epi32(_t6,_t7); + + + _t0 = _mm256_unpackhi_epi64(_u0,_u1); + _t1 = _mm256_unpackhi_epi64(_u2,_u3); + _t2 = _mm256_unpacklo_epi64(_u0,_u1); + _t3 = _mm256_unpacklo_epi64(_u2,_u3); + _t4 = _mm256_unpackhi_epi64(_u4,_u5); + _t5 = _mm256_unpackhi_epi64(_u6,_u7); + _t6 = _mm256_unpacklo_epi64(_u4,_u5); + _t7 = _mm256_unpacklo_epi64(_u6,_u7); + + + _mm256_store_si256(out++, 
_mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,0))); + + return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif + +#ifdef HAVE_AVX512 +static void +extract_7mers_rev_simd_256 (__m512i *out, __m512i current, __m512i next) { + __m512i oligo; + + oligo = _mm512_or_si512( _mm512_srli_epi32(current,20), _mm512_slli_epi32(next,12)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), 
hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask7)); + + _mm512_store_si512(out++, _mm512_srli_epi32(current,18)); /* No mask necessary */ + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask7)); + _mm512_store_si512(out++, _mm512_and_si512( current, hugemask7)); return; } -#endif - - -#ifdef HAVE_AVX2 -static void -count_7mers_rev_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) { - __m256i oligo; - __m256i array; -#ifdef CHECK_FOR_OVERFLOW - __m256i _counts_after, _counts_neg; -#endif +#ifdef USE_UNORDERED_7 +static Chrpos_T +store_7mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i next) { + __m512i array[16]; + + extract_7mers_rev_simd_256(array,current,next); + return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array); +} - oligo = _mm256_or_si256( _mm256_srli_epi32(current,20), _mm256_slli_epi32(next,12)); - array = _mm256_and_si256( _mm256_srli_epi32(oligo,10), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 127 */ - counts[EXTRACT256(array,1)] += 1; /* 111 */ - counts[EXTRACT256(array,2)] += 1; /* 95 */ - counts[EXTRACT256(array,3)] += 1; /* 79 */ - counts[EXTRACT256(array,4)] += 1; /* 63 
*/ - counts[EXTRACT256(array,5)] += 1; /* 47 */ - counts[EXTRACT256(array,6)] += 1; /* 31 */ - counts[EXTRACT256(array,7)] += 1; /* 15 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("63 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("31 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("15 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("63 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("31 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("15 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 126 */ - counts[EXTRACT256(array,1)] += 1; /* 110 */ - counts[EXTRACT256(array,2)] += 
1; /* 94 */ - counts[EXTRACT256(array,3)] += 1; /* 78 */ - counts[EXTRACT256(array,4)] += 1; /* 62 */ - counts[EXTRACT256(array,5)] += 1; /* 46 */ - counts[EXTRACT256(array,6)] += 1; /* 30 */ - counts[EXTRACT256(array,7)] += 1; /* 14 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("62 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("46 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("30 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("14 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("62 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("46 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("30 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("14 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask7); - 
counts[EXTRACT256(array,0)] += 1; /* 125 */ - counts[EXTRACT256(array,1)] += 1; /* 109 */ - counts[EXTRACT256(array,2)] += 1; /* 93 */ - counts[EXTRACT256(array,3)] += 1; /* 77 */ - counts[EXTRACT256(array,4)] += 1; /* 61 */ - counts[EXTRACT256(array,5)] += 1; /* 45 */ - counts[EXTRACT256(array,6)] += 1; /* 29 */ - counts[EXTRACT256(array,7)] += 1; /* 13 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("61 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("45 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("13 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("61 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("45 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("13 %04X => 
%d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 124 */ - counts[EXTRACT256(array,1)] += 1; /* 108 */ - counts[EXTRACT256(array,2)] += 1; /* 92 */ - counts[EXTRACT256(array,3)] += 1; /* 76 */ - counts[EXTRACT256(array,4)] += 1; /* 60 */ - counts[EXTRACT256(array,5)] += 1; /* 44 */ - counts[EXTRACT256(array,6)] += 1; /* 28 */ - counts[EXTRACT256(array,7)] += 1; /* 12 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("60 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("28 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("12 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("60 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - 
debug(printf("28 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("12 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 123 */ - counts[EXTRACT256(array,1)] += 1; /* 107 */ - counts[EXTRACT256(array,2)] += 1; /* 91 */ - counts[EXTRACT256(array,3)] += 1; /* 75 */ - counts[EXTRACT256(array,4)] += 1; /* 59 */ - counts[EXTRACT256(array,5)] += 1; /* 43 */ - counts[EXTRACT256(array,6)] += 1; /* 27 */ - counts[EXTRACT256(array,7)] += 1; /* 11 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("59 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("43 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("11 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("59 %04X => 
%d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("43 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("11 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( oligo, bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 122 */ - counts[EXTRACT256(array,1)] += 1; /* 106 */ - counts[EXTRACT256(array,2)] += 1; /* 90 */ - counts[EXTRACT256(array,3)] += 1; /* 74 */ - counts[EXTRACT256(array,4)] += 1; /* 58 */ - counts[EXTRACT256(array,5)] += 1; /* 42 */ - counts[EXTRACT256(array,6)] += 1; /* 26 */ - counts[EXTRACT256(array,7)] += 1; /* 10 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("58 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("10 %04X => 
%d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("58 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("10 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_srli_epi32(current,18); /* No mask necessary */ - counts[EXTRACT256(array,0)] += 1; /* 121 */ - counts[EXTRACT256(array,1)] += 1; /* 105 */ - counts[EXTRACT256(array,2)] += 1; /* 89 */ - counts[EXTRACT256(array,3)] += 1; /* 73 */ - counts[EXTRACT256(array,4)] += 1; /* 57 */ - counts[EXTRACT256(array,5)] += 1; /* 41 */ - counts[EXTRACT256(array,6)] += 1; /* 25 */ - counts[EXTRACT256(array,7)] += 1; /* 9 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("57 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - 
debug(printf("25 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("9 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("57 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("25 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("9 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 120 */ - counts[EXTRACT256(array,1)] += 1; /* 104 */ - counts[EXTRACT256(array,2)] += 1; /* 88 */ - counts[EXTRACT256(array,3)] += 1; /* 72 */ - counts[EXTRACT256(array,4)] += 1; /* 56 */ - counts[EXTRACT256(array,5)] += 1; /* 40 */ - counts[EXTRACT256(array,6)] += 1; /* 24 */ - counts[EXTRACT256(array,7)] += 1; /* 8 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("56 %04X => 
%d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("24 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("8 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("56 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("24 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("8 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 119 */ - counts[EXTRACT256(array,1)] += 1; /* 103 */ - counts[EXTRACT256(array,2)] += 1; /* 87 */ - counts[EXTRACT256(array,3)] += 1; /* 71 */ - counts[EXTRACT256(array,4)] += 1; /* 55 */ - counts[EXTRACT256(array,5)] += 1; /* 39 */ - counts[EXTRACT256(array,6)] += 1; /* 23 */ - counts[EXTRACT256(array,7)] += 1; /* 7 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - 
inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("55 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("7 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("55 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("7 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 118 */ - counts[EXTRACT256(array,1)] += 1; /* 102 */ - counts[EXTRACT256(array,2)] += 1; /* 86 */ - counts[EXTRACT256(array,3)] += 1; /* 70 */ - counts[EXTRACT256(array,4)] += 1; /* 54 */ - counts[EXTRACT256(array,5)] += 1; /* 38 */ - counts[EXTRACT256(array,6)] += 1; /* 22 */ - counts[EXTRACT256(array,7)] += 1; /* 6 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("54 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("38 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("6 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("54 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("38 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("6 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 117 */ - counts[EXTRACT256(array,1)] += 1; /* 101 */ - counts[EXTRACT256(array,2)] += 1; /* 85 */ - counts[EXTRACT256(array,3)] += 1; /* 69 */ - counts[EXTRACT256(array,4)] += 1; /* 53 */ - counts[EXTRACT256(array,5)] += 1; /* 37 */ - counts[EXTRACT256(array,6)] += 1; /* 21 */ - counts[EXTRACT256(array,7)] += 1; /* 5 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = 
INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("53 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("37 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("21 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("5 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("53 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("37 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("21 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("5 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 116 */ - counts[EXTRACT256(array,1)] += 1; /* 100 */ - counts[EXTRACT256(array,2)] += 1; /* 84 */ - counts[EXTRACT256(array,3)] += 1; /* 68 */ - counts[EXTRACT256(array,4)] += 1; /* 52 */ - counts[EXTRACT256(array,5)] += 1; /* 36 */ - counts[EXTRACT256(array,6)] += 1; /* 20 */ - counts[EXTRACT256(array,7)] += 1; /* 4 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - 
inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("52 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("20 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("4 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("52 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("20 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("4 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 115 */ - counts[EXTRACT256(array,1)] += 1; /* 99 */ - counts[EXTRACT256(array,2)] += 1; /* 83 */ - counts[EXTRACT256(array,3)] += 1; /* 67 */ - counts[EXTRACT256(array,4)] += 1; /* 51 */ - counts[EXTRACT256(array,5)] += 1; /* 35 */ - counts[EXTRACT256(array,6)] += 1; /* 19 */ - counts[EXTRACT256(array,7)] += 1; /* 3 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("51 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("3 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("51 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("3 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 114 */ - counts[EXTRACT256(array,1)] += 1; /* 98 */ - counts[EXTRACT256(array,2)] += 1; /* 82 */ - counts[EXTRACT256(array,3)] += 1; /* 66 */ - counts[EXTRACT256(array,4)] += 1; /* 50 */ - counts[EXTRACT256(array,5)] += 1; /* 34 */ - counts[EXTRACT256(array,6)] += 1; /* 18 */ - counts[EXTRACT256(array,7)] += 1; /* 2 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; 
- } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("50 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("2 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("50 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("2 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 113 */ - counts[EXTRACT256(array,1)] += 1; /* 97 */ - counts[EXTRACT256(array,2)] += 1; /* 81 */ - counts[EXTRACT256(array,3)] += 1; /* 65 */ - counts[EXTRACT256(array,4)] += 1; /* 49 */ - counts[EXTRACT256(array,5)] += 1; /* 33 */ - counts[EXTRACT256(array,6)] += 1; /* 17 */ - counts[EXTRACT256(array,7)] += 1; /* 1 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum 
counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("49 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("33 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("17 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("1 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("49 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("33 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("17 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("1 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( current, bigmask7); - counts[EXTRACT256(array,0)] += 1; /* 112 */ - counts[EXTRACT256(array,1)] += 1; /* 96 */ - counts[EXTRACT256(array,2)] += 1; /* 80 */ - counts[EXTRACT256(array,3)] += 1; /* 64 */ - counts[EXTRACT256(array,4)] += 1; /* 48 */ - counts[EXTRACT256(array,5)] += 1; /* 32 */ - counts[EXTRACT256(array,6)] += 1; /* 16 */ - counts[EXTRACT256(array,7)] += 1; /* 0 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if 
(_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("48 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("32 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("16 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("0 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("48 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("32 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("16 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("0 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); +#else +/* Includes extract_7mers_rev_simd_256_ordered (__m512i *out, __m512i current, __m512i next) */ +static Chrpos_T +store_7mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i next) { + __m512i array[16], *out; + __m512i oligo, _shuffle0, _shuffle1, _shuffle2; + __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; + + out = &(array[0]); + + _u0 = _mm512_and_si512( current, 
hugemask7); + /* _row1 = _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask7); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,14), highmask7); + _t0 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask7); + /* _row3 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask7); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,10), highmask7); + _t1 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask7); + /* _row5 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask7); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,6), highmask7); + _t2 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask7); + /* _row7 = _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask7); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,2), highmask7); + _t3 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask7); + /* _row9 = _mm512_srli_epi32(current,18); */ /* No mask necessary */ + _u1 = _mm512_and_si512( _mm512_srli_epi32(current,2), highmask7); + _t4 = _mm512_or_si512(_u0, _u1); + + + oligo = _mm512_or_si512( _mm512_srli_epi32(current,20), _mm512_slli_epi32(next,12)); + _u0 = _mm512_and_si512( oligo, hugemask7); + /* _row11 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask7); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,14), highmask7); + _t5 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask7); + /* _row13 = _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask7); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,10), highmask7); + _t6 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask7); + /* _row15 = _mm512_and_si512( _mm512_srli_epi32(oligo,10), hugemask7); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,6), highmask7); + _t7 = _mm512_or_si512(_u0, _u1); + + + 
_u0 = _mm512_unpackhi_epi32(_t0,_t1); + _u1 = _mm512_unpackhi_epi32(_t2,_t3); + _u2 = _mm512_unpackhi_epi32(_t4,_t5); + _u3 = _mm512_unpackhi_epi32(_t6,_t7); + _u4 = _mm512_unpacklo_epi32(_t0,_t1); + _u5 = _mm512_unpacklo_epi32(_t2,_t3); + _u6 = _mm512_unpacklo_epi32(_t4,_t5); + _u7 = _mm512_unpacklo_epi32(_t6,_t7); + + + /* Split: top half */ + _shuffle0 = _mm512_setr_epi64(7, 8+7, 6, 8+6, 5, 8+5, 4, 8+4); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); + + + _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + + /* Split: bottom half */ + _shuffle0 = _mm512_setr_epi64(3, 8+3, 2, 8+2, 1, 8+1, 0, 8+0); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, 
_shuffle0, _u7); + + + /* _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); */ + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + /* _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); */ + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); - return; + return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array); } #endif +#endif #if !defined(HAVE_AVX2) static Chrpos_T -store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, - Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { +store_7mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { Genomecomp_T masked, oligo; #ifdef INDIVIDUAL_SHIFTS #elif defined(SIMD_MASK_THEN_STORE) @@ -39473,72 +26415,62 @@ #ifdef INDIVIDUAL_SHIFTS masked = low_rc & MASK7; /* 0 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } masked = (low_rc >> 2) & 
MASK7; /* 1 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; + table[positions[masked] + (--counts[masked])] = chrpos - 1; } masked = (low_rc >> 4) & MASK7; /* 2 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; + table[positions[masked] + (--counts[masked])] = chrpos - 2; } masked = (low_rc >> 6) & MASK7; /* 3 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; + table[positions[masked] + (--counts[masked])] = chrpos - 3; } masked = (low_rc >> 8) & MASK7; /* 4 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; + table[positions[masked] + (--counts[masked])] = chrpos - 4; } masked = (low_rc >> 10) & MASK7; /* 5 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; + table[positions[masked] + (--counts[masked])] = chrpos - 5; } masked = (low_rc >> 12) & MASK7; /* 6 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; + table[positions[masked] + (--counts[masked])] = chrpos - 6; } masked = (low_rc >> 14) & MASK7; /* 7 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; + table[positions[masked] + (--counts[masked])] = chrpos - 7; } masked = (low_rc >> 16) & MASK7; /* 8 */ if (counts[masked]) { - 
assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; + table[positions[masked] + (--counts[masked])] = chrpos - 8; } masked = low_rc >> 18; /* 9, No mask necessary */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; + table[positions[masked] + (--counts[masked])] = chrpos - 9; } #else @@ -39551,30 +26483,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; + table[positions[masked] + (--counts[masked])] = chrpos - 1; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; + table[positions[masked] + (--counts[masked])] = chrpos - 2; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; + table[positions[masked] + (--counts[masked])] = chrpos - 3; } @@ -39587,30 +26515,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; + table[positions[masked] + (--counts[masked])] = chrpos - 4; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); 
debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; + table[positions[masked] + (--counts[masked])] = chrpos - 5; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; + table[positions[masked] + (--counts[masked])] = chrpos - 6; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; + table[positions[masked] + (--counts[masked])] = chrpos - 7; } @@ -39623,16 +26547,14 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; + table[positions[masked] + (--counts[masked])] = chrpos - 8; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; + table[positions[masked] + (--counts[masked])] = chrpos - 9; } #endif @@ -39643,44 +26565,38 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK7; /* 10 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; + table[positions[masked] + (--counts[masked])] = chrpos - 10; } masked = (oligo >> 2) & MASK7; /* 11 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; + table[positions[masked] + (--counts[masked])] = chrpos - 11; } masked = (oligo >> 4) & MASK7; /* 12 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); 
debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; + table[positions[masked] + (--counts[masked])] = chrpos - 12; } masked = (oligo >> 6) & MASK7; /* 13 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; + table[positions[masked] + (--counts[masked])] = chrpos - 13; } masked = (oligo >> 8) & MASK7; /* 14 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; + table[positions[masked] + (--counts[masked])] = chrpos - 14; } masked = (oligo >> 10) & MASK7; /* 15 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; + table[positions[masked] + (--counts[masked])] = chrpos - 15; } #else @@ -39693,30 +26609,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; + table[positions[masked] + (--counts[masked])] = chrpos - 10; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; + table[positions[masked] + (--counts[masked])] = chrpos - 11; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; + table[positions[masked] + (--counts[masked])] = chrpos - 12; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at 
%u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; + table[positions[masked] + (--counts[masked])] = chrpos - 13; } @@ -39729,16 +26641,14 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; + table[positions[masked] + (--counts[masked])] = chrpos - 14; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; + table[positions[masked] + (--counts[masked])] = chrpos - 15; } #endif @@ -39747,72 +26657,62 @@ #ifdef INDIVIDUAL_SHIFTS masked = high_rc & MASK7; /* 16 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; + table[positions[masked] + (--counts[masked])] = chrpos - 16; } masked = (high_rc >> 2) & MASK7; /* 17 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; + table[positions[masked] + (--counts[masked])] = chrpos - 17; } masked = (high_rc >> 4) & MASK7; /* 18 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; + table[positions[masked] + (--counts[masked])] = chrpos - 18; } masked = (high_rc >> 6) & MASK7; /* 19 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; + table[positions[masked] + (--counts[masked])] = chrpos - 19; } masked = (high_rc >> 8) & MASK7; /* 20 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); 
debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; + table[positions[masked] + (--counts[masked])] = chrpos - 20; } masked = (high_rc >> 10) & MASK7; /* 21 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; + table[positions[masked] + (--counts[masked])] = chrpos - 21; } masked = (high_rc >> 12) & MASK7; /* 22 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; + table[positions[masked] + (--counts[masked])] = chrpos - 22; } masked = (high_rc >> 14) & MASK7; /* 23 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; + table[positions[masked] + (--counts[masked])] = chrpos - 23; } masked = (high_rc >> 16) & MASK7; /* 24 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; + table[positions[masked] + (--counts[masked])] = chrpos - 24; } masked = high_rc >> 18; /* 25, No mask necessary */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; + table[positions[masked] + (--counts[masked])] = chrpos - 25; } #else @@ -39825,30 +26725,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; + table[positions[masked] + (--counts[masked])] = chrpos - 16; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > 
positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; + table[positions[masked] + (--counts[masked])] = chrpos - 17; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; + table[positions[masked] + (--counts[masked])] = chrpos - 18; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; + table[positions[masked] + (--counts[masked])] = chrpos - 19; } @@ -39861,30 +26757,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; + table[positions[masked] + (--counts[masked])] = chrpos - 20; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; + table[positions[masked] + (--counts[masked])] = chrpos - 21; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; + table[positions[masked] + (--counts[masked])] = chrpos - 22; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; + table[positions[masked] + (--counts[masked])] = chrpos - 23; } @@ -39897,16 +26789,14 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at 
%u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; + table[positions[masked] + (--counts[masked])] = chrpos - 24; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; + table[positions[masked] + (--counts[masked])] = chrpos - 25; } #endif @@ -39917,44 +26807,38 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK7; /* 26 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; + table[positions[masked] + (--counts[masked])] = chrpos - 26; } masked = (oligo >> 2) & MASK7; /* 27 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; + table[positions[masked] + (--counts[masked])] = chrpos - 27; } masked = (oligo >> 4) & MASK7; /* 28 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; + table[positions[masked] + (--counts[masked])] = chrpos - 28; } masked = (oligo >> 6) & MASK7; /* 29 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; + table[positions[masked] + (--counts[masked])] = chrpos - 29; } masked = (oligo >> 8) & MASK7; /* 30 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; + table[positions[masked] + (--counts[masked])] = chrpos - 30; } masked = (oligo >> 10) & MASK7; /* 31 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing 
masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; + table[positions[masked] + (--counts[masked])] = chrpos - 31; } #else @@ -39967,30 +26851,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; + table[positions[masked] + (--counts[masked])] = chrpos - 26; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; + table[positions[masked] + (--counts[masked])] = chrpos - 27; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; + table[positions[masked] + (--counts[masked])] = chrpos - 28; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; + table[positions[masked] + (--counts[masked])] = chrpos - 29; } @@ -40003,16 +26883,14 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; + table[positions[masked] + (--counts[masked])] = chrpos - 30; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; + table[positions[masked] + (--counts[masked])] = chrpos - 31; } #endif @@ -40022,275 +26900,342 @@ #else /* HAVE_AVX2 */ static Chrpos_T -store_7mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T 
*counts, - Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { +store_7mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { Genomecomp_T masked, oligo; __m256i _oligo, _masked, _counts; + __m256i _blocks, _envelopes, _addresses, _address_mask, _count_mask; + + + _address_mask = _mm256_set1_epi32(0x3); + _count_mask = _mm256_set1_epi32(0xFF); - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask7); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); if (EXTRACT256(_counts,0)) { masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; + } } if (EXTRACT256(_counts,1)) { masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; + } } if (EXTRACT256(_counts,2)) { masked = 
EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; + } } if (EXTRACT256(_counts,3)) { masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; + } } if (EXTRACT256(_counts,4)) { masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; - } + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; + }} + if (EXTRACT256(_counts,5)) { masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; + } } if (EXTRACT256(_counts,6)) { masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + 
table[positions[masked] + (--counts[masked])] = chrpos - 6; + } } if (EXTRACT256(_counts,7)) { masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; + } } _oligo = _mm256_srli_epi32(_oligo, 16); _masked = _mm256_and_si256(_oligo, bigmask7); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); if (EXTRACT256(_counts,0)) { masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; + } } if (EXTRACT256(_counts,1)) { masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; + } } oligo = low_rc >> 20; /* For 15..10 */ oligo |= high_rc << 12; - _oligo = 
_mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask7); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); if (EXTRACT256(_counts,0)) { masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; + } } if (EXTRACT256(_counts,1)) { masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; + } } if (EXTRACT256(_counts,2)) { masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; - } + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + (--counts[masked])] = chrpos - 12; + }} + if (EXTRACT256(_counts,3)) { masked = EXTRACT256(_masked,3); - 
assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; + } } if (EXTRACT256(_counts,4)) { masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; + } } if (EXTRACT256(_counts,5)) { masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; + } } - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask7); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); if (EXTRACT256(_counts,0)) { masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing 
masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; + } } if (EXTRACT256(_counts,1)) { masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; + } } if (EXTRACT256(_counts,2)) { masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; + } } if (EXTRACT256(_counts,3)) { masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = chrpos - 19; + } } if (EXTRACT256(_counts,4)) { masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; + } } if (EXTRACT256(_counts,5)) 
{ masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; + } } if (EXTRACT256(_counts,6)) { masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; + } } if (EXTRACT256(_counts,7)) { masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; + } } _oligo = _mm256_srli_epi32(_oligo, 16); _masked = _mm256_and_si256(_oligo, bigmask7); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); if (EXTRACT256(_counts,0)) { masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] 
= chrpos - 24; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; + } } if (EXTRACT256(_counts,1)) { masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; + } } oligo = high_rc >> 20; /* For 31..26 */ oligo |= nextlow_rc << 12; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask7); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - assert(EXTRACT256(_counts,0) == counts[EXTRACT256(_masked,0)]); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); + if (EXTRACT256(_counts,0)) { masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; + } } - assert(EXTRACT256(_counts,1) == counts[EXTRACT256(_masked,1)]); if (EXTRACT256(_counts,1)) { masked = EXTRACT256(_masked,1); - 
assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; + } } - assert(EXTRACT256(_counts,2) == counts[EXTRACT256(_masked,2)]); if (EXTRACT256(_counts,2)) { masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; + } } - assert(EXTRACT256(_counts,3) == counts[EXTRACT256(_masked,3)]); if (EXTRACT256(_counts,3)) { masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; + } } - assert(EXTRACT256(_counts,4) == counts[EXTRACT256(_masked,4)]); if (EXTRACT256(_counts,4)) { masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; + } } - assert(EXTRACT256(_counts,5) == counts[EXTRACT256(_masked,5)]); if (EXTRACT256(_counts,5)) { masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - 
debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; + } } return chrpos - 32; @@ -40303,7 +27248,7 @@ #if !defined(HAVE_AVX2) static void -count_6mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { +count_6mers_rev_32 (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { Genomecomp_T masked, oligo; #ifdef INDIVIDUAL_SHIFTS #elif defined(SIMD_MASK_THEN_STORE) @@ -40316,47 +27261,47 @@ #ifdef INDIVIDUAL_SHIFTS masked = low_rc & MASK6; /* 0 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("0 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 2) & MASK6; /* 1 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("1 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 4) & MASK6; /* 2 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("2 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 6) & MASK6; /* 3 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("3 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 8) & MASK6; /* 4 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("4 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 10) & MASK6; /* 5 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("5 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 12) & MASK6; /* 6 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("6 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 14) & MASK6; /* 7 */ - 
INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("7 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 16) & MASK6; /* 8 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("8 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 18) & MASK6; /* 9 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("9 %04X => %d\n",masked,counts[masked])); masked = low_rc >> 20; /* 10, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("10 %04X => %d\n",masked,counts[masked])); #else @@ -40368,19 +27313,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("0 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("1 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("2 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("3 %04X => %d\n",masked,counts[masked])); @@ -40392,19 +27337,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("4 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("5 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("6 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("7 %04X => 
%d\n",masked,counts[masked])); @@ -40416,15 +27361,15 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("8 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("9 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("10 %04X => %d\n",masked,counts[masked])); #endif @@ -40434,23 +27379,23 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK6; /* 11 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("11 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 2) & MASK6; /* 12 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("12 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 4) & MASK6; /* 13 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("13 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 6) & MASK6; /* 14 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("14 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 8) & MASK6; /* 15 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("15 %04X => %d\n",masked,counts[masked])); #else @@ -40462,71 +27407,71 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("11 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("12 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("13 %04X => %d\n",masked,counts[masked])); masked = 
EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("14 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 8) & MASK6; /* 15 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("15 %04X => %d\n",masked,counts[masked])); #endif #ifdef INDIVIDUAL_SHIFTS masked = high_rc & MASK6; /* 16 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("16 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 2) & MASK6; /* 17 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("17 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 4) & MASK6; /* 18 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("18 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 6) & MASK6; /* 19 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("19 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 8) & MASK6; /* 20 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("20 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 10) & MASK6; /* 21 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("21 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 12) & MASK6; /* 22 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("22 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 14) & MASK6; /* 23 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("23 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 16) & MASK6; /* 24 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("24 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 18) & MASK6; /* 25 */ - 
INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("25 %04X => %d\n",masked,counts[masked])); masked = high_rc >> 20; /* 26, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("26 %04X => %d\n",masked,counts[masked])); #else @@ -40538,19 +27483,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("16 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("17 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("18 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("19 %04X => %d\n",masked,counts[masked])); @@ -40562,19 +27507,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("20 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("21 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("22 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("23 %04X => %d\n",masked,counts[masked])); @@ -40586,15 +27531,15 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("24 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("25 %04X => 
%d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("26 %04X => %d\n",masked,counts[masked])); #endif @@ -40604,23 +27549,23 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK6; /* 27 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("27 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 2) & MASK6; /* 28 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("28 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 4) & MASK6; /* 29 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("29 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 6) & MASK6; /* 30 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("30 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 8) & MASK6; /* 31 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("31 %04X => %d\n",masked,counts[masked])); #else @@ -40632,24 +27577,24 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("27 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("28 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("29 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("30 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 8) & MASK6; /* 31 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("31 %04X => %d\n",masked,counts[masked])); #endif @@ -40659,285 +27604,168 @@ #else 
/* HAVE_AVX2 */ static void -count_6mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { +count_6mers_rev_32 (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { Genomecomp_T masked, oligo; __m256i _oligo, _masked; -#ifdef CHECK_FOR_OVERFLOW - __m256i _counts_after, _counts_neg; -#endif - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask6); masked = EXTRACT256(_masked,0); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("0 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("1 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,2); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("2 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,3); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("3 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,4); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("4 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,5); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("5 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,6); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("6 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,7); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("7 %04X => %d\n",masked,counts[masked])); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded 
maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - _oligo = _mm256_srli_epi32(_oligo, 16); _masked = _mm256_and_si256(_oligo, bigmask6); masked = EXTRACT256(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("8 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("9 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("10 %04X => %d\n",masked,counts[masked])); oligo = low_rc >> 22; /* For 15..11 */ oligo |= high_rc << 10; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask6); masked = EXTRACT256(_masked,0); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("11 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("12 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,2); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("13 %04X => 
%d\n",masked,counts[masked])); masked = EXTRACT256(_masked,3); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("14 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,4); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("15 %04X => %d\n",masked,counts[masked])); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low5); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask6); masked = EXTRACT256(_masked,0); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("16 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("17 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,2); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("18 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,3); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("19 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,4); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("20 %04X => 
%d\n",masked,counts[masked])); masked = EXTRACT256(_masked,5); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("21 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,6); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("22 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,7); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("23 %04X => %d\n",masked,counts[masked])); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - _oligo = _mm256_srli_epi32(_oligo, 16); _masked = _mm256_and_si256(_oligo, bigmask6); masked = EXTRACT256(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("24 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("25 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,2); - 
INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("26 %04X => %d\n",masked,counts[masked])); oligo = high_rc >> 22; /* For 31..27 */ oligo |= nextlow_rc << 10; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask6); masked = EXTRACT256(_masked,0); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("27 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,1); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("28 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,2); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("29 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,3); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("30 %04X => %d\n",masked,counts[masked])); masked = EXTRACT256(_masked,4); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("31 %04X => %d\n",masked,counts[masked])); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low5); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - return; } @@ -40946,9 +27774,9 @@ /* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc}, and next 
to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */ -#ifdef USE_SIMD_FOR_COUNTS +#ifdef HAVE_SSE2 static void -extract_6mers_rev_simd (__m128i *out, __m128i current, __m128i next) { +extract_6mers_rev_simd_64 (__m128i *out, __m128i current, __m128i next) { __m128i oligo; oligo = _mm_or_si128( _mm_srli_epi32(current,22), _mm_slli_epi32(next,10)); @@ -40958,7 +27786,7 @@ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask6)); _mm_store_si128(out++, _mm_and_si128( oligo, mask6)); - _mm_store_si128(out++, _mm_srli_epi32(current,20)); + _mm_store_si128(out++, _mm_srli_epi32(current,20)); /* No mask necessary */ _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask6)); _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask6)); _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask6)); @@ -40973,6 +27801,98 @@ return; } +#ifdef USE_UNORDERED_6 +static Chrpos_T +store_6mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16]; + + extract_6mers_rev_simd_64(array,current,next); + return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array); +} + +#else +/* Includes extract_6mers_rev_simd_64_ordered (__m128i *out, __m128i current, __m128i next) */ +static Chrpos_T +store_6mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16], *out; + __m128i oligo; + __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m128i _u0, _u1, _u2, _u3; + + out = &(array[0]); + + /* _row0 = _mm_and_si128( current, mask6); */ + /* _row1 = _mm_and_si128( _mm_srli_epi32(current,2), mask6); */ + _t0 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,14), current, 0x55), mask6_epi16); + + /* _row2 = _mm_and_si128( _mm_srli_epi32(current,4), mask6); */ + /* _row3 = _mm_and_si128( _mm_srli_epi32(current,6), mask6); */ + _t1 = 
_mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,10), _mm_srli_epi32(current, 4), 0x55), mask6_epi16); + + /* _row4 = _mm_and_si128( _mm_srli_epi32(current,8), mask6); */ + /* _row5 = _mm_and_si128( _mm_srli_epi32(current,10), mask6); */ + _t2 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,6), _mm_srli_epi32(current, 8), 0x55), mask6_epi16); + + /* _row6 = _mm_and_si128( _mm_srli_epi32(current,12), mask6); */ + /* _row7 = _mm_and_si128( _mm_srli_epi32(current,14), mask6); */ + _t3 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,2), _mm_srli_epi32(current, 12), 0x55), mask6_epi16); + + /* _row8 = _mm_and_si128( _mm_srli_epi32(current,16), mask6); */ + /* _row9 = _mm_and_si128( _mm_srli_epi32(current,18), mask6); */ + _t4 = _mm_and_si128(_mm_blend_epi16(_mm_srli_epi32(current,2), _mm_srli_epi32(current, 16), 0x55), mask6_epi16); + + + oligo = _mm_or_si128( _mm_srli_epi32(current,22), _mm_slli_epi32(next,10)); + /* _row10 = _mm_srli_epi32(current,20); */ /* No mask necessary */ + /* _row11 = _mm_and_si128( oligo, mask6); */ + _t5 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,16), _mm_srli_epi32(current, 20), 0x55), mask6_epi16); + + /* _row12 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask6); */ + /* _row13 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask6); */ + _t6 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,12), _mm_srli_epi32(oligo, 2), 0x55), mask6_epi16); + + /* _row14 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask6); */ + /* _row15 = _mm_and_si128( _mm_srli_epi32(oligo,8), mask6); */ + _t7 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,8), _mm_srli_epi32(oligo, 6), 0x55), mask6_epi16); + + + /* Split: top half */ + _u0 = _mm_unpackhi_epi32(_t0,_t1); + _u1 = _mm_unpackhi_epi32(_t2,_t3); + _u2 = _mm_unpackhi_epi32(_t4,_t5); + _u3 = _mm_unpackhi_epi32(_t6,_t7); + + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); + _mm_store_si128(out++, 
_mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3)); + + /* Split: bottom half */ + _u0 = _mm_unpacklo_epi32(_t0,_t1); + _u1 = _mm_unpacklo_epi32(_t2,_t3); + _u2 = _mm_unpacklo_epi32(_t4,_t5); + _u3 = _mm_unpacklo_epi32(_t6,_t7); + + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3)); + + return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif + #ifdef HAVE_AVX2 static void extract_6mers_rev_simd_128 (__m256i *out, __m256i current, __m256i next) { @@ -40999,1553 +27919,273 @@ return; } -#endif - - -static void -count_6mers_rev_simd (Count_T *counts, Inquery_T *inquery, __m128i current, __m128i next) { - __m128i oligo; -#ifdef HAVE_SSE4_1 - __m128i array; -#else - Genomecomp_T array[4]; -#endif -#if defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW) - __m128i _counts_after, _counts_neg; -#endif - - oligo = _mm_or_si128( _mm_srli_epi32(current,22), _mm_slli_epi32(next,10)); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,8), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,8), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = 
_mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("63 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("47 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("31 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("15 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 63 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 47 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 31 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 15 */ -#endif - debug(printf("63 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("47 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("31 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("15 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - 
debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("62 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("46 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("30 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("14 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 62 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 46 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 30 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 14 */ -#endif - debug(printf("62 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("46 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("30 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("14 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - 
inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("61 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("45 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("29 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("13 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 61 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 45 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 29 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 13 */ -#endif - debug(printf("61 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("45 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("29 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("13 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,2), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - 
inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("60 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("44 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("28 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("12 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 60 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 44 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 28 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 12 */ -#endif - debug(printf("60 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("44 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("28 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("12 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( oligo, mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("59 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("43 %04X => %d\n",array[1],counts[array[1]])); - 
debug(printf("27 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("11 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 59 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 43 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 27 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 11 */ -#endif - debug(printf("59 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("43 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("27 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("11 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - - -#ifdef HAVE_SSE4_1 - array = _mm_srli_epi32(current,20); -#else - _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,20)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("58 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("42 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("26 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("10 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 58 */ - 
INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 42 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 26 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 10 */ -#endif - debug(printf("58 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("42 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("26 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("10 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); - -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,18), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,18), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("57 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("41 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("25 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("9 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 57 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 41 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 25 */ - 
INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 9 */ -#endif - debug(printf("57 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("41 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("25 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("9 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,16), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,16), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("56 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("40 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("24 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("8 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 56 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 40 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 24 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 8 */ -#endif - debug(printf("56 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("40 
%04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("24 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("8 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); +#ifdef USE_UNORDERED_6 +static Chrpos_T +store_6mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + __m256i array[16]; + + extract_6mers_rev_simd_128(array,current,next); + return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array); +} -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,14), mask6); #else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("55 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("39 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("23 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("7 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 55 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 39 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 23 */ - 
INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 7 */ -#endif - debug(printf("55 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("39 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("23 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("7 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); +/* Includes extract_6mers_rev_simd_128_ordered (__m256i *out, __m256i current, __m256i next) */ +static Chrpos_T +store_6mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + __m256i array[16], *out; + __m256i oligo; + __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,12), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("54 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("38 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("22 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("6 %04X => %d\n",array[3],counts[array[3]])); - -#else - 
INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 54 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 38 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 22 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 6 */ -#endif - debug(printf("54 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("38 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("22 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("6 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + out = &(array[0]); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,10), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("53 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("37 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("21 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("5 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 53 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 37 */ - 
INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 21 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 5 */ -#endif - debug(printf("53 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("37 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("21 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("5 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + /* _row0 = _mm256_and_si256( current, bigmask6); */ + /* _row1 = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask6); */ + _t0 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,14), current, 0x55), bigmask6_epi16); + + /* _row2 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask6); */ + /* _row3 = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask6); */ + _t1 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,10), _mm256_srli_epi32(current,4), 0x55), bigmask6_epi16); + + /* _row4 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask6); */ + /* _row5 = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask6); */ + _t2 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,6), _mm256_srli_epi32(current,8), 0x55), bigmask6_epi16); + + /* _row6 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask6); */ + /* _row7 = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask6); */ + _t3 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,2), _mm256_srli_epi32(current,12), 0x55), bigmask6_epi16); + + /* _row8 = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask6); */ + /* _row9 = _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask6); */ + _t4 = _mm256_and_si256(_mm256_blend_epi16(_mm256_srli_epi32(current,2), _mm256_srli_epi32(current,16), 0x55), bigmask6_epi16); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,8), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), 
mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("52 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("36 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("20 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("4 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 52 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 36 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 20 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 4 */ -#endif - debug(printf("52 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("36 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("20 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("4 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,6), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef 
CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("51 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("35 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("19 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("3 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 51 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 35 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 19 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 3 */ -#endif - debug(printf("51 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("35 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("19 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("3 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + oligo = _mm256_or_si256( _mm256_srli_epi32(current,22), _mm256_slli_epi32(next,10)); + /* _row10 = _mm256_srli_epi32(current,20); */ /* No mask necessary */ + /* _row11 = _mm256_and_si256( oligo, bigmask6); */ + _t5 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,16), _mm256_srli_epi32(current,20), 0x55), bigmask6_epi16); + + /* _row12 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask6); */ + /* _row13 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask6); */ + _t6 = 
_mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,12), _mm256_srli_epi32(oligo,2), 0x55), bigmask6_epi16); + + /* _row14 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask6); */ + /* _row15 = _mm256_and_si256( _mm256_srli_epi32(oligo,8), bigmask6); */ + _t7 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,8), _mm256_srli_epi32(oligo,6), 0x55), bigmask6_epi16); + + + _u0 = _mm256_unpackhi_epi32(_t0,_t1); + _u1 = _mm256_unpackhi_epi32(_t2,_t3); + _u2 = _mm256_unpackhi_epi32(_t4,_t5); + _u3 = _mm256_unpackhi_epi32(_t6,_t7); + _u4 = _mm256_unpacklo_epi32(_t0,_t1); + _u5 = _mm256_unpacklo_epi32(_t2,_t3); + _u6 = _mm256_unpacklo_epi32(_t4,_t5); + _u7 = _mm256_unpacklo_epi32(_t6,_t7); + + + _t0 = _mm256_unpackhi_epi64(_u0,_u1); + _t1 = _mm256_unpackhi_epi64(_u2,_u3); + _t2 = _mm256_unpacklo_epi64(_u0,_u1); + _t3 = _mm256_unpacklo_epi64(_u2,_u3); + _t4 = _mm256_unpackhi_epi64(_u4,_u5); + _t5 = _mm256_unpackhi_epi64(_u6,_u7); + _t6 = _mm256_unpacklo_epi64(_u4,_u5); + _t7 = _mm256_unpacklo_epi64(_u6,_u7); + + + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,0))); + 
_mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,0))); + + return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif + +#ifdef HAVE_AVX512 +static void +extract_6mers_rev_simd_256 (__m512i *out, __m512i current, __m512i next) { + __m512i oligo; + + oligo = _mm512_or_si512( _mm512_srli_epi32(current,22), _mm512_slli_epi32(next,10)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask6)); + + _mm512_store_si512(out++, _mm512_srli_epi32(current,20)); /* No mask necessary */ + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,18), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask6)); + _mm512_store_si512(out++, 
_mm512_and_si512( _mm512_srli_epi32(current,2), hugemask6)); + _mm512_store_si512(out++, _mm512_and_si512( current, hugemask6)); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,4), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("50 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("34 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("18 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("2 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 50 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 34 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 18 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 2 */ -#endif - debug(printf("50 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("34 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("18 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("2 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + return; +} -#ifdef HAVE_SSE4_1 - array = 
_mm_and_si128( _mm_srli_epi32(current,2), mask6); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("49 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("33 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("17 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("1 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 49 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 33 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 17 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 1 */ -#endif - debug(printf("49 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("33 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("17 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("1 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); +#ifdef USE_UNORDERED_6 +static Chrpos_T +store_6mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i next) { + __m512i array[16]; + + 
extract_6mers_rev_simd_256(array,current,next); + return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array); +} -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( current, mask6); #else - _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask6)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("48 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("32 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("16 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("0 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 48 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 32 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 16 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 0 */ -#endif - debug(printf("48 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("32 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("16 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("0 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); +/* Includes extract_6mers_rev_simd_256_ordered (__m512i *out, __m512i current, 
__m512i next) */ +static Chrpos_T +store_6mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i next) { + __m512i array[16], *out; + __m512i oligo, _shuffle0, _shuffle1, _shuffle2; + __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; + + out = &(array[0]); + + _u0 = _mm512_and_si512( current, hugemask6); + /* _row1 = _mm512_and_si512(_mm512_srli_epi32(current,2), hugemask6); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,14), highmask6); + _t0 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask6); + /* _row3 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask6); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,10), highmask6); + _t1 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask6); + /* _row5 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask6); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,6), highmask6); + _t2 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask6); + /* _row7 = _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask6); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,2), highmask6); + _t3 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask6); + /* _row9 = _mm512_and_si512( _mm512_srli_epi32(current,18), hugemask6); */ + _u1 = _mm512_and_si512( _mm512_srli_epi32(current,2), highmask6); + _t4 = _mm512_or_si512(_u0, _u1); + + + oligo = _mm512_or_si512( _mm512_srli_epi32(current,22), _mm512_slli_epi32(next,10)); + _u0 = _mm512_srli_epi32(current,20); /* No mask necessary */ + /* _row11 = _mm512_and_si512( oligo, hugemask6); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,16), highmask6); + _t5 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask6); + /* _row13 = 
_mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask6); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,12), highmask6); + _t6 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask6); + /* _row15 = _mm512_and_si512( _mm512_srli_epi32(oligo,8), hugemask6); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,8), highmask6); + _t7 = _mm512_or_si512(_u0, _u1); + + + _u0 = _mm512_unpackhi_epi32(_t0,_t1); + _u1 = _mm512_unpackhi_epi32(_t2,_t3); + _u2 = _mm512_unpackhi_epi32(_t4,_t5); + _u3 = _mm512_unpackhi_epi32(_t6,_t7); + _u4 = _mm512_unpacklo_epi32(_t0,_t1); + _u5 = _mm512_unpacklo_epi32(_t2,_t3); + _u6 = _mm512_unpacklo_epi32(_t4,_t5); + _u7 = _mm512_unpacklo_epi32(_t6,_t7); + + + /* Split: top half */ + _shuffle0 = _mm512_setr_epi64(7, 8+7, 6, 8+6, 5, 8+5, 4, 8+4); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); + + + _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3); + _mm512_store_si512(out++, 
_mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + + /* Split: bottom half */ + _shuffle0 = _mm512_setr_epi64(3, 8+3, 2, 8+2, 1, 8+1, 0, 8+0); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); + + + /* _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); */ + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + /* _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); */ + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); - return; + return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array); } - #endif - -#ifdef HAVE_AVX2 -static void -count_6mers_rev_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) { - __m256i oligo; - __m256i array; -#ifdef CHECK_FOR_OVERFLOW - __m256i _counts_after, _counts_neg; #endif - oligo = _mm256_or_si256( _mm256_srli_epi32(current,22), _mm256_slli_epi32(next,10)); - array = _mm256_and_si256( 
_mm256_srli_epi32(oligo,8), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 127 */ - counts[EXTRACT256(array,1)] += 1; /* 111 */ - counts[EXTRACT256(array,2)] += 1; /* 95 */ - counts[EXTRACT256(array,3)] += 1; /* 79 */ - counts[EXTRACT256(array,4)] += 1; /* 63 */ - counts[EXTRACT256(array,5)] += 1; /* 47 */ - counts[EXTRACT256(array,6)] += 1; /* 31 */ - counts[EXTRACT256(array,7)] += 1; /* 15 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("63 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("31 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("15 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("63 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("31 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - 
debug(printf("15 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 126 */ - counts[EXTRACT256(array,1)] += 1; /* 110 */ - counts[EXTRACT256(array,2)] += 1; /* 94 */ - counts[EXTRACT256(array,3)] += 1; /* 78 */ - counts[EXTRACT256(array,4)] += 1; /* 62 */ - counts[EXTRACT256(array,5)] += 1; /* 46 */ - counts[EXTRACT256(array,6)] += 1; /* 30 */ - counts[EXTRACT256(array,7)] += 1; /* 14 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("62 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("46 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("30 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("14 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("62 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("46 %04X => 
%d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("30 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("14 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 125 */ - counts[EXTRACT256(array,1)] += 1; /* 109 */ - counts[EXTRACT256(array,2)] += 1; /* 93 */ - counts[EXTRACT256(array,3)] += 1; /* 77 */ - counts[EXTRACT256(array,4)] += 1; /* 61 */ - counts[EXTRACT256(array,5)] += 1; /* 45 */ - counts[EXTRACT256(array,6)] += 1; /* 29 */ - counts[EXTRACT256(array,7)] += 1; /* 13 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("61 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("45 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("13 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - 
debug(printf("61 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("45 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("13 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 124 */ - counts[EXTRACT256(array,1)] += 1; /* 108 */ - counts[EXTRACT256(array,2)] += 1; /* 92 */ - counts[EXTRACT256(array,3)] += 1; /* 76 */ - counts[EXTRACT256(array,4)] += 1; /* 60 */ - counts[EXTRACT256(array,5)] += 1; /* 44 */ - counts[EXTRACT256(array,6)] += 1; /* 28 */ - counts[EXTRACT256(array,7)] += 1; /* 12 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("60 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("28 %04X => 
%d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("12 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("60 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("28 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("12 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( oligo, bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 123 */ - counts[EXTRACT256(array,1)] += 1; /* 107 */ - counts[EXTRACT256(array,2)] += 1; /* 91 */ - counts[EXTRACT256(array,3)] += 1; /* 75 */ - counts[EXTRACT256(array,4)] += 1; /* 59 */ - counts[EXTRACT256(array,5)] += 1; /* 43 */ - counts[EXTRACT256(array,6)] += 1; /* 27 */ - counts[EXTRACT256(array,7)] += 1; /* 11 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("59 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("43 %04X => 
%d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("11 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("59 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("43 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("11 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_srli_epi32(current,20); - counts[EXTRACT256(array,0)] += 1; /* 122 */ - counts[EXTRACT256(array,1)] += 1; /* 106 */ - counts[EXTRACT256(array,2)] += 1; /* 90 */ - counts[EXTRACT256(array,3)] += 1; /* 74 */ - counts[EXTRACT256(array,4)] += 1; /* 58 */ - counts[EXTRACT256(array,5)] += 1; /* 42 */ - counts[EXTRACT256(array,6)] += 1; /* 26 */ - counts[EXTRACT256(array,7)] += 1; /* 10 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("58 %04X => 
%d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("10 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("58 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("10 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 121 */ - counts[EXTRACT256(array,1)] += 1; /* 105 */ - counts[EXTRACT256(array,2)] += 1; /* 89 */ - counts[EXTRACT256(array,3)] += 1; /* 73 */ - counts[EXTRACT256(array,4)] += 1; /* 57 */ - counts[EXTRACT256(array,5)] += 1; /* 41 */ - counts[EXTRACT256(array,6)] += 1; /* 25 */ - counts[EXTRACT256(array,7)] += 1; /* 9 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - 
inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("57 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("25 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("9 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("57 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("25 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("9 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 120 */ - counts[EXTRACT256(array,1)] += 1; /* 104 */ - counts[EXTRACT256(array,2)] += 1; /* 88 */ - counts[EXTRACT256(array,3)] += 1; /* 72 */ - counts[EXTRACT256(array,4)] += 1; /* 56 */ - counts[EXTRACT256(array,5)] += 1; /* 40 */ - counts[EXTRACT256(array,6)] += 1; /* 24 */ - counts[EXTRACT256(array,7)] += 1; /* 8 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("56 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("24 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("8 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("56 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("24 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("8 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 119 */ - counts[EXTRACT256(array,1)] += 1; /* 103 */ - counts[EXTRACT256(array,2)] += 1; /* 87 */ - counts[EXTRACT256(array,3)] += 1; /* 71 */ - counts[EXTRACT256(array,4)] += 1; /* 55 */ - counts[EXTRACT256(array,5)] += 1; /* 39 */ - counts[EXTRACT256(array,6)] += 1; /* 23 */ - counts[EXTRACT256(array,7)] += 1; /* 7 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = 
INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("55 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("7 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("55 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("7 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 118 */ - counts[EXTRACT256(array,1)] += 1; /* 102 */ - counts[EXTRACT256(array,2)] += 1; /* 86 */ - counts[EXTRACT256(array,3)] += 1; /* 70 */ - counts[EXTRACT256(array,4)] += 1; /* 54 */ - counts[EXTRACT256(array,5)] += 1; /* 38 */ - counts[EXTRACT256(array,6)] += 1; /* 22 */ - counts[EXTRACT256(array,7)] += 1; /* 6 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - 
inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("54 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("38 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("6 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("54 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("38 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("6 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 117 */ - counts[EXTRACT256(array,1)] += 1; /* 101 */ - counts[EXTRACT256(array,2)] += 1; /* 85 */ - counts[EXTRACT256(array,3)] += 1; /* 69 */ - counts[EXTRACT256(array,4)] += 1; /* 53 */ - counts[EXTRACT256(array,5)] += 1; /* 37 */ - counts[EXTRACT256(array,6)] += 1; /* 21 */ - counts[EXTRACT256(array,7)] += 1; /* 5 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("53 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("37 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("21 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("5 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("53 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("37 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("21 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("5 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 116 */ - counts[EXTRACT256(array,1)] += 1; /* 100 */ - counts[EXTRACT256(array,2)] += 1; /* 84 */ - counts[EXTRACT256(array,3)] += 1; /* 68 */ - counts[EXTRACT256(array,4)] += 1; /* 52 */ - counts[EXTRACT256(array,5)] += 1; /* 36 */ - counts[EXTRACT256(array,6)] += 1; /* 20 */ - counts[EXTRACT256(array,7)] += 1; /* 4 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; 
- } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("52 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("20 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("4 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("52 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("20 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("4 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 115 */ - counts[EXTRACT256(array,1)] += 1; /* 99 */ - counts[EXTRACT256(array,2)] += 1; /* 83 */ - counts[EXTRACT256(array,3)] += 1; /* 67 */ - counts[EXTRACT256(array,4)] += 1; /* 51 */ - counts[EXTRACT256(array,5)] += 1; /* 35 */ - counts[EXTRACT256(array,6)] += 1; /* 19 */ - counts[EXTRACT256(array,7)] += 1; /* 3 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum 
counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("51 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("3 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("51 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("3 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 114 */ - counts[EXTRACT256(array,1)] += 1; /* 98 */ - counts[EXTRACT256(array,2)] += 1; /* 82 */ - counts[EXTRACT256(array,3)] += 1; /* 66 */ - counts[EXTRACT256(array,4)] += 1; /* 50 */ - counts[EXTRACT256(array,5)] += 1; /* 34 */ - counts[EXTRACT256(array,6)] += 1; /* 18 */ - counts[EXTRACT256(array,7)] += 1; /* 2 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = 
_mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("50 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("2 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("50 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("2 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 113 */ - counts[EXTRACT256(array,1)] += 1; /* 97 */ - counts[EXTRACT256(array,2)] += 1; /* 81 */ - counts[EXTRACT256(array,3)] += 1; /* 65 */ - counts[EXTRACT256(array,4)] += 1; /* 49 */ - counts[EXTRACT256(array,5)] += 1; /* 33 */ - counts[EXTRACT256(array,6)] += 1; /* 17 */ - counts[EXTRACT256(array,7)] += 1; /* 1 */ -#ifdef 
CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("49 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("33 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("17 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("1 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("49 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("33 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("17 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("1 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( current, bigmask6); - counts[EXTRACT256(array,0)] += 1; /* 112 */ - counts[EXTRACT256(array,1)] += 1; /* 96 */ - counts[EXTRACT256(array,2)] += 1; /* 80 */ - counts[EXTRACT256(array,3)] += 1; /* 64 */ - counts[EXTRACT256(array,4)] += 1; /* 48 */ - counts[EXTRACT256(array,5)] += 1; /* 32 */ - 
counts[EXTRACT256(array,6)] += 1; /* 16 */ - counts[EXTRACT256(array,7)] += 1; /* 0 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("48 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("32 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("16 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("0 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("48 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("32 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("16 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("0 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - return; -} -#endif - #if !defined(HAVE_AVX2) static int -store_6mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, - Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { 
+store_6mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { Genomecomp_T masked, oligo; #ifdef INDIVIDUAL_SHIFTS #elif defined(SIMD_MASK_THEN_STORE) @@ -42559,79 +28199,68 @@ #ifdef INDIVIDUAL_SHIFTS masked = low_rc & MASK6; /* 0 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } masked = (low_rc >> 2) & MASK6; /* 1 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; + table[positions[masked] + (--counts[masked])] = chrpos - 1; } masked = (low_rc >> 4) & MASK6; /* 2 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; + table[positions[masked] + (--counts[masked])] = chrpos - 2; } masked = (low_rc >> 6) & MASK6; /* 3 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; + table[positions[masked] + (--counts[masked])] = chrpos - 3; } masked = (low_rc >> 8) & MASK6; /* 4 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; + table[positions[masked] + (--counts[masked])] = chrpos - 4; } masked = (low_rc >> 10) & MASK6; /* 5 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; + table[positions[masked] + (--counts[masked])] = chrpos - 5; } masked = (low_rc >> 12) & MASK6; /* 6 */ if 
(counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; + table[positions[masked] + (--counts[masked])] = chrpos - 6; } masked = (low_rc >> 14) & MASK6; /* 7 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; + table[positions[masked] + (--counts[masked])] = chrpos - 7; } masked = (low_rc >> 16) & MASK6; /* 8 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; + table[positions[masked] + (--counts[masked])] = chrpos - 8; } masked = (low_rc >> 18) & MASK6; /* 9 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; + table[positions[masked] + (--counts[masked])] = chrpos - 9; } masked = low_rc >> 20; /* 10, No mask necessary */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; + table[positions[masked] + (--counts[masked])] = chrpos - 10; } #else @@ -42644,30 +28273,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + table[positions[masked] + (--counts[masked])] = chrpos; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; + table[positions[masked] + (--counts[masked])] = chrpos - 1; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > 
positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; + table[positions[masked] + (--counts[masked])] = chrpos - 2; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; + table[positions[masked] + (--counts[masked])] = chrpos - 3; } @@ -42680,30 +28305,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; + table[positions[masked] + (--counts[masked])] = chrpos - 4; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; + table[positions[masked] + (--counts[masked])] = chrpos - 5; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; + table[positions[masked] + (--counts[masked])] = chrpos - 6; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; + table[positions[masked] + (--counts[masked])] = chrpos - 7; } @@ -42716,23 +28337,20 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; + table[positions[masked] + (--counts[masked])] = chrpos - 8; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 
9)); - table[--pointers[masked]] = chrpos - 9; + table[positions[masked] + (--counts[masked])] = chrpos - 9; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; + table[positions[masked] + (--counts[masked])] = chrpos - 10; } #endif @@ -42743,37 +28361,32 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK6; /* 11 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; + table[positions[masked] + (--counts[masked])] = chrpos - 11; } masked = (oligo >> 2) & MASK6; /* 12 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; + table[positions[masked] + (--counts[masked])] = chrpos - 12; } masked = (oligo >> 4) & MASK6; /* 13 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; + table[positions[masked] + (--counts[masked])] = chrpos - 13; } masked = (oligo >> 6) & MASK6; /* 14 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; + table[positions[masked] + (--counts[masked])] = chrpos - 14; } masked = (oligo >> 8) & MASK6; /* 15 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; + table[positions[masked] + (--counts[masked])] = chrpos - 15; } #else @@ -42786,38 +28399,33 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u 
at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; + table[positions[masked] + (--counts[masked])] = chrpos - 11; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; + table[positions[masked] + (--counts[masked])] = chrpos - 12; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; + table[positions[masked] + (--counts[masked])] = chrpos - 13; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; + table[positions[masked] + (--counts[masked])] = chrpos - 14; } masked = (oligo >> 8) & MASK6; /* 15 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; + table[positions[masked] + (--counts[masked])] = chrpos - 15; } #endif @@ -42825,79 +28433,68 @@ #ifdef INDIVIDUAL_SHIFTS masked = high_rc & MASK6; /* 16 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; + table[positions[masked] + (--counts[masked])] = chrpos - 16; } masked = (high_rc >> 2) & MASK6; /* 17 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; + table[positions[masked] + (--counts[masked])] = chrpos - 17; } masked = (high_rc >> 4) & MASK6; /* 18 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at 
%u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; + table[positions[masked] + (--counts[masked])] = chrpos - 18; } masked = (high_rc >> 6) & MASK6; /* 19 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; + table[positions[masked] + (--counts[masked])] = chrpos - 19; } masked = (high_rc >> 8) & MASK6; /* 20 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; + table[positions[masked] + (--counts[masked])] = chrpos - 20; } masked = (high_rc >> 10) & MASK6; /* 21 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; + table[positions[masked] + (--counts[masked])] = chrpos - 21; } masked = (high_rc >> 12) & MASK6; /* 22 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; + table[positions[masked] + (--counts[masked])] = chrpos - 22; } masked = (high_rc >> 14) & MASK6; /* 23 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; + table[positions[masked] + (--counts[masked])] = chrpos - 23; } masked = (high_rc >> 16) & MASK6; /* 24 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; + table[positions[masked] + (--counts[masked])] = chrpos - 24; } masked = (high_rc >> 18) & MASK6; /* 25 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at 
%u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; + table[positions[masked] + (--counts[masked])] = chrpos - 25; } masked = high_rc >> 20; /* 26, No mask necessary */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; + table[positions[masked] + (--counts[masked])] = chrpos - 26; } #else @@ -42910,30 +28507,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; + table[positions[masked] + (--counts[masked])] = chrpos - 16; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; + table[positions[masked] + (--counts[masked])] = chrpos - 17; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; + table[positions[masked] + (--counts[masked])] = chrpos - 18; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; + table[positions[masked] + (--counts[masked])] = chrpos - 19; } @@ -42946,30 +28539,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; + table[positions[masked] + (--counts[masked])] = chrpos - 20; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); 
- table[--pointers[masked]] = chrpos - 21; + table[positions[masked] + (--counts[masked])] = chrpos - 21; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; + table[positions[masked] + (--counts[masked])] = chrpos - 22; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; + table[positions[masked] + (--counts[masked])] = chrpos - 23; } @@ -42982,23 +28571,20 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; + table[positions[masked] + (--counts[masked])] = chrpos - 24; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; + table[positions[masked] + (--counts[masked])] = chrpos - 25; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; + table[positions[masked] + (--counts[masked])] = chrpos - 26; } #endif @@ -43009,37 +28595,32 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK6; /* 27 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; + table[positions[masked] + (--counts[masked])] = chrpos - 27; } masked = (oligo >> 2) & MASK6; /* 28 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - 
table[--pointers[masked]] = chrpos - 28; + table[positions[masked] + (--counts[masked])] = chrpos - 28; } masked = (oligo >> 4) & MASK6; /* 29 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; + table[positions[masked] + (--counts[masked])] = chrpos - 29; } masked = (oligo >> 6) & MASK6; /* 30 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; + table[positions[masked] + (--counts[masked])] = chrpos - 30; } masked = (oligo >> 8) & MASK6; /* 31 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; + table[positions[masked] + (--counts[masked])] = chrpos - 31; } #else @@ -43052,38 +28633,33 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; + table[positions[masked] + (--counts[masked])] = chrpos - 27; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; + table[positions[masked] + (--counts[masked])] = chrpos - 28; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; + table[positions[masked] + (--counts[masked])] = chrpos - 29; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; + 
table[positions[masked] + (--counts[masked])] = chrpos - 30; } masked = (oligo >> 8) & MASK6; /* 31 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; + table[positions[masked] + (--counts[masked])] = chrpos - 31; } #endif @@ -43093,270 +28669,343 @@ #else /* HAVE_AVX2 */ static int -store_6mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, - Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { +store_6mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { Genomecomp_T masked, oligo; __m256i _oligo, _masked, _counts; + __m256i _blocks, _envelopes, _addresses, _address_mask, _count_mask; + + _address_mask = _mm256_set1_epi32(0x3); + _count_mask = _mm256_set1_epi32(0xFF); - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14); + + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask6); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); if (EXTRACT256(_counts,0)) { masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + table[positions[masked] + 
(--counts[masked])] = chrpos; + } } if (EXTRACT256(_counts,1)) { masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; + } } if (EXTRACT256(_counts,2)) { masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; + } } if (EXTRACT256(_counts,3)) { masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; + } } if (EXTRACT256(_counts,4)) { masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; - } + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; + }} + if (EXTRACT256(_counts,5)) { masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + 
debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; + } } if (EXTRACT256(_counts,6)) { masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; + } } if (EXTRACT256(_counts,7)) { masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; + } } _oligo = _mm256_srli_epi32(_oligo, 16); _masked = _mm256_and_si256(_oligo, bigmask6); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); if (EXTRACT256(_counts,0)) { masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; + } } if (EXTRACT256(_counts,1)) { masked = 
EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; + } } if (EXTRACT256(_counts,2)) { masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; + } } oligo = low_rc >> 22; /* For 15..11 */ oligo |= high_rc << 10; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask6); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); if (EXTRACT256(_counts,0)) { masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; + } } if (EXTRACT256(_counts,1)) { masked = 
EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + (--counts[masked])] = chrpos - 12; + } } if (EXTRACT256(_counts,2)) { masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; + } } if (EXTRACT256(_counts,3)) { masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; + } } if (EXTRACT256(_counts,4)) { masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; + } } - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask6); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = 
_mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); if (EXTRACT256(_counts,0)) { masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; + } } if (EXTRACT256(_counts,1)) { masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; + } } if (EXTRACT256(_counts,2)) { masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; + } } if (EXTRACT256(_counts,3)) { masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = 
chrpos - 19; + } } if (EXTRACT256(_counts,4)) { masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; + } } if (EXTRACT256(_counts,5)) { masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; + } } if (EXTRACT256(_counts,6)) { masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; + } } if (EXTRACT256(_counts,7)) { masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; + } } _oligo = _mm256_srli_epi32(_oligo, 16); _masked = _mm256_and_si256(_oligo, bigmask6); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = 
_mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); if (EXTRACT256(_counts,0)) { masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; + } } if (EXTRACT256(_counts,1)) { masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; + } } if (EXTRACT256(_counts,2)) { masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; + } } oligo = high_rc >> 22; /* For 31..27 */ oligo |= nextlow_rc << 10; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); _masked = _mm256_and_si256(_oligo, bigmask6); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = 
_mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); if (EXTRACT256(_counts,0)) { masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; + } } if (EXTRACT256(_counts,1)) { masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; + } } if (EXTRACT256(_counts,2)) { masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; + } } if (EXTRACT256(_counts,3)) { masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; + } } if (EXTRACT256(_counts,4)) { masked = 
EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; + } } return chrpos - 32; @@ -43369,7 +29018,7 @@ #if !defined(HAVE_AVX2) static void -count_5mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { +count_5mers_rev_32 (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { Genomecomp_T masked, oligo; #ifdef INDIVIDUAL_SHIFTS #elif defined(SIMD_MASK_THEN_STORE) @@ -43382,51 +29031,51 @@ #ifdef INDIVIDUAL_SHIFTS masked = low_rc & MASK5; /* 0 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("0 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 2) & MASK5; /* 1 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("1 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 4) & MASK5; /* 2 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("2 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 6) & MASK5; /* 3 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("3 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 8) & MASK5; /* 4 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("4 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 10) & MASK5; /* 5 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("5 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 12) & MASK5; /* 6 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("6 %04X => 
%d\n",masked,counts[masked])); masked = (low_rc >> 14) & MASK5; /* 7 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("7 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 16) & MASK5; /* 8 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("8 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 18) & MASK5; /* 9 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("9 %04X => %d\n",masked,counts[masked])); masked = (low_rc >> 20) & MASK5; /* 10 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("10 %04X => %d\n",masked,counts[masked])); masked = low_rc >> 22; /* 11, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("11 %04X => %d\n",masked,counts[masked])); #else @@ -43438,19 +29087,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("0 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("1 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("2 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("3 %04X => %d\n",masked,counts[masked])); @@ -43462,19 +29111,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("4 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("5 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - 
INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("6 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("7 %04X => %d\n",masked,counts[masked])); @@ -43486,19 +29135,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("8 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("9 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("10 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("11 %04X => %d\n",masked,counts[masked])); #endif @@ -43508,19 +29157,19 @@ #ifdef INDIVIDUAL_SHIFTS masked = oligo & MASK5; /* 12 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("12 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 2) & MASK5; /* 13 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("13 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 4) & MASK5; /* 14 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("14 %04X => %d\n",masked,counts[masked])); masked = (oligo >> 6) & MASK5; /* 15 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("15 %04X => %d\n",masked,counts[masked])); #else @@ -43532,70 +29181,70 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("12 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + 
INCR_COUNT(counts[masked]); debug(printf("13 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("14 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("15 %04X => %d\n",masked,counts[masked])); #endif #ifdef INDIVIDUAL_SHIFTS masked = high_rc & MASK5; /* 16 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("16 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 2) & MASK5; /* 17 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("17 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 4) & MASK5; /* 18 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("18 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 6) & MASK5; /* 19 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("19 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 8) & MASK5; /* 20 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("20 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 10) & MASK5; /* 21 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("21 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 12) & MASK5; /* 22 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("22 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 14) & MASK5; /* 23 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("23 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 16) & MASK5; /* 24 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("24 %04X => 
%d\n",masked,counts[masked])); masked = (high_rc >> 18) & MASK5; /* 25 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("25 %04X => %d\n",masked,counts[masked])); masked = (high_rc >> 20) & MASK5; /* 26 */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("26 %04X => %d\n",masked,counts[masked])); masked = high_rc >> 22; /* 27, No mask necessary */ - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("27 %04X => %d\n",masked,counts[masked])); #else @@ -43607,19 +29256,19 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("16 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("17 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("18 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("19 %04X => %d\n",masked,counts[masked])); @@ -43631,2383 +29280,764 @@ #endif masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("20 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); + INCR_COUNT(counts[masked]); debug(printf("21 %04X => %d\n",masked,counts[masked])); masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("22 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("23 %04X => %d\n",masked,counts[masked])); - - - _oligo = _mm_srli_epi32(_oligo, 8); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) 
_masked,_mm_and_si128(_oligo, mask5)); -#else - _masked = _mm_and_si128(_oligo, mask5); -#endif - - masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("24 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("25 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("26 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("27 %04X => %d\n",masked,counts[masked])); -#endif - - - oligo = high_rc >> 24; /* For 31..28 */ - oligo |= nextlow_rc << 8; - -#ifdef INDIVIDUAL_SHIFTS - masked = oligo & MASK5; /* 28 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("28 %04X => %d\n",masked,counts[masked])); - - masked = (oligo >> 2) & MASK5; /* 29 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("29 %04X => %d\n",masked,counts[masked])); - - masked = (oligo >> 4) & MASK5; /* 30 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("30 %04X => %d\n",masked,counts[masked])); - - masked = (oligo >> 6) & MASK5; /* 31 */ - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("31 %04X => %d\n",masked,counts[masked])); - -#else - _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); -#else - _masked = _mm_and_si128(_oligo, mask5); -#endif - - masked = EXTRACT(_masked,0); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("28 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT(_masked,1); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("29 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT(_masked,2); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("30 %04X => %d\n",masked,counts[masked])); - - masked = 
EXTRACT(_masked,3); - INCR_COUNT(counts[masked],inquery[masked]); - debug(printf("31 %04X => %d\n",masked,counts[masked])); -#endif - - return; -} - -#else /* HAVE_AVX2 */ - -static void -count_5mers_rev (Count_T *counts, Inquery_T *inquery, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { - Genomecomp_T masked, oligo; - __m256i _oligo, _masked; -#ifdef CHECK_FOR_OVERFLOW - __m256i _counts_after, _counts_neg; -#endif - - - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask5); - - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("0 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("1 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("2 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("3 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("4 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,5); - counts[masked] += 1; - debug(printf("5 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,6); - counts[masked] += 1; - debug(printf("6 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,7); - counts[masked] += 1; - debug(printf("7 %04X => %d\n",masked,counts[masked])); - -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - - - _oligo = _mm256_srli_epi32(_oligo, 16); - _masked = _mm256_and_si256(_oligo, bigmask5); - - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("8 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("9 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("10 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("11 %04X => %d\n",masked,counts[masked])); - -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - - - oligo = low_rc >> 24; /* For 15..12 */ - oligo |= high_rc << 8; - - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); - _masked = 
_mm256_and_si256(_oligo, bigmask5); - - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("12 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("13 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("14 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("15 %04X => %d\n",masked,counts[masked])); - -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - - - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask5); - - masked = EXTRACT256(_masked,0); - counts[masked] += 1; - debug(printf("16 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,1); - counts[masked] += 1; - debug(printf("17 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,2); - counts[masked] += 1; - debug(printf("18 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,3); - counts[masked] += 1; - debug(printf("19 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,4); - counts[masked] += 1; - debug(printf("20 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,5); - counts[masked] += 1; - 
debug(printf("21 %04X => %d\n",masked,counts[masked])); - - masked = EXTRACT256(_masked,6); - counts[masked] += 1; + INCR_COUNT(counts[masked]); debug(printf("22 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT256(_masked,7); - counts[masked] += 1; - debug(printf("23 %04X => %d\n",masked,counts[masked])); - -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - /* _counts_after = _mm256_and_si256(_counts_after,low8); */ - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(_masked,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(_masked,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(_masked,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(_masked,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("23 %04X => %d\n",masked,counts[masked])); - _oligo = _mm256_srli_epi32(_oligo, 16); - _masked = _mm256_and_si256(_oligo, bigmask5); + _oligo = _mm_srli_epi32(_oligo, 8); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); +#else + _masked = _mm_and_si128(_oligo, mask5); +#endif - masked = EXTRACT256(_masked,0); - counts[masked] += 1; + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); debug(printf("24 %04X => %d\n",masked,counts[masked])); - masked = 
EXTRACT256(_masked,1); - counts[masked] += 1; + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); debug(printf("25 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT256(_masked,2); - counts[masked] += 1; + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); debug(printf("26 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT256(_masked,3); - counts[masked] += 1; + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); debug(printf("27 %04X => %d\n",masked,counts[masked])); - -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ +#endif oligo = high_rc >> 24; /* For 31..28 */ oligo |= nextlow_rc << 8; - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask5); - - masked = EXTRACT256(_masked,0); - counts[masked] += 1; +#ifdef INDIVIDUAL_SHIFTS + masked = oligo & MASK5; /* 28 */ + INCR_COUNT(counts[masked]); debug(printf("28 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT256(_masked,1); - counts[masked] += 1; + masked = (oligo >> 2) & MASK5; /* 29 */ + INCR_COUNT(counts[masked]); debug(printf("29 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT256(_masked,2); - counts[masked] += 1; + masked = (oligo >> 4) & MASK5; /* 30 */ + INCR_COUNT(counts[masked]); debug(printf("30 %04X => %d\n",masked,counts[masked])); - masked 
= EXTRACT256(_masked,3); - counts[masked] += 1; + masked = (oligo >> 6) & MASK5; /* 31 */ + INCR_COUNT(counts[masked]); debug(printf("31 %04X => %d\n",masked,counts[masked])); -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,_masked,/*shift*/4); - _counts_after = _mm256_and_si256(_counts_after,low4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(_masked,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(_masked,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(_masked,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(_masked,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - - return; -} - -#endif /* HAVE_AVX2 */ - +#else + _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); +#else + _masked = _mm_and_si128(_oligo, mask5); +#endif + masked = EXTRACT(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("28 %04X => %d\n",masked,counts[masked])); -/* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc}, - and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */ -#ifdef USE_SIMD_FOR_COUNTS -static void -extract_5mers_rev_simd (__m128i *out, __m128i current, __m128i next) { - __m128i oligo; + masked = EXTRACT(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("29 %04X => %d\n",masked,counts[masked])); - oligo = _mm_or_si128( _mm_srli_epi32(current,24), _mm_slli_epi32(next,8)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask5)); - _mm_store_si128(out++, 
_mm_and_si128( oligo, mask5)); + masked = EXTRACT(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("30 %04X => %d\n",masked,counts[masked])); - _mm_store_si128(out++, _mm_srli_epi32(current,22)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,20), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask5)); - _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask5)); - _mm_store_si128(out++, _mm_and_si128( current, mask5)); + masked = EXTRACT(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("31 %04X => %d\n",masked,counts[masked])); +#endif return; } -#ifdef HAVE_AVX2 -static void -extract_5mers_rev_simd_128 (__m256i *out, __m256i current, __m256i next) { - __m256i oligo; +#else /* HAVE_AVX2 */ - oligo = _mm256_or_si256( _mm256_srli_epi32(current,24), _mm256_slli_epi32(next,8)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask5)); +static void +count_5mers_rev_32 (Count_T *counts, Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { + Genomecomp_T masked, oligo; + __m256i _oligo, _masked; - _mm256_store_si256(out++, _mm256_srli_epi32(current,22)); - _mm256_store_si256(out++, 
_mm256_and_si256( _mm256_srli_epi32(current,20), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask5)); - _mm256_store_si256(out++, _mm256_and_si256( current, bigmask5)); - return; -} -#endif + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask5); + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("0 %04X => %d\n",masked,counts[masked])); -static void -count_5mers_rev_simd (Count_T *counts, Inquery_T *inquery, __m128i current, __m128i next) { - __m128i oligo; -#ifdef HAVE_SSE4_1 - __m128i array; -#else - Genomecomp_T array[4]; -#endif -#if defined(HAVE_AVX2) && defined(CHECK_FOR_OVERFLOW) - __m128i _counts_after, _counts_neg; -#endif + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("1 %04X => %d\n",masked,counts[masked])); - oligo = _mm_or_si128( _mm_srli_epi32(current,24), _mm_slli_epi32(next,8)); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,6), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,6), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - 
counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("63 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("47 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("31 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("15 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 63 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 47 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 31 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 15 */ -#endif - debug(printf("63 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("47 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("31 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("15 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("2 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,4), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,4), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - 
_counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("62 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("46 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("30 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("14 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 62 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 46 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 30 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 14 */ -#endif - debug(printf("62 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("46 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("30 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("14 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("3 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(oligo,2), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(oligo,2), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = 
_mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("61 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("45 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("29 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("13 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 61 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 45 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 29 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 13 */ -#endif - debug(printf("61 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("45 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("29 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("13 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT256(_masked,4); + INCR_COUNT(counts[masked]); + debug(printf("4 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( oligo, mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( oligo, mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = 
_mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("60 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("44 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("28 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("12 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 60 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 44 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 28 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 12 */ -#endif - debug(printf("60 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("44 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("28 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("12 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT256(_masked,5); + INCR_COUNT(counts[masked]); + debug(printf("5 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,6); + INCR_COUNT(counts[masked]); + debug(printf("6 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_srli_epi32(current,22); -#else - _mm_store_si128((__m128i *) array, _mm_srli_epi32(current,22)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = 
_mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("59 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("43 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("27 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("11 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 59 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 43 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 27 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 11 */ -#endif - debug(printf("59 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("43 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("27 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("11 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT256(_masked,7); + INCR_COUNT(counts[masked]); + debug(printf("7 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,20), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,20), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - 
_counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("58 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("42 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("26 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("10 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 58 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 42 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 26 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 10 */ -#endif - debug(printf("58 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("42 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("26 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("10 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,18), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,18), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if 
(EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("57 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("41 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("25 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("9 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 57 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 41 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 25 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 9 */ -#endif - debug(printf("57 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("41 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("25 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("9 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + _oligo = _mm256_srli_epi32(_oligo, 16); + _masked = _mm256_and_si256(_oligo, bigmask5); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,16), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,16), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if 
(EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("56 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("40 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("24 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("8 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 56 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 40 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 24 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 8 */ -#endif - debug(printf("56 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("40 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("24 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("8 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("8 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,14), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,14), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = 
INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("55 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("39 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("23 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("7 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 55 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 39 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 23 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 7 */ -#endif - debug(printf("55 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("39 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("23 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("7 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("9 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,12), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,12), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - 
inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("54 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("38 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("22 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("6 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 54 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 38 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 22 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 6 */ -#endif - debug(printf("54 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("38 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("22 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("6 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("10 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,10), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,10), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if 
(EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("53 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("37 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("21 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("5 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 53 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 37 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 21 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 5 */ -#endif - debug(printf("53 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("37 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("21 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("5 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("11 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,8), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,8), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = 
INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("52 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("36 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("20 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("4 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 52 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 36 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 20 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 4 */ -#endif - debug(printf("52 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("36 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("20 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("4 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,6), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,6), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("51 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("35 %04X => %d\n",array[1],counts[array[1]])); 
- debug(printf("19 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("3 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 51 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 35 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 19 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 3 */ -#endif - debug(printf("51 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("35 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("19 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("3 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + oligo = low_rc >> 24; /* For 15..12 */ + oligo |= high_rc << 8; -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,4), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,4), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("50 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("34 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("18 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("2 %04X => 
%d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 50 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 34 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 18 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 2 */ -#endif - debug(printf("50 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("34 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("18 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("2 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask5); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( _mm_srli_epi32(current,2), mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( _mm_srli_epi32(current,2), mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("49 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("33 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("17 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("1 %04X => %d\n",array[3],counts[array[3]])); - -#else - 
INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 49 */ - INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 33 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 17 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 1 */ -#endif - debug(printf("49 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("33 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("17 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("1 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("12 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_SSE4_1 - array = _mm_and_si128( current, mask5); -#else - _mm_store_si128((__m128i *) array, _mm_and_si128( current, mask5)); -#endif -#ifdef HAVE_AVX2 - counts[EXTRACT(array,0)] += 1; - counts[EXTRACT(array,1)] += 1; - counts[EXTRACT(array,2)] += 1; - counts[EXTRACT(array,3)] += 1; -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm_cmpgt_epi32(_counts_after,maxcount128); - if (_mm_testz_si128(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT(_counts_neg,0)) { - inquery[EXTRACT(array,0)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,1)) { - inquery[EXTRACT(array,1)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,2)) { - inquery[EXTRACT(array,2)] = INQUERY_FALSE; - } - if (EXTRACT(_counts_neg,3)) { - inquery[EXTRACT(array,3)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("48 %04X => %d\n",array[0],counts[array[0]])); - debug(printf("32 %04X => %d\n",array[1],counts[array[1]])); - debug(printf("16 %04X => %d\n",array[2],counts[array[2]])); - debug(printf("0 %04X => %d\n",array[3],counts[array[3]])); - -#else - INCR_COUNT(counts[EXTRACT(array,0)],inquery[EXTRACT(array,0)]); /* 48 */ - 
INCR_COUNT(counts[EXTRACT(array,1)],inquery[EXTRACT(array,1)]); /* 32 */ - INCR_COUNT(counts[EXTRACT(array,2)],inquery[EXTRACT(array,2)]); /* 16 */ - INCR_COUNT(counts[EXTRACT(array,3)],inquery[EXTRACT(array,3)]); /* 0 */ -#endif - debug(printf("48 %04X => %d\n",EXTRACT(array,0),counts[EXTRACT(array,0)])); - debug(printf("32 %04X => %d\n",EXTRACT(array,1),counts[EXTRACT(array,1)])); - debug(printf("16 %04X => %d\n",EXTRACT(array,2),counts[EXTRACT(array,2)])); - debug(printf("0 %04X => %d\n",EXTRACT(array,3),counts[EXTRACT(array,3)])); + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("13 %04X => %d\n",masked,counts[masked])); - return; -} -#endif + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("14 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("15 %04X => %d\n",masked,counts[masked])); -#ifdef HAVE_AVX2 -static void -count_5mers_rev_simd_128 (Count_T *counts, Inquery_T *inquery, __m256i current, __m256i next) { - __m256i oligo; - __m256i array; -#ifdef CHECK_FOR_OVERFLOW - __m256i _counts_after, _counts_neg; -#endif - oligo = _mm256_or_si256( _mm256_srli_epi32(current,24), _mm256_slli_epi32(next,8)); - array = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 127 */ - counts[EXTRACT256(array,1)] += 1; /* 111 */ - counts[EXTRACT256(array,2)] += 1; /* 95 */ - counts[EXTRACT256(array,3)] += 1; /* 79 */ - counts[EXTRACT256(array,4)] += 1; /* 63 */ - counts[EXTRACT256(array,5)] += 1; /* 47 */ - counts[EXTRACT256(array,6)] += 1; /* 31 */ - counts[EXTRACT256(array,7)] += 1; /* 15 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - 
inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("63 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("31 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("15 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("63 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("47 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("31 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("15 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 126 */ - counts[EXTRACT256(array,1)] += 1; /* 110 */ - counts[EXTRACT256(array,2)] += 1; /* 94 */ - counts[EXTRACT256(array,3)] += 1; /* 78 */ - counts[EXTRACT256(array,4)] += 1; /* 62 */ - counts[EXTRACT256(array,5)] += 1; /* 46 */ - counts[EXTRACT256(array,6)] += 1; /* 30 */ - counts[EXTRACT256(array,7)] += 1; /* 14 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if 
(_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("62 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("46 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("30 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("14 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("62 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("46 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("30 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("14 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 125 */ - counts[EXTRACT256(array,1)] += 1; /* 109 */ - counts[EXTRACT256(array,2)] += 1; /* 93 */ - counts[EXTRACT256(array,3)] += 1; /* 77 */ - counts[EXTRACT256(array,4)] += 1; /* 61 */ - counts[EXTRACT256(array,5)] += 1; /* 45 */ - counts[EXTRACT256(array,6)] += 1; /* 29 */ - counts[EXTRACT256(array,7)] += 1; /* 13 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = 
_mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("61 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("45 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("13 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("61 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("45 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("29 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("13 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( oligo, bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 124 */ - counts[EXTRACT256(array,1)] += 1; /* 108 */ - counts[EXTRACT256(array,2)] += 1; /* 92 */ - counts[EXTRACT256(array,3)] += 1; /* 76 */ - counts[EXTRACT256(array,4)] += 1; /* 60 */ - counts[EXTRACT256(array,5)] += 1; /* 44 */ - counts[EXTRACT256(array,6)] += 1; /* 28 */ - 
counts[EXTRACT256(array,7)] += 1; /* 12 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("60 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("28 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("12 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("60 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("44 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("28 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("12 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_srli_epi32(current,22); - counts[EXTRACT256(array,0)] += 1; /* 123 */ - counts[EXTRACT256(array,1)] += 1; /* 107 */ - counts[EXTRACT256(array,2)] += 1; /* 91 */ - counts[EXTRACT256(array,3)] += 1; /* 75 */ - counts[EXTRACT256(array,4)] += 1; /* 59 */ - 
counts[EXTRACT256(array,5)] += 1; /* 43 */ - counts[EXTRACT256(array,6)] += 1; /* 27 */ - counts[EXTRACT256(array,7)] += 1; /* 11 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("59 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("43 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("11 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("59 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("43 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("27 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("11 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,20), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 122 */ - counts[EXTRACT256(array,1)] += 1; /* 106 */ - counts[EXTRACT256(array,2)] += 1; 
/* 90 */ - counts[EXTRACT256(array,3)] += 1; /* 74 */ - counts[EXTRACT256(array,4)] += 1; /* 58 */ - counts[EXTRACT256(array,5)] += 1; /* 42 */ - counts[EXTRACT256(array,6)] += 1; /* 26 */ - counts[EXTRACT256(array,7)] += 1; /* 10 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("58 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("10 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("58 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("42 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("26 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("10 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask5); - 
counts[EXTRACT256(array,0)] += 1; /* 121 */ - counts[EXTRACT256(array,1)] += 1; /* 105 */ - counts[EXTRACT256(array,2)] += 1; /* 89 */ - counts[EXTRACT256(array,3)] += 1; /* 73 */ - counts[EXTRACT256(array,4)] += 1; /* 57 */ - counts[EXTRACT256(array,5)] += 1; /* 41 */ - counts[EXTRACT256(array,6)] += 1; /* 25 */ - counts[EXTRACT256(array,7)] += 1; /* 9 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("57 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("25 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("9 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("57 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("41 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("25 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("9 %04X => 
%d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 120 */ - counts[EXTRACT256(array,1)] += 1; /* 104 */ - counts[EXTRACT256(array,2)] += 1; /* 88 */ - counts[EXTRACT256(array,3)] += 1; /* 72 */ - counts[EXTRACT256(array,4)] += 1; /* 56 */ - counts[EXTRACT256(array,5)] += 1; /* 40 */ - counts[EXTRACT256(array,6)] += 1; /* 24 */ - counts[EXTRACT256(array,7)] += 1; /* 8 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("56 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("24 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("8 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("56 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("40 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - 
debug(printf("24 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("8 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 119 */ - counts[EXTRACT256(array,1)] += 1; /* 103 */ - counts[EXTRACT256(array,2)] += 1; /* 87 */ - counts[EXTRACT256(array,3)] += 1; /* 71 */ - counts[EXTRACT256(array,4)] += 1; /* 55 */ - counts[EXTRACT256(array,5)] += 1; /* 39 */ - counts[EXTRACT256(array,6)] += 1; /* 23 */ - counts[EXTRACT256(array,7)] += 1; /* 7 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("55 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("7 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("55 %04X => 
%d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("39 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("23 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("7 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 118 */ - counts[EXTRACT256(array,1)] += 1; /* 102 */ - counts[EXTRACT256(array,2)] += 1; /* 86 */ - counts[EXTRACT256(array,3)] += 1; /* 70 */ - counts[EXTRACT256(array,4)] += 1; /* 54 */ - counts[EXTRACT256(array,5)] += 1; /* 38 */ - counts[EXTRACT256(array,6)] += 1; /* 22 */ - counts[EXTRACT256(array,7)] += 1; /* 6 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("54 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("38 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - 
debug(printf("6 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("54 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("38 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("22 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("6 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 117 */ - counts[EXTRACT256(array,1)] += 1; /* 101 */ - counts[EXTRACT256(array,2)] += 1; /* 85 */ - counts[EXTRACT256(array,3)] += 1; /* 69 */ - counts[EXTRACT256(array,4)] += 1; /* 53 */ - counts[EXTRACT256(array,5)] += 1; /* 37 */ - counts[EXTRACT256(array,6)] += 1; /* 21 */ - counts[EXTRACT256(array,7)] += 1; /* 5 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("53 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("37 %04X => 
%d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("21 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("5 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("53 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("37 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("21 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("5 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 116 */ - counts[EXTRACT256(array,1)] += 1; /* 100 */ - counts[EXTRACT256(array,2)] += 1; /* 84 */ - counts[EXTRACT256(array,3)] += 1; /* 68 */ - counts[EXTRACT256(array,4)] += 1; /* 52 */ - counts[EXTRACT256(array,5)] += 1; /* 36 */ - counts[EXTRACT256(array,6)] += 1; /* 20 */ - counts[EXTRACT256(array,7)] += 1; /* 4 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - 
debug(printf("52 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("20 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("4 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("52 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("36 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("20 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("4 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 115 */ - counts[EXTRACT256(array,1)] += 1; /* 99 */ - counts[EXTRACT256(array,2)] += 1; /* 83 */ - counts[EXTRACT256(array,3)] += 1; /* 67 */ - counts[EXTRACT256(array,4)] += 1; /* 51 */ - counts[EXTRACT256(array,5)] += 1; /* 35 */ - counts[EXTRACT256(array,6)] += 1; /* 19 */ - counts[EXTRACT256(array,7)] += 1; /* 3 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if 
(EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("51 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("3 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("51 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("35 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("19 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("3 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 114 */ - counts[EXTRACT256(array,1)] += 1; /* 98 */ - counts[EXTRACT256(array,2)] += 1; /* 82 */ - counts[EXTRACT256(array,3)] += 1; /* 66 */ - counts[EXTRACT256(array,4)] += 1; /* 50 */ - counts[EXTRACT256(array,5)] += 1; /* 34 */ - counts[EXTRACT256(array,6)] += 1; /* 18 */ - counts[EXTRACT256(array,7)] += 1; /* 2 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; 
- } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("50 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("2 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("50 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("34 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("18 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("2 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 113 */ - counts[EXTRACT256(array,1)] += 1; /* 97 */ - counts[EXTRACT256(array,2)] += 1; /* 81 */ - counts[EXTRACT256(array,3)] += 1; /* 65 */ - counts[EXTRACT256(array,4)] += 1; /* 49 */ - counts[EXTRACT256(array,5)] += 1; /* 33 */ - counts[EXTRACT256(array,6)] += 1; /* 17 */ - counts[EXTRACT256(array,7)] += 1; /* 1 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = 
INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("49 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("33 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("17 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("1 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("49 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("33 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("17 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("1 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); - - array = _mm256_and_si256( current, bigmask5); - counts[EXTRACT256(array,0)] += 1; /* 112 */ - counts[EXTRACT256(array,1)] += 1; /* 96 */ - counts[EXTRACT256(array,2)] += 1; /* 80 */ - counts[EXTRACT256(array,3)] += 1; /* 64 */ - counts[EXTRACT256(array,4)] += 1; /* 48 */ - counts[EXTRACT256(array,5)] += 1; /* 32 */ - counts[EXTRACT256(array,6)] += 1; /* 16 */ - counts[EXTRACT256(array,7)] += 1; /* 0 */ -#ifdef CHECK_FOR_OVERFLOW - _counts_after = _mm256_i32gather_epi32(counts,array,/*scale*/4); - _counts_neg = _mm256_cmpgt_epi32(_counts_after,maxcount256); - if (_mm256_testz_si256(_counts_neg,_counts_neg) == 0) { - debug(printf("Exceeded maximum counts\n")); - if (EXTRACT256(_counts_neg,0)) { - inquery[EXTRACT256(array,0)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,1)) { - inquery[EXTRACT256(array,1)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,2)) { - inquery[EXTRACT256(array,2)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,3)) { - inquery[EXTRACT256(array,3)] = 
INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,4)) { - inquery[EXTRACT256(array,4)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,5)) { - inquery[EXTRACT256(array,5)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,6)) { - inquery[EXTRACT256(array,6)] = INQUERY_FALSE; - } - if (EXTRACT256(_counts_neg,7)) { - inquery[EXTRACT256(array,7)] = INQUERY_FALSE; - } - } -#endif /* CHECK_FOR_OVERFLOW */ - debug(printf("48 %04X => %d\n",EXTRACT256(array,0),counts[EXTRACT256(array,0)])); - debug(printf("32 %04X => %d\n",EXTRACT256(array,1),counts[EXTRACT256(array,1)])); - debug(printf("16 %04X => %d\n",EXTRACT256(array,2),counts[EXTRACT256(array,2)])); - debug(printf("0 %04X => %d\n",EXTRACT256(array,3),counts[EXTRACT256(array,3)])); - debug(printf("48 %04X => %d\n",EXTRACT256(array,4),counts[EXTRACT256(array,4)])); - debug(printf("32 %04X => %d\n",EXTRACT256(array,5),counts[EXTRACT256(array,5)])); - debug(printf("16 %04X => %d\n",EXTRACT256(array,6),counts[EXTRACT256(array,6)])); - debug(printf("0 %04X => %d\n",EXTRACT256(array,7),counts[EXTRACT256(array,7)])); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask5); - return; -} -#endif + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("16 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("17 %04X => %d\n",masked,counts[masked])); -#if !defined(HAVE_AVX2) + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("18 %04X => %d\n",masked,counts[masked])); -static int -store_5mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, - Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { - Genomecomp_T masked, oligo; -#ifdef INDIVIDUAL_SHIFTS -#elif defined(SIMD_MASK_THEN_STORE) - UINT4 _masked[4] __attribute__ ((aligned (16))); - __m128i _oligo; -#else - __m128i _oligo, _masked; -#endif + 
masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("19 %04X => %d\n",masked,counts[masked])); + masked = EXTRACT256(_masked,4); + INCR_COUNT(counts[masked]); + debug(printf("20 %04X => %d\n",masked,counts[masked])); -#ifdef INDIVIDUAL_SHIFTS - masked = low_rc & MASK5; /* 0 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; - } + masked = EXTRACT256(_masked,5); + INCR_COUNT(counts[masked]); + debug(printf("21 %04X => %d\n",masked,counts[masked])); - masked = (low_rc >> 2) & MASK5; /* 1 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; - } + masked = EXTRACT256(_masked,6); + INCR_COUNT(counts[masked]); + debug(printf("22 %04X => %d\n",masked,counts[masked])); - masked = (low_rc >> 4) & MASK5; /* 2 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; - } + masked = EXTRACT256(_masked,7); + INCR_COUNT(counts[masked]); + debug(printf("23 %04X => %d\n",masked,counts[masked])); - masked = (low_rc >> 6) & MASK5; /* 3 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; - } - masked = (low_rc >> 8) & MASK5; /* 4 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; - } + _oligo = _mm256_srli_epi32(_oligo, 16); + _masked = _mm256_and_si256(_oligo, bigmask5); - masked = (low_rc >> 10) & MASK5; /* 5 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at 
%u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; - } + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("24 %04X => %d\n",masked,counts[masked])); - masked = (low_rc >> 12) & MASK5; /* 6 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; - } + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("25 %04X => %d\n",masked,counts[masked])); - masked = (low_rc >> 14) & MASK5; /* 7 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; - } + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("26 %04X => %d\n",masked,counts[masked])); - masked = (low_rc >> 16) & MASK5; /* 8 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; - } + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("27 %04X => %d\n",masked,counts[masked])); - masked = (low_rc >> 18) & MASK5; /* 9 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; - } - masked = (low_rc >> 20) & MASK5; /* 10 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; - } + oligo = high_rc >> 24; /* For 31..28 */ + oligo |= nextlow_rc << 8; - masked = low_rc >> 22; /* 11, No mask necessary */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; - } + 
_oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask5); -#else - _oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); -#else - _masked = _mm_and_si128(_oligo, mask5); -#endif + masked = EXTRACT256(_masked,0); + INCR_COUNT(counts[masked]); + debug(printf("28 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; - } + masked = EXTRACT256(_masked,1); + INCR_COUNT(counts[masked]); + debug(printf("29 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; - } + masked = EXTRACT256(_masked,2); + INCR_COUNT(counts[masked]); + debug(printf("30 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; - } + masked = EXTRACT256(_masked,3); + INCR_COUNT(counts[masked]); + debug(printf("31 %04X => %d\n",masked,counts[masked])); - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; - } + return; +} +#endif /* HAVE_AVX2 */ - _oligo = _mm_srli_epi32(_oligo, 8); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); -#else - _masked = _mm_and_si128(_oligo, mask5); -#endif - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > 
positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; - } - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; - } +/* Expecting current to have {low0_rc, high0_rc, low1_rc, high1_rc}, + and next to have {high0_rc, low1_rc, high1_rc, nextlow_rc} */ +#ifdef HAVE_SSE2 +static void +extract_5mers_rev_simd_64 (__m128i *out, __m128i current, __m128i next) { + __m128i oligo; - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; - } + oligo = _mm_or_si128( _mm_srli_epi32(current,24), _mm_slli_epi32(next,8)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,6), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,4), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(oligo,2), mask5)); + _mm_store_si128(out++, _mm_and_si128( oligo, mask5)); - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; - } + _mm_store_si128(out++, _mm_srli_epi32(current,22)); /* No mask necessary */ + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,20), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,18), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,16), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,14), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,12), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,10), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,8), mask5)); + 
_mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,6), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,4), mask5)); + _mm_store_si128(out++, _mm_and_si128( _mm_srli_epi32(current,2), mask5)); + _mm_store_si128(out++, _mm_and_si128( current, mask5)); + return; +} + +#ifdef USE_UNORDERED_5 +static Chrpos_T +store_5mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16]; + + extract_5mers_rev_simd_64(array,current,next); + return store_fwdrev_simd_64(chrpos,table,positions,counts,(UINT4 *) array); +} - _oligo = _mm_srli_epi32(_oligo, 8); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); #else - _masked = _mm_and_si128(_oligo, mask5); -#endif +/* Includes extract_5mers_rev_simd_64_ordered (__m128i *out, __m128i current, __m128i next) */ +static Chrpos_T +store_5mers_rev_simd_64 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m128i current, __m128i next) { + __m128i array[16], *out; + __m128i oligo; + __m128i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m128i _u0, _u1, _u2, _u3; - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; - } + out = &(array[0]); - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; - } + /* _row0 = _mm_and_si128( current, mask5); */ + /* _row1 = _mm_and_si128( _mm_srli_epi32(current,2), mask5); */ + _t0 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,14), current, 0x55), mask5_epi16); + + /* _row2 = _mm_and_si128( _mm_srli_epi32(current,4), mask5); */ + /* _row3 = _mm_and_si128( _mm_srli_epi32(current,6), mask5); */ + _t1 = 
_mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,10), _mm_srli_epi32(current, 4), 0x55), mask5_epi16); + + /* _row4 = _mm_and_si128( _mm_srli_epi32(current,8), mask5); */ + /* _row5 = _mm_and_si128( _mm_srli_epi32(current,10), mask5); */ + _t2 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,6), _mm_srli_epi32(current, 8), 0x55), mask5_epi16); + + /* _row6 = _mm_and_si128( _mm_srli_epi32(current,12), mask5); */ + /* _row7 = _mm_and_si128( _mm_srli_epi32(current,14), mask5); */ + _t3 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(current,2), _mm_srli_epi32(current, 12), 0x55), mask5_epi16); + + /* _row8 = _mm_and_si128( _mm_srli_epi32(current,16), mask5); */ + /* _row9 = _mm_and_si128( _mm_srli_epi32(current,18), mask5); */ + _t4 = _mm_and_si128(_mm_blend_epi16(_mm_srli_epi32(current,2), _mm_srli_epi32(current, 16), 0x55), mask5_epi16); + + /* _row10 = _mm_and_si128( _mm_srli_epi32(current,20), mask5); */ + /* _row11 = _mm_srli_epi32(current,22); */ /* No mask necessary */ + _t5 = _mm_and_si128(_mm_blend_epi16(_mm_srli_epi32(current,6), _mm_srli_epi32(current, 20), 0x55), mask5_epi16); - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; - } + oligo = _mm_or_si128( _mm_srli_epi32(current,24), _mm_slli_epi32(next,8)); + /* _row12 = _mm_and_si128( oligo, mask5); */ + /* _row13 = _mm_and_si128( _mm_srli_epi32(oligo,2), mask5); */ + _t6 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,14), oligo, 0x55), mask5_epi16); + + /* _row14 = _mm_and_si128( _mm_srli_epi32(oligo,4), mask5); */ + /* _row15 = _mm_and_si128( _mm_srli_epi32(oligo,6), mask5); */ + _t7 = _mm_and_si128(_mm_blend_epi16(_mm_slli_epi32(oligo,10), _mm_srli_epi32(oligo, 4), 0x55), mask5_epi16); + + + /* Split: top half */ + _u0 = _mm_unpackhi_epi32(_t0,_t1); + _u1 = _mm_unpackhi_epi32(_t2,_t3); + _u2 = _mm_unpackhi_epi32(_t4,_t5); + _u3 = 
_mm_unpackhi_epi32(_t6,_t7); + + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3)); + + /* Split: bottom half */ + _u0 = _mm_unpacklo_epi32(_t0,_t1); + _u1 = _mm_unpacklo_epi32(_t2,_t3); + _u2 = _mm_unpacklo_epi32(_t4,_t5); + _u3 = _mm_unpacklo_epi32(_t6,_t7); + + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u0,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u1,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u2,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_mm_srli_si128(_u3,8))); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u0)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u1)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u2)); + _mm_store_si128(out++, _mm_cvtepu16_epi32(_u3)); - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; - } + return store_fwdrev_simd_64_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif #endif +#ifdef HAVE_AVX2 +static void +extract_5mers_rev_simd_128 (__m256i *out, __m256i current, __m256i next) { + __m256i oligo; - oligo = low_rc >> 24; /* For 15..12 */ - oligo |= high_rc << 8; + oligo = _mm256_or_si256( _mm256_srli_epi32(current,24), _mm256_slli_epi32(next,8)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( 
_mm256_srli_epi32(oligo,2), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( oligo, bigmask5)); -#ifdef INDIVIDUAL_SHIFTS - masked = oligo & MASK5; /* 12 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; - } + _mm256_store_si256(out++, _mm256_srli_epi32(current,22)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,20), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask5)); + _mm256_store_si256(out++, _mm256_and_si256( current, bigmask5)); + + return; +} + +#ifdef USE_UNORDERED_5 +static Chrpos_T +store_5mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + __m256i array[16]; + + extract_5mers_rev_simd_128(array,current,next); + return store_fwdrev_simd_128(chrpos,table,positions,counts,(UINT4 *) array); +} + +#else +/* Includes extract_5mers_rev_simd_128_ordered (__m256i *out, __m256i current, __m256i next) */ +static Chrpos_T +store_5mers_rev_simd_128 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m256i current, __m256i next) { + __m256i array[16], *out; + __m256i 
oligo; + __m256i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m256i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; - masked = (oligo >> 2) & MASK5; /* 13 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; - } + out = &(array[0]); - masked = (oligo >> 4) & MASK5; /* 14 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; - } + /* _row0 = _mm256_and_si256( current, bigmask5); */ + /* _row1 = _mm256_and_si256( _mm256_srli_epi32(current,2), bigmask5); */ + _t0 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,14), current, 0x55), bigmask5_epi16); + + /* _row2 = _mm256_and_si256( _mm256_srli_epi32(current,4), bigmask5); */ + /* _row3 = _mm256_and_si256( _mm256_srli_epi32(current,6), bigmask5) ; */ + _t1 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,10), _mm256_srli_epi32(current,4), 0x55), bigmask5_epi16); + + /* _row4 = _mm256_and_si256( _mm256_srli_epi32(current,8), bigmask5); */ + /* _row5 = _mm256_and_si256( _mm256_srli_epi32(current,10), bigmask5); */ + _t2 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,6), _mm256_srli_epi32(current,8), 0x55), bigmask5_epi16); + + /* _row6 = _mm256_and_si256( _mm256_srli_epi32(current,12), bigmask5); */ + /* _row7 = _mm256_and_si256( _mm256_srli_epi32(current,14), bigmask5); */ + _t3 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(current,2), _mm256_srli_epi32(current,12), 0x55), bigmask5_epi16); + + /* _row8 = _mm256_and_si256( _mm256_srli_epi32(current,16), bigmask5); */ + /* _row9 = _mm256_and_si256( _mm256_srli_epi32(current,18), bigmask5); */ + _t4 = _mm256_and_si256(_mm256_blend_epi16(_mm256_srli_epi32(current,2), _mm256_srli_epi32(current,16), 0x55), bigmask5_epi16); + + /* _row10 = _mm256_and_si256( 
_mm256_srli_epi32(current,20), bigmask5); */ + /* _row11 = _mm256_srli_epi32(current,22); */ /* No mask necessary */ + _t5 = _mm256_and_si256(_mm256_blend_epi16(_mm256_srli_epi32(current,6), _mm256_srli_epi32(current,20), 0x55), bigmask5_epi16); - masked = (oligo >> 6) & MASK5; /* 15 */ - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; - } + + oligo = _mm256_or_si256( _mm256_srli_epi32(current,24), _mm256_slli_epi32(next,8)); + /* _row12 = _mm256_and_si256( oligo, bigmask5); */ + /* _row13 = _mm256_and_si256( _mm256_srli_epi32(oligo,2), bigmask5); */ + _t6 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,14), oligo, 0x55), bigmask5_epi16); + + /* _row14 = _mm256_and_si256( _mm256_srli_epi32(oligo,4), bigmask5); */ + /* _row15 = _mm256_and_si256( _mm256_srli_epi32(oligo,6), bigmask5); */ + _t7 = _mm256_and_si256(_mm256_blend_epi16(_mm256_slli_epi32(oligo,10), _mm256_srli_epi32(oligo,4), 0x55), bigmask5_epi16); + + + _u0 = _mm256_unpackhi_epi32(_t0,_t1); + _u1 = _mm256_unpackhi_epi32(_t2,_t3); + _u2 = _mm256_unpackhi_epi32(_t4,_t5); + _u3 = _mm256_unpackhi_epi32(_t6,_t7); + _u4 = _mm256_unpacklo_epi32(_t0,_t1); + _u5 = _mm256_unpacklo_epi32(_t2,_t3); + _u6 = _mm256_unpacklo_epi32(_t4,_t5); + _u7 = _mm256_unpacklo_epi32(_t6,_t7); + + + _t0 = _mm256_unpackhi_epi64(_u0,_u1); + _t1 = _mm256_unpackhi_epi64(_u2,_u3); + _t2 = _mm256_unpacklo_epi64(_u0,_u1); + _t3 = _mm256_unpacklo_epi64(_u2,_u3); + _t4 = _mm256_unpackhi_epi64(_u4,_u5); + _t5 = _mm256_unpackhi_epi64(_u6,_u7); + _t6 = _mm256_unpacklo_epi64(_u4,_u5); + _t7 = _mm256_unpacklo_epi64(_u6,_u7); + + + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,1))); + 
_mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,1))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t0,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t1,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t2,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t3,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t4,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t5,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t6,0))); + _mm256_store_si256(out++, _mm256_cvtepu16_epi32(_mm256_extracti128_si256(_t7,0))); + + return store_fwdrev_simd_128_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif + +#ifdef HAVE_AVX512 +static void +extract_5mers_rev_simd_256 (__m512i *out, __m512i current, __m512i next) { + __m512i oligo; + + oligo = _mm512_or_si512( _mm512_srli_epi32(current,24), _mm512_slli_epi32(next,8)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( oligo, hugemask5)); + + _mm512_store_si512(out++, _mm512_srli_epi32(current,22)); /* No mask necessary */ + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,20), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( 
_mm512_srli_epi32(current,18), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask5)); + _mm512_store_si512(out++, _mm512_and_si512( current, hugemask5)); + + return; +} + +#ifdef USE_UNORDERED_5 +static Chrpos_T +store_5mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i next) { + __m512i array[16]; + + extract_5mers_rev_simd_256(array,current,next); + return store_fwdrev_simd_256(chrpos,table,positions,counts,(UINT4 *) array); +} #else - _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); -#ifdef SIMD_MASK_THEN_STORE - _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); -#else - _masked = _mm_and_si128(_oligo, mask5); -#endif +/* Includes extract_5mers_rev_simd_256_ordered (__m512i *out, __m512i current, __m512i next) */ +static Chrpos_T +store_5mers_rev_simd_256 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + __m512i current, __m512i next) { + __m256i array[16], *out; + __m512i oligo, _shuffle0, _shuffle1, _shuffle2; + __m512i _t0, _t1, _t2, _t3, _t4, _t5, _t6, _t7; + __m512i _u0, _u1, _u2, _u3, _u4, _u5, _u6, _u7; + + out = &(array[0]); + + _u0 = _mm512_and_si512( current, hugemask5); + /* _row1 = _mm512_and_si512( _mm512_srli_epi32(current,2), hugemask5); */ + _u1 = _mm512_and_si512( 
_mm512_slli_epi32(current,14), highmask5); + _t0 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,4), hugemask5); + /* _row3 = _mm512_and_si512( _mm512_srli_epi32(current,6), hugemask5); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,10), highmask5); + _t1 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,8), hugemask5); + /* _row5 = _mm512_and_si512( _mm512_srli_epi32(current,10), hugemask5); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,6), highmask5); + _t2 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,12), hugemask5); + /* _row7 = _mm512_and_si512( _mm512_srli_epi32(current,14), hugemask5); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(current,2), highmask5); + _t3 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,16), hugemask5); + /* _row9 = _mm512_and_si512( _mm512_srli_epi32(current,18), hugemask5); */ + _u1 = _mm512_and_si512( _mm512_srli_epi32(current,2), highmask5); + _t4 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(current,20), hugemask5); + /* _row11 = _mm512_srli_epi32(current,22); */ /* No mask necessary */ + _u1 = _mm512_and_si512( _mm512_srli_epi32(current,6), highmask5); + _t5 = _mm512_or_si512(_u0, _u1); + + + oligo = _mm512_or_si512( _mm512_srli_epi32(current,24), _mm512_slli_epi32(next,8)); + _u0 = _mm512_and_si512( oligo, hugemask5); + /* _row13 = _mm512_and_si512( _mm512_srli_epi32(oligo,2), hugemask5); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,14), highmask5); + _t6 = _mm512_or_si512(_u0, _u1); + + _u0 = _mm512_and_si512( _mm512_srli_epi32(oligo,4), hugemask5); + /* _row15 = _mm512_and_si512( _mm512_srli_epi32(oligo,6), hugemask5); */ + _u1 = _mm512_and_si512( _mm512_slli_epi32(oligo,10), highmask5); + _t7 = _mm512_or_si512(_u0, _u1); + + + _u0 = _mm512_unpackhi_epi32(_t0,_t1); + _u1 = _mm512_unpackhi_epi32(_t2,_t3); + _u2 = 
_mm512_unpackhi_epi32(_t4,_t5); + _u3 = _mm512_unpackhi_epi32(_t6,_t7); + _u4 = _mm512_unpacklo_epi32(_t0,_t1); + _u5 = _mm512_unpacklo_epi32(_t2,_t3); + _u6 = _mm512_unpacklo_epi32(_t4,_t5); + _u7 = _mm512_unpacklo_epi32(_t6,_t7); + + + /* Split: top half */ + _shuffle0 = _mm512_setr_epi64(7, 8+7, 6, 8+6, 5, 8+5, 4, 8+4); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); + + + _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + + /* Split: bottom half */ + _shuffle0 = _mm512_setr_epi64(3, 8+3, 2, 8+2, 1, 8+1, 0, 8+0); + _t0 = _mm512_permutex2var_epi64(_u0, _shuffle0, _u1); + _t1 = _mm512_permutex2var_epi64(_u2, _shuffle0, _u3); + _t2 = _mm512_permutex2var_epi64(_u4, _shuffle0, _u5); + _t3 = _mm512_permutex2var_epi64(_u6, _shuffle0, _u7); + + + /* _shuffle1 = _mm512_setr_epi64(0, 1, 8+0, 8+1, 2, 3, 8+2, 8+3); */ + 
_t7 = _mm512_permutex2var_epi64(_t0, _shuffle1, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle1, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + /* _shuffle2 = _mm512_setr_epi64(4, 5, 8+4, 8+5, 6, 7, 8+6, 8+7); */ + _t7 = _mm512_permutex2var_epi64(_t0, _shuffle2, _t1); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); + + _t7 = _mm512_permutex2var_epi64(_t2, _shuffle2, _t3); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,0))); + _mm512_store_si512(out++, _mm512_cvtepu16_epi32(_mm512_extracti64x4_epi64(_t7,1))); - masked = EXTRACT(_masked,0); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; - } + return store_fwdrev_simd_256_ordered(chrpos,table,positions,counts,(UINT4 *) array); +} +#endif +#endif - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; - } - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; - } +#if !defined(HAVE_AVX2) - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; - } +static int 
+store_5mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { + Genomecomp_T masked, oligo; +#ifdef INDIVIDUAL_SHIFTS +#elif defined(SIMD_MASK_THEN_STORE) + UINT4 _masked[4] __attribute__ ((aligned (16))); + __m128i _oligo; +#else + __m128i _oligo, _masked; #endif #ifdef INDIVIDUAL_SHIFTS - masked = high_rc & MASK5; /* 16 */ + masked = low_rc & MASK5; /* 0 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; } - masked = (high_rc >> 2) & MASK5; /* 17 */ + masked = (low_rc >> 2) & MASK5; /* 1 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; } - masked = (high_rc >> 4) & MASK5; /* 18 */ + masked = (low_rc >> 4) & MASK5; /* 2 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; } - masked = (high_rc >> 6) & MASK5; /* 19 */ + masked = (low_rc >> 6) & MASK5; /* 3 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; } - masked = (high_rc >> 8) & MASK5; /* 20 */ + masked = (low_rc 
>> 8) & MASK5; /* 4 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; } - masked = (high_rc >> 10) & MASK5; /* 21 */ + masked = (low_rc >> 10) & MASK5; /* 5 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; } - masked = (high_rc >> 12) & MASK5; /* 22 */ + masked = (low_rc >> 12) & MASK5; /* 6 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; } - masked = (high_rc >> 14) & MASK5; /* 23 */ + masked = (low_rc >> 14) & MASK5; /* 7 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; } - masked = (high_rc >> 16) & MASK5; /* 24 */ + masked = (low_rc >> 16) & MASK5; /* 8 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; } - masked = (high_rc >> 18) & MASK5; /* 25 */ + masked = (low_rc >> 18) & MASK5; /* 9 */ if 
(counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; } - masked = (high_rc >> 20) & MASK5; /* 26 */ + masked = (low_rc >> 20) & MASK5; /* 10 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; } - masked = high_rc >> 22; /* 27, No mask necessary */ + masked = low_rc >> 22; /* 11, No mask necessary */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; } #else - _oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6); + _oligo = _mm_setr_epi32(low_rc, low_rc >> 2, low_rc >> 4, low_rc >> 6); #ifdef SIMD_MASK_THEN_STORE _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); #else @@ -46016,30 +30046,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + 
table[positions[masked] + (--counts[masked])] = chrpos - 1; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; } @@ -46052,30 +30078,26 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = chrpos - 23; + debug(printf("Storing 
masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; } @@ -46088,64 +30110,56 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; } masked = EXTRACT(_masked,1); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; } masked = EXTRACT(_masked,2); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; } masked = EXTRACT(_masked,3); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; } #endif - oligo = high_rc >> 24; /* For 31..28 */ - oligo |= nextlow_rc << 8; + oligo = low_rc >> 24; /* For 15..12 */ + oligo |= high_rc << 8; #ifdef INDIVIDUAL_SHIFTS - masked = oligo & MASK5; /* 28 */ + masked = oligo & MASK5; /* 12 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + 
(--counts[masked])] = chrpos - 12; } - masked = (oligo >> 2) & MASK5; /* 29 */ + masked = (oligo >> 2) & MASK5; /* 13 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; } - masked = (oligo >> 4) & MASK5; /* 30 */ + masked = (oligo >> 4) & MASK5; /* 14 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; } - masked = (oligo >> 6) & MASK5; /* 31 */ + masked = (oligo >> 6) & MASK5; /* 15 */ if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; } #else @@ -46158,1520 +30172,880 @@ masked = EXTRACT(_masked,0); if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; - } - - masked = EXTRACT(_masked,1); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; - } - - masked = EXTRACT(_masked,2); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; - } - - masked = EXTRACT(_masked,3); - if (counts[masked]) { - assert(pointers[masked] > positions[masked]); - 
debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; - } -#endif - - return chrpos - 32; -} - -#else /* HAVE_AVX2 */ - -static int -store_5mers_rev (Chrpos_T chrpos, Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, - Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { - Genomecomp_T masked, oligo; - __m256i _oligo, _masked, _counts; - - - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask5); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos)); - table[--pointers[masked]] = chrpos; - } - - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); - table[--pointers[masked]] = chrpos - 1; - } - - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); - table[--pointers[masked]] = chrpos - 2; - } - - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); - table[--pointers[masked]] = chrpos - 3; - } - - if (EXTRACT256(_counts,4)) { - masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); - table[--pointers[masked]] = chrpos - 4; - } - - if (EXTRACT256(_counts,5)) { - masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); - table[--pointers[masked]] = chrpos - 5; - } - - if (EXTRACT256(_counts,6)) { - masked = 
EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); - table[--pointers[masked]] = chrpos - 6; - } - - if (EXTRACT256(_counts,7)) { - masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); - table[--pointers[masked]] = chrpos - 7; - } - - - _oligo = _mm256_srli_epi32(_oligo, 16); - _masked = _mm256_and_si256(_oligo, bigmask5); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); - table[--pointers[masked]] = chrpos - 8; - } - - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); - table[--pointers[masked]] = chrpos - 9; - } - - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); - table[--pointers[masked]] = chrpos - 10; - } - - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); - table[--pointers[masked]] = chrpos - 11; - } - - - oligo = low_rc >> 24; /* For 15..12 */ - oligo |= high_rc << 8; - - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask5); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); - table[--pointers[masked]] = chrpos - 12; + table[positions[masked] + (--counts[masked])] = chrpos - 
12; } - - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); + + masked = EXTRACT(_masked,1); + if (counts[masked]) { debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); - table[--pointers[masked]] = chrpos - 13; + table[positions[masked] + (--counts[masked])] = chrpos - 13; } - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); + masked = EXTRACT(_masked,2); + if (counts[masked]) { debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); - table[--pointers[masked]] = chrpos - 14; + table[positions[masked] + (--counts[masked])] = chrpos - 14; } - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); + masked = EXTRACT(_masked,3); + if (counts[masked]) { debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); - table[--pointers[masked]] = chrpos - 15; + table[positions[masked] + (--counts[masked])] = chrpos - 15; } +#endif - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask5); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); +#ifdef INDIVIDUAL_SHIFTS + masked = high_rc & MASK5; /* 16 */ + if (counts[masked]) { debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); - table[--pointers[masked]] = chrpos - 16; + table[positions[masked] + (--counts[masked])] = chrpos - 16; } - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); + masked = (high_rc >> 2) & MASK5; /* 17 */ + if (counts[masked]) { debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); - table[--pointers[masked]] = chrpos - 17; + table[positions[masked] + (--counts[masked])] = chrpos - 17; } - if (EXTRACT256(_counts,2)) { - masked = 
EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); + masked = (high_rc >> 4) & MASK5; /* 18 */ + if (counts[masked]) { debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); - table[--pointers[masked]] = chrpos - 18; + table[positions[masked] + (--counts[masked])] = chrpos - 18; } - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); + masked = (high_rc >> 6) & MASK5; /* 19 */ + if (counts[masked]) { debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); - table[--pointers[masked]] = chrpos - 19; + table[positions[masked] + (--counts[masked])] = chrpos - 19; } - if (EXTRACT256(_counts,4)) { - masked = EXTRACT256(_masked,4); - assert(pointers[masked] > positions[masked]); + masked = (high_rc >> 8) & MASK5; /* 20 */ + if (counts[masked]) { debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); - table[--pointers[masked]] = chrpos - 20; + table[positions[masked] + (--counts[masked])] = chrpos - 20; } - if (EXTRACT256(_counts,5)) { - masked = EXTRACT256(_masked,5); - assert(pointers[masked] > positions[masked]); + masked = (high_rc >> 10) & MASK5; /* 21 */ + if (counts[masked]) { debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); - table[--pointers[masked]] = chrpos - 21; + table[positions[masked] + (--counts[masked])] = chrpos - 21; } - if (EXTRACT256(_counts,6)) { - masked = EXTRACT256(_masked,6); - assert(pointers[masked] > positions[masked]); + masked = (high_rc >> 12) & MASK5; /* 22 */ + if (counts[masked]) { debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); - table[--pointers[masked]] = chrpos - 22; + table[positions[masked] + (--counts[masked])] = chrpos - 22; } - if (EXTRACT256(_counts,7)) { - masked = EXTRACT256(_masked,7); - assert(pointers[masked] > positions[masked]); + masked = (high_rc >> 14) & MASK5; /* 23 */ + if (counts[masked]) { debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); - table[--pointers[masked]] = 
chrpos - 23; + table[positions[masked] + (--counts[masked])] = chrpos - 23; } - - _oligo = _mm256_srli_epi32(_oligo, 16); - _masked = _mm256_and_si256(_oligo, bigmask5); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); + masked = (high_rc >> 16) & MASK5; /* 24 */ + if (counts[masked]) { debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); - table[--pointers[masked]] = chrpos - 24; + table[positions[masked] + (--counts[masked])] = chrpos - 24; } - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); + masked = (high_rc >> 18) & MASK5; /* 25 */ + if (counts[masked]) { debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); - table[--pointers[masked]] = chrpos - 25; + table[positions[masked] + (--counts[masked])] = chrpos - 25; } - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); + masked = (high_rc >> 20) & MASK5; /* 26 */ + if (counts[masked]) { debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); - table[--pointers[masked]] = chrpos - 26; + table[positions[masked] + (--counts[masked])] = chrpos - 26; } - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); + masked = high_rc >> 22; /* 27, No mask necessary */ + if (counts[masked]) { debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); - table[--pointers[masked]] = chrpos - 27; - } - - - oligo = high_rc >> 24; /* For 31..28 */ - oligo |= nextlow_rc << 8; - - _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),shift0to14); - _masked = _mm256_and_si256(_oligo, bigmask5); - _counts = _mm256_i32gather_epi32(counts,_masked,/*scale*/4); - - if (EXTRACT256(_counts,0)) { - masked = EXTRACT256(_masked,0); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u 
at %u\n",masked,chrpos - 28)); - table[--pointers[masked]] = chrpos - 28; - } - - if (EXTRACT256(_counts,1)) { - masked = EXTRACT256(_masked,1); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); - table[--pointers[masked]] = chrpos - 29; - } - - if (EXTRACT256(_counts,2)) { - masked = EXTRACT256(_masked,2); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); - table[--pointers[masked]] = chrpos - 30; - } - - if (EXTRACT256(_counts,3)) { - masked = EXTRACT256(_masked,3); - assert(pointers[masked] > positions[masked]); - debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); - table[--pointers[masked]] = chrpos - 31; - } - - return chrpos - 32; -} - -#endif /* HAVE_AVX2 */ - - - -#ifndef USE_SIMD_FOR_COUNTS -static void -count_positions_rev_std (Count_T *counts, Inquery_T *inquery, int indexsize, Univcoord_T left, Univcoord_T left_plus_length, - int genestrand) { - int startdiscard, enddiscard; - Genomecomp_T ptr, startptr, endptr, low_rc, high_rc, nextlow_rc, - low, high, nextlow; - - debug(printf("Starting count_positions_rev_std\n")); - - - if (left_plus_length < (Univcoord_T) indexsize) { - left_plus_length = 0; - } else { - left_plus_length -= indexsize; + table[positions[masked] + (--counts[masked])] = chrpos - 27; } - ptr = startptr = left/32U*3; - endptr = left_plus_length/32U*3; - startdiscard = left % 32; /* (left+pos5) % 32 */ - enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */ - - if (left_plus_length <= left) { - /* Skip */ - - } else if (startptr == endptr) { -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ga(high); low = 
Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - low_rc = ~low; - high_rc = ~high; - nextlow_rc = ~nextlow; - - if (indexsize == 9) { - count_9mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); - } else if (indexsize == 8) { - count_8mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); - } else if (indexsize == 7) { - count_7mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); - } else if (indexsize == 6) { - count_6mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); - } else if (indexsize == 5) { - count_5mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); - } else { - fprintf(stderr,"indexsize %d not supported\n",indexsize); - abort(); - } - - } else { - /* Genome_print_blocks(ref_blocks,left,left+16); */ - - /* Start block */ -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); + _oligo = _mm_setr_epi32(high_rc, high_rc >> 2, high_rc >> 4, high_rc >> 6); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); #else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = 
Cmet_reduce_ga(nextlow); - } - } - - low_rc = ~low; - high_rc = ~high; - nextlow_rc = ~nextlow; - - if (indexsize == 9) { - count_9mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); - } else if (indexsize == 8) { - count_8mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); - } else if (indexsize == 7) { - count_7mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); - } else if (indexsize == 6) { - count_6mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); - } else if (indexsize == 5) { - count_5mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); - } else { - fprintf(stderr,"indexsize %d not supported\n",indexsize); - abort(); - } - - ptr += 3; - - /* Middle blocks */ - if (indexsize == 9) { - while (ptr + 3 <= endptr) { -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - low_rc = ~low; - high_rc = ~high; - nextlow_rc = ~nextlow; - - count_9mers_rev(counts,inquery,low_rc,high_rc,nextlow_rc); - ptr += 3; - } - - } else if (indexsize == 8) { - while (ptr + 3 <= endptr) { -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = 
ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - low_rc = ~low; - high_rc = ~high; - nextlow_rc = ~nextlow; - - count_8mers_rev(counts,inquery,low_rc,high_rc,nextlow_rc); - ptr += 3; - } - - } else if (indexsize == 7) { - while (ptr + 3 <= endptr) { -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; + _masked = _mm_and_si128(_oligo, mask5); #endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - low_rc = ~low; - high_rc = ~high; - nextlow_rc = ~nextlow; + masked = EXTRACT(_masked,0); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; + } - count_7mers_rev(counts,inquery,low_rc,high_rc,nextlow_rc); - ptr += 3; - } + masked = EXTRACT(_masked,1); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; + } - } else if (indexsize == 6) { - while (ptr + 3 <= endptr) { -#ifdef WORDS_BIGENDIAN - high 
= Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } + masked = EXTRACT(_masked,2); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; + } - low_rc = ~low; - high_rc = ~high; - nextlow_rc = ~nextlow; + masked = EXTRACT(_masked,3); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = chrpos - 19; + } - count_6mers_rev(counts,inquery,low_rc,high_rc,nextlow_rc); - ptr += 3; - } - } else if (indexsize == 5) { - while (ptr + 3 <= endptr) { -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; + _oligo = _mm_srli_epi32(_oligo, 8); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); +#else + _masked = _mm_and_si128(_oligo, mask5); #endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = 
Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - low_rc = ~low; - high_rc = ~high; - nextlow_rc = ~nextlow; + masked = EXTRACT(_masked,0); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; + } - count_5mers_rev(counts,inquery,low_rc,high_rc,nextlow_rc); - ptr += 3; - } - } else { - abort(); - } + masked = EXTRACT(_masked,1); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; + } + masked = EXTRACT(_masked,2); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; + } - /* End block */ - assert(ptr == endptr); + masked = EXTRACT(_masked,3); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; + } -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); + + _oligo = _mm_srli_epi32(_oligo, 8); +#ifdef SIMD_MASK_THEN_STORE + _mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); #else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; + _masked = _mm_and_si128(_oligo, mask5); #endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - low_rc = ~low; - high_rc = ~high; - nextlow_rc = ~nextlow; + masked = EXTRACT(_masked,0); + if 
(counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; + } - if (indexsize == 9) { - count_9mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); - } else if (indexsize == 8) { - count_8mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); - } else if (indexsize == 7) { - count_7mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); - } else if (indexsize == 6) { - count_6mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); - } else if (indexsize == 5) { - count_5mers_rev_partial(counts,inquery,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); - } else { - abort(); - } + masked = EXTRACT(_masked,1); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; + } + masked = EXTRACT(_masked,2); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; } - - return; -} -#endif - -#ifdef USE_SIMD_FOR_COUNTS -static void -count_positions_rev_simd (Count_T *counts, Inquery_T *inquery, int indexsize, - Univcoord_T left, Univcoord_T left_plus_length, int genestrand) { - int startdiscard, enddiscard; - Genomecomp_T ptr, startptr, endptr, nextlow_rc, nextlow; - Genomecomp_T low1_rc, high1_rc, high0, low1, high1; - __m128i current, next, invert3; - /* __m128i array[16]; */ -#ifdef HAVE_SSE4_1 - __m128i temp; -#else - Genomecomp_T low0_rc, high0_rc; -#endif -#ifdef HAVE_AVX2 - Genomecomp_T low2, high2, low3, high3; - __m256i current256, next256, temp256, shift256; - __m256i biginvert3; + masked = EXTRACT(_masked,3); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos 
- 27; + } #endif - debug(printf("Starting count_positions_rev_simd\n")); + oligo = high_rc >> 24; /* For 31..28 */ + oligo |= nextlow_rc << 8; - if (left_plus_length < (Univcoord_T) indexsize) { - left_plus_length = 0; - } else { - left_plus_length -= indexsize; +#ifdef INDIVIDUAL_SHIFTS + masked = oligo & MASK5; /* 28 */ + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; } - ptr = startptr = left/32U*3; - endptr = left_plus_length/32U*3; - startdiscard = left % 32; /* (left+pos5) % 32 */ - enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */ - - invert3 = _mm_set_epi32(0x00000000,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF); -#ifdef HAVE_AVX2 - biginvert3 = _mm256_set_epi32(0x00000000,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF); - shift256 = _mm256_setr_epi32(7,0,1,2,3,4,5,6); -#endif + masked = (oligo >> 2) & MASK5; /* 29 */ + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; + } - if (left_plus_length <= left) { - /* Skip */ + masked = (oligo >> 4) & MASK5; /* 30 */ + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; + } + + masked = (oligo >> 6) & MASK5; /* 31 */ + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; + } - } else if (startptr == endptr) { -#ifdef WORDS_BIGENDIAN - high1 = Bigendian_convert_uint(ref_blocks[ptr]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - high1 = ref_blocks[ptr]; - low1 = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; + _oligo = _mm_setr_epi32(oligo, oligo >> 2, oligo >> 4, oligo >> 6); +#ifdef SIMD_MASK_THEN_STORE + 
_mm_store_si128((__m128i *) _masked,_mm_and_si128(_oligo, mask5)); +#else + _masked = _mm_and_si128(_oligo, mask5); #endif - if (mode == CMET_STRANDED) { - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); nextlow = Cmet_reduce_ct(nextlow); - } else { - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow); - } - } - low1_rc = ~low1; - high1_rc = ~high1; - nextlow_rc = ~nextlow; + masked = EXTRACT(_masked,0); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; + } - if (indexsize == 9) { - count_9mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); - } else if (indexsize == 8) { - count_8mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); - } else if (indexsize == 7) { - count_7mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); - } else if (indexsize == 6) { - count_6mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); - } else if (indexsize == 5) { - count_5mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); - } else { - fprintf(stderr,"indexsize %d not supported\n",indexsize); - abort(); - } + masked = EXTRACT(_masked,1); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; + } - } else { - /* Genome_print_blocks(ref_blocks,left,left+16); */ + masked = EXTRACT(_masked,2); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; + } - /* Start block */ -#ifdef WORDS_BIGENDIAN - high1 = 
Bigendian_convert_uint(ref_blocks[ptr]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high1 = ref_blocks[ptr]; - low1 = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; + masked = EXTRACT(_masked,3); + if (counts[masked]) { + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; + } #endif - if (mode == CMET_STRANDED) { - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); nextlow = Cmet_reduce_ct(nextlow); - } else { - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow); - } - } - nextlow_rc = ~nextlow; - low1_rc = ~low1; - high1_rc = ~high1; + return chrpos - 32; +} - if (indexsize == 9) { - count_9mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); - } else if (indexsize == 8) { - count_8mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); - } else if (indexsize == 7) { - count_7mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); - } else if (indexsize == 6) { - count_6mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); - } else if (indexsize == 5) { - count_5mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); - } else { - fprintf(stderr,"indexsize %d not supported\n",indexsize); - abort(); - } +#else /* HAVE_AVX2 */ - ptr += 3; +static int +store_5mers_rev_32 (Chrpos_T chrpos, Chrpos_T *table, UINT4 *positions, Count_T *counts, + Genomecomp_T low_rc, Genomecomp_T high_rc, Genomecomp_T nextlow_rc) { + Genomecomp_T masked, oligo; + __m256i _oligo, _masked, _counts; + __m256i _blocks, _envelopes, _addresses, 
_address_mask, _count_mask; - /* Middle blocks */ - if (indexsize == 9) { -#ifdef HAVE_AVX2 - while (ptr + 12 <= endptr) { -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 = Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); -#else - high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; - nextlow = ref_blocks[ptr+13]; -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - nextlow = Cmet_reduce_ga(nextlow); - } - } - current256 = _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3); - current256 = 
_mm256_xor_si256(current256,biginvert3); - nextlow_rc = ~nextlow; + _address_mask = _mm256_set1_epi32(0x3); + _count_mask = _mm256_set1_epi32(0xFF); + + + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(low_rc),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask5); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); + + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos)); + table[positions[masked] + (--counts[masked])] = chrpos; + } + } + + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 1)); + table[positions[masked] + (--counts[masked])] = chrpos - 1; + } + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 2)); + table[positions[masked] + (--counts[masked])] = chrpos - 2; + } + } - temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 3)); + table[positions[masked] + (--counts[masked])] = chrpos - 3; + } + } - count_9mers_rev_simd_128(counts,inquery,current256,next256); + if (EXTRACT256(_counts,4)) { + masked = 
EXTRACT256(_masked,4); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 4)); + table[positions[masked] + (--counts[masked])] = chrpos - 4; + } + } - ptr += 12; - } -#endif + if (EXTRACT256(_counts,5)) { + masked = EXTRACT256(_masked,5); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 5)); + table[positions[masked] + (--counts[masked])] = chrpos - 5; + } + } - while (ptr + 6 <= endptr) { -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); -#else - high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - nextlow = ref_blocks[ptr+7]; -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); - } - } + if (EXTRACT256(_counts,6)) { + masked = EXTRACT256(_masked,6); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 6)); + table[positions[masked] + (--counts[masked])] = chrpos - 6; + } + } - current = _mm_set_epi32(nextlow_rc,high0,low1,high1); - current = _mm_xor_si128(current,invert3); + if 
(EXTRACT256(_counts,7)) { + masked = EXTRACT256(_masked,7); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 7)); + table[positions[masked] + (--counts[masked])] = chrpos - 7; + } + } - nextlow_rc = ~nextlow; -#ifdef HAVE_SSE4_1 - /* high0_rc = _mm_extract_epi32(current,2); */ - /* low1_rc = _mm_extract_epi32(current,1); */ - /* high1_rc = _mm_extract_epi32(current,0); */ - - temp = _mm_insert_epi32(current,nextlow_rc,0x03); - next = _mm_shuffle_epi32(temp,0x93); -#else - high0_rc = ~high0; - low1_rc = ~low1; - high1_rc = ~high1; - next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc); -#endif + _oligo = _mm256_srli_epi32(_oligo, 16); + _masked = _mm256_and_si256(_oligo, bigmask5); -#if 0 - extract_9mers_rev_simd(array,current,next); - count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array); -#else - count_9mers_rev_simd(counts,inquery,current,next); -#endif - ptr += 6; - } + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); - if (ptr + 3 <= endptr) { -#ifdef WORDS_BIGENDIAN - high1 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high1 = ref_blocks[ptr]; - /* low1 = ref_blocks[ptr+1]; */ - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = 
Cmet_reduce_ct(nextlow); - } else { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } - } + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 8)); + table[positions[masked] + (--counts[masked])] = chrpos - 8; + } + } - /* low1_rc = ~low1; */ - low1_rc = nextlow_rc; + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 9)); + table[positions[masked] + (--counts[masked])] = chrpos - 9; + } + } - nextlow_rc = ~nextlow; - high1_rc = ~high1; + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 10)); + table[positions[masked] + (--counts[masked])] = chrpos - 10; + } + } - count_9mers_rev(counts,inquery,low1_rc,high1_rc,nextlow_rc); - ptr += 3; - } + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 11)); + table[positions[masked] + (--counts[masked])] = chrpos - 11; + } + } - } else if (indexsize == 8) { -#ifdef HAVE_AVX2 - while (ptr + 12 <= endptr) { -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 = Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); -#else - 
high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; - nextlow = ref_blocks[ptr+13]; -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - nextlow = Cmet_reduce_ga(nextlow); - } - } - current256 = _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3); - current256 = _mm256_xor_si256(current256,biginvert3); - nextlow_rc = ~nextlow; + oligo = low_rc >> 24; /* For 15..12 */ + oligo |= high_rc << 8; - temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask5); - count_8mers_rev_simd_128(counts,inquery,current256,next256); + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + 
_envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); - ptr += 12; - } -#endif + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 12)); + table[positions[masked] + (--counts[masked])] = chrpos - 12; + } + } - while (ptr + 6 <= endptr) { -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); -#else - high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - nextlow = ref_blocks[ptr+7]; -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); - } - } + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 13)); + table[positions[masked] + (--counts[masked])] = chrpos - 13; + } + } - current = _mm_set_epi32(nextlow_rc,high0,low1,high1); - current = _mm_xor_si128(current,invert3); - nextlow_rc = 
~nextlow; -#ifdef HAVE_SSE4_1 - /* high0_rc = _mm_extract_epi32(current,2); */ - /* low1_rc = _mm_extract_epi32(current,1); */ - /* high1_rc = _mm_extract_epi32(current,0); */ - - temp = _mm_insert_epi32(current,nextlow_rc,0x03); - next = _mm_shuffle_epi32(temp,0x93); -#else - high0_rc = ~high0; - low1_rc = ~low1; - high1_rc = ~high1; + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 14)); + table[positions[masked] + (--counts[masked])] = chrpos - 14; + } + } - next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc); -#endif + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 15)); + table[positions[masked] + (--counts[masked])] = chrpos - 15; + } + } -#if 0 - extract_8mers_rev_simd(array,current,next); - count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array); -#else - count_8mers_rev_simd(counts,inquery,current,next); -#endif - ptr += 6; - } - if (ptr + 3 <= endptr) { -#ifdef WORDS_BIGENDIAN - high1 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high1 = ref_blocks[ptr]; - /* low1 = ref_blocks[ptr+1]; */ - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow); - } else { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } - } + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(high_rc),bigshift0to14); + _masked = 
_mm256_and_si256(_oligo, bigmask5); - /* low1_rc = ~low1; */ - low1_rc = nextlow_rc; + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); - nextlow_rc = ~nextlow; - high1_rc = ~high1; + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 16)); + table[positions[masked] + (--counts[masked])] = chrpos - 16; + } + } - count_8mers_rev(counts,inquery,low1_rc,high1_rc,nextlow_rc); - ptr += 3; - } + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 17)); + table[positions[masked] + (--counts[masked])] = chrpos - 17; + } + } - } else if (indexsize == 7) { -#ifdef HAVE_AVX2 - while (ptr + 12 <= endptr) { -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 = Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); -#else - high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; - nextlow = 
ref_blocks[ptr+13]; -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - nextlow = Cmet_reduce_ga(nextlow); - } - } + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 18)); + table[positions[masked] + (--counts[masked])] = chrpos - 18; + } + } - current256 = _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3); - current256 = _mm256_xor_si256(current256,biginvert3); - nextlow_rc = ~nextlow; + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 19)); + table[positions[masked] + (--counts[masked])] = chrpos - 19; + } + } - temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); + if (EXTRACT256(_counts,4)) { + masked = EXTRACT256(_masked,4); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + 
debug(printf("Storing masked %u at %u\n",masked,chrpos - 20)); + table[positions[masked] + (--counts[masked])] = chrpos - 20; + }} - count_7mers_rev_simd_128(counts,inquery,current256,next256); - ptr += 12; - } -#endif + if (EXTRACT256(_counts,5)) { + masked = EXTRACT256(_masked,5); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 21)); + table[positions[masked] + (--counts[masked])] = chrpos - 21; + } + } - while (ptr + 6 <= endptr) { -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); -#else - high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - nextlow = ref_blocks[ptr+7]; -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); - } - } + if (EXTRACT256(_counts,6)) { + masked = EXTRACT256(_masked,6); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 22)); + table[positions[masked] + (--counts[masked])] = chrpos - 22; + } + } - current = _mm_set_epi32(nextlow_rc,high0,low1,high1); - current = _mm_xor_si128(current,invert3); - nextlow_rc = ~nextlow; -#ifdef 
HAVE_SSE4_1 - /* high0_rc = _mm_extract_epi32(current,2); */ - /* low1_rc = _mm_extract_epi32(current,1); */ - /* high1_rc = _mm_extract_epi32(current,0); */ - - temp = _mm_insert_epi32(current,nextlow_rc,0x03); - next = _mm_shuffle_epi32(temp,0x93); -#else - high0_rc = ~high0; - low1_rc = ~low1; - high1_rc = ~high1; + if (EXTRACT256(_counts,7)) { + masked = EXTRACT256(_masked,7); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 23)); + table[positions[masked] + (--counts[masked])] = chrpos - 23; + } + } - next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc); -#endif -#if 0 - extract_7mers_rev_simd(array,current,next); - count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array); -#else - count_7mers_rev_simd(counts,inquery,current,next); -#endif - ptr += 6; - } + _oligo = _mm256_srli_epi32(_oligo, 16); + _masked = _mm256_and_si256(_oligo, bigmask5); + + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); + + if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 24)); + table[positions[masked] + (--counts[masked])] = chrpos - 24; + } + } + + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 25)); + table[positions[masked] + (--counts[masked])] = chrpos - 25; + } + } + + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* 
Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 26)); + table[positions[masked] + (--counts[masked])] = chrpos - 26; + } + } - if (ptr + 3 <= endptr) { -#ifdef WORDS_BIGENDIAN - high1 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high1 = ref_blocks[ptr]; - /* low1 = ref_blocks[ptr+1]; */ - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow); - } else { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } - } + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 27)); + table[positions[masked] + (--counts[masked])] = chrpos - 27; + } + } - /* low1_rc = ~low1; */ - low1_rc = nextlow_rc; - nextlow_rc = ~nextlow; - high1_rc = ~high1; + oligo = high_rc >> 24; /* For 31..28 */ + oligo |= nextlow_rc << 8; - count_7mers_rev(counts,inquery,low1_rc,high1_rc,nextlow_rc); - ptr += 3; - } + _oligo = _mm256_srlv_epi32(_mm256_set1_epi32(oligo),bigshift0to14); + _masked = _mm256_and_si256(_oligo, bigmask5); - } else if (indexsize == 6) { -#ifdef HAVE_AVX2 - while (ptr + 12 <= endptr) { -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 = Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = 
Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); -#else - high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; - nextlow = ref_blocks[ptr+13]; -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - nextlow = Cmet_reduce_ga(nextlow); - } - } + _blocks = _mm256_srli_epi32(_masked,2); /* div by 4 bytes/int */ + _addresses = _mm256_and_si256(_masked,_address_mask); + _addresses = _mm256_slli_epi32(_addresses,3); /* Multiply by 8 bits/byte */ + _envelopes = _mm256_i32gather_epi32((const int *) counts,_blocks,/*scale*/4); + _counts = _mm256_srlv_epi32(_envelopes,_addresses); /* Puts byte on right */ + _counts = _mm256_and_si256(_counts,_count_mask); - current256 = _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3); - current256 = _mm256_xor_si256(current256,biginvert3); - nextlow_rc = ~nextlow; + 
if (EXTRACT256(_counts,0)) { + masked = EXTRACT256(_masked,0); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 28)); + table[positions[masked] + (--counts[masked])] = chrpos - 28; + } + } - temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); + if (EXTRACT256(_counts,1)) { + masked = EXTRACT256(_masked,1); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 29)); + table[positions[masked] + (--counts[masked])] = chrpos - 29; + } + } - count_6mers_rev_simd_128(counts,inquery,current256,next256); + if (EXTRACT256(_counts,2)) { + masked = EXTRACT256(_masked,2); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 30)); + table[positions[masked] + (--counts[masked])] = chrpos - 30; + } + } - ptr += 12; - } -#endif + if (EXTRACT256(_counts,3)) { + masked = EXTRACT256(_masked,3); + if (counts[masked]) { /* Have to re-check if there is a conflict */ + debug(printf("Storing masked %u at %u\n",masked,chrpos - 31)); + table[positions[masked] + (--counts[masked])] = chrpos - 31; + } + } - while (ptr + 6 <= endptr) { -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); -#else - high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - nextlow = ref_blocks[ptr+7]; -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == 
CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); - } - } + return chrpos - 32; +} - current = _mm_set_epi32(nextlow_rc,high0,low1,high1); - current = _mm_xor_si128(current,invert3); - nextlow_rc = ~nextlow; -#ifdef HAVE_SSE4_1 - /* high0_rc = _mm_extract_epi32(current,2); */ - /* low1_rc = _mm_extract_epi32(current,1); */ - /* high1_rc = _mm_extract_epi32(current,0); */ - - temp = _mm_insert_epi32(current,nextlow_rc,0x03); - next = _mm_shuffle_epi32(temp,0x93); -#else - high0_rc = ~high0; - low1_rc = ~low1; - high1_rc = ~high1; +#endif /* HAVE_AVX2 */ - next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc); -#endif -#if 0 - extract_6mers_rev_simd(array,current,next); - count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array); -#else - count_6mers_rev_simd(counts,inquery,current,next); -#endif - ptr += 6; - } - if (ptr + 3 <= endptr) { -#ifdef WORDS_BIGENDIAN - high1 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high1 = ref_blocks[ptr]; - /* low1 = ref_blocks[ptr+1]; */ - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow); - } else { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } - } +#ifndef HAVE_SSE2 +static void +count_positions_rev_std (Count_T *counts, int 
indexsize, Univcoord_T left, Univcoord_T left_plus_length, + int genestrand) { + int startdiscard, enddiscard; + Genomecomp_T ptr, startptr, endptr, low_rc, high_rc, nextlow_rc, + low, high, nextlow; - /* low1_rc = ~low1; */ - low1_rc = nextlow_rc; + debug(printf("Starting count_positions_rev_std\n")); - nextlow_rc = ~nextlow; - high1_rc = ~high1; - count_6mers_rev(counts,inquery,low1_rc,high1_rc,nextlow_rc); - ptr += 3; - } + if (left_plus_length < (Univcoord_T) indexsize) { + left_plus_length = 0; + } else { + left_plus_length -= indexsize; + } - } else if (indexsize == 5) { -#ifdef HAVE_AVX2 - while (ptr + 12 <= endptr) { + ptr = startptr = left/32U*3; + endptr = left_plus_length/32U*3; + startdiscard = left % 32; /* (left+pos5) % 32 */ + enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */ + + if (left_plus_length <= left) { + /* Skip */ + + } else if (startptr == endptr) { #ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 = Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); -#else - high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; - nextlow = ref_blocks[ptr+13]; + high = Bigendian_convert_uint(ref_blocks[ptr]); + low = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); +#else + high = ref_blocks[ptr]; + low = ref_blocks[ptr+1]; + nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ga(high0); /* low0 
= Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - nextlow = Cmet_reduce_ga(nextlow); - } - } - current256 = _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3); - current256 = _mm256_xor_si256(current256,biginvert3); - nextlow_rc = ~nextlow; + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high = Atoi_reduce_tc(high); low = 
Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } + } - temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); + low_rc = ~low; + high_rc = ~high; + nextlow_rc = ~nextlow; - count_5mers_rev_simd_128(counts,inquery,current256,next256); + if (indexsize == 9) { + count_9mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); + } else if (indexsize == 8) { + count_8mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); + } else if (indexsize == 7) { + count_7mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); + } else if (indexsize == 6) { + count_6mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); + } else if (indexsize == 5) { + count_5mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); + } else { + fprintf(stderr,"indexsize %d not supported\n",indexsize); + abort(); + } - ptr += 12; - } -#endif + } else { + /* Genome_print_blocks(ref_blocks,left,left+16); */ - while (ptr + 6 <= endptr) { + /* Start block */ #ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); + high = Bigendian_convert_uint(ref_blocks[ptr]); + low = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - nextlow = ref_blocks[ptr+7]; + high = ref_blocks[ptr]; + 
low = ref_blocks[ptr+1]; + nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); - } - } - current = _mm_set_epi32(nextlow_rc,high0,low1,high1); - current = _mm_xor_si128(current,invert3); - nextlow_rc = ~nextlow; -#ifdef HAVE_SSE4_1 - /* high0_rc = _mm_extract_epi32(current,2); */ - /* low1_rc = _mm_extract_epi32(current,1); */ - /* high1_rc = _mm_extract_epi32(current,0); */ - - temp = _mm_insert_epi32(current,nextlow_rc,0x03); - next = _mm_shuffle_epi32(temp,0x93); -#else - high0_rc = ~high0; - low1_rc = ~low1; - high1_rc = ~high1; + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) 
{ + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } + } - next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc); -#endif + low_rc = ~low; + high_rc = ~high; + nextlow_rc = ~nextlow; -#if 0 - extract_5mers_rev_simd(array,current,next); - count_fwdrev_simd(counts,inquery,(Genomecomp_T *) array); -#else - count_5mers_rev_simd(counts,inquery,current,next); -#endif - ptr += 6; - } + if (indexsize == 9) { + count_9mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + } else if (indexsize == 8) { + count_8mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + } else if (indexsize == 7) { + count_7mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + } else if (indexsize == 6) { + count_6mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + } else if (indexsize == 5) { + count_5mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + } else { + fprintf(stderr,"indexsize %d not supported\n",indexsize); + abort(); + } - if (ptr + 3 <= endptr) { + ptr += 3; + + /* Middle blocks */ + while (ptr + 3 <= endptr) { #ifdef WORDS_BIGENDIAN - high1 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high1 = ref_blocks[ptr]; - /* low1 = ref_blocks[ptr+1]; */ - nextlow = ref_blocks[ptr+4]; + high = Bigendian_convert_uint(ref_blocks[ptr]); + low = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); +#else + high = ref_blocks[ptr]; + low = ref_blocks[ptr+1]; + nextlow = 
ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow); - } else { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } - } - /* low1_rc = ~low1; */ - low1_rc = nextlow_rc; - - nextlow_rc = ~nextlow; - high1_rc = ~high1; + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } + } - count_5mers_rev(counts,inquery,low1_rc,high1_rc,nextlow_rc); - ptr += 3; + low_rc = ~low; + high_rc = ~high; + nextlow_rc = ~nextlow; + + if (indexsize == 9) { + count_9mers_rev_32(counts,low_rc,high_rc,nextlow_rc); 
+ } else if (indexsize == 8) { + count_8mers_rev_32(counts,low_rc,high_rc,nextlow_rc); + } else if (indexsize == 7) { + count_7mers_rev_32(counts,low_rc,high_rc,nextlow_rc); + } else if (indexsize == 6) { + count_6mers_rev_32(counts,low_rc,high_rc,nextlow_rc); + } else if (indexsize == 5) { + count_5mers_rev_32(counts,low_rc,high_rc,nextlow_rc); + } else { + abort(); } - } else { - abort(); + ptr += 3; } /* End block */ assert(ptr == endptr); -#ifdef WORDS_BIGENDIAN - high1 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ +#ifdef WORDS_BIGENDIAN + high = Bigendian_convert_uint(ref_blocks[ptr]); + low = Bigendian_convert_uint(ref_blocks[ptr+1]); nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - high1 = ref_blocks[ptr]; - /* low1 = ref_blocks[ptr+1]; */ + high = ref_blocks[ptr]; + low = ref_blocks[ptr+1]; nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); + + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); } else if (mode == CMET_NONSTRANDED) { if (genestrand > 0) { - high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow); + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); } else { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else { + high = 
Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); } } - /* low1_rc = ~low1; */ - low1_rc = nextlow_rc; - + low_rc = ~low; + high_rc = ~high; nextlow_rc = ~nextlow; - high1_rc = ~high1; if (indexsize == 9) { - count_9mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + count_9mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); } else if (indexsize == 8) { - count_8mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + count_8mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); } else if (indexsize == 7) { - count_7mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + count_7mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); } else if (indexsize == 6) { - count_6mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + count_6mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); } else if (indexsize == 5) { - count_5mers_rev_partial(counts,inquery,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + count_5mers_rev_partial(counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); } else { abort(); } + } return; @@ -47679,65 +31053,316 @@ #endif -#ifndef USE_SIMD_FOR_COUNTS + +#ifdef HAVE_AVX2 +static __m256i +apply_mode_rev_256 (Genomecomp_T *block_ptr, Mode_T mode, int genestrand, Genomecomp_T *nextlow, Genomecomp_T nextlow_rc) { + Genomecomp_T low0, high0, low1, 
high1, low2, high2, low3, high3; + + high0 = block_ptr[0]; /* low0 = block_ptr[1]; */ + high1 = block_ptr[3]; low1 = block_ptr[4]; + high2 = block_ptr[6]; low2 = block_ptr[7]; + high3 = block_ptr[9]; low3 = block_ptr[10]; + *nextlow = block_ptr[13]; + + if (mode == CMET_STRANDED) { + high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ + high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); + high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); + high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); + *nextlow = Cmet_reduce_ga(*nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ + high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); + high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); + high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); + *nextlow = Cmet_reduce_ct(*nextlow); + } else { + high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ + high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); + high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); + high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); + *nextlow = Cmet_reduce_ga(*nextlow); + } + + } else if (mode == ATOI_STRANDED) { + high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */ + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2); + high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3); + *nextlow = Atoi_reduce_ag(*nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */ + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2); + high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3); + *nextlow = Atoi_reduce_tc(*nextlow); + } else { + high0 = Atoi_reduce_ag(high0); /* low0 = 
Atoi_reduce_ag(low0); */ + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2); + high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3); + *nextlow = Atoi_reduce_ag(*nextlow); + } + + } else if (mode == TTOC_STRANDED) { + high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */ + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2); + high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3); + *nextlow = Atoi_reduce_tc(*nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */ + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2); + high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3); + *nextlow = Atoi_reduce_ag(*nextlow); + } else { + high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */ + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2); + high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3); + *nextlow = Atoi_reduce_tc(*nextlow); + } + } + + return _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3); +} +#endif + +#ifdef HAVE_AVX512 +static __m512i +apply_mode_rev_512 (Genomecomp_T *block_ptr, Mode_T mode, int genestrand, Genomecomp_T *nextlow, Genomecomp_T nextlow_rc) { + Genomecomp_T low0, high0, low1, high1, low2, high2, low3, high3, + low4, high4, low5, high5, low6, high6, low7, high7; + + high0 = block_ptr[0]; /* low0 = block_ptr[1]; */ + high1 = block_ptr[3]; low1 = block_ptr[4]; + high2 = block_ptr[6]; low2 = block_ptr[7]; + high3 = block_ptr[9]; low3 = block_ptr[10]; + + high4 = block_ptr[12]; low4 = block_ptr[13]; + high5 = block_ptr[15]; low5 = block_ptr[16]; + high6 = block_ptr[18]; low6 = block_ptr[19]; + high7 = block_ptr[21]; low7 = 
block_ptr[22]; + *nextlow = block_ptr[25]; + + if (mode == CMET_STRANDED) { + high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ + high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); + high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); + high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); + high4 = Cmet_reduce_ga(high4); low4 = Cmet_reduce_ga(low4); + high5 = Cmet_reduce_ga(high5); low5 = Cmet_reduce_ga(low5); + high6 = Cmet_reduce_ga(high6); low6 = Cmet_reduce_ga(low6); + high7 = Cmet_reduce_ga(high7); low7 = Cmet_reduce_ga(low7); + *nextlow = Cmet_reduce_ga(*nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ + high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); + high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); + high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); + high4 = Cmet_reduce_ct(high4); low4 = Cmet_reduce_ct(low4); + high5 = Cmet_reduce_ct(high5); low5 = Cmet_reduce_ct(low5); + high6 = Cmet_reduce_ct(high6); low6 = Cmet_reduce_ct(low6); + high7 = Cmet_reduce_ct(high7); low7 = Cmet_reduce_ct(low7); + *nextlow = Cmet_reduce_ct(*nextlow); + } else { + high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ + high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); + high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); + high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); + high4 = Cmet_reduce_ga(high4); low4 = Cmet_reduce_ga(low4); + high5 = Cmet_reduce_ga(high5); low5 = Cmet_reduce_ga(low5); + high6 = Cmet_reduce_ga(high6); low6 = Cmet_reduce_ga(low6); + high7 = Cmet_reduce_ga(high7); low7 = Cmet_reduce_ga(low7); + *nextlow = Cmet_reduce_ga(*nextlow); + } + + } else if (mode == ATOI_STRANDED) { + high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */ + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + high2 = Atoi_reduce_ag(high2); low2 = 
Atoi_reduce_ag(low2); + high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3); + high4 = Atoi_reduce_ag(high4); low4 = Atoi_reduce_ag(low4); + high5 = Atoi_reduce_ag(high5); low5 = Atoi_reduce_ag(low5); + high6 = Atoi_reduce_ag(high6); low6 = Atoi_reduce_ag(low6); + high7 = Atoi_reduce_ag(high7); low7 = Atoi_reduce_ag(low7); + *nextlow = Atoi_reduce_ag(*nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */ + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2); + high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3); + high4 = Atoi_reduce_tc(high4); low4 = Atoi_reduce_tc(low4); + high5 = Atoi_reduce_tc(high5); low5 = Atoi_reduce_tc(low5); + high6 = Atoi_reduce_tc(high6); low6 = Atoi_reduce_tc(low6); + high7 = Atoi_reduce_tc(high7); low7 = Atoi_reduce_tc(low7); + *nextlow = Atoi_reduce_tc(*nextlow); + } else { + high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */ + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2); + high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3); + high4 = Atoi_reduce_ag(high4); low4 = Atoi_reduce_ag(low4); + high5 = Atoi_reduce_ag(high5); low5 = Atoi_reduce_ag(low5); + high6 = Atoi_reduce_ag(high6); low6 = Atoi_reduce_ag(low6); + high7 = Atoi_reduce_ag(high7); low7 = Atoi_reduce_ag(low7); + *nextlow = Atoi_reduce_ag(*nextlow); + } + + } else if (mode == TTOC_STRANDED) { + high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */ + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2); + high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3); + high4 = Atoi_reduce_tc(high4); low4 = Atoi_reduce_tc(low4); + high5 = Atoi_reduce_tc(high5); low5 = Atoi_reduce_tc(low5); + high6 = Atoi_reduce_tc(high6); low6 = 
Atoi_reduce_tc(low6); + high7 = Atoi_reduce_tc(high7); low7 = Atoi_reduce_tc(low7); + *nextlow = Atoi_reduce_tc(*nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */ + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + high2 = Atoi_reduce_ag(high2); low2 = Atoi_reduce_ag(low2); + high3 = Atoi_reduce_ag(high3); low3 = Atoi_reduce_ag(low3); + high4 = Atoi_reduce_ag(high4); low4 = Atoi_reduce_ag(low4); + high5 = Atoi_reduce_ag(high5); low5 = Atoi_reduce_ag(low5); + high6 = Atoi_reduce_ag(high6); low6 = Atoi_reduce_ag(low6); + high7 = Atoi_reduce_ag(high7); low7 = Atoi_reduce_ag(low7); + *nextlow = Atoi_reduce_ag(*nextlow); + } else { + high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */ + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + high2 = Atoi_reduce_tc(high2); low2 = Atoi_reduce_tc(low2); + high3 = Atoi_reduce_tc(high3); low3 = Atoi_reduce_tc(low3); + high4 = Atoi_reduce_tc(high4); low4 = Atoi_reduce_tc(low4); + high5 = Atoi_reduce_tc(high5); low5 = Atoi_reduce_tc(low5); + high6 = Atoi_reduce_tc(high6); low6 = Atoi_reduce_tc(low6); + high7 = Atoi_reduce_tc(high7); low7 = Atoi_reduce_tc(low7); + *nextlow = Atoi_reduce_tc(*nextlow); + } + } + + return _mm512_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3, + low4,high4,low5,high5,low6,high6,low7,high7); +} +#endif + + + +#ifdef HAVE_SSE2 static void -store_positions_rev_std (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, int indexsize, - Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos, - int genestrand) { +count_positions_rev_simd (Count_T *counts, int indexsize, + Univcoord_T left, Univcoord_T left_plus_length, int genestrand) { int startdiscard, enddiscard; - Genomecomp_T ptr, startptr, endptr, low_rc, high_rc, nextlow_rc, - low, high, nextlow; + Genomecomp_T ptr, startptr, endptr, nextlow_rc, nextlow; + Genomecomp_T low1_rc, high1_rc, 
high0, low1, high1; + __m128i current, a, b, next, invert3, invert4; + __m128i array[16]; +#ifdef HAVE_AVX2 + __m256i array256[16]; + Genomecomp_T low2, high2, low3, high3; + __m256i current256, a256, b256, c256, d256, next256, temp256, shift256; + __m256i biginvert3, biginvert4; +#endif +#ifdef HAVE_AVX512 + __m128i temp; + __m512i array512[16]; + Genomecomp_T low4, high4, low5, high5, low6, high6, low7, high7; + __m512i current512, a512, b512, next512, temp512, shift512; + __m512i hugeinvert3, hugeinvert4; +#endif + + debug(printf("Starting count_positions_rev_simd\n")); if (left_plus_length < (Univcoord_T) indexsize) { left_plus_length = 0; } else { left_plus_length -= indexsize; } - chrpos += (left_plus_length - left); /* We are starting from the right */ ptr = startptr = left/32U*3; endptr = left_plus_length/32U*3; startdiscard = left % 32; /* (left+pos5) % 32 */ enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */ + invert3 = _mm_set_epi32(0x00000000,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF); + invert4 = _mm_set1_epi32(0xFFFFFFFF); +#ifdef HAVE_AVX2 + biginvert3 = _mm256_set_epi32(0x00000000,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF); + biginvert4 = _mm256_set1_epi32(0xFFFFFFFF); + shift256 = _mm256_setr_epi32(7,0,1,2,3,4,5,6); +#endif +#ifdef HAVE_AVX512 + hugeinvert3 = _mm512_inserti64x4(_mm512_set1_epi32(0xFFFFFFFF), biginvert3, 0x1); + hugeinvert4 = _mm512_set1_epi32(0xFFFFFFFF); + shift512 = _mm512_setr_epi32(15,0,1,2,3,4,5,6, 7,8,9,10,11,12,13,14); +#endif + if (left_plus_length <= left) { /* Skip */ } else if (startptr == endptr) { #ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); + high1 = Bigendian_convert_uint(ref_blocks[ptr]); + low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; + high1 = ref_blocks[ptr]; + low1 = ref_blocks[ptr+1]; 
nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow); } else if (mode == CMET_NONSTRANDED) { if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); nextlow = Cmet_reduce_ct(nextlow); } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow); + } else { + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow); + } else { + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow); } } - low_rc = ~low; - high_rc = ~high; + low1_rc = ~low1; + high1_rc = ~high1; nextlow_rc = ~nextlow; if (indexsize == 9) { - chrpos = store_9mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); + count_9mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); } else if (indexsize == 8) { - chrpos = 
store_8mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); + count_8mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); } else if (indexsize == 7) { - chrpos = store_7mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); + count_7mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); } else if (indexsize == 6) { - chrpos = store_6mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); + count_6mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); } else if (indexsize == 5) { - chrpos = store_5mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); + count_5mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); } else { fprintf(stderr,"indexsize %d not supported\n",indexsize); abort(); @@ -47748,38 +31373,57 @@ /* Start block */ #ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); + high1 = Bigendian_convert_uint(ref_blocks[ptr]); + low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; + high1 = ref_blocks[ptr]; + low1 = ref_blocks[ptr+1]; nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow); } else if (mode == CMET_NONSTRANDED) { if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); 
nextlow = Cmet_reduce_ct(nextlow); } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow); + } else { + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow); + } else { + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow); } } - low_rc = ~low; - high_rc = ~high; nextlow_rc = ~nextlow; + low1_rc = ~low1; + high1_rc = ~high1; if (indexsize == 9) { - chrpos = store_9mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + count_9mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); } else if (indexsize == 8) { - chrpos = store_8mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + count_8mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); } else if (indexsize == 7) { - chrpos = store_7mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + count_7mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); } else if (indexsize == 6) { - chrpos = 
store_6mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + count_6mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); } else if (indexsize == 5) { - chrpos = store_5mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + count_5mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); } else { fprintf(stderr,"indexsize %d not supported\n",indexsize); abort(); @@ -47788,153 +31432,262 @@ ptr += 3; /* Middle blocks */ - if (indexsize == 9) { - while (ptr + 3 <= endptr) { -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } +#ifdef HAVE_AVX512 + while (ptr + 24 <= endptr) { - low_rc = ~low; - high_rc = ~high; - nextlow_rc = ~nextlow; + if (mode == STANDARD) { + a512 = _mm512_loadu_si512((__m512i *) &(ref_blocks[ptr])); + b512 = _mm512_loadu_si512((__m512i *) &(ref_blocks[ptr+7])); + current512 = _mm512_permutex2var_epi32(a512,_mm512_setr_epi32(16+14, 16+15, 16+11, 16+12, 16+8, 16+9, 12, 13, 9, 10, 6, 7, 3, 4, 0, 1), b512); + current512 = _mm512_xor_si512(current512,hugeinvert4); + nextlow = ref_blocks[ptr+25]; + } else { + current512 = apply_mode_rev_512(&(ref_blocks[ptr]),mode,genestrand,&nextlow,nextlow_rc); + current512 = _mm512_xor_si512(current512,hugeinvert3); + } - chrpos = 
store_9mers_rev(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc); - ptr += 3; + nextlow_rc = ~nextlow; /* Take from this loop */ + + current = _mm512_extracti32x4_epi32(current512,3); + temp = _mm_insert_epi32(current,nextlow_rc,0x03); + temp512 = _mm512_inserti32x4(current512,temp,0x03); + next512 = _mm512_permutexvar_epi32(shift512,temp512); /* shift goes first! */ + + if (indexsize == 9) { + extract_9mers_rev_simd_256(array512,current512,next512); + } else if (indexsize == 8) { + extract_8mers_rev_simd_256(array512,current512,next512); + } else if (indexsize == 7) { + extract_7mers_rev_simd_256(array512,current512,next512); + } else if (indexsize == 6) { + extract_6mers_rev_simd_256(array512,current512,next512); + } else if (indexsize == 5) { + extract_5mers_rev_simd_256(array512,current512,next512); + } else { + abort(); } + count_fwdrev_simd_n(counts,(Genomecomp_T *) array512,256); - } else if (indexsize == 8) { - while (ptr + 3 <= endptr) { -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; + ptr += 24; + } #endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - low_rc = ~low; - high_rc = ~high; - nextlow_rc = ~nextlow; - chrpos = store_8mers_rev(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc); - ptr += 3; +#ifdef HAVE_AVX2 + while (ptr + 12 <= endptr) { + + if (mode == STANDARD) { + a256 = _mm256_loadu_si256((__m256i *) &(ref_blocks[ptr])); + b256 = 
_mm256_loadu_si256((__m256i *) &(ref_blocks[ptr+3])); + c256 = _mm256_unpacklo_epi64(b256,a256); + d256 = _mm256_unpackhi_epi64(b256,a256); + current256 = _mm256_permute2x128_si256(c256, d256, 0x03); + current256 = _mm256_xor_si256(current256,biginvert4); + nextlow = ref_blocks[ptr+13]; + } else { + current256 = apply_mode_rev_256(&(ref_blocks[ptr]),mode,genestrand,&nextlow,nextlow_rc); + current256 = _mm256_xor_si256(current256,biginvert3); } - } else if (indexsize == 7) { - while (ptr + 3 <= endptr) { -#ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; + nextlow_rc = ~nextlow; /* Take from this loop */ + +#if 0 + /* Doesn't work, because performs shift within 128-bit lanes */ + next256 = _mm256_alignr_epi8(current256,_mm256_set1_epi32(nextlow_rc),0); +#else + temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07); + next256 = _mm256_permutevar8x32_epi32(temp256,shift256); #endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } + + if (indexsize == 9) { + extract_9mers_rev_simd_128(array256,current256,next256); + } else if (indexsize == 8) { + extract_8mers_rev_simd_128(array256,current256,next256); + } else if (indexsize == 7) { + extract_7mers_rev_simd_128(array256,current256,next256); + } else if (indexsize == 6) { + extract_6mers_rev_simd_128(array256,current256,next256); + } else if (indexsize == 5) { + extract_5mers_rev_simd_128(array256,current256,next256); + } else { + abort(); + } + 
count_fwdrev_simd_n(counts,(Genomecomp_T *) array256,128); - low_rc = ~low; - high_rc = ~high; - nextlow_rc = ~nextlow; + ptr += 12; + } +#endif - chrpos = store_7mers_rev(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc); - ptr += 3; - } + while (ptr + 6 <= endptr) { - } else if (indexsize == 6) { - while (ptr + 3 <= endptr) { + if (mode == STANDARD) { +#ifdef HAVE_SSSE3 + a = _mm_loadu_si128((__m128i *) &(ref_blocks[ptr])); + b = _mm_loadu_si128((__m128i *) &(ref_blocks[ptr+3])); + current = _mm_unpacklo_epi64(b,a); + current = _mm_xor_si128(current,invert4); + nextlow = ref_blocks[ptr+7]; +#else + /* Solution for SSE2. Need separate values to construct "next" */ + high0 = ref_blocks[ptr]; /* low0 = ref_blocks[ptr+1]; */ + high1 = ref_blocks[ptr+3]; low1 = ref_blocks[ptr+4]; + nextlow = ref_blocks[ptr+7]; + + current = _mm_set_epi32(nextlow_rc,high0,low1,high1); + current = _mm_xor_si128(current,invert3); +#endif + + } else { #ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; + high0 = Bigendian_convert_uint(ref_blocks[ptr]); /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ + high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); +#else + high0 = ref_blocks[ptr]; /* low0 = ref_blocks[ptr+1]; */ + high1 = ref_blocks[ptr+3]; low1 = ref_blocks[ptr+4]; + nextlow = ref_blocks[ptr+7]; #endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ + high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); + nextlow = 
Cmet_reduce_ga(nextlow); } else if (mode == CMET_NONSTRANDED) { if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ + high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); + nextlow = Cmet_reduce_ct(nextlow); + } else { + high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ + high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); + nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */ + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */ + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + nextlow = Atoi_reduce_tc(nextlow); + } else { + high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */ + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */ + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */ + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + nextlow = Atoi_reduce_ag(nextlow); } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */ + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + nextlow = Atoi_reduce_tc(nextlow); } } - low_rc = ~low; - high_rc = ~high; - nextlow_rc = ~nextlow; - - chrpos = 
store_6mers_rev(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc); - ptr += 3; + current = _mm_set_epi32(nextlow_rc,high0,low1,high1); + current = _mm_xor_si128(current,invert3); } + + nextlow_rc = ~nextlow; /* Take from this loop */ +#if defined(HAVE_SSSE3) + next = _mm_alignr_epi8(current,_mm_set1_epi32(nextlow_rc),12); +#elif 0 + /* Previous solution for SSE4.1 */ + temp = _mm_insert_epi32(current,nextlow_rc,0x03); + next = _mm_shuffle_epi32(temp,0x93); +#else + /* Solution for SSE2 */ + next = _mm_set_epi32(~high0,~low1,~high1,nextlow_rc); +#endif + + if (indexsize == 9) { + extract_9mers_rev_simd_64(array,current,next); + } else if (indexsize == 8) { + extract_8mers_rev_simd_64(array,current,next); + } else if (indexsize == 7) { + extract_7mers_rev_simd_64(array,current,next); + } else if (indexsize == 6) { + extract_6mers_rev_simd_64(array,current,next); + } else if (indexsize == 5) { + extract_5mers_rev_simd_64(array,current,next); + } else { + abort(); + } + count_fwdrev_simd_n(counts,(Genomecomp_T *) array,64); - } else if (indexsize == 5) { - while (ptr + 3 <= endptr) { + ptr += 6; + } + + if (ptr + 3 <= endptr) { #ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; - nextlow = ref_blocks[ptr+4]; + high1 = Bigendian_convert_uint(ref_blocks[ptr]); + /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); +#else + high1 = ref_blocks[ptr]; + /* low1 = ref_blocks[ptr+1]; */ + nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); - } else { - high = 
Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); - } - } - - low_rc = ~low; - high_rc = ~high; - nextlow_rc = ~nextlow; - chrpos = store_5mers_rev(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc); - ptr += 3; + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow); + } else { + high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow); + } else { + high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow); + } else { + high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow); + } + } + + /* low1_rc = ~low1; */ + low1_rc = nextlow_rc; + + nextlow_rc = ~nextlow; + high1_rc = ~high1; + + if (indexsize == 9) { + count_9mers_rev_32(counts,low1_rc,high1_rc,nextlow_rc); + } else if (indexsize == 8) { + count_8mers_rev_32(counts,low1_rc,high1_rc,nextlow_rc); + } else if (indexsize == 7) { + count_7mers_rev_32(counts,low1_rc,high1_rc,nextlow_rc); + } else if (indexsize == 6) { + 
count_6mers_rev_32(counts,low1_rc,high1_rc,nextlow_rc); + } else if (indexsize == 5) { + count_5mers_rev_32(counts,low1_rc,high1_rc,nextlow_rc); + } else { + abort(); } - } else { - abort(); + ptr += 3; } @@ -47942,38 +31695,59 @@ assert(ptr == endptr); #ifdef WORDS_BIGENDIAN - high = Bigendian_convert_uint(ref_blocks[ptr]); - low = Bigendian_convert_uint(ref_blocks[ptr+1]); + high1 = Bigendian_convert_uint(ref_blocks[ptr]); + /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - high = ref_blocks[ptr]; - low = ref_blocks[ptr+1]; + high1 = ref_blocks[ptr]; + /* low1 = ref_blocks[ptr+1]; */ nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); } else if (mode == CMET_NONSTRANDED) { if (genestrand > 0) { - high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow); + } else { + high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow); + } else { + high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if 
(genestrand > 0) { + high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow); } else { - high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow); } } - low_rc = ~low; - high_rc = ~high; + /* low1_rc = ~low1; */ + low1_rc = nextlow_rc; + nextlow_rc = ~nextlow; + high1_rc = ~high1; if (indexsize == 9) { - chrpos = store_9mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + count_9mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); } else if (indexsize == 8) { - chrpos = store_8mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + count_8mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); } else if (indexsize == 7) { - chrpos = store_7mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + count_7mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); } else if (indexsize == 6) { - chrpos = store_6mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + count_6mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); } else if (indexsize == 5) { - chrpos = store_5mers_rev_partial(chrpos,table,pointers,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + count_5mers_rev_partial(counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); } else { abort(); } @@ -47982,31 +31756,17 @@ return; } #endif + -#ifdef USE_SIMD_FOR_COUNTS +#ifndef HAVE_SSE2 static void -store_positions_rev_simd (Chrpos_T *table, UINT4 *pointers, UINT4 *positions, Count_T *counts, int indexsize, - Univcoord_T left, Univcoord_T left_plus_length, 
Chrpos_T chrpos, - int genestrand) { +store_positions_rev_std (Chrpos_T *table, UINT4 *positions, Count_T *counts, int indexsize, + Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos, + int genestrand) { int startdiscard, enddiscard; - Genomecomp_T ptr, startptr, endptr, nextlow_rc, nextlow; - Genomecomp_T low1_rc, high1_rc, high0, low1, high1; - __m128i current, next, invert3; - __m128i array[16]; -#ifdef HAVE_SSE4_1 - __m128i temp; -#else - Genomecomp_T low0_rc, high0_rc; -#endif -#ifdef HAVE_AVX2 - __m256i array256[16]; - Genomecomp_T low2, high2, low3, high3; - __m256i current256, next256, temp256, shift256; - __m256i biginvert3; -#endif - + Genomecomp_T ptr, startptr, endptr, low_rc, high_rc, nextlow_rc, + low, high, nextlow; - debug(printf("Starting store_positions_rev_simd\n")); if (left_plus_length < (Univcoord_T) indexsize) { left_plus_length = 0; @@ -48020,49 +31780,62 @@ startdiscard = left % 32; /* (left+pos5) % 32 */ enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */ - invert3 = _mm_set_epi32(0x00000000,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF); -#ifdef HAVE_AVX2 - biginvert3 = _mm256_set_epi32(0x00000000,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF); - shift256 = _mm256_setr_epi32(7,0,1,2,3,4,5,6); -#endif - if (left_plus_length <= left) { /* Skip */ } else if (startptr == endptr) { #ifdef WORDS_BIGENDIAN - high1 = Bigendian_convert_uint(ref_blocks[ptr]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); + high = Bigendian_convert_uint(ref_blocks[ptr]); + low = Bigendian_convert_uint(ref_blocks[ptr+1]); nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - high1 = ref_blocks[ptr]; - low1 = ref_blocks[ptr+1]; + high = ref_blocks[ptr]; + low = ref_blocks[ptr+1]; nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow); + + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { 
+ high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); } else if (mode == CMET_NONSTRANDED) { if (genestrand > 0) { - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); nextlow = Cmet_reduce_ct(nextlow); + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); } else { - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow); + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); } } - low1_rc = ~low1; - high1_rc = ~high1; + low_rc = ~low; + high_rc = ~high; nextlow_rc = ~nextlow; if (indexsize == 9) { - chrpos = store_9mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); + chrpos = store_9mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); } else if (indexsize == 8) { - chrpos = store_8mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); + chrpos = store_8mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); } else if 
(indexsize == 7) { - chrpos = store_7mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); + chrpos = store_7mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); } else if (indexsize == 6) { - chrpos = store_6mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); + chrpos = store_6mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); } else if (indexsize == 5) { - chrpos = store_5mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); + chrpos = store_5mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,enddiscard); } else { fprintf(stderr,"indexsize %d not supported\n",indexsize); abort(); @@ -48073,38 +31846,57 @@ /* Start block */ #ifdef WORDS_BIGENDIAN - high1 = Bigendian_convert_uint(ref_blocks[ptr]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); + high = Bigendian_convert_uint(ref_blocks[ptr]); + low = Bigendian_convert_uint(ref_blocks[ptr+1]); nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - high1 = ref_blocks[ptr]; - low1 = ref_blocks[ptr+1]; + high = ref_blocks[ptr]; + low = ref_blocks[ptr+1]; nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow); + + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); } else if (mode == CMET_NONSTRANDED) { if (genestrand > 0) { - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); nextlow = Cmet_reduce_ct(nextlow); + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); } else { - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = 
Cmet_reduce_ga(nextlow); + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); } } + low_rc = ~low; + high_rc = ~high; nextlow_rc = ~nextlow; - low1_rc = ~low1; - high1_rc = ~high1; if (indexsize == 9) { - chrpos = store_9mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + chrpos = store_9mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); } else if (indexsize == 8) { - chrpos = store_8mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + chrpos = store_8mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); } else if (indexsize == 7) { - chrpos = store_7mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + chrpos = store_7mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); } else if (indexsize == 6) { - chrpos = 
store_6mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + chrpos = store_6mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); } else if (indexsize == 5) { - chrpos = store_5mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + chrpos = store_5mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,startdiscard,/*enddiscard*/31); } else { fprintf(stderr,"indexsize %d not supported\n",indexsize); abort(); @@ -48113,718 +31905,561 @@ ptr += 3; /* Middle blocks */ - if (indexsize == 9) { -#ifdef HAVE_AVX2 - while (ptr + 12 <= endptr) { + while (ptr + 3 <= endptr) { #ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 = Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); -#else - high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; - nextlow = ref_blocks[ptr+13]; + high = Bigendian_convert_uint(ref_blocks[ptr]); + low = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); +#else + high = ref_blocks[ptr]; + low = ref_blocks[ptr+1]; + nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = 
Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - nextlow = Cmet_reduce_ga(nextlow); - } + + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = 
Atoi_reduce_tc(nextlow); } + } + + low_rc = ~low; + high_rc = ~high; + nextlow_rc = ~nextlow; + + if (indexsize == 9) { + chrpos = store_9mers_rev_32(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc); + } else if (indexsize == 8) { + chrpos = store_8mers_rev_32(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc); + } else if (indexsize == 7) { + chrpos = store_7mers_rev_32(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc); + } else if (indexsize == 6) { + chrpos = store_6mers_rev_32(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc); + } else if (indexsize == 5) { + chrpos = store_5mers_rev_32(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc); + } else { + abort(); + } - current256 = _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3); - current256 = _mm256_xor_si256(current256,biginvert3); - nextlow_rc = ~nextlow; + ptr += 3; + } - temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); - extract_9mers_rev_simd_128(array256,current256,next256); - chrpos = store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256); - ptr += 12; - } -#endif + /* End block */ + assert(ptr == endptr); - while (ptr + 6 <= endptr) { #ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); + high = Bigendian_convert_uint(ref_blocks[ptr]); + low = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - nextlow = ref_blocks[ptr+7]; + high = ref_blocks[ptr]; + low = ref_blocks[ptr+1]; + nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { - 
high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); - } - } - current = _mm_set_epi32(nextlow_rc,high0,low1,high1); - current = _mm_xor_si128(current,invert3); - nextlow_rc = ~nextlow; -#ifdef HAVE_SSE4_1 - /* high0_rc = _mm_extract_epi32(current,2); */ - /* low1_rc = _mm_extract_epi32(current,1); */ - /* high1_rc = _mm_extract_epi32(current,0); */ + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high = Cmet_reduce_ct(high); low = Cmet_reduce_ct(low); nextlow = Cmet_reduce_ct(nextlow); + } else { + high = Cmet_reduce_ga(high); low = Cmet_reduce_ga(low); nextlow = Cmet_reduce_ga(nextlow); + } + } else if (mode == ATOI_STRANDED) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high = Atoi_reduce_ag(high); low = Atoi_reduce_ag(low); nextlow = Atoi_reduce_ag(nextlow); + } 
else { + high = Atoi_reduce_tc(high); low = Atoi_reduce_tc(low); nextlow = Atoi_reduce_tc(nextlow); + } + } - temp = _mm_insert_epi32(current,nextlow_rc,0x03); - next = _mm_shuffle_epi32(temp,0x93); -#else - high0_rc = ~high0; - low1_rc = ~low1; - high1_rc = ~high1; + low_rc = ~low; + high_rc = ~high; + nextlow_rc = ~nextlow; - next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc); + if (indexsize == 9) { + chrpos = store_9mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + } else if (indexsize == 8) { + chrpos = store_8mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + } else if (indexsize == 7) { + chrpos = store_7mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + } else if (indexsize == 6) { + chrpos = store_6mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + } else if (indexsize == 5) { + chrpos = store_5mers_rev_partial(chrpos,table,positions,counts,low_rc,high_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + } else { + abort(); + } + } + + return; +} #endif - extract_9mers_rev_simd(array,current,next); - chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array); - ptr += 6; - } - if (ptr + 3 <= endptr) { -#ifdef WORDS_BIGENDIAN - high1 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high1 = ref_blocks[ptr]; - /* low1 = ref_blocks[ptr+1]; */ - nextlow = ref_blocks[ptr+4]; +#ifdef HAVE_SSE2 +static void +store_positions_rev_simd (Chrpos_T *table, UINT4 *positions, Count_T *counts, int indexsize, + Univcoord_T left, Univcoord_T left_plus_length, Chrpos_T chrpos, + int genestrand) { + int startdiscard, enddiscard; + Genomecomp_T ptr, startptr, endptr, nextlow_rc, nextlow; + Genomecomp_T low1_rc, 
high1_rc, high0, low1, high1; + __m128i current, a, b, next, invert3, invert4; +#ifdef HAVE_AVX2 + Genomecomp_T low2, high2, low3, high3; + __m256i current256, a256, b256, c256, d256, next256, temp256, shift256; + __m256i biginvert3, biginvert4; +#endif +#ifdef HAVE_AVX512 + __m128i temp; + Genomecomp_T low4, high4, low5, high5, low6, high6, low7, high7; + __m512i current512, a512, b512, next512, temp512, shift512; + __m512i hugeinvert3, hugeinvert4; #endif - if (mode == CMET_STRANDED) { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow); - } else { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } - } - /* low1_rc = ~low1; */ - low1_rc = nextlow_rc; - nextlow_rc = ~nextlow; - high1_rc = ~high1; + debug(printf("Starting store_positions_rev_simd\n")); - chrpos = store_9mers_rev(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc); - ptr += 3; - } + if (left_plus_length < (Univcoord_T) indexsize) { + left_plus_length = 0; + } else { + left_plus_length -= indexsize; + } + chrpos += (left_plus_length - left); /* We are starting from the right */ - } else if (indexsize == 8) { + ptr = startptr = left/32U*3; + endptr = left_plus_length/32U*3; + startdiscard = left % 32; /* (left+pos5) % 32 */ + enddiscard = left_plus_length % 32; /* (left+pos3) % 32 */ + + invert3 = _mm_set_epi32(0x00000000,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF); + invert4 = _mm_set1_epi32(0xFFFFFFFF); #ifdef HAVE_AVX2 - while (ptr + 12 <= endptr) { -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 = 
Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); -#else - high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; - nextlow = ref_blocks[ptr+13]; -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - nextlow = Cmet_reduce_ga(nextlow); - } - } - - current256 = _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3); - current256 = _mm256_xor_si256(current256,biginvert3); - nextlow_rc = ~nextlow; - - temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); - - extract_8mers_rev_simd_128(array256,current256,next256); - chrpos = store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256); - ptr += 12; - } + 
biginvert3 = _mm256_set_epi32(0x00000000,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF); + biginvert4 = _mm256_set1_epi32(0xFFFFFFFF); + shift256 = _mm256_setr_epi32(7,0,1,2,3,4,5,6); #endif - - while (ptr + 6 <= endptr) { -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); -#else - high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - nextlow = ref_blocks[ptr+7]; +#ifdef HAVE_AVX512 + hugeinvert3 = _mm512_inserti64x4(_mm512_set1_epi32(0xFFFFFFFF), biginvert3, 0x1); + hugeinvert4 = _mm512_set1_epi32(0xFFFFFFFF); + shift512 = _mm512_setr_epi32(15,0,1,2,3,4,5,6, 7,8,9,10,11,12,13,14); #endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); - } - } - current = _mm_set_epi32(nextlow_rc,high0,low1,high1); - current = _mm_xor_si128(current,invert3); - nextlow_rc = ~nextlow; -#ifdef HAVE_SSE4_1 - /* high0_rc = _mm_extract_epi32(current,2); */ - /* low1_rc = _mm_extract_epi32(current,1); */ - /* high1_rc = _mm_extract_epi32(current,0); */ + if (left_plus_length <= left) { + /* Skip */ - temp = _mm_insert_epi32(current,nextlow_rc,0x03); - next = _mm_shuffle_epi32(temp,0x93); + } else if (startptr 
== endptr) { +#ifdef WORDS_BIGENDIAN + high1 = Bigendian_convert_uint(ref_blocks[ptr]); + low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); #else - high0_rc = ~high0; - low1_rc = ~low1; - high1_rc = ~high1; - - next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc); + high1 = ref_blocks[ptr]; + low1 = ref_blocks[ptr+1]; + nextlow = ref_blocks[ptr+4]; #endif - extract_8mers_rev_simd(array,current,next); - chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array); - ptr += 6; + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); nextlow = Cmet_reduce_ct(nextlow); + } else { + high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow); } + } else if (mode == ATOI_STRANDED) { + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow); + } else { + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow); + } else { + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow); + } + } - if (ptr + 3 <= endptr) { -#ifdef WORDS_BIGENDIAN - high1 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - 
nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high1 = ref_blocks[ptr]; - /* low1 = ref_blocks[ptr+1]; */ - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow); - } else { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } - } - - /* low1_rc = ~low1; */ - low1_rc = nextlow_rc; + low1_rc = ~low1; + high1_rc = ~high1; + nextlow_rc = ~nextlow; - nextlow_rc = ~nextlow; - high1_rc = ~high1; + if (indexsize == 9) { + /* chrpos = */ store_9mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); + } else if (indexsize == 8) { + /* chrpos = */ store_8mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); + } else if (indexsize == 7) { + /* chrpos = */ store_7mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); + } else if (indexsize == 6) { + /* chrpos = */ store_6mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); + } else if (indexsize == 5) { + /* chrpos = */ store_5mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,enddiscard); + } else { + fprintf(stderr,"indexsize %d not supported\n",indexsize); + abort(); + } - chrpos = store_8mers_rev(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc); - ptr += 3; - } + } else { + /* Genome_print_blocks(ref_blocks,left,left+16); */ - } else if (indexsize == 7) { -#ifdef HAVE_AVX2 - while (ptr + 12 <= endptr) { + /* Start block */ #ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = 
Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 = Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); -#else - high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; - nextlow = ref_blocks[ptr+13]; + high1 = Bigendian_convert_uint(ref_blocks[ptr]); + low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); +#else + high1 = ref_blocks[ptr]; + low1 = ref_blocks[ptr+1]; + nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - nextlow = Cmet_reduce_ga(nextlow); - } - } - - current256 = 
_mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3); - current256 = _mm256_xor_si256(current256,biginvert3); - nextlow_rc = ~nextlow; - - temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); - extract_7mers_rev_simd_128(array256,current256,next256); - chrpos = store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256); - ptr += 12; + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); nextlow = Cmet_reduce_ct(nextlow); + } else { + high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); nextlow = Cmet_reduce_ga(nextlow); } -#endif + } else if (mode == ATOI_STRANDED) { + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow); + } else { + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); nextlow = Atoi_reduce_tc(nextlow); + } else { + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); nextlow = Atoi_reduce_ag(nextlow); + } + } - while (ptr + 6 <= endptr) { -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - nextlow = 
Bigendian_convert_uint(ref_blocks[ptr+7]); -#else - high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - nextlow = ref_blocks[ptr+7]; -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); - } - } + nextlow_rc = ~nextlow; + low1_rc = ~low1; + high1_rc = ~high1; - current = _mm_set_epi32(nextlow_rc,high0,low1,high1); - current = _mm_xor_si128(current,invert3); - nextlow_rc = ~nextlow; -#ifdef HAVE_SSE4_1 - /* high0_rc = _mm_extract_epi32(current,2); */ - /* low1_rc = _mm_extract_epi32(current,1); */ - /* high1_rc = _mm_extract_epi32(current,0); */ + if (indexsize == 9) { + chrpos = store_9mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + } else if (indexsize == 8) { + chrpos = store_8mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + } else if (indexsize == 7) { + chrpos = store_7mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + } else if (indexsize == 6) { + chrpos = store_6mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + } else if (indexsize == 5) { + chrpos = store_5mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,startdiscard,/*enddiscard*/31); + } else { + fprintf(stderr,"indexsize %d not 
supported\n",indexsize); + abort(); + } - temp = _mm_insert_epi32(current,nextlow_rc,0x03); - next = _mm_shuffle_epi32(temp,0x93); -#else - high0_rc = ~high0; - low1_rc = ~low1; - high1_rc = ~high1; + ptr += 3; - next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc); -#endif + /* Middle blocks */ +#ifdef HAVE_AVX512 + while (ptr + 24 <= endptr) { - extract_7mers_rev_simd(array,current,next); - chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array); - ptr += 6; + if (mode == STANDARD) { + a512 = _mm512_loadu_si512((__m512i *) &(ref_blocks[ptr])); + b512 = _mm512_loadu_si512((__m512i *) &(ref_blocks[ptr+7])); + current512 = _mm512_permutex2var_epi32(a512,_mm512_setr_epi32(16+14, 16+15, 16+11, 16+12, 16+8, 16+9, 12, 13, 9, 10, 6, 7, 3, 4, 0, 1), b512); + current512 = _mm512_xor_si512(current512,hugeinvert4); + nextlow = ref_blocks[ptr+25]; + } else { + current512 = apply_mode_rev_512(&(ref_blocks[ptr]),mode,genestrand,&nextlow,nextlow_rc); + current512 = _mm512_xor_si512(current512,hugeinvert3); } - if (ptr + 3 <= endptr) { -#ifdef WORDS_BIGENDIAN - high1 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]);*/ - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high1 = ref_blocks[ptr]; - /* low1 = ref_blocks[ptr+1]; */ - nextlow = ref_blocks[ptr+4]; -#endif - if (mode == CMET_STRANDED) { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow); - } else { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } - } - - /* low1_rc = ~low1; */ - low1_rc = nextlow_rc; + nextlow_rc = ~nextlow; /* Take from this loop */ + + current = _mm512_extracti32x4_epi32(current512,3); + temp = 
_mm_insert_epi32(current,nextlow_rc,0x03); + temp512 = _mm512_inserti32x4(current512,temp,0x03); + next512 = _mm512_permutexvar_epi32(shift512,temp512); /* shift goes first! */ + + if (indexsize == 9) { + chrpos = store_9mers_rev_simd_256(chrpos,table,positions,counts,current512,next512); + } else if (indexsize == 8) { + chrpos = store_8mers_rev_simd_256(chrpos,table,positions,counts,current512,next512); + } else if (indexsize == 7) { + chrpos = store_7mers_rev_simd_256(chrpos,table,positions,counts,current512,next512); + } else if (indexsize == 6) { + chrpos = store_6mers_rev_simd_256(chrpos,table,positions,counts,current512,next512); + } else if (indexsize == 5) { + chrpos = store_5mers_rev_simd_256(chrpos,table,positions,counts,current512,next512); + } else { + abort(); + } - nextlow_rc = ~nextlow; - high1_rc = ~high1; + ptr += 24; + } +#endif - chrpos = store_7mers_rev(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc); - ptr += 3; - } - } else if (indexsize == 6) { #ifdef HAVE_AVX2 - while (ptr + 12 <= endptr) { -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 = Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); -#else - high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; + while (ptr + 12 <= endptr) { + + if (mode == STANDARD) { + a256 = _mm256_loadu_si256((__m256i *) &(ref_blocks[ptr])); + b256 = _mm256_loadu_si256((__m256i *) &(ref_blocks[ptr+3])); + c256 = 
_mm256_unpacklo_epi64(b256,a256); + d256 = _mm256_unpackhi_epi64(b256,a256); + current256 = _mm256_permute2x128_si256(c256, d256, 0x03); + current256 = _mm256_xor_si256(current256,biginvert4); nextlow = ref_blocks[ptr+13]; -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); - nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); - nextlow = Cmet_reduce_ga(nextlow); - } - } - current256 = _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3); + } else { + current256 = apply_mode_rev_256(&(ref_blocks[ptr]),mode,genestrand,&nextlow,nextlow_rc); current256 = _mm256_xor_si256(current256,biginvert3); - nextlow_rc = ~nextlow; - - temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); + } - extract_6mers_rev_simd_128(array256,current256,next256); - chrpos = store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256); - ptr += 12; + nextlow_rc = ~nextlow; /* Take from this loop */ + +#if 0 + /* Doesn't work, because performs shift within 128-bit lanes */ + next256 = _mm256_alignr_epi8(current256,_mm256_set1_epi32(nextlow_rc),28); +#else + 
temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07); + next256 = _mm256_permutevar8x32_epi32(temp256,shift256); +#endif + + if (indexsize == 9) { + chrpos = store_9mers_rev_simd_128(chrpos,table,positions,counts,current256,next256); + } else if (indexsize == 8) { + chrpos = store_8mers_rev_simd_128(chrpos,table,positions,counts,current256,next256); + } else if (indexsize == 7) { + chrpos = store_7mers_rev_simd_128(chrpos,table,positions,counts,current256,next256); + } else if (indexsize == 6) { + chrpos = store_6mers_rev_simd_128(chrpos,table,positions,counts,current256,next256); + } else if (indexsize == 5) { + chrpos = store_5mers_rev_simd_128(chrpos,table,positions,counts,current256,next256); + } else { + abort(); } + + ptr += 12; + } #endif - while (ptr + 6 <= endptr) { -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); + while (ptr + 6 <= endptr) { + + if (mode == STANDARD) { +#ifdef HAVE_SSSE3 + a = _mm_loadu_si128((__m128i *) &(ref_blocks[ptr])); + b = _mm_loadu_si128((__m128i *) &(ref_blocks[ptr+3])); + current = _mm_unpacklo_epi64(b,a); + current = _mm_xor_si128(current,invert4); + nextlow = ref_blocks[ptr+7]; #else - high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; + /* Solution for SSE2. 
Need separate values to construct "next" */ + high0 = ref_blocks[ptr]; /* low0 = ref_blocks[ptr+1]; */ + high1 = ref_blocks[ptr+3]; low1 = ref_blocks[ptr+4]; nextlow = ref_blocks[ptr+7]; -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - nextlow = Cmet_reduce_ct(nextlow); - } else { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); - } - } current = _mm_set_epi32(nextlow_rc,high0,low1,high1); current = _mm_xor_si128(current,invert3); - nextlow_rc = ~nextlow; -#ifdef HAVE_SSE4_1 - /* high0_rc = _mm_extract_epi32(current,2); */ - /* low1_rc = _mm_extract_epi32(current,1); */ - /* high1_rc = _mm_extract_epi32(current,0); */ - - temp = _mm_insert_epi32(current,nextlow_rc,0x03); - next = _mm_shuffle_epi32(temp,0x93); -#else - high0_rc = ~high0; - low1_rc = ~low1; - high1_rc = ~high1; - - next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc); #endif - extract_6mers_rev_simd(array,current,next); - chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array); - ptr += 6; - } - - if (ptr + 3 <= endptr) { + } else { #ifdef WORDS_BIGENDIAN - high1 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high1 = ref_blocks[ptr]; - /* low1 = ref_blocks[ptr+1]; */ - nextlow = ref_blocks[ptr+4]; + high0 = Bigendian_convert_uint(ref_blocks[ptr]); /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ + high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); low1 = 
Bigendian_convert_uint(ref_blocks[ptr+4]); + nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); +#else + high0 = ref_blocks[ptr]; /* low0 = ref_blocks[ptr+1]; */ + high1 = ref_blocks[ptr+3]; low1 = ref_blocks[ptr+4]; + nextlow = ref_blocks[ptr+7]; #endif - if (mode == CMET_STRANDED) { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow); - } else { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } - } - - /* low1_rc = ~low1; */ - low1_rc = nextlow_rc; - - nextlow_rc = ~nextlow; - high1_rc = ~high1; - - chrpos = store_6mers_rev(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc); - ptr += 3; - } - } else if (indexsize == 5) { -#ifdef HAVE_AVX2 - while (ptr + 12 <= endptr) { -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - high2 = Bigendian_convert_uint(ref_blocks[ptr+6]); - low2 = Bigendian_convert_uint(ref_blocks[ptr+7]); - high3 = Bigendian_convert_uint(ref_blocks[ptr+9]); - low3 = Bigendian_convert_uint(ref_blocks[ptr+10]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+13]); -#else - high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - high2 = ref_blocks[ptr+6]; - low2 = ref_blocks[ptr+7]; - high3 = ref_blocks[ptr+9]; - low3 = ref_blocks[ptr+10]; - nextlow = ref_blocks[ptr+13]; -#endif - if (mode == CMET_STRANDED) { + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - 
high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); nextlow = Cmet_reduce_ga(nextlow); } else if (mode == CMET_NONSTRANDED) { if (genestrand > 0) { high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - high2 = Cmet_reduce_ct(high2); low2 = Cmet_reduce_ct(low2); - high3 = Cmet_reduce_ct(high3); low3 = Cmet_reduce_ct(low3); nextlow = Cmet_reduce_ct(nextlow); } else { high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - high2 = Cmet_reduce_ga(high2); low2 = Cmet_reduce_ga(low2); - high3 = Cmet_reduce_ga(high3); low3 = Cmet_reduce_ga(low3); nextlow = Cmet_reduce_ga(nextlow); } - } - - current256 = _mm256_set_epi32(nextlow_rc,high0,low1,high1,low2,high2,low3,high3); - current256 = _mm256_xor_si256(current256,biginvert3); - nextlow_rc = ~nextlow; - - temp256 = _mm256_insert_epi32(current256,nextlow_rc,0x07); - next256 = _mm256_permutevar8x32_epi32(temp256,shift256); - - extract_5mers_rev_simd_128(array256,current256,next256); - chrpos = store_fwdrev_simd_128(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array256); - ptr += 12; - } -#endif - - while (ptr + 6 <= endptr) { -#ifdef WORDS_BIGENDIAN - high0 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low0 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - high1 = Bigendian_convert_uint(ref_blocks[ptr+3]); - low1 = Bigendian_convert_uint(ref_blocks[ptr+4]); - nextlow = Bigendian_convert_uint(ref_blocks[ptr+7]); -#else - high0 = ref_blocks[ptr]; - /* low0 = ref_blocks[ptr+1]; */ - high1 = ref_blocks[ptr+3]; - low1 = ref_blocks[ptr+4]; - nextlow = ref_blocks[ptr+7]; -#endif - if (mode == CMET_STRANDED) { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == 
CMET_NONSTRANDED) { + } else if (mode == ATOI_STRANDED) { + high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */ + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == ATOI_NONSTRANDED) { if (genestrand > 0) { - high0 = Cmet_reduce_ct(high0); /* low0 = Cmet_reduce_ct(low0); */ - high1 = Cmet_reduce_ct(high1); low1 = Cmet_reduce_ct(low1); - nextlow = Cmet_reduce_ct(nextlow); + high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */ + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + nextlow = Atoi_reduce_tc(nextlow); } else { - high0 = Cmet_reduce_ga(high0); /* low0 = Cmet_reduce_ga(low0); */ - high1 = Cmet_reduce_ga(high1); low1 = Cmet_reduce_ga(low1); - nextlow = Cmet_reduce_ga(nextlow); + high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */ + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */ + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + nextlow = Atoi_reduce_tc(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high0 = Atoi_reduce_ag(high0); /* low0 = Atoi_reduce_ag(low0); */ + high1 = Atoi_reduce_ag(high1); low1 = Atoi_reduce_ag(low1); + nextlow = Atoi_reduce_ag(nextlow); + } else { + high0 = Atoi_reduce_tc(high0); /* low0 = Atoi_reduce_tc(low0); */ + high1 = Atoi_reduce_tc(high1); low1 = Atoi_reduce_tc(low1); + nextlow = Atoi_reduce_tc(nextlow); } } current = _mm_set_epi32(nextlow_rc,high0,low1,high1); current = _mm_xor_si128(current,invert3); - nextlow_rc = ~nextlow; -#ifdef HAVE_SSE4_1 - /* high0_rc = _mm_extract_epi32(current,2); */ - /* low1_rc = _mm_extract_epi32(current,1); */ - /* high1_rc = _mm_extract_epi32(current,0); */ - - temp = _mm_insert_epi32(current,nextlow_rc,0x03); - next = _mm_shuffle_epi32(temp,0x93); -#else - high0_rc = ~high0; - 
low1_rc = ~low1; - high1_rc = ~high1; + } - next = _mm_set_epi32(high0_rc,low1_rc,high1_rc,nextlow_rc); -#endif + nextlow_rc = ~nextlow; /* Take from this loop */ - extract_5mers_rev_simd(array,current,next); - chrpos = store_fwdrev_simd(chrpos,table,pointers,positions,counts,(Genomecomp_T *) array); - ptr += 6; +#if defined(HAVE_SSSE3) + next = _mm_alignr_epi8(current,_mm_set1_epi32(nextlow_rc),12); +#elif 0 + /* Previous solution for SSE4.1 */ + temp = _mm_insert_epi32(current,nextlow_rc,0x03); + next = _mm_shuffle_epi32(temp,0x93); +#else + /* Solution for SSE2 */ + next = _mm_set_epi32(~high0,~low1,~high1,nextlow_rc); +#endif + + if (indexsize == 9) { + chrpos = store_9mers_rev_simd_64(chrpos,table,positions,counts,current,next); + } else if (indexsize == 8) { + chrpos = store_8mers_rev_simd_64(chrpos,table,positions,counts,current,next); + } else if (indexsize == 7) { + chrpos = store_7mers_rev_simd_64(chrpos,table,positions,counts,current,next); + } else if (indexsize == 6) { + chrpos = store_6mers_rev_simd_64(chrpos,table,positions,counts,current,next); + } else if (indexsize == 5) { + chrpos = store_5mers_rev_simd_64(chrpos,table,positions,counts,current,next); + } else { + abort(); } - if (ptr + 3 <= endptr) { + ptr += 6; + } + + if (ptr + 3 <= endptr) { #ifdef WORDS_BIGENDIAN - high1 = Bigendian_convert_uint(ref_blocks[ptr]); - /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ - nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); -#else - high1 = ref_blocks[ptr]; - /* low1 = ref_blocks[ptr+1]; */ - nextlow = ref_blocks[ptr+4]; + high1 = Bigendian_convert_uint(ref_blocks[ptr]); + /* low1 = Bigendian_convert_uint(ref_blocks[ptr+1]); */ + nextlow = Bigendian_convert_uint(ref_blocks[ptr+4]); +#else + high1 = ref_blocks[ptr]; + /* low1 = ref_blocks[ptr+1]; */ + nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { + + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { + high1 = Cmet_reduce_ga(high1); /* low1 = 
Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); + } else if (mode == CMET_NONSTRANDED) { + if (genestrand > 0) { + high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow); + } else { high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } else if (mode == CMET_NONSTRANDED) { - if (genestrand > 0) { - high1 = Cmet_reduce_ct(high1); /* low1 = Cmet_reduce_ct(low1); */ nextlow = Cmet_reduce_ct(nextlow); - } else { - high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); - } } - - /* low1_rc = ~low1; */ - low1_rc = nextlow_rc; - - nextlow_rc = ~nextlow; - high1_rc = ~high1; - - chrpos = store_5mers_rev(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc); - ptr += 3; + } else if (mode == ATOI_STRANDED) { + high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow); + } else { + high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow); + } else { + high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow); + } + } + + /* low1_rc = ~low1; */ + low1_rc = nextlow_rc; + + nextlow_rc = ~nextlow; + high1_rc = ~high1; + + if (indexsize == 9) { + chrpos = store_9mers_rev_32(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc); + } else if (indexsize == 8) { + chrpos = 
store_8mers_rev_32(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc); + } else if (indexsize == 7) { + chrpos = store_7mers_rev_32(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc); + } else if (indexsize == 6) { + chrpos = store_6mers_rev_32(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc); + } else if (indexsize == 5) { + chrpos = store_5mers_rev_32(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc); + } else { + abort(); } - } else { - abort(); + ptr += 3; } @@ -48840,7 +32475,10 @@ /* low1 = ref_blocks[ptr+1]; */ nextlow = ref_blocks[ptr+4]; #endif - if (mode == CMET_STRANDED) { + + if (mode == STANDARD) { + /* Skip */ + } else if (mode == CMET_STRANDED) { high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); } else if (mode == CMET_NONSTRANDED) { if (genestrand > 0) { @@ -48848,6 +32486,22 @@ } else { high1 = Cmet_reduce_ga(high1); /* low1 = Cmet_reduce_ga(low1); */ nextlow = Cmet_reduce_ga(nextlow); } + } else if (mode == ATOI_STRANDED) { + high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == ATOI_NONSTRANDED) { + if (genestrand > 0) { + high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow); + } else { + high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow); + } + } else if (mode == TTOC_STRANDED) { + high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow); + } else if (mode == TTOC_NONSTRANDED) { + if (genestrand > 0) { + high1 = Atoi_reduce_tc(high1); /* low1 = Atoi_reduce_tc(low1); */ nextlow = Atoi_reduce_tc(nextlow); + } else { + high1 = Atoi_reduce_ag(high1); /* low1 = Atoi_reduce_ag(low1); */ nextlow = Atoi_reduce_ag(nextlow); + } } /* low1_rc = ~low1; */ @@ -48857,15 +32511,15 @@ high1_rc = ~high1; if (indexsize == 9) { - chrpos = 
store_9mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + chrpos = store_9mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); } else if (indexsize == 8) { - chrpos = store_8mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + chrpos = store_8mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); } else if (indexsize == 7) { - chrpos = store_7mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + chrpos = store_7mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); } else if (indexsize == 6) { - chrpos = store_6mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + chrpos = store_6mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); } else if (indexsize == 5) { - chrpos = store_5mers_rev_partial(chrpos,table,pointers,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); + chrpos = store_5mers_rev_partial(chrpos,table,positions,counts,low1_rc,high1_rc,nextlow_rc,/*startdiscard*/0,enddiscard); } else { abort(); } @@ -48883,16 +32537,146 @@ #define POLY_T 0xFFFF -#ifdef HAVE_AVX2 +#ifdef HAVE_AVX512 static Chrpos_T * -allocate_positions (UINT4 *pointers, UINT4 *positions, - Inquery_T *inquery, Count_T *counts, int oligospace) { +allocate_positions (UINT4 *__restrict__ positions, + Inquery_T *__restrict__ inquery, Count_T *counts, int oligospace) { + Chrpos_T *table; + UINT4 p; + int totalcounts = 0; + int i, j, k; + __m512i *inquery_ptr, *counts_ptr, *end_ptr, qcounts; + __m512i terms_ptr[1]; + Count_T *terms; + int *nskip, *nskip_ptr; + +#if 0 + /* Causes problems with new algorithm */ + 
inquery[POLY_A & mask] = INQUERY_FALSE; + inquery[POLY_C & mask] = INQUERY_FALSE; + inquery[POLY_G & mask] = INQUERY_FALSE; + inquery[POLY_T & mask] = INQUERY_FALSE; +#endif + + /* nskip is a run-length of zero counts, which allows faster processing the second time through */ + nskip_ptr = nskip = (int *) MALLOCA((oligospace/SIMD_NELTS + 1) * sizeof(int)); + *nskip_ptr = 0; + + inquery_ptr = (__m512i *) inquery; + counts_ptr = (__m512i *) counts; + end_ptr = &(counts_ptr[oligospace/SIMD_NELTS]); + terms = (Count_T *) terms_ptr; + + i = 0; + while (counts_ptr < end_ptr) { + debug(printf("%d\n",i)); + debug(print_counts_512(*counts_ptr,"counts")); + qcounts = _mm512_and_si512(*counts_ptr,*inquery_ptr++); /* counts in query (zeroed if INQUERY_FALSE, which can happen if count > MAXCOUNT) */ + _mm512_store_si512(counts_ptr++,qcounts); /* and store back, so we don't need inquery or overabundant any more */ + if (_mm512_test_epi32_mask(qcounts,qcounts) == 0) { + /* All counts are zero, so incrementing nskip */ + (*nskip_ptr) += 1; + + } else { + /* A valid count found */ + _mm512_store_si512(terms_ptr,qcounts); + for (k = 0; k < SIMD_NELTS; k++) { + totalcounts += terms[k]; + } + *(++nskip_ptr) = 0; /* Advance ptr and initialize */ + } + + i += SIMD_NELTS; + } + +#if 0 + /* For debugging */ + totalcounts_old = 0; + for (i = 0; i < oligospace; i++) { + if (inquery[i] == INQUERY_TRUE) { + totalcounts_old += counts[i]; + } + } + + fprintf(stderr,"Old method %d, new method %d\n",totalcounts_old,totalcounts); + if (totalcounts != totalcounts_old) { + abort(); + } +#endif + + debug(printf("totalcounts is %d\n",totalcounts)); + if (totalcounts == 0) { + table = (Chrpos_T *) NULL; + } else { + /* Need to assign positions[0] so we can free the space */ + /* pointers_end = &(pointers[-1]); */ /* or pointers_allocated[0] */ + table = (Chrpos_T *) MALLOC(totalcounts * sizeof(Chrpos_T)); + p = 0; + + i = 0; + nskip_ptr = nskip; + j = *nskip_ptr++; + while (i + j*SIMD_NELTS < 
oligospace) { +#if 0 + while (--j >= 0) { + positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; + positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; + } +#elif 0 + /* Not necessary to assign since we check for counts[i] == 0 */ + pointers_end[i] = /* positions[i] = */ p; + i += j*SIMD_NELTS; +#else + i += j*SIMD_NELTS; +#endif + + for (k = 0; k < SIMD_NELTS; k++) { + /* pointers_end[i] = */ positions[i] = p; + p += counts[i++]; + } + + j = *nskip_ptr++; + } + +#if 0 + while (--j >= 0) { + /* Not necessary to assign since we check for counts[i] == 0 */ + positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; + positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; + } +#elif 0 + if (j > 0) { + pointers_end[i] = /* positions[i] = */ p; + /* i += j*SIMD_NELTS; */ + } +#endif + } + +#if 0 + /* Faster to assign each individual pointer above */ + memcpy((void *) pointers,&(positions[1]),(oligospace-1)*sizeof(Chrpos_T *)); +#endif + /* pointers[oligospace-1] = p; */ /* or pointers_end[oligospace] or pointers_allocated[oligospace+1] */ + + /* dump_allocations(positions,counts,oligospace,indexsize,positions_space); */ + + FREEA(nskip); + + return table; +} + + +#elif defined(HAVE_AVX2) +static Chrpos_T * +allocate_positions (UINT4 *__restrict__ positions, + Inquery_T *__restrict__ inquery, Count_T *counts, int oligospace) { Chrpos_T *table; - UINT4 *pointers_end, p; + UINT4 p; int totalcounts = 0; int i, j, k; __m256i *inquery_ptr, *counts_ptr, *end_ptr, qcounts; __m256i terms_ptr[1]; + __m256i _overflowp, _maxcounts; Count_T *terms; int *nskip, *nskip_ptr; @@ -48913,21 +32697,35 @@ end_ptr = &(counts_ptr[oligospace/SIMD_NELTS]); terms = (Count_T *) terms_ptr; +#ifdef CHECK_FOR_OVERFLOW + _maxcounts = _mm256_set1_epi8(MAXCOUNT); +#endif + i = 0; while (counts_ptr < end_ptr) { debug(printf("%d\n",i)); - debug(print_counts(*counts_ptr,"counts")); - qcounts = 
_mm256_and_si256(*counts_ptr,*inquery_ptr++); /* counts in query (zeroed if INQUERY_FALSE, which can happen if count > MAXCOUNT) */ - _mm256_store_si256(counts_ptr++,qcounts); /* and store back, so we don't need inquery or overabundant any more */ + qcounts = _mm256_load_si256(counts_ptr); + debug(print_counts_256(qcounts,"counts")); + qcounts = _mm256_and_si256(qcounts,*inquery_ptr++); /* counts in query (zeroed if INQUERY_FALSE, which can happen if count > MAXCOUNT) */ + debug(print_counts_256(qcounts,"qcounts")); if (_mm256_testz_si256(qcounts,qcounts)) { - /* All counts are zero, so incrementing nskip */ + /* All counts are zero, so incrementing nskip, but need to store back */ + _mm256_stream_si256(counts_ptr++,qcounts); /* Store back, so we don't need inquery or overabundant any more */ (*nskip_ptr) += 1; - + } else { /* A valid count found */ +#ifdef CHECK_FOR_OVERFLOW + _overflowp = _mm256_cmpgt_epi8(qcounts,_maxcounts); + debug(print_counts_256(_overflowp,"overflow")); + qcounts = _mm256_andnot_si256(_overflowp,qcounts); /* Remove counts that have overflowed */ + debug(print_counts_256(qcounts,"qcounts")); +#endif + + _mm256_stream_si256(counts_ptr++,qcounts); /* Store back, so we don't need inquery or overabundant any more */ _mm256_store_si256(terms_ptr,qcounts); for (k = 0; k < SIMD_NELTS; k++) { - totalcounts += terms[k]; + totalcounts += (int) terms[k]; } *(++nskip_ptr) = 0; /* Advance ptr and initialize */ } @@ -48935,11 +32733,12 @@ i += SIMD_NELTS; } + #if 0 /* For debugging */ totalcounts_old = 0; for (i = 0; i < oligospace; i++) { - if (inquery[i] == INQUERY_TRUE) { + if (inquery[i] == INQUERY_TRUE && counts[i] > 0 && counts[i] <= MAXCOUNT) { totalcounts_old += counts[i]; } } @@ -48955,7 +32754,7 @@ table = (Chrpos_T *) NULL; } else { /* Need to assign positions[0] so we can free the space */ - pointers_end = &(pointers[-1]); /* or pointers_allocated[0] */ + /* pointers_end = &(pointers[-1]); */ /* or pointers_allocated[0] */ table = (Chrpos_T *) 
MALLOC(totalcounts * sizeof(Chrpos_T)); p = 0; @@ -48968,34 +32767,108 @@ positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; } -#else +#elif 0 /* Not necessary to assign since we check for counts[i] == 0 */ pointers_end[i] = /* positions[i] = */ p; i += j*SIMD_NELTS; +#else + i += j*SIMD_NELTS; #endif - pointers_end[i] = positions[i] = p; /* 0 */ + /* pointers_end[i] = */ positions[i] = p; /* 0 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 1 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 2 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 3 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 4 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 5 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 6 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 7 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 8 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 9 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 10 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 11 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 12 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 13 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 14 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 15 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 16 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 17 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 1 */ + /* pointers_end[i] = */ positions[i] = p; /* 18 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 2 */ + /* pointers_end[i] = */ positions[i] = 
p; /* 19 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 3 */ + /* pointers_end[i] = */ positions[i] = p; /* 20 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 4 */ + /* pointers_end[i] = */ positions[i] = p; /* 21 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 5 */ + /* pointers_end[i] = */ positions[i] = p; /* 22 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 6 */ + /* pointers_end[i] = */ positions[i] = p; /* 23 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 7 */ + /* pointers_end[i] = */ positions[i] = p; /* 24 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 25 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 26 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 27 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 28 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 29 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 30 */ + p += counts[i++]; + + /* pointers_end[i] = */ positions[i] = p; /* 31 (SIMD_NELTS - 1) in bytes */ p += counts[i++]; j = *nskip_ptr++; @@ -49007,7 +32880,7 @@ positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; } -#else +#elif 0 if (j > 0) { pointers_end[i] = /* positions[i] = */ p; /* i += j*SIMD_NELTS; */ @@ -49019,7 +32892,7 @@ /* Faster to assign each individual pointer above */ memcpy((void *) pointers,&(positions[1]),(oligospace-1)*sizeof(Chrpos_T *)); #endif - pointers[oligospace-1] = p; /* or pointers_end[oligospace] or pointers_allocated[oligospace+1] */ + /* pointers[oligospace-1] = p; */ /* or pointers_end[oligospace] or pointers_allocated[oligospace+1] */ /* dump_allocations(positions,counts,oligospace,indexsize,positions_space); */ @@ -49031,10 +32904,10 @@ #elif defined(HAVE_SSE2) static Chrpos_T * 
-allocate_positions (UINT4 *pointers, UINT4 *positions, - Inquery_T *inquery, Count_T *counts, int oligospace) { +allocate_positions (UINT4 *__restrict__ positions, + Inquery_T *__restrict__ inquery, Count_T *__restrict__ counts, int oligospace) { Chrpos_T *table; - UINT4 *pointers_end, p; + UINT4 p; int totalcounts = 0; int i, j, k; __m128i *inquery_ptr, *counts_ptr, *end_ptr, qcounts; @@ -49062,7 +32935,7 @@ end_ptr = &(counts_ptr[oligospace/SIMD_NELTS]); terms = (Count_T *) terms_ptr; #ifndef HAVE_SSE4_1 - zero = _mm_set1_epi8(0); + zero = _mm_setzero_si128(); #endif i = 0; @@ -49084,32 +32957,9 @@ } else { /* A valid count found */ _mm_store_si128(terms_ptr,qcounts); -#ifdef HAVE_AVX2 - if (_mm_extract_epi32(qcounts,0)) { - totalcounts += terms[0]; - } else { - counts[i] = 0; - } - if (_mm_extract_epi32(qcounts,1)) { - totalcounts += terms[1]; - } else { - counts[i+1] = 0; - } - if (_mm_extract_epi32(qcounts,2)) { - totalcounts += terms[2]; - } else { - counts[i+2] = 0; - } - if (_mm_extract_epi32(qcounts,3)) { - totalcounts += terms[3]; - } else { - counts[i+3] = 0; - } -#else for (k = 0; k < SIMD_NELTS; k++) { totalcounts += terms[k]; } -#endif *(++nskip_ptr) = 0; /* Advance ptr and initialize */ } @@ -49136,7 +32986,7 @@ table = (Chrpos_T *) NULL; } else { /* Need to assign positions[0] so we can free the space */ - pointers_end = &(pointers[-1]); /* or pointers_allocated[0] */ + /* pointers_end = &(pointers[-1]); */ /* or pointers_allocated[0] */ table = (Chrpos_T *) MALLOC(totalcounts * sizeof(Chrpos_T)); p = 0; @@ -49151,74 +33001,61 @@ positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; } -#else +#elif 0 /* Not necessary to assign since we check for counts[i] == 0 */ pointers_end[i] = /* positions[i] = */ p; i += j*SIMD_NELTS; +#else + i += j*SIMD_NELTS; #endif -#ifdef HAVE_AVX2 - pointers_end[i] = positions[i] = p; /* 0 */ - p += counts[i++]; - - 
pointers_end[i] = positions[i] = p; /* 1 */ - p += counts[i++]; - - pointers_end[i] = positions[i] = p; /* 2 */ - p += counts[i++]; - - pointers_end[i] = positions[i] = p; /* 3 */ - p += counts[i++]; - -#else - pointers_end[i] = positions[i] = p; /* 0 */ + /* pointers_end[i] = */ positions[i] = p; /* 0 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 1 */ + /* pointers_end[i] = */ positions[i] = p; /* 1 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 2 */ + /* pointers_end[i] = */ positions[i] = p; /* 2 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 3 */ + /* pointers_end[i] = */ positions[i] = p; /* 3 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 4 */ + /* pointers_end[i] = */ positions[i] = p; /* 4 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 5 */ + /* pointers_end[i] = */ positions[i] = p; /* 5 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 6 */ + /* pointers_end[i] = */ positions[i] = p; /* 6 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 7 */ + /* pointers_end[i] = */ positions[i] = p; /* 7 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 8 */ + /* pointers_end[i] = */ positions[i] = p; /* 8 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 9 */ + /* pointers_end[i] = */ positions[i] = p; /* 9 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 10 */ + /* pointers_end[i] = */ positions[i] = p; /* 10 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 11 */ + /* pointers_end[i] = */ positions[i] = p; /* 11 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 12 */ + /* pointers_end[i] = */ positions[i] = p; /* 12 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 13 */ + /* pointers_end[i] = */ positions[i] = p; /* 13 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 14 */ + /* pointers_end[i] = */ positions[i] = p; /* 14 */ p += counts[i++]; - pointers_end[i] = positions[i] = p; /* 
15 */ + /* pointers_end[i] = */ positions[i] = p; /* 15 (SIMD_NELTS - 1) in bytes */ p += counts[i++]; -#endif j = *nskip_ptr++; } @@ -49231,7 +33068,7 @@ positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; positions[i++] = p; } -#else +#elif 0 if (j > 0) { pointers_end[i] = /* positions[i] = */ p; /* i += j*SIMD_NELTS; */ @@ -49243,7 +33080,7 @@ /* Faster to assign each individual pointer above */ memcpy((void *) pointers,&(positions[1]),(oligospace-1)*sizeof(Chrpos_T *)); #endif - pointers[oligospace-1] = p; /* or pointers_end[oligospace] or pointers_allocated[oligospace+1] */ + /* pointers[oligospace-1] = p;*/ /* or pointers_end[oligospace] or pointers_allocated[oligospace+1] */ /* dump_allocations(positions,counts,oligospace,indexsize,positions_space); */ @@ -49254,7 +33091,7 @@ #else static Chrpos_T * -allocate_positions (UINT4 *pointers, UINT4 *positions, +allocate_positions (UINT4 *positions, Inquery_T *inquery, Count_T *counts, int oligospace) { Chrpos_T *table; UINT4 p; @@ -49263,10 +33100,10 @@ #if 0 /* Causes problems with new algorithm */ - inquery[POLY_A & mask] = false; - inquery[POLY_C & mask] = false; - inquery[POLY_G & mask] = false; - inquery[POLY_T & mask] = false; + inquery[POLY_A & mask] = INQUERY_FALSE; + inquery[POLY_C & mask] = INQUERY_FALSE; + inquery[POLY_G & mask] = INQUERY_FALSE; + inquery[POLY_T & mask] = INQUERY_FALSE; #endif for (i = 0; i < oligospace; i++) { @@ -49293,8 +33130,8 @@ positions[i] = p; p += counts[i]; } - memcpy((void *) pointers,&(positions[1]),(oligospace-1)*sizeof(UINT4)); - pointers[oligospace-1] = p; + /* memcpy((void *) pointers,&(positions[1]),(oligospace-1)*sizeof(UINT4)); */ + /* pointers[oligospace-1] = p; */ } return table; @@ -49318,7 +33155,7 @@ } static void -positions_compare (Chrpos_T **positions1, Count_T *counts1, Inquery_T *inquery1, +positions_compare (Chrpos_T *table, UINT4 *positions1, Count_T *counts1, 
Inquery_T *inquery1, Chrpos_T **positions2, Count0_T *counts2, Oligospace_T oligospace, int indexsize, Shortoligomer_T mask) { Oligospace_T i; @@ -49334,6 +33171,8 @@ /* Can happen if count > MAXCOUNT */ if (i == (POLY_A & mask) || i == (POLY_C & mask) || i == (POLY_G & mask) || i == (POLY_T & mask)) { /* Ignore */ + } else if (counts2[i] == 0) { + /* Ignore (overabundant) */ } else { nt = shortoligo_nt(i,indexsize); printf("At oligo %s (%llu), counts1 %d != counts2 %d, inquery1 %hd\n", @@ -49343,10 +33182,10 @@ } } else { for (hit = 0; hit < counts1[i]; hit++) { - if (positions1[i][hit] != positions2[i][hit]) { + if (table[positions1[i]+hit] != positions2[i][hit]) { nt = shortoligo_nt(i,indexsize); printf("At oligo %s (%llu), hit %d/%d, positions1 %u != positions2 %u\n", - nt,(unsigned long long) i,hit,counts1[i],positions1[i][hit],positions2[i][hit]); + nt,(unsigned long long) i,hit,counts1[i],table[positions1[i]+hit],positions2[i][hit]); FREE(nt); abort(); } @@ -49663,7 +33502,7 @@ masked = oligo & this->mask; noligos++; debug(nt = shortoligo_nt(oligo,indexsize); - printf("At querypos %d, oligo %s seen\n",i,nt); + printf("At querypos %d, oligo %s (%08X fwd, %08X rev) seen\n",i,nt,masked,~oligo & this->mask); FREE(nt)); this->counts[masked] += 1; @@ -50010,6 +33849,8 @@ Oligoindex_hr_tally (T this, Univcoord_T mappingstart, Univcoord_T mappingend, bool plusp, char *queryuc_ptr, int querystart, int queryend, Chrpos_T chrpos, int genestrand) { int badoligos, repoligos, trimoligos, trim_start, trim_end; + Count_T *working_counts; + Oligospace_T i; #ifdef DEBUG14 Count0_T *counts_old; Chrpos_T **positions_old; @@ -50027,31 +33868,42 @@ if (plusp == true) { debug0(printf("plus, origin is %u\n",chrpos)); -#ifdef USE_SIMD_FOR_COUNTS - count_positions_fwd_simd(this->counts,this->inquery,this->indexsize,mappingstart,mappingend,genestrand); +#ifdef HAVE_SSE2 + count_positions_fwd_simd(this->counts,this->indexsize,mappingstart,mappingend,genestrand); #else - 
count_positions_fwd_std(this->counts,this->inquery,this->indexsize,mappingstart,mappingend,genestrand); + count_positions_fwd_std(this->counts,this->indexsize,mappingstart,mappingend,genestrand); #endif - if ((this->table = allocate_positions(this->pointers,this->positions,this->inquery,this->counts, + if ((this->table = allocate_positions(this->positions,this->inquery,this->counts, this->oligospace)) != NULL) { + working_counts = (Count_T *) MALLOC(this->oligospace*sizeof(Count_T)); + memcpy((void *) working_counts,(const void *) this->counts,this->oligospace*sizeof(Count_T)); -#ifdef USE_SIMD_FOR_COUNTS - store_positions_fwd_simd(this->table,this->pointers,this->positions,this->counts,this->indexsize,mappingstart,mappingend, +#ifdef HAVE_SSE2 + store_positions_fwd_simd(this->table,this->positions,working_counts,this->indexsize,mappingstart,mappingend, chrpos,genestrand); #else - store_positions_fwd_std(this->table,this->pointers,this->positions,this->counts,this->indexsize,mappingstart,mappingend, + store_positions_fwd_std(this->table,this->positions,working_counts,this->indexsize,mappingstart,mappingend, chrpos,genestrand); #endif +#ifdef CHECK_ASSERTIONS + /* Check if storage routine matches counting routine */ + for (i = 0; i < this->oligospace; i++) { + assert(working_counts[i] == 0); + } +#endif + + FREE(working_counts); + debug9(printf("plus, origin is %u\n",chrpos)); debug9(dump_positions(this->table,this->positions,this->counts,this->inquery,this->oligospace,this->indexsize)); #ifdef DEBUG14 positions_old = Oligoindex_old_tally(&counts_old,mappingstart,mappingend,plusp, - queryuc_ptr,querylength,chrpos,genestrand, + queryuc_ptr,querystart,queryend,chrpos,genestrand, this->oligospace,this->indexsize,this->mask); - positions_compare(this->positions,this->counts,this->inquery, + positions_compare(this->table,this->positions,this->counts,this->inquery, positions_old,counts_old,this->oligospace,this->indexsize,this->mask); FREE(counts_old); 
FREE(positions_old[0]); @@ -50062,30 +33914,42 @@ } else { debug0(printf("minus, origin is %u\n",chrpos)); -#ifdef USE_SIMD_FOR_COUNTS - count_positions_rev_simd(this->counts,this->inquery,this->indexsize,mappingstart,mappingend,genestrand); +#ifdef HAVE_SSE2 + count_positions_rev_simd(this->counts,this->indexsize,mappingstart,mappingend,genestrand); #else - count_positions_rev_std(this->counts,this->inquery,this->indexsize,mappingstart,mappingend,genestrand); + count_positions_rev_std(this->counts,this->indexsize,mappingstart,mappingend,genestrand); #endif - if ((this->table = allocate_positions(this->pointers,this->positions,this->inquery,this->counts, + if ((this->table = allocate_positions(this->positions,this->inquery,this->counts, this->oligospace)) != NULL) { -#ifdef USE_SIMD_FOR_COUNTS - store_positions_rev_simd(this->table,this->pointers,this->positions,this->counts,this->indexsize,mappingstart,mappingend, + working_counts = (Count_T *) MALLOC(this->oligospace*sizeof(Count_T)); + memcpy((void *) working_counts,(const void *) this->counts,this->oligospace*sizeof(Count_T)); + +#ifdef HAVE_SSE2 + store_positions_rev_simd(this->table,this->positions,working_counts,this->indexsize,mappingstart,mappingend, chrpos,genestrand); #else - store_positions_rev_std(this->table,this->pointers,this->positions,this->counts,this->indexsize,mappingstart,mappingend, + store_positions_rev_std(this->table,this->positions,working_counts,this->indexsize,mappingstart,mappingend, chrpos,genestrand); #endif +#ifdef CHECK_ASSERTIONS + /* Check if storage routine matches counting routine */ + for (i = 0; i < this->oligospace; i++) { + assert(working_counts[i] == 0); + } +#endif + + FREE(working_counts); + debug9(printf("minus, origin is %u\n",chrpos)); debug9(dump_positions(this->table,this->positions,this->counts,this->inquery,this->oligospace,this->indexsize)); #ifdef DEBUG14 positions_old = Oligoindex_old_tally(&counts_old,mappingstart,mappingend,plusp, - 
queryuc_ptr,querylength,chrpos,genestrand, + queryuc_ptr,querystart,queryend,chrpos,genestrand, this->oligospace,this->indexsize,this->mask); - positions_compare(this->positions,this->counts,this->inquery, + positions_compare(this->table,this->positions,this->counts,this->inquery, positions_old,counts_old,this->oligospace,this->indexsize,this->mask); FREE(counts_old); FREE(positions_old[0]); @@ -50137,7 +34001,7 @@ masked = oligo & this->mask; #ifdef DEBUG nt = shortoligo_nt(oligo,indexsize); - printf("At querypos %d, oligo %s seen\n",i,nt); + printf("At querypos %d, oligo %s (%08X fwd, %08X rev) seen\n",i,nt,masked,~oligo & this->mask); FREE(nt); #endif @@ -50188,7 +34052,7 @@ static void Oligoindex_free (T *old) { if (*old) { - FREE((*old)->pointers_allocated); + /* FREE((*old)->pointers_allocated); */ FREE((*old)->positions); FREE((*old)->table); #ifdef HAVE_SSE2 @@ -50226,11 +34090,11 @@ char *nt; #endif - if ((*nhits = this->counts[masked]) >= 1) { + if ((*nhits = this->counts[masked]) > 0) { #ifdef DEBUG nt = shortoligo_nt(masked,this->indexsize); - printf("masked is %s (%u) => %d entries: %u...%u\n", - nt,masked,*nhits, + printf("masked is %s [%08X] (%u) => %d entries: %u...%u\n", + nt,masked,masked,*nhits, #if 0 this->positions[masked],this->positions[masked]+(*nhits)-1, #endif @@ -50495,5 +34359,3 @@ return diagonals; } - - diff -Nru gmap-2016-11-07/src/oligoindex_hr.h gmap-2017-01-14/src/oligoindex_hr.h --- gmap-2016-11-07/src/oligoindex_hr.h 2015-12-10 19:54:31.000000000 +0000 +++ gmap-2017-01-14/src/oligoindex_hr.h 2016-12-16 16:37:42.000000000 +0000 @@ -1,4 +1,4 @@ -/* $Id: oligoindex_hr.h 180701 2015-12-10 19:54:31Z twu $ */ +/* $Id: oligoindex_hr.h 201739 2016-12-16 16:37:41Z twu $ */ #ifndef OLIGOINDEX_HR_INCLUDED #define OLIGOINDEX_HR_INCLUDED @@ -14,54 +14,21 @@ #define OVERABUNDANCE_PCT 0.97 #define OVERABUNDANCE_MIN 200 - -#ifdef HAVE_AVX2 -/* Attempted to use int, so we don't need to check for count > 255. 
However, SIMD is much faster on bytes than on ints */ -typedef int Count_T; -typedef unsigned int Inquery_T; -#define INQUERY_FALSE 0x00000000 -#define INQUERY_TRUE 0xFFFFFFFF -#define SIMD_NELTS 8 /* 8 ints in 256 bits */ - -/* #define CHECK_FOR_OVERFLOW 1 -- Optional if we use int for Count_T */ -#define CHECK_FOR_OVERFLOW 1 - -#ifdef CHECK_FOR_OVERFLOW -#define MAXCOUNT 255 -#define INCR_COUNT(counts,inquery) if (++counts > MAXCOUNT) inquery = INQUERY_FALSE; -#else -#define INCR_COUNT(counts,inquery) counts += 1; -#endif - - -#elif defined(HAVE_SSE2) -typedef char Count_T; +/* Attempted to use int, so we could use i32gather_epi32. However, SIMD is much faster on bytes than on ints */ +typedef unsigned char Count_T; typedef unsigned char Inquery_T; #define INQUERY_FALSE 0x00 #define INQUERY_TRUE 0xFF -#define SIMD_NELTS 16 /* 16 bytes in 128 bits */ +#define INCR_COUNT(counts) counts += 1; -#define CHECK_FOR_OVERFLOW 1 /* Required, since a char can hold only 127 positive counts */ -#ifdef CHECK_FOR_OVERFLOW -#define INCR_COUNT(counts,inquery) if (++counts < 0) inquery = INQUERY_FALSE; -#else -#define INCR_COUNT(counts,inquery) counts += 1; -#endif - -#else -typedef char Count_T; -typedef bool Inquery_T; -#define INQUERY_FALSE false -#define INQUERY_TRUE true - -#define CHECK_FOR_OVERFLOW 1 /* Required, since a char can hold only 127 positive counts */ -#ifdef CHECK_FOR_OVERFLOW -#define INCR_COUNT(counts,inquery) if (++counts < 0) inquery = false; -#else -#define INCR_COUNT(counts,inquery) counts += 1; +#if defined(HAVE_AVX512) +#define SIMD_NELTS 64 /* 64 bytes in 256 bits */ +#elif defined(HAVE_AVX2) +#define SIMD_NELTS 32 /* 32 bytes in 256 bits */ +#elif defined(HAVE_SSE2) +#define SIMD_NELTS 16 /* 16 bytes in 128 bits */ #endif -#endif #define T Oligoindex_T diff -Nru gmap-2016-11-07/src/outbuffer.c gmap-2017-01-14/src/outbuffer.c --- gmap-2016-11-07/src/outbuffer.c 2016-02-18 00:10:24.000000000 +0000 +++ gmap-2017-01-14/src/outbuffer.c 2016-11-14 
20:54:21.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: outbuffer.c 184468 2016-02-18 00:10:24Z twu $"; +static char rcsid[] = "$Id: outbuffer.c 200473 2016-11-14 20:54:20Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -74,22 +74,20 @@ #ifdef USE_MPI static MPI_File *outputs; +static MPI_File output_failedinput; #ifdef GSNAP static MPI_File output_failedinput_1; static MPI_File output_failedinput_2; -#else -static MPI_File output_failedinput; #endif #else static char *write_mode; static FILE **outputs = NULL; +static FILE *output_failedinput; #ifdef GSNAP static FILE *output_failedinput_1; static FILE *output_failedinput_2; -#else -static FILE *output_failedinput; #endif #endif @@ -228,8 +226,30 @@ exit(9); } #endif + + /* Re-use filename, since it is shorter */ + sprintf(filename,"%s",failedinput_root); +#ifdef USE_MPI + if (appendp == true) { + MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_APPEND, + MPI_INFO_NULL,&output_failedinput); + } else { + /* Need to remove existing file, if any */ + MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_CREATE | MPI_MODE_WRONLY | MPI_MODE_DELETE_ON_CLOSE, + MPI_INFO_NULL,&output_failedinput); + MPI_File_close(&output_failedinput); + MPI_File_open(MPI_COMM_WORLD,filename,MPI_MODE_CREATE | MPI_MODE_WRONLY, + MPI_INFO_NULL,&output_failedinput); + } +#else + if ((output_failedinput = fopen(filename,write_mode)) == NULL) { + fprintf(stderr,"Cannot open file %s for writing\n",filename); + exit(9); + } +#endif FREE(filename); + #else /* GMAP */ filename = (char *) MALLOC((strlen(failedinput_root)+1) * sizeof(char)); sprintf(filename,"%s",failedinput_root); @@ -357,10 +377,9 @@ failedinput_root = failedinput_root_in; if (failedinput_root == NULL) { + output_failedinput = NULL; #ifdef GSNAP output_failedinput_1 = output_failedinput_2 = NULL; -#else - output_failedinput = NULL; #endif } else { failedinput_open(failedinput_root); @@ -381,11 +400,10 @@ struct RRlist_T { int id; 
Filestring_T fp; + Filestring_T fp_failedinput; #ifdef GSNAP Filestring_T fp_failedinput_1; Filestring_T fp_failedinput_2; -#else - Filestring_T fp_failedinput; #endif RRlist_T next; }; @@ -410,21 +428,19 @@ /* Returns new tail */ static RRlist_T RRlist_push (RRlist_T *head, RRlist_T tail, Filestring_T fp, -#ifdef GSNAP - Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2 -#else Filestring_T fp_failedinput +#ifdef GSNAP + , Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2 #endif ) { RRlist_T new; new = (RRlist_T) MALLOC_OUT(sizeof(*new)); /* Called by worker thread */ new->fp = fp; + new->fp_failedinput = fp_failedinput; #ifdef GSNAP new->fp_failedinput_1 = fp_failedinput_1; new->fp_failedinput_2 = fp_failedinput_2; -#else - new->fp_failedinput = fp_failedinput; #endif new->next = (RRlist_T) NULL; @@ -441,20 +457,18 @@ /* Returns new head */ static RRlist_T RRlist_pop (RRlist_T head, Filestring_T *fp, -#ifdef GSNAP - Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2 -#else Filestring_T *fp_failedinput +#ifdef GSNAP + , Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2 #endif ) { RRlist_T newhead; *fp = head->fp; + *fp_failedinput = head->fp_failedinput; #ifdef GSNAP *fp_failedinput_1 = head->fp_failedinput_1; *fp_failedinput_2 = head->fp_failedinput_2; -#else - *fp_failedinput = head->fp_failedinput; #endif newhead = head->next; @@ -466,10 +480,9 @@ static RRlist_T RRlist_insert (RRlist_T list, int id, Filestring_T fp, -#ifdef GSNAP - Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2 -#else Filestring_T fp_failedinput +#ifdef GSNAP + , Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2 #endif ) { RRlist_T *p; @@ -483,11 +496,10 @@ new = (RRlist_T) MALLOC_OUT(sizeof(*new)); new->id = id; new->fp = fp; + new->fp_failedinput = fp_failedinput; #ifdef GSNAP new->fp_failedinput_1 = fp_failedinput_1; new->fp_failedinput_2 = fp_failedinput_2; -#else - new->fp_failedinput = fp_failedinput; #endif new->next 
= *p; @@ -498,21 +510,19 @@ /* Returns new head */ static RRlist_T RRlist_pop_id (RRlist_T head, int *id, Filestring_T *fp, -#ifdef GSNAP - Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2 -#else Filestring_T *fp_failedinput +#ifdef GSNAP + , Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2 #endif ) { RRlist_T newhead; *id = head->id; *fp = head->fp; + *fp_failedinput = head->fp_failedinput; #ifdef GSNAP *fp_failedinput_1 = head->fp_failedinput_1; *fp_failedinput_2 = head->fp_failedinput_2; -#else - *fp_failedinput = head->fp_failedinput; #endif newhead = head->next; @@ -711,19 +721,17 @@ if (failedinput_root != NULL) { #ifdef USE_MPI + MPI_File_close(&output_failedinput); #ifdef GSNAP MPI_File_close(&output_failedinput_1); MPI_File_close(&output_failedinput_2); -#else - MPI_File_close(&output_failedinput); #endif #else + fclose(output_failedinput); #ifdef GSNAP fclose(output_failedinput_1); fclose(output_failedinput_2); -#else - fclose(output_failedinput); #endif #endif @@ -841,13 +849,13 @@ #ifdef GSNAP void -Outbuffer_put_filestrings (T this, Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2) { +Outbuffer_put_filestrings (T this, Filestring_T fp, Filestring_T fp_failedinput, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2) { #ifdef HAVE_PTHREAD pthread_mutex_lock(&this->lock); #endif - this->tail = RRlist_push(&this->head,this->tail,fp,fp_failedinput_1,fp_failedinput_2); + this->tail = RRlist_push(&this->head,this->tail,fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2); debug1(RRlist_dump(this->head,this->tail)); this->nprocessed += 1; @@ -886,7 +894,7 @@ #ifdef GSNAP void -Outbuffer_print_filestrings (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2) { +Outbuffer_print_filestrings (Filestring_T fp, Filestring_T fp_failedinput, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2) { SAM_split_output_type split_output; #ifdef USE_MPI MPI_File output; @@ 
-929,6 +937,13 @@ Filestring_free(&fp); if (failedinput_root != NULL) { + if (fp_failedinput != NULL) { +#ifdef USE_MPI + Filestring_stringify(fp_failedinput); +#endif + Filestring_print(output_failedinput,fp_failedinput); + Filestring_free(&fp_failedinput); + } if (fp_failedinput_1 != NULL) { #ifdef USE_MPI Filestring_stringify(fp_failedinput_1); @@ -1015,10 +1030,9 @@ unsigned int output_buffer_size = this->output_buffer_size; unsigned int noutput = 0, ntotal, nbeyond; Filestring_T fp; + Filestring_T fp_failedinput; #ifdef GSNAP Filestring_T fp_failedinput_1, fp_failedinput_2; -#else - Filestring_T fp_failedinput; #endif /* Obtain this->ntotal while locked, to prevent race between output thread and input thread */ @@ -1049,7 +1063,7 @@ } else { #ifdef GSNAP - this->head = RRlist_pop(this->head,&fp,&fp_failedinput_1,&fp_failedinput_2); + this->head = RRlist_pop(this->head,&fp,&fp_failedinput,&fp_failedinput_1,&fp_failedinput_2); #else this->head = RRlist_pop(this->head,&fp,&fp_failedinput); #endif @@ -1061,7 +1075,7 @@ #endif #ifdef GSNAP - Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2); + Outbuffer_print_filestrings(fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2); #else Outbuffer_print_filestrings(fp,fp_failedinput); #endif @@ -1076,14 +1090,14 @@ /* Clear out backlog */ while (this->head && this->nprocessed - noutput > output_buffer_size) { #ifdef GSNAP - this->head = RRlist_pop(this->head,&fp,&fp_failedinput_1,&fp_failedinput_2); + this->head = RRlist_pop(this->head,&fp,&fp_failedinput,&fp_failedinput_1,&fp_failedinput_2); #else this->head = RRlist_pop(this->head,&fp,&fp_failedinput); #endif debug1(RRlist_dump(this->head,this->tail)); #ifdef GSNAP - Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2); + Outbuffer_print_filestrings(fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2); #else Outbuffer_print_filestrings(fp,fp_failedinput); #endif @@ -1124,10 +1138,9 @@ unsigned int output_buffer_size = 
this->output_buffer_size; unsigned int noutput = 0, nqueued = 0, ntotal, nbeyond; Filestring_T fp; + Filestring_T fp_failedinput; #ifdef GSNAP Filestring_T fp_failedinput_1, fp_failedinput_2; -#else - Filestring_T fp_failedinput; #endif RRlist_T queue = NULL; int id; @@ -1161,7 +1174,7 @@ } else { #ifdef GSNAP - this->head = RRlist_pop(this->head,&fp,&fp_failedinput_1,&fp_failedinput_2); + this->head = RRlist_pop(this->head,&fp,&fp_failedinput,&fp_failedinput_1,&fp_failedinput_2); #else this->head = RRlist_pop(this->head,&fp,&fp_failedinput); #endif @@ -1173,14 +1186,14 @@ if ((id = Filestring_id(fp)) != (int) noutput) { /* Store in queue */ #ifdef GSNAP - queue = RRlist_insert(queue,id,fp,fp_failedinput_1,fp_failedinput_2); + queue = RRlist_insert(queue,id,fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2); #else queue = RRlist_insert(queue,id,fp,fp_failedinput); #endif nqueued++; } else { #ifdef GSNAP - Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2); + Outbuffer_print_filestrings(fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2); #else Outbuffer_print_filestrings(fp,fp_failedinput); #endif @@ -1192,13 +1205,13 @@ /* Print out rest of stored queue */ while (queue != NULL && queue->id == (int) noutput) { #ifdef GSNAP - queue = RRlist_pop_id(queue,&id,&fp,&fp_failedinput_1,&fp_failedinput_2); + queue = RRlist_pop_id(queue,&id,&fp,&fp_failedinput,&fp_failedinput_1,&fp_failedinput_2); #else queue = RRlist_pop_id(queue,&id,&fp,&fp_failedinput); #endif nqueued--; #ifdef GSNAP - Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2); + Outbuffer_print_filestrings(fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2); #else Outbuffer_print_filestrings(fp,fp_failedinput); #endif @@ -1216,21 +1229,21 @@ /* Clear out backlog */ while (this->head && this->nprocessed - nqueued - noutput > output_buffer_size) { #ifdef GSNAP - this->head = RRlist_pop(this->head,&fp,&fp_failedinput_1,&fp_failedinput_2); + this->head = 
RRlist_pop(this->head,&fp,&fp_failedinput,&fp_failedinput_1,&fp_failedinput_2); #else this->head = RRlist_pop(this->head,&fp,&fp_failedinput); #endif if ((id = Filestring_id(fp)) != (int) noutput) { /* Store in queue */ #ifdef GSNAP - queue = RRlist_insert(queue,id,fp,fp_failedinput_1,fp_failedinput_2); + queue = RRlist_insert(queue,id,fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2); #else queue = RRlist_insert(queue,id,fp,fp_failedinput); #endif nqueued++; } else { #ifdef GSNAP - Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2); + Outbuffer_print_filestrings(fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2); #else Outbuffer_print_filestrings(fp,fp_failedinput); #endif @@ -1241,13 +1254,13 @@ /* Print out rest of stored queue */ while (queue != NULL && queue->id == (int) noutput) { #ifdef GSNAP - queue = RRlist_pop_id(queue,&id,&fp,&fp_failedinput_1,&fp_failedinput_2); + queue = RRlist_pop_id(queue,&id,&fp,&fp_failedinput,&fp_failedinput_1,&fp_failedinput_2); #else queue = RRlist_pop_id(queue,&id,&fp,&fp_failedinput); #endif nqueued--; #ifdef GSNAP - Outbuffer_print_filestrings(fp,fp_failedinput_1,fp_failedinput_2); + Outbuffer_print_filestrings(fp,fp_failedinput,fp_failedinput_1,fp_failedinput_2); #else Outbuffer_print_filestrings(fp,fp_failedinput); #endif diff -Nru gmap-2016-11-07/src/outbuffer.h gmap-2017-01-14/src/outbuffer.h --- gmap-2016-11-07/src/outbuffer.h 2015-06-24 03:58:38.000000000 +0000 +++ gmap-2017-01-14/src/outbuffer.h 2016-11-14 20:54:21.000000000 +0000 @@ -1,4 +1,4 @@ -/* $Id: outbuffer.h 157571 2015-01-28 00:04:37Z twu $ */ +/* $Id: outbuffer.h 200473 2016-11-14 20:54:20Z twu $ */ #ifndef OUTBUFFER_INCLUDED #define OUTBUFFER_INCLUDED @@ -63,10 +63,10 @@ #ifdef GSNAP extern void -Outbuffer_put_filestrings (T this, Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2); +Outbuffer_put_filestrings (T this, Filestring_T fp, Filestring_T fp_failedinput, Filestring_T fp_failedinput_1, Filestring_T 
fp_failedinput_2); extern void -Outbuffer_print_filestrings (Filestring_T fp, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2); +Outbuffer_print_filestrings (Filestring_T fp, Filestring_T fp_failedinput, Filestring_T fp_failedinput_1, Filestring_T fp_failedinput_2); #else extern void Outbuffer_put_filestrings (T this, Filestring_T fp, Filestring_T fp_failedinput); diff -Nru gmap-2016-11-07/src/output.c gmap-2017-01-14/src/output.c --- gmap-2016-11-07/src/output.c 2016-02-22 20:21:10.000000000 +0000 +++ gmap-2017-01-14/src/output.c 2016-11-14 20:54:22.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: output.c 184470 2016-02-18 00:11:42Z twu $"; +static char rcsid[] = "$Id: output.c 200473 2016-11-14 20:54:20Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -158,7 +158,7 @@ /* Taken from print_result_sam from old outbuffer.c */ static Filestring_T -filestring_fromresult_sam (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2, +filestring_fromresult_sam (Filestring_T *fp_failedinput, Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2, Result_T result, Request_T request) { Filestring_T fp; Resulttype_T resulttype; @@ -169,15 +169,16 @@ char *abbrev; fp = Filestring_new(Request_id(request)); - if (failedinput_root == NULL) { - *fp_failedinput_1 = (Filestring_T) NULL; - } else { - *fp_failedinput_1 = Filestring_new(Request_id(request)); - } resulttype = Result_resulttype(result); if (resulttype == SINGLEEND_NOMAPPING) { - *fp_failedinput_2 = (Filestring_T) NULL; + if (failedinput_root == NULL) { + *fp_failedinput = (Filestring_T) NULL; + } else { + *fp_failedinput = Filestring_new(Request_id(request)); + } + *fp_failedinput_1 = *fp_failedinput_2 = (Filestring_T) NULL; + queryseq1 = Request_queryseq1(request); if (nofailsp == true) { /* Skip */ @@ -190,12 +191,18 @@ /*artificial_mate_p*/false,/*npaths_mate*/0,/*mate_chrpos*/0U, quality_shift,sam_read_group_id,invert_first_p,invert_second_p); if (failedinput_root != NULL) { - 
Shortread_print_query_singleend(*fp_failedinput_1,queryseq1,/*headerseq*/queryseq1); + Shortread_print_query_singleend(*fp_failedinput,queryseq1,/*headerseq*/queryseq1); } } } else if (resulttype == SINGLEEND_UNIQ) { - *fp_failedinput_2 = (Filestring_T) NULL; + if (failedinput_root == NULL) { + *fp_failedinput = (Filestring_T) NULL; + } else { + *fp_failedinput = Filestring_new(Request_id(request)); + } + *fp_failedinput_1 = *fp_failedinput_2 = (Filestring_T) NULL; + if (failsonlyp == true) { /* Skip */ } else { @@ -216,7 +223,7 @@ Filestring_set_split_output(fp,OUTPUT_UU); abbrev = ABBREV_UNPAIRED_UNIQ; } - SAM_print(fp,*fp_failedinput_1,abbrev,stage3,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),/*acc2*/NULL, + SAM_print(fp,*fp_failedinput,abbrev,stage3,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1),/*acc2*/NULL, /*pathnum*/1,npaths_primary,npaths_altloc,Stage3end_absmq_score(stage3array[0]),first_absmq,second_absmq, Stage3end_mapq_score(stage3array[0]), chromosome_iit,queryseq1,/*queryseq2*/NULL, @@ -227,7 +234,12 @@ } } else if (resulttype == SINGLEEND_TRANSLOC) { - *fp_failedinput_2 = (Filestring_T) NULL; + if (failedinput_root == NULL) { + *fp_failedinput = (Filestring_T) NULL; + } else { + *fp_failedinput = Filestring_new(Request_id(request)); + } + *fp_failedinput_1 = *fp_failedinput_2 = (Filestring_T) NULL; Filestring_set_split_output(fp,OUTPUT_UT); stage3array = (Stage3end_T *) Result_array(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,result); @@ -242,7 +254,7 @@ /*artificial_mate_p*/false,/*npaths_mate*/0,/*mate_chrpos*/0U, quality_shift,sam_read_group_id,invert_first_p,invert_second_p); if (failedinput_root != NULL) { - Shortread_print_query_singleend(*fp_failedinput_1,queryseq1,/*headerseq*/queryseq1); + Shortread_print_query_singleend(*fp_failedinput,queryseq1,/*headerseq*/queryseq1); } } else { @@ -256,7 +268,7 @@ chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1), 
/*first_read_p*/true); } - SAM_print(fp,*fp_failedinput_1,ABBREV_UNPAIRED_TRANSLOC, + SAM_print(fp,*fp_failedinput,ABBREV_UNPAIRED_TRANSLOC, stage3,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1), /*acc2*/NULL,pathnum,npaths_primary,npaths_altloc, Stage3end_absmq_score(stage3array[pathnum-1]),first_absmq,second_absmq, @@ -270,7 +282,13 @@ } } else if (resulttype == SINGLEEND_MULT) { - *fp_failedinput_2 = (Filestring_T) NULL; + if (failedinput_root == NULL) { + *fp_failedinput = (Filestring_T) NULL; + } else { + *fp_failedinput = Filestring_new(Request_id(request)); + } + *fp_failedinput_1 = *fp_failedinput_2 = (Filestring_T) NULL; + stage3array = (Stage3end_T *) Result_array(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,result); if (failsonlyp == true) { @@ -285,7 +303,7 @@ /*artificial_mate_p*/false,/*npaths_mate*/0,/*mate_chrpos*/0U, quality_shift,sam_read_group_id,invert_first_p,invert_second_p); if (failedinput_root != NULL) { - Shortread_print_query_singleend(*fp_failedinput_1,queryseq1,/*headerseq*/queryseq1); + Shortread_print_query_singleend(*fp_failedinput,queryseq1,/*headerseq*/queryseq1); } } else { @@ -299,7 +317,7 @@ chrpos = SAM_compute_chrpos(/*hardclip_low*/0,/*hardclip_high*/0,stage3,Shortread_fulllength(queryseq1), /*first_read_p*/true); } - SAM_print(fp,*fp_failedinput_1,ABBREV_UNPAIRED_MULT, + SAM_print(fp,*fp_failedinput,ABBREV_UNPAIRED_MULT, stage3,/*mate*/NULL,/*acc1*/Shortread_accession(queryseq1), /*acc2*/NULL,pathnum,npaths_primary,npaths_altloc, Stage3end_absmq_score(stage3array[pathnum-1]),first_absmq,second_absmq, @@ -313,9 +331,12 @@ } } else { + *fp_failedinput = (Filestring_T) NULL; if (failedinput_root == NULL) { + *fp_failedinput_1 = (Filestring_T) NULL; *fp_failedinput_2 = (Filestring_T) NULL; } else { + *fp_failedinput_1 = Filestring_new(Request_id(request)); *fp_failedinput_2 = Filestring_new(Request_id(request)); } SAM_print_paired(fp,*fp_failedinput_1,*fp_failedinput_2,result,resulttype,chromosome_iit, @@ 
-360,7 +381,7 @@ /* Taken from print_result_gsnap from old outbuffer.c */ static Filestring_T -filestring_fromresult_gsnap (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2, +filestring_fromresult_gsnap (Filestring_T *fp_failedinput, Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2, Result_T result, Request_T request) { Filestring_T fp; Resulttype_T resulttype; @@ -369,16 +390,16 @@ int npaths_primary, npaths_altloc, pathnum, first_absmq, second_absmq; fp = Filestring_new(Request_id(request)); - if (failedinput_root == NULL) { - *fp_failedinput_1 = (Filestring_T) NULL; - } else { - *fp_failedinput_1 = Filestring_new(Request_id(request)); - } resulttype = Result_resulttype(result); - if (resulttype == SINGLEEND_NOMAPPING) { - *fp_failedinput_2 = (Filestring_T) NULL; + if (failedinput_root == NULL) { + *fp_failedinput = (Filestring_T) NULL; + } else { + *fp_failedinput = Filestring_new(Request_id(request)); + } + *fp_failedinput_1 = *fp_failedinput_2 = (Filestring_T) NULL; + if (nofailsp == true) { /* Skip */ } else if (print_m8_p) { @@ -390,12 +411,18 @@ if (failedinput_root != NULL) { queryseq1 = Request_queryseq1(request); - Shortread_print_query_singleend(*fp_failedinput_1,queryseq1,/*headerseq*/queryseq1); + Shortread_print_query_singleend(*fp_failedinput,queryseq1,/*headerseq*/queryseq1); } } } else if (resulttype == SINGLEEND_UNIQ) { - *fp_failedinput_2 = (Filestring_T) NULL; + if (failedinput_root == NULL) { + *fp_failedinput = (Filestring_T) NULL; + } else { + *fp_failedinput = Filestring_new(Request_id(request)); + } + *fp_failedinput_1 = *fp_failedinput_2 = (Filestring_T) NULL; + if (failsonlyp == true) { /* Skip */ } else { @@ -417,7 +444,13 @@ } } else if (resulttype == SINGLEEND_TRANSLOC) { - *fp_failedinput_2 = (Filestring_T) NULL; + if (failedinput_root == NULL) { + *fp_failedinput = (Filestring_T) NULL; + } else { + *fp_failedinput = Filestring_new(Request_id(request)); + } + *fp_failedinput_1 = *fp_failedinput_2 = 
(Filestring_T) NULL; + Filestring_set_split_output(fp,OUTPUT_UT); stage3array = (Stage3end_T *) Result_array(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,result); @@ -447,7 +480,13 @@ } } else if (resulttype == SINGLEEND_MULT) { - *fp_failedinput_2 = (Filestring_T) NULL; + if (failedinput_root == NULL) { + *fp_failedinput = (Filestring_T) NULL; + } else { + *fp_failedinput = Filestring_new(Request_id(request)); + } + *fp_failedinput_1 = *fp_failedinput_2 = (Filestring_T) NULL; + stage3array = (Stage3end_T *) Result_array(&npaths_primary,&npaths_altloc,&first_absmq,&second_absmq,result); if (failsonlyp == true) { @@ -479,9 +518,12 @@ } } else if (resulttype == PAIREDEND_NOMAPPING) { + *fp_failedinput = (Filestring_T) NULL; if (failedinput_root == NULL) { + *fp_failedinput_1 = (Filestring_T) NULL; *fp_failedinput_2 = (Filestring_T) NULL; } else { + *fp_failedinput_1 = Filestring_new(Request_id(request)); *fp_failedinput_2 = Filestring_new(Request_id(request)); } @@ -509,9 +551,12 @@ } } else { + *fp_failedinput = (Filestring_T) NULL; if (failedinput_root == NULL) { + *fp_failedinput_1 = (Filestring_T) NULL; *fp_failedinput_2 = (Filestring_T) NULL; } else { + *fp_failedinput_1 = Filestring_new(Request_id(request)); *fp_failedinput_2 = Filestring_new(Request_id(request)); } @@ -539,12 +584,12 @@ } Filestring_T -Output_filestring_fromresult (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2, +Output_filestring_fromresult (Filestring_T *fp_failedinput, Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2, Result_T result, Request_T request) { if (output_sam_p == true) { - return filestring_fromresult_sam(&(*fp_failedinput_1),&(*fp_failedinput_2),result,request); + return filestring_fromresult_sam(&(*fp_failedinput),&(*fp_failedinput_1),&(*fp_failedinput_2),result,request); } else { - return filestring_fromresult_gsnap(&(*fp_failedinput_1),&(*fp_failedinput_2),result,request); + return 
filestring_fromresult_gsnap(&(*fp_failedinput),&(*fp_failedinput_1),&(*fp_failedinput_2),result,request); } } diff -Nru gmap-2016-11-07/src/output.h gmap-2017-01-14/src/output.h --- gmap-2016-11-07/src/output.h 2014-12-12 19:43:00.000000000 +0000 +++ gmap-2017-01-14/src/output.h 2016-11-14 20:54:22.000000000 +0000 @@ -1,4 +1,4 @@ -/* $Id: output.h 155282 2014-12-12 19:42:54Z twu $ */ +/* $Id: output.h 200473 2016-11-14 20:54:20Z twu $ */ #ifndef OUTPUT_INCLUDED #define OUTPUT_INCLUDED @@ -44,7 +44,7 @@ #ifdef GSNAP extern Filestring_T -Output_filestring_fromresult (Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2, +Output_filestring_fromresult (Filestring_T *fp_failedinput, Filestring_T *fp_failedinput_1, Filestring_T *fp_failedinput_2, Result_T result, Request_T request); #else extern Filestring_T diff -Nru gmap-2016-11-07/src/pair.c gmap-2017-01-14/src/pair.c --- gmap-2016-11-07/src/pair.c 2016-11-08 00:58:17.000000000 +0000 +++ gmap-2017-01-14/src/pair.c 2016-12-30 14:36:45.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: pair.c 200236 2016-11-08 00:58:17Z twu $"; +static char rcsid[] = "$Id: pair.c 202031 2016-12-29 16:20:14Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -149,8 +149,8 @@ static bool print_nsnpdiffs_p; static double genomelength; /* For BLAST E-value */ -static bool gff3_phase_swap_p = true; -static bool cigar_extended_p = true; +static bool gff3_phase_swap_p; +static bool cigar_extended_p; void @@ -1750,6 +1750,39 @@ } +#if 0 +bool +Pair_identical_p (List_T pairs1, List_T pairs2) { + List_T p, q; + T pair1, pair2; + + p = pairs1; + q = pairs2; + while (p && q) { + pair1 = (T) List_head(p); + pair2 = (T) List_head(q); + if (pair1->gapp != pair2->gapp) { + return false; + } else if (pair1->querypos != pair2->querypos) { + return false; + } else if (pair1->genomepos != pair2->genomepos) { + return false; + } else if (pair1->comp != pair2->comp) { + return false; + } + p = List_next(p); + q = List_next(q); + } + + if (p 
|| q) { + return false; + } else { + return true; + } +} +#endif + + void Pair_check_list (List_T pairs) { T this; @@ -1823,10 +1856,45 @@ } +/* Modeled after Pair_convert_array_to_pairs */ +List_T +Pair_convert_array_to_pairs (List_T pairs, struct T *pairarray, int npairs, bool plusp, + Chrpos_T chrlength, Pairpool_T pairpool) { + T pair; + int i; + + if (plusp == true) { + for (i = 0; i < npairs; i++) { + pair = &(pairarray[i]); + if (pair->gapp) { + /* Skip */ + } else { + pairs = Pairpool_push(pairs,pairpool,pair->querypos /*+ queryseq_offset*/,pair->genomepos, + pair->cdna,pair->comp,pair->genome,pair->genomealt,/*dynprogindex*/0); + } + } + + } else { + for (i = 0; i < npairs; i++) { + pair = &(pairarray[i]); + if (pair->gapp) { + /* Skip */ + } else { + pairs = Pairpool_push(pairs,pairpool,pair->querypos /*+ queryseq_offset*/,chrlength - pair->genomepos, + pair->cdna,pair->comp,pair->genome,pair->genomealt,/*dynprogindex*/0); + } + } + } + + + return pairs; +} + + /* Called by output thread for --merge-overlap feature. Modeled after Substring_convert_to_pairs. 
*/ List_T -Pair_convert_array_to_pairs (List_T pairs, struct T *pairarray, int npairs, bool plusp, int querylength, - int hardclip_low, int hardclip_high, int queryseq_offset) { +Pair_convert_array_to_pairs_out (List_T pairs, struct T *pairarray, int npairs, bool plusp, int querylength, + int hardclip_low, int hardclip_high, int queryseq_offset) { T pair; int querystart, queryend, i; diff -Nru gmap-2016-11-07/src/pair.h gmap-2017-01-14/src/pair.h --- gmap-2016-11-07/src/pair.h 2016-11-08 00:58:18.000000000 +0000 +++ gmap-2017-01-14/src/pair.h 2016-12-29 16:20:16.000000000 +0000 @@ -1,4 +1,4 @@ -/* $Id: pair.h 200236 2016-11-08 00:58:17Z twu $ */ +/* $Id: pair.h 202031 2016-12-29 16:20:14Z twu $ */ #ifndef PAIR_INCLUDED #define PAIR_INCLUDED @@ -124,14 +124,18 @@ extern int Pair_codon_changepos (struct T *pairs, int npairs, int aapos, int cdna_direction); - +extern bool +Pair_identical_p (List_T pairs1, List_T pairs2); extern void Pair_check_list (List_T pairs); extern bool Pair_check_array (struct T *pairs, int npairs); extern List_T -Pair_convert_array_to_pairs (List_T pairs, struct T *pairarray, int npairs, bool plusp, int querylength, - int hardclip_low, int hardclip_high, int queryseq_offset); +Pair_convert_array_to_pairs (List_T pairs, struct T *pairarray, int npairs, bool plusp, + Chrpos_T chrlength, Pairpool_T pairpool); +extern List_T +Pair_convert_array_to_pairs_out (List_T pairs, struct T *pairarray, int npairs, bool plusp, int querylength, + int hardclip_low, int hardclip_high, int queryseq_offset); extern void Pair_print_exonsummary (Filestring_T fp, struct T *pairs, int npairs, Chrnum_T chrnum, diff -Nru gmap-2016-11-07/src/samprint.c gmap-2017-01-14/src/samprint.c --- gmap-2016-11-07/src/samprint.c 2016-11-08 00:58:55.000000000 +0000 +++ gmap-2017-01-14/src/samprint.c 2017-01-13 23:29:59.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: samprint.c 200237 2016-11-08 00:58:55Z twu $"; +static char rcsid[] = "$Id: samprint.c 202590 2017-01-13 
23:29:58Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -2983,8 +2983,8 @@ /* 12. TAGS: XT */ if (print_xt_p == true) { FPRINTF(fp,"\tXT:Z:%c%c-%c%c,%.2f,%.2f",donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob); - FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,Substring_chr_splicecoord_D(donor), - acceptor_strand,acceptor_chr,Substring_chr_splicecoord_A(acceptor)); + FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,Substring_chr_splicecoord_D(donor,donor_strand), + acceptor_strand,acceptor_chr,Substring_chr_splicecoord_A(acceptor,acceptor_strand)); } /* 12. TAGS: XC */ @@ -3493,8 +3493,8 @@ /* 12. TAGS: XT */ if (print_xt_p == true) { FPRINTF(fp,"\tXT:Z:%c%c-%c%c,%.2f,%.2f",donor1,donor2,acceptor2,acceptor1,donor_prob,acceptor_prob); - FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,Substring_chr_splicecoord_D(donor), - acceptor_strand,acceptor_chr,Substring_chr_splicecoord_A(acceptor)); + FPRINTF(fp,",%c%s@%u..%c%s@%u",donor_strand,donor_chr,Substring_chr_splicecoord_D(donor,donor_strand), + acceptor_strand,acceptor_chr,Substring_chr_splicecoord_A(acceptor,acceptor_strand)); } @@ -4027,7 +4027,7 @@ /* No output */ return; - } else + } else { Filestring_set_split_output(fp,OUTPUT_NM); SAM_print_nomapping(fp,ABBREV_NOMAPPING_1,queryseq1,/*mate*/(Stage3end_T) NULL, acc1,acc2,chromosome_iit,resulttype, @@ -4045,6 +4045,7 @@ if (fp_failedinput_1 != NULL) { Shortread_print_query_pairedend(fp_failedinput_1,fp_failedinput_2,queryseq1,queryseq2); } + } } else { if (failsonlyp == true) { diff -Nru gmap-2016-11-07/src/sarray-read.c gmap-2017-01-14/src/sarray-read.c --- gmap-2016-11-07/src/sarray-read.c 2016-09-15 23:19:20.000000000 +0000 +++ gmap-2017-01-14/src/sarray-read.c 2016-12-29 16:20:16.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: sarray-read.c 197551 2016-09-08 01:16:14Z twu $"; +static char rcsid[] = "$Id: sarray-read.c 202031 2016-12-29 16:20:14Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -47,6 +47,9 @@ #include 
"stage3hr.h" #include "sedgesort.h" +/* Sedgesort giving errors on Intel compiler */ +#define USE_QSORT 1 + #if defined(WORDS_BIGENDIAN) || !defined(HAVE_SSE2) #else @@ -60,6 +63,11 @@ #else #include #endif +#if defined(WORDS_BIGENDIAN) || !defined(HAVE_AVX512) +#else +#include +#endif + #if !defined(HAVE_SSE4_2) /* Skip popcnt */ @@ -73,6 +81,7 @@ #define MIN_ENDLENGTH 12 #define MIN_INTRONLEN 9 +/* Some limit is needed to prevent GSNAP from running very slowly */ #define MAX_HITS_FOR_BEST_ELT 1000 /* A value of 10000 misses various splices, although they are caught by GSNAP algorithm */ @@ -229,6 +238,29 @@ return; } +#ifdef HAVE_AVX512 +static void +print_vector_hex_512 (__m512i x) { + UINT4 *s = (UINT4 *) &x; + + /* printf("%d %d %d %d\n",s[0],s[1],s[2],s[3]); */ + printf("%08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X %08X\n", + s[15],s[14],s[13],s[12],s[11],s[10],s[9],s[8],s[7],s[6],s[5],s[4],s[3],s[2],s[1],s[0]); + return; +} + +static void +print_vector_uint_512 (__m512i x) { + UINT4 *s = (UINT4 *) &x; + + /* printf("%d %d %d %d\n",s[0],s[1],s[2],s[3]); */ + printf("%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u\n", + s[15],s[14],s[13],s[12],s[11],s[10],s[9],s[8],s[7],s[6],s[5],s[4],s[3],s[2],s[1],s[0]); + return; +} +#endif + + #ifdef HAVE_AVX2 static void print_vector_hex_256 (__m256i x) { @@ -2143,12 +2175,27 @@ if (this->nmatches == 0 || this->npositions > EXCESS_SARRAY_HITS) { this->positions_allocated = this->positions = (Univcoord_T *) NULL; this->npositions_allocated = this->npositions = 0; + } else { #ifdef USE_QSORT - this->positions_allocated = this->positions = (Univcoord_T *) MALLOC(this->npositions * sizeof(Univcoord_T)); + if (this->npositions == 0) { + this->positions_allocated = this->positions = (Univcoord_T *) NULL; + this->npositions = 0; + } else { + this->positions_allocated = this->positions = (Univcoord_T *) MALLOC(this->npositions * sizeof(Univcoord_T)); + i = 0; + ptr = this->initptr; + while (ptr <= 
this->finalptr) { + if ((pos = csa_lookup(sarray,ptr++)) >= (Univcoord_T) this->querystart) { + this->positions[i++] = pos - this->querystart; + } + } + this->npositions = i; + qsort(this->positions,this->npositions,sizeof(Univcoord_T),Univcoord_compare); + } + #else this->positions_allocated = this->positions = (Univcoord_T *) MALLOC((this->npositions + 1) * sizeof(Univcoord_T)); -#endif i = 0; ptr = this->initptr; while (ptr <= this->finalptr) { @@ -2157,11 +2204,9 @@ } } this->npositions = i; -#ifdef USE_QSORT - qsort(this->positions,this->npositions,sizeof(Univcoord_T),Univcoord_compare); -#else Sedgesort_uint4(this->positions,this->npositions); #endif + } } @@ -2301,7 +2346,215 @@ #ifdef HAVE_ALLOCA -#if defined(HAVE_AVX2) && !defined(WORDS_BIGENDIAN) +#if defined(HAVE_AVX512) && !defined(WORDS_BIGENDIAN) + +/* AVX512 version is much simpler because it generates a mask directly + and it has compare operations for epu32 */ + +static void +fill_positions_filtered_first (Elt_T this, T sarray, Univcoord_T low, Univcoord_T high) { + Univcoord_T low_adj, high_adj; + Univcoord_T *array = sarray->array, value0; + Sarrayptr_T *array_stop, *array_end, *array_ptr; + Univcoord_T *positions_temp; + Univcoord_T *out; + __m512i adjusted; + __m512i floor, ceiling, values, adj; + __mmask16 mask; +#if defined(REQUIRE_ALIGNMENT) + int n_prealign, k; +#endif +#if defined(DEBUG) || defined(DEBUG7) + int nmatches; +#endif +#ifdef DEBUG7 + UINT8 pointer; + int i; +#endif +#ifdef DEBUG8 + Univcoord_T *positions_std; + int npositions_std; +#endif + + + debug(printf("Entered fill_positions_filtered_first with low %u and high %u, initptr %u and finalptr %u (n = %d), nmatches %d\n", + low,high,this->initptr,this->finalptr,this->finalptr - this->initptr + 1,this->nmatches)); + debug7(printf("Entered fill_positions_filtered_first with low %u and high %u, initptr %u and finalptr %u (n = %d), nmatches %d\n", + low,high,this->initptr,this->finalptr,this->finalptr - this->initptr + 
1,this->nmatches)); + + if (this->positions_allocated != NULL) { + /* Filled from a previous call */ + FREE(this->positions_allocated); + } + + if ((this->n_all_positions = this->finalptr - this->initptr + 1) == 0 /*|| this->n_all_positions > EXCESS_SARRAY_HITS*/) { + this->all_positions = (Univcoord_T *) NULL; + + } else { + /* Function surrounded by HAVE_ALLOCA */ +#ifdef USE_QSORT + positions_temp = out = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1) * sizeof(Univcoord_T)); +#else + positions_temp = out = (Univcoord_T *) MALLOCA((this->finalptr - this->initptr + 1 + 1) * sizeof(Univcoord_T)); +#endif + + low_adj = low + this->querystart; + high_adj = high + this->querystart; + + floor = _mm512_set1_epi32(low_adj - 1); + ceiling = _mm512_set1_epi32(high_adj + 1); + adj = _mm512_set1_epi32(this->querystart); + + this->npositions_allocated = this->npositions = 0; +#if defined(REQUIRE_ALIGNMENT) + array_ptr = &(array[this->initptr]); + + /* Initial part */ + n_prealign = ((64 - ((UINT8) array_ptr & 0x3F))/8) & 0xF; + debug7(printf("Initial ptr is at location %p. 
Need %d to get to 512-bit boundary\n",pointer,n_prealign)); + + debug7(printf("Initial part:\n")); + if (n_prealign > this->finalptr - this->initptr + 1) { + n_prealign = this->finalptr - this->initptr + 1; + } + for (k = 0; k < n_prealign; k++) { + debug7a(printf("Looking at value %u, relative to low %u and high %u\n",CONVERT(array[ptr]),low_adj,high_adj)); + if ((value0 = *array_ptr++) >= low_adj && value0 <= high_adj) { + *out++ = value0 - this->querystart; + } + } +#else + array_ptr = &(array[this->initptr]); +#endif /* REQUIRE_ALIGNMENT */ + + + /* Aligned part */ + if (this->finalptr < 16) { + array_stop = &(array[0]); + } else { + array_stop = &(array[this->finalptr - 16]); + } + array_end = &(array[this->finalptr]); + + while (array_ptr <= array_stop) { + +#if defined(REQUIRE_ALIGNMENT) + /* Use stream_load to avoid polluting the cache with suffix array entries */ + values = _mm512_stream_load_si512((__m512i *) array_ptr); +#else + /* It looks like loadu is just as fast as load */ + values = _mm512_loadu_si512((__m512i *) array_ptr); +#endif + debug7b(print_vector_uint_512(values)); + + /* mask = _mm512_andnot_si512(_mm512_cmpgt_epu32_mask(floor,values),_mm512_cmpgt_epu32_mask(ceiling,values)); -- This is off by 1 at floor */ + mask = _mm512_cmpgt_epu32_mask(values,floor) & _mm512_cmpgt_epu32_mask(ceiling,values); + + /* Example: 0xCCCC (16 bits) */ + debug7b(printf("%08X\n",mask)); + + /* Is it faster to skip check of mask? 
*/ + /* if (mask) { */ + adjusted = _mm512_sub_epi32(values,adj); + _mm512_mask_compressstoreu_epi32((void *) out,mask,adjusted); + +#ifdef HAVE_POPCNT + out += _popcnt32(mask); + debug7b(printf("mask: %08X (%d ones)\n",mask,_popcnt32(mask))); +#elif defined HAVE_MM_POPCNT + out += _mm_popcnt_u32(mask); + debug7b(printf("mask: %08X (%d ones)\n",mask,_mm_popcnt_u32(mask))); +#else + out += __builtin_popcount(mask); + debug7b(printf("mask: %08X (%d ones)\n",mask,__builtin_popcount(mask))); +#endif + /* } */ + + array_ptr += 16; + } + + /* Partial block at end */ + debug7(printf("\nFinal part:\n")); +#if 0 + /* Scalar */ + while (array_ptr <= array_end) { + if ((value0 = *array_ptr++) >= low_adj && value0 <= high_adj) { + *out++ = value0 - this->querystart; + } + } +#else + /* Vector */ + mask = ~(0xFFFF << (array_end - array_ptr + 1)); + values = _mm512_mask_loadu_epi32(values,mask,(__m512i *) array_ptr); + mask &= _mm512_cmpgt_epu32_mask(values,floor) & _mm512_cmpgt_epu32_mask(ceiling,values); + + adjusted = _mm512_sub_epi32(values,adj); + _mm512_mask_compressstoreu_epi32((void *) out,mask,adjusted); + +#ifdef HAVE_POPCNT + out += _popcnt32(mask); + debug7b(printf("mask: %08X (%d ones)\n",mask,_popcnt32(mask))); +#elif defined HAVE_MM_POPCNT + out += _mm_popcnt_u32(mask); + debug7b(printf("mask: %08X (%d ones)\n",mask,_mm_popcnt_u32(mask))); +#else + out += __builtin_popcount(mask); + debug7b(printf("mask: %08X (%d ones)\n",mask,__builtin_popcount(mask))); +#endif +#endif + + this->npositions_allocated = this->npositions = out - positions_temp; + debug7(printf("SIMD method found %d positions\n",this->npositions)); + + /* Copy the positions into heap from temp in stack */ + if (this->npositions == 0) { + this->positions_allocated = this->positions = (Univcoord_T *) NULL; + } else { + debug7(printf("Sorting %d positions\n",this->npositions)); +#ifdef USE_QSORT + qsort(positions_temp,this->npositions,sizeof(Univcoord_T),Univcoord_compare); +#else + 
Sedgesort_uint4(positions_temp,this->npositions); +#endif + + /* Need to copy positions before the goal */ +#ifdef USE_QSORT + this->positions_allocated = this->positions = MALLOC(this->npositions * sizeof(Univcoord_T)); +#else + this->positions_allocated = this->positions = MALLOC((this->npositions + 1) * sizeof(Univcoord_T)); +#endif + memcpy(this->positions,positions_temp,this->npositions * sizeof(Univcoord_T)); +#ifdef DEBUG7 + for (i = 0; i < this->npositions; i++) { + printf("%u\n",this->positions[i]); + } +#endif + +#if 0 + /* Not sure why we were doing this. We will find collinear set of diagonals later. */ + /* Advance pointer to goal (note: do not want goal_adj, since we have already subtracted this->querystart) */ + /* Have tested positions[i] <= goal, but want positions[-1] to be < goal, or positions[0] >= goal */ + /* ? Replace with a binary search */ + i = 0; + while (i < this->npositions && positions_temp[i] < goal) { + debug7(printf("1 Skipping position %u (%u) < goal %u (%u)\n", + positions_temp[i],positions_temp[i] - chroffset,goal,goal - chroffset)); + i++; + } + this->positions += i; + this->npositions -= i; + debug7(printf("Remaining: %d positions\n",this->npositions)); +#endif + } + + /* Function surrounded by HAVE_ALLOCA */ + FREEA(positions_temp); + } + + return; +} + +#elif defined(HAVE_AVX2) && !defined(WORDS_BIGENDIAN) /* Using pext method, because _mm256_shuffle_epi32 doesn't work well because it works only within lanes, and MASTER_CONTROL does not extend @@ -2395,7 +2648,7 @@ } array_end = &(array[this->finalptr]); - while (array_ptr < array_stop) { + while (array_ptr <= array_stop) { #if defined(REQUIRE_ALIGNMENT) /* Use stream_load to avoid polluting the cache with suffix array entries */ @@ -2651,7 +2904,7 @@ 0x00, 0x80, 0xC0, 0xBC, 0x00, 0x00, 0x00, 0xC0); #endif - while (array_ptr < array_stop) { + while (array_ptr <= array_stop) { #if defined(REQUIRE_ALIGNMENT) #ifdef HAVE_SSE4_1 @@ -8515,7 +8768,7 @@ querylength - 
minus_querypos,/*queryoffset*/minus_querypos, query_compress_rev,minus_sarray,/*plusp*/false,genestrand,minus_conversion); elt = Elt_new(minus_querypos,nmatches,initptr,finalptr,/*temporaryp*/false); - if (nmatches > best_minus_nmatches && elt->nptr < MAX_HITS_FOR_BEST_ELT) { + if (nmatches > best_minus_nmatches && elt->nptr <= MAX_HITS_FOR_BEST_ELT) { best_minus_elt = elt; best_minus_nmatches = nmatches; best_minus_i = niter; diff -Nru gmap-2016-11-07/src/smooth.c gmap-2017-01-14/src/smooth.c --- gmap-2016-11-07/src/smooth.c 2016-09-20 20:19:43.000000000 +0000 +++ gmap-2017-01-14/src/smooth.c 2016-12-29 16:20:17.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: smooth.c 184474 2016-02-18 00:12:53Z twu $"; +static char rcsid[] = "$Id: smooth.c 202031 2016-12-29 16:20:14Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -550,10 +550,12 @@ struct Smoothcell_T { int exoni; - double pvalue; + /* double pvalue; */ + int exonmatches; int exonstatus; }; +#if 0 static int Smoothcell_cmp (const void *x, const void *y) { struct Smoothcell_T a = * (struct Smoothcell_T *) x; @@ -567,6 +569,22 @@ return 0; } } +#else +static int +Smoothcell_cmp (const void *x, const void *y) { + struct Smoothcell_T a = * (struct Smoothcell_T *) x; + struct Smoothcell_T b = * (struct Smoothcell_T *) y; + + if (a.exonmatches < b.exonmatches) { + return -1; + } else if (b.exonmatches < a.exonmatches) { + return +1; + } else { + return 0; + } +} +#endif + static void @@ -590,7 +608,7 @@ for (i = 0; i < nexons; i++) { exonstatus[i] = KEEP; cells[i].exoni = i; - cells[i].pvalue = 1.0; + cells[i].exonmatches = exonmatches[i]; cells[i].exonstatus = KEEP; } @@ -603,7 +621,8 @@ intron_matches_right[i],intron_denominator_right[i],total_matches,total_denominator); debug(printf("For exon %d, left intron bad %d, right intron bad %d\n",i,intron1_bad_p,intron2_bad_p)); - if (intron1_bad_p == true && intron2_bad_p == true) { + if (intron1_bad_p == true || intron2_bad_p == true) { +#if 0 numerator0 = 
exonmatches[i]; denominator0 = exon_denominator[i]; theta0 = (double) (total_matches - numerator0 + 1)/(double) (total_denominator - denominator0 + 1); @@ -613,6 +632,11 @@ cells[i].pvalue = pvalue; cells[i].exonstatus = DELETE; } +#else + if (exonmatches[i] < 15) { + cells[i].exonstatus = DELETE; + } +#endif } else { /* Do nothing */ @@ -623,8 +647,9 @@ qsort(cells,nexons,sizeof(struct Smoothcell_T),Smoothcell_cmp); i = 0; - while (i < nexons && cells[i].pvalue < STRICT_EXON_PVALUE) { + while (i < nexons && cells[i].exonmatches < 15) { if (cells[i].exonstatus == DELETE) { + debug(printf(" Will delete exon %d\n",i)); *deletep = true; exonstatus[cells[i].exoni] = DELETE; exonstatus[cells[i].exoni - 1] = KEEP; /* Prevent consecutive deletes */ diff -Nru gmap-2016-11-07/src/spanningelt.c gmap-2017-01-14/src/spanningelt.c --- gmap-2016-11-07/src/spanningelt.c 2016-02-10 01:52:03.000000000 +0000 +++ gmap-2017-01-14/src/spanningelt.c 2017-01-13 23:31:34.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: spanningelt.c 184022 2016-02-10 01:52:03Z twu $"; +static char rcsid[] = "$Id: spanningelt.c 202593 2017-01-13 23:31:33Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -6,9 +6,11 @@ #include "spanningelt.h" #include #include /* For qsort */ +#include "assert.h" #include "mem.h" #include "indexdbdef.h" + #ifdef WORDS_BIGENDIAN #include "bigendian.h" #endif @@ -83,7 +85,7 @@ Spanningelt_gc (T old) { if (old->intersection_diagonals_reset != NULL) { - FREE(old->intersection_diagonals_reset); + FREE_ALIGN(old->intersection_diagonals_reset); } if (old->compoundpos != NULL) { Compoundpos_free(&(old->compoundpos)); @@ -687,7 +689,8 @@ #else Univcoord_T *positions0, *positions1; #endif - int npositions0, npositions1, delta, j, diagterm; + int npositions0, npositions1, delta, j, diagterm, i; + if (npositionsa < npositionsb) { #ifdef LARGE_GENOMES @@ -727,8 +730,10 @@ debug(printf("intersection is null\n")); return (Univcoord_T *) NULL; } else { - /* Allocate maximum 
possible size */ - diagonals = (Univcoord_T *) CALLOC(npositions0,sizeof(Univcoord_T)); + /* Note: This has to be on a SIMD boundary (16-byte for SSE2, 32-byte for AVX2, 64-byte for AVX512) for Merge_uint4 to work */ + diagonals = (Univcoord_T *) MALLOC_ALIGN(npositions0 * sizeof(Univcoord_T)); + /* Previously, allocated maximum possible size */ + /* diagonals = (Univcoord_T *) CALLOC(npositions0,sizeof(Univcoord_T)); */ } while (npositions0 > 0) { @@ -790,6 +795,7 @@ #ifdef LARGE_GENOMES if (npositions1 <= 0) { return diagonals; + } else if ((((Univcoord_T) *positions1_high) << 32) + (*positions1_low) == local_goal) { /* Found local goal. Save and advance */ debug(printf(" intersection list 1: %d:%u found\n", @@ -806,6 +812,7 @@ #elif defined(WORDS_BIGENDIAN) if (npositions1 <= 0) { return diagonals; + } else if (Bigendian_convert_univcoord(*positions1) == local_goal) { /* Found local goal. Save and advance */ debug(printf(" intersection list 1: %d:%u found\n", @@ -821,6 +828,7 @@ #else if (npositions1 <= 0) { return diagonals; + } else if ((*positions1) == local_goal) { /* Found local goal. 
Save and advance */ debug(printf(" intersection list 1: %d:%u found\n",npositions1,*positions1)); @@ -857,7 +865,7 @@ #endif int diagterm0, int npositions0, Compoundpos_T compoundpos, int diagterm1) { Univcoord_T *diagonals, local_goal, last_local_goal; - int delta; + int delta, i; bool emptyp; delta = diagterm0 - diagterm1; /* list0 + (diagterm0 - diagterm1) = list1 */ @@ -866,8 +874,9 @@ if (npositions0 == 0) { return (Univcoord_T *) NULL; } else { - /* Could add up compoundpos->npositions to see if we could allocate less memory */ - diagonals = (Univcoord_T *) CALLOC(npositions0,sizeof(Univcoord_T)); + /* Could add up compoundpos->npositions to see if we could allocate less memory */ + /* Note: This has to be on a SIMD boundary (16-byte for SSE2, 32-byte for AVX2, 64-byte for AVX512) for Merge_uint4 to work */ + diagonals = (Univcoord_T *) MALLOC_ALIGN(npositions0 * sizeof(Univcoord_T)); } last_local_goal = 0U; @@ -951,6 +960,8 @@ /* Previously computed a result */ *ndiagonals = this->intersection_ndiagonals; check(check_diagonals(this->intersection_diagonals,this->intersection_ndiagonals)); + debug(printf("Returning previous result\n")); + CHECK_ALIGN(this->intersection_diagonals); return this->intersection_diagonals; } else if (this->partnerp == false) { @@ -961,7 +972,8 @@ this->intersection_diagonals = (Univcoord_T *) NULL; *ndiagonals = this->intersection_ndiagonals = 0; } else { - q = this->intersection_diagonals = (Univcoord_T *) MALLOC(this->npositions * sizeof(Univcoord_T)); + /* Note: This has to be on a SIMD boundary (16-byte for SSE2, 32-byte for AVX2, 64-byte for AVX512) for Merge_uint4 to work */ + q = this->intersection_diagonals = (Univcoord_T *) MALLOC_ALIGN(this->npositions * sizeof(Univcoord_T)); #ifdef LARGE_GENOMES p_high = this->positions_high; p_low = this->positions_low; @@ -1003,6 +1015,7 @@ debug(printf("Returning %p (%d diagonals)\n",this->intersection_diagonals,this->intersection_ndiagonals)); 
check(check_diagonals(this->intersection_diagonals,this->intersection_ndiagonals)); + CHECK_ALIGN(this->intersection_diagonals); return this->intersection_diagonals; } else { @@ -1016,6 +1029,7 @@ this->intersection_ndiagonals_reset = this->intersection_ndiagonals; check(check_diagonals(this->intersection_diagonals,this->intersection_ndiagonals)); + CHECK_ALIGN(this->intersection_diagonals); return this->intersection_diagonals; } @@ -1040,6 +1054,7 @@ this->intersection_ndiagonals_reset = this->intersection_ndiagonals; check(check_diagonals(this->intersection_diagonals,this->intersection_ndiagonals)); + CHECK_ALIGN(this->intersection_diagonals); return this->intersection_diagonals; } else { @@ -1060,6 +1075,7 @@ this->intersection_ndiagonals_reset = this->intersection_ndiagonals; check(check_diagonals(this->intersection_diagonals,this->intersection_ndiagonals)); + CHECK_ALIGN(this->intersection_diagonals); return this->intersection_diagonals; } } diff -Nru gmap-2016-11-07/src/spanningelt.h gmap-2017-01-14/src/spanningelt.h --- gmap-2016-11-07/src/spanningelt.h 2015-12-07 18:29:45.000000000 +0000 +++ gmap-2017-01-14/src/spanningelt.h 2016-12-16 16:49:33.000000000 +0000 @@ -1,4 +1,4 @@ -/* $Id: spanningelt.h 180341 2015-12-07 18:29:40Z twu $ */ +/* $Id: spanningelt.h 201744 2016-12-16 16:49:32Z twu $ */ #ifndef SPANNINGELT_INCLUDED #define SPANNINGELT_INCLUDED @@ -52,7 +52,7 @@ int miss_querypos5; /* If partnerp is true, this is the overlap of the two partners */ int miss_querypos3; - /* Reset values */ + /* Reset values. 
Needed because stage1hr procedures modify the pointer and number fields */ Univcoord_T *intersection_diagonals_reset; int intersection_ndiagonals_reset; #ifdef LARGE_GENOMES diff -Nru gmap-2016-11-07/src/stage1hr.c gmap-2017-01-14/src/stage1hr.c --- gmap-2016-11-07/src/stage1hr.c 2016-10-24 23:55:08.000000000 +0000 +++ gmap-2017-01-14/src/stage1hr.c 2017-01-13 23:32:33.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: stage1hr.c 199517 2016-10-24 23:55:08Z twu $"; +static char rcsid[] = "$Id: stage1hr.c 202594 2017-01-13 23:32:32Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -45,6 +45,7 @@ #endif #include "spanningelt.h" +#include "merge.h" #include "cmet.h" #include "atoi.h" @@ -65,7 +66,20 @@ #endif #endif + +/* Three methods for performing a multiway merge. Need to define one below. */ +#if defined(LARGE_GENOMES) || !defined(HAVE_SSE4_1) #define USE_HEAP 1 +/* #define USE_LOSER_TREES 1 */ +#else +#define USE_MERGE 1 +#endif + + +#ifdef USE_HEAP +#include "merge-heap.h" +#endif + #define SPEED 1 @@ -129,6 +143,9 @@ static bool distances_observed_p; static Chrpos_T min_intronlength; +static Chrpos_T expected_pairlength; +static Chrpos_T pairlength_deviation; + /* Splicing */ static Univcoord_T *splicesites; @@ -144,7 +161,6 @@ static bool gmap_segments_p; /* previously called gmap_terminal_p. Should move earlier (1). */ static bool gmap_pairsearch_p; /* controls halfmapping. Should move later (2). */ static bool gmap_improvement_p; /* Should be at end (3). */ -static bool gmap_indel_knownsplice_p; static bool gmap_rerun_p = true; static int antistranded_penalty; @@ -1679,7 +1695,7 @@ typedef struct Batch_T *Batch_T; struct Batch_T { -#ifndef USE_HEAP +#ifdef USE_LOSER_TREES int nodei; /* Node in loser tree. 
Also used for debugging */ #endif int querypos; @@ -1706,12 +1722,12 @@ Univcoord_T *positions, #endif int npositions, int querylength -#ifndef USE_HEAP +#ifdef USE_LOSER_TREES , int nodei #endif ) { -#ifndef USE_HEAP +#ifdef USE_LOSER_TREES batch->nodei = nodei; #endif batch->querypos = querypos; @@ -1760,23 +1776,24 @@ } +#if defined(USE_HEAP) || defined(USE_LOSER_TREES) static void Batch_init_simple (Batch_T batch, Univcoord_T *diagonals, int ndiagonals, int querylength, -#ifdef USE_HEAP +#ifndef USE_LOSER_TREES int querypos #else int nodei #endif ) { -#ifdef USE_HEAP +#ifndef USE_LOSER_TREES batch->querypos = querypos; #else batch->nodei = nodei; #endif batch->positions = diagonals; batch->npositions = ndiagonals; -#ifdef USE_HEAP +#ifndef USE_LOSER_TREES batch->diagonal = *diagonals; /* Already in correct endianness */ #else if (batch->npositions == 0) { @@ -1798,6 +1815,7 @@ return; } +#endif static void @@ -1833,6 +1851,7 @@ } +#if defined(USE_HEAP) || defined(USE_LOSER_TREES) static void min_heap_insert_simple (Batch_T *heap, int *heapsize, Batch_T batch) { int i; @@ -1848,7 +1867,7 @@ return; } - +#endif /* Note FORMULA: formulas for querypos <-> diagonal (diagterm in call to Indexdb_read) are: @@ -2146,7 +2165,8 @@ if (elt->miss_querypos3 > miss_querypos3) miss_querypos3 = elt->miss_querypos3; /* continue; -- naturally falls to end of loop */ } - } else if (*elt->intersection_diagonals > local_goal) { + + } else if (*elt->intersection_diagonals > goal) { /* was local_goal */ /* Already advanced past goal, so continue with one more miss seen. */ debug7(printf(" one miss --")); if (++nmisses_seen > nmisses_allowed) { @@ -2158,6 +2178,7 @@ if (elt->miss_querypos3 > miss_querypos3) miss_querypos3 = elt->miss_querypos3; /* continue; -- naturally falls to end of loop */ } + } else { /* Found goal. Advance past goal and continue with loop. 
*/ debug7(printf(" advancing\n")); @@ -3316,7 +3337,231 @@ } -#ifdef USE_HEAP +#ifdef USE_MERGE +static List_T +find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T this, int genestrand, + int nrequired, int querylength, Compress_T query_compress_fwd, Compress_T query_compress_rev, + int nmisses_allowed) { + Univcoord_T *diagonals, *all_diagonals_merged, *all_diagonals, diagonal, new_diagonal; + Spanningelt_T *array; + List_T prev; + int nunion = nmisses_allowed + nrequired, nelts, elti, nstreams; + int count, mod, i; + int ndiagonals, nempty, n_all_diagonals; + int global_miss_querypos5, global_miss_querypos3; + int elt_miss_querypos5, elt_miss_querypos3; + List_T stream_list; + Intlist_T streamsize_list; + Univcoord_T chroffset, chrhigh; + Chrpos_T chrlength; + Chrnum_T chrnum; + + debug(printf("Starting find_spanning_multimiss_matches with %d misses allowed\n",nmisses_allowed)); + + + /* Plus */ + for (mod = 0; mod < index1interval; mod++) { + array = this->plus_spanningset[mod]; + nelts = this->plus_spanningset_nelts[mod]; + debug(printf("Multimiss plus mod %d, nelts %d\n",mod,nelts)); + + qsort(array,nelts,sizeof(Spanningelt_T),Spanningelt_candidates_cmp); + if (nelts > nunion) { + qsort(&(array[nunion]),nelts-nunion,sizeof(Spanningelt_T),Spanningelt_pruning_cmp); + } + for (elti = 0; elti < nelts; elti++) { + Spanningelt_reset(array[elti]); + } + + debug(printf("*** find_spanning_multimiss_matches, %d misses allowed, plus mod %d\n",nmisses_allowed,mod)); + debug(Spanningelt_print_array(array,nelts)); + + /* Put first few pointers into heap */ + global_miss_querypos5 = querylength; + global_miss_querypos3 = 0; + stream_list = (List_T) NULL; + streamsize_list = (Intlist_T) NULL; + nstreams = 0; + for (elti = 0; elti < nelts && elti < nunion; elti++) { + /* Get list as a special one, and perform conversion if necessary */ + diagonals = Spanningelt_diagonals(&ndiagonals,(Spanningelt_T) array[elti],&elt_miss_querypos5,&elt_miss_querypos3); 
+ /* Note: diagonals has to be on a SIMD boundary (16-byte for SSE2, 32-byte for AVX2, 64-byte for AVX512) for Merge_uint4 to work */ + if (elt_miss_querypos5 < global_miss_querypos5) global_miss_querypos5 = elt_miss_querypos5; + if (elt_miss_querypos3 > global_miss_querypos3) global_miss_querypos3 = elt_miss_querypos3; + + debug(printf("Adding batch %d of size %d...",elti,ndiagonals)); + if (ndiagonals > 0) { + stream_list = List_push(stream_list,(void *) diagonals); + streamsize_list = Intlist_push(streamsize_list,ndiagonals); + nstreams++; + } + debug(printf("\n")); + } + + all_diagonals_merged = Merge_diagonals(&n_all_diagonals,stream_list,streamsize_list); + + /* Skip diagonals at beginning of genome */ + all_diagonals = all_diagonals_merged; + while (n_all_diagonals > 0 && *all_diagonals < (unsigned int) querylength) { + debug11(printf("Eliminating diagonal %llu as straddling beginning of genome (Batch_init)\n", + (unsigned long long) *all_diagonals)); + all_diagonals++; + n_all_diagonals--; + } + + /* Process sorted diagonals */ + if (n_all_diagonals > 0) { + prev = (struct List_T *) MALLOCA((nelts - elti + 1) * sizeof(struct List_T)); + List_fill_array_with_handle(prev,(void *) &(array[elti]),nelts - elti); + + nempty = 0; + chrhigh = 0U; + + debug7(printf("*** multimiss mod %d plus:\n",mod)); + diagonal = all_diagonals[0]; + count = 1; + debug7(printf("at ??, initial diagonal is %llu\n",(unsigned long long) diagonal)); + + i = 1; + while (i < n_all_diagonals && *nhits <= maxpaths_search) { + if ((new_diagonal = all_diagonals[i++]) == diagonal) { + count++; + debug7(printf("at ??, incrementing diagonal %llu to count %d\n",(unsigned long long) diagonal,count)); + } else { + /* End of diagonal */ + if (count >= nrequired) { + hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal, + prev,&nempty,&global_miss_querypos5,&global_miss_querypos3, + querylength,/*query_compress*/query_compress_fwd, + 
/*plusp*/true,genestrand,nmisses_allowed, + /*nmisses_seen*/nunion-count+nempty,global_miss_querypos5,global_miss_querypos3); + } + diagonal = new_diagonal; + count = 1; + debug7(printf("at ??, next diagonal is %llu\n",(unsigned long long) diagonal)); + } + } + + /* Terminate loop */ + if (count >= nrequired && *nhits <= maxpaths_search) { + hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal, + prev,&nempty,&global_miss_querypos5,&global_miss_querypos3, + querylength,/*query_compress*/query_compress_fwd, + /*plusp*/true,genestrand,nmisses_allowed, + /*nmisses_seen*/nunion-count+nempty,global_miss_querypos5,global_miss_querypos3); + } + + FREEA(prev); + } + + if (nstreams > 1) { + FREE_ALIGN(all_diagonals_merged); + } + } + + /* Minus */ + for (mod = 0; mod < index1interval; mod++) { + array = this->minus_spanningset[mod]; + nelts = this->minus_spanningset_nelts[mod]; + debug(printf("Multimiss minus mod %d, nelts %d\n",mod,nelts)); + + qsort(array,nelts,sizeof(Spanningelt_T),Spanningelt_candidates_cmp); + if (nelts > nunion) { + qsort(&(array[nunion]),nelts-nunion,sizeof(Spanningelt_T),Spanningelt_pruning_cmp); + } + for (elti = 0; elti < nelts; elti++) { + Spanningelt_reset(array[elti]); + } + + debug(printf("*** find_spanning_multimiss_matches, %d misses_allowed, minus mod %d\n",nmisses_allowed,mod)); + debug(Spanningelt_print_array(array,nelts)); + + /* Put first few pointers into heap */ + global_miss_querypos5 = querylength; + global_miss_querypos3 = 0; + stream_list = (List_T) NULL; + streamsize_list = (Intlist_T) NULL; + nstreams = 0; + for (elti = 0; elti < nelts && elti < nunion; elti++) { + /* Get list as a special one, and perform conversion if necessary */ + diagonals = Spanningelt_diagonals(&ndiagonals,(Spanningelt_T) array[elti],&elt_miss_querypos5,&elt_miss_querypos3); + if (elt_miss_querypos5 < global_miss_querypos5) global_miss_querypos5 = elt_miss_querypos5; + if (elt_miss_querypos3 > 
global_miss_querypos3) global_miss_querypos3 = elt_miss_querypos3; + + debug(printf("Adding batch %d of size %d...",elti,ndiagonals)); + if (ndiagonals > 0) { + stream_list = List_push(stream_list,(void *) diagonals); + streamsize_list = Intlist_push(streamsize_list,ndiagonals); + nstreams++; + } + debug(printf("\n")); + } + + all_diagonals_merged = Merge_diagonals(&n_all_diagonals,stream_list,streamsize_list); + + /* Skip diagonals at beginning of genome */ + all_diagonals = all_diagonals_merged; + while (n_all_diagonals > 0 && *all_diagonals < (unsigned int) querylength) { + debug11(printf("Eliminating diagonal %llu as straddling beginning of genome (Batch_init)\n", + (unsigned long long) *all_diagonals)); + all_diagonals++; + n_all_diagonals--; + } + + /* Process sorted diagonals */ + if (n_all_diagonals > 0) { + prev = (struct List_T *) MALLOCA((nelts - elti + 1) * sizeof(struct List_T)); + List_fill_array_with_handle(prev,(void *) &(array[elti]),nelts - elti); + + nempty = 0; + chrhigh = 0U; + + debug7(printf("*** multimiss mod %d minus:\n",mod)); + diagonal = all_diagonals[0]; + count = 1; + debug7(printf("at ??, initial diagonal is %llu\n",(unsigned long long) diagonal)); + + i = 1; + while (i < n_all_diagonals && *nhits <= maxpaths_search) { + if ((new_diagonal = all_diagonals[i++]) == diagonal) { + count++; + debug7(printf("at ??, incrementing diagonal %llu to count %d\n",(unsigned long long) diagonal,count)); + } else { + /* End of diagonal */ + if (count >= nrequired) { + hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal, + prev,&nempty,&global_miss_querypos5,&global_miss_querypos3, + querylength,/*query_compress*/query_compress_rev, + /*plusp*/false,genestrand,nmisses_allowed, + /*nmisses_seen*/nunion-count+nempty,global_miss_querypos5,global_miss_querypos3); + } + diagonal = new_diagonal; + count = 1; + debug7(printf("at ??, next diagonal is %llu\n",(unsigned long long) diagonal)); + } + } + + 
/* Terminate loop */ + if (count >= nrequired && *nhits <= maxpaths_search) { + hits = identify_multimiss_iter(&(*found_score),&chrnum,&chroffset,&chrhigh,&chrlength,&(*nhits),hits,diagonal, + prev,&nempty,&global_miss_querypos5,&global_miss_querypos3, + querylength,/*query_compress*/query_compress_rev, + /*plusp*/false,genestrand,nmisses_allowed, + /*nmisses_seen*/nunion-count+nempty,global_miss_querypos5,global_miss_querypos3); + } + + FREEA(prev); + } + + if (nstreams > 1) { + FREE_ALIGN(all_diagonals_merged); + } + } + + return hits; +} + +#elif defined(USE_HEAP) static List_T find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T this, int genestrand, int nrequired, int querylength, Compress_T query_compress_fwd, Compress_T query_compress_rev, @@ -3681,10 +3926,9 @@ return hits; } -#endif -#ifndef USE_HEAP -/* Uses a loser tree */ +#elif defined(USE_LOSER_TREES) + static List_T find_spanning_multimiss_matches (int *found_score, int *nhits, List_T hits, T this, int genestrand, int nrequired, int querylength, Compress_T query_compress_fwd, Compress_T query_compress_rev, @@ -4053,6 +4297,7 @@ segmenti = *p; assert(segmenti->diagonal != (Univcoord_T) -1); if (segmenti->floor <= max_mismatches_allowed) { + assert(segmenti->diagonal >= (Univcoord_T) querylength); /* identify_all_segments should have performed filtering */ left = segmenti->diagonal - querylength; nmismatches = Genome_count_mismatches_limit(query_compress,left,/*pos5*/0,/*pos3*/querylength, max_mismatches_allowed,plusp,genestrand); @@ -4074,51 +4319,1088 @@ } -#ifdef USE_HEAP -/* TODO: Change spliceable to be an attribute of the segment. 
Then we - can loop over anchor_segments only */ -static struct Segment_T * -identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchors, - Segment_T **spliceable, int *nspliceable, -#ifdef LARGE_GENOMES - unsigned char **positions_high, UINT4 **positions_low, -#else - Univcoord_T **positions, -#endif - int *npositions, bool *omitted, int querylength, int query_lastpos, - Floors_T floors, bool plusp) { - struct Segment_T *segments = NULL; - Segment_T *all_segments, *ptr_all, *ptr_anchor, *dest, *src; - int length_threshold; - int n_all_segments, n; - int nanchors_bymod[MAX_INDEX1INTERVAL], naccept_bymod[MAX_INDEX1INTERVAL]; - int mod; - int k; +#if 0 +/* Modified from pair_up_concordant_aux in stage3hr.c */ +static void +pair_up_segments (struct Segment_T *plus_segments_5, int plus_nsegments_5, + struct Segment_T *minus_segments_5, int minus_nsegments_5, + struct Segment_T *plus_segments_3, int plus_nsegments_3, + struct Segment_T *minus_segments_3, int minus_nsegments_3, + int querylength5, int querylength3, Chrpos_T pairmax) { + int i, j; + Univcoord_T insert_start; + Segment_T segment5, segment3; /* Need pointers, because we are changing the pairable value */ - struct Batch_T *batchpool; - struct Batch_T sentinel_struct; - Batch_T *heap, sentinel; - int smallesti, righti; - Batch_T batch; - int heapsize = 0; - int parenti, i; - int querypos, first_querypos, last_querypos; - int floor_left, floor_right, floor_incr; - int floor, floor_xfirst, floor_xlast, *floors_from_xfirst, *floors_to_xlast; - int *floors_from_neg3, *floors_to_pos3; - /* int exclude_xfirst, exclude_xlast; */ - Univcoord_T diagonal, segment_left, last_diagonal, chroffset = 0U, chrhigh = 0U; - Chrpos_T chrlength, max_distance; - Chrnum_T chrnum = 1; -#ifdef OLD_FLOOR_ENDS - int halfquerylength, halfquery_lastpos; -#endif + debug(printf("Entered pair_up_segments\n")); -#ifdef DIAGONAL_ADD_QUERYPOS - UINT8 diagonal_add_querypos; + /* plus/plus */ + j = 0; + for (i = 0; i < 
plus_nsegments_5; i++) { + segment5 = &(plus_segments_5[i]); + if ((insert_start = segment5->diagonal) == (Univcoord_T) -1) { + /* Skip chromosomal end marker */ + } else { +#ifdef DEBUG5 + printf("plus/plus: i=%d/%d %u %d..%d\n", + i,plus_nsegments_5,segment5->diagonal,segment5->querypos5,segment5->querypos3); + if (j >= plus_nsegments_3) { + printf(" current: j=%d/%d\n",j,plus_nsegments_3); + } else if (plus_segments_3[j].diagonal == (Univcoord_T) -1) { + printf(" current: j=%d/%d %u\n",j,plus_nsegments_3,plus_segments_3[j].diagonal); + } else { + printf(" current: j=%d/%d %u %d..%d\n", + j,plus_nsegments_3,plus_segments_3[j].diagonal,plus_segments_3[j].querypos5,plus_segments_3[j].querypos3); + } #endif - int total_npositions = 0; - int joffset = 0, j; + + /* Get to correct chrnum */ + while (j < plus_nsegments_3 && (plus_segments_3[j].diagonal == (Univcoord_T) -1 || plus_segments_3[j].diagonal < segment5->diagonal)) { +#ifdef DEBUG5 + if (plus_segments_3[j].diagonal == (Univcoord_T) -1) { + printf(" advancing: j=%d/%d %u\n",j,plus_nsegments_3,plus_segments_3[j].diagonal); + } else { + printf(" advancing: j=%d/%d %u %d..%d\n", + j,plus_nsegments_3,plus_segments_3[j].diagonal,plus_segments_3[j].querypos5,plus_segments_3[j].querypos3); + } +#endif + j++; + } + + if (j < plus_nsegments_3) { + while (j >= 0 && plus_segments_3[j].diagonal != (Univcoord_T) -1 && plus_segments_3[j].diagonal > segment5->diagonal) { + debug5(printf(" backup: j=%d/%d %u %d..%d\n", + j,plus_nsegments_3,plus_segments_3[j].diagonal,plus_segments_3[j].querypos5,plus_segments_3[j].querypos3)); + j--; + } + j++; /* Finish backup */ + + /* Cannot perform arithmetic on diagonal, because we want to preserve -1 as being the largest value */ + /* Ignore inclusion of querylength inside pairmax */ + while (j < plus_nsegments_3 && plus_segments_3[j].diagonal <= insert_start + pairmax /*- querylength3*/) { + debug5(printf(" overlap: j=%d/%d, %u <= %u + %u, %d..%d\n", + 
j,plus_nsegments_3,plus_segments_3[j].diagonal, + insert_start,pairmax,plus_segments_3[j].querypos5,plus_segments_3[j].querypos3)); + debug5(printf("Setting plus segments %d and %d to be pairable: %u and %u\n",i,j,segment5->diagonal,plus_segments_3[j].diagonal)); + segment5->pairablep = true; + plus_segments_3[j].pairablep = true; + j++; + } + } + } + } + + /* minus/minus */ + j = 0; + for (i = 0; i < minus_nsegments_3; i++) { + segment3 = &(minus_segments_3[i]); + if ((insert_start = segment3->diagonal) == (Univcoord_T) -1) { + /* Skip chromosomal end marker */ + } else { +#ifdef DEBUG5 + printf("minus/minus: i=%d/%d %u %d..%d\n", + i,minus_nsegments_3,segment3->diagonal,segment3->querypos5,segment3->querypos3); + if (j >= minus_nsegments_5) { + printf(" current: j=%d/%d\n",j,minus_nsegments_5); + } else if (minus_segments_5[j].diagonal == (Univcoord_T) -1) { + printf(" current: j=%d/%d %u\n",j,minus_nsegments_5,minus_segments_5[j].diagonal); + } else { + printf(" current: j=%d/%d %u %d..%d\n", + j,minus_nsegments_5,minus_segments_5[j].diagonal,minus_segments_5[j].querypos5,minus_segments_5[j].querypos3); + } +#endif + + /* Get to correct chrnum */ + while (j < minus_nsegments_5 && (minus_segments_5[j].diagonal == (Univcoord_T) -1 || minus_segments_5[j].diagonal < segment3->diagonal)) { +#ifdef DEBUG5 + if (minus_segments_5[j].diagonal == (Univcoord_T) -1) { + printf(" advancing: j=%d/%d %u\n",j,minus_nsegments_5,minus_segments_5[j].diagonal); + } else { + printf(" advancing: j=%d/%d %u %d..%d\n", + j,minus_nsegments_5,minus_segments_5[j].diagonal,minus_segments_5[j].querypos5,minus_segments_5[j].querypos3); + } +#endif + j++; + } + + if (j < minus_nsegments_5) { + while (j >= 0 && minus_segments_5[j].diagonal != (Univcoord_T) -1 && minus_segments_5[j].diagonal > segment3->diagonal) { + debug5(printf(" backup: j=%d/%d %u %d..%d\n", + j,minus_nsegments_5,minus_segments_5[j].diagonal,minus_segments_5[j].querypos5,minus_segments_5[j].querypos3)); + j--; + } + j++; /* 
Finish backup */ + + /* Cannot perform arithmetic on diagonal, because we want to preserve -1 as being the largest value */ + /* Ignore inclusion of querylength inside pairmax */ + while (j < minus_nsegments_5 && minus_segments_5[j].diagonal <= insert_start + pairmax /*- querylength5*/) { + debug5(printf(" overlap: j=%d/%d %u %d..%d\n", + j,minus_nsegments_5,minus_segments_5[j].diagonal,minus_segments_5[j].querypos5,minus_segments_5[j].querypos3)); + debug5(printf("Setting minus segments %d and %d to be pairable: %u and %u\n",i,j,segment3->diagonal,minus_segments_5[j].diagonal)); + segment3->pairablep = true; + minus_segments_5[j].pairablep = true; + j++; + } + } + } + } + + return; +} +#endif + +
+#ifdef LARGE_GENOMES
+/* TODO: Change spliceable to be an attribute of the segment.  Then we
+   can loop over anchor_segments only */
+/* identify_all_segments (LARGE_GENOMES version)
+ *
+ * Merges the per-querypos position lists through a min-heap ordered by
+ * (diagonal, querypos) and collapses every run of hits that share a
+ * diagonal into one struct Segment_T.  A marker segment whose diagonal is
+ * (Univcoord_T) -1 is written after the segments of each chromosome.
+ *
+ * Inputs:
+ *   positions_high/positions_low, npositions: position lists, indexed by
+ *     querypos 0..query_lastpos
+ *   omitted: querypos values to leave out of the merge
+ *   querylength, query_lastpos: query dimensions
+ *   floors: floor score tables; NULL yields an empty result
+ *   plusp: strand; selects the diagterm (querylength - querypos for plus,
+ *     querypos + index1part for minus) and the lowpos/highpos formulas
+ *
+ * Outputs (set on every return path):
+ *   *nsegments: number of entries written to the returned array,
+ *     including marker segments
+ *   *anchor_segments, *nanchors: MALLOC'ed array of pointers into the
+ *     returned array (limited by max_anchors when too many), or NULL/0
+ *   *spliceable, *nspliceable: CALLOC'ed array of pointers to segments
+ *     whose next diagonal lies within max_distance; NULL when
+ *     overall_max_distance is 0 or on an empty result
+ *
+ * Returns the MALLOC'ed segment array, or NULL if floors is NULL or no
+ * batch could be added to the heap.
+ * NOTE(review): ownership of the returned arrays presumably passes to the
+ * caller -- confirm against the call sites. */
+static struct Segment_T *
+identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchors,
+                       Segment_T **spliceable, int *nspliceable,
+#ifdef LARGE_GENOMES
+                       unsigned char **positions_high, UINT4 **positions_low,
+#else
+                       Univcoord_T **positions,
+#endif
+                       int *npositions, bool *omitted, int querylength, int query_lastpos,
+                       Floors_T floors, bool plusp) {
+  struct Segment_T *segments = NULL;
+  Segment_T *all_segments, *ptr_all, *ptr_anchor, *dest, *src;
+  int length_threshold = 0;     /* initialized because the debug() printf below may read it before assignment */
+  int n_all_segments, n;
+  int nanchors_bymod[MAX_INDEX1INTERVAL], naccept_bymod[MAX_INDEX1INTERVAL];
+  int mod;
+  int k;
+
+  struct Batch_T *batchpool;
+  struct Batch_T sentinel_struct;
+  Batch_T *heap, sentinel;
+  int smallesti, righti;
+  Batch_T batch;
+  int heapsize = 0;
+  int parenti, i;
+  int querypos, first_querypos, last_querypos;
+  int floor_left, floor_right, floor_incr;
+  int floor, floor_xfirst, floor_xlast, *floors_from_xfirst, *floors_to_xlast;
+  int *floors_from_neg3, *floors_to_pos3;
+  /* int exclude_xfirst, exclude_xlast; */
+  Univcoord_T diagonal, segment_left, last_diagonal, chroffset = 0U, chrhigh = 0U;
+  Chrpos_T chrlength, max_distance;
+  Chrnum_T chrnum = 1;
+#ifdef OLD_FLOOR_ENDS
+  int halfquerylength, halfquery_lastpos;
+#endif
+
+#ifdef DIAGONAL_ADD_QUERYPOS
+  UINT8 diagonal_add_querypos;
+#endif
+  int total_npositions = 0;
+  int joffset = 0, j;
+
+#ifdef DEBUG
+  Segment_T segment, *p;
+#endif
+
+  Segment_T ptr, ptr_chrstart;
+  Segment_T *ptr_spliceable;
+  bool last_spliceable_p = false;
+  /* bool next_spliceable_p; */
+#ifdef DEBUG19
+  Segment_T ptr0;
+#endif
+#ifndef SLOW_CHR_UPDATE
+  Univcoord_T goal;
+  int nchromosomes_local = nchromosomes;
+  Univcoord_T *chrhighs_local = chrhighs;
+#endif
+
+  Univcoord_T *splicesites_local, splicesites_static[1];
+  int nsplicesites_local;
+
+  debug(printf("*** Starting identify_all_segments on %s ***\n",plusp ? "plus" : "minus"));
+
+  if (floors == NULL) {
+    *nsegments = 0;
+    *anchor_segments = (Segment_T *) NULL;
+    *nanchors = 0;
+    *spliceable = (Segment_T *) NULL;
+    *nspliceable = 0;
+    return (struct Segment_T *) NULL;
+  }
+
+  /* With no splicesites, use a one-element array holding the maximum
+     value so the comparisons below never find a splicesite */
+  if (splicesites == NULL) {
+    splicesites_local = splicesites_static;
+    splicesites_local[0] = (Univcoord_T) -1;
+    nsplicesites_local = 0;
+  } else {
+    splicesites_local = splicesites;
+    nsplicesites_local = nsplicesites;
+  }
+
+#ifdef OLD_FLOOR_ENDS
+  halfquerylength = querylength / 2;
+  halfquery_lastpos = halfquerylength - index1part;
+#endif
+
+  /* Create sentinel */
+#ifdef DIAGONAL_ADD_QUERYPOS
+  sentinel_struct.diagonal_add_querypos = (UINT8) -1; /* infinity */
+  sentinel_struct.diagonal_add_querypos <<= 32;
+#else
+  sentinel_struct.querypos = querylength; /* essentially infinity */
+  sentinel_struct.diagonal = (Univcoord_T) -1; /* infinity */
+#endif
+  sentinel = &sentinel_struct;
+
+  /* Set up batches */
+  batchpool = (struct Batch_T *) MALLOCA((query_lastpos+1) * sizeof(struct Batch_T));
+  heap = (Batch_T *) MALLOCA((2*(query_lastpos+1)+1+1) * sizeof(Batch_T));
+
+  /* Don't add entries for compoundpos positions (skip querypos -2, -1, lastpos+1, lastpos+2) */
+  if (plusp) {
+    for (querypos = 0, i = 0; querypos <= query_lastpos; querypos++) {
+      if (omitted[querypos] == true) {
+        debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
+                      querypos,npositions[querypos],omitted[querypos]));
+      } else if (npositions[querypos] > 0) {
+        debug1(printf("Adding batch for querypos %d with %d positions, omitted %d\n",
+                      querypos,npositions[querypos],omitted[querypos]));
+        batch = &(batchpool[i]);
+#ifdef LARGE_GENOMES
+        Batch_init(batch,querypos,/*diagterm*/querylength - querypos,positions_high[querypos],positions_low[querypos],
+                   npositions[querypos],querylength);
+#else
+        Batch_init(batch,querypos,/*diagterm*/querylength - querypos,positions[querypos],
+                   npositions[querypos],querylength);
+#endif
+        total_npositions += npositions[querypos];
+        if (batch->npositions > 0) {
+          min_heap_insert(heap,&heapsize,batch);
+          i++;
+        }
+      } else {
+        debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
+                      querypos,npositions[querypos],omitted[querypos]));
+      }
+    }
+  } else {
+    for (querypos = 0, i = 0; querypos <= query_lastpos; querypos++) {
+      if (omitted[querypos] == true) {
+        debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
+                      querypos,npositions[querypos],omitted[querypos]));
+      } else if (npositions[querypos] > 0) {
+        debug1(printf("Adding batch for querypos %d with %d positions, omitted %d\n",
+                      querypos,npositions[querypos],omitted[querypos]));
+        batch = &(batchpool[i]);
+#ifdef LARGE_GENOMES
+        Batch_init(batch,querypos,/*diagterm*/querypos + index1part,positions_high[querypos],positions_low[querypos],
+                   npositions[querypos],querylength);
+#else
+        Batch_init(batch,querypos,/*diagterm*/querypos + index1part,positions[querypos],
+                   npositions[querypos],querylength);
+#endif
+        total_npositions += npositions[querypos];
+        if (batch->npositions > 0) {
+          min_heap_insert(heap,&heapsize,batch);
+          i++;
+        }
+      } else {
+        debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n",
+                      querypos,npositions[querypos],omitted[querypos]));
+      }
+    }
+  }
+  debug14(printf("Initial total_npositions = %d\n",total_npositions));
+
+
+  if (i == 0) {
+    /* No batch was added.  Set every output, as in the floors == NULL
+       case, so the caller never reads indeterminate values */
+    FREEA(heap);
+    FREEA(batchpool);
+    *nsegments = 0;
+    *anchor_segments = (Segment_T *) NULL;
+    *nanchors = 0;
+    *spliceable = (Segment_T *) NULL;
+    *nspliceable = 0;
+    return (struct Segment_T *) NULL;
+  }
+
+  /* Set up rest of heap */
+  for (i = heapsize+1; i <= 2*heapsize+1; i++) {
+    heap[i] = sentinel;
+  }
+
+  /* Putting chr marker "segments" after each chromosome */
+  segments = (struct Segment_T *) MALLOC((total_npositions + nchromosomes) * sizeof(struct Segment_T));
+  ptr_chrstart = ptr = &(segments[0]);
+  all_segments = (Segment_T *) MALLOC(total_npositions * sizeof(Segment_T));
+  ptr_all = &(all_segments[0]);
+  *anchor_segments = (Segment_T *) MALLOC(total_npositions * sizeof(Segment_T));
+  ptr_anchor = &((*anchor_segments)[0]);
+  if (overall_max_distance == 0) {
+    ptr_spliceable = *spliceable = (Segment_T *) NULL;
+  } else {
+    ptr_spliceable = *spliceable = (Segment_T *) CALLOC(total_npositions,sizeof(Segment_T));
+  }
+
+  /*
+  if ((exclude_xfirst = firstbound-2-index1part-max_end_insertions) < 3) {
+    exclude_xfirst = 3;
+  }
+  if ((exclude_xlast = lastbound+1+max_end_insertions) > query_lastpos-3) {
+    exclude_xlast = query_lastpos-3;
+  }
+  */
+
+#if 0
+  /* Should account for firstbound and lastbound */
+  floors_from_xfirst = floors->scorefrom[/* xfirst_from = */ firstbound-index1interval+max_end_insertions];
+  floors_to_xlast = floors->scoreto[/* xlast_to = */ lastbound+1+index1interval-index1part-max_end_insertions];
+#else
+  /* This was previously run in identify_all_segments and not in identify_all_segments_for_terminals */
+  if (spansize /* +max_end_insertions */ > query_lastpos + index1interval) {
+    floors_from_xfirst = floors->scorefrom[query_lastpos+index1interval];
+  } else {
+    floors_from_xfirst = floors->scorefrom[spansize /* +max_end_insertions */];
+  }
+  if (query_lastpos-spansize /* -max_end_insertions */ < -index1interval) {
+    floors_to_xlast = floors->scoreto[-index1interval];
+  } else {
+    floors_to_xlast = floors->scoreto[query_lastpos-spansize /* -max_end_insertions */];
+  }
+#endif
+  floors_from_neg3 = floors->scorefrom[-index1interval];
+  floors_to_pos3 = floors->scoreto[query_lastpos+index1interval];
+
+
+  /* Initialize loop */
+  batch = heap[1];
+  first_querypos = last_querypos = querypos = batch->querypos;
+  last_diagonal = diagonal = batch->diagonal;
+
+  floor_incr = floors_from_neg3[first_querypos];
+  floor = floor_incr;
+  floor_xlast = floor_incr;
+  floor_xfirst = floors_from_xfirst[first_querypos] /* floors->scorefrom[xfirst_from][first_querypos] */;
+
+#ifdef OLD_FLOOR_ENDS
+  if (querypos < halfquery_lastpos) {
+    floor_left = floor_incr;
+  } else {
+    floor_left = floors->scorefrom[-index1interval][halfquery_lastpos];
+  }
+  if (querypos < halfquerylength) {
+    floor_right = floors->scorefrom[halfquerylength-index1interval][query_lastpos];
+  } else {
+    floor_right = floors->scorefrom[halfquerylength-index1interval][first_querypos];
+  }
+#else
+  floor_left = floor_incr;
+#ifdef DEBUG1
+  floor_right = -99;
+#endif
+#endif
+
+
+  debug1(printf("multiple_mm_%s, diagonal %llu, querypos %d\n",
+                plusp ? "plus" : "minus",(unsigned long long) diagonal,querypos));
+  debug1(printf("first_querypos = %d => initial values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+                first_querypos,floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+  if (--batch->npositions <= 0) {
+    /* Use last entry in heap for insertion */
+    batch = heap[heapsize];
+    querypos = batch->querypos;
+    heap[heapsize--] = sentinel;
+
+  } else {
+    /* Use this batch for insertion (same querypos) */
+#ifdef LARGE_GENOMES
+    batch->diagonal = ((Univcoord_T) *(++batch->positions_high) << 32) + *(++batch->positions_low) + batch->diagterm;
+#elif defined(WORDS_BIGENDIAN)
+    batch->diagonal = Bigendian_convert_univcoord(*(++batch->positions)) + batch->diagterm;
+#else
+    batch->diagonal = *(++batch->positions) + batch->diagterm;
+#endif
+#ifdef DIAGONAL_ADD_QUERYPOS
+    batch->diagonal_add_querypos = (UINT8) batch->diagonal;
+    batch->diagonal_add_querypos <<= 32;
+    batch->diagonal_add_querypos |= querypos /* Previously added 2 because querypos was -2: + 2*/;
+#endif
+  }
+
+  /* heapify */
+  parenti = 1;
+#ifdef DIAGONAL_ADD_QUERYPOS
+  diagonal_add_querypos = batch->diagonal_add_querypos;
+  smallesti = (heap[3]->diagonal_add_querypos < heap[2]->diagonal_add_querypos) ? 3 : 2;
+  while (diagonal_add_querypos > heap[smallesti]->diagonal_add_querypos) {
+    heap[parenti] = heap[smallesti];
+    parenti = smallesti;
+    smallesti = LEFT(parenti);
+    righti = smallesti+1;
+    if (heap[righti]->diagonal_add_querypos < heap[smallesti]->diagonal_add_querypos) {
+      smallesti = righti;
+    }
+  }
+#else
+  diagonal = batch->diagonal;
+  smallesti = ((heap[3]->diagonal < heap[2]->diagonal) ||
+               ((heap[3]->diagonal == heap[2]->diagonal) &&
+                (heap[3]->querypos < heap[2]->querypos))) ? 3 : 2;
+  /* Note that diagonal/querypos will never exceed a sentinel diagonal/querypos */
+  while (diagonal > heap[smallesti]->diagonal ||
+         (diagonal == heap[smallesti]->diagonal &&
+          querypos > heap[smallesti]->querypos)) {
+    heap[parenti] = heap[smallesti];
+    parenti = smallesti;
+    smallesti = LEFT(parenti);
+    righti = smallesti+1;
+    if ((heap[righti]->diagonal < heap[smallesti]->diagonal) ||
+        ((heap[righti]->diagonal == heap[smallesti]->diagonal) &&
+         (heap[righti]->querypos < heap[smallesti]->querypos))) {
+      smallesti = righti;
+    }
+  }
+#endif
+  heap[parenti] = batch;
+
+
+  /* Continue after initialization */
+  while (heapsize > 0) {
+    batch = heap[1];
+    querypos = batch->querypos;
+    diagonal = batch->diagonal;
+    debug14(printf("diagonal = %u, querypos = %d\n",last_diagonal,last_querypos));
+
+    if (diagonal == last_diagonal) {
+      /* Continuing exact match or substitution */
+      floor_incr = floors->scorefrom[last_querypos][querypos];
+      floor += floor_incr;
+      floor_xfirst += floor_incr;
+      floor_xlast += floor_incr;
+
+#ifdef OLD_FLOOR_ENDS
+      /* Why is this here?  Just set floor_left at start and floor_right at end. */
+      if (querypos < halfquery_lastpos) {
+        floor_left += floor_incr;
+      } else if (last_querypos < halfquery_lastpos) {
+        /* Finish floor_left */
+        floor_left += floors->scorefrom[last_querypos][halfquery_lastpos+index1interval];
+      }
+      if (querypos >= halfquerylength) {
+        if (last_querypos < halfquerylength) {
+          /* Start floor_right */
+          floor_right = floors->scorefrom[halfquerylength-index1interval][querypos];
+        } else {
+          floor_right += floor_incr;
+        }
+      }
+#endif
+
+      debug1(printf("diagonal %llu unchanged: last_querypos = %d, querypos = %d => floor increments by %d\n",
+                    (unsigned long long) diagonal,last_querypos,querypos,floor_incr));
+      debug1(printf("*multiple_mm_%s, diagonal %llu, querypos %d, floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+                    plusp ? "plus" : "minus",(unsigned long long) diagonal,querypos,
+                    floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+    } else {
+      /* End of diagonal */
+      floor_incr = floors_to_pos3[last_querypos] /* floors->score[last_querypos][query_lastpos+index1interval] */;
+      floor += floor_incr;
+      floor_xfirst += floor_incr;
+      floor_xlast += floors_to_xlast[last_querypos]; /* floors->score[last_querypos][xlast_to]; */
+
+#ifdef OLD_FLOOR_ENDS
+      if (last_querypos < halfquery_lastpos) {
+        floor_left += floors->scorefrom[last_querypos][halfquery_lastpos+index1interval];
+        floor_right = floors->scorefrom[halfquerylength-index1interval][query_lastpos+index1interval];
+      }
+      if (last_querypos >= halfquerylength) {
+        floor_right += floor_incr;
+      }
+#else
+      floor_right = floor_incr;
+#endif
+
+      debug1(printf("new diagonal %llu > last diagonal %llu: last_querypos = %d => final values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+                    (unsigned long long) diagonal,(unsigned long long) last_diagonal,last_querypos,
+                    floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+      if (last_diagonal > chrhigh) {
+        if (ptr > ptr_chrstart) {
+          /* Add chr marker segment */
+          debug14(printf("=== ptr %p > ptr_chrstart %p, so adding chr marker segment\n",ptr,ptr_chrstart));
+          ptr->diagonal = (Univcoord_T) -1;
+          ptr_chrstart = ++ptr;
+        }
+
+        /* update chromosome bounds, based on low end */
+#ifdef SLOW_CHR_UPDATE
+        chrnum = Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength);
+        Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+        /* chrhigh += 1; */
+#else
+        j = 1;
+#ifdef NO_EXTENSIONS_BEFORE_ZERO
+        goal = last_diagonal - querylength + 1;
+#else
+        goal = last_diagonal + 1;
+#endif
+        while (j < nchromosomes_local && chrhighs_local[j] < goal) {
+          j <<= 1;              /* gallop by 2 */
+        }
+        if (j >= nchromosomes_local) {
+          j = binary_search(j >> 1,nchromosomes_local,chrhighs_local,goal);
+        } else {
+          j = binary_search(j >> 1,j,chrhighs_local,goal);
+        }
+        chrnum += j;
+#ifdef DEBUG15
+        if (chrnum != Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength)) {
+          fprintf(stderr,"Got chrnum %d, but wanted %d\n",
+                  chrnum,Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength));
+          abort();
+        }
+#endif
+        chroffset = chroffsets[chrnum-1];
+        chrhigh = chrhighs[chrnum-1];
+        chrlength = chrlengths[chrnum-1];
+        chrhighs_local += j;
+        nchromosomes_local -= j;
+#endif
+      }
+      if (last_diagonal <= chrhigh) { /* FORMULA for high position */
+        /* position of high end is within current chromosome */
+        debug1(printf(" => multiple_mm, diagonal %llu, query %d..%d, chrbounds %llu..%llu, floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+                      (unsigned long long) last_diagonal,first_querypos,last_querypos,
+                      (unsigned long long) chroffset,(unsigned long long) chrhigh,
+                      floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+        /* Save segment, but first advance splicesites past segment_left */
+        segment_left = last_diagonal - querylength;
+        max_distance = overall_max_distance;
+        if (splicesites_local[0] >= last_diagonal) {
+          ptr->splicesites_i = -1;
+        } else if (Splicetrie_splicesite_p(segment_left,/*pos5*/1,/*pos3*/querylength) == false) {
+          ptr->splicesites_i = -1;
+        } else {
+          if (splicesites_local[0] < segment_left) {
+            j = 1;
+            while (j < nsplicesites_local && splicesites_local[j] < segment_left) {
+              j <<= 1;          /* gallop by 2 */
+            }
+            if (j >= nsplicesites_local) {
+              j = binary_search(j >> 1,nsplicesites_local,splicesites_local,segment_left);
+            } else {
+              j = binary_search(j >> 1,j,splicesites_local,segment_left);
+            }
+            joffset += j;
+            splicesites_local += j;
+            nsplicesites_local -= j;
+          }
+
+          if (splicesites_local[0] >= last_diagonal) {
+            ptr->splicesites_i = -1;
+          } else {
+            ptr->splicesites_i = joffset;
+            /* Raise max_distance to the largest splicedist among the
+               splicesites below last_diagonal */
+            j = joffset;
+            while (j < nsplicesites && splicesites[j] < last_diagonal) {
+              if (splicedists[j] > max_distance) {
+                max_distance = splicedists[j];
+              }
+              j++;
+            }
+          }
+        }
+
+        /* Save segment */
+        ptr->diagonal = last_diagonal;
+        ptr->chrnum = chrnum;
+        ptr->chroffset = chroffset;
+        ptr->chrhigh = chrhigh;
+        ptr->chrlength = chrlength;
+        ptr->querypos5 = first_querypos;
+        ptr->querypos3 = last_querypos;
+
+        /* FORMULA */
+        if (plusp) {
+          ptr->lowpos = ptr->diagonal - querylength + ptr->querypos5;
+          ptr->highpos = ptr->diagonal - querylength + ptr->querypos3 + index1part;
+        } else {
+          ptr->lowpos = ptr->diagonal - ptr->querypos3 - index1part - index1part;
+          ptr->highpos = ptr->diagonal - ptr->querypos5 - index1part;
+        }
+
+        ptr->floor = floor;
+        ptr->floor_xfirst = floor_xfirst;
+        ptr->floor_xlast = floor_xlast;
+        ptr->floor_left = floor_left;
+        ptr->floor_right = floor_right;
+        ptr->leftmost = ptr->rightmost = -1;
+        ptr->left_splice_p = ptr->right_splice_p = false;
+        ptr->spliceable_low_p = last_spliceable_p;
+        /* ptr->spliceable_high_p = false; */
+#if 0
+        ptr->leftspan = ptr->rightspan = -1;
+#endif
+        ptr->usedp = false;
+        ptr->pairablep = false;
+
+#if 0
+        /* Not doing this, because the max_distance test is already good enough */
+        if (plusp) {
+          /* For plus-strand splicing, require segmenti->querypos3 < segmentj->querypos5,
+             so if segmenti->querypos3 is too high, then it is not spliceable */
+          if (last_querypos > query_lastpos) {
+            /* Not spliceable */
+            last_spliceable_p = false;
+          } else if (diagonal <= last_diagonal + max_distance) {
+            *ptr_spliceable++ = ptr;
+            ptr->spliceable_high_p = last_spliceable_p = true;
+          }
+        } else {
+          /* For minus-strand splicing, require segmenti->querypos5 > segmentj->querypos3,
+             so if segmenti->querypos5 is too low, then it is not spliceable */
+          if (first_querypos < index1part) {
+            /* Not spliceable */
+            last_spliceable_p = false;
+          } else if (diagonal <= last_diagonal + max_distance) {
+            *ptr_spliceable++ = ptr;
+            ptr->spliceable_high_p = last_spliceable_p = true;
+          }
+        }
+#endif
+        if (diagonal <= last_diagonal + max_distance) {
+          *ptr_spliceable++ = ptr;
+          ptr->spliceable_high_p = last_spliceable_p = true;
+          debug4s(printf("%s diagonal %u is spliceable because next one is at %u\n",
+                         plusp ? "plus" : "minus",last_diagonal,diagonal));
+        } else {
+          ptr->spliceable_high_p = last_spliceable_p = false;
+          debug4s(printf("%s diagonal %u is not spliceable because next one is at %u\n",
+                         plusp ? "plus" : "minus",last_diagonal,diagonal));
+        }
+        debug14(printf("Saving segment at %u (%u), query %d..%d",last_diagonal,last_diagonal-chroffset,ptr->querypos5,ptr->querypos3));
+        *ptr_all++ = ptr;
+        if (last_querypos >= first_querypos + /*min_segment_length*/1) {
+          debug14(printf(" ANCHOR"));
+          *ptr_anchor++ = ptr;
+        }
+        debug14(printf("\n"));
+        ptr++;
+      }
+
+      /* Prepare next diagonal */
+      first_querypos = querypos;
+      last_diagonal = diagonal;
+      floor_incr = floors_from_neg3[first_querypos] /* floors->score[-index1interval][first_querypos] */;
+      floor = floor_incr;
+      floor_xlast = floor_incr;
+      floor_xfirst = floors_from_xfirst[first_querypos]; /* floors->score[xfirst_from][first_querypos]; */
+
+#ifdef OLD_FLOOR_ENDS
+      if (querypos < halfquery_lastpos) {
+        floor_left = floor_incr;
+      } else {
+        floor_left = floors->scorefrom[-index1interval][halfquery_lastpos];
+      }
+      if (querypos < halfquerylength) {
+        floor_right = floors->scorefrom[halfquerylength-index1interval][query_lastpos];
+      } else {
+        floor_right = floors->scorefrom[halfquerylength-index1interval][first_querypos];
+      }
+#else
+      floor_left = floor_incr;
+#ifdef DEBUG1
+      floor_right = -99;        /* For debugging output */
+#endif
+#endif
+
+      debug1(printf("*multiple_mm_%s, diagonal %llu, querypos %d\n",
+                    plusp ? "plus" : "minus",(unsigned long long) diagonal,querypos));
+      debug1(printf("start of diagonal %llu, first_querypos = %d => initial values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+                    (unsigned long long) diagonal,first_querypos,
+                    floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+    }
+    last_querypos = querypos;
+
+
+    if (--batch->npositions <= 0) {
+      /* Use last entry in heap for insertion */
+      batch = heap[heapsize];
+      querypos = batch->querypos;
+      heap[heapsize--] = sentinel;
+
+    } else {
+      /* Use this batch for insertion (same querypos) */
+#ifdef LARGE_GENOMES
+      batch->diagonal = ((Univcoord_T) *(++batch->positions_high) << 32) + *(++batch->positions_low) + batch->diagterm;
+#elif defined(WORDS_BIGENDIAN)
+      batch->diagonal = Bigendian_convert_univcoord(*(++batch->positions)) + batch->diagterm;
+#else
+      batch->diagonal = *(++batch->positions) + batch->diagterm;
+#endif
+#ifdef DIAGONAL_ADD_QUERYPOS
+      batch->diagonal_add_querypos = (UINT8) batch->diagonal;
+      batch->diagonal_add_querypos <<= 32;
+      batch->diagonal_add_querypos |= querypos /* Previously added 2 because querypos was -2: + 2*/;
+#endif
+    }
+
+    /* heapify */
+    parenti = 1;
+#ifdef DIAGONAL_ADD_QUERYPOS
+    diagonal_add_querypos = batch->diagonal_add_querypos;
+    smallesti = (heap[3]->diagonal_add_querypos < heap[2]->diagonal_add_querypos) ? 3 : 2;
+    while (diagonal_add_querypos > heap[smallesti]->diagonal_add_querypos) {
+      heap[parenti] = heap[smallesti];
+      parenti = smallesti;
+      smallesti = LEFT(parenti);
+      righti = smallesti+1;
+      if (heap[righti]->diagonal_add_querypos < heap[smallesti]->diagonal_add_querypos) {
+        smallesti = righti;
+      }
+    }
+#else
+    diagonal = batch->diagonal;
+    smallesti = ((heap[3]->diagonal < heap[2]->diagonal) ||
+                 ((heap[3]->diagonal == heap[2]->diagonal) &&
+                  (heap[3]->querypos < heap[2]->querypos))) ? 3 : 2;
+    /* Note that diagonal/querypos will never exceed a sentinel diagonal/querypos */
+    while (diagonal > heap[smallesti]->diagonal ||
+           (diagonal == heap[smallesti]->diagonal &&
+            querypos > heap[smallesti]->querypos)) {
+      heap[parenti] = heap[smallesti];
+      parenti = smallesti;
+      smallesti = LEFT(parenti);
+      righti = smallesti+1;
+      if ((heap[righti]->diagonal < heap[smallesti]->diagonal) ||
+          ((heap[righti]->diagonal == heap[smallesti]->diagonal) &&
+           (heap[righti]->querypos < heap[smallesti]->querypos))) {
+        smallesti = righti;
+      }
+    }
+#endif
+    heap[parenti] = batch;
+  }
+  debug14(printf("diagonal = %u, querypos = %d\n",last_diagonal,last_querypos));
+  debug14(printf("\n"));
+
+  /* Terminate loop. */
+  floor_incr = floors_to_pos3[last_querypos]; /* floors->score[last_querypos][query_lastpos+index1interval]; */
+  floor += floor_incr;
+  floor_xfirst += floor_incr;
+  floor_xlast += floors_to_xlast[last_querypos]; /* floors->score[last_querypos][xlast_to]; */
+
+#ifdef OLD_FLOOR_ENDS
+  if (last_querypos < halfquery_lastpos) {
+    floor_left += floors->scorefrom[last_querypos][halfquery_lastpos+index1interval];
+    floor_right = floors->scorefrom[halfquerylength-index1interval][query_lastpos+index1interval];
+  }
+  if (last_querypos >= halfquerylength) {
+    floor_right += floor_incr;
+  }
+#else
+  floor_right = floor_incr;
+#endif
+
+  debug1(printf("no more diagonals: last_querypos = %d => terminal values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+                last_querypos,floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+  debug1(printf("last_diagonal %u vs chrhigh %u (looking for >)\n",last_diagonal,chrhigh));
+  if (last_diagonal > chrhigh) {
+    if (ptr > ptr_chrstart) {
+      /* Add chr marker segment */
+      debug14(printf("=== ptr %p > ptr_chrstart %p, so adding chr marker segment\n",ptr,ptr_chrstart));
+      ptr->diagonal = (Univcoord_T) -1;
+      ptr_chrstart = ++ptr;
+    }
+
+    /* update chromosome bounds, based on low end */
+#ifdef SLOW_CHR_UPDATE
+    chrnum = Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength);
+    Univ_IIT_interval_bounds(&chroffset,&chrhigh,&chrlength,chromosome_iit,chrnum,circular_typeint);
+    /* chrhigh += 1; */
+#else
+    j = 1;
+#ifdef NO_EXTENSIONS_BEFORE_ZERO
+    goal = last_diagonal - querylength + 1;
+#else
+    goal = last_diagonal + 1;
+#endif
+    while (j < nchromosomes_local && chrhighs_local[j] < goal) {
+      j <<= 1;                  /* gallop by 2 */
+    }
+    if (j >= nchromosomes_local) {
+      j = binary_search(j >> 1,nchromosomes_local,chrhighs_local,goal);
+    } else {
+      j = binary_search(j >> 1,j,chrhighs_local,goal);
+    }
+    chrnum += j;
+#ifdef DEBUG15
+    if (chrnum != Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength)) {
+      fprintf(stderr,"Got chrnum %d, but wanted %d\n",
+              chrnum,Univ_IIT_get_one(chromosome_iit,last_diagonal-querylength,last_diagonal-querylength));
+      abort();
+    }
+#endif
+    chroffset = chroffsets[chrnum-1];
+    chrhigh = chrhighs[chrnum-1];
+    chrlength = chrlengths[chrnum-1];
+    chrhighs_local += j;
+    nchromosomes_local -= j;
+#endif
+  }
+
+  debug1(printf("last_diagonal %u vs chrhigh %u (looking for <=)\n",last_diagonal,chrhigh));
+  if (last_diagonal <= chrhigh) { /* FORMULA for high position */
+    /* position of high end is within current chromosome */
+    debug1(printf(" => multiple_mm, diagonal %llu, query %d..%d, chrbounds %llu..%llu, floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n",
+                  (unsigned long long) last_diagonal,first_querypos,last_querypos,
+                  (unsigned long long) chroffset,(unsigned long long) chrhigh,
+                  floor,floor_xfirst,floor_xlast,floor_left,floor_right));
+
+    /* Save segment, but first advance splicesites past segment_left */
+    segment_left = last_diagonal - querylength;
+#if 0
+    /* Last segment is not spliceable */
+    max_distance = overall_max_distance;
+#endif
+    if (splicesites_local[0] >= last_diagonal) {
+      ptr->splicesites_i = -1;
+    } else if (Splicetrie_splicesite_p(segment_left,/*pos5*/1,/*pos3*/querylength) == false) {
+      ptr->splicesites_i = -1;
+    } else {
+      if (splicesites_local[0] < segment_left) {
+        j = 1;
+        while (j < nsplicesites_local && splicesites_local[j] < segment_left) {
+          j <<= 1;              /* gallop by 2 */
+        }
+        if (j >= nsplicesites_local) {
+          j = binary_search(j >> 1,nsplicesites_local,splicesites_local,segment_left);
+        } else {
+          j = binary_search(j >> 1,j,splicesites_local,segment_left);
+        }
+        joffset += j;
+        splicesites_local += j;
+        nsplicesites_local -= j;
+      }
+
+      if (splicesites_local[0] >= last_diagonal) {
+        ptr->splicesites_i = -1;
+      } else {
+        ptr->splicesites_i = joffset;
+#if 0
+        /* Last segment is not spliceable */
+        if (splicedists[joffset] > overall_max_distance) {
+          max_distance = splicedists[joffset];
+        }
+#endif
+      }
+    }
+
+    /* Save segment */
+    ptr->diagonal = last_diagonal;
+    ptr->chrnum = chrnum;
+    ptr->chroffset = chroffset;
+    ptr->chrhigh = chrhigh;
+    ptr->chrlength = chrlength;
+    ptr->querypos5 = first_querypos;
+    ptr->querypos3 = last_querypos;
+
+    /* FORMULA */
+    if (plusp) {
+      ptr->lowpos = ptr->diagonal - querylength + ptr->querypos5;
+      ptr->highpos = ptr->diagonal - querylength + ptr->querypos3 + index1part;
+    } else {
+      ptr->lowpos = ptr->diagonal - ptr->querypos3 - index1part - index1part;
+      ptr->highpos = ptr->diagonal - ptr->querypos5 - index1part;
+    }
+
+    ptr->floor = floor;
+    ptr->floor_xfirst = floor_xfirst;
+    ptr->floor_xlast = floor_xlast;
+    ptr->floor_left = floor_left;
+    ptr->floor_right = floor_right;
+    ptr->leftmost = ptr->rightmost = -1;
+    ptr->left_splice_p = ptr->right_splice_p = false;
+    ptr->spliceable_low_p = last_spliceable_p;
+    ptr->spliceable_high_p = false;
+#if 0
+    ptr->leftspan = ptr->rightspan = -1;
+#endif
+    ptr->usedp = false;
+    ptr->pairablep = false;
+
+    /* Last segment is not spliceable */
+    debug14(printf("Saving segment at %u (%u), query %d..%d",last_diagonal,last_diagonal - chroffset,ptr->querypos5,ptr->querypos3));
+    *ptr_all++ = ptr;
+    if (last_querypos >= first_querypos + /*min_segment_length*/1) {
+      debug14(printf(" ANCHOR"));
+      *ptr_anchor++ = ptr;
+    }
+    debug14(printf("\n"));
+    ptr++;
+  }
+
+
+  if (ptr > ptr_chrstart) {
+    /* Final chr marker segment */
+    debug14(printf("=== ptr %p > ptr_chrstart %p, so adding final chr marker segment\n",ptr,ptr_chrstart));
+    ptr->diagonal = (Univcoord_T) -1;
+    /* ptr_chrstart = */ ++ptr;
+  }
+
+#ifdef DEBUG19
+  for (k = 0, ptr0 = segments; ptr0 < ptr; k++, ptr0++) {
+    printf("%d %llu\n",k,(unsigned long long) ptr0->diagonal);
+  }
+  printf("total_npositions = %d, nchromosomes = %d\n",total_npositions,nchromosomes);
+#endif
+
+  FREEA(heap);
+  FREEA(batchpool);
+
+  /* Note: segments is in descending diagonal order.  Will need to
+     reverse before solving middle deletions */
+  /* NOTE(review): the min-heap above appears to emit diagonals in
+     ascending order, so the "descending" wording may be stale -- confirm */
+
+  *nsegments = ptr - segments;
+  *nanchors = ptr_anchor - *anchor_segments;
+  *nspliceable = ptr_spliceable - *spliceable;
+  debug(printf("nsegments = %d, of which %d are spliceable (total_npositions = %d, nchromosomes = %d)\n",
+               *nsegments,*nspliceable,total_npositions,nchromosomes));
+  debug1(printf("nsegments = %d, of which %d are spliceable (total_npositions = %d, nchromosomes = %d)\n",
+                *nsegments,*nspliceable,total_npositions,nchromosomes));
+
+  assert(*nsegments <= total_npositions + nchromosomes);
+  assert(*nanchors <= total_npositions);
+  assert(*nspliceable <= total_npositions);
+
+  n_all_segments = ptr_all - all_segments;
+  debug(printf("%d all segments\n",n_all_segments));
+  debug(printf("%d anchor segments\n",*nanchors));
+
+  if (n_all_segments <= max_anchors) {
+    /* Might as well use all segments */
+    FREE(*anchor_segments);
+    *anchor_segments = all_segments;
+    *nanchors = n_all_segments;
+
+  } else if (*nanchors <= max_anchors) {
+    /* Use only the good anchor segments */
+    FREE(all_segments);
+
+  } else {
+    /* Need to limit anchor segments */
+    FREE(all_segments);
+
+    /* Treat each mod separately */
+    qsort(*anchor_segments,*nanchors,sizeof(Segment_T),Segment_mod_length_cmp);
+
+    mod = 0;
+    i = 0;
+    while (mod < index1interval) {
+      /* Find the end j of the run of anchors with this querypos5 mod */
+      j = i;
+      while (j < *nanchors && (*anchor_segments)[j]->querypos5 % index1interval == mod) {
+        j++;
+      }
+      nanchors_bymod[mod] = j - i;
+
+      if (j - i <= max_anchors) {
+        naccept_bymod[mod] = j - i;
+      } else {
+        /* Accept max_anchors, plus up to 100 more that tie in length
+           with the first anchor past the limit */
+        k = i + max_anchors;
+        length_threshold = (*anchor_segments)[k]->querypos3 - (*anchor_segments)[k]->querypos5;
+        while (k < j && k < i + max_anchors + /*ties*/100 &&
+               (*anchor_segments)[k]->querypos3 - (*anchor_segments)[k]->querypos5 == length_threshold) {
+          k++;
+        }
+        naccept_bymod[mod] = k - i;
+      }
+
+      debug(printf("For mod %d, accepting %d out of %d anchor segments with length threshold %d\n",
+                   mod,naccept_bymod[mod],nanchors_bymod[mod],length_threshold));
+      i = j;
+      mod++;
+    }
+
+    /* Move good anchors to start of array */
+    dest = src = &((*anchor_segments)[0]);
+    *nanchors = 0;
+    for (mod = 0; mod < index1interval; mod++) {
+      memmove((void *) dest,(void *) src,naccept_bymod[mod] * sizeof(Segment_T));
+      dest += naccept_bymod[mod];
+      src += nanchors_bymod[mod];
+      *nanchors += naccept_bymod[mod];
+    }
+
+    /* Re-sort in diagonal order */
+    qsort(*anchor_segments,*nanchors,sizeof(Segment_T),Segment_diagonal_cmp);
+  }
+
+
+#ifdef DEBUG19
+  printf("%d total segments\n",*nsegments);
+  for (ptr0 = segments; ptr0 < ptr; ptr0++) {
+    printf("%u %d..%d\n",ptr0->diagonal,ptr0->querypos5,ptr0->querypos3);
+  }
+#endif
+
+#ifdef DEBUG
+  printf("%d selected anchor segments\n",*nanchors);
+  for (p = &(*anchor_segments)[0]; p< &((*anchor_segments)[*nanchors]); p++) {
+    segment = (Segment_T) *p;
+    printf("%u %d..%d spliceable_low:%d spliceable_high:%d\n",
+           segment->diagonal,segment->querypos5,segment->querypos3,segment->spliceable_low_p,segment->spliceable_high_p);
+  }
+#endif
+
+  return segments;
+}
+ + +#elif defined(USE_MERGE) || defined(USE_HEAP) + +/* TODO: Change spliceable to be an attribute of the segment. 
Then we + can loop over anchor_segments only */ +static struct Segment_T * +identify_all_segments (int *nsegments, Segment_T **anchor_segments, int *nanchors, + Segment_T **spliceable, int *nspliceable, +#ifdef LARGE_GENOMES + unsigned char **positions_high, UINT4 **positions_low, +#else + Univcoord_T **positions, +#endif + int *npositions, bool *omitted, int querylength, int query_lastpos, + Floors_T floors, bool plusp) { + struct Segment_T *segments = NULL; + Segment_T *all_segments, *ptr_all, *ptr_anchor, *dest, *src; + int length_threshold; + int n_all_segments, n; + int nanchors_bymod[MAX_INDEX1INTERVAL], naccept_bymod[MAX_INDEX1INTERVAL]; + int mod; + int k; + + struct Record_T *all_records; + Record_T *all_records_merged, *record_ptr, record; + int n_all_records, i; + + List_T stream_list = NULL; + Intlist_T streamsize_list = NULL, querypos_list = NULL, diagterm_list = NULL; + int nstreams = 0; + + int querypos, first_querypos, last_querypos; + int floor_left, floor_right, floor_incr; + int floor, floor_xfirst, floor_xlast, *floors_from_xfirst, *floors_to_xlast; + int *floors_from_neg3, *floors_to_pos3; + /* int exclude_xfirst, exclude_xlast; */ + Univcoord_T diagonal, segment_left, last_diagonal, chroffset = 0U, chrhigh = 0U; + Chrpos_T chrlength, max_distance; + Chrnum_T chrnum = 1; +#ifdef OLD_FLOOR_ENDS + int halfquerylength, halfquery_lastpos; +#endif + +#ifdef DIAGONAL_ADD_QUERYPOS + UINT8 diagonal_add_querypos; +#endif + int total_npositions = 0; + int joffset = 0, j; #ifdef DEBUG Segment_T segment, *p; @@ -4165,68 +5447,54 @@ halfquery_lastpos = halfquerylength - index1part; #endif - /* Create sentinel */ -#ifdef DIAGONAL_ADD_QUERYPOS - sentinel_struct.diagonal_add_querypos = (UINT8) -1; /* infinity */ - sentinel_struct.diagonal_add_querypos <<= 32; -#else - sentinel_struct.querypos = querylength; /* essentially infinity */ - sentinel_struct.diagonal = (Univcoord_T) -1; /* infinity */ -#endif - sentinel = &sentinel_struct; - - /* Set up batches */ - 
batchpool = (struct Batch_T *) MALLOCA((query_lastpos+1) * sizeof(struct Batch_T)); - heap = (Batch_T *) MALLOCA((2*(query_lastpos+1)+1+1) * sizeof(Batch_T)); - /* Don't add entries for compoundpos positions (skip querypos -2, -1, lastpos+1, lastpos+2) */ if (plusp) { - for (querypos = 0, i = 0; querypos <= query_lastpos; querypos++) { + for (querypos = 0; querypos <= query_lastpos; querypos++) { if (omitted[querypos] == true) { debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n", querypos,npositions[querypos],omitted[querypos])); } else if (npositions[querypos] > 0) { debug1(printf("Adding batch for querypos %d with %d positions, omitted %d\n", querypos,npositions[querypos],omitted[querypos])); - batch = &(batchpool[i]); + #ifdef LARGE_GENOMES + batch = &(batchpool[i]); Batch_init(batch,querypos,/*diagterm*/querylength - querypos,positions_high[querypos],positions_low[querypos], npositions[querypos],querylength); #else - Batch_init(batch,querypos,/*diagterm*/querylength - querypos,positions[querypos], - npositions[querypos],querylength); + stream_list = List_push(stream_list,(void *) positions[querypos]); + streamsize_list = Intlist_push(streamsize_list,npositions[querypos]); + querypos_list = Intlist_push(querypos_list,querypos); + diagterm_list = Intlist_push(diagterm_list,querylength - querypos); #endif total_npositions += npositions[querypos]; - if (batch->npositions > 0) { - min_heap_insert(heap,&heapsize,batch); - i++; - } + nstreams++; } else { debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n", querypos,npositions[querypos],omitted[querypos])); } } + } else { - for (querypos = 0, i = 0; querypos <= query_lastpos; querypos++) { + for (querypos = 0; querypos <= query_lastpos; querypos++) { if (omitted[querypos] == true) { debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n", querypos,npositions[querypos],omitted[querypos])); } else if (npositions[querypos] > 0) { 
debug1(printf("Adding batch for querypos %d with %d positions, omitted %d\n", querypos,npositions[querypos],omitted[querypos])); - batch = &(batchpool[i]); #ifdef LARGE_GENOMES + batch = &(batchpool[i]); Batch_init(batch,querypos,/*diagterm*/querypos + index1part,positions_high[querypos],positions_low[querypos], npositions[querypos],querylength); #else - Batch_init(batch,querypos,/*diagterm*/querypos + index1part,positions[querypos], - npositions[querypos],querylength); + stream_list = List_push(stream_list,(void *) positions[querypos]); + streamsize_list = Intlist_push(streamsize_list,npositions[querypos]); + querypos_list = Intlist_push(querypos_list,querypos); + diagterm_list = Intlist_push(diagterm_list,querypos + index1part); #endif total_npositions += npositions[querypos]; - if (batch->npositions > 0) { - min_heap_insert(heap,&heapsize,batch); - i++; - } + nstreams++; } else { debug1(printf("Not adding batch for querypos %d with %d positions, omitted %d\n", querypos,npositions[querypos],omitted[querypos])); @@ -4236,17 +5504,20 @@ debug14(printf("Initial total_npositions = %d\n",total_npositions)); - if (i == 0) { - FREEA(heap); - FREEA(batchpool); + if (nstreams == 0) { *nsegments = 0; return (struct Segment_T *) NULL; + } else { + all_records = (struct Record_T *) MALLOC(total_npositions * sizeof(struct Record_T)); +#ifdef USE_HEAP + record_ptr = all_records_merged = Merge_records_heap(&n_all_records,stream_list,streamsize_list,querypos_list,diagterm_list, + all_records); +#else + record_ptr = all_records_merged = Merge_records(&n_all_records,stream_list,streamsize_list,querypos_list,diagterm_list, + all_records); +#endif } - /* Set up rest of heap */ - for (i = heapsize+1; i <= 2*heapsize+1; i++) { - heap[i] = sentinel; - } /* Putting chr marker "segments" after each chromosome */ segments = (struct Segment_T *) MALLOC((total_npositions + nchromosomes) * sizeof(struct Segment_T)); @@ -4291,10 +5562,26 @@ floors_to_pos3 = 
floors->scoreto[query_lastpos+index1interval]; - /* Initialize loop */ - batch = heap[1]; - first_querypos = last_querypos = querypos = batch->querypos; - last_diagonal = diagonal = batch->diagonal; + /* Skip records where diagonals < querylength, which leads to left < 0U */ + debug1(printf("n_all_records at start: %d\n",n_all_records)); + while (n_all_records > 0 && (*record_ptr)->diagonal < (Univcoord_T) querylength) { + debug1(printf("Skipping record with diagonal %d\n",(*record_ptr)->diagonal)); + record_ptr++; + n_all_records -= 1; + } + debug1(printf("n_all_records at end: %d\n",n_all_records)); + if (n_all_records == 0) { + FREE(all_segments); + FREE(segments); + FREE(all_records_merged); + FREE(all_records); + return (struct Segment_T *) NULL; + } + + + record = *record_ptr; + first_querypos = last_querypos = querypos = record->querypos; + last_diagonal = diagonal = record->diagonal; floor_incr = floors_from_neg3[first_querypos]; floor = floor_incr; @@ -4325,70 +5612,12 @@ debug1(printf("first_querypos = %d => initial values: floor %d, floor_xfirst %d, floor_xlast %d, floor_left %d, floor_right %d\n", first_querypos,floor,floor_xfirst,floor_xlast,floor_left,floor_right)); - if (--batch->npositions <= 0) { - /* Use last entry in heap for insertion */ - batch = heap[heapsize]; - querypos = batch->querypos; - heap[heapsize--] = sentinel; - - } else { - /* Use this batch for insertion (same querypos) */ -#ifdef LARGE_GENOMES - batch->diagonal = ((Univcoord_T) *(++batch->positions_high) << 32) + *(++batch->positions_low) + batch->diagterm; -#elif defined(WORDS_BIGENDIAN) - batch->diagonal = Bigendian_convert_univcoord(*(++batch->positions)) + batch->diagterm; -#else - batch->diagonal = *(++batch->positions) + batch->diagterm; -#endif -#ifdef DIAGONAL_ADD_QUERYPOS - batch->diagonal_add_querypos = (UINT8) batch->diagonal; - batch->diagonal_add_querypos <<= 32; - batch->diagonal_add_querypos |= querypos /* Previously added 2 because querypos was -2: + 2*/; -#endif 
- } - - /* heapify */ - parenti = 1; -#ifdef DIAGONAL_ADD_QUERYPOS - diagonal_add_querypos = batch->diagonal_add_querypos; - smallesti = (heap[3]->diagonal_add_querypos < heap[2]->diagonal_add_querypos) ? 3 : 2; - while (diagonal_add_querypos > heap[smallesti]->diagonal_add_querypos) { - heap[parenti] = heap[smallesti]; - parenti = smallesti; - smallesti = LEFT(parenti); - righti = smallesti+1; - if (heap[righti]->diagonal_add_querypos < heap[smallesti]->diagonal_add_querypos) { - smallesti = righti; - } - } -#else - diagonal = batch->diagonal; - smallesti = ((heap[3]->diagonal < heap[2]->diagonal) || - ((heap[3]->diagonal == heap[2]->diagonal) && - (heap[3]->querypos < heap[2]->querypos))) ? 3 : 2; - /* Note that diagonal/querypos will never exceed a sentinel diagonal/querypos */ - while (diagonal > heap[smallesti]->diagonal || - (diagonal == heap[smallesti]->diagonal && - querypos > heap[smallesti]->querypos)) { - heap[parenti] = heap[smallesti]; - parenti = smallesti; - smallesti = LEFT(parenti); - righti = smallesti+1; - if ((heap[righti]->diagonal < heap[smallesti]->diagonal) || - ((heap[righti]->diagonal == heap[smallesti]->diagonal) && - (heap[righti]->querypos < heap[smallesti]->querypos))) { - smallesti = righti; - } - } -#endif - heap[parenti] = batch; - /* Continue after initialization */ - while (heapsize > 0) { - batch = heap[1]; - querypos = batch->querypos; - diagonal = batch->diagonal; + while (--n_all_records > 0) { + record = *++record_ptr; + querypos = record->querypos; + diagonal = record->diagonal; debug14(printf("diagonal = %u, querypos = %d\n",last_diagonal,last_querypos)); if (diagonal == last_diagonal) { @@ -4643,69 +5872,14 @@ } last_querypos = querypos; - - - if (--batch->npositions <= 0) { - /* Use last entry in heap for insertion */ - batch = heap[heapsize]; - querypos = batch->querypos; - heap[heapsize--] = sentinel; - - } else { - /* Use this batch for insertion (same querypos) */ -#ifdef LARGE_GENOMES - batch->diagonal = 
((Univcoord_T) *(++batch->positions_high) << 32) + *(++batch->positions_low) + batch->diagterm; -#elif defined(WORDS_BIGENDIAN) - batch->diagonal = Bigendian_convert_univcoord(*(++batch->positions)) + batch->diagterm; -#else - batch->diagonal = *(++batch->positions) + batch->diagterm; -#endif -#ifdef DIAGONAL_ADD_QUERYPOS - batch->diagonal_add_querypos = (UINT8) batch->diagonal; - batch->diagonal_add_querypos <<= 32; - batch->diagonal_add_querypos |= querypos /* Previously added 2 because querypos was -2: + 2*/; -#endif - } - - /* heapify */ - parenti = 1; -#ifdef DIAGONAL_ADD_QUERYPOS - diagonal_add_querypos = batch->diagonal_add_querypos; - smallesti = (heap[3]->diagonal_add_querypos < heap[2]->diagonal_add_querypos) ? 3 : 2; - while (diagonal_add_querypos > heap[smallesti]->diagonal_add_querypos) { - heap[parenti] = heap[smallesti]; - parenti = smallesti; - smallesti = LEFT(parenti); - righti = smallesti+1; - if (heap[righti]->diagonal_add_querypos < heap[smallesti]->diagonal_add_querypos) { - smallesti = righti; - } - } -#else - diagonal = batch->diagonal; - smallesti = ((heap[3]->diagonal < heap[2]->diagonal) || - ((heap[3]->diagonal == heap[2]->diagonal) && - (heap[3]->querypos < heap[2]->querypos))) ? 3 : 2; - /* Note that diagonal/querypos will never exceed a sentinel diagonal/querypos */ - while (diagonal > heap[smallesti]->diagonal || - (diagonal == heap[smallesti]->diagonal && - querypos > heap[smallesti]->querypos)) { - heap[parenti] = heap[smallesti]; - parenti = smallesti; - smallesti = LEFT(parenti); - righti = smallesti+1; - if ((heap[righti]->diagonal < heap[smallesti]->diagonal) || - ((heap[righti]->diagonal == heap[smallesti]->diagonal) && - (heap[righti]->querypos < heap[smallesti]->querypos))) { - smallesti = righti; - } - } -#endif - heap[parenti] = batch; } debug14(printf("diagonal = %u, querypos = %d\n",last_diagonal,last_querypos)); debug14(printf("\n")); + FREE(all_records_merged); + FREE(all_records); + + /* Terminate loop. 
*/ floor_incr = floors_to_pos3[last_querypos]; /* floors->score[last_querypos][query_lastpos+index1interval]; */ floor += floor_incr; @@ -4878,8 +6052,6 @@ printf("total_npositions = %d, nchromosomes = %d\n",total_npositions,nchromosomes); #endif - FREEA(heap); - FREEA(batchpool); /* Note: segments is in descending diagonal order. Will need to reverse before solving middle deletions */ @@ -4977,12 +6149,9 @@ return segments; } -#endif - +#elif defined(USE_LOSER_TREES) -#ifndef USE_HEAP -/* Uses a loser tree */ /* TODO: Change spliceable to be an attribute of the segment. Then we can loop over anchor_segments only */ static struct Segment_T * @@ -5083,6 +6252,7 @@ /* Skip */ } } + if (heapsize == 0) { *nsegments = 0; return (struct Segment_T *) NULL; @@ -14284,9 +15454,15 @@ queryuc_ptr,querylength,query_lastpos); } +#if 1 debug(printf("Omitting frequent/repetitive oligos\n")); omit_oligos(&all_omitted_p,&(*any_omitted_p),this,query_lastpos,indexdb_size_threshold, omit_frequent_p,omit_repetitive_p); +#else + debug(printf("Not omitting frequent/repetitive oligos\n")); + all_omitted_p = false; + *any_omitted_p = false; +#endif if (all_omitted_p == true) { debug(printf("Aborting because all oligos are omitted\n")); @@ -14734,7 +15910,7 @@ static List_T -run_gmap_for_region (bool *good_start_p, bool *good_end_p, History_T gmap_history, +run_gmap_for_region (bool *successp, bool *good_start_p, bool *good_end_p, History_T gmap_history, List_T hits, char *accession, char *queryuc_ptr, int querylength, int sense_try, bool favor_right_p, @@ -14759,10 +15935,11 @@ List_T pairs1, pairs2; struct Pair_T *pairarray1, *pairarray2; Univcoord_T start, end; - double min_splice_prob_1, min_splice_prob_2; + double avg_splice_score_1, avg_splice_score_2; int goodness1, goodness2; int npairs1, npairs2, nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks; int cdna_direction, sensedir; + double avg_splice_score; int matches1, unknowns1, mismatches1, qopens1, qindels1, topens1, 
tindels1, ncanonical1, nsemicanonical1, nnoncanonical1; int matches2, unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2, @@ -14781,6 +15958,7 @@ (Chrpos_T) (mappingend-chroffset),watsonp,sense_try,querylength, (Chrpos_T) (knownsplice_limit_low-chroffset),(Chrpos_T) (knownsplice_limit_high-chroffset))); + *successp = false; *good_start_p = *good_end_p = false; /* It is possible for mappingend to equal mappingstart if the read @@ -14793,11 +15971,18 @@ debug13(printf("Already ran these coordinates, and have results\n")); for (p = stored_hits; p != NULL; p = List_next(p)) { if ((hit = (Stage3end_T) List_head(p)) != NULL) { - if (Stage3end_trim_left(hit) < GOOD_GMAP_END) { + if (Stage3end_trim_left(hit) < GOOD_GMAP_END && Stage3end_trim_right(hit) < GOOD_GMAP_END && + Stage3end_gmap_goodness(hit) >= querylength + 12) { + *successp = true; *good_start_p = true; - } - if (Stage3end_trim_right(hit) < GOOD_GMAP_END) { *good_end_p = true; + } else { + if (Stage3end_trim_left(hit) < GOOD_GMAP_END) { + *good_start_p = true; + } + if (Stage3end_trim_right(hit) < GOOD_GMAP_END) { + *good_end_p = true; + } } hits = List_push(hits,(void *) Stage3end_copy(hit)); } @@ -14866,7 +16051,7 @@ &ambig_splicetype_5_1,&ambig_splicetype_3_1, &ambig_prob_5_1,&ambig_prob_3_1, &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1, - &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1, + &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1, &pairarray2,&pairs2,&npairs2,&goodness2, &matches2,&nmatches_posttrim_2,&max_match_length_2, @@ -14874,7 +16059,7 @@ &ambig_splicetype_5_2,&ambig_splicetype_3_2, &ambig_prob_5_2,&ambig_prob_3_2, &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2, - &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2, + &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2, Stage2_middle(stage2),Stage2_all_starts(stage2),Stage2_all_ends(stage2), #ifdef END_KNOWNSPLICING_SHORTCUT @@ -14896,7 
+16081,7 @@ debug13(printf("stage3 is NULL\n")); stored_hits = List_push(stored_hits,(void *) NULL); - } else if (cdna_direction == 0) { + } else if (pairarray2 != NULL) { debug13(printf("stage3 is not NULL, and cdna direction not determined\n")); debug13a(Pair_dump_array(pairarray1,npairs1,true)); @@ -14910,7 +16095,7 @@ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, ambig_end_length_5_1,ambig_end_length_3_1, ambig_splicetype_5_1,ambig_splicetype_3_1, - min_splice_prob_1, + avg_splice_score_1,goodness1, pairarray1,npairs1,nsegments,nintrons,nindelbreaks, /*left*/start,/*genomiclength*/end - start + 1, /*plusp*/watsonp,genestrand, @@ -14922,14 +16107,22 @@ FREE_OUT(pairarray1); } else { - if (Stage3end_trim_left(hit) < GOOD_GMAP_END) { + if (Stage3end_trim_left(hit) < GOOD_GMAP_END && Stage3end_trim_right(hit) < GOOD_GMAP_END && + goodness1 >= querylength + 12) { + *successp = true; *good_start_p = true; - } - if (Stage3end_trim_right(hit) < GOOD_GMAP_END) { *good_end_p = true; + } else { + if (Stage3end_trim_left(hit) < GOOD_GMAP_END) { + *good_start_p = true; + } + if (Stage3end_trim_right(hit) < GOOD_GMAP_END) { + *good_end_p = true; + } } debug13(printf("Trim at start: %d, trim at end: %d\n", Stage3end_trim_left(hit),Stage3end_trim_right(hit))); + debug13(printf("Goodness %d, nmismatches %d\n",goodness1,nmismatches_whole)); stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit)); hits = List_push(hits,(void *) hit); } @@ -14942,7 +16135,7 @@ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, ambig_end_length_5_1,ambig_end_length_3_1, ambig_splicetype_5_1,ambig_splicetype_3_1, - min_splice_prob_1, + avg_splice_score_1,goodness1, pairarray1,npairs1,nsegments,nintrons,nindelbreaks, /*left*/end,/*genomiclength*/start - end + 1, /*plusp*/watsonp,genestrand, @@ -14954,14 +16147,22 @@ FREE_OUT(pairarray1); } else { - if (Stage3end_trim_right(hit) < GOOD_GMAP_END) { + if 
(Stage3end_trim_left(hit) < GOOD_GMAP_END && Stage3end_trim_right(hit) < GOOD_GMAP_END && + goodness1 >= querylength + 12) { + *successp = true; *good_start_p = true; - } - if (Stage3end_trim_left(hit) < GOOD_GMAP_END) { *good_end_p = true; + } else { + if (Stage3end_trim_right(hit) < GOOD_GMAP_END) { + *good_start_p = true; + } + if (Stage3end_trim_left(hit) < GOOD_GMAP_END) { + *good_end_p = true; + } } debug13(printf("Trim at start: %d, trim at end: %d\n", Stage3end_trim_right(hit),Stage3end_trim_left(hit))); + debug13(printf("Goodness %d, nmismatches %d\n",goodness1,nmismatches_whole)); /* Don't throw away GMAP hits */ stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit)); hits = List_push(hits,(void *) hit); @@ -14981,7 +16182,7 @@ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2, ambig_end_length_5_2,ambig_end_length_3_2, ambig_splicetype_5_2,ambig_splicetype_3_2, - min_splice_prob_2, + avg_splice_score_2,goodness2, pairarray2,npairs2,nsegments,nintrons,nindelbreaks, /*left*/start,/*genomiclength*/end - start + 1, /*plusp*/watsonp,genestrand, @@ -14993,14 +16194,22 @@ FREE_OUT(pairarray2); } else { - if (Stage3end_trim_left(hit) < GOOD_GMAP_END) { + if (Stage3end_trim_left(hit) < GOOD_GMAP_END && Stage3end_trim_right(hit) < GOOD_GMAP_END && + goodness2 >= querylength + 12) { + *successp = true; *good_start_p = true; - } - if (Stage3end_trim_right(hit) < GOOD_GMAP_END) { *good_end_p = true; + } else { + if (Stage3end_trim_left(hit) < GOOD_GMAP_END) { + *good_start_p = true; + } + if (Stage3end_trim_right(hit) < GOOD_GMAP_END) { + *good_end_p = true; + } } debug13(printf("Trim at start: %d, trim at end: %d\n", Stage3end_trim_left(hit),Stage3end_trim_right(hit))); + debug13(printf("Goodness %d, nmismatches %d\n",goodness2,nmismatches_whole)); stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit)); hits = List_push(hits,(void *) hit); } @@ -15013,7 +16222,7 @@ if ((hit = 
Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2, ambig_end_length_5_2,ambig_end_length_3_2, ambig_splicetype_5_2,ambig_splicetype_3_2, - min_splice_prob_2, + avg_splice_score_2,goodness2, pairarray2,npairs2,nsegments,nintrons,nindelbreaks, /*left*/end,/*genomiclength*/start - end + 1, /*plusp*/watsonp,genestrand, @@ -15025,14 +16234,22 @@ FREE_OUT(pairarray2); } else { - if (Stage3end_trim_right(hit) < GOOD_GMAP_END) { + if (Stage3end_trim_left(hit) < GOOD_GMAP_END && Stage3end_trim_right(hit) < GOOD_GMAP_END && + goodness2 >= querylength + 12) { + *successp = true; *good_start_p = true; - } - if (Stage3end_trim_left(hit) < GOOD_GMAP_END) { *good_end_p = true; + } else { + if (Stage3end_trim_right(hit) < GOOD_GMAP_END) { + *good_start_p = true; + } + if (Stage3end_trim_left(hit) < GOOD_GMAP_END) { + *good_end_p = true; + } } debug13(printf("Trim at start: %d, trim at end: %d\n", Stage3end_trim_right(hit),Stage3end_trim_left(hit))); + debug13(printf("Goodness %d, nmismatches %d\n",goodness2,nmismatches_whole)); /* Don't throw away GMAP hits */ stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit)); hits = List_push(hits,(void *) hit); @@ -15054,7 +16271,7 @@ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, ambig_end_length_5_1,ambig_end_length_3_1, ambig_splicetype_5_1,ambig_splicetype_3_1, - min_splice_prob_1, + avg_splice_score_1,goodness1, pairarray1,npairs1,nsegments,nintrons,nindelbreaks, /*left*/start,/*genomiclength*/end - start + 1, /*plusp*/watsonp,genestrand, @@ -15066,14 +16283,22 @@ FREE_OUT(pairarray1); } else { - if (Stage3end_trim_left(hit) < GOOD_GMAP_END) { + if (Stage3end_trim_left(hit) < GOOD_GMAP_END && Stage3end_trim_right(hit) < GOOD_GMAP_END && + goodness1 >= querylength + 12) { + *successp = true; *good_start_p = true; - } - if (Stage3end_trim_right(hit) < GOOD_GMAP_END) { *good_end_p = true; + } else { + if (Stage3end_trim_left(hit) < GOOD_GMAP_END) { + 
*good_start_p = true; + } + if (Stage3end_trim_right(hit) < GOOD_GMAP_END) { + *good_end_p = true; + } } debug13(printf("Trim at start: %d, trim at end: %d\n", Stage3end_trim_left(hit),Stage3end_trim_right(hit))); + debug13(printf("Goodness %d, nmismatches %d\n",goodness1,nmismatches_whole)); stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit)); hits = List_push(hits,(void *) hit); } @@ -15086,7 +16311,7 @@ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, ambig_end_length_5_1,ambig_end_length_3_1, ambig_splicetype_5_1,ambig_splicetype_3_1, - min_splice_prob_1, + avg_splice_score_1,goodness1, pairarray1,npairs1,nsegments,nintrons,nindelbreaks, /*left*/end,/*genomiclength*/start - end + 1, /*plusp*/watsonp,genestrand, @@ -15098,14 +16323,22 @@ FREE_OUT(pairarray1); } else { - if (Stage3end_trim_right(hit) < GOOD_GMAP_END) { + if (Stage3end_trim_left(hit) < GOOD_GMAP_END && Stage3end_trim_right(hit) < GOOD_GMAP_END && + goodness1 >= querylength + 12) { + *successp = true; *good_start_p = true; - } - if (Stage3end_trim_left(hit) < GOOD_GMAP_END) { *good_end_p = true; + } else { + if (Stage3end_trim_right(hit) < GOOD_GMAP_END) { + *good_start_p = true; + } + if (Stage3end_trim_left(hit) < GOOD_GMAP_END) { + *good_end_p = true; + } } debug13(printf("Trim at start: %d, trim at end: %d\n", Stage3end_trim_right(hit),Stage3end_trim_left(hit))); + debug13(printf("Goodness %d, nmismatches %d\n",goodness1,nmismatches_whole)); /* Don't throw away GMAP hits */ stored_hits = List_push(stored_hits,(void *) Stage3end_copy(hit)); hits = List_push(hits,(void *) hit); @@ -15374,7 +16607,7 @@ /* 4 */ if (close_mappingstart_p == true && close_mappingend_p == true) { debug13(printf("Single hit: Running gmap with close mappingstart and close mappingend\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, + hits = 
run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, /*sense_try*/0,favor_right_p,close_mappingstart_last,close_mappingend_last, close_knownsplice_limit_low,close_knownsplice_limit_high, /*plusp*/true,genestrand,chrnum,chroffset,chrhigh,chrlength, @@ -15388,7 +16621,7 @@ } else if (good_start_p == true) { if (fallback_mappingend_p == true) { debug13(printf("Single hit: Re-running gmap with close mappingstart only\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, /*sense_try*/0,favor_right_p,close_mappingstart_last,mappingend, close_knownsplice_limit_low,knownsplice_limit_high, /*plusp*/true,genestrand,chrnum,chroffset,chrhigh,chrlength, @@ -15398,7 +16631,7 @@ } else if (good_end_p == true) { if (fallback_mappingstart_p == true) { debug13(printf("Single hit: Re-running gmap with close mappingend only\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, /*sense_try*/0,favor_right_p,mappingstart,close_mappingend_last, knownsplice_limit_low,close_knownsplice_limit_high, /*plusp*/true,genestrand,chrnum,chroffset,chrhigh,chrlength, @@ -15408,7 +16641,7 @@ } else { if (fallback_mappingstart_p == true && fallback_mappingend_p == true) { debug13(printf("Single hit: Re-running gmap with far mappingstart and mappingend\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, /*sense_try*/0,favor_right_p,mappingstart,mappingend, knownsplice_limit_low,close_knownsplice_limit_high, 
/*plusp*/true,genestrand,chrnum,chroffset,chrhigh,chrlength, @@ -15419,7 +16652,7 @@ } else if (close_mappingstart_p == true) { debug13(printf("Single hit: Running gmap with close mappingstart\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, /*sense_try*/0,favor_right_p,close_mappingstart_last,mappingend, close_knownsplice_limit_low,knownsplice_limit_high, /*plusp*/true,genestrand,chrnum,chroffset,chrhigh,chrlength, @@ -15431,7 +16664,7 @@ debug13(printf("Skipping re-run of gmap\n")); } else if (fallback_mappingstart_p == true) { debug13(printf("Single hit: Re-running gmap with far mappingstart\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, /*sense_try*/0,favor_right_p,mappingstart,mappingend, knownsplice_limit_low,knownsplice_limit_high, /*plusp*/true,genestrand,chrnum,chroffset,chrhigh,chrlength, @@ -15441,7 +16674,7 @@ } else if (close_mappingend_p == true) { debug13(printf("Single hit: Running gmap with close mappingend\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, /*sense_try*/0,favor_right_p,mappingstart,close_mappingend_last, knownsplice_limit_low,close_knownsplice_limit_high, /*plusp*/true,genestrand,chrnum,chroffset,chrhigh,chrlength, @@ -15453,7 +16686,7 @@ debug13(printf("Skipping re-run of gmap\n")); } else if (fallback_mappingend_p == true) { debug13(printf("Single hit: Re-running gmap with far mappingend\n")); - hits = 
run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, /*sense_try*/0,favor_right_p,mappingstart,mappingend, knownsplice_limit_low,knownsplice_limit_high, /*plusp*/true,genestrand,chrnum,chroffset,chrhigh,chrlength, @@ -15463,7 +16696,7 @@ } else { debug13(printf("Single hit: Running gmap with far mappingstart and mappingend\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, /*sense_try*/0,favor_right_p,mappingstart,mappingend, knownsplice_limit_low,knownsplice_limit_high, /*plusp*/true,genestrand,chrnum,chroffset,chrhigh,chrlength, @@ -15673,7 +16906,7 @@ /* 4 */ if (close_mappingstart_p == true && close_mappingend_p == true) { debug13(printf("Single hit: Running gmap with close mappingstart and close mappingend\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, /*sense_try*/0,favor_right_p,query_compress_fwd,query_compress_rev, close_mappingstart_last,close_mappingend_last, close_knownsplice_limit_low,close_knownsplice_limit_high, @@ -15688,7 +16921,7 @@ } else if (good_start_p == true) { if (fallback_mappingend_p == true) { debug13(printf("Single hit: Re-running gmap with close mappingstart only\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, /*sense_try*/0,favor_right_p, query_compress_fwd,query_compress_rev,close_mappingstart_last,mappingend, 
close_knownsplice_limit_low,knownsplice_limit_high, @@ -15699,7 +16932,7 @@ } else if (good_end_p == true) { if (fallback_mappingstart_p == true) { debug13(printf("Single hit: Re-running gmap with close mappingend only\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, /*sense_try*/0,favor_right_p, query_compress_fwd,query_compress_rev,mappingstart,close_mappingend_last, knownsplice_limit_low,close_knownsplice_limit_high, @@ -15710,7 +16943,7 @@ } else { if (fallback_mappingstart_p == true && fallback_mappingend_p == true) { debug13(printf("Single hit: Re-running gmap with far mappingstart and mappingend\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, /*sense_try*/0,favor_right_p, query_compress_fwd,query_compress_rev,mappingstart,mappingend, knownsplice_limit_low,close_knownsplice_limit_high, @@ -15722,7 +16955,7 @@ } else if (close_mappingstart_p == true) { debug13(printf("Single hit: Running gmap with close mappingstart\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, /*sense_try*/0,favor_right_p, query_compress_fwd,query_compress_rev,close_mappingstart_last,mappingend, close_knownsplice_limit_low,knownsplice_limit_high, @@ -15735,7 +16968,7 @@ debug13(printf("Skipping re-run of gmap\n")); } else if (fallback_mappingstart_p == true) { debug13(printf("Single hit: Re-running gmap with far mappingstart\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, + hits = 
run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, /*sense_try*/0,favor_right_p, query_compress_fwd,query_compress_rev,mappingstart,mappingend, knownsplice_limit_low,knownsplice_limit_high, @@ -15746,7 +16979,7 @@ } else if (close_mappingend_p == true) { debug13(printf("Single hit: Running gmap with close mappingend\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, /*sense_try*/0,favor_right_p, query_compress_fwd,query_compress_rev,mappingstart,close_mappingend_last, knownsplice_limit_low,close_knownsplice_limit_high, @@ -15759,7 +16992,7 @@ debug13(printf("Skipping re-run of gmap\n")); } else if (fallback_mappingend_p == true) { debug13(printf("Single hit: Re-running gmap with far mappingend\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, /*sense_try*/0,favor_right_p, query_compress_fwd,query_compress_rev,mappingstart,mappingend, knownsplice_limit_low,knownsplice_limit_high, @@ -15770,7 +17003,7 @@ } else { debug13(printf("Single hit: Running gmap with far mappingstart and mappingend\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history,hits,accession,queryuc_ptr,querylength, /*sense_try*/0,favor_right_p, query_compress_fwd,query_compress_rev,mappingstart,mappingend, knownsplice_limit_low,knownsplice_limit_high, @@ -15837,8 +17070,8 @@ max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2, unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2, ncanonical2, nsemicanonical2, nnoncanonical2; - double 
ambig_prob_5_1, ambig_prob_3_1, min_splice_prob_1; - double ambig_prob_5_2, ambig_prob_3_2, min_splice_prob_2; + double ambig_prob_5_1, ambig_prob_3_1, avg_splice_score_1; + double ambig_prob_5_2, ambig_prob_3_2, avg_splice_score_2; Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1; Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2; Univcoord_T start, end, left; @@ -16383,7 +17616,7 @@ &ambig_splicetype_5_1,&ambig_splicetype_3_1, &ambig_prob_5_1,&ambig_prob_3_1, &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1, - &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1, + &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1, &pairarray2,&pairs2,&npairs2,&goodness2, &matches2,&nmatches_posttrim_2,&max_match_length_2, @@ -16391,7 +17624,7 @@ &ambig_splicetype_5_2,&ambig_splicetype_3_2, &ambig_prob_5_2,&ambig_prob_3_2, &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2, - &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2, + &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2, stage2pairs,/*all_stage2_starts*/NULL,/*all_stage2_ends*/NULL, #ifdef END_KNOWNSPLICING_SHORTCUT @@ -16411,7 +17644,7 @@ oligoindices_minor,diagpool,cellpool)) == NULL) { /* hit = (T) NULL; */ - } else if (cdna_direction == 0) { + } else if (pairarray2 != NULL) { nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks, pairarray1,npairs1); start = subtract_bounded(chroffset + Pair_genomepos(&(pairarray1[0])), @@ -16422,7 +17655,7 @@ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, ambig_end_length_5_1,ambig_end_length_3_1, ambig_splicetype_5_1,ambig_splicetype_3_1, - min_splice_prob_1, + avg_splice_score_1,goodness1, pairarray1,npairs1,nsegments,nintrons,nindelbreaks, /*left*/start,/*genomiclength*/end - start + 1, /*plusp*/true,genestrand, @@ -16445,7 +17678,7 @@ if ((hit = 
Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2, ambig_end_length_5_2,ambig_end_length_3_2, ambig_splicetype_5_2,ambig_splicetype_3_2, - min_splice_prob_2, + avg_splice_score_2,goodness2, pairarray2,npairs2,nsegments,nintrons,nindelbreaks, /*left*/start,/*genomiclength*/end - start + 1, /*plusp*/true,genestrand, @@ -16469,7 +17702,7 @@ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, ambig_end_length_5_1,ambig_end_length_3_1, ambig_splicetype_5_1,ambig_splicetype_3_1, - min_splice_prob_1, + avg_splice_score_1,goodness1, pairarray1,npairs1,nsegments,nintrons,nindelbreaks, /*left*/start,/*genomiclength*/end - start + 1, /*plusp*/true,genestrand, @@ -16562,8 +17795,8 @@ max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2, unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2, ncanonical2, nsemicanonical2, nnoncanonical2; - double ambig_prob_5_1, ambig_prob_3_1, min_splice_prob_1; - double ambig_prob_5_2, ambig_prob_3_2, min_splice_prob_2; + double ambig_prob_5_1, ambig_prob_3_1, avg_splice_score_1; + double ambig_prob_5_2, ambig_prob_3_2, avg_splice_score_2; Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1; Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2; Univcoord_T start, end, left; @@ -17106,7 +18339,7 @@ &ambig_splicetype_5_1,&ambig_splicetype_3_1, &ambig_prob_5_1,&ambig_prob_3_1, &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1, - &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1, + &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1, &pairarray2,&pairs2,&npairs2,&goodness2, &matches2,&nmatches_posttrim_2,&max_match_length_2, @@ -17114,7 +18347,7 @@ &ambig_splicetype_5_2,&ambig_splicetype_3_2, &ambig_prob_5_2,&ambig_prob_3_2, &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2, - &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2, + &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2, 
stage2pairs,/*all_stage2_starts*/NULL,/*all_stage2_ends*/NULL, #ifdef END_KNOWNSPLICING_SHORTCUT @@ -17134,7 +18367,7 @@ oligoindices_minor,diagpool,cellpool)) == NULL) { /* hit = (T) NULL; */ - } else if (cdna_direction == 0) { + } else if (pairarray2 != NULL) { nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks, pairarray1,npairs1); start = add_bounded(chroffset + Pair_genomepos(&(pairarray1[0])), @@ -17145,7 +18378,7 @@ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, ambig_end_length_5_1,ambig_end_length_3_1, ambig_splicetype_5_1,ambig_splicetype_3_1, - min_splice_prob_1, + avg_splice_score_1,goodness1, pairarray1,npairs1,nsegments,nintrons,nindelbreaks, /*left*/end,/*genomiclength*/start - end + 1, /*plusp*/false,genestrand, @@ -17167,7 +18400,7 @@ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2, ambig_end_length_5_2,ambig_end_length_3_2, ambig_splicetype_5_2,ambig_splicetype_3_2, - min_splice_prob_2, + avg_splice_score_2,goodness2, pairarray2,npairs2,nsegments,nintrons,nindelbreaks, /*left*/end,/*genomiclength*/start - end + 1, /*plusp*/false,genestrand, @@ -17190,7 +18423,7 @@ if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, ambig_end_length_5_1,ambig_end_length_3_1, ambig_splicetype_5_1,ambig_splicetype_3_1, - min_splice_prob_1, + avg_splice_score_1,goodness1, pairarray1,npairs1,nsegments,nintrons,nindelbreaks, /*left*/end,/*genomiclength*/start - end + 1, /*plusp*/false,genestrand, @@ -17364,7 +18597,7 @@ bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches, bool keep_floors_p, int genestrand, bool first_read_p) { List_T hits, greedy = NULL, subs = NULL, terminals = NULL, indels = NULL, - singlesplicing = NULL, doublesplicing = NULL, shortendsplicing = NULL, + singlesplicing = NULL, shortendsplicing = NULL, longsinglesplicing = NULL, distantsplicing = NULL, 
gmap_hits = NULL, q; Segment_T *plus_anchor_segments = NULL, *minus_anchor_segments = NULL; int n_plus_anchors = 0, n_minus_anchors = 0; @@ -17801,7 +19034,9 @@ debug(printf(" subs: %d\n",List_length(subs))); debug(printf(" indels: %d\n",List_length(indels))); debug(printf(" singlesplicing %d\n",List_length(singlesplicing))); +#ifdef PERFORM_DOUBLESPLICING debug(printf(" doublesplicing %d\n",List_length(doublesplicing))); +#endif debug(printf(" shortendsplicing: %d\n",List_length(shortendsplicing))); debug(printf(" done_level: %d\n",done_level)); @@ -18071,9 +19306,12 @@ FREEA(antiacceptors_minus); } - debug(printf("%d single splices, %d double splices, %d long single splices, %d distant splices\n", - List_length(singlesplicing),List_length(doublesplicing), - List_length(longsinglesplicing),List_length(distantsplicing))); + debug(printf("%d single splices, %d long single splices, %d distant splices", + List_length(singlesplicing),List_length(longsinglesplicing),List_length(distantsplicing))); +#ifdef PERFORM_DOUBLESPLICING + debug(printf(", %d double splices\n",List_length(doublesplicing))); +#endif + debug(printf("\n")); } @@ -18382,144 +19620,333 @@ min_coverage = (int) user_mincoverage_float; } - this_geneplus = Stage1_new(querylength); - this_geneminus = Stage1_new(querylength); + this_geneplus = Stage1_new(querylength); + this_geneminus = Stage1_new(querylength); + + queryuc_ptr = Shortread_fullpointer_uc(queryseq); + quality_string = Shortread_quality_string(queryseq); + query_lastpos = querylength - index1part; + + /* Limit search on repetitive sequences */ + if (check_dinucleotides(queryuc_ptr,querylength) == false) { + user_maxlevel = 0; + } + + query_compress_fwd = Compress_new_fwd(queryuc_ptr,querylength); + query_compress_rev = Compress_new_rev(queryuc_ptr,querylength); + gmap_history = History_new(); + make_complement_buffered(queryrc,queryuc_ptr,querylength); + + if 
(read_oligos(&allvalidp,this_geneplus,queryuc_ptr,querylength,query_lastpos,/*genestrand*/+1) > 0) { + hits_geneplus = align_end(&cutoff_level,this_geneplus,query_compress_fwd,query_compress_rev, + queryuc_ptr,queryrc,querylength,query_lastpos, + indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array, + oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR, + user_maxlevel,indel_penalty_middle,indel_penalty_end, + localsplicing_penalty,distantsplicing_penalty,min_shortend, + allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches, + keep_floors_p,/*genestrand*/+1,/*first_read_p*/true); + } + + if (read_oligos(&allvalidp,this_geneminus,queryuc_ptr,querylength,query_lastpos,/*genestrand*/+2) > 0) { + hits_geneminus = align_end(&cutoff_level,this_geneminus,query_compress_fwd,query_compress_rev, + queryuc_ptr,queryrc,querylength,query_lastpos, + indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array, + oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR, + user_maxlevel,indel_penalty_middle,indel_penalty_end, + localsplicing_penalty,distantsplicing_penalty,min_shortend, + allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches, + keep_floors_p,/*genestrand*/+2,/*first_read_p*/true); + } + + hits = List_append(hits_geneplus,hits_geneminus); + hits = Stage3end_optimal_score(hits,query_compress_fwd,query_compress_rev, + querylength,/*keep_gmap_p*/true,/*finalp*/true); + /* hits = Stage3end_reject_trimlengths(hits); */ + hits = Stage3end_remove_overlaps(hits,/*finalp*/true); + hits = Stage3end_optimal_score(hits,query_compress_fwd,query_compress_rev, + querylength,/*keep_gmap_p*/false,/*finalp*/true); + hits = Stage3end_resolve_multimapping(hits); + + hits = Stage3end_filter_coverage(hits,min_coverage); + if (hits == NULL) { + *npaths_primary = *npaths_altloc = 0; + stage3array = (Stage3end_T *) NULL; + } else { + Stage3end_count_hits(&(*npaths_primary),&(*npaths_altloc),hits); + 
stage3array = (Stage3end_T *) List_to_array_out(hits,NULL); List_free(&hits); /* Return value */ + stage3array = Stage3end_eval_and_sort(/*npaths*/(*npaths_primary) + (*npaths_altloc), + &(*first_absmq),&(*second_absmq), + stage3array,maxpaths_search,queryseq,queryuc_ptr, + query_compress_fwd,query_compress_rev, + quality_string,/*displayp*/true); + } + + History_free(&gmap_history); + Compress_free(&query_compress_fwd); + Compress_free(&query_compress_rev); + Stage1_free(&this_geneminus,querylength); + Stage1_free(&this_geneplus,querylength); + +#ifdef HAVE_ALLOCA + if (querylength <= MAX_STACK_READLENGTH) { + FREEA(queryrc); + } else { + FREE(queryrc); + } +#else + FREE(queryrc); +#endif + + return stage3array; +} + + +Stage3end_T * +Stage1_single_read (int *npaths_primary, int *npaths_altloc, int *first_absmq, int *second_absmq, + Shortread_T queryseq, Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev, + int indexdb_size_threshold, Floors_T *floors_array, + double user_maxlevel_float, double user_mincoverage_float, + int indel_penalty_middle, int indel_penalty_end, + bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches, + int localsplicing_penalty, int distantsplicing_penalty, int min_shortend, + Oligoindex_array_T oligoindices_minor, + Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool, + Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR, + bool keep_floors_p) { + + if (mode == STANDARD || mode == CMET_STRANDED || mode == ATOI_STRANDED || mode == TTOC_STRANDED) { + return single_read(&(*npaths_primary),&(*npaths_altloc),&(*first_absmq),&(*second_absmq), + queryseq,indexdb_fwd,indexdb_rev,indexdb_size_threshold, + floors_array,user_maxlevel_float,user_mincoverage_float, + indel_penalty_middle,indel_penalty_end, + allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches, + localsplicing_penalty,distantsplicing_penalty,min_shortend, + 
oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,keep_floors_p); + } else if (mode == CMET_NONSTRANDED || mode == ATOI_NONSTRANDED || mode == TTOC_NONSTRANDED) { + return single_read_tolerant_nonstranded(&(*npaths_primary),&(*npaths_altloc),&(*first_absmq),&(*second_absmq),queryseq, + indexdb_fwd,indexdb_rev,indexdb_size_threshold, + floors_array,user_maxlevel_float,user_mincoverage_float, + indel_penalty_middle,indel_penalty_end, + allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches, + localsplicing_penalty,distantsplicing_penalty,min_shortend, + oligoindices_minor, + pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,keep_floors_p); + } else { + fprintf(stderr,"Do not recognize mode %d\n",mode); + abort(); + } +} + + + +/* #define HITARRAY_SHORTENDSPLICING 4 */ +/* #define HITARRAY_DISTANTSPLICING 4 */ + + +/* Picks a region nearby */ +static List_T +align_halfmapping_with_gmap_close (bool *successp, List_T hits, History_T gmap_history, Stage3end_T hit5, Stage3end_T hit3, + Shortread_T queryseq5, Shortread_T queryseq3, + char *queryuc_ptr, int querylength, int query_lastpos, +#ifdef END_KNOWNSPLICING_SHORTCUT + char *queryrc, bool invertedp, +#endif + Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor, + Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool, + Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR, + Chrpos_T pairmax, Chrpos_T shortsplicedist, int genestrand) { + int sensedir, sense_try; + + /* int zero_offset = 0; */ + Univcoord_T genomicbound, mappingstart, mappingend, + chroffset, chrhigh; + Chrpos_T chrlength; + Chrnum_T chrnum; + bool good_start_p, good_end_p, watsonp, favor_right_p; + + *successp = false; + debug(printf("Trying halfmapping close\n")); + + if (hit3 == NULL) { + /* Both events are tested by Stage3end_anomalous_splice_p */ + if ((chrnum = Stage3end_chrnum(hit5)) == 0) { + /* Translocation */ + return (List_T) NULL; + + } else if 
(Stage3end_hittype(hit5) == SAMECHR_SPLICE) { + /* A genomic event that doesn't get reflected in chrnum */ + return (List_T) NULL; + + } else if ((watsonp = Stage3end_plusp(hit5)) == true) { + chroffset = Stage3end_chroffset(hit5); + chrhigh = Stage3end_chrhigh(hit5); + chrlength = Stage3end_chrlength(hit5); + + if (Shortread_find_primers(queryseq5,queryseq3) == true) { + /* Go from genomicstart */ + debug13(printf("Found primers\n")); + genomicbound = Stage3end_genomicstart(hit5); + + } else if (Stage3end_anomalous_splice_p(hit5) == true) { + /* Go from genomicstart */ + debug13(printf("Anomalous splice\n")); + genomicbound = Stage3end_genomicstart(hit5); + + } else { + genomicbound = Stage3end_genomicend(hit5); + } + + debug13(printf("Case 1: hit5 plus %s %u..%u (sensedir %d) => genomicbound %u\n", + Stage3end_hittype_string(hit5), + Stage3end_genomicstart(hit5) - chroffset,Stage3end_genomicend(hit5) - chroffset, + Stage3end_sensedir(hit5),genomicbound - chroffset)); + + mappingstart = genomicbound; + mappingend = add_bounded(Stage3end_genomicend(hit5),expected_pairlength + pairlength_deviation + querylength,chrhigh); + favor_right_p = false; + + } else { + chroffset = Stage3end_chroffset(hit5); + chrhigh = Stage3end_chrhigh(hit5); + chrlength = Stage3end_chrlength(hit5); + + if (Shortread_find_primers(queryseq5,queryseq3) == true) { + /* Go from genomicstart */ + debug13(printf("Found primers\n")); + genomicbound = Stage3end_genomicstart(hit5); + + } else if (Stage3end_anomalous_splice_p(hit5) == true) { + /* Go from genomicstart */ + debug13(printf("Anomalous splice\n")); + genomicbound = Stage3end_genomicstart(hit5); + + } else { + genomicbound = Stage3end_genomicend(hit5); + } + + debug13(printf("Case 2: hit5 minus %s %u..%u (sensedir %d) => genomicbound %u\n", + Stage3end_hittype_string(hit5), + Stage3end_genomicstart(hit5) - chroffset,Stage3end_genomicend(hit5) - chroffset, + Stage3end_sensedir(hit5),genomicbound - chroffset)); + + mappingend = 
genomicbound; + mappingstart = subtract_bounded(Stage3end_genomicend(hit5),expected_pairlength + pairlength_deviation + querylength,chroffset); + favor_right_p = false; + } + + if ((sensedir = Stage3end_sensedir(hit5)) == SENSE_FORWARD) { + sense_try = +1; + } else if (sensedir == SENSE_ANTI) { + sense_try = -1; + } else { + sense_try = 0; + } + + } else if (hit5 == NULL) { + /* Both events are tested by Stage3end_anomalous_splice_p */ + if ((chrnum = Stage3end_chrnum(hit3)) == 0) { + /* Translocation */ + return (List_T) NULL; + + } else if (Stage3end_hittype(hit3) == SAMECHR_SPLICE) { + /* A genomic event that doesn't get reflected in chrnum */ + return (List_T) NULL; + + } else if ((watsonp = Stage3end_plusp(hit3)) == true) { + chroffset = Stage3end_chroffset(hit3); + chrhigh = Stage3end_chrhigh(hit3); + chrlength = Stage3end_chrlength(hit3); - queryuc_ptr = Shortread_fullpointer_uc(queryseq); - quality_string = Shortread_quality_string(queryseq); - query_lastpos = querylength - index1part; + if (Shortread_find_primers(queryseq5,queryseq3) == true) { + /* Go from genomicend */ + debug13(printf("Found primers\n")); + genomicbound = Stage3end_genomicend(hit3); - /* Limit search on repetitive sequences */ - if (check_dinucleotides(queryuc_ptr,querylength) == false) { - user_maxlevel = 0; - } + } else if (Stage3end_anomalous_splice_p(hit3) == true) { + /* Go from genomicend */ + debug13(printf("Anomalous splice\n")); + genomicbound = Stage3end_genomicend(hit3); - query_compress_fwd = Compress_new_fwd(queryuc_ptr,querylength); - query_compress_rev = Compress_new_rev(queryuc_ptr,querylength); - gmap_history = History_new(); - make_complement_buffered(queryrc,queryuc_ptr,querylength); + } else { + genomicbound = Stage3end_genomicstart(hit3); + } - if (read_oligos(&allvalidp,this_geneplus,queryuc_ptr,querylength,query_lastpos,/*genestrand*/+1) > 0) { - hits_geneplus = align_end(&cutoff_level,this_geneplus,query_compress_fwd,query_compress_rev, - 
queryuc_ptr,queryrc,querylength,query_lastpos, - indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array, - oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR, - user_maxlevel,indel_penalty_middle,indel_penalty_end, - localsplicing_penalty,distantsplicing_penalty,min_shortend, - allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches, - keep_floors_p,/*genestrand*/+1,/*first_read_p*/true); - } + debug13(printf("Case 3: hit3 plus %s %u..%u (sensedir %d) => genomicbound %u\n", + Stage3end_hittype_string(hit3), + Stage3end_genomicstart(hit3) - chroffset,Stage3end_genomicend(hit3) - chroffset, + Stage3end_sensedir(hit3),genomicbound - chroffset)); - if (read_oligos(&allvalidp,this_geneminus,queryuc_ptr,querylength,query_lastpos,/*genestrand*/+2) > 0) { - hits_geneminus = align_end(&cutoff_level,this_geneminus,query_compress_fwd,query_compress_rev, - queryuc_ptr,queryrc,querylength,query_lastpos, - indexdb_fwd,indexdb_rev,indexdb_size_threshold,floors_array, - oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR, - user_maxlevel,indel_penalty_middle,indel_penalty_end, - localsplicing_penalty,distantsplicing_penalty,min_shortend, - allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches, - keep_floors_p,/*genestrand*/+2,/*first_read_p*/true); - } + mappingend = genomicbound; + mappingstart = subtract_bounded(Stage3end_genomicstart(hit3),expected_pairlength + pairlength_deviation + querylength,chroffset); + favor_right_p = true; - hits = List_append(hits_geneplus,hits_geneminus); - hits = Stage3end_optimal_score(hits,query_compress_fwd,query_compress_rev, - querylength,/*keep_gmap_p*/true,/*finalp*/true); - /* hits = Stage3end_reject_trimlengths(hits); */ - hits = Stage3end_remove_overlaps(hits,/*finalp*/true); - hits = Stage3end_optimal_score(hits,query_compress_fwd,query_compress_rev, - querylength,/*keep_gmap_p*/false,/*finalp*/true); - hits = Stage3end_resolve_multimapping(hits); 
+ } else { + chroffset = Stage3end_chroffset(hit3); + chrhigh = Stage3end_chrhigh(hit3); + chrlength = Stage3end_chrlength(hit3); - hits = Stage3end_filter_coverage(hits,min_coverage); - if (hits == NULL) { - *npaths_primary = *npaths_altloc = 0; - stage3array = (Stage3end_T *) NULL; - } else { - Stage3end_count_hits(&(*npaths_primary),&(*npaths_altloc),hits); - stage3array = (Stage3end_T *) List_to_array_out(hits,NULL); List_free(&hits); /* Return value */ - stage3array = Stage3end_eval_and_sort(/*npaths*/(*npaths_primary) + (*npaths_altloc), - &(*first_absmq),&(*second_absmq), - stage3array,maxpaths_search,queryseq,queryuc_ptr, - query_compress_fwd,query_compress_rev, - quality_string,/*displayp*/true); - } + if (Shortread_find_primers(queryseq5,queryseq3) == true) { + /* Go from genomicend */ + debug13(printf("Found primers\n")); + genomicbound = Stage3end_genomicend(hit3); - History_free(&gmap_history); - Compress_free(&query_compress_fwd); - Compress_free(&query_compress_rev); - Stage1_free(&this_geneminus,querylength); - Stage1_free(&this_geneplus,querylength); + } else if (Stage3end_anomalous_splice_p(hit3) == true) { + /* Go from genomicend */ + debug13(printf("Anomalous splice\n")); + genomicbound = Stage3end_genomicend(hit3); -#ifdef HAVE_ALLOCA - if (querylength <= MAX_STACK_READLENGTH) { - FREEA(queryrc); - } else { - FREE(queryrc); - } -#else - FREE(queryrc); -#endif + } else { + genomicbound = Stage3end_genomicstart(hit3); + } - return stage3array; -} + debug13(printf("Case 4: hit3 minus %s %u..%u (sensedir %d) => genomicbound %u\n", + Stage3end_hittype_string(hit3), + Stage3end_genomicstart(hit3) - chroffset,Stage3end_genomicend(hit3) - chroffset, + Stage3end_sensedir(hit3),genomicbound - chroffset)); + mappingstart = genomicbound; + mappingend = add_bounded(Stage3end_genomicstart(hit3),expected_pairlength + pairlength_deviation + querylength,chrhigh); + favor_right_p = true; + } -Stage3end_T * -Stage1_single_read (int *npaths_primary, int 
*npaths_altloc, int *first_absmq, int *second_absmq, - Shortread_T queryseq, Indexdb_T indexdb_fwd, Indexdb_T indexdb_rev, - int indexdb_size_threshold, Floors_T *floors_array, - double user_maxlevel_float, double user_mincoverage_float, - int indel_penalty_middle, int indel_penalty_end, - bool allow_end_indels_p, int max_end_insertions, int max_end_deletions, int min_indel_end_matches, - int localsplicing_penalty, int distantsplicing_penalty, int min_shortend, - Oligoindex_array_T oligoindices_minor, - Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool, - Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR, - bool keep_floors_p) { + if ((sensedir = Stage3end_sensedir(hit3)) == SENSE_FORWARD) { + sense_try = +1; + } else if (sensedir == SENSE_ANTI) { + sense_try = -1; + } else { + sense_try = 0; + } - if (mode == STANDARD || mode == CMET_STRANDED || mode == ATOI_STRANDED || mode == TTOC_STRANDED) { - return single_read(&(*npaths_primary),&(*npaths_altloc),&(*first_absmq),&(*second_absmq), - queryseq,indexdb_fwd,indexdb_rev,indexdb_size_threshold, - floors_array,user_maxlevel_float,user_mincoverage_float, - indel_penalty_middle,indel_penalty_end, - allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches, - localsplicing_penalty,distantsplicing_penalty,min_shortend, - oligoindices_minor,pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,keep_floors_p); - } else if (mode == CMET_NONSTRANDED || mode == ATOI_NONSTRANDED || mode == TTOC_NONSTRANDED) { - return single_read_tolerant_nonstranded(&(*npaths_primary),&(*npaths_altloc),&(*first_absmq),&(*second_absmq),queryseq, - indexdb_fwd,indexdb_rev,indexdb_size_threshold, - floors_array,user_maxlevel_float,user_mincoverage_float, - indel_penalty_middle,indel_penalty_end, - allow_end_indels_p,max_end_insertions,max_end_deletions,min_indel_end_matches, - localsplicing_penalty,distantsplicing_penalty,min_shortend, - oligoindices_minor, - 
pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR,keep_floors_p); } else { - fprintf(stderr,"Do not recognize mode %d\n",mode); abort(); } -} - - -/* #define HITARRAY_SHORTENDSPLICING 4 */ -/* #define HITARRAY_DISTANTSPLICING 4 */ + debug13(printf("Halfmapping close: Running gmap with mappingstart %u and mappingend %u\n",mappingstart,mappingend)); + return run_gmap_for_region(&(*successp),&good_start_p,&good_end_p,gmap_history, + hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p, + mappingstart,mappingend, + /*knownsplice_limit_low*/mappingstart,/*knownsplice_limit_high*/mappingend, + watsonp,genestrand,chrnum,chroffset,chrhigh,chrlength, + oligoindices_major,oligoindices_minor, + pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR); +} +/* Uses segments to find a region to search */ static List_T -align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end_T hit3, - Shortread_T queryseq5, Shortread_T queryseq3, - char *queryuc_ptr, int querylength, int query_lastpos, +align_halfmapping_with_gmap_far (List_T hits, History_T gmap_history, Stage3end_T hit5, Stage3end_T hit3, + Shortread_T queryseq5, Shortread_T queryseq3, + char *queryuc_ptr, int querylength, int query_lastpos, #ifdef END_KNOWNSPLICING_SHORTCUT - char *queryrc, bool invertedp, + char *queryrc, bool invertedp, #endif - struct Segment_T *plus_segments, int plus_nsegments, - struct Segment_T *minus_segments, int minus_nsegments, - Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor, - Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool, - Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR, - Chrpos_T pairmax, Chrpos_T shortsplicedist, int genestrand) { - List_T hits = NULL; + struct Segment_T *plus_segments, int plus_nsegments, + struct Segment_T *minus_segments, int minus_nsegments, + Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor, + Pairpool_T pairpool, Diagpool_T 
diagpool, Cellpool_T cellpool, + Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR, + Chrpos_T pairmax, Chrpos_T shortsplicedist, int genestrand) { int sensedir, sense_try; /* int zero_offset = 0; */ @@ -18539,10 +19966,12 @@ bool close_mappingstart_p = false, close_mappingend_p = false; bool middle_mappingstart_p = false, middle_mappingend_p = false; bool fallback_mappingstart_p, fallback_mappingend_p; - bool good_start_p, good_end_p, watsonp, favor_right_p; + bool successp, good_start_p, good_end_p, watsonp, favor_right_p; int starti, endi, i; + debug(printf("Trying halfmapping far\n")); + if (hit3 == NULL) { /* Both events are tested by Stage3end_anomalous_splice_p */ if ((chrnum = Stage3end_chrnum(hit5)) == 0) { @@ -18590,7 +20019,7 @@ Stage3end_sensedir(hit5),genomicbound - chroffset)); knownsplice_limit_low = mappingstart = segmentstart = genomicbound; - knownsplice_limit_high = add_bounded(Stage3end_genomicend(hit5),pairmax + overall_max_distance,chrhigh); + knownsplice_limit_high = add_bounded(Stage3end_genomicend(hit5),pairmax + overall_max_distance,chrhigh); segmentend = add_bounded(Stage3end_genomicend(hit5),pairmax + PAIRMAX_ADDITIONAL,chrhigh); #ifdef LONG_ENDSPLICES mappingend = add_bounded(Stage3end_genomicend(hit5),pairmax + overall_max_distance,chrhigh); @@ -19270,7 +20699,7 @@ if (close_mappingstart_p == true && close_mappingend_p == true) { debug13(printf("Halfmapping: Running gmap with close mappingstart and close mappingend\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history, hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p, close_mappingstart_last,close_mappingend_last, close_knownsplice_limit_low,close_knownsplice_limit_high, @@ -19285,7 +20714,7 @@ } else if (/* require both ends to be good */ 0 && good_start_p == true) { if (fallback_mappingend_p == true) { debug13(printf("Halfmapping: Re-running 
gmap with close mappingstart only\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history, hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p, close_mappingstart_last,mappingend, close_knownsplice_limit_low,knownsplice_limit_high, @@ -19297,7 +20726,7 @@ } else if (/* require both ends to be good */ 0 && good_end_p == true) { if (fallback_mappingstart_p == true) { debug13(printf("Halfmapping: Re-running gmap with close mappingend only\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history, hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p, mappingstart,close_mappingend_last, knownsplice_limit_low,close_knownsplice_limit_high, @@ -19308,7 +20737,7 @@ } else { if (fallback_mappingstart_p == true && fallback_mappingend_p == true) { debug13(printf("Halfmapping: Re-running gmap with far mappingstart and mappingend\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history, hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p, mappingstart,mappingend, knownsplice_limit_low,knownsplice_limit_high, @@ -19320,7 +20749,7 @@ } else if (close_mappingstart_p == true) { debug13(printf("Halfmapping: Running gmap with close mappingstart\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history, hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p, close_mappingstart_last,mappingend, close_knownsplice_limit_low,knownsplice_limit_high, @@ -19334,7 +20763,7 @@ debug13(printf("Skipping re-run of gmap\n")); } else if (fallback_mappingstart_p == true) { debug13(printf("Halfmapping: 
Re-running gmap with far mappingstart\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history, hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p, mappingstart,mappingend, knownsplice_limit_low,knownsplice_limit_high, @@ -19345,7 +20774,7 @@ } else if (close_mappingend_p == true) { debug13(printf("Halfmapping: Running gmap with close mappingend\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history, hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p, mappingstart,close_mappingend_last, knownsplice_limit_low,close_knownsplice_limit_high, @@ -19359,7 +20788,7 @@ debug13(printf("Skipping re-run of gmap\n")); } else if (fallback_mappingend_p == true) { debug13(printf("Halfmapping: Re-running gmap with far mappingend\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history, hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p, mappingstart,mappingend, knownsplice_limit_low,knownsplice_limit_high, @@ -19370,7 +20799,7 @@ } else { debug13(printf("Halfmapping: Running gmap with far mappingstart and mappingend\n")); - hits = run_gmap_for_region(&good_start_p,&good_end_p,gmap_history, + hits = run_gmap_for_region(&successp,&good_start_p,&good_end_p,gmap_history, hits,Shortread_accession(queryseq5),queryuc_ptr,querylength,sense_try,favor_right_p, mappingstart,mappingend, knownsplice_limit_low,knownsplice_limit_high, @@ -19384,6 +20813,43 @@ static List_T +align_halfmapping_with_gmap (History_T gmap_history, Stage3end_T hit5, Stage3end_T hit3, + Shortread_T queryseq5, Shortread_T queryseq3, + char *queryuc_ptr, int querylength, int query_lastpos, +#ifdef END_KNOWNSPLICING_SHORTCUT + char 
*queryrc, bool invertedp, +#endif + struct Segment_T *plus_segments, int plus_nsegments, + struct Segment_T *minus_segments, int minus_nsegments, + Oligoindex_array_T oligoindices_major, Oligoindex_array_T oligoindices_minor, + Pairpool_T pairpool, Diagpool_T diagpool, Cellpool_T cellpool, + Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR, + Chrpos_T pairmax, Chrpos_T shortsplicedist, int genestrand) { + List_T hits = NULL; + bool successp; + + debug(printf("Trying halfmapping close\n")); + hits = align_halfmapping_with_gmap_close(&successp,hits,gmap_history,hit5,hit3, + queryseq5,queryseq3,queryuc_ptr,querylength,query_lastpos, + oligoindices_major,oligoindices_minor, + pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR, + pairmax,shortsplicedist,genestrand); + if (successp == true) { + debug(printf("Trying halfmapping close succeeded\n")); + return hits; + } else { + debug(printf("Trying halfmapping close failed. Trying halfmapping far\n")); + return align_halfmapping_with_gmap_far(hits,gmap_history,hit5,hit3, + queryseq5,queryseq3,queryuc_ptr,querylength,query_lastpos, + plus_segments,plus_nsegments,minus_segments,minus_nsegments, + oligoindices_major,oligoindices_minor, + pairpool,diagpool,cellpool,dynprogL,dynprogM,dynprogR, + pairmax,shortsplicedist,genestrand); + } +} + + +static List_T align_pair_with_gmap (Pairtype_T *final_pairtype, List_T result, char *queryuc_ptr_5, char *queryuc_ptr_3, int querylength5, int querylength3, @@ -20086,6 +21552,7 @@ hitarray3,/*narray3*/HITARRAY_SUBS+1, *cutoff_level_5,*cutoff_level_3, querylength5,querylength3,maxpairedpaths,genestrand); + debug(printf("After pairing exact, found %d concordant, %d samechr, %d terminals, found_score %d\n", nconcordant,nsamechr,List_length(*terminals),*found_score)); if (*abort_pairing_p == true) { @@ -20135,6 +21602,7 @@ hitarray3,/*narray3*/HITARRAY_SUBS+1, *cutoff_level_5,*cutoff_level_3, querylength5,querylength3,maxpairedpaths,genestrand); + debug(printf("After pairing one 
mismatch, found %d concordant, %d samechr, %d terminals, found_score %d\n", nconcordant,nsamechr,List_length(*terminals),*found_score)); if (*abort_pairing_p == true) { @@ -20356,6 +21824,7 @@ hitarray3,/*narray3*/HITARRAY_INDELS+1, *cutoff_level_5,*cutoff_level_3, querylength5,querylength3,maxpairedpaths,genestrand); + debug(printf("After pairing complete set mismatches and indels, found %d concordant, %d nsamechr, %d terminals, found_score %d\n", nconcordant,nsamechr,List_length(*terminals),*found_score)); if (*abort_pairing_p == true) { @@ -20510,6 +21979,7 @@ hitarray3,/*narray3*/HITARRAY_SINGLESPLICING+1, *cutoff_level_5,*cutoff_level_3, querylength5,querylength3,maxpairedpaths,genestrand); + debug(printf("After pairing single splicing, found %d concordant, %d nsamechr, %d terminals, found_score %d\n", nconcordant,nsamechr,List_length(*terminals),*found_score)); if (*abort_pairing_p == true) { @@ -20762,6 +22232,7 @@ /*hitarray3*/&(*hits3),/*narray3*/1, *cutoff_level_5,*cutoff_level_3, querylength5,querylength3,maxpairedpaths,genestrand); + debug(printf("11> After pairing GMAP, found %d concordant, %d samechr, %d terminals, found_score %d\n", nconcordant,nsamechr,List_length(*terminals),*found_score)); if (*abort_pairing_p == false) { @@ -21025,6 +22496,7 @@ /*hitarray3*/&(*hits3),/*narray3*/1, *cutoff_level_5,*cutoff_level_3, querylength5,querylength3,maxpairedpaths,genestrand); + debug(printf("10> After pairing long single splicing, found %d concordant, %d samechr, %d terminals, found_score %d\n", nconcordant,nsamechr,List_length(*terminals),*found_score)); @@ -21063,6 +22535,7 @@ /*hitarray3*/&(*hits3),/*narray3*/1, *cutoff_level_5,*cutoff_level_3, querylength5,querylength3,maxpairedpaths,genestrand); + debug(printf("11> After pairing distant splicing, found %d concordant, %d samechr, %d terminals, found_score %d\n", nconcordant,nsamechr,List_length(*terminals),*found_score)); @@ -21102,6 +22575,7 @@ /*hitarray3*/&(*hits3),/*narray3*/1, 
*cutoff_level_5,*cutoff_level_3, querylength5,querylength3,maxpairedpaths,genestrand); + debug(printf("After pairing terminals, found %d concordant, %d nsamechr, %d terminals, found_score %d\n", nconcordant,nsamechr,List_length(*terminals),*found_score)); } @@ -21155,6 +22629,7 @@ best_score_paired)); for (p = *hits5; p != NULL && i < max_gmap_pairsearch; p = List_next(p)) { hit5 = (Stage3end_T) List_head(p); + debug13(printf("hit5 has score %d. Used in pair %d\n",Stage3end_score(hit5),Stage3end_paired_usedp(hit5))); if (Stage3end_hittype(hit5) == TRANSLOC_SPLICE) { debug13(printf("No GMAP on transloc splice\n")); } else if (Stage3end_paired_usedp(hit5) == false && Stage3end_score(hit5) <= best_score_paired) { @@ -21239,6 +22714,7 @@ best_score_paired)); for (p = *hits3; p != NULL && i < max_gmap_pairsearch; p = List_next(p)) { hit3 = (Stage3end_T) List_head(p); + debug13(printf("hit3 has score %d. Used in pair %d\n",Stage3end_score(hit3),Stage3end_paired_usedp(hit3))); if (Stage3end_hittype(hit3) == TRANSLOC_SPLICE) { debug13(printf("Not GMAP on transloc splice\n")); } else if (Stage3end_paired_usedp(hit3) == false && Stage3end_score(hit3) <= best_score_paired) { @@ -21428,6 +22904,7 @@ hitarray3,/*narray3*/HITARRAY_DOUBLESPLICING+1, *cutoff_level_5,*cutoff_level_3, querylength5,querylength3,maxpairedpaths,genestrand); + debug(printf("After pairing short-overlap splicing, found %d concordant, %d samechr, %d terminals, found_score %d\n", nconcordant,nsamechr,List_length(*terminals),*found_score)); if (*abort_pairing_p == false) { @@ -21648,37 +23125,35 @@ } } - if (hitpairs == NULL) { - for (p = terminals; p != NULL; p = p->rest) { - hitpair = (Stage3pair_T) p->first; - if ((nmatches = Stage3pair_nmatches_posttrim(&nmatches5,&nmatches3,hitpair)) > *best_nmatches_paired) { - final_pairtype = PAIRED_TERMINALS; - *best_nmatches_paired = nmatches; - *best_nmatches_5 = nmatches5; - *best_nmatches_3 = nmatches3; - } + for (p = terminals; p != NULL; p = p->rest) { + 
hitpair = (Stage3pair_T) p->first; + if ((nmatches = Stage3pair_nmatches_posttrim(&nmatches5,&nmatches3,hitpair)) > *best_nmatches_paired) { + final_pairtype = PAIRED_TERMINALS; + *best_nmatches_paired = nmatches; + *best_nmatches_5 = nmatches5; + *best_nmatches_3 = nmatches3; } + } - *best_nmatches_paired += 1; /* penalty for choosing translocation over others */ - - for (p = conc_transloc; p != NULL; p = p->rest) { - hitpair = (Stage3pair_T) p->first; - if ((nmatches = Stage3pair_nmatches_posttrim(&nmatches5,&nmatches3,hitpair)) > *best_nmatches_paired) { - final_pairtype = CONCORDANT_TRANSLOCATIONS; - *best_nmatches_paired = nmatches; - *best_nmatches_5 = nmatches5; - *best_nmatches_3 = nmatches3; - } + *best_nmatches_paired += 1; /* penalty for choosing translocation over others */ + + for (p = conc_transloc; p != NULL; p = p->rest) { + hitpair = (Stage3pair_T) p->first; + if ((nmatches = Stage3pair_nmatches_posttrim(&nmatches5,&nmatches3,hitpair)) > *best_nmatches_paired) { + final_pairtype = CONCORDANT_TRANSLOCATIONS; + *best_nmatches_paired = nmatches; + *best_nmatches_5 = nmatches5; + *best_nmatches_3 = nmatches3; } + } - for (p = samechr; p != NULL; p = p->rest) { - hitpair = (Stage3pair_T) p->first; - if ((nmatches = Stage3pair_nmatches_posttrim(&nmatches5,&nmatches3,hitpair)) > *best_nmatches_paired) { - final_pairtype = PAIRED_UNSPECIFIED; - *best_nmatches_paired = nmatches; - *best_nmatches_5 = nmatches5; - *best_nmatches_3 = nmatches3; - } + for (p = samechr; p != NULL; p = p->rest) { + hitpair = (Stage3pair_T) p->first; + if ((nmatches = Stage3pair_nmatches_posttrim(&nmatches5,&nmatches3,hitpair)) > *best_nmatches_paired) { + final_pairtype = PAIRED_UNSPECIFIED; + *best_nmatches_paired = nmatches; + *best_nmatches_5 = nmatches5; + *best_nmatches_3 = nmatches3; } } @@ -21856,9 +23331,10 @@ int best_nmatches_paired, best_nmatches_paired_5, best_nmatches_paired_3, best_nmatches_5, best_nmatches_3; - debug16(printf("Entered consolidate_paired_results. 
Passing pointer %p\n",&best_nmatches_paired)); *final_pairtype = choose_among_paired(&best_nmatches_paired,&best_nmatches_paired_5,&best_nmatches_paired_3, hitpairs,samechr,conc_transloc,terminals); + debug16(printf("Entered consolidate_paired_results with final_pairtype %d\n",*final_pairtype)); + if (*final_pairtype == CONCORDANT) { /* Have concordant results */ @@ -21891,6 +23367,13 @@ query5_compress_fwd,query5_compress_rev, query3_compress_fwd,query3_compress_rev, querylength5,querylength3,/*keep_gmap_p*/false,/*finalp*/true); + + /* result = Stage3pair_resolve_insides(result,queryuc_ptr_5,queryuc_ptr_3, + query5_compress_fwd,query5_compress_rev, + query3_compress_fwd,query3_compress_rev, + pairpool,dynprogL,dynprogM,dynprogR, + oligoindices_minor,diagpool,cellpool); */ + result = Stage3pair_resolve_multimapping(result); /* result = Stage3pair_sort_distance(result); */ debug16(printf("After removing overlaps, %d results\n",List_length(result))); @@ -21932,6 +23415,12 @@ query3_compress_fwd,query3_compress_rev, querylength5,querylength3,/*keep_gmap_p*/false,/*finalp*/true); + result = Stage3pair_resolve_insides(result,queryuc_ptr_5,queryuc_ptr_3, + query5_compress_fwd,query5_compress_rev, + query3_compress_fwd,query3_compress_rev, + pairpool,dynprogL,dynprogM,dynprogR, + oligoindices_minor,diagpool,cellpool); + /* TODO: Resolve terminals by doing full GMAP, and then redo optimal_score */ result = Stage3pair_resolve_multimapping(result); @@ -21996,16 +23485,24 @@ query5_compress_fwd,query5_compress_rev, query3_compress_fwd,query3_compress_rev, querylength5,querylength3,/*keep_gmap_p*/false,/*finalp*/true); - result = Stage3pair_resolve_multimapping(result); if (Stage3pair_concordantp(result) == true) { debug16(printf("Found remaining concordant solution, so removing non-concordant ones\n")); *final_pairtype = CONCORDANT; result = Stage3pair_filter_nonconcordant(result); + + result = Stage3pair_resolve_insides(result,queryuc_ptr_5,queryuc_ptr_3, + 
query5_compress_fwd,query5_compress_rev, + query3_compress_fwd,query3_compress_rev, + pairpool,dynprogL,dynprogM,dynprogR, + oligoindices_minor,diagpool,cellpool); + debug16(printf("Concordant results: %d\n",List_length(result))); } else { *final_pairtype = PAIRED_UNSPECIFIED; } + + result = Stage3pair_resolve_multimapping(result); } } else if (*final_pairtype == PAIRED_TERMINALS) { @@ -22068,6 +23565,13 @@ query5_compress_fwd,query5_compress_rev, query3_compress_fwd,query3_compress_rev, querylength5,querylength3,/*keep_gmap_p*/false,/*finalp*/true); + + result = Stage3pair_resolve_insides(result,queryuc_ptr_5,queryuc_ptr_3, + query5_compress_fwd,query5_compress_rev, + query3_compress_fwd,query3_compress_rev, + pairpool,dynprogL,dynprogM,dynprogR, + oligoindices_minor,diagpool,cellpool); + result = Stage3pair_resolve_multimapping(result); #if 0 @@ -22119,6 +23623,13 @@ query5_compress_fwd,query5_compress_rev, query3_compress_fwd,query3_compress_rev, querylength5,querylength3,/*keep_gmap_p*/false,/*finalp*/true); + + result = Stage3pair_resolve_insides(result,queryuc_ptr_5,queryuc_ptr_3, + query5_compress_fwd,query5_compress_rev, + query3_compress_fwd,query3_compress_rev, + pairpool,dynprogL,dynprogM,dynprogR, + oligoindices_minor,diagpool,cellpool); + result = Stage3pair_resolve_multimapping(result); debug16(printf("Finally, have %d concordant translocation results\n",List_length(result))); @@ -22206,6 +23717,13 @@ query5_compress_fwd,query5_compress_rev, query3_compress_fwd,query3_compress_rev, querylength5,querylength3,/*keep_gmap_p*/false,/*finalp*/true); + + /* result = Stage3pair_resolve_insides(result,queryuc_ptr_5,queryuc_ptr_3, + query5_compress_fwd,query5_compress_rev, + query3_compress_fwd,query3_compress_rev, + pairpool,dynprogL,dynprogM,dynprogR, + oligoindices_minor,diagpool,cellpool); */ + result = Stage3pair_resolve_multimapping(result); /* result = Stage3pair_sort_distance(result); */ debug16(printf("After removing overlaps, %d 
results\n",List_length(result))); @@ -22217,6 +23735,9 @@ query5_compress_fwd,query5_compress_rev, query3_compress_fwd,query3_compress_rev, querylength5,querylength3,/*keep_gmap_p*/false,/*finalp*/false); + + /* Do align_pair_with_gmap before trying resolve_insides */ + result = Stage3pair_resolve_multimapping(result); /* result = Stage3pair_sort_distance(result); */ debug16(printf("After removing overlaps, %d results\n",List_length(result))); @@ -22242,6 +23763,13 @@ query5_compress_fwd,query5_compress_rev, query3_compress_fwd,query3_compress_rev, querylength5,querylength3,/*keep_gmap_p*/false,/*finalp*/true); + + result = Stage3pair_resolve_insides(result,queryuc_ptr_5,queryuc_ptr_3, + query5_compress_fwd,query5_compress_rev, + query3_compress_fwd,query3_compress_rev, + pairpool,dynprogL,dynprogM,dynprogR, + oligoindices_minor,diagpool,cellpool); + result = Stage3pair_resolve_multimapping(result); } } @@ -22288,8 +23816,7 @@ if ((newpair = Stage3pair_new(hit5,hit3,/*genestrand*/0,pairtype, /*private5p*/false,/*private3p*/false, /*expect_concordant_p*/pairtype == CONCORDANT ? 
true : false)) != NULL) { - stage3pairarray = (Stage3pair_T *) CALLOC_OUT(1,sizeof(Stage3pair_T)); - stage3pairarray[0] = newpair; + result = List_push(NULL,(void *) newpair); *nhits5_primary = *nhits5_altloc = 0; *nhits3_primary = *nhits3_altloc = 0; @@ -22305,10 +23832,22 @@ if (pairtype == CONCORDANT) { debug16(printf("final pairtype is CONCORDANT\n")); *final_pairtype = CONCORDANT; + + result = Stage3pair_resolve_insides(result,queryuc_ptr_5,queryuc_ptr_3, + query5_compress_fwd,query5_compress_rev, + query3_compress_fwd,query3_compress_rev, + pairpool,dynprogL,dynprogM,dynprogR, + oligoindices_minor,diagpool,cellpool); + } else { debug16(printf("final pairtype is PAIRED_UNSPECIFIED\n")); *final_pairtype = PAIRED_UNSPECIFIED; } + + stage3pairarray = (Stage3pair_T *) CALLOC_OUT(1,sizeof(Stage3pair_T)); + stage3pairarray[0] = (Stage3pair_T) List_head(result); + List_free(&result); + Stage3pair_privatize(stage3pairarray,/*npairs*/1); Stage3pair_eval_and_sort(/*npaths*/(*npaths_primary) + (*npaths_altloc), &(*first_absmq),&(*second_absmq),stage3pairarray,maxpaths_search,queryseq5,queryseq3, @@ -22395,6 +23934,7 @@ return (Stage3pair_T *) NULL; } else { + debug16(printf("final pairtype is %d\n",*final_pairtype)); debug16(printf("Result is not NULL (%d paths), and we fall through to concordant, paired, or transloc pairs\n", List_length(result))); @@ -23010,7 +24550,7 @@ bool distances_observed_p_in, int subopt_levels_in, int min_indel_end_matches_in, int max_middle_insertions_in, int max_middle_deletions_in, Chrpos_T shortsplicedist_in, Chrpos_T shortsplicedist_known_in, Chrpos_T shortsplicedist_novelend_in, - Chrpos_T min_intronlength_in, + Chrpos_T min_intronlength_in, Chrpos_T expected_pairlength_in, Chrpos_T pairlength_deviation_in, int min_distantsplicing_end_matches_in, int min_distantsplicing_identity_in, @@ -23088,6 +24628,8 @@ } min_intronlength = min_intronlength_in; + expected_pairlength = expected_pairlength_in; + pairlength_deviation = 
pairlength_deviation_in; min_distantsplicing_end_matches = min_distantsplicing_end_matches_in; min_distantsplicing_identity = min_distantsplicing_identity_in; @@ -23099,7 +24641,6 @@ gmap_segments_p = false; gmap_pairsearch_p = false; - gmap_indel_knownsplice_p = false; gmap_improvement_p = false; fprintf(stderr,"GMAP modes:"); @@ -23112,15 +24653,6 @@ fprintf(stderr," pairsearch"); gmap_pairsearch_p = true; } - if ((gmap_mode & GMAP_INDEL_KNOWNSPLICE) != 0) { - if (gmapp == true) { - fprintf(stderr,","); - } else { - gmapp = true; - } - fprintf(stderr," indel_knownsplice"); - gmap_indel_knownsplice_p = true; - } if ((gmap_mode & GMAP_TERMINAL) != 0) { if (gmapp == true) { fprintf(stderr,","); diff -Nru gmap-2016-11-07/src/stage1hr.h gmap-2017-01-14/src/stage1hr.h --- gmap-2016-11-07/src/stage1hr.h 2016-08-16 20:21:03.000000000 +0000 +++ gmap-2017-01-14/src/stage1hr.h 2016-12-29 16:20:17.000000000 +0000 @@ -1,4 +1,4 @@ -/* $Id: stage1hr.h 196434 2016-08-16 20:21:03Z twu $ */ +/* $Id: stage1hr.h 202031 2016-12-29 16:20:14Z twu $ */ #ifndef STAGE1HR_INCLUDED #define STAGE1HR_INCLUDED @@ -34,8 +34,7 @@ #define GMAP_IMPROVEMENT 1 #define GMAP_TERMINAL 2 -#define GMAP_INDEL_KNOWNSPLICE 4 -#define GMAP_PAIRSEARCH 8 +#define GMAP_PAIRSEARCH 4 typedef struct Floors_T *Floors_T; @@ -99,7 +98,7 @@ bool distances_observed_p_in, int subopt_levels_in, int min_indel_end_matches_in, int max_middle_insertions_in, int max_middle_deletions_in, Chrpos_T shortsplicedist_in, Chrpos_T shortsplicedist_known_in, Chrpos_T shortsplicedist_novelend_in, - Chrpos_T min_intronlength_in, + Chrpos_T min_intronlength_in, Chrpos_T expected_pairlength_in, Chrpos_T pairlength_deviation_in, int min_distantsplicing_end_matches_in, int min_distantsplicing_identity_in, diff -Nru gmap-2016-11-07/src/stage2.c gmap-2017-01-14/src/stage2.c --- gmap-2016-11-07/src/stage2.c 2016-09-24 00:44:53.000000000 +0000 +++ gmap-2017-01-14/src/stage2.c 2016-12-30 14:34:47.000000000 +0000 @@ -1,4 +1,4 @@ -static char 
rcsid[] = "$Id: stage2.c 198275 2016-09-24 00:44:53Z twu $"; +static char rcsid[] = "$Id: stage2.c 202031 2016-12-29 16:20:14Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -102,8 +102,8 @@ #define SHIFT_EXTRA 15 -#define ONE 1.0 -#define TEN_THOUSAND 10000.0 +#define ONE 1 +#define TEN_THOUSAND 8192 /* Power of 2 */ #define HUNDRED_THOUSAND 100000.0 #define ONE_MILLION 1000000.0 @@ -345,13 +345,13 @@ int fwd_consecutive; int fwd_rootposition; /*int fwd_rootnlinks;*/ /* Number of links in last branch */ - int fwd_score; + /* int fwd_score; */ /* Kept as a separate structure */ int fwd_pos; int fwd_hit; + int fwd_tracei; /* Corresponds to a distinct set of branches */ #ifdef DEBUG9 - int fwd_tracei; /* Corresponds to a distinct set of branches */ int fwd_intronnfwd; int fwd_intronnrev; int fwd_intronnunk; @@ -469,11 +469,11 @@ /* For PMAP, indexsize is in aa */ static void -Linkmatrix_print_fwd (struct Link_T **links, Chrpos_T **mappings, int length1, - int *npositions, char *queryseq_ptr, int indexsize) { +print_fwd (struct Link_T **links, int **fwd_scores, + Chrpos_T **mappings, int length1, + int *npositions, char *queryseq_ptr, int indexsize) { int i, j, lastpos; char *oligo; - Intlist_T p, q; oligo = (char *) MALLOCA((indexsize+1) * sizeof(char)); lastpos = length1 - indexsize; @@ -483,7 +483,7 @@ printf("Querypos %d (%s, %d positions):",i,oligo,npositions[i]); for (j = 0; j < npositions[i]; j++) { printf(" %d.%u:%d(%d,%d)[%u]", - j,mappings[i][j],links[i][j].fwd_score, + j,mappings[i][j],fwd_scores[i][j], links[i][j].fwd_pos,links[i][j].fwd_hit,links[i][j].fwd_tracei); } printf("\n"); @@ -540,6 +540,7 @@ } +#if 0 static void best_path_dump_R (struct Link_T **links, Chrpos_T **mappings, int querypos, int hit, bool fwdp, char *varname) { @@ -614,6 +615,7 @@ return; } +#endif static void active_bounds_dump_R (Chrpos_T *minactive, Chrpos_T *maxactive, @@ -912,14 +914,141 @@ #endif -static void -score_querypos_lookback_one ( +#if 0 +/* SIMD version */ +_positions 
= _mm_set1_epi32(position - indexsize_nt); +_querydistance = _mm_set1_epi32(querydistance); +_splicing_querydist_penalty = _mm_set1_epi32(querydist_penalty+1+NINTRON_PENALTY_MISMATCH); +_max_scores = _mm_set1_epi32(-1000); + +prevhit = low_hit; +while (prevhit + 4 < high_hit) { + /* printf("fwd: prevposition %u, prevhit %d\n",prevposition,prevhit); */ + _prevpositions = _mm_loadu_epi32(&(mappings[prev_querypos][prevhit])); + _gendistance = _mm_sub_epi32(_positions,_prevpositions); + if (_mm_cmpgt_epi32(_gendistance,_zeroes) == 0) { + break; + } else { + _diffdistance = _mm_abs_epi32(_mm_sub_epi32(_gendistance,_querydistance)); + + _prev_scores = _mm_loadu_epi32(&(fwd_scores[prev_querypos][prevhit])); + + _scores_close = _mm_add_epi32(_prev_scores,_mm_set1_epi32(CONSEC_POINTS_PER_MATCH)); + /* Right shift of 13 bits gives division by 8192 */ + _scores_splice = _mm_add_epi32(_prev_scores,_mm_sub_epi32(_mm_srli_epi32(_diffdistance,13),_splicing_querydist_penalty)); + + _scores = _mm_blendv_ps(_scores_close,_scores_splice,_mm_cmpgt_epi32(_diffdistance,_mm_set1_epi32(EQUAL_DISTANCE_NOT_SPLICING))); + + _mm_storeu_epi32(_scores); + + _max_scores = _mm_max_epi32(_max_scores,_scores); + prevhit += 4; + } +} + +/* Take care of serial cases */ + + + + +/* Compute overall max and return. Caller can find prev_querypos with + largest max and store in fwd_pos[curr_querypos][currhit] and max in + fwd_max[curr_querypos][currhit]. During traceback, recompute at + prev_querypos and find prevhit that gives the max. 
*/ + + if (diffdistance < maxintronlen) { + if (diffdistance <= EQUAL_DISTANCE_NOT_SPLICING) { + debug9(canonicalsgn = 9); + fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH; +#ifdef PMAP + if (diffdistance % 3 != 0) { + fwd_score -= NONCODON_INDEL_PENALTY; + } +#endif + } else if (near_end_p == false && prevlink->fwd_consecutive < EXON_DEFN) { + debug9(canonicalsgn = 0); + if (splicingp == true) { + fwd_score = prevlink->fwd_score - (diffdistance/TEN_THOUSAND + 1) - querydist_penalty - NINTRON_PENALTY_MISMATCH; + } else { + fwd_score = prevlink->fwd_score - (diffdistance/ONE + 1) - querydist_penalty - NINTRON_PENALTY_MISMATCH; + } + + } else if (splicingp == false) { + debug9(canonicalsgn = 0); + fwd_score = prevlink->fwd_score - (diffdistance/ONE + 1) - querydist_penalty; + + } else if (use_shifted_canonical_p == true) { + leftpos = prevposition + querydistance - 1; + /* printf("leftpos %d, last_leftpos %d, rightpos %d\n",leftpos,last_leftpos,rightpos); */ + if (leftpos == last_leftpos) { + canonicalp = last_canonicalp; + } else { + debug7(printf("Calling find_shift_canonical fwd\n")); + canonicalp = find_shifted_canonical(leftpos,rightpos,querydistance-indexsize_nt, + /* &lastGT,&lastAG, */ + Genome_prev_donor_position,Genome_prev_acceptor_position, + chroffset,chrhigh,plusp,skip_repetitive_p); + /* And need to check for shift_canonical_rev */ + + last_leftpos = leftpos; + last_canonicalp = canonicalp; + } + if (canonicalp == true) { + debug9(canonicalsgn = +1); + fwd_score = prevlink->fwd_score - (diffdistance/TEN_THOUSAND + 1) - querydist_penalty; + } else { + debug9(canonicalsgn = 0); + fwd_score = prevlink->fwd_score - (diffdistance/TEN_THOUSAND + 1) - querydist_penalty - NINTRON_PENALTY_MISMATCH; + } + + } else { + debug9(canonicalsgn = +1); + fwd_score = prevlink->fwd_score - (diffdistance/TEN_THOUSAND + 1) - querydist_penalty; + } + + debug9(printf("\tD. 
Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d, intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)", + prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit], + prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive, + best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk, + gendistance,querydistance,canonicalsgn)); + + /* Allow ties, which should favor shorter intron */ + if (fwd_score >= best_fwd_score) { + if (diffdistance <= EQUAL_DISTANCE_FOR_CONSECUTIVE) { + best_fwd_consecutive = prevlink->fwd_consecutive + (querydistance + indexsize_nt); + /* best_fwd_rootnlinks = prevlink->fwd_rootnlinks + 1; */ + } else { + best_fwd_consecutive = 0; + /* best_fwd_rootnlinks = 1; */ + } + best_fwd_score = fwd_score; + best_fwd_prevpos = prev_querypos; + best_fwd_prevhit = prevhit; #ifdef DEBUG9 - int *fwd_tracei, + best_fwd_tracei = ++*fwd_tracei; + best_fwd_intronnfwd = prevlink->fwd_intronnfwd; + best_fwd_intronnrev = prevlink->fwd_intronnrev; + best_fwd_intronnunk = prevlink->fwd_intronnunk; + switch (canonicalsgn) { + case 1: best_fwd_intronnfwd++; break; + case 0: best_fwd_intronnunk++; break; + } +#endif + debug9(printf(" => Best fwd at %d (consec = %d)\n",fwd_score,best_fwd_consecutive)); + } else { + debug9(printf(" => Loses to %d\n",best_fwd_score)); + } + } + + prevhit = active[prev_querypos][prevhit]; + } #endif - Link_T currlink, int querypos, + + +static void +score_querypos_lookback_one (int *fwd_tracei, Link_T currlink, int curr_querypos, int currhit, int querystart, int queryend, unsigned int position, - struct Link_T **links, Chrpos_T **mappings, + struct Link_T **links, int **fwd_scores, Chrpos_T **mappings, int **active, int *firstactive, Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp, int indexsize, Intlist_T processed, @@ -935,8 +1064,8 @@ /* int best_fwd_rootnlinks = 1; */ int best_fwd_score = 0, fwd_score; int best_fwd_prevpos = -1, best_fwd_prevhit = -1; + int best_fwd_tracei, last_tracei; #ifdef 
DEBUG9 - int best_fwd_tracei; int best_fwd_intronnfwd = 0, best_fwd_intronnrev = 0, best_fwd_intronnunk = 0; int canonicalsgn = 0; #endif @@ -975,9 +1104,9 @@ prev_active = active[prev_querypos]; #ifdef PMAP - querydistance = (querypos - prev_querypos)*3; + querydistance = (curr_querypos - prev_querypos)*3; #else - querydistance = querypos - prev_querypos; + querydistance = curr_querypos - prev_querypos; #endif prevhit = firstactive[prev_querypos]; prevposition = position; /* Prevents prevposition + querydistance == position */ @@ -989,12 +1118,12 @@ best_fwd_consecutive = prevlink->fwd_consecutive + querydistance; best_fwd_rootposition = prevlink->fwd_rootposition; /* best_fwd_rootnlinks = prevlink->fwd_rootnlinks + 1; */ - best_fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH*querydistance; + best_fwd_score = fwd_scores[prev_querypos][prevhit] + CONSEC_POINTS_PER_MATCH*querydistance; best_fwd_prevpos = prev_querypos; best_fwd_prevhit = prevhit; -#ifdef DEBUG9 best_fwd_tracei = prevlink->fwd_tracei; +#ifdef DEBUG9 best_fwd_intronnfwd = prevlink->fwd_intronnfwd; best_fwd_intronnrev = prevlink->fwd_intronnrev; best_fwd_intronnunk = prevlink->fwd_intronnunk; @@ -1007,7 +1136,7 @@ debug9(printf("\tA. 
Adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n", - prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],prevlink->fwd_score, + prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],fwd_scores[prev_querypos][prevhit], best_fwd_score,best_fwd_consecutive,best_fwd_tracei, best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk)); } @@ -1015,13 +1144,13 @@ /* Check work list */ - if (anchoredp && querypos - indexsize_query <= querystart) { + if (anchoredp && curr_querypos - indexsize_query <= querystart) { /* Allow close prevpositions that overlap with anchor */ /* Can give rise to false positives, and increases amount of dynamic programming work */ - } else if (0 && anchoredp && querypos == queryend) { + } else if (0 && anchoredp && curr_querypos == queryend) { /* Test first position */ } else { - while (processed != NULL && (prev_querypos = Intlist_head(processed)) > querypos - indexsize_query) { + while (processed != NULL && (prev_querypos = Intlist_head(processed)) > curr_querypos - indexsize_query) { debug9(printf("Skipping prev_querypos %d, because too close\n",prev_querypos)); processed = Intlist_next(processed); } @@ -1030,14 +1159,15 @@ /* D. 
Evaluate for mismatches (all other previous querypos) */ donep = false; nseen = 0; + last_tracei = -1; for ( ; processed != NULL && best_fwd_consecutive < enough_consecutive && donep == false; processed = Intlist_next(processed), nseen++) { prev_querypos = Intlist_head(processed); #ifdef PMAP - querydistance = (querypos - prev_querypos)*3; + querydistance = (curr_querypos - prev_querypos)*3; #else - querydistance = querypos - prev_querypos; + querydistance = curr_querypos - prev_querypos; #endif if (nseen > nlookback && querydistance - indexsize_nt > lookback) { @@ -1053,6 +1183,15 @@ prev_links = links[prev_querypos]; prev_active = active[prev_querypos]; + /* Range 0 */ + while (prevhit != -1 && prev_links[prevhit].fwd_tracei == last_tracei) { + debug9(printf("Skipping querypos %d with tracei #%d\n",prev_querypos,prev_links[prevhit].fwd_tracei)); + prevhit = /*active[prev_querypos]*/prev_active[prevhit]; + } + if (prevhit != -1) { + last_tracei = prev_links[prevhit].fwd_tracei; + } + /* Range 1: From Infinity to maxintronlen */ if (splicingp == true) { /* This is equivalent to diffdistance >= maxintronlen, where @@ -1075,7 +1214,7 @@ assert(gendistance > querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */ diffdistance = gendistance - querydistance; /* No need for abs() */ - fwd_score = prevlink->fwd_score + querydist_credit /*- querydist_penalty*/; + fwd_score = fwd_scores[prev_querypos][prevhit] + querydist_credit /*- querydist_penalty*/; if (splicingp == true) { fwd_score -= (diffdistance/TEN_THOUSAND + 1); } else { @@ -1151,7 +1290,7 @@ debug9(printf("\tD2. 
Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)", prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit], - prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei, + fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei, best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk, gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn)); @@ -1168,8 +1307,8 @@ best_fwd_score = fwd_score; best_fwd_prevpos = prev_querypos; best_fwd_prevhit = prevhit; -#ifdef DEBUG9 best_fwd_tracei = ++*fwd_tracei; +#ifdef DEBUG9 best_fwd_intronnfwd = prevlink->fwd_intronnfwd; best_fwd_intronnrev = prevlink->fwd_intronnrev; best_fwd_intronnunk = prevlink->fwd_intronnunk; @@ -1204,7 +1343,7 @@ #else /* diffdistance <= EQUAL_DISTANCE_NOT_SPLICING */ /* This is how version 2013-08-14 did it */ - fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH; + fwd_score = fwd_scores[prev_querypos][prevhit] + CONSEC_POINTS_PER_MATCH; #endif #if 0 @@ -1216,7 +1355,7 @@ debug9(printf("\tD4. 
Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)", prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit], - prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei, + fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei, best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk, gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn)); @@ -1233,9 +1372,9 @@ best_fwd_score = fwd_score; best_fwd_prevpos = prev_querypos; best_fwd_prevhit = prevhit; -#ifdef DEBUG9 /* best_fwd_tracei = ++*fwd_tracei; */ best_fwd_tracei = prevlink->fwd_tracei; /* Keep previous trace, as in range 3 */ +#ifdef DEBUG9 best_fwd_intronnfwd = prevlink->fwd_intronnfwd; best_fwd_intronnrev = prevlink->fwd_intronnrev; best_fwd_intronnunk = prevlink->fwd_intronnunk; @@ -1264,17 +1403,17 @@ currlink->fwd_pos = best_fwd_prevpos; currlink->fwd_hit = best_fwd_prevhit; if (currlink->fwd_pos >= 0) { - debug9(currlink->fwd_tracei = best_fwd_tracei); - currlink->fwd_score = best_fwd_score; + currlink->fwd_tracei = best_fwd_tracei; + fwd_scores[curr_querypos][currhit] = best_fwd_score; } else if (anchoredp == true) { - debug9(currlink->fwd_tracei = -1); - currlink->fwd_score = -100000; + currlink->fwd_tracei = -1; + fwd_scores[curr_querypos][currhit] = -100000; } else if (localp == true) { - debug9(currlink->fwd_tracei = ++*fwd_tracei); - currlink->fwd_score = indexsize_nt; + currlink->fwd_tracei = ++*fwd_tracei; + fwd_scores[curr_querypos][currhit] = indexsize_nt; } else { - debug9(currlink->fwd_tracei = ++*fwd_tracei); - currlink->fwd_score = best_fwd_score; + currlink->fwd_tracei = ++*fwd_tracei; + fwd_scores[curr_querypos][currhit] = best_fwd_score; } #ifdef DEBUG9 @@ -1284,7 +1423,7 @@ #endif debug9(printf("\tChose %d,%d with score %d (fwd) => trace #%d\n", - 
currlink->fwd_pos,currlink->fwd_hit,currlink->fwd_score,currlink->fwd_tracei)); + currlink->fwd_pos,currlink->fwd_hit,fwd_scores[curr_querypos][currhit],currlink->fwd_tracei)); debug3(printf("%d %d %d %d 1\n",querypos,hit,best_prevpos,best_prevhit)); return; @@ -1294,14 +1433,10 @@ static void -score_querypos_lookback_mult ( -#ifdef DEBUG9 - int *fwd_tracei, -#endif - int low_hit, int high_hit, - int querypos, int querystart, int queryend, unsigned int *positions, - struct Link_T **links, Chrpos_T **mappings, - int **active, int *firstactive, +score_querypos_lookback_mult (int *fwd_tracei, int low_hit, int high_hit, int curr_querypos, + int querystart, int queryend, unsigned int *positions, + struct Link_T **links, int **fwd_scores, + Chrpos_T **mappings, int **active, int *firstactive, Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp, int indexsize, Intlist_T processed, bool anchoredp, bool localp, bool splicingp, @@ -1318,8 +1453,8 @@ int best_fwd_rootposition; int best_fwd_score, fwd_score; int best_fwd_prevpos, best_fwd_prevhit; + int best_fwd_tracei, last_tracei; #ifdef DEBUG9 - int best_fwd_tracei; int best_fwd_intronnfwd, best_fwd_intronnrev, best_fwd_intronnunk; int canonicalsgn = 0; #endif @@ -1344,14 +1479,14 @@ /* Determine work load */ /* printf("Work load (lookback): %s\n",Intlist_to_string(processed)); */ last_item = processed; - if (anchoredp && querypos - indexsize_query <= querystart) { + if (anchoredp && curr_querypos - indexsize_query <= querystart) { /* Allow close prevpositions that overlap with anchor */ /* Can give rise to false positives, and increases amount of dynamic programming work */ /* debug9(printf("No skipping because close to anchor\n")); */ - } else if (0 && anchoredp && querypos == queryend) { + } else if (0 && anchoredp && curr_querypos == queryend) { /* Test first position */ } else { - while (processed != NULL && (/*prev_querypos =*/ Intlist_head(processed)) > querypos - indexsize_query) { + while (processed != NULL && 
(/*prev_querypos =*/ Intlist_head(processed)) > curr_querypos - indexsize_query) { debug9(printf("Skipping prev_querypos %d, because too close\n",Intlist_head(processed))); processed = Intlist_next(processed); } @@ -1359,7 +1494,7 @@ if (last_item == NULL) { for (hiti = 0; hiti < nhits; hiti++) { - currlink = &(links[querypos][hiti + low_hit]); + currlink = &(links[curr_querypos][hiti + low_hit]); currlink->fwd_consecutive = /*best_fwd_consecutive =*/ indexsize*NT_PER_MATCH; currlink->fwd_rootposition = /*best_fwd_rootposition =*/ positions[hiti]; @@ -1367,13 +1502,13 @@ currlink->fwd_hit = /*best_fwd_prevhit =*/ -1; if (anchoredp == true) { - debug9(currlink->fwd_tracei = -1); - currlink->fwd_score = -100000; + currlink->fwd_tracei = -1; + fwd_scores[curr_querypos][hiti + low_hit] = -100000; } else if (localp == true) { - debug9(currlink->fwd_tracei = ++*fwd_tracei); - currlink->fwd_score = indexsize_nt; + currlink->fwd_tracei = ++*fwd_tracei; + fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt; } else { - currlink->fwd_score = /*best_fwd_score =*/ 0; + fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ 0; } } @@ -1386,20 +1521,20 @@ adj_active = active[adj_querypos]; #ifdef PMAP - adj_querydistance = (querypos - adj_querypos)*3; + adj_querydistance = (curr_querypos - adj_querypos)*3; #else - adj_querydistance = querypos - adj_querypos; + adj_querydistance = curr_querypos - adj_querypos; #endif /* Process prevhit and hiti in parallel. Values are asscending along prevhit chain and from 0 to nhits-1. 
*/ prevhit = firstactive[adj_querypos]; hiti = 0; while (prevhit != -1 && hiti < nhits) { - if ((prevposition = /*mappings[adj_querypos]*/adj_mappings[prevhit]) + adj_querydistance < (position = positions[hiti])) { + if ((prevposition = /*mappings[adj_querypos]*/adj_mappings[prevhit]) + adj_querydistance < (position = positions[hiti])) { prevhit = /*active[adj_querypos]*/adj_active[prevhit]; } else if (prevposition + adj_querydistance > position) { - currlink = &(links[querypos][hiti + low_hit]); + currlink = &(links[curr_querypos][hiti + low_hit]); currlink->fwd_consecutive = /*best_fwd_consecutive =*/ indexsize*NT_PER_MATCH; currlink->fwd_rootposition = /*best_fwd_rootposition =*/ positions[hiti]; @@ -1407,32 +1542,32 @@ currlink->fwd_hit = /*best_fwd_prevhit =*/ -1; if (anchoredp == true) { - debug9(currlink->fwd_tracei = -1); - currlink->fwd_score = -100000; + currlink->fwd_tracei = -1; + fwd_scores[curr_querypos][hiti + low_hit] = -100000; } else if (localp == true) { - debug9(currlink->fwd_tracei = ++*fwd_tracei); - currlink->fwd_score = indexsize_nt; + currlink->fwd_tracei = ++*fwd_tracei; + fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt; } else { - currlink->fwd_score = /*best_fwd_score =*/ 0; + fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ 0; } hiti++; } else { /* Adjacent position found for hiti */ - currlink = &(links[querypos][hiti + low_hit]); + currlink = &(links[curr_querypos][hiti + low_hit]); prevlink = &(/*links[adj_querypos]*/adj_links[prevhit]); currlink->fwd_consecutive = /*best_fwd_consecutive =*/ prevlink->fwd_consecutive + adj_querydistance; currlink->fwd_rootposition = /*best_fwd_rootposition =*/ prevlink->fwd_rootposition; currlink->fwd_pos = /*best_fwd_prevpos =*/ adj_querypos; currlink->fwd_hit = /*best_fwd_prevhit =*/ prevhit; - currlink->fwd_score = /*best_fwd_score =*/ prevlink->fwd_score + CONSEC_POINTS_PER_MATCH*adj_querydistance; + fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ 
fwd_scores[adj_querypos][prevhit] + CONSEC_POINTS_PER_MATCH*adj_querydistance; #ifdef DEBUG9 printf("\tA. For hit %d, adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n", - hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],prevlink->fwd_score, - currlink->fwd_score,currlink->fwd_consecutive,/*best_fwd_tracei*/prevlink->fwd_tracei, + hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],fwd_scores[adj_querypos][prevhit], + fwd_scores[curr_querypos][hiti + low_hit],currlink->fwd_consecutive,/*best_fwd_tracei*/prevlink->fwd_tracei, /*best_fwd_intronnfwd*/prevlink->fwd_intronnfwd, /*best_fwd_intronnrev*/prevlink->fwd_intronnrev, /*best_fwd_intronnunk*/prevlink->fwd_intronnunk); @@ -1444,7 +1579,7 @@ } while (hiti < nhits) { - currlink = &(links[querypos][hiti + low_hit]); + currlink = &(links[curr_querypos][hiti + low_hit]); currlink->fwd_consecutive = /*best_fwd_consecutive =*/ indexsize*NT_PER_MATCH; currlink->fwd_rootposition = /*best_fwd_rootposition =*/ positions[hiti]; @@ -1452,13 +1587,13 @@ currlink->fwd_hit = /*best_fwd_prevhit =*/ -1; if (anchoredp == true) { - debug9(currlink->fwd_tracei = -1); - currlink->fwd_score = -100000; + currlink->fwd_tracei = -1; + fwd_scores[curr_querypos][hiti + low_hit] = -100000; } else if (localp == true) { - debug9(currlink->fwd_tracei = ++*fwd_tracei); - currlink->fwd_score = indexsize_nt; + currlink->fwd_tracei = ++*fwd_tracei; + fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt; } else { - currlink->fwd_score = /*best_fwd_score =*/ 0; + fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ 0; } hiti++; @@ -1471,9 +1606,9 @@ adj_active = active[adj_querypos]; #ifdef PMAP - adj_querydistance = (querypos - adj_querypos)*3; + adj_querydistance = (curr_querypos - adj_querypos)*3; #else - adj_querydistance = querypos - adj_querypos; + adj_querydistance = curr_querypos - adj_querypos; #endif adj_frontier = firstactive[adj_querypos]; @@ 
-1484,7 +1619,7 @@ for (p = processed; p != NULL; p = Intlist_next(p)) { prev_querypos = Intlist_head(p); - querydistance = querypos - prev_querypos; + querydistance = curr_querypos - prev_querypos; if (nseen <= /*nlookback*/1 || querydistance - indexsize_nt <= /*lookback*/sufflookback/2) { max_adjacent_nseen = nseen; } @@ -1514,17 +1649,17 @@ best_fwd_rootposition = prevlink->fwd_rootposition; best_fwd_prevpos = adj_querypos; best_fwd_prevhit = prevhit; - best_fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH*adj_querydistance; + best_fwd_score = fwd_scores[adj_querypos][prevhit] + CONSEC_POINTS_PER_MATCH*adj_querydistance; max_nseen = max_adjacent_nseen; /* Look not so far back */ + best_fwd_tracei = prevlink->fwd_tracei; #ifdef DEBUG9 - best_fwd_tracei = prevlink->fwd_tracei; best_fwd_intronnfwd = prevlink->fwd_intronnfwd; best_fwd_intronnrev = prevlink->fwd_intronnrev; best_fwd_intronnunk = prevlink->fwd_intronnunk; #endif debug9(printf("\tA. For hit %d, adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n", - hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],prevlink->fwd_score, + hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],fwd_scores[adj_querypos][prevhit], best_fwd_score,best_fwd_consecutive,/*best_fwd_tracei*/prevlink->fwd_tracei, best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk)); @@ -1536,9 +1671,9 @@ best_fwd_prevhit = -1; best_fwd_score = 0; max_nseen = max_nonadjacent_nseen; /* Look farther back */ + best_fwd_tracei = -1; #ifdef DEBUG9 - best_fwd_tracei = -1; best_fwd_intronnfwd = 0; best_fwd_intronnrev = 0; best_fwd_intronnunk = 0; @@ -1548,6 +1683,7 @@ /* D. 
Evaluate for mismatches (all other previous querypos) */ nseen = 0; + last_tracei = -1; for (p = processed; p != NULL && best_fwd_consecutive < enough_consecutive && nseen <= max_nseen; p = Intlist_next(p), nseen++) { @@ -1555,9 +1691,9 @@ if ((prevhit = frontier[nseen]) != -1) { /* Retrieve starting point from last hiti */ prev_querypos = Intlist_head(p); #ifdef PMAP - querydistance = (querypos - prev_querypos)*3; + querydistance = (curr_querypos - prev_querypos)*3; #else - querydistance = querypos - prev_querypos; + querydistance = curr_querypos - prev_querypos; #endif /* Actually a querydist_penalty */ querydist_credit = -querydistance/indexsize_nt; @@ -1566,6 +1702,15 @@ prev_links = links[prev_querypos]; prev_active = active[prev_querypos]; + /* Range 0 */ + while (prevhit != -1 && prev_links[prevhit].fwd_tracei == last_tracei) { + debug9(printf("Skipping querypos %d with tracei #%d\n",prev_querypos,prev_links[prevhit].fwd_tracei)); + prevhit = /*active[prev_querypos]*/prev_active[prevhit]; + } + if (prevhit != -1) { + last_tracei = prev_links[prevhit].fwd_tracei; + } + /* Range 1: From Infinity to maxintronlen. To be skipped. 
This is equivalent to diffdistance >= maxintronlen, where diffdistance = abs(gendistance - querydistance) and @@ -1576,7 +1721,6 @@ } frontier[nseen] = prevhit; /* Store as starting point for next hiti */ - /* Range 2: From maxintronlen to (prev_querypos + EQUAL_DISTANCE_NOT_SPLICING) */ /* This is equivalent to +diffdistance > EQUAL_DISTANCE_NOT_SPLICING */ while (prevhit != -1 && (prevposition = /*mappings[prev_querypos]*/prev_mappings[prevhit]) + EQUAL_DISTANCE_NOT_SPLICING + querydistance < position) { @@ -1586,7 +1730,7 @@ assert(gendistance > querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */ diffdistance = gendistance - querydistance; /* No need for abs() */ - fwd_score = prevlink->fwd_score + querydist_credit /*- querydist_penalty*/; + fwd_score = fwd_scores[prev_querypos][prevhit] + querydist_credit /*- querydist_penalty*/; if (splicingp == true) { fwd_score -= (diffdistance/TEN_THOUSAND + 1); } else { @@ -1656,7 +1800,7 @@ debug9(printf("\tD2, hit %d. 
Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)", hiti,prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit], - prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei, + fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei, best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk, gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn)); @@ -1671,8 +1815,8 @@ best_fwd_score = fwd_score; best_fwd_prevpos = prev_querypos; best_fwd_prevhit = prevhit; -#ifdef DEBUG9 best_fwd_tracei = ++*fwd_tracei; +#ifdef DEBUG9 best_fwd_intronnfwd = prevlink->fwd_intronnfwd; best_fwd_intronnrev = prevlink->fwd_intronnrev; best_fwd_intronnunk = prevlink->fwd_intronnunk; @@ -1685,7 +1829,6 @@ } else { debug9(printf(" => Loses to %d\n",best_fwd_score)); } - prevhit = /*active[prev_querypos]*/prev_active[prevhit]; } @@ -1708,12 +1851,12 @@ #else /* diffdistance <= EQUAL_DISTANCE_NOT_SPLICING */ /* This is how version 2013-08-14 did it */ - fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH; + fwd_score = fwd_scores[prev_querypos][prevhit] + CONSEC_POINTS_PER_MATCH; #endif debug9(printf("\tD4, hit %d. 
Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)", hiti,prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit], - prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei, + fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei, best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk, gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn)); @@ -1728,9 +1871,9 @@ best_fwd_score = fwd_score; best_fwd_prevpos = prev_querypos; best_fwd_prevhit = prevhit; -#ifdef DEBUG9 /* best_fwd_tracei = ++*fwd_tracei; */ best_fwd_tracei = prevlink->fwd_tracei; /* Keep previous trace, as in range 3 */ +#ifdef DEBUG9 best_fwd_intronnfwd = prevlink->fwd_intronnfwd; best_fwd_intronnrev = prevlink->fwd_intronnrev; best_fwd_intronnunk = prevlink->fwd_intronnunk; @@ -1753,23 +1896,23 @@ small local extension from beating a good canonical intron. If querypos is too small, don't insert an intron. 
*/ /* linksconsecutive already assigned above */ - currlink = &(links[querypos][hiti + low_hit]); + currlink = &(links[curr_querypos][hiti + low_hit]); currlink->fwd_consecutive = best_fwd_consecutive; currlink->fwd_rootposition = best_fwd_rootposition; currlink->fwd_pos = best_fwd_prevpos; currlink->fwd_hit = best_fwd_prevhit; if (currlink->fwd_pos >= 0) { - debug9(currlink->fwd_tracei = best_fwd_tracei); - currlink->fwd_score = best_fwd_score; + currlink->fwd_tracei = best_fwd_tracei; + fwd_scores[curr_querypos][hiti + low_hit] = best_fwd_score; } else if (anchoredp == true) { - debug9(currlink->fwd_tracei = -1); - currlink->fwd_score = -100000; + currlink->fwd_tracei = -1; + fwd_scores[curr_querypos][hiti + low_hit] = -100000; } else if (localp == true) { - debug9(currlink->fwd_tracei = ++*fwd_tracei); - currlink->fwd_score = indexsize_nt; + currlink->fwd_tracei = ++*fwd_tracei; + fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt; } else { - debug9(currlink->fwd_tracei = ++*fwd_tracei); - currlink->fwd_score = best_fwd_score; + currlink->fwd_tracei = ++*fwd_tracei; + fwd_scores[curr_querypos][hiti + low_hit] = best_fwd_score; } #ifdef DEBUG9 @@ -1779,7 +1922,7 @@ #endif debug9(printf("\tChose %d,%d with score %d (fwd) => trace #%d\n", - currlink->fwd_pos,currlink->fwd_hit,currlink->fwd_score,currlink->fwd_tracei)); + currlink->fwd_pos,currlink->fwd_hit,fwd_scores[curr_querypos][hiti + low_hit],currlink->fwd_tracei)); debug3(printf("%d %d %d %d 1\n",querypos,hit,best_prevpos,best_prevhit)); } @@ -1791,14 +1934,10 @@ static void -score_querypos_lookforward_one ( -#ifdef DEBUG9 - int *fwd_tracei, -#endif - Link_T currlink, int querypos, +score_querypos_lookforward_one (int *fwd_tracei, Link_T currlink, int curr_querypos, int currhit, int querystart, int queryend, unsigned int position, - struct Link_T **links, Chrpos_T **mappings, - int **active, int *firstactive, + struct Link_T **links, int **fwd_scores, + Chrpos_T **mappings, int **active, int 
*firstactive, Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp, int indexsize, Intlist_T processed, bool anchoredp, bool localp, bool splicingp, @@ -1812,8 +1951,8 @@ int best_fwd_rootposition = position; int best_fwd_score = 0, fwd_score; int best_fwd_prevpos = -1, best_fwd_prevhit = -1; + int best_fwd_tracei, last_tracei; #ifdef DEBUG9 - int best_fwd_tracei; int best_fwd_intronnfwd = 0, best_fwd_intronnrev = 0, best_fwd_intronnunk = 0; int canonicalsgn = 0; #endif @@ -1852,9 +1991,9 @@ prev_active = active[prev_querypos]; #ifdef PMAP - querydistance = (prev_querypos - querypos)*3; + querydistance = (prev_querypos - curr_querypos)*3; #else - querydistance = prev_querypos - querypos; + querydistance = prev_querypos - curr_querypos; #endif prevhit = firstactive[prev_querypos]; prevposition = position; /* Prevents prevposition == position + querydistance */ @@ -1866,12 +2005,12 @@ best_fwd_consecutive = prevlink->fwd_consecutive + querydistance; /* best_fwd_rootnlinks = prevlink->fwd_rootnlinks + 1; */ best_fwd_rootposition = prevlink->fwd_rootposition; - best_fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH*querydistance; + best_fwd_score = fwd_scores[prev_querypos][prevhit] + CONSEC_POINTS_PER_MATCH*querydistance; best_fwd_prevpos = prev_querypos; best_fwd_prevhit = prevhit; -#ifdef DEBUG9 best_fwd_tracei = prevlink->fwd_tracei; +#ifdef DEBUG9 best_fwd_intronnfwd = prevlink->fwd_intronnfwd; best_fwd_intronnrev = prevlink->fwd_intronnrev; best_fwd_intronnunk = prevlink->fwd_intronnunk; @@ -1882,21 +2021,21 @@ lookback = sufflookback/2; debug9(printf("\tA. 
Adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n", - prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],prevlink->fwd_score, + prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit],fwd_scores[prev_querypos][prevhit], best_fwd_score,best_fwd_consecutive,best_fwd_tracei, best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk)); } } /* Check work list */ - if (anchoredp && querypos + indexsize_query >= queryend) { + if (anchoredp && curr_querypos + indexsize_query >= queryend) { /* Allow close prevpositions that overlap with anchor */ /* Can give rise to false positives, and increases amount of dynamic programming work */ debug9(printf("No skipping because close to anchor\n")); - } else if (0 && anchoredp && querypos == querystart) { + } else if (0 && anchoredp && curr_querypos == querystart) { /* Test end position */ } else { - while (processed != NULL && (prev_querypos = Intlist_head(processed)) < querypos + indexsize_query) { + while (processed != NULL && (prev_querypos = Intlist_head(processed)) < curr_querypos + indexsize_query) { debug9(printf("Skipping prev_querypos %d, because too close\n",prev_querypos)); processed = Intlist_next(processed); } @@ -1905,14 +2044,15 @@ /* D. 
Evaluate for mismatches (all other previous querypos) */ donep = false; nseen = 0; + last_tracei = -1; for ( ; processed != NULL && best_fwd_consecutive < enough_consecutive && donep == false; processed = Intlist_next(processed), nseen++) { prev_querypos = Intlist_head(processed); #ifdef PMAP - querydistance = (prev_querypos - querypos)*3; + querydistance = (prev_querypos - curr_querypos)*3; #else - querydistance = prev_querypos - querypos; + querydistance = prev_querypos - curr_querypos; #endif if (nseen > nlookback && querydistance - indexsize_nt > lookback) { @@ -1928,6 +2068,15 @@ prev_links = links[prev_querypos]; prev_active = active[prev_querypos]; + /* Range 0 */ + while (prevhit != -1 && prev_links[prevhit].fwd_tracei == last_tracei) { + debug9(printf("Skipping querypos %d with tracei #%d\n",prev_querypos,prev_links[prevhit].fwd_tracei)); + prevhit = /*active[prev_querypos]*/prev_active[prevhit]; + } + if (prevhit != -1) { + last_tracei = prev_links[prevhit].fwd_tracei; + } + /* Range 1: From Infinity to maxintronlen */ if (splicingp == true) { /* This is equivalent to diffdistance >= maxintronlen, where @@ -1950,7 +2099,7 @@ assert(gendistance > querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */ diffdistance = gendistance - querydistance; /* No need for abs() */ - fwd_score = prevlink->fwd_score + querydist_credit /*- querydist_penalty*/; + fwd_score = fwd_scores[prev_querypos][prevhit] + querydist_credit /*- querydist_penalty*/; if (splicingp == true) { fwd_score -= (diffdistance/TEN_THOUSAND + 1); } else { @@ -2021,7 +2170,7 @@ debug9(printf("\tD2. 
Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)", prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit], - prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei, + fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei, best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk, gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn)); @@ -2038,8 +2187,8 @@ best_fwd_score = fwd_score; best_fwd_prevpos = prev_querypos; best_fwd_prevhit = prevhit; -#ifdef DEBUG9 best_fwd_tracei = ++*fwd_tracei; +#ifdef DEBUG9 best_fwd_intronnfwd = prevlink->fwd_intronnfwd; best_fwd_intronnrev = prevlink->fwd_intronnrev; best_fwd_intronnunk = prevlink->fwd_intronnunk; @@ -2074,7 +2223,7 @@ #else /* diffdistance <= EQUAL_DISTANCE_NOT_SPLICING */ /* This is how version 2013-08-14 did it */ - fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH; + fwd_score = fwd_scores[prev_querypos][prevhit] + CONSEC_POINTS_PER_MATCH; #endif #if 0 if (/*near_end_p == false &&*/ prevlink->fwd_consecutive < EXON_DEFN) { @@ -2084,7 +2233,7 @@ debug9(printf("\tD4. 
Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)", prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit], - prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei, + fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei, best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk, gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn)); @@ -2101,9 +2250,9 @@ best_fwd_score = fwd_score; best_fwd_prevpos = prev_querypos; best_fwd_prevhit = prevhit; -#ifdef DEBUG9 /* best_fwd_tracei = ++*fwd_tracei; */ best_fwd_tracei = prevlink->fwd_tracei; /* Keep previous trace, as in range 3 */ +#ifdef DEBUG9 best_fwd_intronnfwd = prevlink->fwd_intronnfwd; best_fwd_intronnrev = prevlink->fwd_intronnrev; best_fwd_intronnunk = prevlink->fwd_intronnunk; @@ -2132,17 +2281,17 @@ currlink->fwd_pos = best_fwd_prevpos; currlink->fwd_hit = best_fwd_prevhit; if (currlink->fwd_pos >= 0) { - debug9(currlink->fwd_tracei = best_fwd_tracei); - currlink->fwd_score = best_fwd_score; + currlink->fwd_tracei = best_fwd_tracei; + fwd_scores[curr_querypos][currhit] = best_fwd_score; } else if (anchoredp == true) { - debug9(currlink->fwd_tracei = -1); - currlink->fwd_score = -100000; + currlink->fwd_tracei = -1; + fwd_scores[curr_querypos][currhit] = -100000; } else if (localp == true) { - debug9(currlink->fwd_tracei = ++*fwd_tracei); - currlink->fwd_score = indexsize_nt; + currlink->fwd_tracei = ++*fwd_tracei; + fwd_scores[curr_querypos][currhit] = indexsize_nt; } else { - debug9(currlink->fwd_tracei = ++*fwd_tracei); - currlink->fwd_score = best_fwd_score; + currlink->fwd_tracei = ++*fwd_tracei; + fwd_scores[curr_querypos][currhit] = best_fwd_score; } #ifdef DEBUG9 @@ -2152,7 +2301,7 @@ #endif debug9(printf("\tChose %d,%d with score %d (fwd) => trace #%d\n", - 
currlink->fwd_pos,currlink->fwd_hit,currlink->fwd_score,currlink->fwd_tracei)); + currlink->fwd_pos,currlink->fwd_hit,fwd_scores[curr_querypos][currhit],currlink->fwd_tracei)); debug3(printf("%d %d %d %d 1\n",querypos,hit,best_prevpos,best_prevhit)); return; @@ -2160,14 +2309,10 @@ static void -score_querypos_lookforward_mult ( -#ifdef DEBUG9 - int *fwd_tracei, -#endif - int low_hit, int high_hit, - int querypos, int querystart, int queryend, unsigned int *positions, - struct Link_T **links, Chrpos_T **mappings, - int **active, int *firstactive, +score_querypos_lookforward_mult (int *fwd_tracei, int low_hit, int high_hit, int curr_querypos, + int querystart, int queryend, unsigned int *positions, + struct Link_T **links, int **fwd_scores, + Chrpos_T **mappings, int **active, int *firstactive, Univcoord_T chroffset, Univcoord_T chrhigh, bool plusp, int indexsize, Intlist_T processed, bool anchoredp, bool localp, bool splicingp, @@ -2184,8 +2329,8 @@ int best_fwd_rootposition; int best_fwd_score, fwd_score; int best_fwd_prevpos, best_fwd_prevhit; + int best_fwd_tracei, last_tracei; #ifdef DEBUG9 - int best_fwd_tracei; int best_fwd_intronnfwd, best_fwd_intronnrev, best_fwd_intronnunk; int canonicalsgn = 0; #endif @@ -2210,14 +2355,14 @@ /* Determine work load */ /* printf("Work load (lookforward): %s\n",Intlist_to_string(processed)); */ last_item = processed; - if (anchoredp && querypos + indexsize_query >= queryend) { + if (anchoredp && curr_querypos + indexsize_query >= queryend) { /* Allow close prevpositions that overlap with anchor */ /* Can give rise to false positives, and increases amount of dynamic programming work */ /* debug9(printf("No skipping because close to anchor\n")); */ - } else if (0 && anchoredp && querypos == querystart) { + } else if (0 && anchoredp && curr_querypos == querystart) { /* Test end position */ } else { - while (processed != NULL && (prev_querypos = Intlist_head(processed)) < querypos + indexsize_query) { + while (processed != NULL 
&& (prev_querypos = Intlist_head(processed)) < curr_querypos + indexsize_query) { debug9(printf("Skipping prev_querypos %d, because too close\n",prev_querypos)); processed = Intlist_next(processed); } @@ -2225,7 +2370,7 @@ if (last_item == NULL) { for (hiti = nhits - 1; hiti >= 0; hiti--) { - currlink = &(links[querypos][hiti + low_hit]); + currlink = &(links[curr_querypos][hiti + low_hit]); currlink->fwd_consecutive = /*best_fwd_consecutive =*/ indexsize*NT_PER_MATCH; currlink->fwd_rootposition = /*best_fwd_rootposition =*/ positions[hiti]; @@ -2233,13 +2378,13 @@ currlink->fwd_hit = /*best_fwd_prevhit =*/ -1; if (anchoredp == true) { - debug9(currlink->fwd_tracei = -1); - currlink->fwd_score = -100000; + currlink->fwd_tracei = -1; + fwd_scores[curr_querypos][hiti + low_hit] = -100000; } else if (localp == true) { - debug9(currlink->fwd_tracei = ++*fwd_tracei); - currlink->fwd_score = indexsize_nt; + currlink->fwd_tracei = ++*fwd_tracei; + fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt; } else { - currlink->fwd_score = /*best_fwd_score =*/ 0; + fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ 0; } } @@ -2251,9 +2396,9 @@ adj_active = active[adj_querypos]; #ifdef PMAP - adj_querydistance = (adj_querypos - querypos)*3; + adj_querydistance = (adj_querypos - curr_querypos)*3; #else - adj_querydistance = adj_querypos - querypos; + adj_querydistance = adj_querypos - curr_querypos; #endif /* Process prevhit and hiti in parallel. Values are descending along prevhit chain and from nhits-1 to 0. 
*/ @@ -2265,7 +2410,7 @@ } else if (prevposition < position + adj_querydistance) { /* Adjacent position not found for hiti */ - currlink = &(links[querypos][hiti + low_hit]); + currlink = &(links[curr_querypos][hiti + low_hit]); currlink->fwd_consecutive = /*best_fwd_consecutive =*/ indexsize*NT_PER_MATCH; currlink->fwd_rootposition = /*best_fwd_rootposition =*/ positions[hiti]; @@ -2273,32 +2418,32 @@ currlink->fwd_hit = /*best_fwd_prevhit =*/ -1; if (anchoredp == true) { - debug9(currlink->fwd_tracei = -1); - currlink->fwd_score = -100000; + currlink->fwd_tracei = -1; + fwd_scores[curr_querypos][hiti + low_hit] = -100000; } else if (localp == true) { - debug9(currlink->fwd_tracei = ++*fwd_tracei); - currlink->fwd_score = indexsize_nt; + currlink->fwd_tracei = ++*fwd_tracei; + fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt; } else { - currlink->fwd_score = /*best_fwd_score =*/ 0; + fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ 0; } hiti--; } else { /* Adjacent position found for hiti */ - currlink = &(links[querypos][hiti + low_hit]); + currlink = &(links[curr_querypos][hiti + low_hit]); prevlink = &(/*links[adj_querypos]*/adj_links[prevhit]); currlink->fwd_consecutive = /*best_fwd_consecutive =*/ prevlink->fwd_consecutive + adj_querydistance; currlink->fwd_rootposition = /*best_fwd_rootposition =*/ prevlink->fwd_rootposition; currlink->fwd_pos = /*best_fwd_prevpos =*/ adj_querypos; currlink->fwd_hit = /*best_fwd_prevhit =*/ prevhit; - currlink->fwd_score = /*best_fwd_score =*/ prevlink->fwd_score + CONSEC_POINTS_PER_MATCH*adj_querydistance; + fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ fwd_scores[adj_querypos][prevhit] + CONSEC_POINTS_PER_MATCH*adj_querydistance; #ifdef DEBUG9 printf("\tA. 
For hit %d, adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n", - hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],prevlink->fwd_score, - currlink->fwd_score,currlink->fwd_consecutive,/*best_fwd_tracei*/prevlink->fwd_tracei, + hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],fwd_scores[adj_querypos][prevhit], + fwd_scores[curr_querypos][hiti + low_hit],currlink->fwd_consecutive,/*best_fwd_tracei*/prevlink->fwd_tracei, /*best_fwd_intronnfwd*/prevlink->fwd_intronnfwd, /*best_fwd_intronnrev*/prevlink->fwd_intronnrev, /*best_fwd_intronnunk*/prevlink->fwd_intronnunk); @@ -2311,7 +2456,7 @@ while (hiti >= 0) { /* Adjacent position not found for hiti */ - currlink = &(links[querypos][hiti + low_hit]); + currlink = &(links[curr_querypos][hiti + low_hit]); currlink->fwd_consecutive = /*best_fwd_consecutive =*/ indexsize*NT_PER_MATCH; currlink->fwd_rootposition = /*best_fwd_rootposition =*/ positions[hiti]; @@ -2319,13 +2464,13 @@ currlink->fwd_hit = /*best_fwd_prevhit =*/ -1; if (anchoredp == true) { - debug9(currlink->fwd_tracei = -1); - currlink->fwd_score = -100000; + currlink->fwd_tracei = -1; + fwd_scores[curr_querypos][hiti + low_hit] = -100000; } else if (localp == true) { - debug9(currlink->fwd_tracei = ++*fwd_tracei); - currlink->fwd_score = indexsize_nt; + currlink->fwd_tracei = ++*fwd_tracei; + fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt; } else { - currlink->fwd_score = /*best_fwd_score =*/ 0; + fwd_scores[curr_querypos][hiti + low_hit] = /*best_fwd_score =*/ 0; } hiti--; @@ -2338,9 +2483,9 @@ adj_active = active[adj_querypos]; #ifdef PMAP - adj_querydistance = (adj_querypos - querypos)*3; + adj_querydistance = (adj_querypos - curr_querypos)*3; #else - adj_querydistance = adj_querypos - querypos; + adj_querydistance = adj_querypos - curr_querypos; #endif adj_frontier = firstactive[adj_querypos]; @@ -2351,7 +2496,7 @@ for (p = processed; p != NULL; p = Intlist_next(p)) 
{ prev_querypos = Intlist_head(p); - querydistance = prev_querypos - querypos; + querydistance = prev_querypos - curr_querypos; if (nseen <= /*nlookback*/1 || querydistance - indexsize_nt <= /*lookback*/sufflookback/2) { max_adjacent_nseen = nseen; } @@ -2381,17 +2526,17 @@ best_fwd_rootposition = prevlink->fwd_rootposition; best_fwd_prevpos = adj_querypos; best_fwd_prevhit = prevhit; - best_fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH*adj_querydistance; + best_fwd_score = fwd_scores[adj_querypos][prevhit] + CONSEC_POINTS_PER_MATCH*adj_querydistance; max_nseen = max_adjacent_nseen; /* Look not so far back */ + best_fwd_tracei = prevlink->fwd_tracei; #ifdef DEBUG9 - best_fwd_tracei = prevlink->fwd_tracei; best_fwd_intronnfwd = prevlink->fwd_intronnfwd; best_fwd_intronnrev = prevlink->fwd_intronnrev; best_fwd_intronnunk = prevlink->fwd_intronnunk; #endif debug9(printf("\tA. For hit %d, adjacent qpos %d,%d at %ux%d (scores = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d)\n", - hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],prevlink->fwd_score, + hiti,adj_querypos,prevhit,prevposition,active[adj_querypos][prevhit],fwd_scores[adj_querypos][prevhit], best_fwd_score,best_fwd_consecutive,/*best_fwd_tracei*/prevlink->fwd_tracei, best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk)); } else { @@ -2402,9 +2547,9 @@ best_fwd_prevhit = -1; best_fwd_score = 0; max_nseen = max_nonadjacent_nseen; /* Look farther back */ + best_fwd_tracei = -1; #ifdef DEBUG9 - best_fwd_tracei = -1; best_fwd_intronnfwd = 0; best_fwd_intronnrev = 0; best_fwd_intronnunk = 0; @@ -2414,15 +2559,16 @@ /* D. 
Evaluate for mismatches (all other previous querypos) */ nseen = 0; + last_tracei = -1; for (p = processed; p != NULL && best_fwd_consecutive < enough_consecutive && nseen <= max_nseen; p = Intlist_next(p), nseen++) { /* Making this check helps with efficiency */ if ((prevhit = frontier[nseen]) != -1) { /* Retrieve starting point from last hiti */ prev_querypos = Intlist_head(p); #ifdef PMAP - querydistance = (prev_querypos - querypos)*3; + querydistance = (prev_querypos - curr_querypos)*3; #else - querydistance = prev_querypos - querypos; + querydistance = prev_querypos - curr_querypos; #endif /* Actually a querydist_penalty */ querydist_credit = -querydistance/indexsize_nt; @@ -2431,6 +2577,15 @@ prev_links = links[prev_querypos]; prev_active = active[prev_querypos]; + /* Range 0 */ + while (prevhit != -1 && prev_links[prevhit].fwd_tracei == last_tracei) { + debug9(printf("Skipping querypos %d with tracei #%d\n",prev_querypos,prev_links[prevhit].fwd_tracei)); + prevhit = /*active[prev_querypos]*/prev_active[prevhit]; + } + if (prevhit != -1) { + last_tracei = prev_links[prevhit].fwd_tracei; + } + /* Range 1: From Infinity to maxintronlen. To be skipped. This is equivalent to diffdistance >= maxintronlen, where diffdistance = abs(gendistance - querydistance) and @@ -2451,7 +2606,7 @@ assert(gendistance > querydistance); /* True because gendistance > EQUAL_DISTANCE_NOT_SPLICING + querydistance */ diffdistance = gendistance - querydistance; /* No need for abs() */ - fwd_score = prevlink->fwd_score + querydist_credit /*- querydist_penalty*/; + fwd_score = fwd_scores[prev_querypos][prevhit] + querydist_credit /*- querydist_penalty*/; if (splicingp == true) { fwd_score -= (diffdistance/TEN_THOUSAND + 1); } else { @@ -2521,7 +2676,7 @@ debug9(printf("\tD2, hit %d. 
Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)", hiti,prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit], - prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei, + fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei, best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk, gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn)); @@ -2536,8 +2691,8 @@ best_fwd_score = fwd_score; best_fwd_prevpos = prev_querypos; best_fwd_prevhit = prevhit; -#ifdef DEBUG9 best_fwd_tracei = ++*fwd_tracei; +#ifdef DEBUG9 best_fwd_intronnfwd = prevlink->fwd_intronnfwd; best_fwd_intronnrev = prevlink->fwd_intronnrev; best_fwd_intronnunk = prevlink->fwd_intronnunk; @@ -2569,16 +2724,16 @@ diffdistance = gendistance > querydistance ? (gendistance - querydistance) : (querydistance - gendistance); #ifdef BAD_GMAX - fwd_score = prevlink->fwd_score + querydist_credit - (diffdistance/ONE + 1) /*- querydist_penalty*/; + fwd_score = fwd_scores[prev_querypos][prevhit] + querydist_credit - (diffdistance/ONE + 1) /*- querydist_penalty*/; #else /* diffdistance <= EQUAL_DISTANCE_NOT_SPLICING */ /* This is how version 2013-08-14 did it */ - fwd_score = prevlink->fwd_score + CONSEC_POINTS_PER_MATCH; + fwd_score = fwd_scores[prev_querypos][prevhit] + CONSEC_POINTS_PER_MATCH; #endif debug9(printf("\tD4, hit %d. 
Fwd mismatch qpos %d,%d at %ux%d (score = %d -> %d, consec = %d (from #%d), intr = %d-%d-%d, gendist %u, querydist %d, canonicalsgn %d)", hiti,prev_querypos,prevhit,prevposition,active[prev_querypos][prevhit], - prevlink->fwd_score,fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei, + fwd_scores[prev_querypos][prevhit],fwd_score,prevlink->fwd_consecutive,prevlink->fwd_tracei, best_fwd_intronnfwd,best_fwd_intronnrev,best_fwd_intronnunk, gendistance-indexsize_nt,querydistance-indexsize_nt,canonicalsgn)); @@ -2593,9 +2748,10 @@ best_fwd_score = fwd_score; best_fwd_prevpos = prev_querypos; best_fwd_prevhit = prevhit; -#ifdef DEBUG9 /* best_fwd_tracei = ++*fwd_tracei; */ best_fwd_tracei = prevlink->fwd_tracei; /* Keep previous trace, as in range 3 */ + +#ifdef DEBUG9 best_fwd_intronnfwd = prevlink->fwd_intronnfwd; best_fwd_intronnrev = prevlink->fwd_intronnrev; best_fwd_intronnunk = prevlink->fwd_intronnunk; @@ -2618,23 +2774,23 @@ small local extension from beating a good canonical intron. If querypos is too small, don't insert an intron. 
*/ /* linksconsecutive already assigned above */ - currlink = &(links[querypos][hiti + low_hit]); + currlink = &(links[curr_querypos][hiti + low_hit]); currlink->fwd_consecutive = best_fwd_consecutive; currlink->fwd_rootposition = best_fwd_rootposition; currlink->fwd_pos = best_fwd_prevpos; currlink->fwd_hit = best_fwd_prevhit; if (currlink->fwd_pos >= 0) { - debug9(currlink->fwd_tracei = best_fwd_tracei); - currlink->fwd_score = best_fwd_score; + currlink->fwd_tracei = best_fwd_tracei; + fwd_scores[curr_querypos][hiti + low_hit] = best_fwd_score; } else if (anchoredp == true) { - debug9(currlink->fwd_tracei = -1); - currlink->fwd_score = -100000; + currlink->fwd_tracei = -1; + fwd_scores[curr_querypos][hiti + low_hit] = -100000; } else if (localp == true) { - debug9(currlink->fwd_tracei = ++*fwd_tracei); - currlink->fwd_score = indexsize_nt; + currlink->fwd_tracei = ++*fwd_tracei; + fwd_scores[curr_querypos][hiti + low_hit] = indexsize_nt; } else { - debug9(currlink->fwd_tracei = ++*fwd_tracei); - currlink->fwd_score = best_fwd_score; + currlink->fwd_tracei = ++*fwd_tracei; + fwd_scores[curr_querypos][hiti + low_hit] = best_fwd_score; } #ifdef DEBUG9 @@ -2644,7 +2800,7 @@ #endif debug9(printf("\tChose %d,%d with score %d (fwd) => trace #%d\n", - currlink->fwd_pos,currlink->fwd_hit,currlink->fwd_score,currlink->fwd_tracei)); + currlink->fwd_pos,currlink->fwd_hit,fwd_scores[curr_querypos][hiti + low_hit],currlink->fwd_tracei)); debug3(printf("%d %d %d %d 1\n",querypos,hit,best_prevpos,best_prevhit)); } @@ -2657,7 +2813,7 @@ static void revise_active_lookback (int **active, int *firstactive, int *nactive, - int low_hit, int high_hit, struct Link_T **links, int querypos) { + int low_hit, int high_hit, int **fwd_scores, int querypos) { int best_score, threshold, score; int hit, *ptr; @@ -2668,24 +2824,24 @@ nactive[querypos] = 0; } else { - debug6(printf("At hit %d, fwd_score is %d",hit,links[querypos][hit].fwd_score)); - best_score = links[querypos][hit].fwd_score; + 
debug6(printf("At hit %d, fwd_score is %d",hit,fwd_scores[querypos][hit])); + best_score = fwd_scores[querypos][hit]; #ifdef SEPARATE_FWD_REV - debug6(printf(" and rev_score is %d",links[querypos][hit].rev_score)); - if ((score = links[querypos][hit].rev_score) > best_score) { + debug6(printf(" and rev_score is %d",rev_scores[querypos][hit])); + if ((score = rev_scores[querypos][hit]) > best_score) { best_score = score; } #endif debug6(printf("\n")); for (hit++; hit < high_hit; hit++) { - debug6(printf("At hit %d, fwd_score is %d",hit,links[querypos][hit].fwd_score)); - if ((score = links[querypos][hit].fwd_score) > best_score) { + debug6(printf("At hit %d, fwd_score is %d",hit,fwd_scores[querypos][hit])); + if ((score = fwd_scores[querypos][hit]) > best_score) { best_score = score; } #ifdef SEPARATE_FWD_REV - debug6(printf(" and rev_score is %d",links[querypos][hit].rev_score)); - if ((score = links[querypos][hit].rev_score) > best_score) { + debug6(printf(" and rev_score is %d",rev_scores[querypos][hit])); + if ((score = rev_scores[querypos][hit]) > best_score) { best_score = score; } #endif @@ -2702,9 +2858,9 @@ ptr = &(firstactive[querypos]); hit = low_hit; while (hit < high_hit) { - while (hit < high_hit && links[querypos][hit].fwd_score <= threshold + while (hit < high_hit && fwd_scores[querypos][hit] <= threshold #ifdef SEPARATE_FWD_REV - && links[querypos][hit].rev_score <= threshold + && rev_scores[querypos][hit] <= threshold #endif ) { hit++; @@ -2735,7 +2891,7 @@ static void revise_active_lookforward (int **active, int *firstactive, int *nactive, - int low_hit, int high_hit, struct Link_T **links, int querypos) { + int low_hit, int high_hit, int **fwd_scores, int querypos) { int best_score, threshold, score; int hit, *ptr; @@ -2745,24 +2901,24 @@ firstactive[querypos] = -1; nactive[querypos] = 0; } else { - debug6(printf("At hit %d, fwd_score is %d",hit,links[querypos][hit].fwd_score)); - best_score = links[querypos][hit].fwd_score; + debug6(printf("At 
hit %d, fwd_score is %d",hit,fwd_scores[querypos][hit])); + best_score = fwd_scores[querypos][hit]; #ifdef SEPARATE_FWD_REV - debug6(printf(" and rev_score is %d",links[querypos][hit].rev_score)); - if ((score = links[querypos][hit].rev_score) > best_score) { + debug6(printf(" and rev_score is %d",rev_scores[querypos][hit])); + if ((score = rev_scores[querypos][hit]) > best_score) { best_score = score; } #endif debug6(printf("\n")); for (--hit; hit >= low_hit; --hit) { - debug6(printf("At hit %d, fwd_score is %d",hit,links[querypos][hit].fwd_score)); - if ((score = links[querypos][hit].fwd_score) > best_score) { + debug6(printf("At hit %d, fwd_score is %d",hit,fwd_scores[querypos][hit])); + if ((score = fwd_scores[querypos][hit]) > best_score) { best_score = score; } #ifdef SEPARATE_FWD_REV - debug6(printf(" and rev_score is %d",links[querypos][hit].rev_score)); - if ((score = links[querypos][hit].rev_score) > best_score) { + debug6(printf(" and rev_score is %d",rev_scores[querypos][hit])); + if ((score = rev_scores[querypos][hit]) > best_score) { best_score = score; } #endif @@ -2779,9 +2935,9 @@ ptr = &(firstactive[querypos]); hit = high_hit - 1; while (hit >= low_hit) { - while (hit >= low_hit && links[querypos][hit].fwd_score <= threshold + while (hit >= low_hit && fwd_scores[querypos][hit] <= threshold #ifdef SEPARATE_FWD_REV - && links[querypos][hit].rev_score <= threshold + && rev_scores[querypos][hit] <= threshold #endif ) { --hit; @@ -3088,8 +3244,9 @@ #else static Cell_T * -Linkmatrix_get_cells_fwd (int *nunique, struct Link_T **links, int querystart, int queryend, int *npositions, - bool favor_right_p, Cellpool_T cellpool) { +get_cells_fwd (int *nunique, struct Link_T **links, int **fwd_scores, + int querystart, int queryend, int *npositions, + bool favor_right_p, Cellpool_T cellpool) { Cell_T *sorted, *cells; List_T celllist = NULL; int querypos, hit; @@ -3100,11 +3257,11 @@ ncells = 0; for (querypos = querystart; querypos <= queryend; querypos++) { for 
(hit = 0; hit < npositions[querypos]; hit++) { - if (links[querypos][hit].fwd_score > 0) { + if (fwd_scores[querypos][hit] > 0) { rootposition = links[querypos][hit].fwd_rootposition; /* tracei = links[querypos][hit].fwd_tracei; */ celllist = Cellpool_push(celllist,cellpool,rootposition,querypos,hit,/*fwdp*/true, - links[querypos][hit].fwd_score); + fwd_scores[querypos][hit]); ncells++; } } @@ -3311,7 +3468,8 @@ /* Returns celllist */ /* For PMAP, indexsize is in aa. */ static Cell_T * -align_compute_scores_lookback (int *ncells, struct Link_T **links, Chrpos_T **mappings, int *npositions, int totalpositions, +align_compute_scores_lookback (int *ncells, struct Link_T **links, int **fwd_scores, + Chrpos_T **mappings, int *npositions, int totalpositions, bool oned_matrix_p, Chrpos_T *minactive, Chrpos_T *maxactive, int *firstactive, int *nactive, Cellpool_T cellpool, int querystart, int queryend, int querylength, @@ -3338,6 +3496,7 @@ #endif int **active; Chrpos_T position, prevposition; + int fwd_tracei = 0; #if 0 int *lastGT, *lastAG; #ifndef PMAP @@ -3346,7 +3505,6 @@ #endif #ifdef DEBUG9 char *oligo; - int fwd_tracei = 0; #endif #ifdef DEBUG12 Link_T termlink = NULL; @@ -3365,6 +3523,7 @@ #endif debug0(printf("Lookback: querystart = %d, queryend = %d, indexsize = %d\n",querystart,queryend,indexsize)); + assert(oned_matrix_p == true); if (oned_matrix_p == true) { active = intmatrix_1d_new(querylength,npositions,totalpositions); } else { @@ -3401,9 +3560,9 @@ currlink = &(links[anchor_querypos][hit]); #ifndef SEPARATE_FWD_REV currlink->fwd_pos = currlink->fwd_hit = -1; - currlink->fwd_score = indexsize_nt; currlink->fwd_consecutive = EXON_DEFN; - debug9(currlink->fwd_tracei = 0); + currlink->fwd_tracei = 0; + fwd_scores[anchor_querypos][hit] = indexsize_nt; #else fprintf(stderr,"Not implemented yet\n"); abort(); @@ -3422,26 +3581,26 @@ currlink = &(links[querypos][hit]); #ifndef SEPARATE_FWD_REV currlink->fwd_pos = currlink->fwd_hit = -1; - currlink->fwd_score = 
indexsize_nt; currlink->fwd_consecutive = indexsize_nt; - debug9(currlink->fwd_tracei = -1); + currlink->fwd_tracei = -1; /* currlink->fwd_rootnlinks = 1; */ + fwd_scores[querypos][hit] = indexsize_nt; #else currlink->fwd_pos = currlink->fwd_hit = -1; - currlink->fwd_score = indexsize_nt; currlink->fwd_consecutive = indexsize_nt; - debug9(currlink->fwd_tracei = -1); + currlink->fwd_tracei = -1; /* currlink->fwd_rootnlinks = 1; */ + fwd_scores[querypos][hit] = indexsize_nt; if (splicingp == true) { currlink->rev_pos = currlink->rev_hit = -1; - currlink->rev_score = indexsize_nt; currlink->rev_consecutive = indexsize_nt; - debug9(currlink->rev_tracei = -1); + currlink->rev_tracei = -1; /* currlink->rev_rootnlinks = 1; */ + rev_scores[querypos][hit] = indexsize_nt; } #endif } - revise_active_lookback(active,firstactive,nactive,0,npositions[querypos],links,querypos); + revise_active_lookback(active,firstactive,nactive,0,npositions[querypos],fwd_scores,querypos); } grand_fwd_score = 0; @@ -3524,19 +3683,15 @@ debug9(printf("Finding link looking back from querypos %d,%d at %ux%d (%s). prev_querypos was %d\n", querypos,low_hit,position,active[querypos][low_hit],oligo,processed ? 
Intlist_head(processed) : -1)); - score_querypos_lookback_one( -#ifdef DEBUG9 - &fwd_tracei, -#endif - currlink,querypos,querystart,queryend,position, - links,mappings,active,firstactive,chroffset,chrhigh,plusp, + score_querypos_lookback_one(&fwd_tracei,currlink,querypos,low_hit,querystart,queryend,position, + links,fwd_scores,mappings,active,firstactive,chroffset,chrhigh,plusp, indexsize,processed, anchoredp,localp,splicingp,use_canonical_p, non_canonical_penalty); - if (currlink->fwd_score > 0) { - debug9(printf("Single hit at low_hit %d has score %d\n",low_hit,currlink->fwd_score)); - best_fwd_score = currlink->fwd_score; + if (fwd_scores[querypos][low_hit] > 0) { + debug9(printf("Single hit at low_hit %d has score %d\n",low_hit,fwd_scores[querypos][low_hit])); + best_fwd_score = fwd_scores[querypos][low_hit]; best_fwd_hit = low_hit; } @@ -3546,23 +3701,18 @@ querypos,low_hit,high_hit-1,mappings[querypos][low_hit],mappings[querypos][high_hit-1], oligo,processed ? Intlist_head(processed) : -1)); - score_querypos_lookback_mult( -#ifdef DEBUG9 - &fwd_tracei, -#endif - low_hit,high_hit,querypos,querystart,queryend, + score_querypos_lookback_mult(&fwd_tracei,low_hit,high_hit,querypos,querystart,queryend, /*positions*/&(mappings[querypos][low_hit]), - links,mappings,active,firstactive,chroffset,chrhigh,plusp, + links,fwd_scores,mappings,active,firstactive,chroffset,chrhigh,plusp, indexsize,processed, anchoredp,localp,splicingp,use_canonical_p, non_canonical_penalty); debug9(printf("Checking hits from low_hit %d to high_hit %d\n",low_hit,high_hit)); for (hit = low_hit; hit < high_hit; hit++) { - currlink = &(links[querypos][hit]); - debug9(printf("Hit %d has score %d\n",hit,currlink->fwd_score)); - if (currlink->fwd_score > best_fwd_score) { - best_fwd_score = currlink->fwd_score; + debug9(printf("Hit %d has score %d\n",hit,fwd_scores[querypos][hit])); + if (fwd_scores[querypos][hit] > best_fwd_score) { + best_fwd_score = fwd_scores[querypos][hit]; best_fwd_hit = hit; 
} } @@ -3586,8 +3736,7 @@ if (splicingp == true && best_fwd_hit >= 0 && links[querypos][best_fwd_hit].fwd_hit < 0 && grand_fwd_querypos >= 0 && querypos >= grand_fwd_querypos + indexsize_query) { - prevlink = &(links[grand_fwd_querypos][grand_fwd_hit]); - if ((best_fwd_score = prevlink->fwd_score - (querypos - grand_fwd_querypos)) > 0) { + if ((best_fwd_score = fwd_scores[grand_fwd_querypos][grand_fwd_hit] - (querypos - grand_fwd_querypos)) > 0) { prevposition = mappings[grand_fwd_querypos][grand_fwd_hit]; debug12(printf("Considering prevposition %u to position %u as a grand fwd lookback\n",prevposition,position)); for (hit = low_hit; hit < high_hit; hit++) { @@ -3596,12 +3745,12 @@ } else if (position >= prevposition + indexsize_nt) { currlink = &(links[querypos][hit]); currlink->fwd_consecutive = indexsize_nt; - /* currlink->fwd_rootnlinks = 1; */ currlink->fwd_pos = grand_fwd_querypos; currlink->fwd_hit = grand_fwd_hit; - currlink->fwd_score = best_fwd_score; -#ifdef DEBUG9 currlink->fwd_tracei = ++fwd_tracei; + fwd_scores[querypos][hit] = best_fwd_score; +#ifdef DEBUG9 + prevlink = &(links[grand_fwd_querypos][grand_fwd_hit]); currlink->fwd_intronnfwd = prevlink->fwd_intronnfwd; currlink->fwd_intronnrev = prevlink->fwd_intronnrev; currlink->fwd_intronnunk = prevlink->fwd_intronnunk + 1; @@ -3609,7 +3758,7 @@ } } debug12(printf("At querypos %d, setting all fwd hits to point back to grand_fwd %d,%d with a score of %d\n", - querypos,grand_fwd_querypos,grand_fwd_hit,prevlink->fwd_score)); + querypos,grand_fwd_querypos,grand_fwd_hit,fwd_scores[grand_fwd_querypos][grand_fwd_hit])); } } @@ -3672,7 +3821,7 @@ #endif } - revise_active_lookback(active,firstactive,nactive,low_hit,high_hit,links,querypos); + revise_active_lookback(active,firstactive,nactive,low_hit,high_hit,fwd_scores,querypos); /* Need to push querypos, even if firstactive[querypos] == -1 */ /* Want to skip npositions[querypos] == 0, so we can find adjacent despite mismatch or overabundance */ @@ -3715,8 
+3864,8 @@ indexsize,best_overall_score,favor_right_p,cellpool); } #else - cells = Linkmatrix_get_cells_fwd(&(*ncells),links,querystart,queryend,npositions, - favor_right_p,cellpool); + cells = get_cells_fwd(&(*ncells),links,fwd_scores,querystart,queryend,npositions, + favor_right_p,cellpool); #endif debug9(FREE(oligo)); @@ -3753,7 +3902,7 @@ Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp, bool lookbackp, #endif #ifdef DEBUG0 - int indexsize, + int **fwd_scores, int indexsize, #endif Pairpool_T pairpool, bool fwdp) { List_T path = NULL; @@ -3809,7 +3958,7 @@ if (fwdp == true) { debug0(printf("Pushing %d,%d (%s) at %u, score = %d, consec = %d", querypos,hit,oligo,position, - links[querypos][hit].fwd_score,links[querypos][hit].fwd_consecutive)); + fwd_scores[querypos][hit],links[querypos][hit].fwd_consecutive)); debug9(printf(" (from #%d), intr = %d(+)/%d(-)/%d(?)", links[querypos][hit].fwd_tracei,links[querypos][hit].fwd_intronnfwd,links[querypos][hit].fwd_intronnrev, links[querypos][hit].fwd_intronnunk)); @@ -3856,7 +4005,7 @@ Univcoord_T chroffset, Univcoord_T chrhigh, bool watsonp, #ifdef DEBUG0 - int indexsize, + int **fwd_scores, int indexsize, #endif Pairpool_T pairpool, bool fwdp) { List_T path = NULL; @@ -3897,7 +4046,7 @@ if (fwdp == true) { debug0(printf("Pushing %d,%d (%s) at %u, score = %d, consec = %d", querypos,hit,oligo,position, - links[querypos][hit].fwd_score,links[querypos][hit].fwd_consecutive)); + fwd_scores[querypos][hit],links[querypos][hit].fwd_consecutive)); debug9(printf(" (from #%d), intr = %d(+)/%d(-)/%d(?)", links[querypos][hit].fwd_tracei,links[querypos][hit].fwd_intronnfwd,links[querypos][hit].fwd_intronnrev, links[querypos][hit].fwd_intronnunk)); @@ -3950,6 +4099,7 @@ bool favor_right_p, int max_nalignments, bool debug_graphic_p) { List_T all_paths = NULL; struct Link_T **links; + int **fwd_scores; Cell_T *cells, cell; int ncells, i; @@ -3961,8 +4111,10 @@ if (oned_matrix_p == true) { links = 
Linkmatrix_1d_new(querylength,npositions,totalpositions); + fwd_scores = intmatrix_1d_new(querylength,npositions,totalpositions); } else { links = Linkmatrix_2d_new(querylength,npositions); + fwd_scores = intmatrix_2d_new(querylength,npositions); } /* These are all oligomers */ @@ -3970,7 +4122,8 @@ mappings_dump_R(mappings,npositions,querylength,/*active*/NULL,/*firstactive*/NULL,indexsize,"all.mers"); } - cells = align_compute_scores_lookback(&ncells,links,mappings,npositions,totalpositions, + cells = align_compute_scores_lookback(&ncells,links,fwd_scores, + mappings,npositions,totalpositions, oned_matrix_p,minactive,maxactive,firstactive,nactive,cellpool, querystart,queryend,querylength, @@ -3987,7 +4140,7 @@ #ifdef SEPARATE_FWD_REV debug1(Linkmatrix_print_both(links,mappings,querylength,npositions,queryseq_ptr,indexsize)); #else - debug1(Linkmatrix_print_fwd(links,mappings,querylength,npositions,queryseq_ptr,indexsize)); + debug1(print_fwd(links,fwd_scores,mappings,querylength,npositions,queryseq_ptr,indexsize)); #endif if (ncells == 0) { @@ -4012,7 +4165,7 @@ all_paths = List_push(all_paths,(void *) traceback_one_snps(querypos,hit,links,mappings,queryseq_ptr, chroffset,chrhigh,/*watsonp*/plusp, #ifdef DEBUG0 - indexsize, + fwd_scores,indexsize, #endif pairpool,fwdp)); } @@ -4044,7 +4197,7 @@ chroffset,chrhigh,/*watsonp*/plusp,/*lookbackp*/true, #endif #ifdef DEBUG0 - indexsize, + fwd_scores,indexsize, #endif pairpool,fwdp)); } @@ -4065,8 +4218,10 @@ if (oned_matrix_p == true) { Linkmatrix_1d_free(&links); + intmatrix_1d_free(&fwd_scores); } else { Linkmatrix_2d_free(&links,querylength); + intmatrix_2d_free(&fwd_scores,querylength); } #if 0 @@ -4084,7 +4239,8 @@ /* Returns celllist */ /* For PMAP, indexsize is in aa. 
*/ static Cell_T * -align_compute_scores_lookforward (int *ncells, struct Link_T **links, Chrpos_T **mappings, int *npositions, int totalpositions, +align_compute_scores_lookforward (int *ncells, struct Link_T **links, int **fwd_scores, + Chrpos_T **mappings, int *npositions, int totalpositions, bool oned_matrix_p, Chrpos_T *minactive, Chrpos_T *maxactive, int *firstactive, int *nactive, Cellpool_T cellpool, int querystart, int queryend, int querylength, @@ -4110,6 +4266,7 @@ #endif int **active; Chrpos_T position, prevposition; + int fwd_tracei = 0; #if 0 int *lastGT, *lastAG; #ifndef PMAP @@ -4118,7 +4275,6 @@ #endif #ifdef DEBUG9 char *oligo; - int fwd_tracei = 0; #endif #ifdef DEBUG12 Link_T termlink = NULL; @@ -4172,9 +4328,9 @@ currlink = &(links[anchor_querypos][hit]); #ifndef SEPARATE_FWD_REV currlink->fwd_pos = currlink->fwd_hit = -1; - currlink->fwd_score = indexsize_nt; currlink->fwd_consecutive = EXON_DEFN; - debug9(currlink->fwd_tracei = 0); + currlink->fwd_tracei = 0; + fwd_scores[anchor_querypos][hit] = indexsize_nt; #else fprintf(stderr,"Not implemented yet\n"); abort(); @@ -4193,26 +4349,26 @@ currlink = &(links[querypos][hit]); #ifndef SEPARATE_FWD_REV currlink->fwd_pos = currlink->fwd_hit = -1; - currlink->fwd_score = indexsize_nt; currlink->fwd_consecutive = indexsize_nt; - debug9(currlink->fwd_tracei = -1); + currlink->fwd_tracei = -1; /* currlink->fwd_rootnlinks = 1; */ + fwd_scores[querypos][hit] = indexsize_nt; #else currlink->fwd_pos = currlink->fwd_hit = -1; currlink->fwd_score = indexsize_nt; currlink->fwd_consecutive = indexsize_nt; - debug9(currlink->fwd_tracei = -1); + currlink->fwd_tracei = -1; /* currlink->fwd_rootnlinks = 1; */ if (splicingp == true) { currlink->rev_pos = currlink->rev_hit = -1; - currlink->rev_score = indexsize_nt; currlink->rev_consecutive = indexsize_nt; currlink->rev_tracei = -1; /* currlink->rev_rootnlinks = 1; */ + rev_scores[querypos][hit] = indexsize_nt; } #endif } - 
revise_active_lookforward(active,firstactive,nactive,0,npositions[querypos],links,querypos); + revise_active_lookforward(active,firstactive,nactive,0,npositions[querypos],fwd_scores,querypos); } @@ -4295,20 +4451,15 @@ debug9(strncpy(oligo,&(queryseq_ptr[querypos]),indexsize)); debug9(printf("Finding link looking forward from querypos %d,%d at %ux%d (%s). prev_querypos was %d\n", querypos,low_hit,position,active[querypos][low_hit],oligo,processed ? Intlist_head(processed) : -1)); - score_querypos_lookforward_one( -#ifdef DEBUG9 - &fwd_tracei, -#endif - currlink,querypos,querystart,queryend,position, - links,mappings,active,firstactive, + score_querypos_lookforward_one(&fwd_tracei,currlink,querypos,low_hit,querystart,queryend,position, + links,fwd_scores,mappings,active,firstactive, chroffset,chrhigh,plusp, indexsize,processed, anchoredp,localp,splicingp,use_canonical_p, non_canonical_penalty); - - if (currlink->fwd_score > 0) { - debug9(printf("Single hit at low_hit %d has score %d\n",low_hit,currlink->fwd_score)); - best_fwd_score = currlink->fwd_score; + if (fwd_scores[querypos][low_hit] > 0) { + debug9(printf("Single hit at low_hit %d has score %d\n",low_hit,fwd_scores[querypos][low_hit])); + best_fwd_score = fwd_scores[querypos][low_hit]; best_fwd_hit = low_hit; } @@ -4318,23 +4469,18 @@ querypos,high_hit-1,low_hit,mappings[querypos][high_hit-1],mappings[querypos][low_hit], oligo,processed ? 
Intlist_head(processed) : -1)); - score_querypos_lookforward_mult( -#ifdef DEBUG9 - &fwd_tracei, -#endif - low_hit,high_hit,querypos,querystart,queryend, + score_querypos_lookforward_mult(&fwd_tracei,low_hit,high_hit,querypos,querystart,queryend, /*positions*/&(mappings[querypos][low_hit]), - links,mappings,active,firstactive,chroffset,chrhigh,plusp, + links,fwd_scores,mappings,active,firstactive,chroffset,chrhigh,plusp, indexsize,processed, anchoredp,localp,splicingp,use_canonical_p, non_canonical_penalty); debug9(printf("Checking hits from high_hit %d to low_hit %d\n",high_hit,low_hit)); for (hit = high_hit - 1; hit >= low_hit; hit--) { - currlink = &(links[querypos][hit]); - debug9(printf("Hit %d has score %d\n",hit,currlink->fwd_score)); - if (currlink->fwd_score > best_fwd_score) { - best_fwd_score = currlink->fwd_score; + debug9(printf("Hit %d has score %d\n",hit,fwd_scores[querypos][hit])); + if (fwd_scores[querypos][hit] > best_fwd_score) { + best_fwd_score = fwd_scores[querypos][hit]; best_fwd_hit = hit; } } @@ -4358,8 +4504,7 @@ if (splicingp == true && best_fwd_hit >= 0 && links[querypos][best_fwd_hit].fwd_hit < 0 && grand_fwd_querypos <= querylength - indexsize_query && querypos + indexsize_query <= grand_fwd_querypos) { - prevlink = &(links[grand_fwd_querypos][grand_fwd_hit]); - if ((best_fwd_score = prevlink->fwd_score - (grand_fwd_querypos - querypos)) > 0) { + if ((best_fwd_score = fwd_scores[grand_fwd_querypos][grand_fwd_hit] - (grand_fwd_querypos - querypos)) > 0) { prevposition = mappings[grand_fwd_querypos][grand_fwd_hit]; debug12(printf("Considering prevposition %u to position %u as a grand fwd lookforward\n",prevposition,position)); for (hit = high_hit - 1; hit >= low_hit; --hit) { @@ -4368,12 +4513,12 @@ } else if (position + indexsize_nt <= prevposition) { currlink = &(links[querypos][hit]); currlink->fwd_consecutive = indexsize_nt; - /* currlink->fwd_rootnlinks = 1; */ currlink->fwd_pos = grand_fwd_querypos; currlink->fwd_hit = 
grand_fwd_hit; - currlink->fwd_score = best_fwd_score; -#ifdef DEBUG9 currlink->fwd_tracei = ++fwd_tracei; + /* currlink->fwd_rootnlinks = 1; */ + fwd_scores[querypos][hit] = best_fwd_score; +#ifdef DEBUG9 currlink->fwd_intronnfwd = prevlink->fwd_intronnfwd; currlink->fwd_intronnrev = prevlink->fwd_intronnrev; currlink->fwd_intronnunk = prevlink->fwd_intronnunk + 1; @@ -4381,7 +4526,7 @@ } } debug12(printf("At querypos %d, setting all fwd hits to point back to grand_fwd %d,%d with a score of %d\n", - querypos,grand_fwd_querypos,grand_fwd_hit,prevlink->fwd_score)); + querypos,grand_fwd_querypos,grand_fwd_hit,fwd_scores[grand_fwd_querypos][grand_fwd_hit])); } } @@ -4444,7 +4589,7 @@ #endif } - revise_active_lookforward(active,firstactive,nactive,low_hit,high_hit,links,querypos); + revise_active_lookforward(active,firstactive,nactive,low_hit,high_hit,fwd_scores,querypos); /* Need to push querypos, even if firstactive[querypos] == -1 */ /* Want to skip npositions[querypos] == 0, so we can find adjacent despite mismatch or overabundance */ @@ -4488,8 +4633,8 @@ indexsize,best_overall_score,favor_right_p,cellpool); } #else - cells = Linkmatrix_get_cells_fwd(&(*ncells),links,querystart,queryend,npositions, - favor_right_p,cellpool); + cells = get_cells_fwd(&(*ncells),links,fwd_scores,querystart,queryend,npositions, + favor_right_p,cellpool); #endif debug9(FREE(oligo)); @@ -4512,6 +4657,7 @@ bool favor_right_p, int max_nalignments, bool debug_graphic_p) { List_T all_paths = NULL; struct Link_T **links; + int **fwd_scores; Cell_T *cells, cell; int ncells, i; @@ -4522,8 +4668,10 @@ if (oned_matrix_p == true) { links = Linkmatrix_1d_new(querylength,npositions,totalpositions); + fwd_scores = intmatrix_1d_new(querylength,npositions,totalpositions); } else { links = Linkmatrix_2d_new(querylength,npositions); + fwd_scores = intmatrix_2d_new(querylength,npositions); } /* These are all oligomers */ @@ -4531,7 +4679,8 @@ 
mappings_dump_R(mappings,npositions,querylength,/*active*/NULL,/*firstactive*/NULL,indexsize,"all.mers"); } - cells = align_compute_scores_lookforward(&ncells,links,mappings,npositions,totalpositions, + cells = align_compute_scores_lookforward(&ncells,links,fwd_scores, + mappings,npositions,totalpositions, oned_matrix_p,minactive,maxactive,firstactive,nactive,cellpool, querystart,queryend,querylength, @@ -4548,7 +4697,7 @@ #ifdef SEPARATE_FWD_REV debug1(Linkmatrix_print_both(links,mappings,querylength,npositions,queryseq_ptr,indexsize)); #else - debug1(Linkmatrix_print_fwd(links,mappings,querylength,npositions,queryseq_ptr,indexsize)); + debug1(print_fwd(links,fwd_scores,mappings,querylength,npositions,queryseq_ptr,indexsize)); #endif if (ncells == 0) { @@ -4571,7 +4720,7 @@ if (debug_graphic_p == true) { - best_path_dump_R(links,mappings,querypos,hit,fwdp,"best.path"); + /* best_path_dump_R(links,mappings,querypos,hit,fwdp,"best.path"); */ printf("plot(all.mers,col=\"black\",pch=\".\",xlab=\"Query\",ylab=\"Genomic\")\n"); printf("points(active.mers,col=\"red\",pch=\".\")\n"); printf("points(best.path,col=\"green\",pch=\".\")\n"); @@ -4583,7 +4732,7 @@ all_paths = List_push(all_paths,(void *) traceback_one_snps(querypos,hit,links,mappings,queryseq_ptr, chroffset,chrhigh,/*watsonp*/plusp, #ifdef DEBUG0 - indexsize, + fwd_scores,indexsize, #endif pairpool,fwdp)); } else { @@ -4592,7 +4741,7 @@ chroffset,chrhigh,/*watsonp*/plusp,/*lookbackp*/false, #endif #ifdef DEBUG0 - indexsize, + fwd_scores,indexsize, #endif pairpool,fwdp)); } @@ -4613,8 +4762,10 @@ if (oned_matrix_p == true) { Linkmatrix_1d_free(&links); + intmatrix_1d_free(&fwd_scores); } else { Linkmatrix_2d_free(&links,querylength); + intmatrix_2d_free(&fwd_scores,querylength); } #if 0 @@ -6334,7 +6485,7 @@ #endif - if (totalpositions == 0) { + if (totalpositions <= 0) { debug(printf("Quitting because totalpositions is zero\n")); all_results = (List_T) NULL; diff -Nru gmap-2016-11-07/src/stage3.c 
gmap-2017-01-14/src/stage3.c --- gmap-2016-11-07/src/stage3.c 2016-11-07 22:48:17.000000000 +0000 +++ gmap-2017-01-14/src/stage3.c 2017-01-01 15:47:24.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: stage3.c 198281 2016-09-24 00:55:49Z twu $"; +static char rcsid[] = "$Id: stage3.c 202048 2017-01-01 15:47:23Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -72,7 +72,10 @@ #define DYNPROGINDEX_MINOR +1 #define DUAL_BREAK_PROB_THRESHOLD 0.90 -#define MIN_STAGE2_FOR_DUALBREAK 3 /* was 24, but misses small exons */ + +/* If too small, e.g., 3, misses introns with a nearby mismatch. If too large, e.g., 24, misses small exons */ +#define MIN_STAGE2_FOR_DUALBREAK 6 + #define MIN_MICROEXON_LENGTH 3 #define THETA_SLACK 0.10 @@ -1724,7 +1727,7 @@ introntype, intronlength, genomicpos; char left1, left2, right2, right1, left1_alt, left2_alt, right2_alt, right1_alt, c2, c2_alt; - debug(printf("\n** Starting assign_intron_probs\n")); + debug(printf("\n** Starting assign_intron_probs with watsonp %d and cdna_direction %d\n",watsonp,cdna_direction)); while (path != NULL) { /* pairptr = path; */ /* path = Pairpool_pop(path,&pair); */ @@ -3507,8 +3510,8 @@ static int sufficient_splice_prob_local (int support, int nmatches, int nmismatches, double distal_spliceprob, double medial_spliceprob) { - debug3(printf("Checking for sufficient splice prob, based on %d matches, %d mismatches, and support %d\n", - nmatches,nmismatches,support)); + debug3(printf("Checking for sufficient splice prob, based on %d matches, %d mismatches, support %d, distal spliceprob %f, and medial spliceprob %f\n", + nmatches,nmismatches,support,distal_spliceprob,medial_spliceprob)); nmatches -= 2*nmismatches; if (nmatches < 0) { return (int) false; @@ -3525,6 +3528,17 @@ } } +static bool +sufficient_splice_prob_strict (double distal_spliceprob, double medial_spliceprob) { + debug3(printf("Checking for sufficient splice prob, based on spliceprob %f, and medial spliceprob %f\n", + 
distal_spliceprob,medial_spliceprob)); + if (distal_spliceprob > 0.95 && medial_spliceprob > 0.90) { + return true; + } else { + return false; + } +} + #if 0 @@ -3576,22 +3590,200 @@ /* Also handles case where novelsplicingp == false */ /* pairs -> pairs */ static List_T -trim_end5_exon_indels (bool *trim5p, int ambig_end_length, List_T pairs, - int cdna_direction +trim_end5_indels (List_T pairs, int ambig_end_length, + Dynprog_T dynprog, Chrpos_T chroffset, Chrpos_T chrhigh, + char *queryseq_ptr, char *queryuc_ptr, + int cdna_direction, bool watsonp, bool jump_late_p, + Pairpool_T pairpool, double defect_rate) { + List_T path, exon, pairptr, p; + Pair_T pair, medial; + int max_nmatches = 0, max_nmismatches; + int nmatches = 0, nmismatches /* = -1 because of the gap */, i; + int max_score, score; + bool nearindelp = false; + int nindels; + + int finalscore, continuous_nmatches, continuous_nmismatches, continuous_nopens, continuous_nindels; + int querydp3_medialgap, genomedp3_medialgap, queryjump, genomejump; + List_T continuous_gappairs_medialgap; + int dynprogindex_minor = 0; + + debug3(printf("Starting trim_end5_indels\n")); + + /* Handle first exon */ + if (pairs == NULL) { + /* *trim5p = false; */ + return (List_T) NULL; + } else if (ambig_end_length > 0) { + /* Don't mess with ambiguous end */ + /* *trim5p = false; */ + return pairs; + } else { + pair = pairs->first; + debug3(printf("querystart %d\n",pair->querypos)); + } + + exon = (List_T) NULL; + while (pairs != NULL && pair->comp != INDEL_COMP) { + pairptr = pairs; + pairs = Pairpool_pop(pairs,&pair); +#ifdef WASTE + exon = Pairpool_push_existing(exon,pairpool,pair); +#else + exon = List_push_existing(exon,pairptr); +#endif + } + + while (pairs != NULL && ((Pair_T) pairs->first)->comp == INDEL_COMP) { + pairptr = pairs; + pairs = Pairpool_pop(pairs,&pair); #ifdef WASTE - , Pairpool_T pairpool + exon = Pairpool_push_existing(exon,pairpool,pair); +#else + exon = List_push_existing(exon,pairptr); +#endif + } 
+ debug3(printf("End exon:\n")); + debug3(Pair_dump_list(exon,true)); + + + if (exon == NULL) { + /* *trim5p = false; */ + return pairs; + + } else { + p = exon; + nindels = 1; + while (p != NULL && ((Pair_T) p->first)->comp == INDEL_COMP) { + p = List_next(p); + nindels++; + } + + max_nmatches = max_nmismatches = 0; + nmatches = nmismatches = 0; + max_score = score = 0; + /* Evaluate region distal to indel */ + while (p != NULL) { + pair = (Pair_T) List_head(p); + if (pair->comp == MATCH_COMP || pair->comp == DYNPROG_MATCH_COMP || pair->comp == AMBIGUOUS_COMP) { + score += 1; + nmatches += 1; + } else { + score -= 3; + nmismatches += 1; + } + if (score > max_score) { + max_score = score; + max_nmatches = nmatches; + max_nmismatches = nmismatches; + } + debug3(printf("5' querypos %d => score %d, max_nmatches %d, max_nmismatches %d\n", + pair->querypos,score,max_nmatches,max_nmismatches)); + p = List_next(p); + } + +#if 0 + for ( i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) { + medial = (Pair_T) p->first; + if (medial->gapp) { + debug3(printf("Saw splice medial to 5' end indel\n")); + nearindelp = true; + } else if (medial->comp == MATCH_COMP || medial->comp == DYNPROG_MATCH_COMP || medial->comp == AMBIGUOUS_COMP) { + /* Skip */ + } else { + debug3(printf("Saw mismatch %c medial to 5' end indel\n",medial->comp)); + } + } #endif - ) { + + debug3(printf("Before indel/gap, nmatches %d, nmismatches %d\n",max_nmatches,max_nmismatches)); + if (pairs == NULL) { + debug3(printf("No indel/gap\n")); + path = exon; + /* *trim5p = false; */ + + } else if (exon == NULL) { + debug3(printf("No 5' exon\n")); + path = exon; + /* *trim5p = false; */ + +#if 0 + } else if (nearindelp == true && max_nmatches < INDEL_SPLICE_ENDLENGTH) { + debug3(printf("near indel with nmatches %d too low, so trimming it\n",max_nmatches)); + path = (List_T) NULL; + /* *trim5p = true; */ +#endif + + } else { + querydp3_medialgap = ((Pair_T) pairs->first)->querypos - 1; + 
genomedp3_medialgap = ((Pair_T) pairs->first)->genomepos - 1; + queryjump = querydp3_medialgap + 1; + genomejump = queryjump /*+ extramaterial_end*/; + + continuous_gappairs_medialgap = Dynprog_end5_gap(&dynprogindex_minor,&finalscore, + &continuous_nmatches,&continuous_nmismatches,&continuous_nopens,&continuous_nindels, + dynprog,&(queryseq_ptr[querydp3_medialgap]),&(queryuc_ptr[querydp3_medialgap]), + queryjump,genomejump,querydp3_medialgap,genomedp3_medialgap, + chroffset,chrhigh,watsonp,jump_late_p,pairpool, + extraband_end,defect_rate,/*endalign*/QUERYEND_NOGAPS,/*require_pos_score_p*/true); + debug(printf("CONTINUOUS?\n")); + debug(Pair_dump_list(continuous_gappairs_medialgap,true)); + debug3(printf("continuous finalscore %d\n",finalscore)); + + if (finalscore > 0) { + debug3(printf("Using continuous\n")); + path = continuous_gappairs_medialgap; + /* *trim5p = false; */ + + } else if (score < 0) { + debug3(printf("Not enough matches, so trimming it\n")); + path = (List_T) NULL; + /* *trim5p = true; */ + + } else { + debug3(printf("Using indel, because score %d > 0\n",score)); + path = exon; /* exon already has the indel */ + /* *trim5p = false; */ + } + } + + path = Pairpool_transfer(path,pairs); + + pairs = List_reverse(path); + pairs = clean_pairs_end5(pairs,ambig_end_length); + + debug3(printf("End of trim_end5_indels: length = %d\n",List_length(pairs))); + debug3(Pair_dump_list(pairs,true)); + return pairs; + } +} + + +/* Also handles case where novelsplicingp == false */ +/* pairs -> pairs */ +static List_T +trim_end5_exons (bool *indelp, bool *trim5p, int ambig_end_length, List_T pairs, + Dynprog_T dynprog, Chrpos_T chroffset, Chrpos_T chrhigh, + char *queryseq_ptr, char *queryuc_ptr, + int cdna_direction, bool watsonp, bool jump_late_p, + Pairpool_T pairpool, double defect_rate) { List_T path, exon, pairptr, p; - Pair_T pair, medial, splice = NULL, gappair; + Pair_T pair, splice = NULL, gappair; int max_nmatches = 0, max_nmismatches; int nmatches = 0, 
nmismatches /* = -1 because of the gap */, i; int max_score, score; - bool nearindelp = false; + /* bool nearindelp = false; */ double medial_prob; - int nindels; - debug3(printf("Starting trim_end5_exon_indels\n")); + int finalscore, continuous_nmatches, continuous_nmismatches, continuous_nopens, continuous_nindels; + int querydp3_medialgap, genomedp3_medialgap, queryjump, genomejump; + List_T continuous_gappairs_medialgap; + int dynprogindex_minor = 0; + + + debug3(printf("Starting trim_end5_exons with ambig_end_length %d\n",ambig_end_length)); + + *indelp = false; /* Handle first exon */ if (pairs == NULL) { @@ -3660,67 +3852,25 @@ debug3(Pair_dump_one(gappair,true)); debug3(printf("\n")); - if (gappair->comp == INDEL_COMP) { - /* Handle end indel. No longer possible, since we stop only at gapp */ - /* indel = pair; */ - - p = pairs; - nindels = 1; - while (p != NULL && ((Pair_T) p->first)->comp == INDEL_COMP) { - p = List_next(p); - nindels++; - } - - for ( i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) { - medial = (Pair_T) p->first; - if (medial->gapp) { - debug3(printf("Saw splice medial to 5' end indel\n")); - splice = medial; - nearindelp = true; - } else if (medial->comp == MATCH_COMP || medial->comp == DYNPROG_MATCH_COMP || medial->comp == AMBIGUOUS_COMP) { - /* Skip */ - } else { - debug3(printf("Saw mismatch %c medial to 5' end indel\n",medial->comp)); - } - } - - } else { - /* Handle end exon */ - splice = gappair; - debug3(printf("5' end splice length: %d\n",splice->genomejump)); - - for (p = pairs, i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) { - medial = (Pair_T) p->first; - if (medial->comp == MATCH_COMP || medial->comp == DYNPROG_MATCH_COMP || medial->comp == AMBIGUOUS_COMP) { - /* Skip */ - } else if (medial->comp == INDEL_COMP || medial->comp == SHORTGAP_COMP) { - debug3(printf("Saw indel medial to 5' end intron\n")); - nearindelp = true; - } else { - debug3(printf("Saw mismatch %c medial to 5' end 
intron\n",medial->comp)); - } - } + /* Handle end exon */ + splice = gappair; + debug3(printf("5' end splice length: %d\n",splice->genomejump)); #if 0 - /* No longer possible, since we stop at first indel */ - if (exon != NULL) { - /* Skip first pair of exon, which holds the gap */ - for (p = List_next(exon), i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) { - distal = (Pair_T) p->first; - if (distal->comp == MATCH_COMP || distal->comp == DYNPROG_MATCH_COMP || distal->comp == AMBIGUOUS_COMP) { - /* Skip */ - } else if (distal->comp == INDEL_COMP || distal->comp == SHORTGAP_COMP) { - debug3(printf("Saw indel distal to 5' end intron\n")); - nearindelp = true; - } else { - debug3(printf("Saw mismatch %c distal to 5' end intron\n",distal->comp)); - } - } + for (p = pairs, i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) { + medial = (Pair_T) p->first; + if (medial->comp == MATCH_COMP || medial->comp == DYNPROG_MATCH_COMP || medial->comp == AMBIGUOUS_COMP) { + /* Skip */ + } else if (medial->comp == INDEL_COMP || medial->comp == SHORTGAP_COMP) { + debug3(printf("Saw indel medial to 5' end intron\n")); + nearindelp = true; + } else { + debug3(printf("Saw mismatch %c medial to 5' end intron\n",medial->comp)); } -#endif } +#endif - debug3(printf("Before indel/gap, nmatches %d, nmismatches %d\n",max_nmatches,max_nmismatches)); + debug3(printf("Before end intron, nmatches %d, nmismatches %d\n",max_nmatches,max_nmismatches)); if (pairs == NULL) { debug3(printf("No indel/gap\n")); path = exon; @@ -3746,44 +3896,12 @@ *trim5p = false; #endif +#if 0 } else if (nearindelp == true && max_nmatches < INDEL_SPLICE_ENDLENGTH) { debug3(printf("near indel with nmatches %d too low, so trimming it\n",max_nmatches)); path = (List_T) NULL; *trim5p = true; - - } else if (splice == NULL) { - debug3(printf("nindels %d\n",nindels)); - if (max_nmatches < min_indel_end_matches) { - debug3(printf("Not enough matches %d < %d, so trimming 
it\n",max_nmatches,min_indel_end_matches)); - path = (List_T) NULL; - *trim5p = true; - - } else if (nindels > 3) { - /* Large indel */ - if (max_nmatches - max_nmismatches > nindels) { - debug3(printf("Large indel: More matches than mismatches, so keeping it\n")); - path = exon; /* exon already has the indel */ - *trim5p = false; - - } else { - debug3(printf("Large indel: Trimming it\n")); - path = (List_T) NULL; - *trim5p = true; - } - - } else { - /* Small indel */ - if (max_nmatches - max_nmismatches > 2) { - debug3(printf("Small indel: More matches than mismatches, so keeping it\n")); - path = exon; /* exon already has the indel */ - *trim5p = false; - - } else { - debug3(printf("Small indel: Trimming it\n")); - path = (List_T) NULL; - *trim5p = true; - } - } +#endif } else { if (splice->genomejump > maxintronlen_ends) { @@ -3824,25 +3942,58 @@ *trim5p = true; #endif - } else if (sufficient_splice_prob_local(List_length(exon),max_nmatches,max_nmismatches, - /*distal_spliceprob*/cdna_direction >= 0 ? splice->donor_prob : splice->acceptor_prob, + } else if (sufficient_splice_prob_strict(/*distal_spliceprob*/cdna_direction >= 0 ? splice->donor_prob : splice->acceptor_prob, /*medial_spliceprob*/cdna_direction >= 0 ? 
splice->acceptor_prob : splice->donor_prob)) { - /* Want to keep for comparison of fwd and rev, even if probabilities are poor */ debug3(printf("Keeping first 5' exon with %d matches and %d mismatches\n",max_nmatches,max_nmismatches)); path = exon; /* exon already has the gap */ *trim5p = false; } else { - debug3(printf("Fall through (bad probabilities %f and %f): trimming noncanonical 5' exon\n",splice->donor_prob,splice->acceptor_prob)); + querydp3_medialgap = ((Pair_T) pairs->first)->querypos - 1; + genomedp3_medialgap = ((Pair_T) pairs->first)->genomepos - 1; + queryjump = querydp3_medialgap + 1; + genomejump = queryjump + extramaterial_end; + + continuous_gappairs_medialgap = Dynprog_end5_gap(&dynprogindex_minor,&finalscore, + &continuous_nmatches,&continuous_nmismatches,&continuous_nopens,&continuous_nindels, + dynprog,&(queryseq_ptr[querydp3_medialgap]),&(queryuc_ptr[querydp3_medialgap]), + queryjump,genomejump,querydp3_medialgap,genomedp3_medialgap, + chroffset,chrhigh,watsonp,jump_late_p,pairpool, + extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS,/*require_pos_score_p*/true); + debug(printf("CONTINUOUS?\n")); + debug(Pair_dump_list(continuous_gappairs_medialgap,true)); + debug3(printf("continuous finalscore %d\n",finalscore)); + + if (finalscore > 0) { + path = continuous_gappairs_medialgap; + if (continuous_nindels > 0) { + *trim5p = true; /* So calling procedure iterates */ + *indelp = true; /* So calling procedure will call trim_end5_indels */ + } else { + *trim5p = false; + } + + } else if (sufficient_splice_prob_local(List_length(exon),max_nmatches,max_nmismatches, + /*distal_spliceprob*/cdna_direction >= 0 ? splice->donor_prob : splice->acceptor_prob, + /*medial_spliceprob*/cdna_direction >= 0 ? 
splice->acceptor_prob : splice->donor_prob)) { + /* Want to keep for comparison of fwd and rev, even if probabilities are poor */ + debug3(printf("Keeping first 5' exon with %d matches and %d mismatches\n",max_nmatches,max_nmismatches)); + path = exon; /* exon already has the gap */ + *trim5p = false; - medial_prob = (cdna_direction >= 0) ? splice->acceptor_prob : splice->donor_prob; - if (canonicalp(splice->knowngapp,splice->comp,splice->donor_prob,splice->acceptor_prob,cdna_direction) == true && - medial_prob > 0.95) { - *trim5p = false; /* Not really, since we are trimming, but this stops further work */ } else { - *trim5p = true; + /* TODO: Set ambig_end_length_5 here, so default output shows a donor or acceptor end type */ + debug3(printf("Fall through (bad probabilities %f and %f): trimming noncanonical 5' exon\n",splice->donor_prob,splice->acceptor_prob)); + + medial_prob = (cdna_direction >= 0) ? splice->acceptor_prob : splice->donor_prob; + if (canonicalp(splice->knowngapp,splice->comp,splice->donor_prob,splice->acceptor_prob,cdna_direction) == true && + medial_prob > 0.95) { + *trim5p = false; /* Not really, since we are trimming, but this stops further work */ + } else { + *trim5p = true; + } + path = (List_T) NULL; } - path = (List_T) NULL; } } @@ -3859,32 +4010,208 @@ pairs = List_reverse(path); pairs = clean_pairs_end5(pairs,ambig_end_length); - debug3(printf("End of trim_end5_exon_indels: length = %d\n",List_length(pairs))); + debug3(printf("End of trim_end5_exons: length = %d\n",List_length(pairs))); debug3(Pair_dump_list(pairs,true)); return pairs; } - /* Also handles case where novelsplicingp == false */ /* path -> path */ static List_T -trim_end3_exon_indels (bool *trim3p, int ambig_end_length, List_T path, - int cdna_direction +trim_end3_indels (List_T path, int ambig_end_length, + Dynprog_T dynprog, Chrpos_T chroffset, Chrpos_T chrhigh, + char *queryseq_ptr, char *queryuc_ptr, int querylength, + int cdna_direction, bool watsonp, bool 
jump_late_p, + Pairpool_T pairpool, double defect_rate) { + List_T pairs, exon, pairptr, p; + Pair_T pair, medial; + int max_nmatches = 0, max_nmismatches; + int nmatches = 0, nmismatches /* = -1 because of the gap */, i; + int max_score, score; + bool nearindelp = false; + int nindels; + + int finalscore, continuous_nmatches, continuous_nmismatches, continuous_nopens, continuous_nindels; + int querydp5_medialgap, genomedp5_medialgap, queryjump, genomejump; + List_T continuous_gappairs_medialgap; + int dynprogindex_minor = 0; + + debug3(printf("Starting trim_end3_indels\n")); + + /* Handle last exon */ + if (path == NULL) { + /* *trim3p = false; */ + return (List_T) NULL; + } else if (ambig_end_length > 0) { + /* Don't mess with ambiguous end */ + /* *trim3p = false; */ + return path; + } else { + pair = path->first; + debug3(printf("queryend %d\n",pair->querypos)); + } + + exon = (List_T) NULL; + while (path != NULL && pair->comp != INDEL_COMP) { + pairptr = path; + path = Pairpool_pop(path,&pair); +#ifdef WASTE + exon = Pairpool_push_existing(exon,pairpool,pair); +#else + exon = List_push_existing(exon,pairptr); +#endif + } + + while (path != NULL && ((Pair_T) path->first)->comp == INDEL_COMP) { + pairptr = path; + path = Pairpool_pop(path,&pair); #ifdef WASTE - , Pairpool_T pairpool + exon = Pairpool_push_existing(exon,pairpool,pair); +#else + exon = List_push_existing(exon,pairptr); #endif - ) { + } + debug3(printf("End exon:\n")); + debug3(Pair_dump_list(exon,true)); + + + if (exon == NULL) { + /* *trim3p = false; */ + return path; + + } else { + p = exon; + nindels = 1; + while (p != NULL && ((Pair_T) p->first)->comp == INDEL_COMP) { + p = List_next(p); + nindels++; + } + + max_nmatches = max_nmismatches = 0; + nmatches = nmismatches = 0; + max_score = score = 0; + /* Evaluate region distal to indel */ + while (p != NULL) { + pair = (Pair_T) List_head(p); + if (pair->comp == MATCH_COMP || pair->comp == DYNPROG_MATCH_COMP || pair->comp == AMBIGUOUS_COMP) { + 
score += 1; + nmatches += 1; + } else { + score -= 3; + nmismatches += 1; + } + if (score > max_score) { + max_score = score; + max_nmatches = nmatches; + max_nmismatches = nmismatches; + } + debug3(printf("3' querypos %d => score %d, max_nmatches %d, max_nmismatches %d\n", + pair->querypos,score,max_nmatches,max_nmismatches)); + p = List_next(p); + } + +#if 0 + for ( i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) { + medial = (Pair_T) p->first; + if (medial->gapp) { + debug3(printf("Saw splice medial to 3' end indeln")); + nearindelp = true; + } else if (medial->comp == MATCH_COMP || medial->comp == DYNPROG_MATCH_COMP || medial->comp == AMBIGUOUS_COMP) { + /* Skip */ + } else { + debug3(printf("Saw mismatch medial %c to 3' end indel\n",medial->comp)); + } + } +#endif + + debug3(printf("Before indel/gap, nmatches %d, nmismatches %d\n",max_nmatches,max_nmismatches)); + if (path == NULL) { + debug3(printf("No indel/gap\n")); + pairs = exon; + /* *trim3p = false; */ + + } else if (exon == NULL) { + debug3(printf("No 3' exon\n")); + pairs = exon; + /* *trim3p = false; */ + +#if 0 + } else if (nearindelp == true && max_nmatches < INDEL_SPLICE_ENDLENGTH) { + debug3(printf("near indel with nmatches %d too low, so trimming it\n",max_nmatches)); + pairs = (List_T) NULL; + /* *trim3p = true; */ +#endif + + } else { + querydp5_medialgap = ((Pair_T) path->first)->querypos + 1; + genomedp5_medialgap = ((Pair_T) path->first)->genomepos + 1; + queryjump = querylength - querydp5_medialgap; + genomejump = queryjump /*+ extramaterial_end*/; + + continuous_gappairs_medialgap = Dynprog_end3_gap(&dynprogindex_minor,&finalscore, + &continuous_nmatches,&continuous_nmismatches,&continuous_nopens,&continuous_nindels, + dynprog,&(queryseq_ptr[querydp5_medialgap]),&(queryuc_ptr[querydp5_medialgap]), + queryjump,genomejump,querydp5_medialgap,genomedp5_medialgap, + chroffset,chrhigh,watsonp,jump_late_p,pairpool, + 
extraband_end,defect_rate,/*endalign*/QUERYEND_NOGAPS,/*require_pos_score_p*/true); + debug(printf("CONTINUOUS?\n")); + debug(Pair_dump_list(continuous_gappairs_medialgap,true)); + debug3(printf("continuous finalscore %d\n",finalscore)); + + if (finalscore > 0) { + debug3(printf("Using continuous\n")); + pairs = List_reverse(continuous_gappairs_medialgap); + /* *trim3p = false; */ + + } else if (score < 0) { + debug3(printf("Not enough matches, so trimming it\n")); + pairs = (List_T) NULL; + /* *trim3p = true; */ + + } else { + debug3(printf("Using indel, because score %d > 0\n",score)); + pairs = exon; + /* *trim3p = false; */ + } + } + + pairs = Pairpool_transfer(pairs,path); + + path = List_reverse(pairs); + path = clean_path_end3(path,ambig_end_length); + + debug3(printf("End of trim_end3_indels: length = %d\n",List_length(path))); + debug3(Pair_dump_list(path,true)); + return path; + } +} + + +/* Also handles case where novelsplicingp == false */ +/* path -> path */ +static List_T +trim_end3_exons (bool *indelp, bool *trim3p, int ambig_end_length, List_T path, + Dynprog_T dynprog, Chrpos_T chroffset, Chrpos_T chrhigh, + char *queryseq_ptr, char *queryuc_ptr, int querylength, + int cdna_direction, bool watsonp, bool jump_late_p, + Pairpool_T pairpool, double defect_rate) { List_T pairs, exon, pairptr, p; - Pair_T pair, medial, splice = NULL, gappair; + Pair_T pair, splice = NULL, gappair; int max_nmatches = 0, max_nmismatches; int nmatches = 0, nmismatches /* = -1 because of the gap */, i; int max_score, score; - bool nearindelp = false; + /* bool nearindelp = false; */ double medial_prob; - int nindels; - debug3(printf("Starting trim_end3_exon_indels\n")); + int finalscore, continuous_nmatches, continuous_nmismatches, continuous_nopens, continuous_nindels; + int querydp5_medialgap, genomedp5_medialgap, queryjump, genomejump; + List_T continuous_gappairs_medialgap; + int dynprogindex_minor = 0; + + debug3(printf("Starting trim_end3_exons with ambig_end_length 
%d\n",ambig_end_length)); + + *indelp = false; /* Handle last exon */ if (path == NULL) { @@ -3908,7 +4235,7 @@ } exon = (List_T) NULL; - while (path != NULL && !pair->gapp /*&& pair->comp != INDEL_COMP*/) { + while (path != NULL && !pair->gapp) { pairptr = path; path = Pairpool_pop(path,&pair); #ifdef WASTE @@ -3953,69 +4280,27 @@ debug3(Pair_dump_one(gappair,true)); debug3(printf("\n")); - if (gappair->comp == INDEL_COMP) { - /* Handle end indel. No longer possible, since we stop only at gapp */ - /* indel = pair; */ - - p = path; - nindels = 1; - while (p != NULL && ((Pair_T) p->first)->comp == INDEL_COMP) { - p = List_next(p); - nindels++; - } - - for ( i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) { - medial = (Pair_T) p->first; - if (medial->gapp) { - debug3(printf("Saw splice medial to 3' end indeln")); - splice = medial; - nearindelp = true; - } else if (medial->comp == MATCH_COMP || medial->comp == DYNPROG_MATCH_COMP || medial->comp == AMBIGUOUS_COMP) { - /* Skip */ - } else { - debug3(printf("Saw mismatch medial %c to 3' end indel\n",medial->comp)); - } - } - - } else { - /* Handle end exon */ - splice = gappair; - debug3(printf("3' end splice length: %d\n",splice->genomejump)); - - for (p = path, i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) { - medial = (Pair_T) p->first; - if (medial->comp == MATCH_COMP || medial->comp == DYNPROG_MATCH_COMP || medial->comp == AMBIGUOUS_COMP) { - /* Skip */ - } else if (medial->comp == INDEL_COMP || medial->comp == SHORTGAP_COMP) { - debug3(printf("Saw indel medial to 3' end intron\n")); - nearindelp = true; - } else { - debug3(printf("Saw mismatch medial %c to 3' end intron\n",medial->comp)); - } - } + /* Handle end exon */ + splice = gappair; + debug3(printf("3' end splice length: %d\n",splice->genomejump)); #if 0 - /* No longer possible, since we stop at first indel */ - if (exon != NULL) { - /* Skip first pair of exon, which holds the gap */ - for (p = List_next(exon), i = 0; p != NULL 
&& i < NEARBY_INDEL; p = List_next(p), i++) { - distal = (Pair_T) p->first; - if (distal->comp == MATCH_COMP || distal->comp == DYNPROG_MATCH_COMP || distal->comp == AMBIGUOUS_COMP) { - /* Skip */ - } else if (distal->comp == INDEL_COMP || distal->comp == SHORTGAP_COMP) { - debug3(printf("Saw indel distal to 3' end intron\n")); - nearindelp = true; - } else { - debug3(printf("Saw mismatch %c distal to 3' end intron\n",distal->comp)); - } - } + for (p = path, i = 0; p != NULL && i < NEARBY_INDEL; p = List_next(p), i++) { + medial = (Pair_T) p->first; + if (medial->comp == MATCH_COMP || medial->comp == DYNPROG_MATCH_COMP || medial->comp == AMBIGUOUS_COMP) { + /* Skip */ + } else if (medial->comp == INDEL_COMP || medial->comp == SHORTGAP_COMP) { + debug3(printf("Saw indel medial to 3' end intron\n")); + nearindelp = true; + } else { + debug3(printf("Saw mismatch medial %c to 3' end intron\n",medial->comp)); } -#endif } +#endif - debug3(printf("Before indel/gap, nmatches %d, nmismatches %d\n",max_nmatches,max_nmismatches)); + debug3(printf("Before end intron, nmatches %d, nmismatches %d\n",max_nmatches,max_nmismatches)); if (path == NULL) { - debug3(printf("No indel/gap\n")); + debug3(printf("No gap\n")); pairs = exon; *trim3p = false; @@ -4039,45 +4324,13 @@ *trim3p = false; #endif +#if 0 } else if (nearindelp == true && max_nmatches < INDEL_SPLICE_ENDLENGTH) { debug3(printf("near indel with nmatches %d too low, so trimming it\n",max_nmatches)); pairs = (List_T) NULL; *trim3p = true; +#endif - } else if (splice == NULL) { - debug3(printf("nindels %d\n",nindels)); - if (max_nmatches < min_indel_end_matches) { - debug3(printf("Not enough matches %d < %d, so trimming it\n",max_nmatches,min_indel_end_matches)); - pairs = (List_T) NULL; - *trim3p = true; - - } else if (nindels > 3) { - /* Large indel */ - if (max_nmatches - max_nmismatches > nindels) { - debug3(printf("Large indel: More matches than mismatches, so keeping it\n")); - pairs = exon; /* exon already has the 
indel */ - *trim3p = false; - - } else { - debug3(printf("Large indel: Trimming it\n")); - pairs = (List_T) NULL; - *trim3p = true; - } - - } else { - /* Small indel */ - if (max_nmatches - max_nmismatches > 2) { - debug3(printf("Small indel: More matches than mismatches, so keeping it\n")); - pairs = exon; /* exon already has the indel */ - *trim3p = false; - - } else { - debug3(printf("Small indel: Trimming it\n")); - pairs = (List_T) NULL; - *trim3p = true; - } - } - } else { if (splice->genomejump > maxintronlen_ends) { debug3(printf("End intron is too long, so trimming it\n")); @@ -4117,25 +4370,58 @@ *trim3p = true; #endif - } else if (sufficient_splice_prob_local(List_length(exon),max_nmatches,max_nmismatches, - /*distal_spliceprob*/cdna_direction >= 0 ? splice->acceptor_prob : splice->donor_prob, - /*medial_spliceprob*/cdna_direction >= 0 ? splice->donor_prob : splice->acceptor_prob)) { - /* Want to keep for comparison of fwd and rev, even if probabilities are poor */ + } else if (sufficient_splice_prob_strict(/*distal_spliceprob*/cdna_direction >= 0 ? splice->acceptor_prob : splice->donor_prob, + /*medial_spliceprob*/cdna_direction >= 0 ? 
splice->donor_prob : splice->acceptor_prob)) { debug3(printf("Keeping last 3' exon with %d matches and %d mismatches\n",max_nmatches,max_nmismatches)); pairs = exon; /* exon already has the gap */ *trim3p = false; - + } else { - debug3(printf("Fall through (bad probabilities %f and %f): trimming noncanonical 3' exon\n",splice->donor_prob,splice->acceptor_prob)); + querydp5_medialgap = ((Pair_T) path->first)->querypos + 1; + genomedp5_medialgap = ((Pair_T) path->first)->genomepos + 1; + queryjump = querylength - querydp5_medialgap; + genomejump = queryjump + extramaterial_end; + + continuous_gappairs_medialgap = Dynprog_end3_gap(&dynprogindex_minor,&finalscore, + &continuous_nmatches,&continuous_nmismatches,&continuous_nopens,&continuous_nindels, + dynprog,&(queryseq_ptr[querydp5_medialgap]),&(queryuc_ptr[querydp5_medialgap]), + queryjump,genomejump,querydp5_medialgap,genomedp5_medialgap, + chroffset,chrhigh,watsonp,jump_late_p,pairpool, + extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS,/*require_pos_score_p*/true); + debug(printf("CONTINUOUS?\n")); + debug(Pair_dump_list(continuous_gappairs_medialgap,true)); + debug3(printf("continuous finalscore %d\n",finalscore)); + + if (finalscore > 0) { + pairs = List_reverse(continuous_gappairs_medialgap); + if (continuous_nindels > 0) { + *trim3p = true; /* So calling procedure iterates */ + *indelp = true; /* So calling procedure will call trim_end3_indels */ + } else { + *trim3p = false; + } + + } else if (sufficient_splice_prob_local(List_length(exon),max_nmatches,max_nmismatches, + /*distal_spliceprob*/cdna_direction >= 0 ? splice->acceptor_prob : splice->donor_prob, + /*medial_spliceprob*/cdna_direction >= 0 ? 
splice->donor_prob : splice->acceptor_prob)) { + /* Want to keep for comparison of fwd and rev, even if probabilities are poor */ + debug3(printf("Keeping last 3' exon with %d matches and %d mismatches\n",max_nmatches,max_nmismatches)); + pairs = exon; /* exon already has the gap */ + *trim3p = false; - medial_prob = (cdna_direction >= 0) ? splice->donor_prob : splice->acceptor_prob; - if (canonicalp(splice->knowngapp,splice->comp,splice->donor_prob,splice->acceptor_prob,cdna_direction) == true && - medial_prob > 0.95) { - *trim3p = false; /* Not really, since we are trimming, but this stops further work */ } else { - *trim3p = true; + /* TODO: Set ambig_end_length_3 here, so default output shows a donor or acceptor end type */ + debug3(printf("Fall through (bad probabilities %f and %f): trimming noncanonical 3' exon\n",splice->donor_prob,splice->acceptor_prob)); + + medial_prob = (cdna_direction >= 0) ? splice->donor_prob : splice->acceptor_prob; + if (canonicalp(splice->knowngapp,splice->comp,splice->donor_prob,splice->acceptor_prob,cdna_direction) == true && + medial_prob > 0.95) { + *trim3p = false; /* Not really, since we are trimming, but this stops further work */ + } else { + *trim3p = true; + } + pairs = (List_T) NULL; } - pairs = (List_T) NULL; } } @@ -4151,7 +4437,7 @@ path = List_reverse(pairs); path = clean_path_end3(path,ambig_end_length); - debug3(printf("End of trim_noncanonical_end3_exons: length = %d\n",List_length(path))); + debug3(printf("End of trim_end3_exons: length = %d\n",List_length(path))); debug3(Pair_dump_list(path,true)); return path; } @@ -6565,8 +6851,8 @@ intron_nt = get_genomic_nt(&intron_nt_alt,genomedp3--,chroffset,chrhigh,watsonp); if ((cdna = ((Pair_T) path->first)->cdna) != intron_nt && cdna != intron_nt_alt) { - debug(printf(" Mismatch %c != %c or %c at %u\n",cdna,intron_nt,intron_nt_alt,genomedp3+1)); nmismatches++; + debug(printf(" (1) Intron mismatch #%d: %c != %c or %c at 
%u\n",nmismatches,cdna,intron_nt,intron_nt_alt,genomedp3+1)); } if (((Pair_T) path->first)->protectedp == true) { @@ -6595,8 +6881,8 @@ intron_nt = get_genomic_nt(&intron_nt_alt,genomedp3--,chroffset,chrhigh,watsonp); if ((cdna = ((Pair_T) path->first)->cdna) != intron_nt && cdna != intron_nt_alt) { - debug(printf(" Mismatch %c != %c or %c at %u\n",cdna,intron_nt,intron_nt_alt,genomedp3+1)); nmismatches++; + debug(printf(" (2) Intron mismatch #%d: %c != %c or %c at %u\n",nmismatches,cdna,intron_nt,intron_nt_alt,genomedp3+1)); } if (((Pair_T) path->first)->comp == MATCH_COMP || ((Pair_T) path->first)->comp == DYNPROG_MATCH_COMP || ((Pair_T) path->first)->comp == AMBIGUOUS_COMP) { @@ -7194,8 +7480,8 @@ intron_nt = get_genomic_nt(&intron_nt_alt,genomedp5++,chroffset,chrhigh,watsonp); if ((cdna = ((Pair_T) pairs->first)->cdna) != intron_nt && cdna != intron_nt_alt) { - debug(printf(" Mismatch %c != %c or %c at %u\n",cdna,intron_nt,intron_nt_alt,genomedp5-1)); nmismatches++; + debug(printf(" (3) Intron mismatch #%d: %c != %c or %c at %u\n",nmismatches,cdna,intron_nt,intron_nt_alt,genomedp5-1)); } if (((Pair_T) pairs->first)->protectedp == true) { @@ -7224,8 +7510,8 @@ intron_nt = get_genomic_nt(&intron_nt_alt,genomedp5++,chroffset,chrhigh,watsonp); if ((cdna = ((Pair_T) pairs->first)->cdna) != intron_nt && cdna != intron_nt_alt) { - debug(printf(" Mismatch %c != %c or %c at %u\n",cdna,intron_nt,intron_nt_alt,genomedp5-1)); nmismatches++; + debug(printf(" (4) Intron mismatch #%d: %c != %c or %c at %u\n",nmismatches,cdna,intron_nt,intron_nt_alt,genomedp5-1)); } if (((Pair_T) pairs->first)->comp == MATCH_COMP || ((Pair_T) pairs->first)->comp == DYNPROG_MATCH_COMP || ((Pair_T) pairs->first)->comp == AMBIGUOUS_COMP) { @@ -8489,7 +8775,7 @@ &(queryseq_ptr[querydp3_medialgap]),&(queryuc_ptr[querydp3_medialgap]), queryjump,genomejump,querydp3_medialgap,genomedp3_medialgap, chroffset,chrhigh,watsonp,jump_late_p,pairpool, - extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS); 
+ extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS,/*require_pos_score_p*/false); *ambig_end_length = 0; *ambig_prob = 0.0; @@ -8600,7 +8886,7 @@ &(queryseq_ptr[querydp3_distalgap]),&(queryuc_ptr[querydp3_distalgap]), queryjump,genomejump,querydp3_distalgap,genomedp3_distalgap, chroffset,chrhigh,watsonp,jump_late_p,pairpool, - extraband_end,defect_rate,endalign); + extraband_end,defect_rate,endalign,/*require_pos_score_p*/false); *ambig_end_length = 0; *ambig_prob = 0.0; *knownsplicep = false; @@ -8618,7 +8904,7 @@ firstpair->querypos,querydp3_distalgap)); return (List_T) NULL; - } else if (*finalscore < 0) { + } else if (*finalscore <= 0) { *knownsplicep = false; #if 0 return (List_T) NULL; @@ -8656,6 +8942,7 @@ genomedp5_distalgap = leftpair->genomepos + 1; /* if (leftpair->cdna == ' ') querydp5_distalgap--; -- For old dynamic programming */ /* if (leftpair->genome == ' ') genomedp5_distalgap--; -- For old dynamic programming */ + querydp5_medialgap = querydp5_distalgap; genomedp5_medialgap = genomedp5_distalgap; querydp3 = rightquerypos - 1; @@ -8709,10 +8996,14 @@ &(queryseq_ptr[querydp5_medialgap]),&(queryuc_ptr[querydp5_medialgap]), queryjump,genomejump,querydp5_medialgap,genomedp5_medialgap, chroffset,chrhigh,watsonp,jump_late_p,pairpool, - extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS); + extraband_end,defect_rate,/*endalign*/QUERYEND_INDELS, + /*require_pos_score_p*/false); *ambig_end_length = 0; *ambig_prob = 0.0; + debug(printf("Medial gap\n")); + debug(Pair_dump_list(continuous_gappairs_medialgap,true)); + continuous_goodness_medialgap = nmatches + MISMATCH*nmismatches + QOPEN*nopens + QINDEL*nindels; debug(printf("Continuous_goodness_medialgap %d = %d + %d*%d + %d*%d + %d*%d\n", continuous_goodness_medialgap,nmatches,MISMATCH,nmismatches,QOPEN,nopens,QINDEL,nindels)); @@ -8816,7 +9107,7 @@ &(queryseq_ptr[querydp5_distalgap]),&(queryuc_ptr[querydp5_distalgap]), queryjump,genomejump,querydp5_distalgap,genomedp5_distalgap, 
chroffset,chrhigh,watsonp,jump_late_p,pairpool, - extraband_end,defect_rate,endalign); + extraband_end,defect_rate,endalign,/*require_pos_score_p*/false); *ambig_end_length = 0; *ambig_prob = 0.0; *knownsplicep = false; @@ -8835,7 +9126,7 @@ firstpair->querypos,querydp5_distalgap)); return (List_T) NULL; - } else if (*finalscore < 0) { + } else if (*finalscore <= 0) { *knownsplicep = false; #if 0 return (List_T) NULL; @@ -8871,7 +9162,6 @@ Univcoord_T splice_genomepos_5, splice_genomepos_3, splice_genomepos_5_mm, splice_genomepos_3_mm; Univcoord_T start, middle, end; /* start to middle has mismatches, while middle to end has none */ double donor_prob, acceptor_prob; - double max_prob_5 = 0.0, max_prob_3 = 0.0, max_prob_5_mm = 0.0, max_prob_3_mm = 0.0; debug13(printf("\nEntered find_dual_break_spliceends with cdna_direction %d\n",cdna_direction)); @@ -9722,6 +10012,7 @@ pairs = Pairpool_push_gapholder(pairs,pairpool,/*queryjump*/UNKNOWNJUMP,/*genomejump*/UNKNOWNJUMP, /*leftpair*/(*path)->first,/*rightpair*/pairs->first,/*knownp*/false); } + } else { lastpair = (Pair_T) gappairs->first; firstpair = (Pair_T) List_last_value(gappairs); @@ -9731,6 +10022,7 @@ /* fprintf(stderr,"%d..%d .. 
%d..%d\n",querydp5,firstpair->querypos,lastpair->querypos,querydp3); */ debug14(printf(" => entire query sequence bridged or not, but taking it regardless\n")); pairs = Pairpool_transfer(pairs,gappairs); + } else { debug14(printf(" => entire query sequence not bridged, so abort\n")); pairs = Pairpool_transfer(pairs,peeled_pairs); @@ -10184,6 +10476,7 @@ bool left_end_intron_p = false, right_end_intron_p, exonp; debug(printf("\n** Starting build_pairs_dualintrons\n")); + debug(Pair_dump_list(path,true)); /* Remove gaps at beginning */ while (path != NULL && ((Pair_T) path->first)->gapp == true) { @@ -12041,6 +12334,12 @@ #endif + /* Re-evaluate any small exons inserted by build_dual_breaks */ + path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool); + pairs = assign_intron_probs(path,cdna_direction,watsonp,chrnum,chroffset,chrhigh,pairpool); + Smooth_reset(pairs); + pairs = Smooth_pairs_by_intronprobs(&badp,pairs,pairpool); + debug(printf("*** Pass 6 (dir %d): Solve dual introns. 
Iteration0 %d, Iteration1 %d\n", cdna_direction,iter0,iter1)); if (badp == false && shortp == false && deletep == false) { @@ -12117,7 +12416,7 @@ ncanonical, nsemicanonical, nnoncanonical; double min_splice_prob; bool knownsplice5p, chop_exon_p; - bool trim5p; + bool trim5p, indelp; *ambig_end_length_5 = 0; *ambig_prob_5 = 0.0; @@ -12192,7 +12491,14 @@ /* Using iter1 to avoid the possibility of an infinite loop */ iter1 = 0; while (iter1 < 5 && trim5p == true) { - pairs = trim_end5_exon_indels(&trim5p,*ambig_end_length_5,pairs,cdna_direction); + pairs = trim_end5_exons(&indelp,&trim5p,*ambig_end_length_5,pairs,dynprogR,chroffset,chrhigh, + queryseq_ptr,queryuc_ptr, + cdna_direction,watsonp,jump_late_p,pairpool,defect_rate); + if (indelp == true) { + pairs = trim_end5_indels(pairs,*ambig_end_length_5,dynprogR,chroffset,chrhigh, + queryseq_ptr,queryuc_ptr, + cdna_direction,watsonp,jump_late_p,pairpool,defect_rate); + } if (trim5p == true) { pairs = build_pairs_end5(&knownsplice5p,&(*ambig_end_length_5),&(*ambig_splicetype_5),&(*ambig_prob_5), &chop_exon_p,&dynprogindex_minor,pairs, @@ -12259,7 +12565,7 @@ ncanonical, nsemicanonical, nnoncanonical; double min_splice_prob; bool knownsplice3p, chop_exon_p; - bool trim3p; + bool trim3p, indelp; *ambig_end_length_3 = 0; *ambig_prob_3 = 0.0; @@ -12333,7 +12639,14 @@ /* Using iter1 to avoid the possibility of an infinite loop */ iter1 = 0; while (iter1 < 5 && trim3p == true) { - path = trim_end3_exon_indels(&trim3p,*ambig_end_length_3,path,cdna_direction); + path = trim_end3_exons(&indelp,&trim3p,*ambig_end_length_3,path,dynprogL,chroffset,chrhigh, + queryseq_ptr,queryuc_ptr,querylength, + cdna_direction,watsonp,jump_late_p,pairpool,defect_rate); + if (indelp == true) { + path = trim_end3_indels(path,*ambig_end_length_3,dynprogL,chroffset,chrhigh, + queryseq_ptr,queryuc_ptr,querylength, + cdna_direction,watsonp,jump_late_p,pairpool,defect_rate); + } if (trim3p == true) { path = 
build_path_end3(&knownsplice3p,&(*ambig_end_length_3),&(*ambig_splicetype_3),&(*ambig_prob_3), &chop_exon_p,&dynprogindex_minor,path, @@ -12477,15 +12790,21 @@ int splice_sensedir_5, splice_sensedir_3; /* int splice_cdna_direction_5_mm, splice_cdna_direction_3_mm; */ int splice_sensedir_5_mm, splice_sensedir_3_mm; + int nmismatches, *scorei; bool mismatchp; - debug13(printf("\nEntered gmap_trim_novel_spliceends with orig_sensedir %d, ambig_end_lengths %d and %d\n", orig_sensedir,*ambig_end_length_5,*ambig_end_length_3)); *new_sensedir = SENSE_NULL; Pair_trim_distances(&trim5,&trim3,pairs); debug13(printf("Trim distances are %d and %d\n",trim5,trim3)); + if (trim5 > trim3) { + scorei = (int *) MALLOC((trim5 + 1) * sizeof(int)); + } else { + scorei = (int *) MALLOC((trim3 + 1) * sizeof(int)); + } + path = List_reverse(pairs); if (path != NULL && knownsplice3p == false && *ambig_end_length_3 == 0 @@ -12504,6 +12823,7 @@ } i = 0; + nmismatches = 0; while (i < trim3) { if ((p = List_next(p)) == NULL) { break; @@ -12511,15 +12831,18 @@ break; } else if (pair->comp == MATCH_COMP || pair->comp == DYNPROG_MATCH_COMP || pair->comp == AMBIGUOUS_COMP) { middle = pair->genomepos; + scorei[i] = nmismatches; debug13(printf("Resetting middle to be %u\n",middle)); } else { middle = pair->genomepos; + scorei[i] = ++nmismatches; mismatchp = true; debug13(printf("Resetting middle to be %u\n",middle)); } pair = (Pair_T) List_head(p); i++; } + scorei[i] = ++nmismatches; while (i < trim3 + END_SPLICESITE_SEARCH) { if ((p = List_next(p)) == NULL) { @@ -12558,15 +12881,22 @@ /* assert(start_genomicpos >= end_genomicpos); */ genomicpos = start_genomicpos; + i = 0; + while (genomicpos >= middle_genomicpos && scorei[i] < nmismatches - 2) { + debug13(printf("3', watson, sense anti %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i])); + genomicpos--; + i++; + } while (genomicpos >= middle_genomicpos && genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) { 
donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 1 */ - debug13(printf("3', watson, sense anti %u %u %f mm\n",chroffset+genomicpos,genomicpos,donor_prob)); + debug13(printf("3', watson, sense anti %u %u %f mm %d\n",chroffset+genomicpos,genomicpos,donor_prob,nmismatches - scorei[i])); if (donor_prob > max_prob_3_mm) { max_prob_3_mm = donor_prob; splice_genomepos_3_mm = genomicpos - 1; } genomicpos--; + debug13(i++); } while (genomicpos >= end_genomicpos && genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) { @@ -12589,15 +12919,22 @@ /* assert(start_genomicpos <= end_genomicpos); */ genomicpos = start_genomicpos; + i = 0; + while (genomicpos <= middle_genomicpos && scorei[i] < nmismatches - 2) { + debug13(printf("3', crick, sense forward %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i])); + genomicpos++; + i++; + } while (genomicpos <= middle_genomicpos && genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) { donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 3 */ - debug13(printf("3', crick, sense forward %u %u %f mm\n",chroffset+genomicpos,genomicpos,donor_prob)); + debug13(printf("3', crick, sense forward %u %u %f mm %d\n",chroffset+genomicpos,genomicpos,donor_prob,nmismatches - scorei[i])); if (donor_prob > max_prob_3_mm) { max_prob_3_mm = donor_prob; splice_genomepos_3_mm = (chrhigh - chroffset) - genomicpos; } genomicpos++; + debug13(i++); } while (genomicpos <= end_genomicpos && genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) { @@ -12622,15 +12959,22 @@ /* assert(start_genomicpos >= end_genomicpos); */ genomicpos = start_genomicpos; + i = 0; + while (genomicpos >= middle_genomicpos && scorei[i] < nmismatches - 2) { + debug13(printf("3', watson, sense forward %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i])); + genomicpos--; + i++; + } while (genomicpos >= middle_genomicpos && genomicpos >= end_genomicpos - exondist + 
END_MIN_EXONLENGTH) { acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 5 */ - debug13(printf("3', watson, sense forward %u %u %f mm\n",chroffset+genomicpos,genomicpos,acceptor_prob)); + debug13(printf("3', watson, sense forward %u %u %f mm %d\n",chroffset+genomicpos,genomicpos,acceptor_prob,nmismatches - scorei[i])); if (acceptor_prob > max_prob_3_mm) { max_prob_3_mm = acceptor_prob; splice_genomepos_3_mm = genomicpos - 1; } genomicpos--; + debug13(i++); } while (genomicpos >= end_genomicpos && genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) { @@ -12653,15 +12997,22 @@ /* assert(start_genomicpos <= end_genomicpos); */ genomicpos = start_genomicpos; + i = 0; + while (genomicpos <= middle_genomicpos && scorei[i] < nmismatches - 2) { + debug13(printf("3', crick, sense anti %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i])); + genomicpos++; + i++; + } while (genomicpos <= middle_genomicpos && genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) { acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 7 */ - debug13(printf("3', crick, sense anti %u %u %f mm\n",chroffset+genomicpos,genomicpos,acceptor_prob)); + debug13(printf("3', crick, sense anti %u %u %f mm %d\n",chroffset+genomicpos,genomicpos,acceptor_prob,nmismatches - scorei[i])); if (acceptor_prob > max_prob_3_mm) { max_prob_3_mm = acceptor_prob; splice_genomepos_3_mm = (chrhigh - chroffset) - genomicpos; } genomicpos++; + debug13(i++); } while (genomicpos <= end_genomicpos && genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) { @@ -12684,11 +13035,17 @@ /* assert(start_genomicpos >= end_genomicpos); */ genomicpos = start_genomicpos; + i = 0; + while (genomicpos >= middle_genomicpos && scorei[i] < nmismatches - 2) { + debug13(printf("3', watson, sense null %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i])); + genomicpos--; + i++; + } while (genomicpos >= 
middle_genomicpos && genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) { donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 1 */ acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 5 */ - debug13(printf("3', watson, sense null %u %u %f %f mm\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob)); + debug13(printf("3', watson, sense null %u %u %f %f mm %d\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob,nmismatches - scorei[i])); if (donor_prob > max_prob_sense_forward_3_mm) { max_prob_sense_forward_3_mm = donor_prob; if (donor_prob > max_prob_3_mm) { @@ -12710,6 +13067,7 @@ } } genomicpos--; + debug13(i++); } while (genomicpos >= end_genomicpos && genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) { @@ -12747,11 +13105,17 @@ /* assert(start_genomicpos <= end_genomicpos); */ genomicpos = start_genomicpos; + i = 0; + while (genomicpos <= middle_genomicpos && scorei[i] < nmismatches - 2) { + debug13(printf("3', crick, sense null %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i])); + genomicpos++; + i++; + } while (genomicpos <= middle_genomicpos && genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) { donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 3 */ acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 7 */ - debug13(printf("3', crick, sense null %u %u %f %f mm\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob)); + debug13(printf("3', crick, sense null %u %u %f %f mm %d\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob,nmismatches - scorei[i])); if (donor_prob > max_prob_sense_forward_3_mm) { max_prob_sense_forward_3_mm = donor_prob; if (donor_prob > max_prob_3_mm) { @@ -12773,6 +13137,7 @@ } } genomicpos++; + debug13(i++); } while (genomicpos <= end_genomicpos && genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) { @@ -12837,6 +13202,8 @@ } } + 
/* 5' end */ + pairs = List_reverse(path); if (pairs != NULL && knownsplice5p == false && *ambig_end_length_5 == 0 /* && exon_length_5(pairs) >= END_SPLICESITE_EXON_LENGTH */) { @@ -12854,6 +13221,7 @@ } i = 0; + nmismatches = 0; while (i < trim5) { if ((p = List_next(p)) == NULL) { break; @@ -12861,15 +13229,18 @@ break; } else if (pair->comp == MATCH_COMP || pair->comp == DYNPROG_MATCH_COMP || pair->comp == AMBIGUOUS_COMP) { middle = pair->genomepos; + scorei[i] = nmismatches; debug13(printf("Resetting middle to be %u\n",middle)); } else { middle = pair->genomepos; + scorei[i] = ++nmismatches; mismatchp = true; debug13(printf("Resetting middle to be %u\n",middle)); } pair = (Pair_T) List_head(p); i++; } + scorei[i] = nmismatches; while (i < trim5 + END_SPLICESITE_SEARCH) { if ((p = List_next(p)) == NULL) { @@ -12908,15 +13279,22 @@ /* assert(start_genomicpos <= end_genomicpos); */ genomicpos = start_genomicpos; + i = 0; + while (genomicpos <= middle_genomicpos && scorei[i] < nmismatches - 2) { + debug13(printf("5', watson, sense forward %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i])); + genomicpos++; + i++; + } while (genomicpos <= middle_genomicpos && genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) { acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 2 */ - debug13(printf("5', watson, sense forward %u %u %f mm\n",chroffset+genomicpos,genomicpos,acceptor_prob)); + debug13(printf("5', watson, sense forward %u %u %f mm %d\n",chroffset+genomicpos,genomicpos,acceptor_prob,nmismatches - scorei[i])); if (acceptor_prob > max_prob_5_mm) { max_prob_5_mm = acceptor_prob; splice_genomepos_5_mm = genomicpos; } genomicpos++; + debug13(i++); } while (genomicpos <= end_genomicpos && genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) { @@ -12939,15 +13317,22 @@ /* assert(start_genomicpos >= end_genomicpos); */ genomicpos = start_genomicpos; + i = 0; + while (genomicpos >= middle_genomicpos && 
scorei[i] < nmismatches - 2) { + debug13(printf("5', crick, sense anti %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i])); + genomicpos--; + i++; + } while (genomicpos >= middle_genomicpos && genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) { acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 4 */ - debug13(printf("5', crick, sense anti %u %u %f mm\n",chroffset+genomicpos,genomicpos,acceptor_prob)); + debug13(printf("5', crick, sense anti %u %u %f mm %d\n",chroffset+genomicpos,genomicpos,acceptor_prob,nmismatches - scorei[i])); if (acceptor_prob > max_prob_5_mm) { max_prob_5_mm = acceptor_prob; splice_genomepos_5_mm = (chrhigh - chroffset) - genomicpos + 1; } genomicpos--; + debug13(i++); } while (genomicpos >= end_genomicpos && genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) { @@ -12972,15 +13357,22 @@ /* assert(start_genomicpos <= end_genomicpos); */ genomicpos = start_genomicpos; + i = 0; + while (genomicpos <= middle_genomicpos && scorei[i] < nmismatches - 2) { + debug13(printf("5', watson, sense anti %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i])); + genomicpos++; + i++; + } while (genomicpos <= middle_genomicpos && genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) { donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 6 */ - debug13(printf("5', watson, sense anti %u %u %f mm\n",chroffset+genomicpos,genomicpos,donor_prob)); + debug13(printf("5', watson, sense anti %u %u %f mm %d\n",chroffset+genomicpos,genomicpos,donor_prob,nmismatches - scorei[i])); if (donor_prob > max_prob_5_mm) { max_prob_5_mm = donor_prob; splice_genomepos_5_mm = genomicpos; } genomicpos++; + debug13(i++); } while (genomicpos <= end_genomicpos && genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) { @@ -13003,15 +13395,22 @@ /* assert(start_genomicpos >= end_genomicpos); */ genomicpos = start_genomicpos; + i = 0; + while 
(genomicpos >= middle_genomicpos && scorei[i] < nmismatches - 2) { + debug13(printf("5', crick, sense forward %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i])); + genomicpos--; + i++; + } while (genomicpos >= middle_genomicpos && genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) { donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 8 */ - debug13(printf("5', crick, sense forward %u %u %f mm\n",chroffset+genomicpos,genomicpos,donor_prob)); + debug13(printf("5', crick, sense forward %u %u %f mm %d\n",chroffset+genomicpos,genomicpos,donor_prob,nmismatches - scorei[i])); if (donor_prob > max_prob_5_mm) { max_prob_5_mm = donor_prob; splice_genomepos_5_mm = (chrhigh - chroffset) - genomicpos + 1; } genomicpos--; + debug13(i++); } while (genomicpos >= end_genomicpos && genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) { @@ -13034,11 +13433,17 @@ /* assert(start_genomicpos <= end_genomicpos); */ genomicpos = start_genomicpos; + i = 0; + while (genomicpos <= middle_genomicpos && scorei[i] < nmismatches - 2) { + debug13(printf("5', watson, sense null %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i])); + genomicpos++; + debug13(i++); + } while (genomicpos <= middle_genomicpos && genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) { acceptor_prob = Maxent_hr_acceptor_prob(chroffset+genomicpos,chroffset); /* Case 2 */ donor_prob = Maxent_hr_antidonor_prob(chroffset+genomicpos,chroffset); /* Case 6 */ - debug13(printf("5', watson, sense null %u %u %f %f mm\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob)); + debug13(printf("5', watson, sense null %u %u %f %f mm %d\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob,nmismatches - scorei[i])); if (acceptor_prob > max_prob_sense_forward_5_mm) { max_prob_sense_forward_5_mm = acceptor_prob; if (acceptor_prob > max_prob_5_mm) { @@ -13060,6 +13465,7 @@ } } genomicpos++; + debug13(i++); } while (genomicpos <= 
end_genomicpos && genomicpos <= end_genomicpos + exondist - END_MIN_EXONLENGTH) { @@ -13097,11 +13503,17 @@ /* assert(start_genomicpos >= end_genomicpos); */ genomicpos = start_genomicpos; + i = 0; + while (genomicpos >= middle_genomicpos && scorei[i] < nmismatches - 2) { + debug13(printf("5', crick, sense null %u %u score %d\n",chroffset+genomicpos,genomicpos,nmismatches - scorei[i])); + genomicpos--; + i++; + } while (genomicpos >= middle_genomicpos && genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) { acceptor_prob = Maxent_hr_antiacceptor_prob(chroffset+genomicpos,chroffset); /* Case 4 */ donor_prob = Maxent_hr_donor_prob(chroffset+genomicpos,chroffset); /* Case 8 */ - debug13(printf("5', crick, sense null %u %u %f %f mm\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob)); + debug13(printf("5', crick, sense null %u %u %f %f mm %d\n",chroffset+genomicpos,genomicpos,donor_prob,acceptor_prob,nmismatches - scorei[i])); if (acceptor_prob > max_prob_sense_forward_5_mm) { max_prob_sense_forward_5_mm = acceptor_prob; if (acceptor_prob > max_prob_5_mm) { @@ -13123,6 +13535,7 @@ } } genomicpos--; + debug13(i++); } while (genomicpos >= end_genomicpos && genomicpos >= end_genomicpos - exondist + END_MIN_EXONLENGTH) { @@ -13366,6 +13779,7 @@ } } + FREE(scorei); return pairs; } #endif @@ -13644,7 +14058,7 @@ int dynprogindex_minor = DYNPROGINDEX_MINOR; bool chop_exon_p; bool knownsplice5p = false, knownsplice3p = false; - bool trimp, trim5p, trim3p, trim5p_ignore, trim3p_ignore; + bool trimp, trim5p, trim3p, trim5p_ignore, trim3p_ignore, indelp; int iter = 0; int new_sensedir; /* Not used currently */ @@ -13656,6 +14070,8 @@ debug3(printf("Entering path_trim with cdna_direction %d\n",*cdna_direction)); #endif + debug3(Pair_dump_list(pairs,true)); + #ifdef GSNAP if (novelsplicingp == true) { pairs = trim_novel_spliceends(&new_sensedir,pairs,&(*ambig_end_length_5),&(*ambig_end_length_3), @@ -13702,7 +14118,13 @@ *cdna_direction,watsonp,jump_late_p, 
maxpeelback,defect_rate,pairpool,dynprogR, /*extendp*/true,/*endalign*/QUERYEND_NOGAPS); - pairs = trim_end5_exon_indels(&trim5p,*ambig_end_length_5,pairs,*cdna_direction); + pairs = trim_end5_exons(&indelp,&trim5p,*ambig_end_length_5,pairs,dynprogR,chroffset,chrhigh, + queryseq_ptr,queryuc_ptr,*cdna_direction,watsonp,jump_late_p,pairpool,defect_rate); + if (indelp == true) { + pairs = trim_end5_indels(pairs,*ambig_end_length_5,dynprogR,chroffset,chrhigh, + queryseq_ptr,queryuc_ptr, + *cdna_direction,watsonp,jump_late_p,pairpool,defect_rate); + } if (trim5p == true) { trimp = true; } @@ -13720,11 +14142,18 @@ *cdna_direction,watsonp,jump_late_p, maxpeelback,defect_rate,pairpool,dynprogL, /*extendp*/true,/*endalign*/QUERYEND_NOGAPS); - path = trim_end3_exon_indels(&trim3p,*ambig_end_length_3,path,*cdna_direction); - pairs = List_reverse(path); + path = trim_end3_exons(&indelp,&trim3p,*ambig_end_length_3,path,dynprogL,chroffset,chrhigh, + queryseq_ptr,queryuc_ptr,querylength, + *cdna_direction,watsonp,jump_late_p,pairpool,defect_rate); + if (indelp == true) { + path = trim_end3_indels(path,*ambig_end_length_3,dynprogL,chroffset,chrhigh, + queryseq_ptr,queryuc_ptr,querylength, + *cdna_direction,watsonp,jump_late_p,pairpool,defect_rate); + } if (trim3p == true) { trimp = true; } + pairs = List_reverse(path); } /* Important to end the alignment with Pair_trim_ends, or else trimming will be faulty */ @@ -13756,7 +14185,7 @@ Splicetype_T *ambig_splicetype_5_1, Splicetype_T *ambig_splicetype_3_1, double *ambig_prob_5_1, double *ambig_prob_3_1, int *unknowns1, int *mismatches1, int *qopens1, int *qindels1, int *topens1, int *tindels1, - int *ncanonical1, int *nsemicanonical1, int *nnoncanonical1, double *min_splice_prob_1, + int *ncanonical1, int *nsemicanonical1, int *nnoncanonical1, double *avg_splice_score_1, #ifdef GSNAP struct Pair_T **pairarray2, List_T *finalpairs2, int *npairs2, int *goodness2, @@ -13765,7 +14194,7 @@ Splicetype_T *ambig_splicetype_5_2, Splicetype_T 
*ambig_splicetype_3_2, double *ambig_prob_5_2, double *ambig_prob_3_2, int *unknowns2, int *mismatches2, int *qopens2, int *qindels2, int *topens2, int *tindels2, - int *ncanonical2, int *nsemicanonical2, int *nnoncanonical2, double *min_splice_prob_2, + int *ncanonical2, int *nsemicanonical2, int *nnoncanonical2, double *avg_splice_score_2, #endif List_T stage2pairs, List_T all_stage2_starts, List_T all_stage2_ends, @@ -13789,6 +14218,7 @@ int nknown_fwd, ncanonical_fwd, nsemicanonical_fwd, nnoncanonical_fwd, nknown_rev, ncanonical_rev, nsemicanonical_rev, nnoncanonical_rev; int nbadintrons_fwd, nbadintrons_rev; + double min_splice_prob_1, min_splice_prob_2; double max_intron_score_fwd = 0.0, max_intron_score_rev = 0.0, avg_donor_score_fwd = 0.0, avg_acceptor_score_fwd = 0.0, avg_donor_score_rev = 0.0, avg_acceptor_score_rev = 0.0; @@ -14224,10 +14654,12 @@ pairs_pretrim = pairs_fwd; *cdna_direction = +1; *sensedir = SENSE_FORWARD; + } else if (pairs_fwd == NULL) { pairs_pretrim = pairs_rev; *cdna_direction = -1; *sensedir = SENSE_ANTI; + } else { path_fwd = List_reverse(pairs_fwd); debug11(printf("Calling score_introns for path_fwd before path_trim\n")); @@ -14274,10 +14706,6 @@ sense_filter); } - if (splicingp == false) { - *sensedir = SENSE_NULL; - } - if (pairs_pretrim == NULL) { #if 0 @@ -14288,11 +14716,15 @@ *ambig_prob_5_1 = *ambig_prob_3_1 = 0.0; #endif return (struct Pair_T *) NULL; + } + if (splicingp == false) { + *sensedir = SENSE_NULL; } #ifdef GSNAP if (*cdna_direction == 0) { + /* If both pairarrays are returned, then first one is fwd and second one is rev */ debug11(printf("Initial cdna_direction is 0\n")); *ambig_end_length_5_1 = fwd_ambig_end_length_5; *ambig_end_length_3_1 = fwd_ambig_end_length_3; @@ -14341,6 +14773,7 @@ if (*finalpairs1 != NULL && *finalpairs2 != NULL) { debug11(printf("Both directions are non-null, so returning both\n")); + /* Pairarray 1 (cdna_direction +1): */ *nmatches_posttrim_1 = 
Pair_nmatches_posttrim(&(*max_match_length_1),*finalpairs1,/*pos5*/*ambig_end_length_5_1, /*pos3*/querylength - (*ambig_end_length_3_1)); pairarray1 = make_pairarray(&(*npairs1),&(*finalpairs1),/*cdna_direction*/+1,watsonp, @@ -14349,25 +14782,40 @@ *goodness1 = Pair_fracidentity_array(&(*matches1),&(*unknowns1),&(*mismatches1), &(*qopens1),&(*qindels1),&(*topens1),&(*tindels1), &(*ncanonical1),&(*nsemicanonical1),&(*nnoncanonical1), - &(*min_splice_prob_1),pairarray1,*npairs1,/*cdna_direction*/+1); + &min_splice_prob_1,pairarray1,*npairs1,/*cdna_direction*/+1); + *avg_splice_score_1 = avg_donor_score_fwd + avg_acceptor_score_fwd; + - debug0(printf("Result (%d pairs): %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels\n", - *npairs1,*matches1,*mismatches1,*qopens1,*qindels1,*topens1,*tindels1)); + debug0(printf("Result 1 (%d pairs): %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels, splice score %f\n", + *npairs1,*matches1,*mismatches1,*qopens1,*qindels1,*topens1,*tindels1,*avg_splice_score_1)); debug0(Pair_dump_array(pairarray1,*npairs1,/*zerobasedp*/true)); - *nmatches_posttrim_2 = Pair_nmatches_posttrim(&(*max_match_length_2),*finalpairs2,/*pos5*/*ambig_end_length_5_2, - /*pos3*/querylength - (*ambig_end_length_3_2)); - *pairarray2 = make_pairarray(&(*npairs2),&(*finalpairs2),/*cdna_direction*/-1,watsonp, - pairpool,queryseq_ptr,chroffset,chrhigh, - ngap,query_subseq_offset,skiplength); - *goodness2 = Pair_fracidentity_array(&(*matches2),&(*unknowns2),&(*mismatches2), - &(*qopens2),&(*qindels2),&(*topens2),&(*tindels2), - &(*ncanonical2),&(*nsemicanonical2),&(*nnoncanonical2), - &(*min_splice_prob_2),*pairarray2,*npairs2,/*cdna_direction*/-1); - - debug0(printf("Result (%d pairs): %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels\n", - *npairs2,*matches2,*mismatches2,*qopens2,*qindels2,*topens2,*tindels2)); - debug0(Pair_dump_array(*pairarray2,*npairs2,/*zerobasedp*/true)); + /* Note 
avg_donor_score_fwd and so on do not include evaluations + of the end splice junctions. So if cdna_direction == 0, + callers should assume that the sensedir is not known */ + + if (0 /*&& Pair_identical_p(*finalpairs1,*finalpairs2) == true*/) { + /* This causes misses in resolve-inside procedures */ + debug0(printf("Result 2 is identical to Result 1, so not returning it\n")); + *pairarray2 = (struct Pair_T *) NULL; + + } else { + /* Pairarray 2 (cdna_direction -1): */ + *nmatches_posttrim_2 = Pair_nmatches_posttrim(&(*max_match_length_2),*finalpairs2,/*pos5*/*ambig_end_length_5_2, + /*pos3*/querylength - (*ambig_end_length_3_2)); + *pairarray2 = make_pairarray(&(*npairs2),&(*finalpairs2),/*cdna_direction*/-1,watsonp, + pairpool,queryseq_ptr,chroffset,chrhigh, + ngap,query_subseq_offset,skiplength); + *goodness2 = Pair_fracidentity_array(&(*matches2),&(*unknowns2),&(*mismatches2), + &(*qopens2),&(*qindels2),&(*topens2),&(*tindels2), + &(*ncanonical2),&(*nsemicanonical2),&(*nnoncanonical2), + &min_splice_prob_2,*pairarray2,*npairs2,/*cdna_direction*/-1); + *avg_splice_score_2 = avg_donor_score_rev + avg_acceptor_score_rev; + + debug0(printf("Result 2 (%d pairs): %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels, splice score %f\n", + *npairs2,*matches2,*mismatches2,*qopens2,*qindels2,*topens2,*tindels2,*avg_splice_score_2)); + debug0(Pair_dump_array(*pairarray2,*npairs2,/*zerobasedp*/true)); + } *cdna_direction = 0; *sensedir = SENSE_NULL; @@ -14400,6 +14848,7 @@ *ambig_prob_5_1 = fwd_ambig_prob_5; *ambig_prob_3_1 = fwd_ambig_prob_3; *sensedir = SENSE_FORWARD; + *avg_splice_score_1 = avg_donor_score_fwd + avg_acceptor_score_fwd; defect_rate = defect_rate_fwd; } else if (*cdna_direction < 0) { @@ -14411,6 +14860,7 @@ *ambig_prob_5_1 = rev_ambig_prob_5; *ambig_prob_3_1 = rev_ambig_prob_3; *sensedir = SENSE_ANTI; + *avg_splice_score_1 = avg_donor_score_rev + avg_acceptor_score_rev; defect_rate = defect_rate_rev; } else { @@ -14425,6 +14875,7 @@ 
*ambig_prob_5_1 = fwd_ambig_prob_5; *ambig_prob_3_1 = fwd_ambig_prob_3; *sensedir = SENSE_FORWARD; + *avg_splice_score_1 = 0.0; defect_rate = defect_rate_fwd; #endif } @@ -14450,10 +14901,14 @@ *goodness1 = Pair_fracidentity_array(&(*matches1),&(*unknowns1),&(*mismatches1), &(*qopens1),&(*qindels1),&(*topens1),&(*tindels1), &(*ncanonical1),&(*nsemicanonical1),&(*nnoncanonical1), - &(*min_splice_prob_1),pairarray1,*npairs1,*cdna_direction); + &min_splice_prob_1,pairarray1,*npairs1,*cdna_direction); + /* *avg_splice_score_1 assigned above */ + - debug0(printf("Result (%d pairs): %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels\n", - *npairs1,*matches1,*mismatches1,*qopens1,*qindels1,*topens1,*tindels1)); + debug0(printf("Result (%d pairs): %d matches, %d mismatches, %d qopens, %d qindels, %d topens, %d tindels, splice score %f\n", + *npairs1,*matches1,*mismatches1,*qopens1,*qindels1,*topens1,*tindels1,*avg_splice_score_1)); + debug0(printf("avg_splice_score_1 %f\n",*avg_splice_score_1)); + debug0(printf("avg_splice_score_2 %f\n",*avg_splice_score_2)); debug0(Pair_dump_array(pairarray1,*npairs1,/*zerobasedp*/true)); #ifdef GSNAP diff -Nru gmap-2016-11-07/src/stage3.h gmap-2017-01-14/src/stage3.h --- gmap-2016-11-07/src/stage3.h 2016-09-21 00:29:14.000000000 +0000 +++ gmap-2017-01-14/src/stage3.h 2016-12-29 16:20:17.000000000 +0000 @@ -1,4 +1,4 @@ -/* $Id: stage3.h 198076 2016-09-21 00:29:14Z twu $ */ +/* $Id: stage3.h 202031 2016-12-29 16:20:14Z twu $ */ #ifndef STAGE3_INCLUDED #define STAGE3_INCLUDED @@ -281,13 +281,14 @@ Stage3_good_part (struct Pair_T *pairarray, int npairs, int pos5, int pos3); extern struct Pair_T * -Stage3_compute (int *cdna_direction, int *sensedir, List_T *pairs1, int *npairs1, int *goodness1, +Stage3_compute (int *cdna_direction, int *sensedir, + List_T *pairs1, int *npairs1, int *goodness1, int *matches1, int *nmatches_posttrim_1, int *max_match_length_1, int *ambig_end_length_5_1, int *ambig_end_length_3_1, 
Splicetype_T *ambig_splicetype_5_1, Splicetype_T *ambig_splicetype_3_1, double *ambig_prob_5_1, double *ambig_prob_3_1, int *unknowns1, int *mismatches1, int *qopens1, int *qindels1, int *topens1, int *tindels1, - int *ncanonical1, int *nsemicanonical1, int *nnoncanonical1, double *min_splice_prob_1, + int *ncanonical1, int *nsemicanonical1, int *nnoncanonical1, double *avg_splice_score_1, #ifdef GSNAP struct Pair_T **pairarray2, List_T *pairs2, int *npairs2, int *goodness2, @@ -296,7 +297,7 @@ Splicetype_T *ambig_splicetype_5_2, Splicetype_T *ambig_splicetype_3_2, double *ambig_prob_5_2, double *ambig_prob_3_2, int *unknowns2, int *mismatches2, int *qopens2, int *qindels2, int *topens2, int *tindels2, - int *ncanonical2, int *nsemicanonical2, int *nnoncanonical2, double *min_splice_prob_2, + int *ncanonical2, int *nsemicanonical2, int *nnoncanonical2, double *avg_splice_score_2, #endif List_T stage2pairs, List_T all_stage2_starts, List_T all_stage2_ends, diff -Nru gmap-2016-11-07/src/stage3hr.c gmap-2017-01-14/src/stage3hr.c --- gmap-2016-11-07/src/stage3hr.c 2016-11-08 00:59:57.000000000 +0000 +++ gmap-2017-01-14/src/stage3hr.c 2017-01-14 01:43:12.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: stage3hr.c 200238 2016-11-08 00:59:56Z twu $"; +static char rcsid[] = "$Id: stage3hr.c 202602 2017-01-14 01:43:11Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -230,17 +230,16 @@ static IIT_T runlength_iit; static int *runlength_divint_crosstable; -static int pairmax_linear; -static int pairmax_circular; +static Chrpos_T pairmax_linear; +static Chrpos_T pairmax_circular; -#if 0 -static int expected_pairlength; -static int pairlength_deviation; -#else -static int expected_pairlength_low; -static int expected_pairlength_high; -static int expected_pairlength_very_high; -#endif +static Chrpos_T expected_pairlength; +static Chrpos_T pairlength_deviation; +static int maxpeelback; + +static Chrpos_T expected_pairlength_low; +static Chrpos_T 
expected_pairlength_high; +static Chrpos_T expected_pairlength_very_high; static int amb_penalty = 2; static int localsplicing_penalty; @@ -283,8 +282,8 @@ IIT_T genes_iit_in, int *genes_divint_crosstable_in, IIT_T tally_iit_in, int *tally_divint_crosstable_in, IIT_T runlength_iit_in, int *runlength_divint_crosstable_in, - bool distances_observed_p, int pairmax_linear_in, int pairmax_circular_in, - Chrpos_T expected_pairlength, Chrpos_T pairlength_deviation, + bool distances_observed_p, Chrpos_T pairmax_linear_in, Chrpos_T pairmax_circular_in, + Chrpos_T expected_pairlength_in, Chrpos_T pairlength_deviation_in, int maxpeelback_in, int localsplicing_penalty_in, int indel_penalty_middle_in, int antistranded_penalty_in, bool favor_multiexon_p_in, int gmap_min_nconsecutive_in, int end_detail, int subopt_levels_in, @@ -314,6 +313,10 @@ pairmax_linear = pairmax_linear_in; pairmax_circular = pairmax_circular_in; + expected_pairlength = expected_pairlength_in; + pairlength_deviation = pairlength_deviation_in; + maxpeelback = maxpeelback_in; + if (pairlength_deviation > expected_pairlength) { expected_pairlength_low = 0; } else { @@ -445,8 +448,9 @@ int nmatches; int nmatches_posttrim; - int gmap_max_match_length; /* Used only by GMAP */ - double gmap_min_splice_prob; /* Used only by GMAP */ + int gmap_goodness; /* Used only by GMAP */ + int gmap_max_match_length; /* Used only by GMAP */ + double gmap_avg_splice_score; /* Used only by GMAP */ /* trim_left and trim_right should really be named trim_start and trim_end */ /* if trim_left_splicep or trim_right_splicep is true, then trim is of type "unknown amb" */ @@ -477,6 +481,7 @@ int gmap_nindelbreaks; int gmap_cdna_direction; int gmap_nintrons; + int gmap_nbadintrons; /* Filled in during Stage3pair_optimal_score */ int sensedir; /* for splicing */ int nsplices; @@ -557,7 +562,7 @@ Univcoord_T low; Univcoord_T high; - int insertlength; + Chrpos_T insertlength; int insertlength_expected_sign; /* 1 if in 
(expected_pairlength_low, expected_pairlength_high), 0 if in (expected_pairlength_low, expected_pairlength_very_high), and -1 if < expected_pairlength_low or > expected_pairlength_very_high */ @@ -787,13 +792,18 @@ } int +Stage3end_gmap_goodness (T this) { + return this->gmap_goodness; +} + +int Stage3end_gmap_max_match_length (T this) { return this->gmap_max_match_length; } double -Stage3end_gmap_min_splice_prob (T this) { - return this->gmap_min_splice_prob; +Stage3end_gmap_avg_splice_score (T this) { + return this->gmap_avg_splice_score; } @@ -1620,6 +1630,30 @@ return this->pairarray[this->npairs - 1].querypos; } +static int +Stage3end_querystart (T this) { + Substring_T substring; + + if (this->hittype == GMAP) { + return this->pairarray[0].querypos; + } else { + substring = (Substring_T) List_head(this->substrings_1toN); + return Substring_querystart(substring); + } +} + +static int +Stage3end_queryend (T this) { + Substring_T substring; + + if (this->hittype == GMAP) { + return this->pairarray[this->npairs - 1].querypos; + } else { + substring = (Substring_T) List_head(this->substrings_Nto1); + return Substring_queryend(substring); + } +} + int Stage3end_terminal_trim (T this) { Substring_T substring; @@ -4682,8 +4716,9 @@ new->ntscore = old->ntscore; new->nmatches_posttrim = old->nmatches_posttrim; new->nmatches = old->nmatches; + new->gmap_goodness = old->gmap_goodness; new->gmap_max_match_length = old->gmap_max_match_length; - new->gmap_min_splice_prob = old->gmap_min_splice_prob; + new->gmap_avg_splice_score = old->gmap_avg_splice_score; new->trim_left = old->trim_left; new->trim_right = old->trim_right; @@ -4709,6 +4744,7 @@ new->gmap_nindelbreaks = old->gmap_nindelbreaks; new->gmap_cdna_direction = old->gmap_cdna_direction; new->gmap_nintrons = old->gmap_nintrons; + new->gmap_nbadintrons = old->gmap_nbadintrons; new->sensedir = old->sensedir; new->gmap_start_endtype = old->gmap_start_endtype; @@ -5385,8 +5421,16 @@ new->genestrand = genestrand; 
new->sarrayp = sarrayp; new->gmap_source = GMAP_NOT_APPLICABLE; + new->gmap_goodness = 0; + new->gmap_max_match_length = 0; + new->gmap_avg_splice_score = 0.0; new->improved_by_gmap_p = false; + new->gmap_nindelbreaks = 0; + new->gmap_cdna_direction = 0; + new->gmap_nintrons = 0; + new->gmap_nbadintrons = 0; + new->distant_splice_p = false; new->chrnum = new->effective_chrnum = chrnum; new->other_chrnum = 0; @@ -5550,8 +5594,8 @@ max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2, unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2, ncanonical2, nsemicanonical2, nnoncanonical2; - double ambig_prob_5_1, ambig_prob_3_1, min_splice_prob_1; - double ambig_prob_5_2, ambig_prob_3_2, min_splice_prob_2; + double ambig_prob_5_1, ambig_prob_3_1, avg_splice_score_1; + double ambig_prob_5_2, ambig_prob_3_2, avg_splice_score_2; Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1; Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2; Univcoord_T knownsplice_limit_low, knownsplice_limit_high; @@ -5815,7 +5859,7 @@ &ambig_splicetype_5_1,&ambig_splicetype_3_1, &ambig_prob_5_1,&ambig_prob_3_1, &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1, - &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1, + &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1, &pairarray2,&pairs2,&npairs2,&goodness2, &matches2,&nmatches_posttrim_2,&max_match_length_2, @@ -5823,7 +5867,7 @@ &ambig_splicetype_5_2,&ambig_splicetype_3_2, &ambig_prob_5_2,&ambig_prob_3_2, &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2, - &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2, + &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2, stage2pairs,all_stage2_starts,all_stage2_ends, #ifdef END_KNOWNSPLICING_SHORTCUT @@ -5843,7 +5887,7 @@ oligoindices_minor,diagpool,cellpool)) == NULL) { hit1 = (T) NULL; - } else if (cdna_direction == 0) { + } else if (pairarray2 != NULL) { nsegments = 
Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks, pairarray1,npairs1); start = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[0])), @@ -5854,7 +5898,7 @@ if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, ambig_end_length_5_1,ambig_end_length_3_1, ambig_splicetype_5_1,ambig_splicetype_3_1, - min_splice_prob_1, + avg_splice_score_1,goodness1, pairarray1,npairs1,nsegments,nintrons,nindelbreaks, /*left*/start,/*genomiclength*/end - start + 1, /*plusp*/true,genestrand, @@ -5874,7 +5918,7 @@ if ((*hit2 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2, ambig_end_length_5_2,ambig_end_length_3_2, ambig_splicetype_5_2,ambig_splicetype_3_2, - min_splice_prob_2, + avg_splice_score_2,goodness2, pairarray2,npairs2,nsegments,nintrons,nindelbreaks, /*left*/start,/*genomiclength*/end - start + 1, /*plusp*/true,genestrand, @@ -5895,7 +5939,7 @@ if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, ambig_end_length_5_1,ambig_end_length_3_1, ambig_splicetype_5_1,ambig_splicetype_3_1, - min_splice_prob_1, + avg_splice_score_1,goodness1, pairarray1,npairs1,nsegments,nintrons,nindelbreaks, /*left*/start,/*genomiclength*/end - start + 1, /*plusp*/true,genestrand, @@ -5955,8 +5999,8 @@ max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2, unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2, ncanonical2, nsemicanonical2, nnoncanonical2; - double ambig_prob_5_1, ambig_prob_3_1, min_splice_prob_1; - double ambig_prob_5_2, ambig_prob_3_2, min_splice_prob_2; + double ambig_prob_5_1, ambig_prob_3_1, avg_splice_score_1; + double ambig_prob_5_2, ambig_prob_3_2, avg_splice_score_2; Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1; Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2; Univcoord_T knownsplice_limit_low, knownsplice_limit_high; @@ -6224,7 +6268,7 @@ &ambig_splicetype_5_1,&ambig_splicetype_3_1, 
&ambig_prob_5_1,&ambig_prob_3_1, &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1, - &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1, + &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1, &pairarray2,&pairs2,&npairs2,&goodness2, &matches2,&nmatches_posttrim_2,&max_match_length_2, @@ -6232,7 +6276,7 @@ &ambig_splicetype_5_2,&ambig_splicetype_3_2, &ambig_prob_5_2,&ambig_prob_3_2, &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2, - &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2, + &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2, stage2pairs,all_stage2_starts,all_stage2_ends, #ifdef END_KNOWNSPLICING_SHORTCUT @@ -6252,17 +6296,18 @@ oligoindices_minor,diagpool,cellpool)) == NULL) { hit1 = (T) NULL; - } else if (cdna_direction == 0) { + } else if (pairarray2 != NULL) { nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks, pairarray1,npairs1); start = add_bounded(this->chroffset + Pair_genomepos(&(pairarray1[0])), /*plusterm*/Pair_querypos(&(pairarray1[0])),this->chrhigh); end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])), /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),this->chroffset); + if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, ambig_end_length_5_1,ambig_end_length_3_1, ambig_splicetype_5_1,ambig_splicetype_3_1, - min_splice_prob_1, + avg_splice_score_1,goodness1, pairarray1,npairs1,nsegments,nintrons,nindelbreaks, /*left*/end,/*genomiclength*/start - end + 1, /*plusp*/false,genestrand, @@ -6278,10 +6323,11 @@ /*plusterm*/Pair_querypos(&(pairarray2[0])),this->chrhigh); end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray2[npairs2-1])), /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray2[npairs2-1])),this->chroffset); + if ((*hit2 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2, 
ambig_end_length_5_2,ambig_end_length_3_2, ambig_splicetype_5_2,ambig_splicetype_3_2, - min_splice_prob_2, + avg_splice_score_2,goodness2, pairarray2,npairs2,nsegments,nintrons,nindelbreaks, /*left*/end,/*genomiclength*/start - end + 1, /*plusp*/false,genestrand, @@ -6298,10 +6344,11 @@ /*plusterm*/Pair_querypos(&(pairarray1[0])),this->chrhigh); end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])), /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),this->chroffset); + if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, ambig_end_length_5_1,ambig_end_length_3_1, ambig_splicetype_5_1,ambig_splicetype_3_1, - min_splice_prob_1, + avg_splice_score_1,goodness1, pairarray1,npairs1,nsegments,nintrons,nindelbreaks, /*left*/end,/*genomiclength*/start - end + 1, /*plusp*/false,genestrand, @@ -6352,8 +6399,8 @@ max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2, unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2, ncanonical2, nsemicanonical2, nnoncanonical2; - double ambig_prob_5_1, ambig_prob_3_1, min_splice_prob_1; - double ambig_prob_5_2, ambig_prob_3_2, min_splice_prob_2; + double ambig_prob_5_1, ambig_prob_3_1, avg_splice_score_1; + double ambig_prob_5_2, ambig_prob_3_2, avg_splice_score_2; Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1; Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2; Univcoord_T knownsplice_limit_low, knownsplice_limit_high; @@ -6457,7 +6504,7 @@ &ambig_splicetype_5_1,&ambig_splicetype_3_1, &ambig_prob_5_1,&ambig_prob_3_1, &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1, - &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1, + &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1, &pairarray2,&pairs2,&npairs2,&goodness2, &matches2,&nmatches_posttrim_2,&max_match_length_2, @@ -6465,7 +6512,7 @@ &ambig_splicetype_5_2,&ambig_splicetype_3_2, &ambig_prob_5_2,&ambig_prob_3_2, 
&unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2, - &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2, + &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2, stage2pairs,all_stage2_starts,all_stage2_ends, #ifdef END_KNOWNSPLICING_SHORTCUT @@ -6485,7 +6532,7 @@ oligoindices_minor,diagpool,cellpool)) == NULL) { hit1 = (T) NULL; - } else if (cdna_direction == 0) { + } else if (pairarray2 != NULL) { nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks, pairarray1,npairs1); start = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[0])), @@ -6496,7 +6543,7 @@ if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, ambig_end_length_5_1,ambig_end_length_3_1, ambig_splicetype_5_1,ambig_splicetype_3_1, - min_splice_prob_1, + avg_splice_score_1,goodness1, pairarray1,npairs1,nsegments,nintrons,nindelbreaks, /*left*/start,/*genomiclength*/end - start + 1, /*plusp*/true,genestrand, @@ -6516,7 +6563,7 @@ if ((*hit2 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2, ambig_end_length_5_2,ambig_end_length_3_2, ambig_splicetype_5_2,ambig_splicetype_3_2, - min_splice_prob_2, + avg_splice_score_2,goodness2, pairarray2,npairs2,nsegments,nintrons,nindelbreaks, /*left*/start,/*genomiclength*/end - start + 1, /*plusp*/true,genestrand, @@ -6537,7 +6584,7 @@ if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, ambig_end_length_5_1,ambig_end_length_3_1, ambig_splicetype_5_1,ambig_splicetype_3_1, - min_splice_prob_1, + avg_splice_score_1,goodness1, pairarray1,npairs1,nsegments,nintrons,nindelbreaks, /*left*/start,/*genomiclength*/end - start + 1, /*plusp*/true,genestrand, @@ -6581,8 +6628,8 @@ max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2, unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2, ncanonical2, nsemicanonical2, nnoncanonical2; - double ambig_prob_5_1, ambig_prob_3_1, 
min_splice_prob_1; - double ambig_prob_5_2, ambig_prob_3_2, min_splice_prob_2; + double ambig_prob_5_1, ambig_prob_3_1, avg_splice_score_1; + double ambig_prob_5_2, ambig_prob_3_2, avg_splice_score_2; Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1; Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2; Univcoord_T knownsplice_limit_low, knownsplice_limit_high; @@ -6686,7 +6733,7 @@ &ambig_splicetype_5_1,&ambig_splicetype_3_1, &ambig_prob_5_1,&ambig_prob_3_1, &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1, - &ncanonical1,&nsemicanonical1,&nnoncanonical1,&min_splice_prob_1, + &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1, &pairarray2,&pairs2,&npairs2,&goodness2, &matches2,&nmatches_posttrim_2,&max_match_length_2, @@ -6694,7 +6741,7 @@ &ambig_splicetype_5_2,&ambig_splicetype_3_2, &ambig_prob_5_2,&ambig_prob_3_2, &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2, - &ncanonical2,&nsemicanonical2,&nnoncanonical2,&min_splice_prob_2, + &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2, stage2pairs,all_stage2_starts,all_stage2_ends, #ifdef END_KNOWNSPLICING_SHORTCUT @@ -6714,17 +6761,18 @@ oligoindices_minor,diagpool,cellpool)) == NULL) { hit1 = (T) NULL; - } else if (cdna_direction == 0) { + } else if (pairarray2 != NULL) { nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks, pairarray1,npairs1); start = add_bounded(this->chroffset + Pair_genomepos(&(pairarray1[0])), /*plusterm*/Pair_querypos(&(pairarray1[0])),this->chrhigh); end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])), /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),this->chroffset); + if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, ambig_end_length_5_1,ambig_end_length_3_1, ambig_splicetype_5_1,ambig_splicetype_3_1, - min_splice_prob_1, + avg_splice_score_1,goodness1, 
pairarray1,npairs1,nsegments,nintrons,nindelbreaks, /*left*/end,/*genomiclength*/start - end + 1, /*plusp*/false,genestrand, @@ -6740,10 +6788,11 @@ /*plusterm*/Pair_querypos(&(pairarray2[0])),this->chrhigh); end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray2[npairs2-1])), /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray2[npairs2-1])),this->chroffset); + if ((*hit2 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2, ambig_end_length_5_2,ambig_end_length_3_2, ambig_splicetype_5_2,ambig_splicetype_3_2, - min_splice_prob_2, + avg_splice_score_2,goodness2, pairarray2,npairs2,nsegments,nintrons,nindelbreaks, /*left*/end,/*genomiclength*/start - end + 1, /*plusp*/false,genestrand, @@ -6760,10 +6809,11 @@ /*plusterm*/Pair_querypos(&(pairarray1[0])),this->chrhigh); end = subtract_bounded(this->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])), /*minusterm*/querylength - 1 - Pair_querypos(&(pairarray1[npairs1-1])),this->chroffset); + if ((hit1 = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, ambig_end_length_5_1,ambig_end_length_3_1, ambig_splicetype_5_1,ambig_splicetype_3_1, - min_splice_prob_1, + avg_splice_score_1,goodness1, pairarray1,npairs1,nsegments,nintrons,nindelbreaks, /*left*/end,/*genomiclength*/start - end + 1, /*plusp*/false,genestrand, @@ -6921,8 +6971,16 @@ new->genestrand = genestrand; new->sarrayp = sarrayp; new->gmap_source = GMAP_NOT_APPLICABLE; + new->gmap_goodness = 0; + new->gmap_max_match_length = 0; + new->gmap_avg_splice_score = 0.0; new->improved_by_gmap_p = false; + new->gmap_nindelbreaks = 0; + new->gmap_cdna_direction = 0; + new->gmap_nintrons = 0; + new->gmap_nbadintrons = 0; + new->distant_splice_p = false; new->chrnum = new->effective_chrnum = chrnum; new->other_chrnum = 0; @@ -7124,8 +7182,16 @@ new->genestrand = genestrand; new->sarrayp = sarrayp; new->gmap_source = GMAP_NOT_APPLICABLE; + new->gmap_goodness = 0; + new->gmap_max_match_length = 0; + 
new->gmap_avg_splice_score = 0.0; new->improved_by_gmap_p = false; + new->gmap_nindelbreaks = 0; + new->gmap_cdna_direction = 0; + new->gmap_nintrons = 0; + new->gmap_nbadintrons = 0; + new->distant_splice_p = false; new->chrnum = new->effective_chrnum = chrnum; new->other_chrnum = 0; @@ -7428,8 +7494,16 @@ new->genestrand = genestrand; new->sarrayp = sarrayp; new->gmap_source = GMAP_NOT_APPLICABLE; + new->gmap_goodness = 0; + new->gmap_max_match_length = 0; + new->gmap_avg_splice_score = 0.0; new->improved_by_gmap_p = false; + new->gmap_nindelbreaks = 0; + new->gmap_cdna_direction = 0; + new->gmap_nintrons = 0; + new->gmap_nbadintrons = 0; + new->distant_splice_p = false; new->chrnum = new->effective_chrnum = chrnum; new->other_chrnum = 0; @@ -7767,8 +7841,16 @@ new->genestrand = genestrand; new->sarrayp = sarrayp; new->gmap_source = GMAP_NOT_APPLICABLE; + new->gmap_goodness = 0; + new->gmap_max_match_length = 0; + new->gmap_avg_splice_score = 0.0; new->improved_by_gmap_p = false; + new->gmap_nindelbreaks = 0; + new->gmap_cdna_direction = 0; + new->gmap_nintrons = 0; + new->gmap_nbadintrons = 0; + new->distant_splice_p = false; new->chrnum = new->effective_chrnum = chrnum; new->other_chrnum = 0; @@ -7926,8 +8008,16 @@ new->sarrayp = sarrayp; new->gmap_source = GMAP_NOT_APPLICABLE; + new->gmap_goodness = 0; + new->gmap_max_match_length = 0; + new->gmap_avg_splice_score = 0.0; new->improved_by_gmap_p = false; + new->gmap_nindelbreaks = 0; + new->gmap_cdna_direction = 0; + new->gmap_nintrons = 0; + new->gmap_nbadintrons = 0; + if (donor == NULL) { new->hittype = SPLICE; new->genestrand = Substring_genestrand(acceptor); @@ -7987,7 +8077,16 @@ new->hittype = DISTANT_SPLICE; new->sarrayp = sarrayp; new->gmap_source = GMAP_NOT_APPLICABLE; + new->gmap_goodness = 0; + new->gmap_max_match_length = 0; + new->gmap_avg_splice_score = 0.0; new->improved_by_gmap_p = false; + + new->gmap_nindelbreaks = 0; + new->gmap_cdna_direction = 0; + new->gmap_nintrons = 0; + 
new->gmap_nbadintrons = 0; + new->chrnum = 0; new->chroffset = 0; new->chrhigh = 0; @@ -7997,7 +8096,16 @@ } else { new->sarrayp = sarrayp; new->gmap_source = GMAP_NOT_APPLICABLE; + new->gmap_goodness = 0; + new->gmap_max_match_length = 0; + new->gmap_avg_splice_score = 0.0; new->improved_by_gmap_p = false; + + new->gmap_nindelbreaks = 0; + new->gmap_cdna_direction = 0; + new->gmap_nintrons = 0; + new->gmap_nbadintrons = 0; + new->distant_splice_p = true; if (Substring_chrnum(donor) == Substring_chrnum(acceptor)) { new->hittype = SAMECHR_SPLICE; @@ -8388,8 +8496,16 @@ new->sarrayp = sarrayp; new->gmap_source = GMAP_NOT_APPLICABLE; + new->gmap_goodness = 0; + new->gmap_max_match_length = 0; + new->gmap_avg_splice_score = 0.0; new->improved_by_gmap_p = false; + new->gmap_nindelbreaks = 0; + new->gmap_cdna_direction = 0; + new->gmap_nintrons = 0; + new->gmap_nbadintrons = 0; + #if 0 if (donor == NULL && acceptor == NULL) { new->hittype = ONE_THIRD_SHORTEXON; @@ -9080,8 +9196,16 @@ new->genestrand = genestrand; new->sarrayp = sarrayp; new->gmap_source = GMAP_NOT_APPLICABLE; + new->gmap_goodness = 0; + new->gmap_max_match_length = 0; + new->gmap_avg_splice_score = 0.0; new->improved_by_gmap_p = false; + new->gmap_nindelbreaks = 0; + new->gmap_cdna_direction = 0; + new->gmap_nintrons = 0; + new->gmap_nbadintrons = 0; + new->distant_splice_p = false; new->chrnum = new->effective_chrnum = chrnum; new->other_chrnum = 0; @@ -9159,7 +9283,7 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_length, int ambig_end_length_5, int ambig_end_length_3, Splicetype_T ambig_splicetype_5, Splicetype_T ambig_splicetype_3, - double min_splice_prob, + double avg_splice_score, int goodness, struct Pair_T *pairarray, int npairs, int nsegments, int nintrons, int nindelbreaks, Univcoord_T left, int genomiclength, bool plusp, int genestrand, char *accession, int querylength, Chrnum_T chrnum, @@ -9182,6 +9306,7 @@ */ /* However, this leads to fatal bugs later, so 
restored these statements */ + debug0(printf("Entered Stage3end_new_gmap with orig_sensedir %d\n",orig_sensedir)); assert(orig_sensedir == SENSE_NULL || orig_sensedir == SENSE_ANTI || orig_sensedir == SENSE_FORWARD); @@ -9237,12 +9362,13 @@ new = (T) MALLOC_OUT(sizeof(*new)); - debug0(printf("Stage3end_new_gmap %p: left %llu, genomicstart/end %u..%u, chrhigh %llu, chrnum %d, nmismatches %d, nmatches_posttrim %d, cdna_direction %d, orig_sensedir %d, max_match_length %d, gmap_source %d\n", + debug0(printf("Stage3end_new_gmap %p: left %llu, genomicstart/end %u..%u, chrhigh %llu, chrnum %d, nmismatches %d, nmatches_posttrim %d, cdna_direction %d, orig_sensedir %d, avg_splice_score %f, max_match_length %d, gmap_source %d\n", new,(unsigned long long) left,(unsigned int) (genomicstart - chroffset),(unsigned int) (genomicend - chroffset), - (unsigned long long) chrhigh,chrnum,nmismatches_whole,nmatches_posttrim,cdna_direction,orig_sensedir,max_match_length,gmap_source)); + (unsigned long long) chrhigh,chrnum,nmismatches_whole,nmatches_posttrim,cdna_direction,orig_sensedir,avg_splice_score,max_match_length,gmap_source)); debug0(printf(" ambig_end_length_5 %d, ambig_end_length_3 %d\n",ambig_end_length_5,ambig_end_length_3)); debug0(Pair_dump_comp_array(pairarray,npairs)); + new->substrings_LtoH = (List_T) NULL; new->substrings_1toN = (List_T) NULL; new->substrings_Nto1 = (List_T) NULL; @@ -9291,6 +9417,16 @@ new->gmap_nintrons = nintrons; #if 0 + /* Not sure if this nindelbreaks compares with the parameter value */ + Pair_nmismatches_region(&nindelbreaks,&new->gmap_nbadintrons,pairarray,npairs, + /*trim_left*/0,/*trim_right*/0,/*start_amb_length*/0,/*end_amb_length*/0, + querylength); +#else + new->gmap_nbadintrons = 0; /* Filled in during Stage3pair_optimal_score */ +#endif + + +#if 0 new->mapq_loglik = Substring_mapq_loglik(substring); new->mapq_score = 0; new->absmq_score = 0; @@ -9351,9 +9487,9 @@ return (T) NULL; } + new->gmap_goodness = goodness; 
new->gmap_max_match_length = max_match_length; - new->gmap_min_splice_prob = min_splice_prob; - + new->gmap_avg_splice_score = avg_splice_score; new->trim_left = Pair_querypos(&(pairarray[0])) /*- ambig_end_length_5*/; /* Do not subtract ambig_end_length, so we are equivalent with substrings */ if (ambig_end_length_5 > 0) { @@ -9526,7 +9662,7 @@ /* Stage3end_free(&new); -- Cannot use, because it frees pairarray */ Pair_tokens_free(&new->cigar_tokens); /* No substrings or junctions */ - FREE(new); + FREE_OUT(new); debug0(printf("Returning NULL\n")); return (T) NULL; @@ -10106,7 +10242,7 @@ static int -insertlength_expected (int insertlength) { +insertlength_expected (Chrpos_T insertlength) { if (insertlength < expected_pairlength_low) { return -1; } else if (insertlength > expected_pairlength_very_high) { @@ -10607,6 +10743,7 @@ /* Filter on nsegments */ + /* TODO: Consider filtering on gmap_nbadintrons instead, as we do for Stage3pair_optimal_score */ if (finalp == true && optimal != NULL) { hitlist = optimal; optimal = (List_T) NULL; @@ -11598,6 +11735,12 @@ return -1; #endif + } else if (x->gmap_avg_splice_score > y->gmap_avg_splice_score) { + return -1; + + } else if (y->gmap_avg_splice_score > x->gmap_avg_splice_score) { + return +1; + } else { return 0; } @@ -11734,6 +11877,22 @@ debug7(printf(" => indistinguishable\n")); return 0; + } else if (hit->hittype == GMAP && best_hit->hittype == GMAP) { + prob1 = hit->gmap_avg_splice_score; + prob2 = best_hit->gmap_avg_splice_score; + + if (prob1 < prob2) { + debug7(printf(" => %d loses by GMAP splice prob %f vs %f\n",k,prob1,prob2)); + return -1; + } else if (prob1 > prob2) { + debug7(printf(" => %d wins by GMAP splice prob %f vs %f\n",k,prob1,prob2)); + return +1; + } else { + debug7(printf(" => equal\n")); + *equalp = true; + return 0; + } + } else { prob1 = Stage3end_prob(hit); prob2 = Stage3end_prob(best_hit); @@ -13194,11 +13353,81 @@ } +/* Used for resolve_inside_general_splice_plus and 
resolve_inside_general_splice_minus */ +static List_T +Stage3end_convert_to_pairs (List_T pairs, T hit, char *queryuc_ptr, int querylength, + Compress_T query_compress_fwd, Compress_T query_compress_rev, + Chrpos_T chrlength, Pairpool_T pairpool) { + List_T p, q; + /* Chrpos_T genomicpos1, genomicpos2; */ + Substring_T substring, prev_substring; + Junction_T junction; + Junctiontype_T type; + char *deletion_string; + + if (hit->hittype == TRANSLOC_SPLICE) { + /* Cannot handle translocations within a single GMAP alignment */ + abort(); + return NULL; + + } else if (hit->hittype == GMAP) { + debug9(printf("Converting gmap to pairs\n")); + /* Use querylength here, but adj elsewhere */ + return Pair_convert_array_to_pairs(pairs,hit->pairarray,hit->npairs,hit->plusp, + chrlength,pairpool); + } else { + p = hit->substrings_1toN; + prev_substring = (Substring_T) List_head(p); + debug9(printf("Converting substring\n")); + /* Normally done during Stage3pair_eval_and_sort */ + Substring_display_prep(prev_substring,queryuc_ptr,querylength,/*extraleft*/0,/*extraright*/0, + query_compress_fwd,query_compress_rev,genome); + pairs = Substring_convert_to_pairs(pairs,prev_substring,queryuc_ptr,chrlength,pairpool); + + for (q = hit->junctions_1toN, p = List_next(p); p != NULL; q = List_next(q), p = List_next(p)) { + junction = (Junction_T) List_head(q); + substring = (Substring_T) List_head(p); + + if ((type = Junction_type(junction)) == INS_JUNCTION) { + debug9(printf("Converting insertion\n")); + pairs = Substring_add_insertion(pairs,prev_substring,substring, + /*insertionlength*/Junction_nindels(junction),queryuc_ptr, + pairpool); + } else if (type == DEL_JUNCTION) { + debug9(printf("Converting deletion\n")); + deletion_string = Junction_deletion_string(junction,genome,hit->plusp); + pairs = Substring_add_deletion(pairs,prev_substring,substring, + deletion_string,/*deletionlength*/Junction_nindels(junction), + pairpool); + FREE(deletion_string); + } else if (type == 
SPLICE_JUNCTION) { + /* Causes problems with bad comps. Stage3_compute will insert gaps anyway */ + debug9(printf("(Not converting splice)\n")); + /* pairs = Substring_add_intron(pairs,prev_substring,substring,pairpool); */ + + } else { + abort(); + } + + debug9(printf("Converting substring\n")); + /* Normally done during Stage3pair_eval_and_sort */ + Substring_display_prep(substring,queryuc_ptr,querylength,/*extraleft*/0,/*extraright*/0, + query_compress_fwd,query_compress_rev,genome); + pairs = Substring_convert_to_pairs(pairs,substring,queryuc_ptr,chrlength,pairpool); + prev_substring = substring; + } + + debug9(Pair_dump_list(pairs,true)); + return pairs; + } +} + + /* Used only for --merge-overlap features, so obey hardclip and not querystart/queryend */ /* If use querylength_adj, ss.bug.4 fails. If use querylength, ss.bug.3 fails */ static List_T -Stage3end_convert_to_pairs (List_T pairs, T hit, Shortread_T queryseq, - int hardclip_low, int hardclip_high, int queryseq_offset) { +Stage3end_convert_to_pairs_out (List_T pairs, T hit, Shortread_T queryseq, + int hardclip_low, int hardclip_high, int queryseq_offset) { List_T p, q; /* Chrpos_T genomicpos1, genomicpos2; */ Substring_T substring, prev_substring; @@ -13214,37 +13443,37 @@ } else if (hit->hittype == GMAP) { debug15(printf("Converting gmap to pairs\n")); /* Use querylength here, but adj elsewhere */ - return Pair_convert_array_to_pairs(pairs,hit->pairarray,hit->npairs,hit->plusp,hit->querylength, - hardclip_low,hardclip_high,queryseq_offset); + return Pair_convert_array_to_pairs_out(pairs,hit->pairarray,hit->npairs,hit->plusp,hit->querylength, + hardclip_low,hardclip_high,queryseq_offset); } else { p = hit->substrings_1toN; prev_substring = (Substring_T) List_head(p); - pairs = Substring_convert_to_pairs(pairs,prev_substring,hit->querylength, - queryseq,hardclip_low,hardclip_high,queryseq_offset); + pairs = Substring_convert_to_pairs_out(pairs,prev_substring,hit->querylength, + 
queryseq,hardclip_low,hardclip_high,queryseq_offset); for (q = hit->junctions_1toN, p = List_next(p); p != NULL; q = List_next(q), p = List_next(p)) { junction = (Junction_T) List_head(q); substring = (Substring_T) List_head(p); if ((type = Junction_type(junction)) == INS_JUNCTION) { - pairs = Substring_add_insertion(pairs,prev_substring,substring,hit->querylength, - /*insertionlength*/Junction_nindels(junction),queryseq, - hardclip_low,hardclip_high,queryseq_offset); + pairs = Substring_add_insertion_out(pairs,prev_substring,substring,hit->querylength, + /*insertionlength*/Junction_nindels(junction),queryseq, + hardclip_low,hardclip_high,queryseq_offset); } else if (type == DEL_JUNCTION) { deletion_string = Junction_deletion_string(junction,genome,hit->plusp); - pairs = Substring_add_deletion(pairs,prev_substring,substring,hit->querylength, - deletion_string,/*deletionlength*/Junction_nindels(junction), - hardclip_low,hardclip_high,queryseq_offset); + pairs = Substring_add_deletion_out(pairs,prev_substring,substring,hit->querylength, + deletion_string,/*deletionlength*/Junction_nindels(junction), + hardclip_low,hardclip_high,queryseq_offset); } else if (type == SPLICE_JUNCTION) { - pairs = Substring_add_intron(pairs,prev_substring,substring,hit->querylength, - hardclip_low,hardclip_high,queryseq_offset); + pairs = Substring_add_intron_out(pairs,prev_substring,substring,hit->querylength, + hardclip_low,hardclip_high,queryseq_offset); } else { abort(); } - pairs = Substring_convert_to_pairs(pairs,substring,hit->querylength, - queryseq,hardclip_low,hardclip_high,queryseq_offset); + pairs = Substring_convert_to_pairs_out(pairs,substring,hit->querylength, + queryseq,hardclip_low,hardclip_high,queryseq_offset); prev_substring = substring; } @@ -13279,11 +13508,11 @@ if (hit5->plusp == true) { if (clipdir > 0) { - pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high,/*queryseq_offset*/0); + pairs5 = 
Stage3end_convert_to_pairs_out(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high,/*queryseq_offset*/0); pairs5 = strip_gaps_at_head(pairs5); - pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high, - /*queryseq_offset*/querylength5-hardclip5_low-hardclip5_high-hardclip3_low-hardclip3_high); + pairs3 = Stage3end_convert_to_pairs_out(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high, + /*queryseq_offset*/querylength5-hardclip5_low-hardclip5_high-hardclip3_low-hardclip3_high); pairs3 = strip_gaps_at_tail(pairs3); #ifdef CHECK_ASSERTIONS @@ -13317,11 +13546,11 @@ } } else if (clipdir < 0) { - pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high,/*queryseq_offset*/0); + pairs3 = Stage3end_convert_to_pairs_out(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high,/*queryseq_offset*/0); pairs3 = strip_gaps_at_head(pairs3); - pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high, - /*queryseq_offset*/querylength3-hardclip3_low-hardclip3_high-hardclip5_low-hardclip5_high); + pairs5 = Stage3end_convert_to_pairs_out(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high, + /*queryseq_offset*/querylength3-hardclip3_low-hardclip3_high-hardclip5_low-hardclip5_high); pairs5 = strip_gaps_at_tail(pairs5); #ifdef CHECK_ASSERTIONS @@ -13362,11 +13591,11 @@ } else { if (clipdir > 0) { - pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high,/*queryseq_offset*/0); + pairs3 = Stage3end_convert_to_pairs_out(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high,/*queryseq_offset*/0); pairs3 = strip_gaps_at_head(pairs3); - pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high, - /*queryseq_offset*/querylength3-hardclip3_low-hardclip3_high-hardclip5_low-hardclip5_high); + pairs5 = Stage3end_convert_to_pairs_out(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high, + /*queryseq_offset*/querylength3-hardclip3_low-hardclip3_high-hardclip5_low-hardclip5_high); 
pairs5 = strip_gaps_at_tail(pairs5); #ifdef CHECK_ASSERTIONS @@ -13400,11 +13629,11 @@ } } else if (clipdir < 0) { - pairs5 = Stage3end_convert_to_pairs(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high,/*queryseq_offset*/0); + pairs5 = Stage3end_convert_to_pairs_out(NULL,hit5,queryseq5,hardclip5_low,hardclip5_high,/*queryseq_offset*/0); pairs5 = strip_gaps_at_head(pairs5); - pairs3 = Stage3end_convert_to_pairs(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high, - /*queryseq_offset*/querylength5-hardclip5_low-hardclip5_high-hardclip3_low-hardclip3_high); + pairs3 = Stage3end_convert_to_pairs_out(NULL,hit3,queryseq3,hardclip3_low,hardclip3_high, + /*queryseq_offset*/querylength5-hardclip5_low-hardclip5_high-hardclip3_low-hardclip3_high); pairs3 = strip_gaps_at_tail(pairs3); #ifdef CHECK_ASSERTIONS @@ -13458,94 +13687,885 @@ } +static bool +resolve_inside_general_splice_plus (T *oldhit5, T *oldhit3, bool *private5p, bool *private3p, + Compress_T query5_compress_fwd, Compress_T query5_compress_rev, + Compress_T query3_compress_fwd, Compress_T query3_compress_rev, + char *queryuc_ptr_5, char *queryuc_ptr_3, int querylength5, int querylength3, + int genestrand, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR, + Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) { + bool changep = false; + T hit, hit5 = *oldhit5, hit3 = *oldhit3; -#if 0 -List_T -Stage3end_filter_bymatch (List_T hitlist) { - List_T filtered = NULL, p; - T hit; - int min_nmismatches_whole = 1000; +#ifdef DEBUG9 + List_T p; +#endif + List_T stage2pairs, all_stage2_starts, all_stage2_ends; + int queryend, endlength; + Chrpos_T chrstart, chrend; + struct Pair_T *pairarray1, *pairarray2; + List_T pairs1, pairs2; - for (p = hitlist; p != NULL; p = p->rest) { - hit = (T) p->first; - if (hit->nmismatches_whole < min_nmismatches_whole) { - min_nmismatches_whole = hit->nmismatches_whole; - } - } + int cdna_direction, sensedir, sense_try; + int npairs1, 
goodness1, matches1, nmatches_posttrim_1, + max_match_length_1, ambig_end_length_5_1, ambig_end_length_3_1, + unknowns1, mismatches1, qopens1, qindels1, topens1, tindels1, + ncanonical1, nsemicanonical1, nnoncanonical1; + int npairs2, goodness2, matches2, nmatches_posttrim_2, + max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2, + unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2, + ncanonical2, nsemicanonical2, nnoncanonical2; + double ambig_prob_5_1, ambig_prob_3_1, avg_splice_score_1; + double ambig_prob_5_2, ambig_prob_3_2, avg_splice_score_2; + Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1; + Splicetype_T ambig_splicetype_5_2, ambig_splicetype_3_2; + Univcoord_T knownsplice_limit_low, knownsplice_limit_high; - for (p = hitlist; p != NULL; p = p->rest) { - hit = (T) p->first; - if (hit->nmismatches_whole == min_nmismatches_whole) { - filtered = List_push(filtered,hit); - } else { - Stage3end_free(&hit); - } - } - List_free(&hitlist); + Univcoord_T start, end; + int nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks; + GMAP_source_T gmap_source; - return filtered; -} -#endif + if (hit5->querylength - 1 - Stage3end_queryend(hit5) > 10 && Stage3end_querystart(hit3) > 10) { + /* Both insides need to be resolved. Not solving at this time */ + debug9(printf("Dual to be resolved on inside. 
Not solving at this time\n")); + + } else if (hit5->chrnum != 0 && (endlength = hit5->querylength - 1 - Stage3end_queryend(hit5)) > 10) { + chrend = hit3->genomicstart - hit5->chroffset; /* Use hit5->chroffset in case hit3 is a transloc */ + chrstart = subtract_bounded(chrend,(Chrpos_T) expected_pairlength + pairlength_deviation + endlength,0); + if (chrstart < hit5->genomicend - hit5->chroffset) { + debug9(printf("Revising chrstart\n")); + chrstart = hit5->genomicend - hit5->chroffset; + } + queryend = Stage3end_queryend(hit5) + 1; + debug9(printf("For ends, chrstart %u, chrend %u\n",chrstart,chrend)); + if (chrstart < chrend && + (all_stage2_ends = Stage2_compute_ends( +#ifdef PMAP + &(queryaaseq_ptr[queryend]),&(queryaaseq_ptr[queryend]), + /*querylength*/endlength,/*query_offset*/0*3, +#else + &(queryuc_ptr_5[queryend]),&(queryuc_ptr_5[queryend]), + /*querylength*/endlength,/*query_offset*/queryend, +#endif + chrstart,chrend,hit5->chroffset,hit5->chrhigh,/*plusp*/true,genestrand, + + oligoindices_minor,pairpool,diagpool,cellpool, + /*localp should be false*/true,/*skip_repetitive_p*/false, + /*favor_right_p*/false,/*max_nalignments*/2,/*debug_graphic_p*/false)) != NULL) { + debug9(printf("Got %d ends\n",List_length(all_stage2_ends))); + debug9(printf("5' end to be resolved on inside\n")); +#ifdef DEBUG9 + for (p = all_stage2_ends; p != NULL; p = List_next(p)) { + Pair_dump_list(List_head(p),true); + } +#endif + stage2pairs = Stage3end_convert_to_pairs(/*pairs*/NULL,hit5,queryuc_ptr_5,querylength5, + query5_compress_fwd,query5_compress_rev, + /*chrlength*/hit5->chrhigh - hit5->chroffset,pairpool); + debug9(Pair_dump_list(stage2pairs,true)); -static Chrpos_T -overlap5_gmap_plus (int *querypos, Chrpos_T *genomicstart, Chrpos_T *genomicend, - Stage3end_T hit5, Stage3end_T gmap) { - Chrpos_T chrpos; - Substring_T substring; - List_T p; + knownsplice_limit_high = ((Pair_T) stage2pairs->first)->genomepos + hit5->chroffset; + stage2pairs = List_reverse(stage2pairs); + 
knownsplice_limit_low = ((Pair_T) stage2pairs->first)->genomepos + hit5->chroffset; - debug10(printf("Entered overlap5_gmap_plus with gmap %d..%d\n", - gmap->pairarray[0].querypos,gmap->pairarray[gmap->npairs - 1].querypos)); - for (p = hit5->substrings_LtoH; p != NULL; p = List_next(p)) { - substring = (Substring_T) List_head(p); - if (Substring_ambiguous_p(substring) == false) { - *genomicstart = Substring_alignstart_chr(substring); - *genomicend = Substring_alignend_chr(substring); - if ((chrpos = Pair_binary_search_ascending(&(*querypos),/*lowi*/0,/*highi*/gmap->npairs,gmap->pairarray, - *genomicstart,*genomicend)) > 0) { - return chrpos; + if ((sensedir = Stage3end_sensedir(hit3)) == SENSE_FORWARD) { + sense_try = +1; + } else if (sensedir == SENSE_ANTI) { + sense_try = -1; + } else { + sense_try = 0; } - } - } - return 0; -} + if (hit5->hittype == GMAP) { + gmap_source = hit5->gmap_source; + } else { + gmap_source = GMAP_VIA_SUBSTRINGS; + } -static Chrpos_T -overlap3_gmap_plus (int *querypos, Chrpos_T *genomicstart, Chrpos_T *genomicend, - Stage3end_T hit3, Stage3end_T gmap) { - Chrpos_T chrpos; - Substring_T substring; - List_T p; + if ((pairarray1 = Stage3_compute(&cdna_direction,&sensedir,&pairs1,&npairs1,&goodness1, + &matches1,&nmatches_posttrim_1,&max_match_length_1, + &ambig_end_length_5_1,&ambig_end_length_3_1, + &ambig_splicetype_5_1,&ambig_splicetype_3_1, + &ambig_prob_5_1,&ambig_prob_3_1, + &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1, + &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1, + + &pairarray2,&pairs2,&npairs2,&goodness2, + &matches2,&nmatches_posttrim_2,&max_match_length_2, + &ambig_end_length_5_2,&ambig_end_length_3_2, + &ambig_splicetype_5_2,&ambig_splicetype_3_2, + &ambig_prob_5_2,&ambig_prob_3_2, + &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2, + &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2, - debug10(printf("Entered overlap3_gmap_plus with gmap %d..%d\n", - 
gmap->pairarray[0].querypos,gmap->pairarray[gmap->npairs - 1].querypos)); - for (p = hit3->substrings_LtoH; p != NULL; p = List_next(p)) { - substring = (Substring_T) List_head(p); - if (Substring_ambiguous_p(substring) == false) { - *genomicstart = Substring_alignstart_chr(substring); - *genomicend = Substring_alignend_chr(substring); - if ((chrpos = Pair_binary_search_ascending(&(*querypos),/*lowi*/0,/*highi*/gmap->npairs,gmap->pairarray, - *genomicstart,*genomicend)) > 0) { - return chrpos; - } - } - } + stage2pairs,/*all_stage2_starts*/NULL,all_stage2_ends, +#ifdef END_KNOWNSPLICING_SHORTCUT + cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc, + watsonp ? query_compress_fwd : query_compress_rev, +#endif + /*queryseq_ptr*/queryuc_ptr_5,queryuc_ptr_5,querylength5,/*skiplength*/0, +#ifdef EXTRACT_GENOMICSEG + /*query_subseq_offset*/0, +#else + /*query_subseq_offset*/0, +#endif + hit5->chrnum,hit5->chroffset,hit5->chrhigh, + knownsplice_limit_low,knownsplice_limit_high,/*plusp*/true,genestrand, + /*jump_late_p*/false,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR, + sense_try,/*sense_filter*/0, + oligoindices_minor,diagpool,cellpool)) == NULL) { - return 0; -} + } else if (pairarray2 != NULL) { + if (avg_splice_score_1 > avg_splice_score_2) { + nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks, + pairarray1,npairs1); + start = subtract_bounded(hit5->chroffset + Pair_genomepos(&(pairarray1[0])), + /*minusterm*/Pair_querypos(&(pairarray1[0])),hit5->chroffset); + end = add_bounded(hit5->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])), + /*plusterm*/querylength5 - 1 - Pair_querypos(&(pairarray1[npairs1-1])),hit5->chrhigh); + + if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, + ambig_end_length_5_1,ambig_end_length_3_1, + ambig_splicetype_5_1,ambig_splicetype_3_1, + avg_splice_score_1,goodness1, + pairarray1,npairs1,nsegments,nintrons,nindelbreaks, + /*left*/start,/*genomiclength*/end 
- start + 1, + /*plusp*/true,genestrand, + /*accession*/NULL,querylength5,hit5->chrnum,hit5->chroffset,hit5->chrhigh,hit5->chrlength, + /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,/*sensedir_knownp*/false, + gmap_source)) == NULL) { + FREE_OUT(pairarray1); + } else { + if (*private5p == true) { + Stage3end_free(&(*oldhit5)); + } + debug9(printf("5' resolved on inside\n")); + *oldhit5 = hit; + *private5p = true; + changep = true; + } + FREE_OUT(pairarray2); + } else { + nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks, + pairarray2,npairs2); + start = subtract_bounded(hit5->chroffset + Pair_genomepos(&(pairarray2[0])), + /*minusterm*/Pair_querypos(&(pairarray2[0])),hit5->chroffset); + end = add_bounded(hit5->chroffset + Pair_genomepos(&(pairarray2[npairs2-1])), + /*plusterm*/querylength5 - 1 - Pair_querypos(&(pairarray2[npairs2-1])),hit5->chrhigh); -static Chrpos_T -overlap5_gmap_minus (int *querypos, Chrpos_T *genomicstart, Chrpos_T *genomicend, - Stage3end_T hit5, Stage3end_T gmap) { - Chrpos_T chrpos; - Substring_T substring; - List_T p; + if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2, + ambig_end_length_5_2,ambig_end_length_3_2, + ambig_splicetype_5_2,ambig_splicetype_3_2, + avg_splice_score_2,goodness2, + pairarray2,npairs2,nsegments,nintrons,nindelbreaks, + /*left*/start,/*genomiclength*/end - start + 1, + /*plusp*/true,genestrand, + /*accession*/NULL,querylength5,hit5->chrnum,hit5->chroffset,hit5->chrhigh,hit5->chrlength, + /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,/*sensedir_knownp*/false, + gmap_source)) == NULL) { + FREE_OUT(pairarray2); + } else { + if (*private5p == true) { + Stage3end_free(&(*oldhit5)); + } + debug9(printf("5' resolved on inside\n")); + *oldhit5 = hit; + *private5p = true; + changep = true; + } + FREE_OUT(pairarray1); + } - debug10(printf("Entered overlap5_gmap_minus with gmap %d..%d\n", + } else { + nsegments = 
Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks, + pairarray1,npairs1); + start = subtract_bounded(hit5->chroffset + Pair_genomepos(&(pairarray1[0])), + /*minusterm*/Pair_querypos(&(pairarray1[0])),hit5->chroffset); + end = add_bounded(hit5->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])), + /*plusterm*/querylength5 - 1 - Pair_querypos(&(pairarray1[npairs1-1])),hit5->chrhigh); + + if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, + ambig_end_length_5_1,ambig_end_length_3_1, + ambig_splicetype_5_1,ambig_splicetype_3_1, + avg_splice_score_1,goodness1, + pairarray1,npairs1,nsegments,nintrons,nindelbreaks, + /*left*/start,/*genomiclength*/end - start + 1, + /*plusp*/true,genestrand, + /*accession*/NULL,querylength5,hit5->chrnum,hit5->chroffset,hit5->chrhigh,hit5->chrlength, + cdna_direction,sensedir,/*sensedir_knownp*/true, + gmap_source)) == NULL) { + FREE_OUT(pairarray1); + } else { + if (*private5p == true) { + Stage3end_free(&(*oldhit5)); + } + debug9(printf("5' resolved on inside\n")); + *oldhit5 = hit; + *private5p = true; + changep = true; + } + } + + List_free(&all_stage2_ends); + } + + } else if (hit3->chrnum != 0 && (endlength = Stage3end_querystart(hit3)) > 10) { + chrstart = hit5->genomicend - hit3->chroffset; /* Use hit3->chroffset in case hit5 is a transloc */ + chrend = add_bounded(chrstart,(Chrpos_T) expected_pairlength + pairlength_deviation + endlength,hit5->chrhigh); + if (chrend > hit3->genomicstart - hit3->chroffset) { + debug9(printf("Revising chrend\n")); + chrend = hit3->genomicstart - hit3->chroffset; + } + debug9(printf("Resolve plus 3': For starts, chrstart %u, chrend %u\n",chrstart,chrend)); + if (chrstart < chrend && + (all_stage2_starts = Stage2_compute_starts( +#ifdef PMAP + &(queryaaseq_ptr[0]),&(queryaaseq_ptr[0]), + /*querylength*/endlength,/*query_offset*/0*3, +#else + &(queryuc_ptr_3[0]),&(queryuc_ptr_3[0]), + /*querylength*/endlength,/*query_offset*/0, +#endif + 
chrstart,chrend,hit3->chroffset,hit3->chrhigh,/*plusp*/true,genestrand, + + oligoindices_minor,pairpool,diagpool,cellpool, + /*localp should be false*/true,/*skip_repetitive_p*/false, + /*favor_right_p*/true,/*max_nalignments*/2,/*debug_graphic_p*/false)) != NULL) { + + debug9(printf("Got %d starts\n",List_length(all_stage2_starts))); + debug9(printf("3' start to be resolved on inside\n")); +#ifdef DEBUG9 + for (p = all_stage2_starts; p != NULL; p = List_next(p)) { + Pair_dump_list(List_head(p),true); + } +#endif + stage2pairs = Stage3end_convert_to_pairs(/*pairs*/NULL,hit3,queryuc_ptr_3,querylength3, + query3_compress_fwd,query3_compress_rev, + /*chrlength*/hit3->chrhigh - hit3->chroffset,pairpool); + debug9(Pair_dump_list(stage2pairs,true)); + + knownsplice_limit_high = ((Pair_T) stage2pairs->first)->genomepos + hit3->chroffset; + stage2pairs = List_reverse(stage2pairs); + knownsplice_limit_low = ((Pair_T) stage2pairs->first)->genomepos + hit3->chroffset; + + if ((sensedir = Stage3end_sensedir(hit5)) == SENSE_FORWARD) { + sense_try = +1; + } else if (sensedir == SENSE_ANTI) { + sense_try = -1; + } else { + sense_try = 0; + } + + if (hit3->hittype == GMAP) { + gmap_source = hit3->gmap_source; + } else { + gmap_source = GMAP_VIA_SUBSTRINGS; + } + + if ((pairarray1 = Stage3_compute(&cdna_direction,&sensedir,&pairs1,&npairs1,&goodness1, + &matches1,&nmatches_posttrim_1,&max_match_length_1, + &ambig_end_length_5_1,&ambig_end_length_3_1, + &ambig_splicetype_5_1,&ambig_splicetype_3_1, + &ambig_prob_5_1,&ambig_prob_3_1, + &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1, + &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1, + + &pairarray2,&pairs2,&npairs2,&goodness2, + &matches2,&nmatches_posttrim_2,&max_match_length_2, + &ambig_end_length_5_2,&ambig_end_length_3_2, + &ambig_splicetype_5_2,&ambig_splicetype_3_2, + &ambig_prob_5_2,&ambig_prob_3_2, + &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2, + 
&ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2, + + stage2pairs,all_stage2_starts,/*all_stage2_ends*/NULL, +#ifdef END_KNOWNSPLICING_SHORTCUT + cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc, + watsonp ? query_compress_fwd : query_compress_rev, +#endif + /*queryseq_ptr*/queryuc_ptr_3,queryuc_ptr_3,querylength3,/*skiplength*/0, +#ifdef EXTRACT_GENOMICSEG + /*query_subseq_offset*/0, +#else + /*query_subseq_offset*/0, +#endif + hit3->chrnum,hit3->chroffset,hit3->chrhigh, + knownsplice_limit_low,knownsplice_limit_high,/*plusp*/true,genestrand, + /*jump_late_p*/false,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR, + sense_try,/*sense_filter*/0, + oligoindices_minor,diagpool,cellpool)) == NULL) { + + } else if (pairarray2 != NULL) { + if (avg_splice_score_1 > avg_splice_score_2) { + nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks, + pairarray1,npairs1); + start = subtract_bounded(hit3->chroffset + Pair_genomepos(&(pairarray1[0])), + /*minusterm*/Pair_querypos(&(pairarray1[0])),hit3->chroffset); + end = add_bounded(hit3->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])), + /*plusterm*/querylength3 - 1 - Pair_querypos(&(pairarray1[npairs1-1])),hit3->chrhigh); + + if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, + ambig_end_length_5_1,ambig_end_length_3_1, + ambig_splicetype_5_1,ambig_splicetype_3_1, + avg_splice_score_1,goodness1, + pairarray1,npairs1,nsegments,nintrons,nindelbreaks, + /*left*/start,/*genomiclength*/end - start + 1, + /*plusp*/true,genestrand, + /*accession*/NULL,querylength3,hit3->chrnum,hit3->chroffset,hit3->chrhigh,hit3->chrlength, + /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,/*sensedir_knownp*/false, + gmap_source)) == NULL) { + FREE_OUT(pairarray1); + } else { + if (*private3p == true) { + Stage3end_free(&(*oldhit3)); + } + debug9(printf("3' resolved on inside\n")); + *oldhit3 = hit; + *private3p = true; + changep = true; + } + 
FREE_OUT(pairarray2); + + } else { + nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks, + pairarray2,npairs2); + start = subtract_bounded(hit3->chroffset + Pair_genomepos(&(pairarray2[0])), + /*minusterm*/Pair_querypos(&(pairarray2[0])),hit3->chroffset); + end = add_bounded(hit3->chroffset + Pair_genomepos(&(pairarray2[npairs2-1])), + /*plusterm*/querylength3 - 1 - Pair_querypos(&(pairarray2[npairs2-1])),hit3->chrhigh); + + if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2, + ambig_end_length_5_2,ambig_end_length_3_2, + ambig_splicetype_5_2,ambig_splicetype_3_2, + avg_splice_score_2,goodness2, + pairarray2,npairs2,nsegments,nintrons,nindelbreaks, + /*left*/start,/*genomiclength*/end - start + 1, + /*plusp*/true,genestrand, + /*accession*/NULL,querylength3,hit3->chrnum,hit3->chroffset,hit3->chrhigh,hit3->chrlength, + /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,/*sensedir_knownp*/false, + gmap_source)) == NULL) { + FREE_OUT(pairarray2); + } else { + if (*private3p == true) { + Stage3end_free(&(*oldhit3)); + } + debug9(printf("3' resolved on inside\n")); + *oldhit3 = hit; + *private3p = true; + changep = true; + } + FREE_OUT(pairarray1); + } + + } else { + nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks, + pairarray1,npairs1); + start = subtract_bounded(hit3->chroffset + Pair_genomepos(&(pairarray1[0])), + /*minusterm*/Pair_querypos(&(pairarray1[0])),hit3->chroffset); + end = add_bounded(hit3->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])), + /*plusterm*/querylength3 - 1 - Pair_querypos(&(pairarray1[npairs1-1])),hit3->chrhigh); + + if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, + ambig_end_length_5_1,ambig_end_length_3_1, + ambig_splicetype_5_1,ambig_splicetype_3_1, + avg_splice_score_1,goodness1, + pairarray1,npairs1,nsegments,nintrons,nindelbreaks, + /*left*/start,/*genomiclength*/end - start + 1, + 
/*plusp*/true,genestrand, + /*accession*/NULL,querylength3,hit3->chrnum,hit3->chroffset,hit3->chrhigh,hit3->chrlength, + cdna_direction,sensedir,/*sensedir_knownp*/true, + gmap_source)) == NULL) { + FREE_OUT(pairarray1); + } else { + if (*private3p == true) { + Stage3end_free(&(*oldhit3)); + } + debug9(printf("3' resolved on inside\n")); + *oldhit3 = hit; + *private3p = true; + changep = true; + } + } + + List_free(&all_stage2_starts); + } + } + + return changep; +} + +static bool +resolve_inside_general_splice_minus (T *oldhit5, T *oldhit3, bool *private5p, bool *private3p, + Compress_T query5_compress_fwd, Compress_T query5_compress_rev, + Compress_T query3_compress_fwd, Compress_T query3_compress_rev, + char *queryuc_ptr_5, char *queryuc_ptr_3, int querylength5, int querylength3, + int genestrand, Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR, + Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) { + bool changep = false; + T hit, hit5 = *oldhit5, hit3 = *oldhit3; + +#ifdef DEBUG9 + List_T p; +#endif + List_T stage2pairs, all_stage2_starts, all_stage2_ends; + int queryend, endlength; + Chrpos_T chrstart, chrend; + struct Pair_T *pairarray1, *pairarray2; + List_T pairs1, pairs2; + + int cdna_direction, sensedir, sense_try; + int npairs1, goodness1, matches1, nmatches_posttrim_1, + max_match_length_1, ambig_end_length_5_1, ambig_end_length_3_1, + unknowns1, mismatches1, qopens1, qindels1, topens1, tindels1, + ncanonical1, nsemicanonical1, nnoncanonical1; + int npairs2, goodness2, matches2, nmatches_posttrim_2, + max_match_length_2, ambig_end_length_5_2, ambig_end_length_3_2, + unknowns2, mismatches2, qopens2, qindels2, topens2, tindels2, + ncanonical2, nsemicanonical2, nnoncanonical2; + double ambig_prob_5_1, ambig_prob_3_1, avg_splice_score_1; + double ambig_prob_5_2, ambig_prob_3_2, avg_splice_score_2; + Splicetype_T ambig_splicetype_5_1, ambig_splicetype_3_1; + Splicetype_T ambig_splicetype_5_2, 
ambig_splicetype_3_2; + Univcoord_T knownsplice_limit_low, knownsplice_limit_high; + + Univcoord_T start, end; + int nsegments, nmismatches_whole, nindels, nintrons, nindelbreaks; + GMAP_source_T gmap_source; + + + + if (hit5->querylength - 1 - Stage3end_queryend(hit5) > 10 && Stage3end_querystart(hit3) > 10) { + /* Both insides need to be resolved. Not solving at this time */ + debug9(printf("Dual to be resolved on inside. Not solving at this time\n")); + + } else if (hit5->chrnum != 0 && (endlength = hit5->querylength - 1 - Stage3end_queryend(hit5)) > 10) { + chrstart = hit3->genomicstart - hit5->chroffset; /* Use hit5->chroffset in case hit3 is a transloc */ + chrend = add_bounded(chrstart,(Chrpos_T) expected_pairlength + pairlength_deviation + endlength,hit3->chrhigh); + if (chrend > hit5->genomicend - hit5->chroffset) { + debug9(printf("Revising chrend\n")); + chrend = hit5->genomicend - hit5->chroffset; + } + queryend = Stage3end_queryend(hit5) + 1; + debug9(printf("For ends, chrstart %u, chrend %u\n",chrstart,chrend)); + if (chrstart < chrend && + (all_stage2_ends = Stage2_compute_ends( +#ifdef PMAP + &(queryaaseq_ptr[queryend]),&(queryaaseq_ptr[queryend]), + /*querylength*/endlength,/*query_offset*/0*3, +#else + &(queryuc_ptr_5[queryend]),&(queryuc_ptr_5[queryend]), + /*querylength*/endlength,/*query_offset*/queryend, +#endif + chrstart,chrend,hit5->chroffset,hit5->chrhigh,/*plusp*/false,genestrand, + + oligoindices_minor,pairpool,diagpool,cellpool, + /*localp should be false*/true,/*skip_repetitive_p*/false, + /*favor_right_p*/false,/*max_nalignments*/2,/*debug_graphic_p*/false)) != NULL) { + + debug9(printf("Got %d ends\n",List_length(all_stage2_ends))); + debug9(printf("5' end to be resolved on inside\n")); +#ifdef DEBUG9 + for (p = all_stage2_ends; p != NULL; p = List_next(p)) { + Pair_dump_list(List_head(p),true); + } +#endif + stage2pairs = Stage3end_convert_to_pairs(/*pairs*/NULL,hit5,queryuc_ptr_5,querylength5, + 
query5_compress_fwd,query5_compress_rev, + /*chrlength*/hit5->chrhigh - hit5->chroffset,pairpool); + debug9(Pair_dump_list(stage2pairs,true)); + + knownsplice_limit_low = ((Pair_T) stage2pairs->first)->genomepos + hit5->chroffset; + stage2pairs = List_reverse(stage2pairs); + knownsplice_limit_high = ((Pair_T) stage2pairs->first)->genomepos + hit5->chroffset; + + if ((sensedir = Stage3end_sensedir(hit3)) == SENSE_FORWARD) { + sense_try = +1; + } else if (sensedir == SENSE_ANTI) { + sense_try = -1; + } else { + sense_try = 0; + } + + if (hit5->hittype == GMAP) { + gmap_source = hit5->gmap_source; + } else { + gmap_source = GMAP_VIA_SUBSTRINGS; + } + + if ((pairarray1 = Stage3_compute(&cdna_direction,&sensedir,&pairs1,&npairs1,&goodness1, + &matches1,&nmatches_posttrim_1,&max_match_length_1, + &ambig_end_length_5_1,&ambig_end_length_3_1, + &ambig_splicetype_5_1,&ambig_splicetype_3_1, + &ambig_prob_5_1,&ambig_prob_3_1, + &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1, + &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1, + + &pairarray2,&pairs2,&npairs2,&goodness2, + &matches2,&nmatches_posttrim_2,&max_match_length_2, + &ambig_end_length_5_2,&ambig_end_length_3_2, + &ambig_splicetype_5_2,&ambig_splicetype_3_2, + &ambig_prob_5_2,&ambig_prob_3_2, + &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2, + &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2, + + stage2pairs,/*all_stage2_starts*/NULL,all_stage2_ends, +#ifdef END_KNOWNSPLICING_SHORTCUT + cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc, + watsonp ? 
query_compress_fwd : query_compress_rev, +#endif + /*queryseq_ptr*/queryuc_ptr_5,queryuc_ptr_5,querylength5,/*skiplength*/0, +#ifdef EXTRACT_GENOMICSEG + /*query_subseq_offset*/0, +#else + /*query_subseq_offset*/0, +#endif + hit5->chrnum,hit5->chroffset,hit5->chrhigh, + knownsplice_limit_low,knownsplice_limit_high,/*plusp*/false,genestrand, + /*jump_late_p*/true,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR, + sense_try,/*sense_filter*/0, + oligoindices_minor,diagpool,cellpool)) == NULL) { + + } else if (pairarray2 != NULL) { + if (avg_splice_score_1 > avg_splice_score_2) { + nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks, + pairarray1,npairs1); + start = add_bounded(hit5->chroffset + Pair_genomepos(&(pairarray1[0])), + /*plusterm*/Pair_querypos(&(pairarray1[0])),hit5->chrhigh); + end = subtract_bounded(hit5->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])), + /*minusterm*/querylength5 - 1 - Pair_querypos(&(pairarray1[npairs1-1])),hit5->chroffset); + + if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, + ambig_end_length_5_1,ambig_end_length_3_1, + ambig_splicetype_5_1,ambig_splicetype_3_1, + avg_splice_score_1,goodness1, + pairarray1,npairs1,nsegments,nintrons,nindelbreaks, + /*left*/end,/*genomiclength*/start - end + 1, + /*plusp*/false,genestrand, + /*accession*/NULL,querylength5,hit5->chrnum,hit5->chroffset,hit5->chrhigh,hit5->chrlength, + /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,/*sensedir_knownp*/false, + gmap_source)) == NULL) { + FREE_OUT(pairarray1); + } else { + if (*private5p == true) { + Stage3end_free(&(*oldhit5)); + } + debug9(printf("5' resolved on inside\n")); + *oldhit5 = hit; + *private5p = true; + changep = true; + } + FREE_OUT(pairarray2); + + } else { + nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks, + pairarray2,npairs2); + start = add_bounded(hit5->chroffset + Pair_genomepos(&(pairarray2[0])), + 
/*plusterm*/Pair_querypos(&(pairarray2[0])),hit5->chrhigh); + end = subtract_bounded(hit5->chroffset + Pair_genomepos(&(pairarray2[npairs2-1])), + /*minusterm*/querylength5 - 1 - Pair_querypos(&(pairarray2[npairs2-1])),hit5->chroffset); + + if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2, + ambig_end_length_5_2,ambig_end_length_3_2, + ambig_splicetype_5_2,ambig_splicetype_3_2, + avg_splice_score_2,goodness2, + pairarray2,npairs2,nsegments,nintrons,nindelbreaks, + /*left*/end,/*genomiclength*/start - end + 1, + /*plusp*/false,genestrand, + /*accession*/NULL,querylength5,hit5->chrnum,hit5->chroffset,hit5->chrhigh,hit5->chrlength, + /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,/*sensedir_knownp*/false, + gmap_source)) == NULL) { + FREE_OUT(pairarray2); + } else { + if (*private5p == true) { + Stage3end_free(&(*oldhit5)); + } + debug9(printf("5' resolved on inside\n")); + *oldhit5 = hit; + *private5p = true; + changep = true; + } + FREE_OUT(pairarray1); + } + + } else { + nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks, + pairarray1,npairs1); + start = add_bounded(hit5->chroffset + Pair_genomepos(&(pairarray1[0])), + /*plusterm*/Pair_querypos(&(pairarray1[0])),hit5->chrhigh); + end = subtract_bounded(hit5->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])), + /*minusterm*/querylength5 - 1 - Pair_querypos(&(pairarray1[npairs1-1])),hit5->chroffset); + + if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, + ambig_end_length_5_1,ambig_end_length_3_1, + ambig_splicetype_5_1,ambig_splicetype_3_1, + avg_splice_score_1,goodness1, + pairarray1,npairs1,nsegments,nintrons,nindelbreaks, + /*left*/end,/*genomiclength*/start - end + 1, + /*plusp*/false,genestrand, + /*accession*/NULL,querylength5,hit5->chrnum,hit5->chroffset,hit5->chrhigh,hit5->chrlength, + cdna_direction,sensedir,/*sensedir_knownp*/true, + gmap_source)) == NULL) { + FREE_OUT(pairarray1); + } else { + if 
(*private5p == true) { + Stage3end_free(&(*oldhit5)); + } + debug9(printf("5' resolved on inside\n")); + *oldhit5 = hit; + *private5p = true; + changep = true; + } + } + + List_free(&all_stage2_ends); + } + + } else if (hit3->chrnum != 0 && (endlength = Stage3end_querystart(hit3)) > 10) { + chrend = hit5->genomicend - hit3->chroffset; /* Use hit3->chroffset in case hit5 is a transloc */ + chrstart = subtract_bounded(chrend,(Chrpos_T) expected_pairlength + pairlength_deviation + endlength,0); + if (chrstart < hit3->genomicstart - hit3->chroffset) { + debug9(printf("Revising chrstart\n")); + chrstart = hit3->genomicstart - hit3->chroffset; + } + debug9(printf("For starts, chrstart %u, chrend %u\n",chrstart,chrend)); + if (chrstart < chrend && + (all_stage2_starts = Stage2_compute_starts( +#ifdef PMAP + &(queryaaseq_ptr[0]),&(queryaaseq_ptr[0]), + /*querylength*/endlength,/*query_offset*/0*3, +#else + &(queryuc_ptr_3[0]),&(queryuc_ptr_3[0]), + /*querylength*/endlength,/*query_offset*/0, +#endif + chrstart,chrend,hit3->chroffset,hit3->chrhigh,/*plusp*/false,genestrand, + + oligoindices_minor,pairpool,diagpool,cellpool, + /*localp should be false*/true,/*skip_repetitive_p*/false, + /*favor_right_p*/true,/*max_nalignments*/2,/*debug_graphic_p*/false)) != NULL) { + + debug9(printf("Got %d starts\n",List_length(all_stage2_starts))); + debug9(printf("3' start to be resolved on inside\n")); +#ifdef DEBUG9 + for (p = all_stage2_starts; p != NULL; p = List_next(p)) { + Pair_dump_list(List_head(p),true); + } +#endif + stage2pairs = Stage3end_convert_to_pairs(/*pairs*/NULL,hit3,queryuc_ptr_3,querylength3, + query3_compress_fwd,query3_compress_rev, + /*chrlength*/hit3->chrhigh - hit3->chroffset,pairpool); + debug9(Pair_dump_list(stage2pairs,true)); + + knownsplice_limit_low = ((Pair_T) stage2pairs->first)->genomepos + hit3->chroffset; + stage2pairs = List_reverse(stage2pairs); + knownsplice_limit_high = ((Pair_T) stage2pairs->first)->genomepos + hit3->chroffset; + + if ((sensedir 
= Stage3end_sensedir(hit5)) == SENSE_FORWARD) { + sense_try = +1; + } else if (sensedir == SENSE_ANTI) { + sense_try = -1; + } else { + sense_try = 0; + } + + if (hit3->hittype == GMAP) { + gmap_source = hit3->gmap_source; + } else { + gmap_source = GMAP_VIA_SUBSTRINGS; + } + + if ((pairarray1 = Stage3_compute(&cdna_direction,&sensedir,&pairs1,&npairs1,&goodness1, + &matches1,&nmatches_posttrim_1,&max_match_length_1, + &ambig_end_length_5_1,&ambig_end_length_3_1, + &ambig_splicetype_5_1,&ambig_splicetype_3_1, + &ambig_prob_5_1,&ambig_prob_3_1, + &unknowns1,&mismatches1,&qopens1,&qindels1,&topens1,&tindels1, + &ncanonical1,&nsemicanonical1,&nnoncanonical1,&avg_splice_score_1, + + &pairarray2,&pairs2,&npairs2,&goodness2, + &matches2,&nmatches_posttrim_2,&max_match_length_2, + &ambig_end_length_5_2,&ambig_end_length_3_2, + &ambig_splicetype_5_2,&ambig_splicetype_3_2, + &ambig_prob_5_2,&ambig_prob_3_2, + &unknowns2,&mismatches2,&qopens2,&qindels2,&topens2,&tindels2, + &ncanonical2,&nsemicanonical2,&nnoncanonical2,&avg_splice_score_2, + + stage2pairs,all_stage2_starts,/*all_stage2_ends*/NULL, +#ifdef END_KNOWNSPLICING_SHORTCUT + cutoff_level,/*queryptr*/watsonp ? queryuc_ptr : queryrc, + watsonp ? 
query_compress_fwd : query_compress_rev, +#endif + /*queryseq_ptr*/queryuc_ptr_3,queryuc_ptr_3,querylength3,/*skiplength*/0, +#ifdef EXTRACT_GENOMICSEG + /*query_subseq_offset*/0, +#else + /*query_subseq_offset*/0, +#endif + hit3->chrnum,hit3->chroffset,hit3->chrhigh, + knownsplice_limit_low,knownsplice_limit_high,/*plusp*/false,genestrand, + /*jump_late_p*/true,maxpeelback,pairpool,dynprogL,dynprogM,dynprogR, + sense_try,/*sense_filter*/0, + oligoindices_minor,diagpool,cellpool)) == NULL) { + + } else if (pairarray2 != NULL) { + if (avg_splice_score_1 > avg_splice_score_2) { + nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks, + pairarray1,npairs1); + start = add_bounded(hit3->chroffset + Pair_genomepos(&(pairarray1[0])), + /*plusterm*/Pair_querypos(&(pairarray1[0])),hit3->chrhigh); + end = subtract_bounded(hit3->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])), + /*minusterm*/querylength3 - 1 - Pair_querypos(&(pairarray1[npairs1-1])),hit3->chroffset); + + if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, + ambig_end_length_5_1,ambig_end_length_3_1, + ambig_splicetype_5_1,ambig_splicetype_3_1, + avg_splice_score_1,goodness1, + pairarray1,npairs1,nsegments,nintrons,nindelbreaks, + /*left*/end,/*genomiclength*/start - end + 1, + /*plusp*/false,genestrand, + /*accession*/NULL,querylength3,hit3->chrnum,hit3->chroffset,hit3->chrhigh,hit3->chrlength, + /*cdna_direction*/+1,/*sensedir*/SENSE_FORWARD,/*sensedir_knownp*/false, + gmap_source)) == NULL) { + FREE_OUT(pairarray1); + } else { + if (*private3p == true) { + Stage3end_free(&(*oldhit3)); + } + debug9(printf("3' resolved on inside\n")); + *oldhit3 = hit; + *private3p = true; + changep = true; + } + FREE_OUT(pairarray2); + + } else { + nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks, + pairarray2,npairs2); + start = add_bounded(hit3->chroffset + Pair_genomepos(&(pairarray2[0])), + 
/*plusterm*/Pair_querypos(&(pairarray2[0])),hit3->chrhigh); + end = subtract_bounded(hit3->chroffset + Pair_genomepos(&(pairarray2[npairs2-1])), + /*minusterm*/querylength3 - 1 - Pair_querypos(&(pairarray2[npairs2-1])),hit3->chroffset); + + if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_2,max_match_length_2, + ambig_end_length_5_2,ambig_end_length_3_2, + ambig_splicetype_5_2,ambig_splicetype_3_2, + avg_splice_score_2,goodness2, + pairarray2,npairs2,nsegments,nintrons,nindelbreaks, + /*left*/end,/*genomiclength*/start - end + 1, + /*plusp*/false,genestrand, + /*accession*/NULL,querylength3,hit3->chrnum,hit3->chroffset,hit3->chrhigh,hit3->chrlength, + /*cdna_direction*/-1,/*sensedir*/SENSE_ANTI,/*sensedir_knownp*/false, + gmap_source)) == NULL) { + FREE_OUT(pairarray2); + } else { + if (*private3p == true) { + Stage3end_free(&(*oldhit3)); + } + debug9(printf("3' resolved on inside\n")); + *oldhit3 = hit; + *private3p = true; + changep = true; + } + FREE_OUT(pairarray1); + } + + } else { + nsegments = Pair_gsnap_nsegments(&nmismatches_whole,&nindels,&nintrons,&nindelbreaks, + pairarray1,npairs1); + start = add_bounded(hit3->chroffset + Pair_genomepos(&(pairarray1[0])), + /*plusterm*/Pair_querypos(&(pairarray1[0])),hit3->chrhigh); + end = subtract_bounded(hit3->chroffset + Pair_genomepos(&(pairarray1[npairs1-1])), + /*minusterm*/querylength3 - 1 - Pair_querypos(&(pairarray1[npairs1-1])),hit3->chroffset); + + if ((hit = Stage3end_new_gmap(nmismatches_whole,nmatches_posttrim_1,max_match_length_1, + ambig_end_length_5_1,ambig_end_length_3_1, + ambig_splicetype_5_1,ambig_splicetype_3_1, + avg_splice_score_1,goodness1, + pairarray1,npairs1,nsegments,nintrons,nindelbreaks, + /*left*/end,/*genomiclength*/start - end + 1, + /*plusp*/false,genestrand, + /*accession*/NULL,querylength3,hit3->chrnum,hit3->chroffset,hit3->chrhigh,hit3->chrlength, + cdna_direction,sensedir,/*sensedir_knownp*/true, + gmap_source)) == NULL) { + FREE_OUT(pairarray1); + } else { + if 
(*private3p == true) { + Stage3end_free(&(*oldhit3)); + } + debug9(printf("3' resolved on inside\n")); + *oldhit3 = hit; + *private3p = true; + changep = true; + } + } + + List_free(&all_stage2_starts); + } + } + + return changep; +} + + +static Chrpos_T +overlap5_gmap_plus (int *querypos, Chrpos_T *genomicstart, Chrpos_T *genomicend, + Stage3end_T hit5, Stage3end_T gmap) { + Chrpos_T chrpos; + Substring_T substring; + List_T p; + + debug10(printf("Entered overlap5_gmap_plus with gmap %d..%d\n", + gmap->pairarray[0].querypos,gmap->pairarray[gmap->npairs - 1].querypos)); + for (p = hit5->substrings_LtoH; p != NULL; p = List_next(p)) { + substring = (Substring_T) List_head(p); + if (Substring_ambiguous_p(substring) == false) { + *genomicstart = Substring_alignstart_chr(substring); + *genomicend = Substring_alignend_chr(substring); + if ((chrpos = Pair_binary_search_ascending(&(*querypos),/*lowi*/0,/*highi*/gmap->npairs,gmap->pairarray, + *genomicstart,*genomicend)) > 0) { + return chrpos; + } + } + } + + return 0; +} + +static Chrpos_T +overlap3_gmap_plus (int *querypos, Chrpos_T *genomicstart, Chrpos_T *genomicend, + Stage3end_T hit3, Stage3end_T gmap) { + Chrpos_T chrpos; + Substring_T substring; + List_T p; + + debug10(printf("Entered overlap3_gmap_plus with gmap %d..%d\n", + gmap->pairarray[0].querypos,gmap->pairarray[gmap->npairs - 1].querypos)); + for (p = hit3->substrings_LtoH; p != NULL; p = List_next(p)) { + substring = (Substring_T) List_head(p); + if (Substring_ambiguous_p(substring) == false) { + *genomicstart = Substring_alignstart_chr(substring); + *genomicend = Substring_alignend_chr(substring); + if ((chrpos = Pair_binary_search_ascending(&(*querypos),/*lowi*/0,/*highi*/gmap->npairs,gmap->pairarray, + *genomicstart,*genomicend)) > 0) { + return chrpos; + } + } + } + + return 0; +} + + +static Chrpos_T +overlap5_gmap_minus (int *querypos, Chrpos_T *genomicstart, Chrpos_T *genomicend, + Stage3end_T hit5, Stage3end_T gmap) { + Chrpos_T chrpos; + 
Substring_T substring; + List_T p; + + debug10(printf("Entered overlap5_gmap_minus with gmap %d..%d\n", gmap->pairarray[0].querypos,gmap->pairarray[gmap->npairs - 1].querypos)); for (p = hit5->substrings_LtoH; p != NULL; p = List_next(p)) { substring = (Substring_T) List_head(p); @@ -13557,34 +14577,329 @@ return chrpos; } } - } + } + + return 0; +} + +static Chrpos_T +overlap3_gmap_minus (int *querypos, Chrpos_T *genomicstart, Chrpos_T *genomicend, + Stage3end_T hit3, Stage3end_T gmap) { + Chrpos_T chrpos; + Substring_T substring; + List_T p; + + debug10(printf("Entered overlap3_gmap_minus with gmap %d..%d\n", + gmap->pairarray[0].querypos,gmap->pairarray[gmap->npairs - 1].querypos)); + for (p = hit3->substrings_LtoH; p != NULL; p = List_next(p)) { + substring = (Substring_T) List_head(p); + if (Substring_ambiguous_p(substring) == false) { + *genomicstart = Substring_alignstart_chr(substring); + *genomicend = Substring_alignend_chr(substring); + if ((chrpos = Pair_binary_search_descending(&(*querypos),/*lowi*/0,/*highi*/gmap->npairs,gmap->pairarray, + *genomicstart,*genomicend)) > 0) { + return chrpos; + } + } + } + + return 0; +} + + +static int +compute_insertlength (Stage3pair_T this) { + T hit5, hit3; + Chrpos_T chrstart, chrend, chrpos; + int querypos; + int querylength5, querylength3; + + hit5 = this->hit5; + hit3 = this->hit3; + querylength5 = hit5->querylength; + querylength3 = hit3->querylength; + + debug10(printf("Computing insertlength on %u..%u to %u..%u\n", + hit5->genomicstart - hit5->chroffset,hit5->genomicend - hit5->chroffset, + hit3->genomicend - hit3->chroffset,hit3->genomicstart - hit3->chroffset)); + + if (hit5->hittype == GMAP && hit3->hittype == GMAP) { + debug10(printf("Got hit5 and hit3 both of type GMAP\n")); + + /* Do not try to resolve ambiguity on inside of concordant ends */ + if (hit5->plusp == true && hit3->plusp == true) { + return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3; + } else if (hit5->plusp == 
false && hit3->plusp == false) { + return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3; + } else { + return pair_insert_length_unpaired(hit5,hit3); + } + + } else if (hit5->hittype == GMAP) { + debug10(printf("Got hit5 of type GMAP\n")); + if (hit5->plusp == true && hit3->plusp == true) { + /* Have 5-start..end and 3-start..end */ + debug10(printf("1 plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n", + hit5->genomicend - hit5->chroffset,hit3->genomicstart - hit3->chroffset)); + + if (hit5->genomicend <= hit3->genomicstart) { + /* No overlap */ + return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3; + } else if ((chrpos = overlap3_gmap_plus(&querypos,&chrstart,&chrend,/*hit*/hit3,/*gmap*/hit5)) > 0U) { + return /* end3 */ chrend - /* start5 */ (chrpos - querypos); + } else { + /* Still no overlap */ + return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3; + } + + } else if (hit5->plusp == false && hit3->plusp == false) { + /* Have 3-end..start and 5-end..start */ + debug10(printf("2 minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n", + hit3->genomicstart - hit3->chroffset,hit5->genomicend - hit5->chroffset)); + + if (hit3->genomicstart <= hit5->genomicend) { + return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3; + } else if ((chrpos = overlap3_gmap_minus(&querypos,&chrstart,&chrend,/*hit*/hit3,/*gmap*/hit5)) > 0U) { + return /* start5 */ (chrpos + querypos) - /* end3 */ chrend + 1; + } else { + /* Still no overlap */ + return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3; + } + } else { + return pair_insert_length_unpaired(hit5,hit3); + } + + } else if (hit3->hittype == GMAP) { + debug10(printf("Got hit3 of type GMAP\n")); + if (hit5->plusp == true && hit3->plusp == true) { + /* Have 5-start..end and 3-start..end */ + debug10(printf("3 plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n", + hit5->genomicend - 
hit5->chroffset,hit3->genomicstart - hit3->chroffset)); + + if (hit5->genomicend <= hit3->genomicstart) { + /* No overlap */ + return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3; + } else if ((chrpos = overlap5_gmap_plus(&querypos,&chrstart,&chrend,/*hit*/hit5,/*gmap*/hit3)) > 0U) { + return /* end3 */ (chrpos - querypos + querylength3) - /* start5 */ chrstart; + } else { + /* Still no overlap */ + return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3; + } + + } else if (hit5->plusp == false && hit3->plusp == false) { + /* Have 3-end..start and 5-end..start */ + debug10(printf("4 minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n", + hit3->genomicstart - hit3->chroffset,hit5->genomicend - hit5->chroffset)); + if (hit3->genomicstart <= hit5->genomicend) { + /* No overlap */ + return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3; + } else if ((chrpos = overlap5_gmap_minus(&querypos,&chrstart,&chrend,/*hit*/hit5,/*gmap*/hit3)) > 0U) { + return /* start5 */ chrstart - /* end3 */ (chrpos + querypos - querylength3) - 1; + } else { + /* Still no overlap */ + return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3; + } + } else { + return pair_insert_length_unpaired(hit5,hit3); + } + + } else if (hit5->plusp == true && hit3->plusp == false) { + /* Have 5-start..end and 3-end..start */ + /* or 3-end..start and 5-start..end */ + + if (hit5->genomicend < hit3->genomicend) { + return (hit3->genomicend - hit5->genomicend) + querylength5 + querylength3; + } else if (hit3->genomicstart < hit5->genomicstart) { + return (hit5->genomicstart - hit3->genomicstart) + querylength5 + querylength3; + } else { + return pair_insert_length_unpaired(hit5,hit3); + } + + } else if (hit5->plusp == false && hit3->plusp == true) { + /* Have 5-end..start and 3-start..end */ + /* or 3-start..end and 5-end..start */ + + if (hit5->genomicstart < hit3->genomicstart) { + return (hit3->genomicstart 
- hit5->genomicstart) + querylength5 + querylength3; + } else if (hit3->genomicend < hit5->genomicend) { + return (hit5->genomicend - hit3->genomicend) + querylength5 + querylength3; + } else { + return pair_insert_length_unpaired(hit5,hit3); + } + + } else if (hit5->plusp == true) { + /* Concordant directions on same chromosome (plus) */ + debug10(printf("Concordant on plus strand\n")); + /* Have 5-start..end and 3-start..end */ + if (hit5->genomicend < hit3->genomicstart) { + /* No overlap */ + return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3; + } else { + return pair_insert_length(hit5,hit3); + } + + + } else { + /* Concordant directions on same chromosome (minus) */ + debug10(printf("Concordant on minus strand\n")); + /* Have 3-end..start and 5-end..start */ + if (hit3->genomicstart < hit5->genomicend) { + /* No overlap */ + return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3; + } else { + return pair_insert_length(hit5,hit3); + } + } +} + + +List_T +Stage3pair_resolve_insides (List_T hitpairlist, char *queryuc_ptr_5, char *queryuc_ptr_3, + Compress_T query5_compress_fwd, Compress_T query5_compress_rev, + Compress_T query3_compress_fwd, Compress_T query3_compress_rev, + Pairpool_T pairpool,Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR, + Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool) { + List_T result = NULL, p; + Stage3pair_T stage3pair; + T hit5, hit3; + int querylength5, querylength3; + int genestrand; + bool changep; + + for (p = hitpairlist; p != NULL; p = List_next(p)) { + stage3pair = (Stage3pair_T) List_head(p); + hit5 = stage3pair->hit5; + hit3 = stage3pair->hit3; + querylength5 = hit5->querylength; + querylength3 = hit3->querylength; + genestrand = stage3pair->genestrand; + + if (hit5->plusp == true && hit3->plusp == true) { + changep = resolve_inside_general_splice_plus(&hit5,&hit3,&stage3pair->private5p,&stage3pair->private3p, + 
query5_compress_fwd,query5_compress_rev, + query3_compress_fwd,query3_compress_rev, + queryuc_ptr_5,queryuc_ptr_3,querylength5,querylength3, + genestrand,pairpool,dynprogL,dynprogM,dynprogR, + oligoindices_minor,diagpool,cellpool); + } else if (hit5->plusp == false && hit3->plusp == false) { + changep = resolve_inside_general_splice_minus(&hit5,&hit3,&stage3pair->private5p,&stage3pair->private3p, + query5_compress_fwd,query5_compress_rev, + query3_compress_fwd,query3_compress_rev, + queryuc_ptr_5,queryuc_ptr_3,querylength5,querylength3, + genestrand,pairpool,dynprogL,dynprogM,dynprogR, + oligoindices_minor,diagpool,cellpool); + } else { + changep = false; + } + + if (changep == true) { + stage3pair->hit5 = hit5; + stage3pair->hit3 = hit3; + stage3pair->insertlength = compute_insertlength(stage3pair); + + /* Rest of this code is taken from the bottom of Stage3pair_new */ + + stage3pair->score = hit5->score + hit3->score /* + unresolved_amb_length */; + + stage3pair->nmatches_posttrim = hit5->nmatches_posttrim + hit3->nmatches_posttrim; + stage3pair->nmatches = hit5->nmatches + hit3->nmatches /*- unresolved_amb_length -- not available*/; + /* stage3pair->overlap_known_gene_p = false; -- initialized later when resolving multimappers */ + stage3pair->tally = -1L; + + stage3pair->low = (hit5->low < hit3->low) ? hit5->low : hit3->low; + stage3pair->high = (hit5->high > hit3->high) ? 
hit5->high : hit3->high; + +#if 0 + if (stage3pair->low > stage3pair->high) { + fprintf(stderr,"stage3pair->low %u > stage3pair->high %u, hit5->chrnum %d\n", + stage3pair->low - stage3pair->chroffset,stage3pair->high - stage3pair->chroffset,hit5->chrnum); + abort(); + } +#endif + + if (hit5->chrnum == 0 || hit3->chrnum == 0) { + stage3pair->outerlength = querylength5 + querylength3; + } else { + stage3pair->outerlength = stage3pair->high - stage3pair->low; + } + + stage3pair->nsplices = hit5->nsplices + hit3->nsplices; + + debug0(printf("Revised new pair %p from %p and %p with private %d, %d\n", + stage3pair,hit5,hit3,stage3pair->private5p,stage3pair->private3p)); + debug0(printf(" hittypes %s and %s\n",hittype_string(hit5->hittype),hittype_string(hit3->hittype))); + debug0(printf(" sensedirs %d and %d\n",hit5->sensedir,hit3->sensedir)); + debug0(printf(" chrpos %u..%u and %u..%u\n", + hit5->genomicstart - hit5->chroffset,hit5->genomicend - hit5->chroffset, + hit3->genomicstart - hit3->chroffset,hit3->genomicend - hit3->chroffset)); + + if (hit5->circularpos < 0 && hit3->circularpos < 0) { + stage3pair->circularp = false; + } else { + stage3pair->circularp = true; + } + + /* Fixing insertlength for circular pairs */ + if (stage3pair->insertlength > hit5->chrlength) { + stage3pair->insertlength -= hit5->chrlength; + } + + /* Note: the new hit5 or hit3 is guaranteed to have private5p or private3p set to true, respectively */ + if (hit5->circularalias == +1) { + debug0(printf("Unaliasing 5' end\n")); + if (stage3pair->private5p == false) { + stage3pair->hit5 = Stage3end_copy(hit5); + stage3pair->private5p = true; + } + unalias_circular(stage3pair->hit5); + } + + if (hit3->circularalias == +1) { + debug0(printf("Unaliasing 3' end\n")); + if (stage3pair->private3p == false) { + stage3pair->hit3 = Stage3end_copy(hit3); + stage3pair->private3p = true; + } + unalias_circular(stage3pair->hit3); + } + } - return 0; + result = List_push(result,(void *) stage3pair); + } + + 
List_free(&hitpairlist); + return result; } -static Chrpos_T -overlap3_gmap_minus (int *querypos, Chrpos_T *genomicstart, Chrpos_T *genomicend, - Stage3end_T hit3, Stage3end_T gmap) { - Chrpos_T chrpos; - Substring_T substring; - List_T p; - debug10(printf("Entered overlap3_gmap_minus with gmap %d..%d\n", - gmap->pairarray[0].querypos,gmap->pairarray[gmap->npairs - 1].querypos)); - for (p = hit3->substrings_LtoH; p != NULL; p = List_next(p)) { - substring = (Substring_T) List_head(p); - if (Substring_ambiguous_p(substring) == false) { - *genomicstart = Substring_alignstart_chr(substring); - *genomicend = Substring_alignend_chr(substring); - if ((chrpos = Pair_binary_search_descending(&(*querypos),/*lowi*/0,/*highi*/gmap->npairs,gmap->pairarray, - *genomicstart,*genomicend)) > 0) { - return chrpos; - } + +#if 0 +List_T +Stage3end_filter_bymatch (List_T hitlist) { + List_T filtered = NULL, p; + T hit; + int min_nmismatches_whole = 1000; + + for (p = hitlist; p != NULL; p = p->rest) { + hit = (T) p->first; + if (hit->nmismatches_whole < min_nmismatches_whole) { + min_nmismatches_whole = hit->nmismatches_whole; } } - return 0; + for (p = hitlist; p != NULL; p = p->rest) { + hit = (T) p->first; + if (hit->nmismatches_whole == min_nmismatches_whole) { + filtered = List_push(filtered,hit); + } else { + Stage3end_free(&hit); + } + } + List_free(&hitlist); + + return filtered; } +#endif /* Should not set ambiguous flag in substrings, because resolution of @@ -13593,7 +14908,7 @@ static void resolve_inside_ambiguous_splice_plus (int *unresolved_amb_length, int *amb_resolve_5, int *amb_resolve_3, int *amb_status_inside, T hit5, T hit3, int querylength5, int querylength3) { - int insertlength; + Chrpos_T insertlength; Univcoord_T genomicstart, genomicend; int nbingo, bingoi5, bingoi3; int nbest, besti5, besti3, i, j; @@ -13607,18 +14922,24 @@ *unresolved_amb_length = 0; + debug9(printf("resolve plus: hit5 %s and hit3 %s\n", + 
hittype_string(hit5->hittype),hittype_string(hit3->hittype))); if (hit5->hittype == GMAP) { substring5 = (Substring_T) NULL; + debug9(printf("hit5 query bounds: %d..%d\n",Stage3end_gmap_querystart(hit5),Stage3end_gmap_queryend(hit5))); } else { substring5 = (Substring_T) List_head(hit5->substrings_Nto1); + debug9(printf("hit5 query bounds: %d..%d\n",Stage3end_substrings_querystart(hit5),Stage3end_substrings_queryend(hit5))); + debug9(printf("hit5 ambiguous_p %d\n",Substring_ambiguous_p(substring5))); } if (hit3->hittype == GMAP) { substring3 = (Substring_T) NULL; + debug9(printf("hit3 query bounds: %d..%d\n",Stage3end_gmap_querystart(hit3),Stage3end_gmap_queryend(hit3))); } else { substring3 = (Substring_T) List_head(hit3->substrings_1toN); + debug9(printf("hit3 query bounds: %d..%d\n",Stage3end_substrings_querystart(hit3),Stage3end_substrings_queryend(hit3))); + debug9(printf("hit3 ambiguous_p %d\n",Substring_ambiguous_p(substring3))); } - debug9(printf("resolve plus: hit5 %s and hit3 %s\n", - hittype_string(hit5->hittype),hittype_string(hit3->hittype))); if (substring5 != NULL && Substring_ambiguous_p(substring5) == true && substring3 != NULL && Substring_ambiguous_p(substring3) == true) { @@ -13801,7 +15122,7 @@ static void resolve_inside_ambiguous_splice_minus (int *unresolved_amb_length, int *amb_resolve_5, int *amb_resolve_3, int *amb_status_inside, T hit5, T hit3, int querylength5, int querylength3) { - int insertlength; + Chrpos_T insertlength; Univcoord_T genomicstart, genomicend; int nbingo, bingoi5, bingoi3; int nbest, besti5, besti3, i, j; @@ -13812,20 +15133,25 @@ int *end_amb_nmismatches, *start_amb_nmismatches; int end_amb_length_5, start_amb_length_3; + *unresolved_amb_length = 0; debug9(printf("resolve minus: hit5 %s and hit3 %s\n", hittype_string(hit5->hittype),hittype_string(hit3->hittype))); if (hit5->hittype == GMAP) { substring5 = (Substring_T) NULL; + debug9(printf("hit5 query bounds: 
%d..%d\n",Stage3end_gmap_querystart(hit5),Stage3end_gmap_queryend(hit5))); } else { substring5 = (Substring_T) List_head(hit5->substrings_Nto1); + debug9(printf("hit5 query bounds: %d..%d\n",Stage3end_substrings_querystart(hit5),Stage3end_substrings_queryend(hit5))); debug9(printf("hit5 ambiguous_p %d\n",Substring_ambiguous_p(substring5))); } if (hit3->hittype == GMAP) { substring3 = (Substring_T) NULL; + debug9(printf("hit3 query bounds: %d..%d\n",Stage3end_gmap_querystart(hit3),Stage3end_gmap_queryend(hit3))); } else { substring3 = (Substring_T) List_head(hit3->substrings_1toN); + debug9(printf("hit3 query bounds: %d..%d\n",Stage3end_substrings_querystart(hit3),Stage3end_substrings_queryend(hit3))); debug9(printf("hit3 ambiguous_p %d\n",Substring_ambiguous_p(substring3))); } @@ -14006,7 +15332,6 @@ debug9(printf("\n")); } - return; } @@ -14040,151 +15365,6 @@ return; } - -static int -compute_insertlength (Stage3pair_T this) { - T hit5, hit3; - Chrpos_T chrstart, chrend, chrpos; - int querypos; - int querylength5, querylength3; - - - hit5 = this->hit5; - hit3 = this->hit3; - querylength5 = hit5->querylength; - querylength3 = hit3->querylength; - - if (hit5->hittype == GMAP && hit3->hittype == GMAP) { - debug10(printf("Got hit5 and hit3 both of type GMAP\n")); - - /* Do not try to resolve ambiguity on inside of concordant ends */ - if (hit5->plusp == true && hit3->plusp == true) { - return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3; - } else if (hit5->plusp == false && hit3->plusp == false) { - return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3; - } else { - return pair_insert_length_unpaired(hit5,hit3); - } - - } else if (hit5->hittype == GMAP) { - debug10(printf("Got hit5 of type GMAP\n")); - if (hit5->plusp == true && hit3->plusp == true) { - /* Have 5-start..end and 3-start..end */ - debug10(printf("1 plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n", - hit5->genomicend - 
hit5->chroffset,hit3->genomicstart - hit3->chroffset)); - - if (hit5->genomicend <= hit3->genomicstart) { - /* No overlap */ - return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3; - } else if ((chrpos = overlap3_gmap_plus(&querypos,&chrstart,&chrend,/*hit*/hit3,/*gmap*/hit5)) > 0U) { - return /* end3 */ chrend - /* start5 */ (chrpos - querypos); - } else { - /* Still no overlap */ - return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3; - } - - } else if (hit5->plusp == false && hit3->plusp == false) { - /* Have 3-end..start and 5-end..start */ - debug10(printf("2 minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n", - hit3->genomicstart - hit3->chroffset,hit5->genomicend - hit5->chroffset)); - - if (hit3->genomicstart <= hit5->genomicend) { - return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3; - } else if ((chrpos = overlap3_gmap_minus(&querypos,&chrstart,&chrend,/*hit*/hit3,/*gmap*/hit5)) > 0U) { - return /* start5 */ (chrpos + querypos) - /* end3 */ chrend + 1; - } else { - /* Still no overlap */ - return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3; - } - } else { - return pair_insert_length_unpaired(hit5,hit3); - } - - } else if (hit3->hittype == GMAP) { - debug10(printf("Got hit3 of type GMAP\n")); - if (hit5->plusp == true && hit3->plusp == true) { - /* Have 5-start..end and 3-start..end */ - debug10(printf("3 plus: comparing hit5->genomicend %u <= hit3->genomicstart %u\n", - hit5->genomicend - hit5->chroffset,hit3->genomicstart - hit3->chroffset)); - - if (hit5->genomicend <= hit3->genomicstart) { - /* No overlap */ - return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3; - } else if ((chrpos = overlap5_gmap_plus(&querypos,&chrstart,&chrend,/*hit*/hit5,/*gmap*/hit3)) > 0U) { - return /* end3 */ (chrpos - querypos + querylength3) - /* start5 */ chrstart; - } else { - /* Still no overlap */ - return (hit3->genomicstart - 
hit5->genomicend) + querylength5 + querylength3; - } - - } else if (hit5->plusp == false && hit3->plusp == false) { - /* Have 3-end..start and 5-end..start */ - debug10(printf("4 minus: comparing hit3->genomicstart %u <= hit5->genomicend %u\n", - hit3->genomicstart - hit3->chroffset,hit5->genomicend - hit5->chroffset)); - if (hit3->genomicstart <= hit5->genomicend) { - /* No overlap */ - return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3; - } else if ((chrpos = overlap5_gmap_minus(&querypos,&chrstart,&chrend,/*hit*/hit5,/*gmap*/hit3)) > 0U) { - return /* start5 */ chrstart - /* end3 */ (chrpos + querypos - querylength3) - 1; - } else { - /* Still no overlap */ - return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3; - } - } else { - return pair_insert_length_unpaired(hit5,hit3); - } - - } else if (hit5->plusp == true && hit3->plusp == false) { - /* Have 5-start..end and 3-end..start */ - /* or 3-end..start and 5-start..end */ - - if (hit5->genomicend < hit3->genomicend) { - return (hit3->genomicend - hit5->genomicend) + querylength5 + querylength3; - } else if (hit3->genomicstart < hit5->genomicstart) { - return (hit5->genomicstart - hit3->genomicstart) + querylength5 + querylength3; - } else { - return pair_insert_length_unpaired(hit5,hit3); - } - - } else if (hit5->plusp == false && hit3->plusp == true) { - /* Have 5-end..start and 3-start..end */ - /* or 3-start..end and 5-end..start */ - - if (hit5->genomicstart < hit3->genomicstart) { - return (hit3->genomicstart - hit5->genomicstart) + querylength5 + querylength3; - } else if (hit3->genomicend < hit5->genomicend) { - return (hit5->genomicend - hit3->genomicend) + querylength5 + querylength3; - } else { - return pair_insert_length_unpaired(hit5,hit3); - } - - } else if (hit5->plusp == true) { - /* Concordant directions on same chromosome (plus) */ - debug10(printf("Concordant on plus strand\n")); - /* Have 5-start..end and 3-start..end */ - if (hit5->genomicend 
< hit3->genomicstart) { - /* No overlap */ - return (hit3->genomicstart - hit5->genomicend) + querylength5 + querylength3; - } else { - return pair_insert_length(hit5,hit3); - } - - - } else { - /* Concordant directions on same chromosome (minus) */ - debug10(printf("Concordant on minus strand\n")); - /* Have 3-end..start and 5-end..start */ - if (hit3->genomicstart < hit5->genomicend) { - /* No overlap */ - return (hit5->genomicend - hit3->genomicstart) + querylength5 + querylength3; - } else { - return pair_insert_length(hit5,hit3); - } - } -} - - - Stage3pair_T Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype, bool private5p, bool private3p, bool expect_concordant_p) { @@ -14201,7 +15381,7 @@ int unresolved_amb_length = 0; /* int found_score = 0; */ bool overreach5p, overreach3p; - int pairmax; + Chrpos_T pairmax; int querylength5 = hit5->querylength; int querylength3 = hit3->querylength; @@ -14948,7 +16128,7 @@ } /* Fixing insertlength for circular pairs */ - if (new->insertlength > (int) hit5->chrlength) { + if (new->insertlength > hit5->chrlength) { new->insertlength -= hit5->chrlength; } @@ -15011,22 +16191,23 @@ Univcoord_T x_hit3_high, x_hit3_low, y_hit3_high, y_hit3_low; Univcoord_T x_low, x_high, y_low, y_high; - debug8(printf(" Comparing (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), circularalias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d, sensedirs %d-%d\n", + debug8(printf(" Comparing (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), circularalias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d, sensedirs %d-%d, GMAP splice prob %f+%f\n", Pairtype_string(x->pairtype),hittype_string(x->hit5->hittype), hittype_string(x->hit3->hittype),x, x->hit5->low - x->hit5->chroffset,x->hit5->high - x->hit5->chroffset, x->hit3->low - x->hit3->chroffset,x->hit3->high - x->hit3->chroffset, x->dir,x->hit5->circularalias,x->hit3->circularalias,x->nmatches,x->nmatches_posttrim, - 
amb_length(x->hit5),amb_length(x->hit3),x->hit5->sensedir,x->hit3->sensedir)); + amb_length(x->hit5),amb_length(x->hit3),x->hit5->sensedir,x->hit3->sensedir, + x->hit5->gmap_avg_splice_score,x->hit3->gmap_avg_splice_score)); - debug8(printf(" with (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), circularalias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d, sensedirs %d-%d\n", + debug8(printf(" with (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), circularalias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d, sensedirs %d-%d, GMAP splice prob %f+%f\n", Pairtype_string(y->pairtype),hittype_string(y->hit5->hittype), hittype_string(y->hit3->hittype),y, y->hit5->low - y->hit5->chroffset,y->hit5->high - y->hit5->chroffset, y->hit3->low - y->hit3->chroffset,y->hit3->high - y->hit3->chroffset, y->dir,y->hit5->circularalias,y->hit3->circularalias,y->nmatches,y->nmatches_posttrim, - amb_length(y->hit5),amb_length(y->hit3),y->hit5->sensedir,y->hit3->sensedir)); - + amb_length(y->hit5),amb_length(y->hit3),y->hit5->sensedir,y->hit3->sensedir, + y->hit5->gmap_avg_splice_score,y->hit3->gmap_avg_splice_score)); x_hit5_low = normalize_coord(x->hit5->low,x->hit5->circularalias,x->hit5->chrlength); x_hit5_high = normalize_coord(x->hit5->high,x->hit5->circularalias,x->hit5->chrlength); @@ -15194,6 +16375,7 @@ return +1; #endif +#if 0 } else if (x->sense_consistent_p == true) { if ((x->hit5->sensedir != 0 || x->hit3->sensedir != 0) && (y->hit5->sensedir == 0 && y->hit3->sensedir == 0)) { @@ -15204,6 +16386,15 @@ } else { return 0; } +#endif + + } else if (x->hit5->gmap_avg_splice_score + x->hit3->gmap_avg_splice_score > + y->hit5->gmap_avg_splice_score + y->hit3->gmap_avg_splice_score) { + return -1; + + } else if (y->hit5->gmap_avg_splice_score + y->hit3->gmap_avg_splice_score > + x->hit5->gmap_avg_splice_score + x->hit3->gmap_avg_splice_score) { + return +1; } else { return 0; @@ -15657,6 +16848,7 @@ hitpair_goodness_cmp (bool *equalp, Stage3pair_T hitpair, Stage3pair_T 
best_hitpair, bool finalp) { double prob1, prob2; + Chrpos_T total_querylength, best_total_querylength; #if 0 int hitpair_nmatches, best_hitpair_nmatches; @@ -15974,12 +17166,12 @@ } /* Overlapping ends worse than separate ends */ - if (hitpair->insertlength <= hitpair->hit5->querylength + hitpair->hit3->querylength && - best_hitpair->insertlength > best_hitpair->hit5->querylength + best_hitpair->hit3->querylength) { + total_querylength = (Chrpos_T) (hitpair->hit5->querylength + hitpair->hit3->querylength); + best_total_querylength = (Chrpos_T) (best_hitpair->hit5->querylength + best_hitpair->hit3->querylength); + if (hitpair->insertlength <= total_querylength && best_hitpair->insertlength > best_total_querylength) { debug8(printf(" => loses by being overlapping\n")); return -1; - } else if (hitpair->insertlength > hitpair->hit5->querylength + hitpair->hit3->querylength && - best_hitpair->insertlength <= best_hitpair->hit5->querylength + best_hitpair->hit3->querylength) { + } else if (hitpair->insertlength > total_querylength && best_hitpair->insertlength <= best_total_querylength) { debug8(printf(" => wins by being separate\n")); return +1; @@ -16229,13 +17421,14 @@ debug8( for (i = 0; i < n; i++) { hitpair = hitpairs[i]; - printf(" Initial %d (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), circularalias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d, sensedirs %d and %d\n", + printf(" Initial %d (%s, %s-%s): %p, %u..%u|%u..%u (dir = %d), circularalias %d|%d, nmatches: %d (%d posttrim), amb_lengths %d and %d, sensedirs %d and %d, GMAP splice probs %f and %f\n", i,Pairtype_string(hitpair->pairtype),hittype_string(hitpair->hit5->hittype), hittype_string(hitpair->hit3->hittype),hitpair, hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset, hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset, 
hitpair->dir,hitpair->hit5->circularalias,hitpair->hit3->circularalias,hitpair->nmatches,hitpair->nmatches_posttrim, - amb_length(hitpair->hit5),amb_length(hitpair->hit3),hitpair->hit5->sensedir,hitpair->hit3->sensedir); + amb_length(hitpair->hit5),amb_length(hitpair->hit3),hitpair->hit5->sensedir,hitpair->hit3->sensedir, + hitpair->hit5->gmap_avg_splice_score,hitpair->hit3->gmap_avg_splice_score); } ); @@ -16332,7 +17525,7 @@ for (p = hitpairlist; p != NULL; p = List_next(p)) { hitpair = (Stage3pair_T) List_head(p); - if (hitpair->insertlength <= hitpair->hit5->querylength + hitpair->hit3->querylength) { + if (hitpair->insertlength <= (Chrpos_T) (hitpair->hit5->querylength + hitpair->hit3->querylength)) { overlapping = List_push(overlapping,(void *) hitpair); } else { separate = List_push(separate,(void *) hitpair); @@ -16794,6 +17987,9 @@ int trim_left_5 = querylength5, trim_right_5 = querylength5, trim_left_3 = querylength3, trim_right_3 = querylength3, trim_left, trim_right; int nindelbreaks, nbadintrons; + int min_badintrons_5, min_badintrons_3; + int nleft; + #if 0 /* DISTANT_SPLICE_SPECIAL */ bool shortdistance_p = false; @@ -16817,7 +18013,7 @@ hit5 = hitpair->hit5; hit3 = hitpair->hit3; - debug6(printf("hit5 %u..%u type %s, nsegments %d, trim_left: %d%s, trim_right %d%s, start_ambig %d, end_ambig %d. hit3 %u..%u type %s, nsegments %d, trim_left %d%s, trim_right %d%s, start_ambig %d, end_ambig %d, sensedirs %d and %d.\n", + debug6(printf("hit5 %u..%u type %s, nsegments %d, trim_left: %d%s, trim_right %d%s, start_ambig %d, end_ambig %d. hit3 %u..%u type %s, nsegments %d, trim_left %d%s, trim_right %d%s, start_ambig %d, end_ambig %d, sensedirs %d and %d, gmap_splice_scores %f and %f.\n", hit5->genomicstart - hit5->chroffset,hit5->genomicend - hit5->chroffset,hittype_string(hit5->hittype), hit5->nsegments,hit5->trim_left,hit5->trim_left_splicep ? " (splice)" : "", hit5->trim_right,hit5->trim_right_splicep ? 
" (splice)" : "", @@ -16825,7 +18021,7 @@ hit3->genomicstart - hit3->chroffset,hit3->genomicend - hit3->chroffset,hittype_string(hit3->hittype), hit3->nsegments,hit3->trim_left,hit3->trim_left_splicep ? " (splice)" : "", hit3->trim_right,hit3->trim_right_splicep ? " (splice)" : "", - start_amb_length(hit3),end_amb_length(hit3),hit5->sensedir,hit3->sensedir)); + start_amb_length(hit3),end_amb_length(hit3),hit5->sensedir,hit3->sensedir,hit5->gmap_avg_splice_score,hit3->gmap_avg_splice_score)); if (hit5->hittype == TERMINAL) { /* Don't allow terminals to set trims */ @@ -16895,6 +18091,8 @@ debug6(printf("overall 3': trim_left %d, trim_right %d\n",trim_left_3,trim_right_3)); + min_badintrons_5 = querylength5; + min_badintrons_3 = querylength3; for (p = hitpairlist; p != NULL; p = p->rest) { hitpair = (Stage3pair_T) p->first; hit5 = hitpair->hit5; @@ -16931,6 +18129,12 @@ debug6(printf(" add nmismatches %d.",Pair_nmismatches_region(&nindelbreaks,&nbadintrons,hit5->pairarray,hit5->npairs, trim_left_5,trim_right_5,start_amb_length(hit5),end_amb_length(hit5), hit5->querylength))); + debug6(printf(" nbadintrons %d.",nbadintrons)); + hit5->gmap_nbadintrons = nbadintrons; + if (nbadintrons < min_badintrons_5) { + min_badintrons_5 = nbadintrons; + } + if (start_amb_length(hit5) > 0) { debug6(printf(" add penalty for start amb %d.",amb_penalty)); hit5->score_eventrim += amb_penalty; @@ -16952,6 +18156,7 @@ } else { hit5->score_eventrim = 0; /* was hit5->penalties */ + min_badintrons_5 = 0; debug6(printf("score 5' OTHER:")); for (q = hit5->substrings_1toN; q != NULL; q = List_next(q)) { @@ -17018,6 +18223,11 @@ debug6(printf(" add nmismatches %d.",Pair_nmismatches_region(&nindelbreaks,&nbadintrons,hit3->pairarray,hit3->npairs, trim_left_3,trim_right_3,start_amb_length(hit3),end_amb_length(hit3), hit3->querylength))); + debug6(printf(" nbadintrons %d.",nbadintrons)); + hit3->gmap_nbadintrons = nbadintrons; + if (nbadintrons < min_badintrons_3) { + min_badintrons_3 = nbadintrons; 
+ } if (start_amb_length(hit3) > 0) { debug6(printf(" add penalty for start amb %d.",amb_penalty)); @@ -17051,6 +18261,7 @@ } else { hit3->score_eventrim = 0; /* was hit3->penalties */ + min_badintrons_3 = 0; debug6(printf("score 3' OTHER:")); for (q = hit3->substrings_1toN; q != NULL; q = List_next(q)) { @@ -17238,8 +18449,44 @@ optimal = List_push(optimal,hitpair); } } - } + /* Filter GMAP hits with bad introns */ + debug6(printf("Filtering GMAP hits with bad introns: mininum was %d and %d\n", + min_badintrons_5,min_badintrons_3)); + + nleft = 0; + for (p = optimal; p != NULL; p = p->rest) { + hitpair = (Stage3pair_T) p->first; + + if (hitpair->hit5->gmap_nbadintrons == min_badintrons_5 && hitpair->hit3->gmap_nbadintrons == min_badintrons_3) { + nleft += 1; + } else { + /* Candidate for elimination */ + } + } + + debug6(printf("If we eliminated based on bad introns, would have %d left\n",nleft)); + if (nleft > 0) { + /* Proceed to eliminate based on bad introns */ + List_free(&hitpairlist); + hitpairlist = optimal; + optimal = (List_T) NULL; + + for (p = hitpairlist; p != NULL; p = p->rest) { + hitpair = (Stage3pair_T) p->first; + if (hitpair->hit5->gmap_nbadintrons > min_badintrons_5 || hitpair->hit3->gmap_nbadintrons > min_badintrons_3) { + debug6(printf("Final: Eliminating hit pair %p at %u..%u|%u..%u with nbadintrons %d+%d\n", + hitpair,hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset, + hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset, + hitpair->hit5->gmap_nbadintrons,hitpair->hit3->gmap_nbadintrons)); + *eliminatedp = true; + Stage3pair_free(&hitpair); + } else { + optimal = List_push(optimal,(void *) hitpair); + } + } + } + } List_free(&hitpairlist); @@ -17642,22 +18889,21 @@ hit5->effective_chrnum,hit5->chrnum,hit3->chrnum)); if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,/*pairtype*/CONCORDANT, 
/*private5p*/false,/*private3p*/false,/*expect_concordant_p*/true)) != NULL) { - - debug5(printf("Have new pair with scores %d + %d, compared with new_found_score %d\n",hit5->score,hit3->score,new_found_score)); + debug5(printf("Have new pair with scores %d + %d, compared with new_found_score %d\n",stage2pair->hit5->score,stage2pair->hit3->score,new_found_score)); if (Stage3pair_max_trim(stage3pair) > 18) { /* Don't use terminals to set new_found_score */ debug5(printf("Max trim is %d > 18, so treating as terminals\n",Stage3pair_max_trim(stage3pair))); *terminals = List_push(*terminals,(void *) stage3pair); - } else if (hit5->hittype == GMAP || hit3->hittype == GMAP) { + } else if (stage3pair->hit5->hittype == GMAP || stage3pair->hit3->hittype == GMAP) { /* Don't use GMAP scores to set new_found_score */ hitpairs = List_push(hitpairs,(void *) stage3pair); (*nconcordant)++; - } else if (hit5->score + hit3->score < new_found_score) { + } else if (stage3pair->hit5->score + stage3pair->hit3->score < new_found_score) { /* Don't use frontier_score here, which is the trimmed_score. 
Use the full score, to motivate stage1hr to find longer alignments */ - new_found_score = hit5->score + hit3->score; - debug5(printf(" => tentatively updating found_score to be %d = %d + %d\n",new_found_score,hit5->score,hit3->score)); + new_found_score = stage3pair->hit5->score + stage3pair->hit3->score; + debug5(printf(" => tentatively updating found_score to be %d = %d + %d\n",new_found_score,stage3pair->hit5->score,stage3pair->hit3->score)); hitpairs = List_push(hitpairs,(void *) stage3pair); (*nconcordant)++; @@ -17763,21 +19009,21 @@ if ((stage3pair = Stage3pair_new(hit5,hit3,genestrand,/*pairtype*/CONCORDANT, /*private5p*/false,/*private3p*/false,/*expect_concordant_p*/true)) != NULL) { - debug5(printf("Have new pair with scores %d + %d, compared with new_found_score %d\n",hit5->score,hit3->score,new_found_score)); + debug5(printf("Have new pair with scores %d + %d, compared with new_found_score %d\n",stage3pair->hit5->score,stage3pair->hit3->score,new_found_score)); if (Stage3pair_max_trim(stage3pair) > 18) { /* Don't use terminals to set new_found_score */ debug5(printf("Max trim is %d > 18, so treating as terminals\n",Stage3pair_max_trim(stage3pair))); *terminals = List_push(*terminals,(void *) stage3pair); - } else if (hit5->hittype == GMAP || hit3->hittype == GMAP) { + } else if (stage3pair->hit5->hittype == GMAP || stage3pair->hit3->hittype == GMAP) { /* Don't use GMAP scores to set new_found_score */ hitpairs = List_push(hitpairs,(void *) stage3pair); (*nconcordant)++; - } else if (hit5->score + hit3->score < new_found_score) { + } else if (stage3pair->hit5->score + stage3pair->hit3->score < new_found_score) { /* Don't use frontier_score here, which is the trimmed_score. 
Use the full score, to motivate stage1hr to find longer alignments */ - new_found_score = hit5->score + hit3->score; - debug5(printf(" => tentatively updating found_score to be %d = %d + %d\n",new_found_score,hit5->score,hit3->score)); + new_found_score = stage3pair->hit5->score + stage3pair->hit3->score; + debug5(printf(" => tentatively updating found_score to be %d = %d + %d\n",new_found_score,stage3pair->hit5->score,stage3pair->hit3->score)); hitpairs = List_push(hitpairs,(void *) stage3pair); (*nconcordant)++; diff -Nru gmap-2016-11-07/src/stage3hr.h gmap-2017-01-14/src/stage3hr.h --- gmap-2016-11-07/src/stage3hr.h 2016-10-23 23:22:00.000000000 +0000 +++ gmap-2017-01-14/src/stage3hr.h 2016-12-29 16:20:17.000000000 +0000 @@ -1,4 +1,4 @@ -/* $Id: stage3hr.h 199475 2016-10-23 23:21:59Z twu $ */ +/* $Id: stage3hr.h 202031 2016-12-29 16:20:14Z twu $ */ #ifndef STAGE3HR_INCLUDED #define STAGE3HR_INCLUDED @@ -38,8 +38,8 @@ IIT_T genes_iit_in, int *genes_divint_crosstable_in, IIT_T tally_iit_in, int *tally_divint_crosstable_in, IIT_T runlength_iit_in, int *runlength_divint_crosstable_in, - bool distances_observed_p, int pairmax_linear_in, int pairmax_circular_in, - Chrpos_T expected_pairlength, Chrpos_T pairlength_deviation, + bool distances_observed_p, Chrpos_T pairmax_linear_in, Chrpos_T pairmax_circular_in, + Chrpos_T expected_pairlength_in, Chrpos_T pairlength_deviation_in, int maxpeelback_in, int localsplicing_penalty_in, int indel_penalty_middle_in, int antistranded_penalty_in, bool favor_multiexon_p_in, int gmap_min_nconsecutive_in, int end_detail, int subopt_levels_in, @@ -92,9 +92,9 @@ extern int Stage3end_score (T this); extern int +Stage3end_gmap_goodness (T this); +extern int Stage3end_gmap_max_match_length (T this); -extern double -Stage3end_gmap_min_splice_prob (T this); extern int Stage3end_best_score (List_T hits); extern bool @@ -397,7 +397,7 @@ Stage3end_new_gmap (int nmismatches_whole, int nmatches_posttrim, int max_match_length, int 
ambig_end_length_5, int ambig_end_length_3, Splicetype_T ambig_splicetype_5, Splicetype_T ambig_splicetype_3, - double min_splice_prob, + double avg_splice_score, int goodness, struct Pair_T *pairarray, int npairs, int nsegments, int nintrons, int nindelbreaks, Univcoord_T left, int genomiclength, bool plusp, int genestrand, char *accession, int querylength, @@ -472,6 +472,13 @@ int maxpaths, bool quiet_if_excessive_p, bool invertp, int quality_shift); +extern List_T +Stage3pair_resolve_insides (List_T hitpairlist, char *queryuc_ptr_5, char *queryuc_ptr_3, + Compress_T query5_compress_fwd, Compress_T query5_compress_rev, + Compress_T query3_compress_fwd, Compress_T query3_compress_rev, + Pairpool_T pairpool, Dynprog_T dynprogL, Dynprog_T dynprogM, Dynprog_T dynprogR, + Oligoindex_array_T oligoindices_minor, Diagpool_T diagpool, Cellpool_T cellpool); + extern Stage3pair_T Stage3pair_new (T hit5, T hit3, int genestrand, Pairtype_T pairtype, bool private5p, bool private3p, bool expect_concordant_p); diff -Nru gmap-2016-11-07/src/substring.c gmap-2017-01-14/src/substring.c --- gmap-2016-11-07/src/substring.c 2016-11-07 20:12:20.000000000 +0000 +++ gmap-2017-01-14/src/substring.c 2017-01-13 23:29:59.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: substring.c 199475 2016-10-23 23:21:59Z twu $"; +static char rcsid[] = "$Id: substring.c 202590 2017-01-13 23:29:58Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -3015,14 +3015,28 @@ return this->splicecoord_A; } +/* Called only by samprint */ Chrpos_T -Substring_chr_splicecoord_D (T this) { - return (Chrpos_T) (this->splicecoord_D - this->chroffset); +Substring_chr_splicecoord_D (T this, char donor_strand) { + if (donor_strand == '+') { + return (Chrpos_T) (this->splicecoord_D - this->chroffset); + } else if (donor_strand == '-') { + return (Chrpos_T) (this->splicecoord_D - this->chroffset + 1); + } else { + abort(); + } } +/* Called only by samprint */ Chrpos_T -Substring_chr_splicecoord_A (T this) { - return 
(Chrpos_T) (this->splicecoord_A - this->chroffset); +Substring_chr_splicecoord_A (T this, char acceptor_strand) { + if (acceptor_strand == '+') { + return (Chrpos_T) (this->splicecoord_A - this->chroffset + 1); + } else if (acceptor_strand == '-') { + return (Chrpos_T) (this->splicecoord_A - this->chroffset); + } else { + abort(); + } } int @@ -6244,8 +6258,134 @@ ************************************************************************/ List_T -Substring_convert_to_pairs (List_T pairs, T substring, int querylength, Shortread_T queryseq, - int hardclip_low, int hardclip_high, int queryseq_offset) { +Substring_convert_to_pairs (List_T pairs, T substring, char *queryuc_ptr, + Chrpos_T chrlength, Pairpool_T pairpool) { + int querystart, queryend, querypos, i; + Chrpos_T chrpos; + char genome; + + if (substring == NULL) { + return pairs; + } + + debug6(printf("*** Entered Substring_convert_to_pairs with querylength %d\n",querylength)); + + if (substring->plusp == true) { + querystart = substring->querystart; + queryend = substring->queryend; + + /* Pairs are all zero-based, so do not add 1 */ +#if 0 + chrpos = substring->genomicstart_adj + querystart - substring->chroffset /*+ 1U*/; +#else + chrpos = substring->genomicstart + querystart - substring->chroffset /*+ 1U*/; +#endif + + debug6(printf("plus conversion\n")); + debug6(printf("querystart %d, queryend %d, plusp %d\n",querystart,queryend,substring->plusp)); + debug6(printf("alignstart %u, alignend %u\n",substring->alignstart_trim - substring->chroffset, + substring->alignend_trim - substring->chroffset)); + debug6(printf("chrpos %u\n",chrpos)); + + if (substring->genomic_bothdiff == NULL) { + /* Exact match */ + for (i = querystart, querypos = /*queryseq_offset +*/ querystart; i < queryend; i++, querypos++) { + pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++, + queryuc_ptr[i],/*comp*/MATCH_COMP,queryuc_ptr[i],/*g_alt*/queryuc_ptr[i],/*dynprogindex*/0); + } + } else if (show_refdiff_p == true) { 
+ for (i = querystart, querypos = /*queryseq_offset +*/ querystart; i < queryend; i++, querypos++) { + if (isupper(genome = substring->genomic_refdiff[i])) { + assert(queryuc_ptr[i] == genome || queryuc_ptr[i] == 'N'); + pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++, + queryuc_ptr[i],/*comp*/MATCH_COMP,genome,/*g_alt*/genome,/*dynprogindex*/0); + } else { + assert(queryuc_ptr[i] != toupper(genome)); + pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++, + queryuc_ptr[i],/*comp*/MISMATCH_COMP,toupper(genome),/*g_alt*/toupper(genome), + /*dynprogindex*/0); + } + } + } else { + /* printf("querystart %d, queryend %d\n",querystart,queryend); */ + /* printf("seq1 %s\n",queryuc_ptr); */ + /* printf("genome %s\n",substring->genomic_bothdiff); */ + for (i = querystart, querypos = /*queryseq_offset +*/ querystart; i < queryend; i++, querypos++) { + if (isupper(genome = substring->genomic_bothdiff[i])) { + assert(queryuc_ptr[i] == genome || queryuc_ptr[i] == 'N'); + pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++, + queryuc_ptr[i],/*comp*/MATCH_COMP,genome,/*g_alt*/genome,/*dynprogindex*/0); + } else { + assert(queryuc_ptr[i] != toupper(genome)); + pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++, + queryuc_ptr[i],/*comp*/MISMATCH_COMP,toupper(genome),/*g_alt*/toupper(genome), + /*dynprogindex*/0); + } + } + } + + } else { + querystart = substring->querystart; + queryend = substring->queryend; + + /* For minus, to get 0-based coordinates, subtract 1 */ +#if 0 + chrpos = substring->genomicstart_adj - querystart - substring->chroffset - 1U; +#else + chrpos = substring->genomicstart - querystart - substring->chroffset - 1U; + chrpos = chrlength - chrpos; +#endif + + debug6(printf("minus conversion\n")); + debug6(printf("querystart %d, queryend %d, plusp %d\n",querystart,queryend,substring->plusp)); + debug6(printf("alignstart %u, alignend %u\n",substring->alignstart_trim - substring->chroffset, + 
substring->alignend_trim - substring->chroffset)); + debug6(printf("chrpos %u\n",chrpos)); + + if (substring->genomic_bothdiff == NULL) { + /* Exact match */ + for (i = querystart, querypos = /*queryseq_offset +*/ querystart; i < queryend; i++, querypos++) { + pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++, + queryuc_ptr[i],/*comp*/MATCH_COMP,queryuc_ptr[i],/*g_alt*/queryuc_ptr[i],/*dynprogindex*/0); + } + } else if (show_refdiff_p == true) { + for (i = querystart, querypos = /*queryseq_offset +*/ querystart; i < queryend; i++, querypos++) { + if (isupper(genome = substring->genomic_refdiff[i])) { + assert(queryuc_ptr[i] == genome || queryuc_ptr[i] == 'N'); + pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++, + queryuc_ptr[i],/*comp*/MATCH_COMP,genome,/*g_alt*/genome,/*dynprogindex*/0); + } else { + assert(queryuc_ptr[i] != toupper(genome)); + pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++, + queryuc_ptr[i],/*comp*/MISMATCH_COMP,toupper(genome),/*g_alt*/toupper(genome), + /*dynprogindex*/0); + } + } + } else { + for (i = querystart, querypos = /*queryseq_offset +*/ querystart; i < queryend; i++, querypos++) { + if (isupper(genome = substring->genomic_bothdiff[i])) { + /* assert(queryuc_ptr[i] == genome || queryuc_ptr[i] == 'N'); */ + pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++, + queryuc_ptr[i],/*comp*/MATCH_COMP,genome,/*g_alt*/genome,/*dynprogindex*/0); + } else { + /* assert(queryuc_ptr[i] != toupper(genome)); */ + pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrpos++, + queryuc_ptr[i],/*comp*/MISMATCH_COMP,toupper(genome),/*g_alt*/toupper(genome), + /*dynprogindex*/0); + } + } + } + } + + debug6(Pair_dump_list(pairs,true)); + return pairs; +} + + + +List_T +Substring_convert_to_pairs_out (List_T pairs, T substring, int querylength, Shortread_T queryseq, + int hardclip_low, int hardclip_high, int queryseq_offset) { int querystart, queryend, querypos, i; Chrpos_T 
chrpos; char *seq1; @@ -6383,9 +6523,159 @@ List_T -Substring_add_insertion (List_T pairs, T substringA, T substringB, int querylength, - int insertionlength, Shortread_T queryseq, - int hardclip_low, int hardclip_high, int queryseq_offset) { +Substring_add_insertion (List_T pairs, T substringA, T substringB, + int insertionlength, char *queryuc_ptr, + Pairpool_T pairpool) { + int querystartA, queryendA, querystartB, queryendB, querypos, i; + Chrpos_T chrendA; + + + if (substringA->plusp == true) { + querystartA = substringA->querystart; + queryendA = substringA->queryend; + querystartB = substringB->querystart; + queryendB = substringB->queryend; + + /* Pairs are all zero-based, so do not add 1 */ +#if 0 + chrendA = substringA->genomicstart_adj + queryendA - substringA->chroffset /*+ 1U*/; +#else + chrendA = substringA->genomicstart + queryendA - substringA->chroffset /*+ 1U*/; +#endif + + } else { + querystartA = substringA->querystart; + queryendA = substringA->queryend; + querystartB = substringB->querystart; + queryendB = substringB->queryend; + + /* Pairs are all zero-based, so subtract 1 */ +#if 0 + chrendA = substringA->genomicstart_adj - queryendA - substringA->chroffset - 1U; +#else + chrendA = substringA->genomicstart - queryendA - substringA->chroffset - 1U; +#endif + } + + if (querystartA <= queryendA && querystartB <= queryendB) { + querypos = queryendA /*+ queryseq_offset*/; + i = queryendA; + while (--insertionlength >= 0) { + pairs = Pairpool_push(pairs,pairpool,querypos++,/*genomepos*/chrendA, + queryuc_ptr[i++],/*comp*/INDEL_COMP,' ',/*g_alt*/' ',/*dynprogindex*/0); + } + } + + return pairs; +} + +List_T +Substring_add_deletion (List_T pairs, T substringA, T substringB, + char *deletion, int deletionlength, + Pairpool_T pairpool) { + int querystartA, queryendA, querystartB, queryendB, querypos, k; + Chrpos_T chrendA; + + if (substringA->plusp == true) { + querystartA = substringA->querystart; + queryendA = substringA->queryend; + querystartB = 
substringB->querystart; + queryendB = substringB->queryend; + + /* Pairs are all zero-based, so do not add 1 */ +#if 0 + chrendA = substringA->genomicstart_adj + queryendA - substringA->chroffset /*+ 1U*/; +#else + chrendA = substringA->genomicstart + queryendA - substringA->chroffset /*+ 1U*/; +#endif + + if (querystartA < queryendA && querystartB < queryendB) { + querypos = queryendA /*+ queryseq_offset*/; + for (k = 0; k < deletionlength; k++) { + pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrendA++, + ' ',/*comp*/INDEL_COMP,deletion[k],/*g_alt*/deletion[k], + /*dynprogindex*/0); + } + } + + } else { + querystartA = substringA->querystart; + queryendA = substringA->queryend; + querystartB = substringB->querystart; + queryendB = substringB->queryend; + + /* Pairs are all zero-based, so subtract 1 */ +#if 0 + chrendA = substringA->genomicstart_adj - queryendA - substringA->chroffset - 1U; +#else + chrendA = substringA->genomicstart - queryendA - substringA->chroffset - 1U; +#endif + + if (querystartA <= queryendA && querystartB <= queryendB) { + querypos = queryendA /*+ queryseq_offset*/; + for (k = 0; k < deletionlength; k++) { + pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrendA++, + ' ',/*comp*/INDEL_COMP,deletion[k],/*g_alt*/deletion[k], + /*dynprogindex*/0); + } + } + } + + return pairs; +} + +List_T +Substring_add_intron (List_T pairs, T substringA, T substringB, Pairpool_T pairpool) { + int querystartA, queryendA, querystartB, queryendB, querypos; + Chrpos_T chrendA; + + if (substringA->plusp == true) { + querystartA = substringA->querystart; + queryendA = substringA->queryend; + querystartB = substringB->querystart; + queryendB = substringB->queryend; + + /* Pairs are all zero-based, so do not add 1 */ +#if 0 + chrendA = substringA->genomicstart_adj + queryendA - substringA->chroffset /*+ 1U*/; +#else + chrendA = substringA->genomicstart + queryendA - substringA->chroffset /*+ 1U*/; +#endif + + } else { + querystartA = 
substringA->querystart; + queryendA = substringA->queryend; + querystartB = substringB->querystart; + queryendB = substringB->queryend; + + + /* Pairs are all zero-based, so subtract 1 */ +#if 0 + chrendA = substringA->genomicstart_adj - queryendA - substringA->chroffset - 1U; +#else + chrendA = substringA->genomicstart - queryendA - substringA->chroffset - 1U; +#endif + } + + if (querystartA <= queryendA && querystartB <= queryendB) { + /* Add gapholder */ + /* All we really need for Pair_print_sam is to set gapp to be true */ + querypos = queryendA /*+ queryseq_offset*/; + pairs = Pairpool_push(pairs,pairpool,querypos,/*genomepos*/chrendA, + ' ',/*comp*/FWD_CANONICAL_INTRON_COMP,' ',/*g_alt*/' ', + /*dynprogindex*/0); + } + + return pairs; +} + + + + +List_T +Substring_add_insertion_out (List_T pairs, T substringA, T substringB, int querylength, + int insertionlength, Shortread_T queryseq, + int hardclip_low, int hardclip_high, int queryseq_offset) { int querystartA, queryendA, querystartB, queryendB, querypos, i; Chrpos_T chrendA; char *seq1; @@ -6471,9 +6761,9 @@ List_T -Substring_add_deletion (List_T pairs, T substringA, T substringB, int querylength, - char *deletion, int deletionlength, - int hardclip_low, int hardclip_high, int queryseq_offset) { +Substring_add_deletion_out (List_T pairs, T substringA, T substringB, int querylength, + char *deletion, int deletionlength, + int hardclip_low, int hardclip_high, int queryseq_offset) { int querystartA, queryendA, querystartB, queryendB, querypos, k; Chrpos_T chrendA; @@ -6564,8 +6854,8 @@ List_T -Substring_add_intron (List_T pairs, T substringA, T substringB, int querylength, - int hardclip_low, int hardclip_high, int queryseq_offset) { +Substring_add_intron_out (List_T pairs, T substringA, T substringB, int querylength, + int hardclip_low, int hardclip_high, int queryseq_offset) { int querystartA, queryendA, querystartB, queryendB, querypos; Chrpos_T chrendA; diff -Nru gmap-2016-11-07/src/substring.h 
gmap-2017-01-14/src/substring.h --- gmap-2016-11-07/src/substring.h 2016-10-23 23:22:00.000000000 +0000 +++ gmap-2017-01-14/src/substring.h 2017-01-13 23:29:59.000000000 +0000 @@ -1,7 +1,11 @@ -/* $Id: substring.h 199475 2016-10-23 23:21:59Z twu $ */ +/* $Id: substring.h 202590 2017-01-13 23:29:58Z twu $ */ #ifndef SUBSTRING_INCLUDED #define SUBSTRING_INCLUDED +typedef enum {GMAP_NOT_APPLICABLE, GMAP_VIA_SUBSTRINGS, GMAP_VIA_SEGMENTS, GMAP_VIA_REGION} GMAP_source_T; +typedef enum {END, INS, DEL, FRAG, DON, ACC, AMB_DON, AMB_ACC, TERM} Endtype_T; +typedef enum {NO_TRIM, PRE_TRIMMED, COMPUTE_TRIM} Trimaction_T; + #include #include "mode.h" #include "genomicpos.h" @@ -14,6 +18,7 @@ #include "iit-read.h" #include "bool.h" #include "pairdef.h" +#include "pairpool.h" #include "filestring.h" #include "junction.h" #include "intlist.h" @@ -26,10 +31,6 @@ #endif -typedef enum {GMAP_NOT_APPLICABLE, GMAP_VIA_SUBSTRINGS, GMAP_VIA_SEGMENTS, GMAP_VIA_REGION} GMAP_source_T; -typedef enum {END, INS, DEL, FRAG, DON, ACC, AMB_DON, AMB_ACC, TERM} Endtype_T; -typedef enum {NO_TRIM, PRE_TRIMMED, COMPUTE_TRIM} Trimaction_T; - extern char * Endtype_string (Endtype_T endtype); @@ -127,9 +128,9 @@ extern Univcoord_T Substring_splicecoord_D (T this); extern Chrpos_T -Substring_chr_splicecoord_D (T this); +Substring_chr_splicecoord_D (T this, char donor_strand); extern Chrpos_T -Substring_chr_splicecoord_A (T this); +Substring_chr_splicecoord_A (T this, char acceptor_strand); extern int Substring_splicesitesD_knowni (T this); extern int @@ -359,21 +360,36 @@ extern int Substring_count_mismatches_region (T this, int trim_left, int trim_right, Compress_T query_compress_fwd, Compress_T query_compress_rev); - extern List_T -Substring_convert_to_pairs (List_T pairs, T substring, int querylength, Shortread_T queryseq, - int hardclip_low, int hardclip_high, int queryseq_offset); +Substring_convert_to_pairs (List_T pairs, T substring, char *queryuc_ptr, + Chrpos_T chrlength, Pairpool_T pairpool); 
+extern List_T +Substring_convert_to_pairs_out (List_T pairs, T substring, int querylength, Shortread_T queryseq, + int hardclip_low, int hardclip_high, int queryseq_offset); + extern List_T -Substring_add_insertion (List_T pairs, T substringA, T substringB, int querylength, - int insertionlength, Shortread_T queryseq, - int hardclip_low, int hardclip_high, int queryseq_offset); +Substring_add_insertion (List_T pairs, T substringA, T substringB, + int insertionlength, char *queryuc_ptr, + Pairpool_T pairpool); extern List_T -Substring_add_deletion (List_T pairs, T substringA, T substringB, int querylength, +Substring_add_deletion (List_T pairs, T substringA, T substringB, char *deletion, int deletionlength, - int hardclip_low, int hardclip_high, int queryseq_offset); + Pairpool_T pairpool); +extern List_T +Substring_add_intron (List_T pairs, T substringA, T substringB, + Pairpool_T pairpool); + +extern List_T +Substring_add_insertion_out (List_T pairs, T substringA, T substringB, int querylength, + int insertionlength, Shortread_T queryseq, + int hardclip_low, int hardclip_high, int queryseq_offset); +extern List_T +Substring_add_deletion_out (List_T pairs, T substringA, T substringB, int querylength, + char *deletion, int deletionlength, + int hardclip_low, int hardclip_high, int queryseq_offset); extern List_T -Substring_add_intron (List_T pairs, T substringA, T substringB, int querylength, - int hardclip_low, int hardclip_high, int queryseq_offset); +Substring_add_intron_out (List_T pairs, T substringA, T substringB, int querylength, + int hardclip_low, int hardclip_high, int queryseq_offset); #undef T #endif diff -Nru gmap-2016-11-07/src/uniqscan.c gmap-2017-01-14/src/uniqscan.c --- gmap-2016-11-07/src/uniqscan.c 2016-11-08 00:56:52.000000000 +0000 +++ gmap-2017-01-14/src/uniqscan.c 2016-12-29 16:20:18.000000000 +0000 @@ -1,4 +1,4 @@ -static char rcsid[] = "$Id: uniqscan.c 200234 2016-11-08 00:56:52Z twu $"; +static char rcsid[] = "$Id: uniqscan.c 202031 
2016-12-29 16:20:14Z twu $"; #ifdef HAVE_CONFIG_H #include #endif @@ -1298,7 +1298,8 @@ splicesites,splicetypes,splicedists,nsplicesites, novelsplicingp,knownsplicingp,/*find_dna_chimeras_p*/false,distances_observed_p, subopt_levels,min_indel_end_matches,max_middle_insertions,max_middle_deletions, - shortsplicedist,shortsplicedist_known,shortsplicedist_novelend,min_intronlength, + shortsplicedist,shortsplicedist_known,shortsplicedist_novelend, + min_intronlength,expected_pairlength,pairlength_deviation, min_distantsplicing_end_matches,min_distantsplicing_identity, nullgap,maxpeelback,maxpeelback_distalmedial, extramaterial_end,extramaterial_paired,gmap_mode, @@ -1338,7 +1339,8 @@ chromosome_iit,nchromosomes,circular_typeint,genes_iit,genes_divint_crosstable, /*tally_iit*/NULL,/*tally_divint_crosstable*/NULL, /*runlength_iit*/NULL,/*runlength_divint_crosstable*/NULL, - distances_observed_p,pairmax_linear,pairmax_circular,expected_pairlength,pairlength_deviation, + distances_observed_p,pairmax_linear,pairmax_circular, + expected_pairlength,pairlength_deviation,maxpeelback, localsplicing_penalty,indel_penalty_middle,antistranded_penalty, favor_multiexon_p,gmap_min_nconsecutive,/*end_detail*/1,subopt_levels, max_middle_insertions,max_middle_deletions, diff -Nru gmap-2016-11-07/tests/Makefile.in gmap-2017-01-14/tests/Makefile.in --- gmap-2016-11-07/tests/Makefile.in 2016-11-08 01:15:36.000000000 +0000 +++ gmap-2017-01-14/tests/Makefile.in 2017-01-13 23:46:50.000000000 +0000 @@ -422,6 +422,7 @@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMD_AVX2_CFLAGS = @SIMD_AVX2_CFLAGS@ +SIMD_AVX512_CFLAGS = @SIMD_AVX512_CFLAGS@ SIMD_SSE2_CFLAGS = @SIMD_SSE2_CFLAGS@ SIMD_SSE4_1_CFLAGS = @SIMD_SSE4_1_CFLAGS@ SIMD_SSE4_2_CFLAGS = @SIMD_SSE4_2_CFLAGS@ diff -Nru gmap-2016-11-07/util/gmap_build.pl.in gmap-2017-01-14/util/gmap_build.pl.in --- gmap-2016-11-07/util/gmap_build.pl.in 2016-02-18 17:51:39.000000000 +0000 +++ gmap-2017-01-14/util/gmap_build.pl.in 2017-01-12 20:00:40.000000000 +0000 @@ 
-45,7 +45,9 @@ 'c|circular=s' => \$circular, # Circular chromosomes '2|altscaffold=s' => \$altscaffold, # File with altscaffold info - 'e|nmessages=s' => \$nmessages # Max number of warnings or messages to print + 'e|nmessages=s' => \$nmessages, # Max number of warnings or messages to print + + 'p|part=s' => \$part # Build in parts ); @@ -149,6 +151,9 @@ #} #my $genome_fasta = join(" ",@quoted); +$dbdir = create_db($destdir,$dbname); +$genomecompfile = "$dbdir/$dbname.genomecomp"; + my $coordsfile = "$destdir/$dbname.coords"; my $fasta_sources = "$destdir/$dbname.sources"; @@ -163,42 +168,49 @@ check_compiler_assumptions(); -$dbdir = create_db($destdir,$dbname); +if (!defined($part) || $part == 1) { -create_genome_version($dbdir,$dbname); + create_genome_version($dbdir,$dbname); -create_coords($mdfile,$fasta_pipe,$gunzip_flag,$circular_flag,$altscaffold_flag,$contigs_mapped_flag,$chrnamefile, - $bindir,$coordsfile,$fasta_sources); -if (!(-s "$coordsfile")) { - die "ERROR: $coordsfile not found"; -} else { - $gmap_process_pipe = make_gmap_process_pipe($fasta_pipe,$gunzip_flag,$bindir,$coordsfile,$fasta_sources); -} + create_coords($mdfile,$fasta_pipe,$gunzip_flag,$circular_flag,$altscaffold_flag,$contigs_mapped_flag,$chrnamefile, + $bindir,$coordsfile,$fasta_sources); + if (!(-s "$coordsfile")) { + die "ERROR: $coordsfile not found"; + } else { + $gmap_process_pipe = make_gmap_process_pipe($fasta_pipe,$gunzip_flag,$bindir,$coordsfile,$fasta_sources); + } -make_contig($nmessages_flag,$chr_order_flag, - $bindir,$dbdir,$dbname,$gmap_process_pipe); + make_contig($nmessages_flag,$chr_order_flag, + $bindir,$dbdir,$dbname,$gmap_process_pipe); -$genomecompfile = compress_genome($nmessages_flag,$bindir,$dbdir,$dbname,$gmap_process_pipe); + compress_genome($nmessages_flag,$bindir,$dbdir,$dbname,$gmap_process_pipe); -unshuffle_genome($bindir,$dbdir,$dbname,$genomecompfile); + unshuffle_genome($bindir,$dbdir,$dbname,$genomecompfile); +} -$index_cmd = "\"$bindir/gmapindex\" -k 
$kmersize -q $sampling $nmessages_flag -d $dbname -F \"$dbdir\" -D \"$dbdir\""; +if (!defined($part) || $part == 2) { + $index_cmd = "\"$bindir/gmapindex\" -k $kmersize -q $sampling $nmessages_flag -d $dbname -F \"$dbdir\" -D \"$dbdir\""; -if (count_index_offsets($index_cmd,$genomecompfile) == 1) { - $index_cmd .= " -H"; -} + if (count_index_offsets($index_cmd,$genomecompfile) == 1) { + $index_cmd .= " -H"; + } -create_index_offsets($index_cmd,$compression_flag,$genomecompfile); + create_index_offsets($index_cmd,$compression_flag,$genomecompfile); -create_index_positions($index_cmd,$genomecompfile); + create_index_positions($index_cmd,$genomecompfile); +} -if ($sarrayp == 1) { - make_enhanced_suffix_array($bindir,$dbdir,$dbname); +if (!defined($part) || $part == 3) { + if ($sarrayp == 1) { + make_enhanced_suffix_array($bindir,$dbdir,$dbname); + } } +if (!defined($part) || $part == 4) { # install_db($sarrayp); -system("rm -f \"$fasta_sources\""); -system("rm -f \"$coordsfile\""); + system("rm -f \"$fasta_sources\""); + system("rm -f \"$coordsfile\""); +} exit; @@ -216,6 +228,7 @@ my ($destdir, $dbname) = @_; print STDERR "Creating files in directory $destdir/$dbname\n"; + system("mkdir -p \"$destdir\""); system("mkdir -p \"$destdir/$dbname\""); system("mkdir -p \"$destdir/$dbname/$dbname.maps\""); system("chmod 755 \"$destdir/$dbname/$dbname.maps\""); @@ -289,7 +302,6 @@ sub compress_genome { my ($nmessages_flag, $bindir, $dbdir, $dbname, $gmap_process_pipe) = @_; - my $genomecompfile = "$dbdir/$dbname.genomecomp"; my ($cmd, $rc); $cmd = "$gmap_process_pipe | \"$bindir/gmapindex\" $nmessages_flag -d $dbname -F \"$dbdir\" -D \"$dbdir\" -G"; @@ -298,7 +310,7 @@ die "$cmd failed with return code $rc"; } sleep($sleeptime); - return $genomecompfile; + return; } sub unshuffle_genome { diff -Nru gmap-2016-11-07/util/Makefile.in gmap-2017-01-14/util/Makefile.in --- gmap-2016-11-07/util/Makefile.in 2016-11-08 01:15:36.000000000 +0000 +++ gmap-2017-01-14/util/Makefile.in 
2017-01-13 23:46:50.000000000 +0000 @@ -263,6 +263,7 @@ SET_MAKE = @SET_MAKE@ SHELL = @SHELL@ SIMD_AVX2_CFLAGS = @SIMD_AVX2_CFLAGS@ +SIMD_AVX512_CFLAGS = @SIMD_AVX512_CFLAGS@ SIMD_SSE2_CFLAGS = @SIMD_SSE2_CFLAGS@ SIMD_SSE4_1_CFLAGS = @SIMD_SSE4_1_CFLAGS@ SIMD_SSE4_2_CFLAGS = @SIMD_SSE4_2_CFLAGS@ diff -Nru gmap-2016-11-07/VERSION gmap-2017-01-14/VERSION --- gmap-2016-11-07/VERSION 2016-11-08 01:00:53.000000000 +0000 +++ gmap-2017-01-14/VERSION 2017-01-13 23:39:49.000000000 +0000 @@ -1 +1 @@ -2016-11-07 \ No newline at end of file +2017-01-14 \ No newline at end of file