diff -Nru zita-resampler-1.6.2/debian/changelog zita-resampler-1.8.0/debian/changelog --- zita-resampler-1.6.2/debian/changelog 2020-03-15 16:07:31.000000000 +0000 +++ zita-resampler-1.8.0/debian/changelog 2021-01-02 21:14:01.000000000 +0000 @@ -1,3 +1,24 @@ +zita-resampler (1.8.0-2) unstable; urgency=medium + + * Fix the SSE2 excluding in d/rules + + -- Dennis Braun Sat, 02 Jan 2021 22:14:01 +0100 + +zita-resampler (1.8.0-1) unstable; urgency=medium + + * New upstream version 1.8.0 + * Refresh makefile patch + * Remove SSE2 patch, (mostly) applied by upstream + * Enable SSE2 only for amd64 and x32 + * Fix spell error in the documentation + * Bump dh-compat to 13 + * Bump S-V to 4.5.1 + * Update d/copyright years + * Remove unneeded build flags in d/rules + * Include architecture.mk instead of calling dpkg-architecture + + -- Dennis Braun Sat, 02 Jan 2021 17:14:38 +0100 + zita-resampler (1.6.2-2) unstable; urgency=medium * Fix FTCBFS (Closes: #950705) Thanks to Helmut Grohne! diff -Nru zita-resampler-1.6.2/debian/control zita-resampler-1.8.0/debian/control --- zita-resampler-1.6.2/debian/control 2020-03-15 11:48:03.000000000 +0000 +++ zita-resampler-1.8.0/debian/control 2021-01-01 21:12:58.000000000 +0000 @@ -6,9 +6,9 @@ Dennis Braun , Jaromír Mikeš Build-Depends: - debhelper-compat (= 12), + debhelper-compat (= 13), libsndfile1-dev -Standards-Version: 4.5.0 +Standards-Version: 4.5.1 Vcs-Git: https://salsa.debian.org/multimedia-team/zita-resampler.git Vcs-Browser: https://salsa.debian.org/multimedia-team/zita-resampler Homepage: https://kokkinizita.linuxaudio.org/linuxaudio/zita-resampler/resampler.html diff -Nru zita-resampler-1.6.2/debian/copyright zita-resampler-1.8.0/debian/copyright --- zita-resampler-1.6.2/debian/copyright 2020-02-03 19:32:46.000000000 +0000 +++ zita-resampler-1.8.0/debian/copyright 2021-01-01 21:13:48.000000000 +0000 @@ -4,13 +4,13 @@ Source: https://kokkinizita.linuxaudio.org/linuxaudio/downloads/index.html Files: * -Copyright: 2006-2018 
Fons Adriaensen +Copyright: 2006-2020 Fons Adriaensen License: GPL-3+ Files: debian/* Copyright: 2010-2012 Alessio Treglia 2012 Jaromír Mikeš - 2020 Dennis Braun + 2020-2021 Dennis Braun License: GPL-3+ License: GPL-3+ diff -Nru zita-resampler-1.6.2/debian/patches/01-makefile.patch zita-resampler-1.8.0/debian/patches/01-makefile.patch --- zita-resampler-1.6.2/debian/patches/01-makefile.patch 2020-03-15 16:07:31.000000000 +0000 +++ zita-resampler-1.8.0/debian/patches/01-makefile.patch 2021-01-01 22:55:44.000000000 +0000 @@ -3,18 +3,20 @@ called 'resample' is provided by another package. Author: Alessio Treglia Author: Dennis Braun -Last-Update: 2020-02-10 +Last-Update: 2021-01-01 Forwarded: not-needed ---- a/apps/Makefile -+++ b/apps/Makefile -@@ -22,17 +22,16 @@ +Index: zita-resampler/apps/Makefile +=================================================================== +--- zita-resampler.orig/apps/Makefile ++++ zita-resampler/apps/Makefile +@@ -22,17 +22,16 @@ PREFIX ?= /usr/local BINDIR ?= $(PREFIX)/bin MANDIR ?= /usr/share/man/man1 -VERSION = 1.6.0 -CPPFLAGS += -MMD -MP -DVERSION=\"$(VERSION)\" -+VERSION = 1.6.2 ++VERSION = 1.8.0 +CPPFLAGS += -MMD -MP -DVERSION=\"$(VERSION)\" -I../debian/tmp/usr/include -I../source CXXFLAGS += -O2 -ffast-math -Wall -CXXFLAGS += -march=native @@ -26,16 +28,16 @@ ZRESAMPLE_O = zresample.o audiofile.o dither.o -zresample: LDLIBS += -lzita-resampler -lsndfile -lrt -+zresample: LDLIBS += ../source/libzita-resampler.so.1.6.2 -lsndfile -lrt ++zresample: LDLIBS += ../source/libzita-resampler.so.1.8.0 -lsndfile -lrt zresample: $(ZRESAMPLE_O) $(CXX) $(LDFLAGS) -o $@ $(ZRESAMPLE_O) $(LDLIBS) $(ZRESAMPLE_O): -@@ -40,34 +39,25 @@ +@@ -40,34 +39,25 @@ $(ZRESAMPLE_O): ZRETUNE_O = zretune.o audiofile.o dither.o -zretune: LDLIBS += -lzita-resampler -lsndfile -lrt -+zretune: LDLIBS += ../source/libzita-resampler.so.1.6.2 -lsndfile -lrt ++zretune: LDLIBS += ../source/libzita-resampler.so.1.8.0 -lsndfile -lrt zretune: $(ZRETUNE_O) $(CXX) $(LDFLAGS) -o 
$@ $(ZRETUNE_O) $(LDLIBS) $(ZRETUNE_O): @@ -73,17 +75,21 @@ - /bin/rm -f *~ *.o *.a *.d *.so *.gz zresample zretune + /bin/rm -f *~ *.o *.a *.d *.so zresample zretune ---- a/source/Makefile -+++ b/source/Makefile -@@ -33,7 +33,6 @@ +Index: zita-resampler/source/Makefile +=================================================================== +--- zita-resampler.orig/source/Makefile ++++ zita-resampler/source/Makefile +@@ -31,9 +31,7 @@ VERSION = $(MAJVERS).$(MINVERS) + DISTDIR = zita-resampler-$(VERSION) CPPFLAGS += -I. -D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS +-CPPFLAGS += -DENABLE_SSE2 CXXFLAGS += -Wall -fPIC -O2 -ffast-math -CXXFLAGS += -march=native LDFLAGS += LDLIBS += -@@ -48,7 +47,7 @@ +@@ -48,7 +46,7 @@ ZITA-RESAMPLER_H = zita-resampler/resamp $(ZITA-RESAMPLER_MIN): $(ZITA-RESAMPLER_O) @@ -92,7 +98,7 @@ $(ZITA-RESAMPLER_O): $(ZITA-RESAMPLER_H) -@@ -58,8 +57,8 @@ +@@ -58,8 +56,8 @@ install: $(ZITA-RESAMPLER_MIN) install -d $(DESTDIR)$(LIBDIR) install -m 644 $(ZITA-RESAMPLER_H) $(DESTDIR)$(INCDIR)/zita-resampler install -m 755 $(ZITA-RESAMPLER_MIN) $(DESTDIR)$(LIBDIR) diff -Nru zita-resampler-1.6.2/debian/patches/fix_spell_errors.patch zita-resampler-1.8.0/debian/patches/fix_spell_errors.patch --- zita-resampler-1.6.2/debian/patches/fix_spell_errors.patch 1970-01-01 00:00:00.000000000 +0000 +++ zita-resampler-1.8.0/debian/patches/fix_spell_errors.patch 2021-01-01 21:09:24.000000000 +0000 @@ -0,0 +1,75 @@ +Description: Fix spell errors +Author: Dennis Braun +Forwarded: not-yet + +Index: zita-resampler-build/apps/zretune.1 +=================================================================== +--- zita-resampler-build.orig/apps/zretune.1 ++++ zita-resampler-build/apps/zretune.1 +@@ -8,7 +8,7 @@ zretune \- resample an audio file in ord + .B zretune + resamples an audio file by a the inverse of a ratio expressed in cents, + without changing the nominal sample rate. The result is to change the +-musical pitch and lenght of the file. 
Input can be any audio file ++musical pitch and length of the file. Input can be any audio file + readable by the libsndfile library. The output file type is either + WAV, WAVEX, CAF, AIFF or FLAC. + .SH OPTIONS +Index: zita-resampler-build/docs/resampler.html +=================================================================== +--- zita-resampler-build.orig/docs/resampler.html ++++ zita-resampler-build/docs/resampler.html +@@ -261,7 +261,7 @@ input samples at the start and end of th + easy to add such padding, and doing this is left entirely up to the user. +

+

+-The inpsize () member returns the lenght of the FIR filter expressed in ++The inpsize () member returns the length of the FIR filter expressed in + input samples. At least this number of samples is required to produce an output + sample. If k is the value returned by this function, then +

+@@ -368,7 +368,7 @@ can be used. The destructor calls

int   Resampler::setup (unsigned int   fs_inp, unsigned int fs_out, unsigned int nchan, unsigned int hlen);

+

+ Description: Configures the object for a combination of input / output sample rates, number +-of channels, and filter lenght.
If the parameters are OK, creates the filter coefficient tables ++of channels, and filter length.
If the parameters are OK, creates the filter coefficient tables + or re-uses existing ones, allocates some internal resources, and returns via
+ reset (). +

+@@ -381,7 +381,7 @@ with a, b integer and b ≤ 1 +

+ nchan: Number of channels, must not be zero. +

+-hlen: Half the lenght of the filter expressed in samples at the lower of ++hlen: Half the length of the filter expressed in samples at the lower of + input and output rates. This parameter determines the 'quality' as explained + here. For any fixed combination of the other parameters, + cpu load will be roughly proportional to hlen. The valid range is +@@ -402,7 +402,7 @@ even if they are not shared. +

int   VResampler::setup (double ratio, unsigned int nchan, unsigned int hlen);

+

+ Description: Configures the object for a combination of resampling ratio, number of channels, +-and filter lenght.
If the parameters are OK, creates the filter coefficient tables or re-uses ++and filter length.
If the parameters are OK, creates the filter coefficient tables or re-uses + existing ones, allocates some internal resources, and returns via + reset (). +

+@@ -413,7 +413,7 @@ existing ones, allocates some internal r +

+ nchan: Number of channels, must not be zero. +

+-hlen: Half the lenght of the filter expressed in samples at the lower of ++hlen: Half the length of the filter expressed in samples at the lower of + the input and output rates. This parameter determines the 'quality' as explained + here. For any fixed combination of the other parameters, + cpu load will be roughly proportional to hlen. The valid range is +@@ -482,7 +482,7 @@ this number of channels in interleaved f +

+ Description: Accessor. +

+-Returns: If the resampler is configured, the lenght of the ++Returns: If the resampler is configured, the length of the + finite impulse filter expressed in samples at the input sample rate, + or zero otherwise. This value may be used to determine the number of + silence samples to insert at the start and end when resampling e.g. diff -Nru zita-resampler-1.6.2/debian/patches/series zita-resampler-1.8.0/debian/patches/series --- zita-resampler-1.6.2/debian/patches/series 2020-02-03 19:48:27.000000000 +0000 +++ zita-resampler-1.8.0/debian/patches/series 2021-01-01 21:29:58.000000000 +0000 @@ -1,2 +1,2 @@ 01-makefile.patch -zita-resampler-sse.diff +fix_spell_errors.patch diff -Nru zita-resampler-1.6.2/debian/patches/zita-resampler-sse.diff zita-resampler-1.8.0/debian/patches/zita-resampler-sse.diff --- zita-resampler-1.6.2/debian/patches/zita-resampler-sse.diff 2020-02-03 19:48:27.000000000 +0000 +++ zita-resampler-1.8.0/debian/patches/zita-resampler-sse.diff 1970-01-01 00:00:00.000000000 +0000 @@ -1,422 +0,0 @@ -Description: SSE-optimizing resampling of stereo signals. - https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=832095 -Author: Steinar H. 
Gunderson -Last-Update: 2016-08-29 -Forwarded: yes - ---- a/source/resampler.cc -+++ b/source/resampler.cc -@@ -24,6 +24,10 @@ - #include - #include - -+#ifdef __SSE2__ -+#include -+#endif -+ - - static unsigned int gcd (unsigned int a, unsigned int b) - { -@@ -47,6 +51,118 @@ - return 1; - } - -+#ifdef __SSE2__ -+ -+static inline float calc_mono_sample_sse (unsigned int hl, -+ const float *c1, -+ const float *c2, -+ const float *q1, -+ const float *q2) -+{ -+ unsigned int i; -+ __m128 denorm, s, w1, w2, shuf; -+ -+ denorm = _mm_set1_ps (1e-20f); -+ s = denorm; -+ for (i = 0; i < hl; i += 4) -+ { -+ q2 -= 4; -+ -+ // s += *q1 * c1 [i]; -+ w1 = _mm_loadu_ps (&c1 [i]); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q1), w1)); -+ -+ // s += *q2 * c2 [i]; -+ w2 = _mm_loadu_ps (&c2 [i]); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q2), _mm_shuffle_ps (w2, w2, _MM_SHUFFLE (0, 1, 2, 3)))); -+ -+ q1 += 4; -+ } -+ s = _mm_sub_ps (s, denorm); -+ -+ // Add all the elements of s together into one. Adapted from -+ // http://stackoverflow.com/questions/6996764/fastest-way-to-do-horizontal-float-vector-sum-on-x86 -+ shuf = _mm_shuffle_ps (s, s, _MM_SHUFFLE (2, 3, 0, 1)); -+ s = _mm_add_ps (s, shuf); -+ s = _mm_add_ss (s, _mm_movehl_ps (shuf, s)); -+ return _mm_cvtss_f32 (s); -+} -+ -+// Note: This writes four floats instead of two (the last two are garbage). -+// The caller will need to make sure there is room for all four. 
-+static inline void calc_stereo_sample_sse (unsigned int hl, -+ const float *c1, -+ const float *c2, -+ const float *q1, -+ const float *q2, -+ float *out_data) -+{ -+ unsigned int i; -+ __m128 denorm, s, w1, w2; -+ -+ denorm = _mm_set1_ps (1e-20f); -+ s = denorm; -+ for (i = 0; i < hl; i += 4) -+ { -+ q2 -= 8; -+ -+ // s += *q1 * c1 [i]; -+ w1 = _mm_loadu_ps (&c1 [i]); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q1), _mm_unpacklo_ps (w1, w1))); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q1 + 4), _mm_unpackhi_ps (w1, w1))); -+ -+ // s += *q2 * c2 [i]; -+ w2 = _mm_loadu_ps (&c2 [i]); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q2 + 4), _mm_shuffle_ps (w2, w2, _MM_SHUFFLE (0, 0, 1, 1)))); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q2), _mm_shuffle_ps (w2, w2, _MM_SHUFFLE (2, 2, 3, 3)))); -+ -+ q1 += 8; -+ } -+ s = _mm_sub_ps (s, denorm); -+ s = _mm_add_ps (s, _mm_shuffle_ps (s, s, _MM_SHUFFLE (1, 0, 3, 2))); -+ -+ _mm_storeu_ps (out_data, s); -+} -+ -+static inline void calc_quad_sample_sse (int hl, -+ int nchan, -+ const float *c1, -+ const float *c2, -+ const float *q1, -+ const float *q2, -+ float *out_data) -+{ -+ int i; -+ __m128 denorm, s, w1, w2; -+ -+ denorm = _mm_set1_ps (1e-20f); -+ s = denorm; -+ for (i = 0; i < hl; i += 4) -+ { -+ q2 -= 4 * nchan; -+ -+ // s += *p1 * _c1 [i]; -+ w1 = _mm_loadu_ps (&c1 [i]); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q1), _mm_shuffle_ps (w1, w1, _MM_SHUFFLE (0, 0, 0, 0)))); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q1 + nchan), _mm_shuffle_ps (w1, w1, _MM_SHUFFLE (1, 1, 1, 1)))); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q1 + 2 * nchan), _mm_shuffle_ps (w1, w1, _MM_SHUFFLE (2, 2, 2, 2)))); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q1 + 3 * nchan), _mm_shuffle_ps (w1, w1, _MM_SHUFFLE (3, 3, 3, 3)))); -+ -+ // s += *p2 * _c2 [i]; -+ w2 = _mm_loadu_ps (&c2 [i]); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q2 + 3 * nchan), _mm_shuffle_ps (w2, w2, _MM_SHUFFLE (0, 0, 0, 0)))); -+ s = 
_mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q2 + 2 * nchan), _mm_shuffle_ps (w2, w2, _MM_SHUFFLE (1, 1, 1, 1)))); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q2 + nchan), _mm_shuffle_ps (w2, w2, _MM_SHUFFLE (2, 2, 2, 2)))); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q2), _mm_shuffle_ps (w2, w2, _MM_SHUFFLE (3, 3, 3, 3)))); -+ -+ q1 += 4 * nchan; -+ } -+ s = _mm_sub_ps (s, denorm); -+ -+ _mm_storeu_ps (out_data, s); -+} -+#endif -+ - - Resampler::Resampler (void) : - _table (0), -@@ -213,18 +329,42 @@ - { - float *c1 = _table->_ctab + hl * ph; - float *c2 = _table->_ctab + hl * (np - ph); -- for (c = 0; c < _nchan; c++) -+#ifdef __SSE2__ -+ if ((hl % 4) == 0 && _nchan == 1) -+ { -+ *out_data++ = calc_mono_sample_sse (hl, c1, c2, p1, p2); -+ } -+ else if ((hl % 4) == 0 && _nchan == 2) - { -- float *q1 = p1 + c; -- float *q2 = p2 + c; -- float s = 1e-20f; -- for (i = 0; i < hl; i++) -+ if (out_count >= 2) -+ { -+ calc_stereo_sample_sse (hl, c1, c2, p1, p2, out_data); -+ } -+ else -+ { -+ float tmp[4]; -+ calc_stereo_sample_sse (hl, c1, c2, p1, p2, tmp); -+ out_data[0] = tmp[0]; -+ out_data[1] = tmp[1]; -+ } -+ out_data += 2; -+ } -+ else -+#endif -+ { -+ for (c = 0; c < _nchan; c++) - { -- q2 -= _nchan; -- s += *q1 * c1 [i] + *q2 * c2 [i]; -- q1 += _nchan; -+ float *q1 = p1 + c; -+ float *q2 = p2 + c; -+ float s = 1e-20f; -+ for (i = 0; i < hl; i++) -+ { -+ q2 -= _nchan; -+ s += *q1 * c1 [i] + *q2 * c2 [i]; -+ q1 += _nchan; -+ } -+ *out_data++ = s - 1e-20f; - } -- *out_data++ = s - 1e-20f; - } - } - else -diff -ur orig/zita-resampler-1.3.0/source/vresampler.cc zita-resampler-1.3.0/source/vresampler.cc ---- orig/zita-resampler-1.3.0/source/vresampler.cc 2012-10-26 22:58:55.000000000 +0200 -+++ zita-resampler-1.3.0/source/vresampler.cc 2016-09-05 00:33:53.907511211 +0200 -@@ -25,6 +25,152 @@ - #include - - -+#ifdef __SSE2__ -+ -+#include -+ -+static inline float calc_mono_sample_sse (int hl, -+ float b, -+ const float *p1, -+ const float *p2, -+ const float *q1, 
-+ const float *q2) -+{ -+ int i; -+ __m128 denorm, bs, s, c1, c2, w1, w2, shuf; -+ -+ denorm = _mm_set1_ps (1e-25f); -+ bs = _mm_set1_ps (b); -+ s = denorm; -+ for (i = 0; i < hl; i += 4) -+ { -+ p2 -= 4; -+ -+ // _c1 [i] = q1 [i] + b * (q1 [i + hl] - q1 [i]); -+ w1 = _mm_loadu_ps (&q1 [i]); -+ w2 = _mm_loadu_ps (&q1 [i + hl]); -+ c1 = _mm_add_ps (w1, _mm_mul_ps(bs, _mm_sub_ps (w2, w1))); -+ -+ // _c2 [i] = q2 [i] + b * (q2 [i - hl] - q2 [i]); -+ w1 = _mm_loadu_ps (&q2 [i]); -+ w2 = _mm_loadu_ps (&q2 [i - hl]); -+ c2 = _mm_add_ps (w1, _mm_mul_ps(bs, _mm_sub_ps (w2, w1))); -+ -+ // s += *p1 * _c1 [i]; -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p1), c1)); -+ -+ // s += *p2 * _c2 [i]; -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p2), _mm_shuffle_ps (c2, c2, _MM_SHUFFLE (0, 1, 2, 3)))); -+ -+ p1 += 4; -+ } -+ s = _mm_sub_ps (s, denorm); -+ -+ // Add all the elements of s together into one. Adapted from -+ // http://stackoverflow.com/questions/6996764/fastest-way-to-do-horizontal-float-vector-sum-on-x86 -+ shuf = _mm_shuffle_ps (s, s, _MM_SHUFFLE (2, 3, 0, 1)); -+ s = _mm_add_ps (s, shuf); -+ s = _mm_add_ss (s, _mm_movehl_ps (shuf, s)); -+ return _mm_cvtss_f32 (s); -+} -+ -+// Note: This writes four floats instead of two (the last two are garbage). -+// The caller will need to make sure there is room for all four. 
-+static inline void calc_stereo_sample_sse (int hl, -+ float b, -+ const float *p1, -+ const float *p2, -+ const float *q1, -+ const float *q2, -+ float *out_data) -+{ -+ int i; -+ __m128 denorm, bs, s, c1, c2, w1, w2; -+ -+ denorm = _mm_set1_ps (1e-25f); -+ bs = _mm_set1_ps (b); -+ s = denorm; -+ for (i = 0; i < hl; i += 4) -+ { -+ p2 -= 8; -+ -+ // _c1 [i] = q1 [i] + b * (q1 [i + hl] - q1 [i]); -+ w1 = _mm_loadu_ps (&q1 [i]); -+ w2 = _mm_loadu_ps (&q1 [i + hl]); -+ c1 = _mm_add_ps (w1, _mm_mul_ps(bs, _mm_sub_ps (w2, w1))); -+ -+ // _c2 [i] = q2 [i] + b * (q2 [i - hl] - q2 [i]); -+ w1 = _mm_loadu_ps (&q2 [i]); -+ w2 = _mm_loadu_ps (&q2 [i - hl]); -+ c2 = _mm_add_ps (w1, _mm_mul_ps(bs, _mm_sub_ps (w2, w1))); -+ -+ // s += *p1 * _c1 [i]; -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p1), _mm_unpacklo_ps (c1, c1))); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p1 + 4), _mm_unpackhi_ps (c1, c1))); -+ -+ // s += *p2 * _c2 [i]; -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p2 + 4), _mm_shuffle_ps (c2, c2, _MM_SHUFFLE (0, 0, 1, 1)))); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p2), _mm_shuffle_ps (c2, c2, _MM_SHUFFLE (2, 2, 3, 3)))); -+ -+ p1 += 8; -+ } -+ s = _mm_sub_ps (s, denorm); -+ s = _mm_add_ps (s, _mm_shuffle_ps (s, s, _MM_SHUFFLE (1, 0, 3, 2))); -+ -+ _mm_storeu_ps (out_data, s); -+} -+ -+static inline void calc_quad_sample_sse (int hl, -+ int nchan, -+ float b, -+ const float *p1, -+ const float *p2, -+ const float *q1, -+ const float *q2, -+ float *out_data) -+{ -+ int i; -+ __m128 denorm, bs, s, c1, c2, w1, w2; -+ -+ denorm = _mm_set1_ps (1e-25f); -+ bs = _mm_set1_ps (b); -+ s = denorm; -+ for (i = 0; i < hl; i += 4) -+ { -+ p2 -= 4 * nchan; -+ -+ // _c1 [i] = q1 [i] + b * (q1 [i + hl] - q1 [i]); -+ w1 = _mm_loadu_ps (&q1 [i]); -+ w2 = _mm_loadu_ps (&q1 [i + hl]); -+ c1 = _mm_add_ps (w1, _mm_mul_ps(bs, _mm_sub_ps (w2, w1))); -+ -+ // _c2 [i] = q2 [i] + b * (q2 [i - hl] - q2 [i]); -+ w1 = _mm_loadu_ps (&q2 [i]); -+ w2 = _mm_loadu_ps (&q2 [i - 
hl]); -+ c2 = _mm_add_ps (w1, _mm_mul_ps(bs, _mm_sub_ps (w2, w1))); -+ -+ // s += *p1 * _c1 [i]; -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p1), _mm_shuffle_ps (c1, c1, _MM_SHUFFLE (0, 0, 0, 0)))); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p1 + nchan), _mm_shuffle_ps (c1, c1, _MM_SHUFFLE (1, 1, 1, 1)))); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p1 + 2 * nchan), _mm_shuffle_ps (c1, c1, _MM_SHUFFLE (2, 2, 2, 2)))); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p1 + 3 * nchan), _mm_shuffle_ps (c1, c1, _MM_SHUFFLE (3, 3, 3, 3)))); -+ -+ // s += *p2 * _c2 [i]; -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p2 + 3 * nchan), _mm_shuffle_ps (c2, c2, _MM_SHUFFLE (0, 0, 0, 0)))); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p2 + 2 * nchan), _mm_shuffle_ps (c2, c2, _MM_SHUFFLE (1, 1, 1, 1)))); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p2 + nchan), _mm_shuffle_ps (c2, c2, _MM_SHUFFLE (2, 2, 2, 2)))); -+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p2), _mm_shuffle_ps (c2, c2, _MM_SHUFFLE (3, 3, 3, 3)))); -+ -+ p1 += 4 * nchan; -+ } -+ s = _mm_sub_ps (s, denorm); -+ -+ _mm_storeu_ps (out_data, s); -+} -+ -+#endif -+ -+ - VResampler::VResampler (void) : - _table (0), - _nchan (0), -@@ -163,7 +309,7 @@ - - int VResampler::process (void) - { -- unsigned int k, np, in, nr, n, c; -+ unsigned int j, k, np, in, nr, n, c; - int i, hl, nz; - double ph, dp, dd; - float a, b, *p1, *p2, *q1, *q2; -@@ -212,23 +358,55 @@ - a = 1.0f - b; - q1 = _table->_ctab + hl * k; - q2 = _table->_ctab + hl * (np - k); -- for (i = 0; i < hl; i++) -+#ifdef __SSE2__ -+ if ((hl % 4) == 0 && _nchan == 1) -+ { -+ *out_data++ = calc_mono_sample_sse (hl, b, p1, p2, q1, q2); -+ } -+ else if ((hl % 4) == 0 && _nchan == 2) - { -- _c1 [i] = a * q1 [i] + b * q1 [i + hl]; -- _c2 [i] = a * q2 [i] + b * q2 [i - hl]; -+ if (out_count >= 2) -+ { -+ calc_stereo_sample_sse (hl, b, p1, p2, q1, q2, out_data); -+ } -+ else -+ { -+ float tmp[4]; -+ calc_stereo_sample_sse (hl, b, p1, p2, q1, q2, 
tmp); -+ out_data[0] = tmp[0]; -+ out_data[1] = tmp[1]; -+ } -+ out_data += 2; -+ } -+ else if ((hl % 4) == 0 && (_nchan % 4) == 0) -+ { -+ for (j = 0; j < _nchan; j += 4) -+ { -+ calc_quad_sample_sse (hl, _nchan, b, p1 + j, p2 + j, q1, q2, out_data + j); -+ } -+ out_data += _nchan; - } -- for (c = 0; c < _nchan; c++) -+ else -+#endif - { -- q1 = p1 + c; -- q2 = p2 + c; -- a = 1e-25f; - for (i = 0; i < hl; i++) - { -- q2 -= _nchan; -- a += *q1 * _c1 [i] + *q2 * _c2 [i]; -- q1 += _nchan; -+ _c1 [i] = a * q1 [i] + b * q1 [i + hl]; -+ _c2 [i] = a * q2 [i] + b * q2 [i - hl]; -+ } -+ for (c = 0; c < _nchan; c++) -+ { -+ q1 = p1 + c; -+ q2 = p2 + c; -+ a = 1e-25f; -+ for (i = 0; i < hl; i++) -+ { -+ q2 -= _nchan; -+ a += *q1 * _c1 [i] + *q2 * _c2 [i]; -+ q1 += _nchan; -+ } -+ *out_data++ = a - 1e-25f; - } -- *out_data++ = a - 1e-25f; - } - } - else diff -Nru zita-resampler-1.6.2/debian/rules zita-resampler-1.8.0/debian/rules --- zita-resampler-1.6.2/debian/rules 2020-03-15 16:07:31.000000000 +0000 +++ zita-resampler-1.8.0/debian/rules 2021-01-02 21:14:01.000000000 +0000 @@ -1,13 +1,16 @@ #!/usr/bin/make -f -DEB_HOST_MULTIARCH ?= $(shell dpkg-architecture -qDEB_HOST_MULTIARCH) +include /usr/share/dpkg/architecture.mk export DEB_BUILD_MAINT_OPTIONS = hardening=+bindnow -export DEB_LDFLAGS_MAINT_APPEND = -Wl,--as-needed export PREFIX=/usr export LIBDIR=$(PREFIX)/lib/$(DEB_HOST_MULTIARCH) export DESTDIR=$(CURDIR)/debian/tmp +ifeq ($(DEB_HOST_ARCH),$(filter $(DEB_HOST_ARCH),amd64 x32)) + CPPFLAGS += -DENABLE_SSE2 +endif + %: dh $@ -Dsource -Dapps diff -Nru zita-resampler-1.6.2/docs/resampler.html zita-resampler-1.8.0/docs/resampler.html --- zita-resampler-1.6.2/docs/resampler.html 2018-08-24 18:41:47.000000000 +0000 +++ zita-resampler-1.8.0/docs/resampler.html 2020-12-29 11:12:36.000000000 +0000 @@ -28,6 +28,11 @@ high-quality sample rate conversion.

+From version 1.8.0, libzita-resampler can be compiled to use SSE2 instructions. +This will reduce CPU load by a factor of at least 2. An ARM NEON option will be +provided in some future release. +

+

The library operates on signals represented in single-precision floating point format. For multichannel operation both the input and output signals are assumed to be stored as interleaved samples. @@ -54,14 +59,14 @@

The Resampler class performs resampling at a fixed ratio F_out / F_in -which is required to be ≥ 1/16 and be reducible to the form b / a +which is required to be ≥ 1/64 and be reducible to the form b / a with a, b integer and b ≤ 1000. This includes all the 'standard' ratios, e.g. 96000 / 44100 = 320 / 147. These restrictions allow for a more efficient implementation.

The VResampler class provides an arbitrary ratio r in the range -1/16 ≤ r ≤ 64 and which can variable within a range of 0.95 to +1/64 ≤ r ≤ 64 and which can variable within a range of 0.95 to 16.0 w.r.t. the originally configured one. The lower limit here is necessary because this class still uses a fixed multiphase filter, with only the phase step being variable. This class was developed for converting between two nominally fixed @@ -225,16 +230,16 @@ again, or provide a new one, and re-initialise the input count and pointer. If at that time out_count is not zero, you can either leave the output parameters as they are for the next call to process (), or you could -empty the part of the output buffer that has been filled and re-use it from -the start, or provide a completely different one. +read the part of the output buffer that has been filled and then re-use it +from the start, or provide a completely different one.

The same applies to the input buffer when it is not empty on return of -process (): it can be left alone or be replaced. A number of input +process (): it can be left alone, re-used or replaced. A number of input samples is stored internally between process () calls as part of the -resampler state, but this never includes samples that have not yet been used. -So you can 'revise' the input data, starting from the frame pointed to by the -returned inp_data, up to the last moment. +resampler state, but this never includes samples that have not yet been used +to compute an output sample. So you can 'revise' the input data, starting from +the frame pointed to by the returned inp_data, up to the last moment.

All this means that both classes will interface easily with fixed input and @@ -272,9 +277,9 @@

  • inserting k / 2 zero-valued samples at the end will ensure that the last output sample produced will correspond to a position as close -as possible but not past the last real input sample,
  • +as possible but not past the last input sample,
  • inserting k - 1 zero valued samples will ensure that the output -includes the full filter response for the last real input sample.
  • +includes the full filter response for the last input sample.

@@ -301,9 +306,9 @@ input data.

-The 'resample' application supplied with the library sources provides -an example of how to use the Resampler class. For an example -using VResampler you can have a look at zita_a2j and zita_ja2. +The test programs supplied with the library sources provide some +examples of how to use both classes. For an another example using +VResampler you can have a look at zita_ajbridge or zita_njbridge.


@@ -371,7 +376,7 @@

fs_inp, fs_out: The input and output sample rates. The ratio fs_out -/ fs_inp must be ≥ 1/16 and reducible to the form b / a +/ fs_inp must be ≥ 1/64 and reducible to the form b / a with a, b integer and b ≤ 1000.

nchan: Number of channels, must not be zero. @@ -404,7 +409,7 @@ Parameters:

-ratio: The resampling ratio wich must be between 1/16 and 64. +ratio: The resampling ratio wich must be between 1/64 and 64.

nchan: Number of channels, must not be zero.

diff -Nru zita-resampler-1.6.2/README zita-resampler-1.8.0/README --- zita-resampler-1.6.2/README 2018-08-24 18:41:47.000000000 +0000 +++ zita-resampler-1.8.0/README 2020-12-29 14:25:11.000000000 +0000 @@ -3,6 +3,14 @@ see the 'docs' directory. +Release 1.8.0 (30/12/2020) +--------------------------- + +* Added SSE2 support for Resampler and VResampler. + This is enabled by default in the Makefile. +* Cleanup and some minor bug fixes. + + Release 1.6.2 (25/08/2018) --------------------------- diff -Nru zita-resampler-1.6.2/source/Makefile zita-resampler-1.8.0/source/Makefile --- zita-resampler-1.6.2/source/Makefile 2018-08-24 18:41:47.000000000 +0000 +++ zita-resampler-1.8.0/source/Makefile 2020-12-28 20:09:24.000000000 +0000 @@ -1,6 +1,6 @@ # ---------------------------------------------------------------------------- # -# Copyright (C) 2006-2018 Fons Adriaensen +# Copyright (C) 2006-2020 Fons Adriaensen # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -26,12 +26,12 @@ LIBDIR ?= $(PREFIX)/lib$(SUFFIX) MAJVERS = 1 -MINVERS = 6.2 +MINVERS = 8.0 VERSION = $(MAJVERS).$(MINVERS) DISTDIR = zita-resampler-$(VERSION) - CPPFLAGS += -I. 
-D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS +CPPFLAGS += -DENABLE_SSE2 CXXFLAGS += -Wall -fPIC -O2 -ffast-math CXXFLAGS += -march=native LDFLAGS += diff -Nru zita-resampler-1.6.2/source/resampler.cc zita-resampler-1.8.0/source/resampler.cc --- zita-resampler-1.6.2/source/resampler.cc 2018-08-24 18:41:47.000000000 +0000 +++ zita-resampler-1.8.0/source/resampler.cc 2020-12-29 13:22:10.000000000 +0000 @@ -1,6 +1,6 @@ // ---------------------------------------------------------------------------- // -// Copyright (C) 2006-2012 Fons Adriaensen +// Copyright (C) 2006-2020 Fons Adriaensen // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -22,7 +22,14 @@ #include #include #include -#include + +#undef ENABLE_VEC4 +#if defined(ENABLE_SSE2) +# define ENABLE_VEC4 +# include +#endif + +#include "zita-resampler/resampler.h" static unsigned int gcd (unsigned int a, unsigned int b) @@ -31,18 +38,18 @@ if (b == 0) return a; while (1) { - if (a > b) - { - a = a % b; - if (a == 0) return b; - if (a == 1) return 1; - } - else - { - b = b % a; - if (b == 0) return a; - if (b == 1) return 1; - } + if (a > b) + { + a = a % b; + if (a == 0) return b; + if (a == 1) return 1; + } + else + { + b = b % a; + if (b == 0) return a; + if (b == 1) return 1; + } } return 1; } @@ -79,41 +86,54 @@ unsigned int hlen, double frel) { - unsigned int g, h, k, n, s; + unsigned int np, dp, mi, hl, n; double r; - float *B = 0; Resampler_table *T = 0; - k = s = 0; - if (fs_inp && fs_out && nchan) + if (!fs_inp || !fs_out || !nchan) { - r = (double) fs_out / (double) fs_inp; - g = gcd (fs_out, fs_inp); - n = fs_out / g; - s = fs_inp / g; - if ((16 * r >= 1) && (n <= 1000)) - { - h = hlen; - k = 250; - if (r < 1) - { - frel *= r; - h = (unsigned int)(ceil (h / r)); - k = (unsigned int)(ceil (k / r)); - } - T = Resampler_table::create (frel, h, n); - B = new float [nchan * (2 * h - 1 + k)]; - } + clear (); + return 1; 
} + + r = (double) fs_out / (double) fs_inp; + n = gcd (fs_out, fs_inp); + np = fs_out / n; + dp = fs_inp / n; + if ((64 * r < 1.0) || (np > 1000)) + { + clear (); + return 1; + } + + hl = hlen; + mi = 32; + if (r < 1.0) + { + frel *= r; + hl = (unsigned int)(ceil (hl / r)); + mi = (unsigned int)(ceil (mi / r)); + } +#ifdef ENABLE_VEC4 + hl = (hl + 3) & ~3; +#endif + T = Resampler_table::create (frel, hl, np); + clear (); if (T) { - _table = T; - _buff = B; - _nchan = nchan; - _inmax = k; - _pstep = s; - return reset (); + _table = T; + n = nchan * (2 * hl + mi); +#ifdef ENABLE_VEC4 + posix_memalign ((void **)(&_buff), 16, n * sizeof (float)); + memset (_buff, 0, n * sizeof (float)); +#else + _buff = new float [n]; +#endif + _nchan = nchan; + _inmax = mi; + _pstep = dp; + return reset (); } else return 1; } @@ -122,7 +142,11 @@ void Resampler::clear (void) { Resampler_table::destroy (_table); +#ifdef ENABLE_VEC4 + free (_buff); +#else delete[] _buff; +#endif _buff = 0; _table = 0; _nchan = 0; @@ -161,7 +185,7 @@ if (_table) { _nread = 2 * _table->_hl; - return 0; + return 0; } return 1; } @@ -169,89 +193,116 @@ int Resampler::process (void) { - unsigned int hl, ph, np, dp, in, nr, nz, i, n, c; - float *p1, *p2; + unsigned int hl, np, ph, dp, in, nr, nz, di, i, j, n; + float *c1, *c2, *p1, *p2, *q1, *q2; if (!_table) return 1; - hl = _table->_hl; np = _table->_np; dp = _pstep; in = _index; nr = _nread; - ph = _phase; nz = _nzero; - n = (2 * hl - nr) * _nchan; - p1 = _buff + in * _nchan; - p2 = p1 + n; + ph = _phase; + + p1 = _buff + in; + p2 = p1 + 2 * hl - nr; + di = 2 * hl + _inmax; while (out_count) { - if (nr) - { - if (inp_count == 0) break; - if (inp_data) - { - for (c = 0; c < _nchan; c++) p2 [c] = inp_data [c]; - inp_data += _nchan; - nz = 0; - } - else - { - for (c = 0; c < _nchan; c++) p2 [c] = 0; - if (nz < 2 * hl) nz++; - } - nr--; - p2 += _nchan; - inp_count--; - } - else - { - if (out_data) - { - if (nz < 2 * hl) - { - float *c1 = _table->_ctab + hl * 
ph; - float *c2 = _table->_ctab + hl * (np - ph); - for (c = 0; c < _nchan; c++) - { - float *q1 = p1 + c; - float *q2 = p2 + c; - float s = 1e-20f; - for (i = 0; i < hl; i++) - { - q2 -= _nchan; - s += *q1 * c1 [i] + *q2 * c2 [i]; - q1 += _nchan; - } - *out_data++ = s - 1e-20f; - } - } - else + while (nr && inp_count) + { + if (inp_data) + { + for (j = 0; j < _nchan; j++) p2 [j * di] = inp_data [j]; + inp_data += _nchan; + nz = 0; + } + else + { + for (j = 0; j < _nchan; j++) p2 [j * di] = 0; + if (nz < 2 * hl) nz++; + } + p2++; + nr--; + inp_count--; + } + if (nr) break; + + if (out_data) + { + if (nz < 2 * hl) + { + c1 = _table->_ctab + hl * ph; + c2 = _table->_ctab + hl * (np - ph); + +#if defined(ENABLE_SSE2) + __m128 C1, C2, Q1, Q2, S; + for (j = 0; j < _nchan; j++) + { + q1 = p1 + j * di; + q2 = p2 + j * di; + S = _mm_setzero_ps (); + for (i = 0; i < hl; i += 4) + { + C1 = _mm_load_ps (c1 + i); + Q1 = _mm_loadu_ps (q1); + q2 -= 4; + S = _mm_add_ps (S, _mm_mul_ps (C1, Q1)); + C2 = _mm_loadr_ps (c2 + i); + Q2 = _mm_loadu_ps (q2); + q1 += 4; + S = _mm_add_ps (S, _mm_mul_ps (C2, Q2)); + } + *out_data++ = S [0] + S [1] + S [2] + S [3]; + } +#else + float s; + for (j = 0; j < _nchan; j++) + { + q1 = p1 + j * di; + q2 = p2 + j * di; + s = 1e-20f; + for (i = 0; i < hl; i++) + { + q2--; + s += *q1 * c1 [i] + *q2 * c2 [i]; + q1++; + } + *out_data++ = s - 1e-20f; + } +#endif + } + else + { + for (j = 0; j < _nchan; j++) *out_data++ = 0; + } + } + out_count--; + + ph += dp; + if (ph >= np) + { + nr = ph / np; + ph -= nr * np; + in += nr; + p1 += nr; + if (in >= _inmax) + { + n = 2 * hl - nr; + p2 = _buff; + for (j = 0; j < _nchan; j++) { - for (c = 0; c < _nchan; c++) *out_data++ = 0; + memmove (p2 + j * di, p1 + j * di, n * sizeof (float)); } - } - out_count--; - - ph += dp; - if (ph >= np) - { - nr = ph / np; - ph -= nr * np; - in += nr; - p1 += nr * _nchan;; - if (in >= _inmax) - { - n = (2 * hl - nr) * _nchan; - memcpy (_buff, p1, n * sizeof (float)); - in = 0; - p1 
= _buff; - p2 = p1 + n; - } - } - } + in = 0; + p1 = _buff; + p2 = p1 + n; + } + } } + _index = in; _nread = nr; _phase = ph; diff -Nru zita-resampler-1.6.2/source/resampler-table.cc zita-resampler-1.8.0/source/resampler-table.cc --- zita-resampler-1.6.2/source/resampler-table.cc 2018-08-24 18:41:47.000000000 +0000 +++ zita-resampler-1.8.0/source/resampler-table.cc 2020-12-28 19:54:44.000000000 +0000 @@ -1,6 +1,6 @@ // ---------------------------------------------------------------------------- // -// Copyright (C) 2006-2012 Fons Adriaensen +// Copyright (C) 2006-2020 Fons Adriaensen // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -25,6 +25,12 @@ #include +#undef ENABLE_VEC4 +#if defined(ENABLE_SSE2) +# define ENABLE_VEC4 +#endif + + int zita_resampler_major_version (void) { return ZITA_RESAMPLER_MAJOR_VERSION; @@ -55,7 +61,6 @@ } - Resampler_table *Resampler_table::_list = 0; Resampler_mutex Resampler_table::_mutex; @@ -67,11 +72,16 @@ _hl (hl), _np (np) { - unsigned int i, j; + unsigned int i, j, n; double t; float *p; - _ctab = new float [hl * (np + 1)]; + n = hl * (np + 1); +#ifdef ENABLE_VEC4 + posix_memalign ((void **) &_ctab, 16, n * sizeof (float)); +#else + _ctab = new float [n]; +#endif p = _ctab; for (j = 0; j <= np; j++) { @@ -88,7 +98,11 @@ Resampler_table::~Resampler_table (void) { +#ifdef ENABLE_VEC4 + free (_ctab); +#else delete[] _ctab; +#endif } diff -Nru zita-resampler-1.6.2/source/vresampler.cc zita-resampler-1.8.0/source/vresampler.cc --- zita-resampler-1.6.2/source/vresampler.cc 2018-08-24 18:41:47.000000000 +0000 +++ zita-resampler-1.8.0/source/vresampler.cc 2020-12-29 13:24:52.000000000 +0000 @@ -1,6 +1,6 @@ // ---------------------------------------------------------------------------- // -// Copyright (C) 2006-2013 Fons Adriaensen +// Copyright (C) 2006-2020 Fons Adriaensen // // This program is free software; you can redistribute it 
and/or modify // it under the terms of the GNU General Public License as published by @@ -22,7 +22,14 @@ #include #include #include -#include + +#undef ENABLE_VEC4 +#if defined(ENABLE_SSE2) +# define ENABLE_VEC4 +# include +#endif + +#include "zita-resampler/vresampler.h" VResampler::VResampler (void) : @@ -56,34 +63,48 @@ unsigned int hlen, double frel) { - unsigned int h, k, n; - double s; + unsigned int hl, mi, n; + double dp; Resampler_table *T = 0; - if (! nchan) return 1; - n = NPHASE; - s = n / ratio; - h = hlen; - k = 250; - if (ratio < 1) + if (!nchan || (64 * ratio < 1.0) || (ratio > 64)) + { + clear (); + return 1; + } + + dp = NPHASE / ratio; + hl = hlen; + mi = 32; + if (ratio < 1.0) { frel *= ratio; - h = (unsigned int)(ceil (h / ratio)); - k = (unsigned int)(ceil (k / ratio)); + hl = (unsigned int)(ceil (hl / ratio)); + mi = (unsigned int)(ceil (mi / ratio)); } - T = Resampler_table::create (frel, h, n); +#ifdef ENABLE_VEC4 + hl = (hl + 3) & ~3; +#endif + T = Resampler_table::create (frel, hl, NPHASE); clear (); if (T) { _table = T; - _buff = new float [nchan * (2 * h - 1 + k)]; - _c1 = new float [2 * h]; - _c2 = new float [2 * h]; + n = nchan * (2 * hl + mi); +#ifdef ENABLE_VEC4 + posix_memalign ((void **)(&_buff), 16, n * sizeof (float)); + posix_memalign ((void **)(&_c1), 16, hl * sizeof (float)); + posix_memalign ((void **)(&_c2), 16, hl * sizeof (float)); +#else + _buff = new float [n]; + _c1 = new float [hl]; + _c2 = new float [hl]; +#endif _nchan = nchan; - _inmax = k; - _ratio = ratio; - _pstep = s; - _qstep = s; + _ratio = ratio; + _inmax = mi; + _pstep = dp; + _qstep = dp; _wstep = 1; return reset (); } @@ -94,9 +115,15 @@ void VResampler::clear (void) { Resampler_table::destroy (_table); +#ifdef ENABLE_VEC4 + free (_buff); + free (_c1); + free (_c2); +#else delete[] _buff; delete[] _c1; delete[] _c2; +#endif _buff = 0; _c1 = 0; _c2 = 0; @@ -156,17 +183,22 @@ inp_data = 0; out_data = 0; _index = 0; - _phase = 0; - _nread = 2 * _table->_hl; 
+ _nread = 0; _nzero = 0; - return 0; + _phase = 0; + if (_table) + { + _nread = 2 * _table->_hl; + return 0; + } + return 1; } int VResampler::process (void) { - unsigned int k, np, in, nr, n, c; - int i, hl, nz; + int nr, np, hl, nz, di, i, n; + unsigned int in, j; double ph, dp, dd; float a, b, *p1, *p2, *q1, *q2; @@ -179,88 +211,131 @@ nz = _nzero; ph = _phase; dp = _pstep; - n = (2 * hl - nr) * _nchan; - p1 = _buff + in * _nchan; - p2 = p1 + n; + + p1 = _buff + in; + p2 = p1 + 2 * hl - nr; + di = 2 * hl + _inmax; while (out_count) { - if (nr) - { - if (inp_count == 0) break; - if (inp_data) - { - for (c = 0; c < _nchan; c++) p2 [c] = inp_data [c]; - inp_data += _nchan; - nz = 0; - } - else - { - for (c = 0; c < _nchan; c++) p2 [c] = 0; - if (nz < 2 * hl) nz++; - } - nr--; - p2 += _nchan; - inp_count--; - } - else + while (nr && inp_count) + { + if (inp_data) + { + for (j = 0; j < _nchan; j++) p2 [j * di] = inp_data [j]; + inp_data += _nchan; + nz = 0; + } + else + { + for (j = 0; j < _nchan; j++) p2 [j * di] = 0; + if (nz < 2 * hl) nz++; + } + p2++; + nr--; + inp_count--; + } + if (nr) break; + + if (out_data) { - if (out_data) + if (nz < 2 * hl) { - if (nz < 2 * hl) + n = (unsigned int) ph; + b = (float)(ph - n); + a = 1.0f - b; + q1 = _table->_ctab + hl * n; + q2 = _table->_ctab + hl * (np - n); + +#if defined(ENABLE_SSE2) + __m128 C1, C2, Q1, Q2, S; + C1 = _mm_load1_ps (&a); + C2 = _mm_load1_ps (&b); + for (i = 0; i < hl; i += 4) { - k = (unsigned int) ph; - b = (float)(ph - k); - a = 1.0f - b; - q1 = _table->_ctab + hl * k; - q2 = _table->_ctab + hl * (np - k); - for (i = 0; i < hl; i++) - { - _c1 [i] = a * q1 [i] + b * q1 [i + hl]; - _c2 [i] = a * q2 [i] + b * q2 [i - hl]; - } - for (c = 0; c < _nchan; c++) - { - q1 = p1 + c; - q2 = p2 + c; - a = 1e-25f; - for (i = 0; i < hl; i++) - { - q2 -= _nchan; - a += *q1 * _c1 [i] + *q2 * _c2 [i]; - q1 += _nchan; - } - *out_data++ = a - 1e-25f; - } + Q1 = _mm_load_ps (q1 + i); + Q2 = _mm_load_ps (q1 + i + hl); + S 
= _mm_add_ps (_mm_mul_ps (Q1, C1), _mm_mul_ps (Q2, C2)); + _mm_store_ps (_c1 + i, S); + Q1 = _mm_load_ps (q2 + i); + Q2 = _mm_load_ps (q2 + i - hl); + S = _mm_add_ps (_mm_mul_ps (Q1, C1), _mm_mul_ps (Q2, C2)); + _mm_store_ps (_c2 + i, S); } - else + for (j = 0; j < _nchan; j++) + { + q1 = p1 + j * di; + q2 = p2 + j * di; + S = _mm_setzero_ps (); + for (i = 0; i < hl; i += 4) + { + C1 = _mm_load_ps (_c1 + i); + Q1 = _mm_loadu_ps (q1); + q2 -= 4; + S = _mm_add_ps (S, _mm_mul_ps (C1, Q1)); + C2 = _mm_loadr_ps (_c2 + i); + Q2 = _mm_loadu_ps (q2); + q1 += 4; + S = _mm_add_ps (S, _mm_mul_ps (C2, Q2)); + } + *out_data++ = S [0] + S [1] + S [2] + S [3]; + } + +#else + float s; + for (i = 0; i < hl; i++) { - for (c = 0; c < _nchan; c++) *out_data++ = 0; + _c1 [i] = a * q1 [i] + b * q1 [i + hl]; + _c2 [i] = a * q2 [i] + b * q2 [i - hl]; } - } - out_count--; - - dd = _qstep - dp; - if (fabs (dd) < 1e-30) dp = _qstep; - else dp += _wstep * dd; - ph += dp; - if (ph >= np) - { - nr = (unsigned int) floor( ph / np); - ph -= nr * np;; - in += nr; - p1 += nr * _nchan;; - if (in >= _inmax) + for (j = 0; j < _nchan; j++) { - n = (2 * hl - nr) * _nchan; - memcpy (_buff, p1, n * sizeof (float)); - in = 0; - p1 = _buff; - p2 = p1 + n; + q1 = p1 + j * di; + q2 = p2 + j * di; + s = 1e-20f; + for (i = 0; i < hl; i++) + { + q2--; + s += *q1 * _c1 [i] + *q2 * _c2 [i]; + q1++; + } + *out_data++ = s - 1e-20f; } +#endif + } + else + { + for (j = 0; j < _nchan; j++) *out_data++ = 0; } } + out_count--; + + dd = _qstep - dp; + if (fabs (dd) < 1e-20) dp = _qstep; + else dp += _wstep * dd; + ph += dp; + if (ph >= np) + { + nr = (unsigned int) floor (ph / np); + ph -= nr * np;; + in += nr; + p1 += nr; + + if (in >= _inmax) + { + n = 2 * hl - nr; + p2 = _buff; + for (j = 0; j < _nchan; j++) + { + memmove (p2 + j * di, p1 + j * di, n * sizeof (float)); + } + in = 0; + p1 = _buff; + p2 = p1 + n; + } + } } + _index = in; _nread = nr; _phase = ph; diff -Nru 
zita-resampler-1.6.2/source/zita-resampler/resampler.h zita-resampler-1.8.0/source/zita-resampler/resampler.h --- zita-resampler-1.6.2/source/zita-resampler/resampler.h 2018-08-24 18:41:47.000000000 +0000 +++ zita-resampler-1.8.0/source/zita-resampler/resampler.h 2020-12-28 19:46:46.000000000 +0000 @@ -1,6 +1,6 @@ // ---------------------------------------------------------------------------- // -// Copyright (C) 2006-2012 Fons Adriaensen +// Copyright (C) 2006-2020 Fons Adriaensen // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ #define __RESAMPLER_H -#include +#include "zita-resampler/resampler-table.h" class Resampler @@ -55,8 +55,8 @@ unsigned int out_count; float *inp_data; float *out_data; - void *inp_list; - void *out_list; + float **inp_list; + float **out_list; private: diff -Nru zita-resampler-1.6.2/source/zita-resampler/resampler-table.h zita-resampler-1.8.0/source/zita-resampler/resampler-table.h --- zita-resampler-1.6.2/source/zita-resampler/resampler-table.h 2018-08-24 18:41:47.000000000 +0000 +++ zita-resampler-1.8.0/source/zita-resampler/resampler-table.h 2020-12-28 14:27:20.000000000 +0000 @@ -1,6 +1,6 @@ // ---------------------------------------------------------------------------- // -// Copyright (C) 2006-2012 Fons Adriaensen +// Copyright (C) 2006-2020 Fons Adriaensen // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -26,7 +26,7 @@ #define ZITA_RESAMPLER_MAJOR_VERSION 1 -#define ZITA_RESAMPLER_MINOR_VERSION 6 +#define ZITA_RESAMPLER_MINOR_VERSION 8 extern int zita_resampler_major_version (void); diff -Nru zita-resampler-1.6.2/source/zita-resampler/vresampler.h zita-resampler-1.8.0/source/zita-resampler/vresampler.h --- zita-resampler-1.6.2/source/zita-resampler/vresampler.h 2018-08-24 18:41:47.000000000 +0000 +++ 
zita-resampler-1.8.0/source/zita-resampler/vresampler.h 2020-12-29 09:58:06.000000000 +0000 @@ -1,6 +1,6 @@ // ---------------------------------------------------------------------------- // -// Copyright (C) 2006-2012 Fons Adriaensen +// Copyright (C) 2006-2020 Fons Adriaensen // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -22,7 +22,7 @@ #define __VRESAMPLER_H -#include +#include "zita-resampler/resampler-table.h" class VResampler @@ -61,7 +61,7 @@ private: - enum { NPHASE = 256 }; + enum { NPHASE = 120 }; Resampler_table *_table; unsigned int _nchan; diff -Nru zita-resampler-1.6.2/test/jackproc.cc zita-resampler-1.8.0/test/jackproc.cc --- zita-resampler-1.6.2/test/jackproc.cc 1970-01-01 00:00:00.000000000 +0000 +++ zita-resampler-1.8.0/test/jackproc.cc 2020-12-29 10:08:29.000000000 +0000 @@ -0,0 +1,296 @@ +// ----------------------------------------------------------------------------- +// +// Copyright (C) 2020 Fons Adriaensen +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +// +// ----------------------------------------------------------------------------- + + +// Zita-resampler demo program. +// +// Imagine you want a jack client that internally needs to use a +// fixed sample rate and period size regardless of how Jack is +// configured. 
This demo program contains all the buffering and +// resampling required to do this. The process in this case is +// just a copy. +// +// The signal flow is like this: +// +// jack -> queue -> resampler -> process -> resampler -> queue -> jack. +// +// The queues are required because in most cases the process period, +// taking the resampling ratio into account, will not be an integer +// fraction or multiple of jack's period. They will add some latency, +// but never more than the process period time plus the delay of the +// resamplers. + + +//#define USE_VRESAMPLER + + +#include +#include +#include +#include +#include +#include +#include +#include +#include "lfqueue.h" + + +#define MAXCHAN 16 + +static jack_client_t *jack_handle; +static jack_port_t *jack_capt [MAXCHAN]; +static jack_port_t *jack_play [MAXCHAN]; +static bool active = false; +static int nchan; +static uint32_t proc_rate; +static uint32_t proc_frag; +static uint32_t jack_rate; +static uint32_t jack_frag; +#ifdef USE_VRESAMPLER +static VResampler input_resampler; +static VResampler output_resampler; +#else +static Resampler input_resampler; +static Resampler output_resampler; +#endif +static Audioqueue *input_queue = 0; +static Audioqueue *output_queue = 0; +static float *input_buff = 0; +static float *output_buff = 0; + + + +int jack_process (jack_nframes_t nframes, void *arg) +{ + int i, j, k, n; + float *inp [MAXCHAN]; + float *out [MAXCHAN]; + float *p, *q; + + if (! active) return 0; + + // Get port buffers. + for (i = 0; i < nchan; i++) + { + inp [i] = (float *)(jack_port_get_buffer (jack_capt [i], nframes)); + out [i] = (float *)(jack_port_get_buffer (jack_play [i], nframes)); + } + + // Copy from Jack ports to input queue. + // The for loop takes care of wraparound in the queue, + // there will be at most two iterations. + for (n = 0; n < (int) nframes; n += k) + { + // Get the number of frames that can be written + // without wraparound.
+ k = input_queue->write_nowrap (); + if (k > (int) nframes - n) k = nframes - n; + // Copy and interleave channels. + for (i = 0; i < nchan; i++) + { + p = inp [i] + n; + q = input_queue->write_ptr (i); + for (j = 0; j < k; j++) q [nchan * j] = p [j]; + } + // Update queue state. + input_queue->write_commit (k); + } + // Check queue overflow. + assert (input_queue->write_avail () >= 0); + + while (output_queue->read_avail () < (int) nframes) + { + // Resample from Jack's sample rate to the process + // sample rate, reading from input_queue and writing + // exactly proc_frag frames to input_buff. + // The while loop takes care of wraparound. + input_resampler.out_data = input_buff; + input_resampler.out_count = proc_frag; + while (input_resampler.out_count) + { + input_resampler.inp_data = input_queue->read_ptr (); + input_resampler.inp_count = n = input_queue->read_nowrap (); + input_resampler.process (); + input_queue->read_commit (n - input_resampler.inp_count); + } + + // Now we have proc_frag frames in input_buff. + // Normally there would be some process using + // these, here we just copy to output_buff. + memcpy (output_buff, input_buff, nchan * proc_frag * sizeof (float)); + + // Resample from the process sample rate to Jack's + // sample rate, taking exactly proc_frag frames + // from output_buff, and writing to output_queue. + // The while loop takes care of wraparound in the + // queue. + output_resampler.inp_data = output_buff; + output_resampler.inp_count = proc_frag; + while (output_resampler.inp_count) + { + output_resampler.out_data = output_queue->write_ptr (); + output_resampler.out_count = n = output_queue->write_nowrap (); + output_resampler.process (); + output_queue->write_commit (n - output_resampler.out_count); + } + } + // Check queues. + assert (input_queue->read_avail () >= 0); + assert (output_queue->write_avail () >= 0); + + // Copy from output queue to Jack ports. 
+ // The for loop takes care of wraparound in the queue, + // there will be at most two iterations. + for (n = 0; n < (int) nframes; n += k) + { + // Get the number of frames that can be read + // without wraparound. + k = output_queue->read_nowrap (); + if (k > (int) nframes - n) k = nframes - n; + // Copy and de-interleave channels. + for (i = 0; i < nchan; i++) + { + p = output_queue->read_ptr (i); + q = out [i] + n; + for (j = 0; j < k; j++) q [j] = p [nchan * j]; + } + // Update queue state. + output_queue->read_commit (k); + } + // Check queue underflow. + assert (output_queue->read_avail () >= 0); + + return 0; +} + + +static void sigint_handler (int) +{ + signal (SIGINT, SIG_IGN); + active = false; +} + + +int main (int ac, char *av []) +{ + int32_t i, n; + int64_t k; + char s [16]; + jack_status_t stat; + + if (ac < 4) + { + fprintf (stderr, "jackproc \n"); + return 1; + } + nchan = atoi (av [1]); + proc_rate = atoi (av [2]); + proc_frag = atoi (av [3]); + if (nchan < 1) return 1; + if (nchan > MAXCHAN) nchan = MAXCHAN; + + // Create and initialise the Jack client. + jack_handle = jack_client_open ("Jackproc", JackNoStartServer, &stat); + if (jack_handle == 0) + { + fprintf (stderr, "Can't connect to Jack, is the server running ?\n"); + return 1; + } + + jack_set_process_callback (jack_handle, jack_process, 0); + if (jack_activate (jack_handle)) + { + fprintf(stderr, "Can't activate Jack"); + return 1; + } + + for (i = 0; i < nchan; i++) + { + sprintf (s, "in_%d", i); + jack_capt [i] = jack_port_register (jack_handle, s, JACK_DEFAULT_AUDIO_TYPE, JackPortIsInput, 0); + sprintf (s, "out_%d", i); + jack_play [i] = jack_port_register (jack_handle, s, JACK_DEFAULT_AUDIO_TYPE, JackPortIsOutput, 0); + } + + jack_rate = jack_get_sample_rate (jack_handle); + jack_frag = jack_get_buffer_size (jack_handle); + + // Set the resampling ratios. 
+#ifdef USE_VRESAMPLER + if (input_resampler.setup ((double) proc_rate / jack_rate, nchan, 32)) +#else + if (input_resampler.setup (jack_rate, proc_rate, nchan, 32)) +#endif + { + fprintf (stderr, "Resampler can't handle the ratio %d/%d\n", + proc_rate, jack_rate); + goto cleanup; + } +#ifdef USE_VRESAMPLER + if (input_resampler.setup ((double) jack_rate / proc_rate, nchan, 32)) +#else + if (output_resampler.setup (proc_rate, jack_rate, nchan, 32)) +#endif + { + fprintf (stderr, "Resampler can't handle the ratio %d/%d\n", + jack_rate, proc_rate); + goto cleanup; + } + + // Initialise the resamplers for zero delay. + input_resampler.inp_count = input_resampler.inpsize () - 1; + input_resampler.inp_data = 0; + input_resampler.out_count = 999999; + input_resampler.out_data = 0; + input_resampler.process (); + output_resampler.inp_count = output_resampler.inpsize () - 1; + output_resampler.inp_data = 0; + output_resampler.out_count = 999999; + output_resampler.out_data = 0; + output_resampler.process (); + + input_buff = new float [nchan * proc_frag]; + output_buff = new float [nchan * proc_frag]; + + // Compute the number of extra samples we need to buffer. + k = jack_rate * proc_frag; + n = k / proc_rate; + + // Create the queues, and prefill the input queue. + input_queue = new Audioqueue (jack_frag + n, nchan, true); + output_queue = new Audioqueue (jack_frag + n, nchan, true); + input_queue->write_commit (n); + + signal (SIGINT, sigint_handler); + + // Enable the process callback and wait. + for (active = true; active; usleep (250000)); + +cleanup: + // Cleanup. 
+ jack_deactivate (jack_handle); + jack_client_close (jack_handle); + delete[] input_buff; + delete[] output_buff; + delete input_queue; + delete output_queue; + + return 0; +} + diff -Nru zita-resampler-1.6.2/test/lfqueue.cc zita-resampler-1.8.0/test/lfqueue.cc --- zita-resampler-1.6.2/test/lfqueue.cc 1970-01-01 00:00:00.000000000 +0000 +++ zita-resampler-1.8.0/test/lfqueue.cc 2020-12-27 15:41:51.000000000 +0000 @@ -0,0 +1,60 @@ +// ---------------------------------------------------------------------------- +// +// Copyright (C) 2012-2016 Fons Adriaensen +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+// +// ---------------------------------------------------------------------------- + + +#include "lfqueue.h" + + +Queuebase::Queuebase (int nelm) : + _nelm (0), + _mask (0), + _kwrite (0), + _kread (0) +{ + int32_t k; + + if (nelm > 0x01000000) return; + for (k = 1; k < nelm; k <<= 1); + _nelm = k; + _mask = k - 1; +} + + +Queuebase::~Queuebase (void) +{ +} + + + +Audioqueue::Audioqueue (int32_t minsize, int nchannel, bool interleaved) : + Queuebase (minsize), + _nchan (nchannel), + _inter (interleaved), + _data (0) +{ + _data = new float [_nelm * _nchan]; +} + + +Audioqueue::~Audioqueue (void) +{ + delete[] _data; +} + + diff -Nru zita-resampler-1.6.2/test/lfqueue.h zita-resampler-1.8.0/test/lfqueue.h --- zita-resampler-1.6.2/test/lfqueue.h 1970-01-01 00:00:00.000000000 +0000 +++ zita-resampler-1.8.0/test/lfqueue.h 2020-12-27 22:40:30.000000000 +0000 @@ -0,0 +1,178 @@ +// ---------------------------------------------------------------------------- +// +// Copyright (C) 2010-2020 Fons Adriaensen +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +// +// ---------------------------------------------------------------------------- + + +#ifndef __LFQUEUE_H +#define __LFQUEUE_H + + +#include +#include + + +// Base class for lock-free queues. +// +// The logic implemented here is somewhat different from +// how e.g. Jack's lock-free queues work. 
It does nothing +// to stop the user overflowing or underflowing the queue. +// Avoiding these conditions is easy enough, and allowing +// them can be useful in some applications. +// The logic keeps correct read/write counts as long as the +// over/underflow is less than 2^31 elements. +// Even if data is corrupted, this allows the client process +// to recover while maintaining a defined latency. +// To make this work, the logical queue size (the number of +// elements, regardless of element size in bytes) must be a +// power of two. The constructor will if necessary round up +// the size. + + +class Queuebase +{ +public: + + + Queuebase (int32_t nelm); + ~Queuebase (void); + + // Return queue size. + int32_t nelm (void) const { return _nelm; } + + // Reset queue state to empty. + void reset (void) + { + _kwrite = 0; + _kread = 0; + } + + // Return number of elements that can be written + // without overflow. A negative value indicates + // that the queue is already in overflow. + int32_t write_avail (void) const + { + return _nelm - _kwrite + _kread; + } + + // Return number of elements that can be written + // without wraparound. Note: this only reflects + // the position of the write pointer relative to + // the end of the buffer, so it can be more than + // write_avail(). + int32_t write_nowrap (void) const + { + return _nelm - (_kwrite & _mask); + } + + // Adjust queue state, reflecting nelm elements + // have been written. + void write_commit (int32_t nelm) + { + _kwrite += nelm; + } + + // Return number of elements that can be read + // without underflow. A negative value indicates + // that the queue is already in underflow. + int32_t read_avail (void) const + { + return _kwrite - _kread; + } + + // Return number of elements that can be read + // without wraparound. Note: this only reflects + // the position of the read pointer relative to + // the end of the buffer, so it can be more than + // read_avail().
+ int32_t read_nowrap (void) const + { + return _nelm - (_kread & _mask); + } + + // Adjust queue state, reflecting nf elements + // have been read. + void read_commit (int32_t nf) + { + _kread += nf; + } + +protected: + + int32_t _nelm; + int32_t _mask; + int32_t _kwrite; + int32_t _kread; +}; + + +// Multichannel lock-free audio sample queue. +// Channels can be separate or interleaved. +// The way this queue works is different from +// Jack's lock-free queues in two ways: +// +// 1. See Queuebase above. +// +// 2. For reading or writing, only a pointer is +// provided, so the user has to do the work. +// In many cases this can avoid the need for +// intermediate copies. It also means that +// wraparound is exposed to the user, but +// handling this is quite easy. +// +// Note that all methods inherited from Queuebase +// return a number of frames, not samples. + + + + + +class Audioqueue : public Queuebase +{ +public: + + Audioqueue (int32_t minsize, int nchannel, bool interleaved); + ~Audioqueue (void); + + int32_t nchan (void) const { return _nchan; } + + float *write_ptr (int ch = 0) const + { + uint32_t k = _kwrite & _mask; + if (_inter) k = k * _nchan + ch; + else k += ch * _nelm; + return _data + k; + } + + float *read_ptr (int ch = 0) const + { + uint32_t k = _kread & _mask; + if (_inter) k = k * _nchan + ch; + else k += ch * _nelm; + return _data + k; + } + +private: + + int32_t _nchan; + bool _inter; + float *_data; +}; + + +#endif + diff -Nru zita-resampler-1.6.2/test/Makefile zita-resampler-1.8.0/test/Makefile --- zita-resampler-1.6.2/test/Makefile 1970-01-01 00:00:00.000000000 +0000 +++ zita-resampler-1.8.0/test/Makefile 2020-12-29 14:20:47.000000000 +0000 @@ -0,0 +1,49 @@ +# ---------------------------------------------------------------------------- +# +# Copyright (C) 2020 Fons Adriaensen +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free
Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# ---------------------------------------------------------------------------- + + +# Compilation flags +# +CXXFLAGS += -O2 -Wall + + +# Targets +# +all: speedtest vspeedtest upstest jackproc + + +speedtest: speedtest.o + g++ -o $@ speedtest.o -lzita-resampler + + +vspeedtest: vspeedtest.o + g++ -o $@ vspeedtest.o -lzita-resampler + + +upstest: upstest.o + g++ -o $@ upstest.o -lzita-resampler + + +jackproc: jackproc.o lfqueue.o + g++ -o $@ jackproc.o lfqueue.o -lzita-resampler -ljack + + +clean: + /bin/rm -f *.o *.u *.so *~ speedtest vspeedtest upstest jackproc zz* + diff -Nru zita-resampler-1.6.2/test/speedtest.cc zita-resampler-1.8.0/test/speedtest.cc --- zita-resampler-1.6.2/test/speedtest.cc 1970-01-01 00:00:00.000000000 +0000 +++ zita-resampler-1.8.0/test/speedtest.cc 2020-12-28 16:00:54.000000000 +0000 @@ -0,0 +1,77 @@ +// ---------------------------------------------------------------------------- +// +// Copyright (C) 2020 Fons Adriaensen +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . +// +// ---------------------------------------------------------------------------- + + +#include +#include +#include +#include + + +#define LINP 10000 +#define LOUT 11000 +#define ITER 1000 + + +int main (int ac, char *av[]) +{ + int c, h, i; + Resampler R; + float *inp; + float *out; + timespec t0, t1; + int64_t ds, dn; + double dt; + + if (ac < 3) + { + fprintf (stderr, "speedtest \n"); + return 1; + } + c = atoi (av [1]); + h = atoi (av [2]); + + inp = new float [c * LINP]; + out = new float [c * LOUT]; + + for (i = 0; i < c * LINP; i++) inp [i] = i * 1e-4f; + + R.setup (441, 480, c, h); + + clock_gettime (CLOCK_REALTIME, &t0); + for (i = 0; i < ITER; i++) + { + R.inp_count = LINP; + R.inp_data = inp; + R.out_count = LOUT; + R.out_data = out; + R.process (); + } + clock_gettime (CLOCK_REALTIME, &t1); + ds = t1.tv_sec - t0.tv_sec; + dn = t1.tv_nsec - t0.tv_nsec; + dt = ds + 1e-9 * dn; + printf ("44100 -> 48000, chan = %2d, hlen = %2d %8.3le input frames per second\n", + c, h, LINP * ITER / dt); + + delete[] inp; + delete[] out; + return 0; +} + diff -Nru zita-resampler-1.6.2/test/upstest.cc zita-resampler-1.8.0/test/upstest.cc --- zita-resampler-1.6.2/test/upstest.cc 1970-01-01 00:00:00.000000000 +0000 +++ zita-resampler-1.8.0/test/upstest.cc 2020-12-29 14:18:14.000000000 +0000 @@ -0,0 +1,102 @@ + +#include +#include +#include +#include + + +#define NCHAN 7 +#define CHAN 3 +#define HLEN 48 +#define LINP 300 +#define LOUT 1380 // LINP * 4.6 + + +// Basic upsampling test of Resampler and Vresampler. +// +// We upsample by 4.6 and prefill the resamplers for zero delay. +// In the input signal we put a single sample with value 1 at +// offsets 0, 100 and 201. So in the output we expect a band- +// limited pulse (i.e. a windowed sinc() function) peaking at +// offsets 0, 460, and 924.6. 
+// +// Since we input exactly LINP + inpsize() - 1 samples, and the +// output buffer size is exactly the input size times the ratio, +// the input and output counters should both end up at 0. +// +// test1 > zz1 +// gnuplot +// set grid +// plot 'zz1' u 1:2 w l lt 1, 'zz1' u 1:3 w l lt 2 +// +// Zoom in to verify the positions. + + +int main (int ac, char *av[]) +{ + int i; + Resampler R; + VResampler V; + float inp [NCHAN * LINP]; + float out1 [NCHAN * LOUT]; + float out2 [NCHAN * LOUT]; + float *p1, *p2; + + // Clear input array. + memset (inp, 0, NCHAN * LINP * sizeof (float)); + // Put a single sample at offsets 0, 100 and 201. + inp [CHAN] = 1; + inp [CHAN + NCHAN * 100] = 1; + inp [CHAN + NCHAN * 201] = 1; + + // Setup for upsampling by 46 / 10. + R.setup (10, 46, NCHAN, HLEN); + V.setup (4.6, NCHAN, HLEN); + + // Prefill for zero delay. + R.out_count = LOUT; + R.out_data = out1; + R.inp_count = R.inpsize () / 2 - 1; + R.inp_data = 0; + R.process (); + fprintf (stderr, "inp_count = %6d, out_count = %6d\n", R.inp_count, R.out_count); + + // Process the entire input buffer. + R.inp_count = LINP; + R.inp_data = inp; + R.process (); + fprintf (stderr, "inp_count = %6d, out_count = %6d\n", R.inp_count, R.out_count); + + // Postfill to complete. + R.inp_count = R.inpsize () / 2; + R.inp_data = 0; + R.process (); + fprintf (stderr, "inp_count = %6d, out_count = %6d\n", R.inp_count, R.out_count); + + // Same for VResampler + V.out_count = LOUT; + V.out_data = out2; + V.inp_count = R.inpsize () / 2 - 1; + V.inp_data = 0; + V.process (); + + V.inp_count = LINP; + V.inp_data = inp; + V.process (); + + V.inp_count = R.inpsize () / 2; + V.inp_data = 0; + V.process (); + + // Write both the Resampler and VResampler outputs, they + // should be identical.
+ p1 = out1; + p2 = out2; + for (i = 0; i < LOUT; i++) + { + printf ("%5d %10.8lf %10.8lf\n", i, p1 [CHAN], p2 [CHAN]); + p1 += NCHAN; + p2 += NCHAN; + } + return 0; +} diff -Nru zita-resampler-1.6.2/test/vspeedtest.cc zita-resampler-1.8.0/test/vspeedtest.cc --- zita-resampler-1.6.2/test/vspeedtest.cc 1970-01-01 00:00:00.000000000 +0000 +++ zita-resampler-1.8.0/test/vspeedtest.cc 2020-12-28 16:01:51.000000000 +0000 @@ -0,0 +1,77 @@ +// ---------------------------------------------------------------------------- +// +// Copyright (C) 2020 Fons Adriaensen +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+// +// ---------------------------------------------------------------------------- + + +#include +#include +#include +#include + + +#define LINP 10000 +#define LOUT 11000 +#define ITER 1000 + + +int main (int ac, char *av[]) +{ + int c, h, i; + VResampler R; + float *inp; + float *out; + timespec t0, t1; + int64_t ds, dn; + double dt; + + if (ac < 3) + { + fprintf (stderr, "vspeedtest \n"); + return 1; + } + c = atoi (av [1]); + h = atoi (av [2]); + + inp = new float [c * LINP]; + out = new float [c * LOUT]; + + for (i = 0; i < c * LINP; i++) inp [i] = i * 1e-4f; + + R.setup (480.0 / 441.0, c, h); + + clock_gettime (CLOCK_REALTIME, &t0); + for (i = 0; i < ITER; i++) + { + R.inp_count = LINP; + R.inp_data = inp; + R.out_count = LOUT; + R.out_data = out; + R.process (); + } + clock_gettime (CLOCK_REALTIME, &t1); + ds = t1.tv_sec - t0.tv_sec; + dn = t1.tv_nsec - t0.tv_nsec; + dt = ds + 1e-9 * dn; + printf ("44100 -> 48000, chan = %2d, hlen = %2d %8.3le input frames per second\n", + c, h, LINP * ITER / dt); + + delete[] inp; + delete[] out; + return 0; +} +