diff -Nru zita-resampler-1.6.2/debian/changelog zita-resampler-1.8.0/debian/changelog
--- zita-resampler-1.6.2/debian/changelog 2020-03-15 16:07:31.000000000 +0000
+++ zita-resampler-1.8.0/debian/changelog 2021-01-02 21:14:01.000000000 +0000
@@ -1,3 +1,24 @@
+zita-resampler (1.8.0-2) unstable; urgency=medium
+
+ * Fix the SSE2 exclusion in d/rules
+
+ -- Dennis Braun Sat, 02 Jan 2021 22:14:01 +0100
+
+zita-resampler (1.8.0-1) unstable; urgency=medium
+
+ * New upstream version 1.8.0
+ * Refresh makefile patch
+ * Remove SSE2 patch, (mostly) applied by upstream
+ * Enable SSE2 only for amd64 and x32
+ * Fix spelling errors in the documentation
+ * Bump dh-compat to 13
+ * Bump S-V to 4.5.1
+ * Update d/copyright years
+ * Remove unneeded build flags in d/rules
+ * Include architecture.mk instead of calling dpkg-architecture
+
+ -- Dennis Braun Sat, 02 Jan 2021 17:14:38 +0100
+
zita-resampler (1.6.2-2) unstable; urgency=medium
* Fix FTCBFS (Closes: #950705) Thanks to Helmut Grohne!
diff -Nru zita-resampler-1.6.2/debian/control zita-resampler-1.8.0/debian/control
--- zita-resampler-1.6.2/debian/control 2020-03-15 11:48:03.000000000 +0000
+++ zita-resampler-1.8.0/debian/control 2021-01-01 21:12:58.000000000 +0000
@@ -6,9 +6,9 @@
Dennis Braun ,
Jaromír Mikeš
Build-Depends:
- debhelper-compat (= 12),
+ debhelper-compat (= 13),
libsndfile1-dev
-Standards-Version: 4.5.0
+Standards-Version: 4.5.1
Vcs-Git: https://salsa.debian.org/multimedia-team/zita-resampler.git
Vcs-Browser: https://salsa.debian.org/multimedia-team/zita-resampler
Homepage: https://kokkinizita.linuxaudio.org/linuxaudio/zita-resampler/resampler.html
diff -Nru zita-resampler-1.6.2/debian/copyright zita-resampler-1.8.0/debian/copyright
--- zita-resampler-1.6.2/debian/copyright 2020-02-03 19:32:46.000000000 +0000
+++ zita-resampler-1.8.0/debian/copyright 2021-01-01 21:13:48.000000000 +0000
@@ -4,13 +4,13 @@
Source: https://kokkinizita.linuxaudio.org/linuxaudio/downloads/index.html
Files: *
-Copyright: 2006-2018 Fons Adriaensen
+Copyright: 2006-2020 Fons Adriaensen
License: GPL-3+
Files: debian/*
Copyright: 2010-2012 Alessio Treglia
2012 Jaromír Mikeš
- 2020 Dennis Braun
+ 2020-2021 Dennis Braun
License: GPL-3+
License: GPL-3+
diff -Nru zita-resampler-1.6.2/debian/patches/01-makefile.patch zita-resampler-1.8.0/debian/patches/01-makefile.patch
--- zita-resampler-1.6.2/debian/patches/01-makefile.patch 2020-03-15 16:07:31.000000000 +0000
+++ zita-resampler-1.8.0/debian/patches/01-makefile.patch 2021-01-01 22:55:44.000000000 +0000
@@ -3,18 +3,20 @@
called 'resample' is provided by another package.
Author: Alessio Treglia
Author: Dennis Braun
-Last-Update: 2020-02-10
+Last-Update: 2021-01-01
Forwarded: not-needed
---- a/apps/Makefile
-+++ b/apps/Makefile
-@@ -22,17 +22,16 @@
+Index: zita-resampler/apps/Makefile
+===================================================================
+--- zita-resampler.orig/apps/Makefile
++++ zita-resampler/apps/Makefile
+@@ -22,17 +22,16 @@ PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin
MANDIR ?= /usr/share/man/man1
-VERSION = 1.6.0
-CPPFLAGS += -MMD -MP -DVERSION=\"$(VERSION)\"
-+VERSION = 1.6.2
++VERSION = 1.8.0
+CPPFLAGS += -MMD -MP -DVERSION=\"$(VERSION)\" -I../debian/tmp/usr/include -I../source
CXXFLAGS += -O2 -ffast-math -Wall
-CXXFLAGS += -march=native
@@ -26,16 +28,16 @@
ZRESAMPLE_O = zresample.o audiofile.o dither.o
-zresample: LDLIBS += -lzita-resampler -lsndfile -lrt
-+zresample: LDLIBS += ../source/libzita-resampler.so.1.6.2 -lsndfile -lrt
++zresample: LDLIBS += ../source/libzita-resampler.so.1.8.0 -lsndfile -lrt
zresample: $(ZRESAMPLE_O)
$(CXX) $(LDFLAGS) -o $@ $(ZRESAMPLE_O) $(LDLIBS)
$(ZRESAMPLE_O):
-@@ -40,34 +39,25 @@
+@@ -40,34 +39,25 @@ $(ZRESAMPLE_O):
ZRETUNE_O = zretune.o audiofile.o dither.o
-zretune: LDLIBS += -lzita-resampler -lsndfile -lrt
-+zretune: LDLIBS += ../source/libzita-resampler.so.1.6.2 -lsndfile -lrt
++zretune: LDLIBS += ../source/libzita-resampler.so.1.8.0 -lsndfile -lrt
zretune: $(ZRETUNE_O)
$(CXX) $(LDFLAGS) -o $@ $(ZRETUNE_O) $(LDLIBS)
$(ZRETUNE_O):
@@ -73,17 +75,21 @@
- /bin/rm -f *~ *.o *.a *.d *.so *.gz zresample zretune
+ /bin/rm -f *~ *.o *.a *.d *.so zresample zretune
---- a/source/Makefile
-+++ b/source/Makefile
-@@ -33,7 +33,6 @@
+Index: zita-resampler/source/Makefile
+===================================================================
+--- zita-resampler.orig/source/Makefile
++++ zita-resampler/source/Makefile
+@@ -31,9 +31,7 @@ VERSION = $(MAJVERS).$(MINVERS)
+ DISTDIR = zita-resampler-$(VERSION)
CPPFLAGS += -I. -D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS
+-CPPFLAGS += -DENABLE_SSE2
CXXFLAGS += -Wall -fPIC -O2 -ffast-math
-CXXFLAGS += -march=native
LDFLAGS +=
LDLIBS +=
-@@ -48,7 +47,7 @@
+@@ -48,7 +46,7 @@ ZITA-RESAMPLER_H = zita-resampler/resamp
$(ZITA-RESAMPLER_MIN): $(ZITA-RESAMPLER_O)
@@ -92,7 +98,7 @@
$(ZITA-RESAMPLER_O): $(ZITA-RESAMPLER_H)
-@@ -58,8 +57,8 @@
+@@ -58,8 +56,8 @@ install: $(ZITA-RESAMPLER_MIN)
install -d $(DESTDIR)$(LIBDIR)
install -m 644 $(ZITA-RESAMPLER_H) $(DESTDIR)$(INCDIR)/zita-resampler
install -m 755 $(ZITA-RESAMPLER_MIN) $(DESTDIR)$(LIBDIR)
diff -Nru zita-resampler-1.6.2/debian/patches/fix_spell_errors.patch zita-resampler-1.8.0/debian/patches/fix_spell_errors.patch
--- zita-resampler-1.6.2/debian/patches/fix_spell_errors.patch 1970-01-01 00:00:00.000000000 +0000
+++ zita-resampler-1.8.0/debian/patches/fix_spell_errors.patch 2021-01-01 21:09:24.000000000 +0000
@@ -0,0 +1,75 @@
+Description: Fix spelling errors
+Author: Dennis Braun
+Forwarded: not-yet
+
+Index: zita-resampler-build/apps/zretune.1
+===================================================================
+--- zita-resampler-build.orig/apps/zretune.1
++++ zita-resampler-build/apps/zretune.1
+@@ -8,7 +8,7 @@ zretune \- resample an audio file in ord
+ .B zretune
+ resamples an audio file by a the inverse of a ratio expressed in cents,
+ without changing the nominal sample rate. The result is to change the
+-musical pitch and lenght of the file. Input can be any audio file
++musical pitch and length of the file. Input can be any audio file
+ readable by the libsndfile library. The output file type is either
+ WAV, WAVEX, CAF, AIFF or FLAC.
+ .SH OPTIONS
+Index: zita-resampler-build/docs/resampler.html
+===================================================================
+--- zita-resampler-build.orig/docs/resampler.html
++++ zita-resampler-build/docs/resampler.html
+@@ -261,7 +261,7 @@ input samples at the start and end of th
+ easy to add such padding, and doing this is left entirely up to the user.
+
+ int   Resampler::setup (unsigned int   fs_inp, unsigned int fs_out, unsigned int nchan, unsigned int hlen);
+
+ Description: Configures the object for a combination of input / output sample rates, number
+-of channels, and filter lenght.
If the parameters are OK, creates the filter coefficient tables
++of channels, and filter length.
If the parameters are OK, creates the filter coefficient tables
+ or re-uses existing ones, allocates some internal resources, and returns via
+ reset ().
+
+@@ -381,7 +381,7 @@ with a, b integer and b ≤ 1
+
+ nchan: Number of channels, must not be zero.
+
+-hlen: Half the lenght of the filter expressed in samples at the lower of
++hlen: Half the length of the filter expressed in samples at the lower of
+ input and output rates. This parameter determines the 'quality' as explained
+ here. For any fixed combination of the other parameters,
+ cpu load will be roughly proportional to hlen. The valid range is
+@@ -402,7 +402,7 @@ even if they are not shared.
+
int   VResampler::setup (double ratio, unsigned int nchan, unsigned int hlen);
+
+ Description: Configures the object for a combination of resampling ratio, number of channels,
+-and filter lenght.
If the parameters are OK, creates the filter coefficient tables or re-uses
++and filter length.
If the parameters are OK, creates the filter coefficient tables or re-uses
+ existing ones, allocates some internal resources, and returns via
+ reset ().
+
+@@ -413,7 +413,7 @@ existing ones, allocates some internal r
+
+ nchan: Number of channels, must not be zero.
+
+-hlen: Half the lenght of the filter expressed in samples at the lower of
++hlen: Half the length of the filter expressed in samples at the lower of
+ the input and output rates. This parameter determines the 'quality' as explained
+ here. For any fixed combination of the other parameters,
+ cpu load will be roughly proportional to hlen. The valid range is
+@@ -482,7 +482,7 @@ this number of channels in interleaved f
+
+ Description: Accessor.
+
+-Returns: If the resampler is configured, the lenght of the
++Returns: If the resampler is configured, the length of the
+ finite impulse filter expressed in samples at the input sample rate,
+ or zero otherwise. This value may be used to determine the number of
+ silence samples to insert at the start and end when resampling e.g.
diff -Nru zita-resampler-1.6.2/debian/patches/series zita-resampler-1.8.0/debian/patches/series
--- zita-resampler-1.6.2/debian/patches/series 2020-02-03 19:48:27.000000000 +0000
+++ zita-resampler-1.8.0/debian/patches/series 2021-01-01 21:29:58.000000000 +0000
@@ -1,2 +1,2 @@
01-makefile.patch
-zita-resampler-sse.diff
+fix_spell_errors.patch
diff -Nru zita-resampler-1.6.2/debian/patches/zita-resampler-sse.diff zita-resampler-1.8.0/debian/patches/zita-resampler-sse.diff
--- zita-resampler-1.6.2/debian/patches/zita-resampler-sse.diff 2020-02-03 19:48:27.000000000 +0000
+++ zita-resampler-1.8.0/debian/patches/zita-resampler-sse.diff 1970-01-01 00:00:00.000000000 +0000
@@ -1,422 +0,0 @@
-Description: SSE-optimizing resampling of stereo signals.
- https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=832095
-Author: Steinar H. Gunderson
-Last-Update: 2016-08-29
-Forwarded: yes
-
---- a/source/resampler.cc
-+++ b/source/resampler.cc
-@@ -24,6 +24,10 @@
- #include
- #include
-
-+#ifdef __SSE2__
-+#include
-+#endif
-+
-
- static unsigned int gcd (unsigned int a, unsigned int b)
- {
-@@ -47,6 +51,118 @@
- return 1;
- }
-
-+#ifdef __SSE2__
-+
-+static inline float calc_mono_sample_sse (unsigned int hl,
-+ const float *c1,
-+ const float *c2,
-+ const float *q1,
-+ const float *q2)
-+{
-+ unsigned int i;
-+ __m128 denorm, s, w1, w2, shuf;
-+
-+ denorm = _mm_set1_ps (1e-20f);
-+ s = denorm;
-+ for (i = 0; i < hl; i += 4)
-+ {
-+ q2 -= 4;
-+
-+ // s += *q1 * c1 [i];
-+ w1 = _mm_loadu_ps (&c1 [i]);
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q1), w1));
-+
-+ // s += *q2 * c2 [i];
-+ w2 = _mm_loadu_ps (&c2 [i]);
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q2), _mm_shuffle_ps (w2, w2, _MM_SHUFFLE (0, 1, 2, 3))));
-+
-+ q1 += 4;
-+ }
-+ s = _mm_sub_ps (s, denorm);
-+
-+ // Add all the elements of s together into one. Adapted from
-+ // http://stackoverflow.com/questions/6996764/fastest-way-to-do-horizontal-float-vector-sum-on-x86
-+ shuf = _mm_shuffle_ps (s, s, _MM_SHUFFLE (2, 3, 0, 1));
-+ s = _mm_add_ps (s, shuf);
-+ s = _mm_add_ss (s, _mm_movehl_ps (shuf, s));
-+ return _mm_cvtss_f32 (s);
-+}
-+
-+// Note: This writes four floats instead of two (the last two are garbage).
-+// The caller will need to make sure there is room for all four.
-+static inline void calc_stereo_sample_sse (unsigned int hl,
-+ const float *c1,
-+ const float *c2,
-+ const float *q1,
-+ const float *q2,
-+ float *out_data)
-+{
-+ unsigned int i;
-+ __m128 denorm, s, w1, w2;
-+
-+ denorm = _mm_set1_ps (1e-20f);
-+ s = denorm;
-+ for (i = 0; i < hl; i += 4)
-+ {
-+ q2 -= 8;
-+
-+ // s += *q1 * c1 [i];
-+ w1 = _mm_loadu_ps (&c1 [i]);
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q1), _mm_unpacklo_ps (w1, w1)));
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q1 + 4), _mm_unpackhi_ps (w1, w1)));
-+
-+ // s += *q2 * c2 [i];
-+ w2 = _mm_loadu_ps (&c2 [i]);
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q2 + 4), _mm_shuffle_ps (w2, w2, _MM_SHUFFLE (0, 0, 1, 1))));
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q2), _mm_shuffle_ps (w2, w2, _MM_SHUFFLE (2, 2, 3, 3))));
-+
-+ q1 += 8;
-+ }
-+ s = _mm_sub_ps (s, denorm);
-+ s = _mm_add_ps (s, _mm_shuffle_ps (s, s, _MM_SHUFFLE (1, 0, 3, 2)));
-+
-+ _mm_storeu_ps (out_data, s);
-+}
-+
-+static inline void calc_quad_sample_sse (int hl,
-+ int nchan,
-+ const float *c1,
-+ const float *c2,
-+ const float *q1,
-+ const float *q2,
-+ float *out_data)
-+{
-+ int i;
-+ __m128 denorm, s, w1, w2;
-+
-+ denorm = _mm_set1_ps (1e-20f);
-+ s = denorm;
-+ for (i = 0; i < hl; i += 4)
-+ {
-+ q2 -= 4 * nchan;
-+
-+ // s += *p1 * _c1 [i];
-+ w1 = _mm_loadu_ps (&c1 [i]);
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q1), _mm_shuffle_ps (w1, w1, _MM_SHUFFLE (0, 0, 0, 0))));
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q1 + nchan), _mm_shuffle_ps (w1, w1, _MM_SHUFFLE (1, 1, 1, 1))));
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q1 + 2 * nchan), _mm_shuffle_ps (w1, w1, _MM_SHUFFLE (2, 2, 2, 2))));
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q1 + 3 * nchan), _mm_shuffle_ps (w1, w1, _MM_SHUFFLE (3, 3, 3, 3))));
-+
-+ // s += *p2 * _c2 [i];
-+ w2 = _mm_loadu_ps (&c2 [i]);
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q2 + 3 * nchan), _mm_shuffle_ps (w2, w2, _MM_SHUFFLE (0, 0, 0, 0))));
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q2 + 2 * nchan), _mm_shuffle_ps (w2, w2, _MM_SHUFFLE (1, 1, 1, 1))));
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q2 + nchan), _mm_shuffle_ps (w2, w2, _MM_SHUFFLE (2, 2, 2, 2))));
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (q2), _mm_shuffle_ps (w2, w2, _MM_SHUFFLE (3, 3, 3, 3))));
-+
-+ q1 += 4 * nchan;
-+ }
-+ s = _mm_sub_ps (s, denorm);
-+
-+ _mm_storeu_ps (out_data, s);
-+}
-+#endif
-+
-
- Resampler::Resampler (void) :
- _table (0),
-@@ -213,18 +329,42 @@
- {
- float *c1 = _table->_ctab + hl * ph;
- float *c2 = _table->_ctab + hl * (np - ph);
-- for (c = 0; c < _nchan; c++)
-+#ifdef __SSE2__
-+ if ((hl % 4) == 0 && _nchan == 1)
-+ {
-+ *out_data++ = calc_mono_sample_sse (hl, c1, c2, p1, p2);
-+ }
-+ else if ((hl % 4) == 0 && _nchan == 2)
- {
-- float *q1 = p1 + c;
-- float *q2 = p2 + c;
-- float s = 1e-20f;
-- for (i = 0; i < hl; i++)
-+ if (out_count >= 2)
-+ {
-+ calc_stereo_sample_sse (hl, c1, c2, p1, p2, out_data);
-+ }
-+ else
-+ {
-+ float tmp[4];
-+ calc_stereo_sample_sse (hl, c1, c2, p1, p2, tmp);
-+ out_data[0] = tmp[0];
-+ out_data[1] = tmp[1];
-+ }
-+ out_data += 2;
-+ }
-+ else
-+#endif
-+ {
-+ for (c = 0; c < _nchan; c++)
- {
-- q2 -= _nchan;
-- s += *q1 * c1 [i] + *q2 * c2 [i];
-- q1 += _nchan;
-+ float *q1 = p1 + c;
-+ float *q2 = p2 + c;
-+ float s = 1e-20f;
-+ for (i = 0; i < hl; i++)
-+ {
-+ q2 -= _nchan;
-+ s += *q1 * c1 [i] + *q2 * c2 [i];
-+ q1 += _nchan;
-+ }
-+ *out_data++ = s - 1e-20f;
- }
-- *out_data++ = s - 1e-20f;
- }
- }
- else
-diff -ur orig/zita-resampler-1.3.0/source/vresampler.cc zita-resampler-1.3.0/source/vresampler.cc
---- orig/zita-resampler-1.3.0/source/vresampler.cc 2012-10-26 22:58:55.000000000 +0200
-+++ zita-resampler-1.3.0/source/vresampler.cc 2016-09-05 00:33:53.907511211 +0200
-@@ -25,6 +25,152 @@
- #include
-
-
-+#ifdef __SSE2__
-+
-+#include
-+
-+static inline float calc_mono_sample_sse (int hl,
-+ float b,
-+ const float *p1,
-+ const float *p2,
-+ const float *q1,
-+ const float *q2)
-+{
-+ int i;
-+ __m128 denorm, bs, s, c1, c2, w1, w2, shuf;
-+
-+ denorm = _mm_set1_ps (1e-25f);
-+ bs = _mm_set1_ps (b);
-+ s = denorm;
-+ for (i = 0; i < hl; i += 4)
-+ {
-+ p2 -= 4;
-+
-+ // _c1 [i] = q1 [i] + b * (q1 [i + hl] - q1 [i]);
-+ w1 = _mm_loadu_ps (&q1 [i]);
-+ w2 = _mm_loadu_ps (&q1 [i + hl]);
-+ c1 = _mm_add_ps (w1, _mm_mul_ps(bs, _mm_sub_ps (w2, w1)));
-+
-+ // _c2 [i] = q2 [i] + b * (q2 [i - hl] - q2 [i]);
-+ w1 = _mm_loadu_ps (&q2 [i]);
-+ w2 = _mm_loadu_ps (&q2 [i - hl]);
-+ c2 = _mm_add_ps (w1, _mm_mul_ps(bs, _mm_sub_ps (w2, w1)));
-+
-+ // s += *p1 * _c1 [i];
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p1), c1));
-+
-+ // s += *p2 * _c2 [i];
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p2), _mm_shuffle_ps (c2, c2, _MM_SHUFFLE (0, 1, 2, 3))));
-+
-+ p1 += 4;
-+ }
-+ s = _mm_sub_ps (s, denorm);
-+
-+ // Add all the elements of s together into one. Adapted from
-+ // http://stackoverflow.com/questions/6996764/fastest-way-to-do-horizontal-float-vector-sum-on-x86
-+ shuf = _mm_shuffle_ps (s, s, _MM_SHUFFLE (2, 3, 0, 1));
-+ s = _mm_add_ps (s, shuf);
-+ s = _mm_add_ss (s, _mm_movehl_ps (shuf, s));
-+ return _mm_cvtss_f32 (s);
-+}
-+
-+// Note: This writes four floats instead of two (the last two are garbage).
-+// The caller will need to make sure there is room for all four.
-+static inline void calc_stereo_sample_sse (int hl,
-+ float b,
-+ const float *p1,
-+ const float *p2,
-+ const float *q1,
-+ const float *q2,
-+ float *out_data)
-+{
-+ int i;
-+ __m128 denorm, bs, s, c1, c2, w1, w2;
-+
-+ denorm = _mm_set1_ps (1e-25f);
-+ bs = _mm_set1_ps (b);
-+ s = denorm;
-+ for (i = 0; i < hl; i += 4)
-+ {
-+ p2 -= 8;
-+
-+ // _c1 [i] = q1 [i] + b * (q1 [i + hl] - q1 [i]);
-+ w1 = _mm_loadu_ps (&q1 [i]);
-+ w2 = _mm_loadu_ps (&q1 [i + hl]);
-+ c1 = _mm_add_ps (w1, _mm_mul_ps(bs, _mm_sub_ps (w2, w1)));
-+
-+ // _c2 [i] = q2 [i] + b * (q2 [i - hl] - q2 [i]);
-+ w1 = _mm_loadu_ps (&q2 [i]);
-+ w2 = _mm_loadu_ps (&q2 [i - hl]);
-+ c2 = _mm_add_ps (w1, _mm_mul_ps(bs, _mm_sub_ps (w2, w1)));
-+
-+ // s += *p1 * _c1 [i];
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p1), _mm_unpacklo_ps (c1, c1)));
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p1 + 4), _mm_unpackhi_ps (c1, c1)));
-+
-+ // s += *p2 * _c2 [i];
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p2 + 4), _mm_shuffle_ps (c2, c2, _MM_SHUFFLE (0, 0, 1, 1))));
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p2), _mm_shuffle_ps (c2, c2, _MM_SHUFFLE (2, 2, 3, 3))));
-+
-+ p1 += 8;
-+ }
-+ s = _mm_sub_ps (s, denorm);
-+ s = _mm_add_ps (s, _mm_shuffle_ps (s, s, _MM_SHUFFLE (1, 0, 3, 2)));
-+
-+ _mm_storeu_ps (out_data, s);
-+}
-+
-+static inline void calc_quad_sample_sse (int hl,
-+ int nchan,
-+ float b,
-+ const float *p1,
-+ const float *p2,
-+ const float *q1,
-+ const float *q2,
-+ float *out_data)
-+{
-+ int i;
-+ __m128 denorm, bs, s, c1, c2, w1, w2;
-+
-+ denorm = _mm_set1_ps (1e-25f);
-+ bs = _mm_set1_ps (b);
-+ s = denorm;
-+ for (i = 0; i < hl; i += 4)
-+ {
-+ p2 -= 4 * nchan;
-+
-+ // _c1 [i] = q1 [i] + b * (q1 [i + hl] - q1 [i]);
-+ w1 = _mm_loadu_ps (&q1 [i]);
-+ w2 = _mm_loadu_ps (&q1 [i + hl]);
-+ c1 = _mm_add_ps (w1, _mm_mul_ps(bs, _mm_sub_ps (w2, w1)));
-+
-+ // _c2 [i] = q2 [i] + b * (q2 [i - hl] - q2 [i]);
-+ w1 = _mm_loadu_ps (&q2 [i]);
-+ w2 = _mm_loadu_ps (&q2 [i - hl]);
-+ c2 = _mm_add_ps (w1, _mm_mul_ps(bs, _mm_sub_ps (w2, w1)));
-+
-+ // s += *p1 * _c1 [i];
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p1), _mm_shuffle_ps (c1, c1, _MM_SHUFFLE (0, 0, 0, 0))));
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p1 + nchan), _mm_shuffle_ps (c1, c1, _MM_SHUFFLE (1, 1, 1, 1))));
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p1 + 2 * nchan), _mm_shuffle_ps (c1, c1, _MM_SHUFFLE (2, 2, 2, 2))));
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p1 + 3 * nchan), _mm_shuffle_ps (c1, c1, _MM_SHUFFLE (3, 3, 3, 3))));
-+
-+ // s += *p2 * _c2 [i];
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p2 + 3 * nchan), _mm_shuffle_ps (c2, c2, _MM_SHUFFLE (0, 0, 0, 0))));
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p2 + 2 * nchan), _mm_shuffle_ps (c2, c2, _MM_SHUFFLE (1, 1, 1, 1))));
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p2 + nchan), _mm_shuffle_ps (c2, c2, _MM_SHUFFLE (2, 2, 2, 2))));
-+ s = _mm_add_ps (s, _mm_mul_ps (_mm_loadu_ps (p2), _mm_shuffle_ps (c2, c2, _MM_SHUFFLE (3, 3, 3, 3))));
-+
-+ p1 += 4 * nchan;
-+ }
-+ s = _mm_sub_ps (s, denorm);
-+
-+ _mm_storeu_ps (out_data, s);
-+}
-+
-+#endif
-+
-+
- VResampler::VResampler (void) :
- _table (0),
- _nchan (0),
-@@ -163,7 +309,7 @@
-
- int VResampler::process (void)
- {
-- unsigned int k, np, in, nr, n, c;
-+ unsigned int j, k, np, in, nr, n, c;
- int i, hl, nz;
- double ph, dp, dd;
- float a, b, *p1, *p2, *q1, *q2;
-@@ -212,23 +358,55 @@
- a = 1.0f - b;
- q1 = _table->_ctab + hl * k;
- q2 = _table->_ctab + hl * (np - k);
-- for (i = 0; i < hl; i++)
-+#ifdef __SSE2__
-+ if ((hl % 4) == 0 && _nchan == 1)
-+ {
-+ *out_data++ = calc_mono_sample_sse (hl, b, p1, p2, q1, q2);
-+ }
-+ else if ((hl % 4) == 0 && _nchan == 2)
- {
-- _c1 [i] = a * q1 [i] + b * q1 [i + hl];
-- _c2 [i] = a * q2 [i] + b * q2 [i - hl];
-+ if (out_count >= 2)
-+ {
-+ calc_stereo_sample_sse (hl, b, p1, p2, q1, q2, out_data);
-+ }
-+ else
-+ {
-+ float tmp[4];
-+ calc_stereo_sample_sse (hl, b, p1, p2, q1, q2, tmp);
-+ out_data[0] = tmp[0];
-+ out_data[1] = tmp[1];
-+ }
-+ out_data += 2;
-+ }
-+ else if ((hl % 4) == 0 && (_nchan % 4) == 0)
-+ {
-+ for (j = 0; j < _nchan; j += 4)
-+ {
-+ calc_quad_sample_sse (hl, _nchan, b, p1 + j, p2 + j, q1, q2, out_data + j);
-+ }
-+ out_data += _nchan;
- }
-- for (c = 0; c < _nchan; c++)
-+ else
-+#endif
- {
-- q1 = p1 + c;
-- q2 = p2 + c;
-- a = 1e-25f;
- for (i = 0; i < hl; i++)
- {
-- q2 -= _nchan;
-- a += *q1 * _c1 [i] + *q2 * _c2 [i];
-- q1 += _nchan;
-+ _c1 [i] = a * q1 [i] + b * q1 [i + hl];
-+ _c2 [i] = a * q2 [i] + b * q2 [i - hl];
-+ }
-+ for (c = 0; c < _nchan; c++)
-+ {
-+ q1 = p1 + c;
-+ q2 = p2 + c;
-+ a = 1e-25f;
-+ for (i = 0; i < hl; i++)
-+ {
-+ q2 -= _nchan;
-+ a += *q1 * _c1 [i] + *q2 * _c2 [i];
-+ q1 += _nchan;
-+ }
-+ *out_data++ = a - 1e-25f;
- }
-- *out_data++ = a - 1e-25f;
- }
- }
- else
diff -Nru zita-resampler-1.6.2/debian/rules zita-resampler-1.8.0/debian/rules
--- zita-resampler-1.6.2/debian/rules 2020-03-15 16:07:31.000000000 +0000
+++ zita-resampler-1.8.0/debian/rules 2021-01-02 21:14:01.000000000 +0000
@@ -1,13 +1,18 @@
#!/usr/bin/make -f
-DEB_HOST_MULTIARCH ?= $(shell dpkg-architecture -qDEB_HOST_MULTIARCH)
+include /usr/share/dpkg/architecture.mk
export DEB_BUILD_MAINT_OPTIONS = hardening=+bindnow
-export DEB_LDFLAGS_MAINT_APPEND = -Wl,--as-needed
export PREFIX=/usr
export LIBDIR=$(PREFIX)/lib/$(DEB_HOST_MULTIARCH)
export DESTDIR=$(CURDIR)/debian/tmp
+# Enable upstream's SSE2 code path only on amd64 and x32. A plain, unexported
+# CPPFLAGS assignment here never reaches dh's sub-make, so append via dpkg-buildflags.
+ifneq (,$(filter amd64 x32,$(DEB_HOST_ARCH)))
+ export DEB_CPPFLAGS_MAINT_APPEND = -DENABLE_SSE2
+endif
+
%:
dh $@ -Dsource -Dapps
diff -Nru zita-resampler-1.6.2/docs/resampler.html zita-resampler-1.8.0/docs/resampler.html
--- zita-resampler-1.6.2/docs/resampler.html 2018-08-24 18:41:47.000000000 +0000
+++ zita-resampler-1.8.0/docs/resampler.html 2020-12-29 11:12:36.000000000 +0000
@@ -28,6 +28,11 @@
high-quality sample rate conversion.
+From version 1.8.0, libzita-resampler can be compiled to use SSE2 instructions.
+This will reduce CPU load by a factor of at least 2. An ARM NEON option will be
+provided in some future release.
+
+
The library operates on signals represented in single-precision floating point
format. For multichannel operation both the input and output signals are
assumed to be stored as interleaved samples.
@@ -54,14 +59,14 @@
The Resampler class performs resampling at a fixed ratio F_out / F_in
-which is required to be ≥ 1/16 and be reducible to the form b / a
+which is required to be ≥ 1/64 and be reducible to the form b / a
with a, b integer and b ≤ 1000. This includes all the 'standard'
ratios, e.g. 96000 / 44100 = 320 / 147. These restrictions allow for a more efficient
implementation.
The VResampler class provides an arbitrary ratio r in the range
-1/16 ≤ r ≤ 64 and which can variable within a range of 0.95 to
+1/64 ≤ r ≤ 64 and which can vary within a range of 0.95 to
16.0 w.r.t. the originally configured one. The lower limit here is necessary
because this class still uses a fixed multiphase filter, with only the phase step
being variable. This class was developed for converting between two nominally fixed
@@ -225,16 +230,16 @@
again, or provide a new one, and re-initialise the input count and pointer.
If at that time out_count is not zero, you can either leave the output
parameters as they are for the next call to process (), or you could
-empty the part of the output buffer that has been filled and re-use it from
-the start, or provide a completely different one.
+read the part of the output buffer that has been filled and then re-use it
+from the start, or provide a completely different one.
The same applies to the input buffer when it is not empty on return of
-process (): it can be left alone or be replaced. A number of input
+process (): it can be left alone, re-used or replaced. A number of input
samples is stored internally between process () calls as part of the
-resampler state, but this never includes samples that have not yet been used.
-So you can 'revise' the input data, starting from the frame pointed to by the
-returned inp_data, up to the last moment.
+resampler state, but this never includes samples that have not yet been used
+to compute an output sample. So you can 'revise' the input data, starting from
+the frame pointed to by the returned inp_data, up to the last moment.
All this means that both classes will interface easily with fixed input and
@@ -272,9 +277,9 @@
- inserting k / 2 zero-valued samples at the end will ensure
that the last output sample produced will correspond to a position as close
-as possible but not past the last real input sample,
+as possible but not past the last input sample,
- inserting k - 1 zero valued samples will ensure that the output
-includes the full filter response for the last real input sample.
+includes the full filter response for the last input sample.
@@ -301,9 +306,9 @@
input data.
-The 'resample' application supplied with the library sources provides
-an example of how to use the Resampler class. For an example
-using VResampler you can have a look at zita_a2j and zita_ja2.
+The test programs supplied with the library sources provide some
+examples of how to use both classes. For another example using
+VResampler you can have a look at zita_ajbridge or zita_njbridge.
@@ -371,7 +376,7 @@
fs_inp, fs_out: The input and output sample rates. The ratio fs_out
-/ fs_inp must be ≥ 1/16 and reducible to the form b / a
+/ fs_inp must be ≥ 1/64 and reducible to the form b / a
with a, b integer and b ≤ 1000.
nchan: Number of channels, must not be zero.
@@ -404,7 +409,7 @@
Parameters:
-ratio: The resampling ratio wich must be between 1/16 and 64.
+ratio: The resampling ratio which must be between 1/64 and 64.
nchan: Number of channels, must not be zero.
diff -Nru zita-resampler-1.6.2/README zita-resampler-1.8.0/README
--- zita-resampler-1.6.2/README 2018-08-24 18:41:47.000000000 +0000
+++ zita-resampler-1.8.0/README 2020-12-29 14:25:11.000000000 +0000
@@ -3,6 +3,14 @@
see the 'docs' directory.
+Release 1.8.0 (30/12/2020)
+---------------------------
+
+* Added SSE2 support for Resampler and VResampler.
+ This is enabled by default in the Makefile.
+* Cleanup and some minor bug fixes.
+
+
Release 1.6.2 (25/08/2018)
---------------------------
diff -Nru zita-resampler-1.6.2/source/Makefile zita-resampler-1.8.0/source/Makefile
--- zita-resampler-1.6.2/source/Makefile 2018-08-24 18:41:47.000000000 +0000
+++ zita-resampler-1.8.0/source/Makefile 2020-12-28 20:09:24.000000000 +0000
@@ -1,6 +1,6 @@
# ----------------------------------------------------------------------------
#
-# Copyright (C) 2006-2018 Fons Adriaensen
+# Copyright (C) 2006-2020 Fons Adriaensen
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -26,12 +26,12 @@
LIBDIR ?= $(PREFIX)/lib$(SUFFIX)
MAJVERS = 1
-MINVERS = 6.2
+MINVERS = 8.0
VERSION = $(MAJVERS).$(MINVERS)
DISTDIR = zita-resampler-$(VERSION)
-
CPPFLAGS += -I. -D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS
+CPPFLAGS += -DENABLE_SSE2
CXXFLAGS += -Wall -fPIC -O2 -ffast-math
CXXFLAGS += -march=native
LDFLAGS +=
diff -Nru zita-resampler-1.6.2/source/resampler.cc zita-resampler-1.8.0/source/resampler.cc
--- zita-resampler-1.6.2/source/resampler.cc 2018-08-24 18:41:47.000000000 +0000
+++ zita-resampler-1.8.0/source/resampler.cc 2020-12-29 13:22:10.000000000 +0000
@@ -1,6 +1,6 @@
// ----------------------------------------------------------------------------
//
-// Copyright (C) 2006-2012 Fons Adriaensen
+// Copyright (C) 2006-2020 Fons Adriaensen
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -22,7 +22,14 @@
#include
#include
#include
-#include
+
+#undef ENABLE_VEC4
+#if defined(ENABLE_SSE2)
+# define ENABLE_VEC4
+# include
+#endif
+
+#include "zita-resampler/resampler.h"
static unsigned int gcd (unsigned int a, unsigned int b)
@@ -31,18 +38,18 @@
if (b == 0) return a;
while (1)
{
- if (a > b)
- {
- a = a % b;
- if (a == 0) return b;
- if (a == 1) return 1;
- }
- else
- {
- b = b % a;
- if (b == 0) return a;
- if (b == 1) return 1;
- }
+ if (a > b)
+ {
+ a = a % b;
+ if (a == 0) return b;
+ if (a == 1) return 1;
+ }
+ else
+ {
+ b = b % a;
+ if (b == 0) return a;
+ if (b == 1) return 1;
+ }
}
return 1;
}
@@ -79,41 +86,54 @@
unsigned int hlen,
double frel)
{
- unsigned int g, h, k, n, s;
+ unsigned int np, dp, mi, hl, n;
double r;
- float *B = 0;
Resampler_table *T = 0;
- k = s = 0;
- if (fs_inp && fs_out && nchan)
+ if (!fs_inp || !fs_out || !nchan)
{
- r = (double) fs_out / (double) fs_inp;
- g = gcd (fs_out, fs_inp);
- n = fs_out / g;
- s = fs_inp / g;
- if ((16 * r >= 1) && (n <= 1000))
- {
- h = hlen;
- k = 250;
- if (r < 1)
- {
- frel *= r;
- h = (unsigned int)(ceil (h / r));
- k = (unsigned int)(ceil (k / r));
- }
- T = Resampler_table::create (frel, h, n);
- B = new float [nchan * (2 * h - 1 + k)];
- }
+ clear ();
+ return 1;
}
+
+ r = (double) fs_out / (double) fs_inp;
+ n = gcd (fs_out, fs_inp);
+ np = fs_out / n;
+ dp = fs_inp / n;
+ if ((64 * r < 1.0) || (np > 1000))
+ {
+ clear ();
+ return 1;
+ }
+
+ hl = hlen;
+ mi = 32;
+ if (r < 1.0)
+ {
+ frel *= r;
+ hl = (unsigned int)(ceil (hl / r));
+ mi = (unsigned int)(ceil (mi / r));
+ }
+#ifdef ENABLE_VEC4
+ hl = (hl + 3) & ~3;
+#endif
+ T = Resampler_table::create (frel, hl, np);
+
clear ();
if (T)
{
- _table = T;
- _buff = B;
- _nchan = nchan;
- _inmax = k;
- _pstep = s;
- return reset ();
+ _table = T;
+ n = nchan * (2 * hl + mi);
+#ifdef ENABLE_VEC4
+ posix_memalign ((void **)(&_buff), 16, n * sizeof (float));
+ memset (_buff, 0, n * sizeof (float));
+#else
+ _buff = new float [n];
+#endif
+ _nchan = nchan;
+ _inmax = mi;
+ _pstep = dp;
+ return reset ();
}
else return 1;
}
@@ -122,7 +142,11 @@
void Resampler::clear (void)
{
Resampler_table::destroy (_table);
+#ifdef ENABLE_VEC4
+ free (_buff);
+#else
delete[] _buff;
+#endif
_buff = 0;
_table = 0;
_nchan = 0;
@@ -161,7 +185,7 @@
if (_table)
{
_nread = 2 * _table->_hl;
- return 0;
+ return 0;
}
return 1;
}
@@ -169,89 +193,116 @@
int Resampler::process (void)
{
- unsigned int hl, ph, np, dp, in, nr, nz, i, n, c;
- float *p1, *p2;
+ unsigned int hl, np, ph, dp, in, nr, nz, di, i, j, n;
+ float *c1, *c2, *p1, *p2, *q1, *q2;
if (!_table) return 1;
-
hl = _table->_hl;
np = _table->_np;
dp = _pstep;
in = _index;
nr = _nread;
- ph = _phase;
nz = _nzero;
- n = (2 * hl - nr) * _nchan;
- p1 = _buff + in * _nchan;
- p2 = p1 + n;
+ ph = _phase;
+
+ p1 = _buff + in;
+ p2 = p1 + 2 * hl - nr;
+ di = 2 * hl + _inmax;
while (out_count)
{
- if (nr)
- {
- if (inp_count == 0) break;
- if (inp_data)
- {
- for (c = 0; c < _nchan; c++) p2 [c] = inp_data [c];
- inp_data += _nchan;
- nz = 0;
- }
- else
- {
- for (c = 0; c < _nchan; c++) p2 [c] = 0;
- if (nz < 2 * hl) nz++;
- }
- nr--;
- p2 += _nchan;
- inp_count--;
- }
- else
- {
- if (out_data)
- {
- if (nz < 2 * hl)
- {
- float *c1 = _table->_ctab + hl * ph;
- float *c2 = _table->_ctab + hl * (np - ph);
- for (c = 0; c < _nchan; c++)
- {
- float *q1 = p1 + c;
- float *q2 = p2 + c;
- float s = 1e-20f;
- for (i = 0; i < hl; i++)
- {
- q2 -= _nchan;
- s += *q1 * c1 [i] + *q2 * c2 [i];
- q1 += _nchan;
- }
- *out_data++ = s - 1e-20f;
- }
- }
- else
+ while (nr && inp_count)
+ {
+ if (inp_data)
+ {
+ for (j = 0; j < _nchan; j++) p2 [j * di] = inp_data [j];
+ inp_data += _nchan;
+ nz = 0;
+ }
+ else
+ {
+ for (j = 0; j < _nchan; j++) p2 [j * di] = 0;
+ if (nz < 2 * hl) nz++;
+ }
+ p2++;
+ nr--;
+ inp_count--;
+ }
+ if (nr) break;
+
+ if (out_data)
+ {
+ if (nz < 2 * hl)
+ {
+ c1 = _table->_ctab + hl * ph;
+ c2 = _table->_ctab + hl * (np - ph);
+
+#if defined(ENABLE_SSE2)
+ __m128 C1, C2, Q1, Q2, S;
+ for (j = 0; j < _nchan; j++)
+ {
+ q1 = p1 + j * di;
+ q2 = p2 + j * di;
+ S = _mm_setzero_ps ();
+ for (i = 0; i < hl; i += 4)
+ {
+ C1 = _mm_load_ps (c1 + i);
+ Q1 = _mm_loadu_ps (q1);
+ q2 -= 4;
+ S = _mm_add_ps (S, _mm_mul_ps (C1, Q1));
+ C2 = _mm_loadr_ps (c2 + i);
+ Q2 = _mm_loadu_ps (q2);
+ q1 += 4;
+ S = _mm_add_ps (S, _mm_mul_ps (C2, Q2));
+ }
+ *out_data++ = S [0] + S [1] + S [2] + S [3];
+ }
+#else
+ float s;
+ for (j = 0; j < _nchan; j++)
+ {
+ q1 = p1 + j * di;
+ q2 = p2 + j * di;
+ s = 1e-20f;
+ for (i = 0; i < hl; i++)
+ {
+ q2--;
+ s += *q1 * c1 [i] + *q2 * c2 [i];
+ q1++;
+ }
+ *out_data++ = s - 1e-20f;
+ }
+#endif
+ }
+ else
+ {
+ for (j = 0; j < _nchan; j++) *out_data++ = 0;
+ }
+ }
+ out_count--;
+
+ ph += dp;
+ if (ph >= np)
+ {
+ nr = ph / np;
+ ph -= nr * np;
+ in += nr;
+ p1 += nr;
+ if (in >= _inmax)
+ {
+ n = 2 * hl - nr;
+ p2 = _buff;
+ for (j = 0; j < _nchan; j++)
{
- for (c = 0; c < _nchan; c++) *out_data++ = 0;
+ memmove (p2 + j * di, p1 + j * di, n * sizeof (float));
}
- }
- out_count--;
-
- ph += dp;
- if (ph >= np)
- {
- nr = ph / np;
- ph -= nr * np;
- in += nr;
- p1 += nr * _nchan;;
- if (in >= _inmax)
- {
- n = (2 * hl - nr) * _nchan;
- memcpy (_buff, p1, n * sizeof (float));
- in = 0;
- p1 = _buff;
- p2 = p1 + n;
- }
- }
- }
+ in = 0;
+ p1 = _buff;
+ p2 = p1 + n;
+ }
+ }
}
+
_index = in;
_nread = nr;
_phase = ph;
diff -Nru zita-resampler-1.6.2/source/resampler-table.cc zita-resampler-1.8.0/source/resampler-table.cc
--- zita-resampler-1.6.2/source/resampler-table.cc 2018-08-24 18:41:47.000000000 +0000
+++ zita-resampler-1.8.0/source/resampler-table.cc 2020-12-28 19:54:44.000000000 +0000
@@ -1,6 +1,6 @@
// ----------------------------------------------------------------------------
//
-// Copyright (C) 2006-2012 Fons Adriaensen
+// Copyright (C) 2006-2020 Fons Adriaensen
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -25,6 +25,12 @@
#include
+#undef ENABLE_VEC4
+#if defined(ENABLE_SSE2)
+# define ENABLE_VEC4
+#endif
+
+
int zita_resampler_major_version (void)
{
return ZITA_RESAMPLER_MAJOR_VERSION;
@@ -55,7 +61,6 @@
}
-
Resampler_table *Resampler_table::_list = 0;
Resampler_mutex Resampler_table::_mutex;
@@ -67,11 +72,16 @@
_hl (hl),
_np (np)
{
- unsigned int i, j;
+ unsigned int i, j, n;
double t;
float *p;
- _ctab = new float [hl * (np + 1)];
+ n = hl * (np + 1);
+#ifdef ENABLE_VEC4
+ posix_memalign ((void **) &_ctab, 16, n * sizeof (float));
+#else
+ _ctab = new float [n];
+#endif
p = _ctab;
for (j = 0; j <= np; j++)
{
@@ -88,7 +98,11 @@
Resampler_table::~Resampler_table (void)
{
+#ifdef ENABLE_VEC4
+ free (_ctab);
+#else
delete[] _ctab;
+#endif
}
diff -Nru zita-resampler-1.6.2/source/vresampler.cc zita-resampler-1.8.0/source/vresampler.cc
--- zita-resampler-1.6.2/source/vresampler.cc 2018-08-24 18:41:47.000000000 +0000
+++ zita-resampler-1.8.0/source/vresampler.cc 2020-12-29 13:24:52.000000000 +0000
@@ -1,6 +1,6 @@
// ----------------------------------------------------------------------------
//
-// Copyright (C) 2006-2013 Fons Adriaensen
+// Copyright (C) 2006-2020 Fons Adriaensen
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -22,7 +22,14 @@
#include
#include
#include
-#include
+
+#undef ENABLE_VEC4
+#if defined(ENABLE_SSE2)
+# define ENABLE_VEC4
+# include
+#endif
+
+#include "zita-resampler/vresampler.h"
VResampler::VResampler (void) :
@@ -56,34 +63,48 @@
unsigned int hlen,
double frel)
{
- unsigned int h, k, n;
- double s;
+ unsigned int hl, mi, n;
+ double dp;
Resampler_table *T = 0;
- if (! nchan) return 1;
- n = NPHASE;
- s = n / ratio;
- h = hlen;
- k = 250;
- if (ratio < 1)
+ if (!nchan || (64 * ratio < 1.0) || (ratio > 64))
+ {
+ clear ();
+ return 1;
+ }
+
+ dp = NPHASE / ratio;
+ hl = hlen;
+ mi = 32;
+ if (ratio < 1.0)
{
frel *= ratio;
- h = (unsigned int)(ceil (h / ratio));
- k = (unsigned int)(ceil (k / ratio));
+ hl = (unsigned int)(ceil (hl / ratio));
+ mi = (unsigned int)(ceil (mi / ratio));
}
- T = Resampler_table::create (frel, h, n);
+#ifdef ENABLE_VEC4
+ hl = (hl + 3) & ~3;
+#endif
+ T = Resampler_table::create (frel, hl, NPHASE);
clear ();
if (T)
{
_table = T;
- _buff = new float [nchan * (2 * h - 1 + k)];
- _c1 = new float [2 * h];
- _c2 = new float [2 * h];
+ n = nchan * (2 * hl + mi);
+#ifdef ENABLE_VEC4
+ posix_memalign ((void **)(&_buff), 16, n * sizeof (float));
+ posix_memalign ((void **)(&_c1), 16, hl * sizeof (float));
+ posix_memalign ((void **)(&_c2), 16, hl * sizeof (float));
+#else
+ _buff = new float [n];
+ _c1 = new float [hl];
+ _c2 = new float [hl];
+#endif
_nchan = nchan;
- _inmax = k;
- _ratio = ratio;
- _pstep = s;
- _qstep = s;
+ _ratio = ratio;
+ _inmax = mi;
+ _pstep = dp;
+ _qstep = dp;
_wstep = 1;
return reset ();
}
@@ -94,9 +115,15 @@
void VResampler::clear (void)
{
Resampler_table::destroy (_table);
+#ifdef ENABLE_VEC4
+ free (_buff);
+ free (_c1);
+ free (_c2);
+#else
delete[] _buff;
delete[] _c1;
delete[] _c2;
+#endif
_buff = 0;
_c1 = 0;
_c2 = 0;
@@ -156,17 +183,22 @@
inp_data = 0;
out_data = 0;
_index = 0;
- _phase = 0;
- _nread = 2 * _table->_hl;
+ _nread = 0;
_nzero = 0;
- return 0;
+ _phase = 0;
+ if (_table)
+ {
+ _nread = 2 * _table->_hl;
+ return 0;
+ }
+ return 1;
}
int VResampler::process (void)
{
- unsigned int k, np, in, nr, n, c;
- int i, hl, nz;
+ int nr, np, hl, nz, di, i, n;
+ unsigned int in, j;
double ph, dp, dd;
float a, b, *p1, *p2, *q1, *q2;
@@ -179,88 +211,131 @@
nz = _nzero;
ph = _phase;
dp = _pstep;
- n = (2 * hl - nr) * _nchan;
- p1 = _buff + in * _nchan;
- p2 = p1 + n;
+
+ p1 = _buff + in;
+ p2 = p1 + 2 * hl - nr;
+ di = 2 * hl + _inmax;
while (out_count)
{
- if (nr)
- {
- if (inp_count == 0) break;
- if (inp_data)
- {
- for (c = 0; c < _nchan; c++) p2 [c] = inp_data [c];
- inp_data += _nchan;
- nz = 0;
- }
- else
- {
- for (c = 0; c < _nchan; c++) p2 [c] = 0;
- if (nz < 2 * hl) nz++;
- }
- nr--;
- p2 += _nchan;
- inp_count--;
- }
- else
+ while (nr && inp_count)
+ {
+ if (inp_data)
+ {
+ for (j = 0; j < _nchan; j++) p2 [j * di] = inp_data [j];
+ inp_data += _nchan;
+ nz = 0;
+ }
+ else
+ {
+ for (j = 0; j < _nchan; j++) p2 [j * di] = 0;
+ if (nz < 2 * hl) nz++;
+ }
+ p2++;
+ nr--;
+ inp_count--;
+ }
+ if (nr) break;
+
+ if (out_data)
{
- if (out_data)
+ if (nz < 2 * hl)
{
- if (nz < 2 * hl)
+ n = (unsigned int) ph;
+ b = (float)(ph - n);
+ a = 1.0f - b;
+ q1 = _table->_ctab + hl * n;
+ q2 = _table->_ctab + hl * (np - n);
+
+#if defined(ENABLE_SSE2)
+ __m128 C1, C2, Q1, Q2, S;
+ C1 = _mm_load1_ps (&a);
+ C2 = _mm_load1_ps (&b);
+ for (i = 0; i < hl; i += 4)
{
- k = (unsigned int) ph;
- b = (float)(ph - k);
- a = 1.0f - b;
- q1 = _table->_ctab + hl * k;
- q2 = _table->_ctab + hl * (np - k);
- for (i = 0; i < hl; i++)
- {
- _c1 [i] = a * q1 [i] + b * q1 [i + hl];
- _c2 [i] = a * q2 [i] + b * q2 [i - hl];
- }
- for (c = 0; c < _nchan; c++)
- {
- q1 = p1 + c;
- q2 = p2 + c;
- a = 1e-25f;
- for (i = 0; i < hl; i++)
- {
- q2 -= _nchan;
- a += *q1 * _c1 [i] + *q2 * _c2 [i];
- q1 += _nchan;
- }
- *out_data++ = a - 1e-25f;
- }
+ Q1 = _mm_load_ps (q1 + i);
+ Q2 = _mm_load_ps (q1 + i + hl);
+ S = _mm_add_ps (_mm_mul_ps (Q1, C1), _mm_mul_ps (Q2, C2));
+ _mm_store_ps (_c1 + i, S);
+ Q1 = _mm_load_ps (q2 + i);
+ Q2 = _mm_load_ps (q2 + i - hl);
+ S = _mm_add_ps (_mm_mul_ps (Q1, C1), _mm_mul_ps (Q2, C2));
+ _mm_store_ps (_c2 + i, S);
}
- else
+ for (j = 0; j < _nchan; j++)
+ {
+ q1 = p1 + j * di;
+ q2 = p2 + j * di;
+ S = _mm_setzero_ps ();
+ for (i = 0; i < hl; i += 4)
+ {
+ C1 = _mm_load_ps (_c1 + i);
+ Q1 = _mm_loadu_ps (q1);
+ q2 -= 4;
+ S = _mm_add_ps (S, _mm_mul_ps (C1, Q1));
+ C2 = _mm_loadr_ps (_c2 + i);
+ Q2 = _mm_loadu_ps (q2);
+ q1 += 4;
+ S = _mm_add_ps (S, _mm_mul_ps (C2, Q2));
+ }
+ *out_data++ = S [0] + S [1] + S [2] + S [3];
+ }
+
+#else
+ float s;
+ for (i = 0; i < hl; i++)
{
- for (c = 0; c < _nchan; c++) *out_data++ = 0;
+ _c1 [i] = a * q1 [i] + b * q1 [i + hl];
+ _c2 [i] = a * q2 [i] + b * q2 [i - hl];
}
- }
- out_count--;
-
- dd = _qstep - dp;
- if (fabs (dd) < 1e-30) dp = _qstep;
- else dp += _wstep * dd;
- ph += dp;
- if (ph >= np)
- {
- nr = (unsigned int) floor( ph / np);
- ph -= nr * np;;
- in += nr;
- p1 += nr * _nchan;;
- if (in >= _inmax)
+ for (j = 0; j < _nchan; j++)
{
- n = (2 * hl - nr) * _nchan;
- memcpy (_buff, p1, n * sizeof (float));
- in = 0;
- p1 = _buff;
- p2 = p1 + n;
+ q1 = p1 + j * di;
+ q2 = p2 + j * di;
+ s = 1e-20f;
+ for (i = 0; i < hl; i++)
+ {
+ q2--;
+ s += *q1 * _c1 [i] + *q2 * _c2 [i];
+ q1++;
+ }
+ *out_data++ = s - 1e-20f;
}
+#endif
+ }
+ else
+ {
+ for (j = 0; j < _nchan; j++) *out_data++ = 0;
}
}
+ out_count--;
+
+ dd = _qstep - dp;
+ if (fabs (dd) < 1e-20) dp = _qstep;
+ else dp += _wstep * dd;
+ ph += dp;
+ if (ph >= np)
+ {
+ nr = (unsigned int) floor (ph / np);
+ ph -= nr * np;;
+ in += nr;
+ p1 += nr;
+
+ if (in >= _inmax)
+ {
+ n = 2 * hl - nr;
+ p2 = _buff;
+ for (j = 0; j < _nchan; j++)
+ {
+ memmove (p2 + j * di, p1 + j * di, n * sizeof (float));
+ }
+ in = 0;
+ p1 = _buff;
+ p2 = p1 + n;
+ }
+ }
}
+
_index = in;
_nread = nr;
_phase = ph;
diff -Nru zita-resampler-1.6.2/source/zita-resampler/resampler.h zita-resampler-1.8.0/source/zita-resampler/resampler.h
--- zita-resampler-1.6.2/source/zita-resampler/resampler.h 2018-08-24 18:41:47.000000000 +0000
+++ zita-resampler-1.8.0/source/zita-resampler/resampler.h 2020-12-28 19:46:46.000000000 +0000
@@ -1,6 +1,6 @@
// ----------------------------------------------------------------------------
//
-// Copyright (C) 2006-2012 Fons Adriaensen
+// Copyright (C) 2006-2020 Fons Adriaensen
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -22,7 +22,7 @@
#define __RESAMPLER_H
-#include
+#include "zita-resampler/resampler-table.h"
class Resampler
@@ -55,8 +55,8 @@
unsigned int out_count;
float *inp_data;
float *out_data;
- void *inp_list;
- void *out_list;
+ float **inp_list;
+ float **out_list;
private:
diff -Nru zita-resampler-1.6.2/source/zita-resampler/resampler-table.h zita-resampler-1.8.0/source/zita-resampler/resampler-table.h
--- zita-resampler-1.6.2/source/zita-resampler/resampler-table.h 2018-08-24 18:41:47.000000000 +0000
+++ zita-resampler-1.8.0/source/zita-resampler/resampler-table.h 2020-12-28 14:27:20.000000000 +0000
@@ -1,6 +1,6 @@
// ----------------------------------------------------------------------------
//
-// Copyright (C) 2006-2012 Fons Adriaensen
+// Copyright (C) 2006-2020 Fons Adriaensen
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -26,7 +26,7 @@
#define ZITA_RESAMPLER_MAJOR_VERSION 1
-#define ZITA_RESAMPLER_MINOR_VERSION 6
+#define ZITA_RESAMPLER_MINOR_VERSION 8
extern int zita_resampler_major_version (void);
diff -Nru zita-resampler-1.6.2/source/zita-resampler/vresampler.h zita-resampler-1.8.0/source/zita-resampler/vresampler.h
--- zita-resampler-1.6.2/source/zita-resampler/vresampler.h 2018-08-24 18:41:47.000000000 +0000
+++ zita-resampler-1.8.0/source/zita-resampler/vresampler.h 2020-12-29 09:58:06.000000000 +0000
@@ -1,6 +1,6 @@
// ----------------------------------------------------------------------------
//
-// Copyright (C) 2006-2012 Fons Adriaensen
+// Copyright (C) 2006-2020 Fons Adriaensen
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
@@ -22,7 +22,7 @@
#define __VRESAMPLER_H
-#include
+#include "zita-resampler/resampler-table.h"
class VResampler
@@ -61,7 +61,7 @@
private:
- enum { NPHASE = 256 };
+ enum { NPHASE = 120 };
Resampler_table *_table;
unsigned int _nchan;
diff -Nru zita-resampler-1.6.2/test/jackproc.cc zita-resampler-1.8.0/test/jackproc.cc
--- zita-resampler-1.6.2/test/jackproc.cc 1970-01-01 00:00:00.000000000 +0000
+++ zita-resampler-1.8.0/test/jackproc.cc 2020-12-29 10:08:29.000000000 +0000
@@ -0,0 +1,296 @@
+// -----------------------------------------------------------------------------
+//
+// Copyright (C) 2020 Fons Adriaensen
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see .
+//
+// -----------------------------------------------------------------------------
+
+
+// Zita-resampler demo program.
+//
+// Imagine you want a jack client that internally needs to use a
+// fixed sample rate and period size regardless of how Jack is
+// configured. This demo program contains all the buffering and
+// resampling required to do this. The process in this case is
+// just a copy.
+//
+// The signal flow is like this:
+//
+// jack -> queue -> resampler -> process -> resampler -> queue -> jack.
+//
+// The queues are required because in most cases the process period,
+// taking the resampling ratio into account, will not be an integer
+// fraction or multiple of jack's period. They will add some latency,
+// but never more than the process period time plus the delay of the
+// resamplers.
+
+
+//#define USE_VRESAMPLER
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "lfqueue.h"
+
+
+#define MAXCHAN 16
+
+static jack_client_t *jack_handle;
+static jack_port_t *jack_capt [MAXCHAN];
+static jack_port_t *jack_play [MAXCHAN];
+static bool active = false;
+static int nchan;
+static uint32_t proc_rate;
+static uint32_t proc_frag;
+static uint32_t jack_rate;
+static uint32_t jack_frag;
+#ifdef USE_VRESAMPLER
+static VResampler input_resampler;
+static VResampler output_resampler;
+#else
+static Resampler input_resampler;
+static Resampler output_resampler;
+#endif
+static Audioqueue *input_queue = 0;
+static Audioqueue *output_queue = 0;
+static float *input_buff = 0;
+static float *output_buff = 0;
+
+
+
+int jack_process (jack_nframes_t nframes, void *arg)
+{
+ int i, j, k, n;
+ float *inp [MAXCHAN];
+ float *out [MAXCHAN];
+ float *p, *q;
+
+ if (! active) return 0;
+
+ // Get port buffers.
+ for (i = 0; i < nchan; i++)
+ {
+ inp [i] = (float *)(jack_port_get_buffer (jack_capt [i], nframes));
+ out [i] = (float *)(jack_port_get_buffer (jack_play [i], nframes));
+ }
+
+ // Copy from Jack ports to input queue.
+ // The for loop takes care of wraparound in the queue,
+ // there will be at most two iterations.
+ for (n = 0; n < (int) nframes; n += k)
+ {
+ // Get the number of frames that can be written
+ // without wraparound.
+ k = input_queue->write_nowrap ();
+ if (k > (int) nframes - n) k = nframes - n;
+ // Copy and interleave channels.
+ for (i = 0; i < nchan; i++)
+ {
+ p = inp [i] + n;
+ q = input_queue->write_ptr (i);
+ for (j = 0; j < k; j++) q [nchan * j] = p [j];
+ }
+ // Update queue state.
+ input_queue->write_commit (k);
+ }
+ // Check queue overflow.
+ assert (input_queue->write_avail () >= 0);
+
+ while (output_queue->read_avail () < (int) nframes)
+ {
+ // Resample from Jack's sample rate to the process
+ // sample rate, reading from input_queue and writing
+ // exactly proc_frag frames to input_buff.
+ // The while loop takes care of wraparound.
+ input_resampler.out_data = input_buff;
+ input_resampler.out_count = proc_frag;
+ while (input_resampler.out_count)
+ {
+ input_resampler.inp_data = input_queue->read_ptr ();
+ input_resampler.inp_count = n = input_queue->read_nowrap ();
+ input_resampler.process ();
+ input_queue->read_commit (n - input_resampler.inp_count);
+ }
+
+ // Now we have proc_frag frames in input_buff.
+ // Normally there would be some process using
+ // these, here we just copy to output_buff.
+ memcpy (output_buff, input_buff, nchan * proc_frag * sizeof (float));
+
+ // Resample from the process sample rate to Jack's
+ // sample rate, taking exactly proc_frag frames
+ // from output_buff, and writing to output_queue.
+ // The while loop takes care of wraparound in the
+ // queue.
+ output_resampler.inp_data = output_buff;
+ output_resampler.inp_count = proc_frag;
+ while (output_resampler.inp_count)
+ {
+ output_resampler.out_data = output_queue->write_ptr ();
+ output_resampler.out_count = n = output_queue->write_nowrap ();
+ output_resampler.process ();
+ output_queue->write_commit (n - output_resampler.out_count);
+ }
+ }
+ // Check queues.
+ assert (input_queue->read_avail () >= 0);
+ assert (output_queue->write_avail () >= 0);
+
+ // Copy from output queue to Jack ports.
+ // The for loop takes care of wraparound in the queue,
+ // there will be at most two iterations.
+ for (n = 0; n < (int) nframes; n += k)
+ {
+ // Get the number of frames that can be read
+ // without wraparound.
+ k = output_queue->read_nowrap ();
+ if (k > (int) nframes - n) k = nframes - n;
+ // Copy and de-interleave channels.
+ for (i = 0; i < nchan; i++)
+ {
+ p = output_queue->read_ptr (i);
+ q = out [i] + n;
+ for (j = 0; j < k; j++) q [j] = p [nchan * j];
+ }
+ // Update queue state.
+ output_queue->read_commit (k);
+ }
+ // Check queue underflow.
+ assert (output_queue->read_avail () >= 0);
+
+ return 0;
+}
+
+
+static void sigint_handler (int)
+{
+ signal (SIGINT, SIG_IGN);
+ active = false;
+}
+
+
+int main (int ac, char *av [])
+{
+    int32_t i, n;
+    int64_t k;
+    char s [16];
+    jack_status_t stat;
+    // Parse arguments: channel count, process sample rate, process period.
+    if (ac < 4)
+    {
+        fprintf (stderr, "jackproc \n");
+        return 1;
+    }
+    nchan = atoi (av [1]);
+    proc_rate = atoi (av [2]);
+    proc_frag = atoi (av [3]);
+    if ((nchan < 1) || (proc_rate < 1) || (proc_frag < 1)) return 1; // zero rate/period is unusable
+    if (nchan > MAXCHAN) nchan = MAXCHAN;
+
+    // Create and initialise the Jack client.
+    jack_handle = jack_client_open ("Jackproc", JackNoStartServer, &stat);
+    if (jack_handle == 0)
+    {
+        fprintf (stderr, "Can't connect to Jack, is the server running ?\n");
+        return 1;
+    }
+
+    jack_set_process_callback (jack_handle, jack_process, 0);
+    if (jack_activate (jack_handle))
+    {
+        fprintf(stderr, "Can't activate Jack\n");
+        return 1;
+    }
+
+    for (i = 0; i < nchan; i++)
+    {
+        sprintf (s, "in_%d", i);
+        jack_capt [i] = jack_port_register (jack_handle, s, JACK_DEFAULT_AUDIO_TYPE, JackPortIsInput, 0);
+        sprintf (s, "out_%d", i);
+        jack_play [i] = jack_port_register (jack_handle, s, JACK_DEFAULT_AUDIO_TYPE, JackPortIsOutput, 0);
+    }
+
+    jack_rate = jack_get_sample_rate (jack_handle);
+    jack_frag = jack_get_buffer_size (jack_handle);
+
+    // Set the resampling ratios: input goes jack -> process, output the reverse.
+#ifdef USE_VRESAMPLER
+    if (input_resampler.setup ((double) proc_rate / jack_rate, nchan, 32))
+#else
+    if (input_resampler.setup (jack_rate, proc_rate, nchan, 32))
+#endif
+    {
+        fprintf (stderr, "Resampler can't handle the ratio %u/%u\n",
+                 proc_rate, jack_rate);
+        goto cleanup;
+    }
+#ifdef USE_VRESAMPLER
+    if (output_resampler.setup ((double) jack_rate / proc_rate, nchan, 32))
+#else
+    if (output_resampler.setup (proc_rate, jack_rate, nchan, 32))
+#endif
+    {
+        fprintf (stderr, "Resampler can't handle the ratio %u/%u\n",
+                 jack_rate, proc_rate);
+        goto cleanup;
+    }
+
+    // Initialise the resamplers for zero delay.
+    input_resampler.inp_count = input_resampler.inpsize () - 1;
+    input_resampler.inp_data = 0;
+    input_resampler.out_count = 999999;
+    input_resampler.out_data = 0;
+    input_resampler.process ();
+    output_resampler.inp_count = output_resampler.inpsize () - 1;
+    output_resampler.inp_data = 0;
+    output_resampler.out_count = 999999;
+    output_resampler.out_data = 0;
+    output_resampler.process ();
+
+    input_buff = new float [nchan * proc_frag];
+    output_buff = new float [nchan * proc_frag];
+
+    // Compute the number of extra samples we need to buffer.
+    k = (int64_t) jack_rate * proc_frag; // 64-bit product, cannot wrap
+    n = k / proc_rate;
+
+    // Create the queues, and prefill the input queue.
+    input_queue = new Audioqueue (jack_frag + n, nchan, true);
+    output_queue = new Audioqueue (jack_frag + n, nchan, true);
+    input_queue->write_commit (n);
+
+    signal (SIGINT, sigint_handler);
+
+    // Enable the process callback and wait until SIGINT clears 'active'.
+    for (active = true; active; usleep (250000));
+
+cleanup:
+    // Cleanup. The buffer and queue pointers are still 0 if setup failed.
+    jack_deactivate (jack_handle);
+    jack_client_close (jack_handle);
+    delete[] input_buff;
+    delete[] output_buff;
+    delete input_queue;
+    delete output_queue;
+
+    return 0;
+}
+
diff -Nru zita-resampler-1.6.2/test/lfqueue.cc zita-resampler-1.8.0/test/lfqueue.cc
--- zita-resampler-1.6.2/test/lfqueue.cc 1970-01-01 00:00:00.000000000 +0000
+++ zita-resampler-1.8.0/test/lfqueue.cc 2020-12-27 15:41:51.000000000 +0000
@@ -0,0 +1,60 @@
+// ----------------------------------------------------------------------------
+//
+// Copyright (C) 2012-2016 Fons Adriaensen
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see .
+//
+// ----------------------------------------------------------------------------
+
+
+#include "lfqueue.h"
+
+
+Queuebase::Queuebase (int nelm) :
+ _nelm (0),
+ _mask (0),
+ _kwrite (0),
+ _kread (0)
+{
+ int32_t k;
+
+ if (nelm > 0x01000000) return;
+ for (k = 1; k < nelm; k <<= 1);
+ _nelm = k;
+ _mask = k - 1;
+}
+
+
+Queuebase::~Queuebase (void)
+{
+}
+
+
+
+Audioqueue::Audioqueue (int32_t minsize, int nchannel, bool interleaved) :
+ Queuebase (minsize),
+ _nchan (nchannel),
+ _inter (interleaved),
+ _data (0)
+{
+ _data = new float [_nelm * _nchan];
+}
+
+
+Audioqueue::~Audioqueue (void)
+{
+ delete[] _data;
+}
+
+
diff -Nru zita-resampler-1.6.2/test/lfqueue.h zita-resampler-1.8.0/test/lfqueue.h
--- zita-resampler-1.6.2/test/lfqueue.h 1970-01-01 00:00:00.000000000 +0000
+++ zita-resampler-1.8.0/test/lfqueue.h 2020-12-27 22:40:30.000000000 +0000
@@ -0,0 +1,178 @@
+// ----------------------------------------------------------------------------
+//
+// Copyright (C) 2010-2020 Fons Adriaensen
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see .
+//
+// ----------------------------------------------------------------------------
+
+
+#ifndef __LFQUEUE_H
+#define __LFQUEUE_H
+
+
+#include
+#include
+
+
+// Base class for lock-free queues.
+//
+// The logic implemented here is somewhat different from
+// how e.g. Jack's lock-free queues work. It does nothing
+// to stop the user overflowing or underflowing the queue.
+// Avoiding these conditions is easy enough, and allowing
+// them can be useful in some applications.
+// The logic keeps correct read/write counts as long as the
+// over/underflow is less than 2^31 elements.
+// Even if data is corrupted, this allows the client process
+// to recover while maintaining a defined latency.
+// To make this work, the logical queue size (the number of
+// elements, regardless of element size in bytes) must be a
+// power of two. The constructor will if necessary round up
+// the size.
+
+
+class Queuebase
+{
+public:
+
+
+ Queuebase (int32_t nelm);
+ ~Queuebase (void);
+
+ // Return queue size.
+ int32_t nelm (void) const { return _nelm; }
+
+ // Reset queue state to empty.
+ void reset (void)
+ {
+ _kwrite = 0;
+ _kread = 0;
+ }
+
+ // Return number of elements that can be written
+ // without overflow. A negative value indicates
+ // that the queue is already in overflow.
+ int32_t write_avail (void) const
+ {
+ return _nelm - _kwrite + _kread;
+ }
+
+ // Return number of elements that can be written
+ // without wraparound. Note: this only reflects
+ // the position of the write pointer relative to
+ // the end of the buffer, so it can be more than
+ // write_avail().
+ int32_t write_nowrap (void) const
+ {
+ return _nelm - (_kwrite & _mask);
+ }
+
+ // Adjust queue state, reflecting nelm elements
+ // have been written.
+ void write_commit (int32_t nelm)
+ {
+ _kwrite += nelm;
+ }
+
+ // Return number of elements that can be read
+ // without underflow. A negative value indicates
+ // that the queue is already in underflow.
+ int32_t read_avail (void) const
+ {
+ return _kwrite - _kread;
+ }
+
+ // Return number of elements that can be read
+ // without wraparound. Note: this only reflects
+ // the position of the read pointer relative to
+ // the end of the buffer, so it can be more than
+ // read_avail().
+ int32_t read_nowrap (void) const
+ {
+ return _nelm - (_kread & _mask);
+ }
+
+ // Adjust queue state, reflecting nf elements
+ // have been read.
+ void read_commit (int32_t nf)
+ {
+ _kread += nf;
+ }
+
+protected:
+
+ int32_t _nelm;
+ int32_t _mask;
+ int32_t _kwrite;
+ int32_t _kread;
+};
+
+
+// Multichannel lock-free audio sample queue.
+// Channels can be separate or interleaved.
+// The way this queue works is different from
+// Jack's lock-free queues in two ways:
+//
+// 1. See Queuebase above.
+//
+// 2. For reading or writing, only a pointer is
+// provided, so the user has to do the work.
+// In many cases this can avoid the need for
+// intermediate copies. It also means that
+// wraparound is exposed to the user, but
+// handling this is quite easy.
+//
+// Note that all methods inherited from Queuebase
+// return a number of frames, not samples.
+
+
+
+
+
+class Audioqueue : public Queuebase
+{
+public:
+
+ Audioqueue (int32_t minsize, int nchannel, bool interleaved);
+ ~Audioqueue (void);
+
+ int32_t nchan (void) const { return _nchan; }
+
+ float *write_ptr (int ch = 0) const
+ {
+ uint32_t k = _kwrite & _mask;
+ if (_inter) k = k * _nchan + ch;
+ else k += ch * _nelm;
+ return _data + k;
+ }
+
+ float *read_ptr (int ch = 0) const
+ {
+ uint32_t k = _kread & _mask;
+ if (_inter) k = k * _nchan + ch;
+ else k += ch * _nelm;
+ return _data + k;
+ }
+
+private:
+
+ int32_t _nchan;
+ bool _inter;
+ float *_data;
+};
+
+
+#endif
+
diff -Nru zita-resampler-1.6.2/test/Makefile zita-resampler-1.8.0/test/Makefile
--- zita-resampler-1.6.2/test/Makefile 1970-01-01 00:00:00.000000000 +0000
+++ zita-resampler-1.8.0/test/Makefile 2020-12-29 14:20:47.000000000 +0000
@@ -0,0 +1,49 @@
+# ----------------------------------------------------------------------------
+#
+# Copyright (C) 2020 Fons Adriaensen
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see .
+#
+# ----------------------------------------------------------------------------
+
+
+# Compilation flags
+# Appended to any CXXFLAGS inherited from the environment.
+CXXFLAGS += -O2 -Wall
+
+
+# Targets
+# Each program is linked with -lzita-resampler using the same $(CXX) that compiled it.
+all: speedtest vspeedtest upstest jackproc
+.PHONY: all clean
+
+speedtest: speedtest.o
+	$(CXX) $(LDFLAGS) -o $@ $^ -lzita-resampler
+
+
+vspeedtest: vspeedtest.o
+	$(CXX) $(LDFLAGS) -o $@ $^ -lzita-resampler
+
+
+upstest: upstest.o
+	$(CXX) $(LDFLAGS) -o $@ $^ -lzita-resampler
+
+
+jackproc: jackproc.o lfqueue.o
+	$(CXX) $(LDFLAGS) -o $@ $^ -lzita-resampler -ljack
+
+
+clean:
+	$(RM) *.o *.u *.so *~ speedtest vspeedtest upstest jackproc zz*
+
diff -Nru zita-resampler-1.6.2/test/speedtest.cc zita-resampler-1.8.0/test/speedtest.cc
--- zita-resampler-1.6.2/test/speedtest.cc 1970-01-01 00:00:00.000000000 +0000
+++ zita-resampler-1.8.0/test/speedtest.cc 2020-12-28 16:00:54.000000000 +0000
@@ -0,0 +1,77 @@
+// ----------------------------------------------------------------------------
+//
+// Copyright (C) 2020 Fons Adriaensen
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see .
+//
+// ----------------------------------------------------------------------------
+
+
+#include
+#include
+#include
+#include
+
+
+#define LINP 10000
+#define LOUT 11000
+#define ITER 1000
+
+
+int main (int ac, char *av[])
+{
+    int c, h, i;
+    Resampler R;
+    float *inp;
+    float *out;
+    timespec t0, t1;
+    int64_t ds, dn;
+    double dt;
+    // Time ITER passes of LINP input frames through a 441:480 resampler.
+    if (ac < 3)
+    {
+        fprintf (stderr, "speedtest \n");
+        return 1;
+    }
+    c = atoi (av [1]);
+    h = atoi (av [2]);
+    // Validate before allocating: c sizes the buffers, and a non-zero
+    // return from setup() means the resampler was not configured.
+    if ((c < 1) || R.setup (441, 480, c, h)) return 1;
+
+    inp = new float [c * LINP];
+    out = new float [c * LOUT];
+    for (i = 0; i < c * LINP; i++) inp [i] = i * 1e-4f;
+
+    // Use the monotonic clock: the interval must not be affected by clock adjustments.
+    clock_gettime (CLOCK_MONOTONIC, &t0);
+    for (i = 0; i < ITER; i++)
+    {
+        R.inp_count = LINP;
+        R.inp_data = inp;
+        R.out_count = LOUT;
+        R.out_data = out;
+        R.process ();
+    }
+    clock_gettime (CLOCK_MONOTONIC, &t1);
+    ds = t1.tv_sec - t0.tv_sec;
+    dn = t1.tv_nsec - t0.tv_nsec;
+    dt = ds + 1e-9 * dn;
+    printf ("44100 -> 48000, chan = %2d, hlen = %2d %8.3le input frames per second\n",
+            c, h, LINP * ITER / dt);
+
+    delete[] inp;
+    delete[] out;
+    return 0;
+}
+
diff -Nru zita-resampler-1.6.2/test/upstest.cc zita-resampler-1.8.0/test/upstest.cc
--- zita-resampler-1.6.2/test/upstest.cc 1970-01-01 00:00:00.000000000 +0000
+++ zita-resampler-1.8.0/test/upstest.cc 2020-12-29 14:18:14.000000000 +0000
@@ -0,0 +1,102 @@
+
+#include
+#include
+#include
+#include
+
+
+#define NCHAN 7  // channels per frame in the interleaved buffers
+#define CHAN 3  // channel that carries the test pulses and is printed
+#define HLEN 48  // hlen argument passed to both setup() calls
+#define LINP 300  // input length in frames
+#define LOUT 1380 // LINP * 4.6
+
+
+// Basic upsampling test of Resampler and Vresampler.
+//
+// We upsample by 4.6 and prefill the resamplers for zero delay.
+// In the input signal we put a single sample with value 1 at
+// offsets 0, 100 and 201. So in the output we expect a band-
+// limited pulse (i.e. a windowed sinc() function) peaking at
+// offsets 0, 460, and 924.6.
+//
+// Since we input exactly LINP + inpsize() - 1 samples, and the
+// output buffer size is exactly the input size times the ratio,
+// the input and output counters should both end up at 0.
+//
+// test1 > zz1
+// gnuplot
+// set grid
+// plot 'zz1' u 1:2 w l lt 1, 'zz1' u 1:3 w l lt 2
+//
+// Zoom in to verify the positions.
+
+
+int main (int ac, char *av[])
+{
+    // Runs the same pulse input through a Resampler and a VResampler and
+    // prints channel CHAN of both outputs, one output frame per line.
+    Resampler   R;
+    VResampler  V;
+    float       inp  [NCHAN * LINP];
+    float       out1 [NCHAN * LOUT];
+    float       out2 [NCHAN * LOUT];
+
+    // Clear input array.
+    memset (inp, 0, sizeof (inp));
+    // Put a single sample at offsets 0, 100 and 201.
+    inp [CHAN] = 1;
+    inp [CHAN + NCHAN * 100] = 1;
+    inp [CHAN + NCHAN * 201] = 1;
+
+    // Setup for upsampling by 46 / 10.
+    R.setup (10, 46, NCHAN, HLEN);
+    V.setup (4.6, NCHAN, HLEN);
+
+    // Prefill for zero delay.
+    R.out_count = LOUT;
+    R.out_data = out1;
+    R.inp_count = R.inpsize () / 2 - 1;
+    R.inp_data = 0;
+    R.process ();
+    fprintf (stderr, "inp_count = %6d, out_count = %6d\n", R.inp_count, R.out_count);
+
+    // Process the entire input buffer.
+    R.inp_count = LINP;
+    R.inp_data = inp;
+    R.process ();
+    fprintf (stderr, "inp_count = %6d, out_count = %6d\n", R.inp_count, R.out_count);
+
+    // Postfill to complete.
+    R.inp_count = R.inpsize () / 2;
+    R.inp_data = 0;
+    R.process ();
+    fprintf (stderr, "inp_count = %6d, out_count = %6d\n", R.inp_count, R.out_count);
+
+    // Same three stages for VResampler, sized from its own inpsize ().
+    V.out_count = LOUT;
+    V.out_data = out2;
+    V.inp_count = V.inpsize () / 2 - 1;
+    V.inp_data = 0;
+    V.process ();
+
+    V.inp_count = LINP;
+    V.inp_data = inp;
+    V.process ();
+
+    V.inp_count = V.inpsize () / 2;
+    V.inp_data = 0;
+    V.process ();
+
+    // Write both the Resampler and VResampler outputs, they
+    // should be identical.
+    const float *p1 = out1;
+    const float *p2 = out2;
+    for (int i = 0; i < LOUT; i++)
+    {
+        printf ("%5d %10.8lf %10.8lf\n", i, p1 [CHAN], p2 [CHAN]);
+        p1 += NCHAN;
+        p2 += NCHAN;
+    }
+    return 0;
+}
diff -Nru zita-resampler-1.6.2/test/vspeedtest.cc zita-resampler-1.8.0/test/vspeedtest.cc
--- zita-resampler-1.6.2/test/vspeedtest.cc 1970-01-01 00:00:00.000000000 +0000
+++ zita-resampler-1.8.0/test/vspeedtest.cc 2020-12-28 16:01:51.000000000 +0000
@@ -0,0 +1,77 @@
+// ----------------------------------------------------------------------------
+//
+// Copyright (C) 2020 Fons Adriaensen
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+// ----------------------------------------------------------------------------
+
+
+#include
+#include
+#include
+#include
+
+
+#define LINP 10000  // input frames offered to each process() call
+#define LOUT 11000  // output frames made available to each process() call
+#define ITER 1000  // number of timed process() calls
+
+
+int main (int ac, char *av[])
+{
+    // Speed test: feeds ITER blocks of LINP frames to a VResampler set up for
+    // ratio 480 / 441 and prints the number of input frames handled per second.
+    // av [1] is the channel count, av [2] the hlen value given to setup().
+    VResampler  R;
+    timespec    t0, t1;
+
+    if (ac < 3)
+    {
+        fprintf (stderr, "vspeedtest <nchan> <hlen>\n");
+        return 1;
+    }
+    const int c = atoi (av [1]);
+    const int h = atoi (av [2]);
+    // atoi() yields 0 for non-numeric text; a count below 1 would make the
+    // buffer sizes invalid. setup() returns non-zero if it rejects its arguments.
+    if ((c < 1) || (h < 1) || R.setup (480.0 / 441.0, c, h))
+    {
+        fprintf (stderr, "vspeedtest: unsupported nchan or hlen\n");
+        return 1;
+    }
+
+    float *inp = new float [c * LINP];
+    float *out = new float [c * LOUT];
+    for (int i = 0; i < c * LINP; i++) inp [i] = i * 1e-4f;
+
+    // Monotonic clock: wall-clock adjustments must not distort the interval.
+    clock_gettime (CLOCK_MONOTONIC, &t0);
+    for (int i = 0; i < ITER; i++)
+    {
+        R.inp_count = LINP;  R.inp_data = inp;
+        R.out_count = LOUT;  R.out_data = out;
+        R.process ();
+    }
+    clock_gettime (CLOCK_MONOTONIC, &t1);
+    // Elapsed time in seconds.
+    const double dt = (t1.tv_sec - t0.tv_sec) + 1e-9 * (t1.tv_nsec - t0.tv_nsec);
+    printf ("44100 -> 48000, chan = %2d, hlen = %2d %8.3le input frames per second\n",
+            c, h, LINP * ITER / dt);
+
+    delete[] inp;
+    delete[] out;
+    return 0;
+}
+