[oe] [PATCH 2/2] fluidsynth: add ARM NEON support for sample interpolation

Wed Jan 25 20:06:03 UTC 2017

Signed-off-by: Andreas Müller <schnitzeltony at googlemail.com>
---
 ...e_dsp_interpolate_4th_order-make-use-of-A.patch | 158 +++++++++++++++++++++
 .../fluidsynth/fluidsynth_1.1.6.bb                 |   5 +-
 2 files changed, 162 insertions(+), 1 deletion(-)
 create mode 100644 meta-multimedia/recipes-multimedia/fluidsynth/files/0001-fluid_rvoice_dsp_interpolate_4th_order-make-use-of-A.patch

diff --git a/meta-multimedia/recipes-multimedia/fluidsynth/files/0001-fluid_rvoice_dsp_interpolate_4th_order-make-use-of-A.patch b/meta-multimedia/recipes-multimedia/fluidsynth/files/0001-fluid_rvoice_dsp_interpolate_4th_order-make-use-of-A.patch
new file mode 100644
index 0000000..855f641
--- /dev/null
+++ b/meta-multimedia/recipes-multimedia/fluidsynth/files/0001-fluid_rvoice_dsp_interpolate_4th_order-make-use-of-A.patch
@@ -0,0 +1,158 @@
+From 6cf151bd571ab6288ab0bfa7bc4c854bef012183 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Andreas=20M=C3=BCller?= <schnitzeltony at googlemail.com>
+Date: Mon, 23 Jan 2017 19:32:06 +0100
+Subject: [PATCH] fluid_rvoice_dsp_interpolate_4th_order: make use of ARM NEON
+ intrinsics
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Upstream-Status: Pending
+
+Signed-off-by: Andreas Müller <schnitzeltony at googlemail.com>
+---
+ src/rvoice/fluid_rvoice_dsp.c | 87 ++++++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 86 insertions(+), 1 deletion(-)
+
+diff --git a/src/rvoice/fluid_rvoice_dsp.c b/src/rvoice/fluid_rvoice_dsp.c
+index df7da50..ca4a807 100644
+--- a/src/rvoice/fluid_rvoice_dsp.c
++++ b/src/rvoice/fluid_rvoice_dsp.c
+@@ -22,6 +22,9 @@
+ #include "fluid_phase.h"
+ #include "fluid_rvoice.h"
+ #include "fluid_sys.h"
++#ifdef __ARM_NEON__
++#include <arm_neon.h>
++#endif
+ 
+ /* Purpose:
+  *
+@@ -279,13 +282,19 @@ fluid_rvoice_dsp_interpolate_4th_order (fluid_rvoice_dsp_t *voice)
+   fluid_phase_t dsp_phase_incr;
+   short int *dsp_data = voice->sample->data;
+   fluid_real_t *dsp_buf = voice->dsp_buf;
++#if defined(__ARM_NEON__) && defined(WITH_FLOAT)
++  float32x4_t dsp_amp = vdupq_n_f32(voice->amp);
++  float32x4_t dsp_amp_incr = vdupq_n_f32(voice->amp_incr);
++  float32x4_t coeffs;
++#else
+   fluid_real_t dsp_amp = voice->amp;
+   fluid_real_t dsp_amp_incr = voice->amp_incr;
++  fluid_real_t *coeffs;
++#endif
+   unsigned int dsp_i = 0;
+   unsigned int dsp_phase_index;
+   unsigned int start_index, end_index;
+   short int start_point, end_point1, end_point2;
+-  fluid_real_t *coeffs;
+   int looping;
+ 
+   /* Convert playback "speed" floating point value to phase index/fract */
+@@ -327,11 +336,22 @@ fluid_rvoice_dsp_interpolate_4th_order (fluid_rvoice_dsp_t *voice)
+     /* interpolate first sample point (start or loop start) if needed */
+     for ( ; dsp_phase_index == start_index && dsp_i < FLUID_BUFSIZE; dsp_i++)
+     {
++#if defined(__ARM_NEON__) && defined(WITH_FLOAT)
++      coeffs = vld1q_f32(interp_coeff[fluid_phase_fract_to_tablerow (dsp_phase)]);
++      int16x4_t vdsp_data_i16 = vld1_s16(&dsp_data[dsp_phase_index-1]);
++      vdsp_data_i16 = vld1_lane_s16(&start_point, vdsp_data_i16, 0);
++      float32x4_t vdsp_data = vcvtq_f32_s32(vmovl_s16(vdsp_data_i16));
++      vdsp_data *= coeffs;
++      vdsp_data *= dsp_amp;
++      float32x2_t sum2 = vadd_f32(vget_high_f32(vdsp_data), vget_low_f32(vdsp_data));
++      dsp_buf[dsp_i] = vget_lane_f32(vpadd_f32(sum2, sum2), 0);
++#else
+       coeffs = interp_coeff[fluid_phase_fract_to_tablerow (dsp_phase)];
+       dsp_buf[dsp_i] = dsp_amp * (coeffs[0] * start_point
+ 				  + coeffs[1] * dsp_data[dsp_phase_index]
+ 				  + coeffs[2] * dsp_data[dsp_phase_index+1]
+ 				  + coeffs[3] * dsp_data[dsp_phase_index+2]);
++#endif
+ 
+       /* increment phase and amplitude */
+       fluid_phase_incr (dsp_phase, dsp_phase_incr);
+@@ -342,11 +362,21 @@ fluid_rvoice_dsp_interpolate_4th_order (fluid_rvoice_dsp_t *voice)
+     /* interpolate the sequence of sample points */
+     for ( ; dsp_i < FLUID_BUFSIZE && dsp_phase_index <= end_index; dsp_i++)
+     {
++#if defined(__ARM_NEON__) && defined(WITH_FLOAT)
++      coeffs = vld1q_f32(interp_coeff[fluid_phase_fract_to_tablerow (dsp_phase)]);
++      int16x4_t vdsp_data_i16 = vld1_s16(&dsp_data[dsp_phase_index-1]);
++      float32x4_t vdsp_data = vcvtq_f32_s32(vmovl_s16(vdsp_data_i16));
++      vdsp_data *= coeffs;
++      vdsp_data *= dsp_amp;
++      float32x2_t sum2 = vadd_f32(vget_high_f32(vdsp_data), vget_low_f32(vdsp_data));
++      dsp_buf[dsp_i] = vget_lane_f32(vpadd_f32(sum2, sum2), 0);
++#else
+       coeffs = interp_coeff[fluid_phase_fract_to_tablerow (dsp_phase)];
+       dsp_buf[dsp_i] = dsp_amp * (coeffs[0] * dsp_data[dsp_phase_index-1]
+ 				  + coeffs[1] * dsp_data[dsp_phase_index]
+ 				  + coeffs[2] * dsp_data[dsp_phase_index+1]
+ 				  + coeffs[3] * dsp_data[dsp_phase_index+2]);
++#endif
+ 
+       /* increment phase and amplitude */
+       fluid_phase_incr (dsp_phase, dsp_phase_incr);
+@@ -362,11 +392,22 @@ fluid_rvoice_dsp_interpolate_4th_order (fluid_rvoice_dsp_t *voice)
+     /* interpolate within 2nd to last point */
+     for (; dsp_phase_index <= end_index && dsp_i < FLUID_BUFSIZE; dsp_i++)
+     {
++#if defined(__ARM_NEON__) && defined(WITH_FLOAT)
++      coeffs = vld1q_f32(interp_coeff[fluid_phase_fract_to_tablerow (dsp_phase)]);
++      int16x4_t vdsp_data_i16 = vld1_s16(&dsp_data[dsp_phase_index-1]);
++      vdsp_data_i16 = vld1_lane_s16(&end_point1, vdsp_data_i16, 3);
++      float32x4_t vdsp_data = vcvtq_f32_s32(vmovl_s16(vdsp_data_i16));
++      vdsp_data *= coeffs;
++      vdsp_data *= dsp_amp;
++      float32x2_t sum2 = vadd_f32(vget_high_f32(vdsp_data), vget_low_f32(vdsp_data));
++      dsp_buf[dsp_i] = vget_lane_f32(vpadd_f32(sum2, sum2), 0);
++#else
+       coeffs = interp_coeff[fluid_phase_fract_to_tablerow (dsp_phase)];
+       dsp_buf[dsp_i] = dsp_amp * (coeffs[0] * dsp_data[dsp_phase_index-1]
+ 				  + coeffs[1] * dsp_data[dsp_phase_index]
+ 				  + coeffs[2] * dsp_data[dsp_phase_index+1]
+ 				  + coeffs[3] * end_point1);
++#endif
+ 
+       /* increment phase and amplitude */
+       fluid_phase_incr (dsp_phase, dsp_phase_incr);
+@@ -379,11 +420,23 @@ fluid_rvoice_dsp_interpolate_4th_order (fluid_rvoice_dsp_t *voice)
+     /* interpolate within the last point */
+     for (; dsp_phase_index <= end_index && dsp_i < FLUID_BUFSIZE; dsp_i++)
+     {
++#if defined(__ARM_NEON__) && defined(WITH_FLOAT)
++      coeffs = vld1q_f32(interp_coeff[fluid_phase_fract_to_tablerow (dsp_phase)]);
++      int16x4_t vdsp_data_i16 = vld1_s16(&dsp_data[dsp_phase_index-1]);
++      vdsp_data_i16 = vld1_lane_s16(&end_point1, vdsp_data_i16, 2);
++      vdsp_data_i16 = vld1_lane_s16(&end_point2, vdsp_data_i16, 3);
++      float32x4_t vdsp_data = vcvtq_f32_s32(vmovl_s16(vdsp_data_i16));
++      vdsp_data *= coeffs;
++      vdsp_data *= dsp_amp;
++      float32x2_t sum2 = vadd_f32(vget_high_f32(vdsp_data), vget_low_f32(vdsp_data));
++      dsp_buf[dsp_i] = vget_lane_f32(vpadd_f32(sum2, sum2), 0);
++#else
+       coeffs = interp_coeff[fluid_phase_fract_to_tablerow (dsp_phase)];
+       dsp_buf[dsp_i] = dsp_amp * (coeffs[0] * dsp_data[dsp_phase_index-1]
+ 				  + coeffs[1] * dsp_data[dsp_phase_index]
+ 				  + coeffs[2] * end_point1
+ 				  + coeffs[3] * end_point2);
++#endif
+ 
+       /* increment phase and amplitude */
+       fluid_phase_incr (dsp_phase, dsp_phase_incr);
+@@ -413,7 +466,11 @@ fluid_rvoice_dsp_interpolate_4th_order (fluid_rvoice_dsp_t *voice)
+   }
+ 
+   voice->phase = dsp_phase;
++#if defined(__ARM_NEON__) && defined(WITH_FLOAT)
++  vst1q_lane_f32(&voice->amp, dsp_amp, 0);
++#else
+   voice->amp = dsp_amp;
++#endif
+ 
+   return (dsp_i);
+ }
+-- 
+2.5.5
+
diff --git a/meta-multimedia/recipes-multimedia/fluidsynth/fluidsynth_1.1.6.bb b/meta-multimedia/recipes-multimedia/fluidsynth/fluidsynth_1.1.6.bb
index 54e8697..313ffb0 100644
--- a/meta-multimedia/recipes-multimedia/fluidsynth/fluidsynth_1.1.6.bb
+++ b/meta-multimedia/recipes-multimedia/fluidsynth/fluidsynth_1.1.6.bb
@@ -6,7 +6,10 @@ LIC_FILES_CHKSUM = "file://COPYING;md5=e198e9aac94943d0ec29a7dae8c29416"
 
 DEPENDS = "alsa-lib ncurses glib-2.0"
 
-SRC_URI = "${SOURCEFORGE_MIRROR}/project/${BPN}/${BP}/${BP}.tar.gz"
+SRC_URI = " \
+    ${SOURCEFORGE_MIRROR}/project/${BPN}/${BP}/${BP}.tar.gz \
+    file://0001-fluid_rvoice_dsp_interpolate_4th_order-make-use-of-A.patch \
+"
 SRC_URI[md5sum] = "ae5aca6de824b4173667cbd3a310b263"
 SRC_URI[sha256sum] = "50853391d9ebeda9b4db787efb23f98b1e26b7296dd2bb5d0d96b5bccee2171c"
 
-- 
2.5.5