[oe-commits] Koen Kooi : xf86-video-omapfb: add patch for armv7a to do pixel conversion with NEON SIMD

GIT User account git at amethyst.openembedded.net
Tue Feb 3 10:00:50 UTC 2009


Module: openembedded.git
Branch: org.openembedded.dev
Commit: 0e58397cce01711f0c547d98f6a00224bdaaf2ec
URL:    http://gitweb.openembedded.net/?p=openembedded.git&a=commit;h=0e58397cce01711f0c547d98f6a00224bdaaf2ec

Author: Koen Kooi <koen at openembedded.org>
Date:   Tue Feb  3 10:54:18 2009 +0100

xf86-video-omapfb: add patch for armv7a to do pixel conversion with NEON SIMD

---

 .../xorg-driver/xf86-video-omapfb/omapfb-neon.diff |  146 ++++++++++++++++++++
 packages/xorg-driver/xf86-video-omapfb_git.bb      |    7 +-
 2 files changed, 151 insertions(+), 2 deletions(-)

diff --git a/packages/xorg-driver/xf86-video-omapfb/omapfb-neon.diff b/packages/xorg-driver/xf86-video-omapfb/omapfb-neon.diff
new file mode 100644
index 0000000..325ca66
--- /dev/null
+++ b/packages/xorg-driver/xf86-video-omapfb/omapfb-neon.diff
@@ -0,0 +1,146 @@
+--- /tmp/image-format-conversions.h	2009-02-03 10:18:04.000000000 +0100
++++ git/src/image-format-conversions.h	2009-02-03 10:19:18.000000000 +0100
+@@ -30,6 +30,8 @@
+ /* Basic C implementation of YV12/I420 to UYVY conversion */
+ void uv12_to_uyvy(int w, int h, int y_pitch, int uv_pitch, uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest);
+ 
++/* NEON implementation of YV12/I420 to UYVY conversion */
++void uv12_to_uyvy_neon(int w, int h, int y_pitch, int uv_pitch, uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest);
+ 
+ #endif /* __IMAGE_FORMAT_CONVERSIONS_H__ */
+ 
+--- /tmp/image-format-conversions.c	2009-02-03 10:18:04.000000000 +0100
++++ git/src/image-format-conversions.c	2009-02-03 10:16:47.000000000 +0100
+@@ -2,6 +2,7 @@
+  * Copyright 2008 Kalle Vahlman, <zuh at iki.fi>
+  *                Ilpo Ruotsalainen, <lonewolf at iki.fi>
+  *                Tuomas Kulve, <tuomas.kulve at movial.com>
++ *                Ian Rickards, <ian.rickards at arm.com>
+  *                
+  *
+  * Permission to use, copy, modify, distribute and sell this software and its
+@@ -89,3 +90,104 @@
+ 	}
+ }
+ 
++void uv12_to_uyvy_neon(int w, int h, int y_pitch, int uv_pitch, uint8_t *y_p, uint8_t *u_p, uint8_t *v_p, uint8_t *dest)
++{
++    int x, y;
++    uint8_t *dest_even = dest;
++    uint8_t *dest_odd = dest + w * 2;    /* second output row; output uses 2 bytes per pixel */
++    uint8_t *y_p_even = y_p;
++    uint8_t *y_p_odd = y_p + y_pitch;
++
++    /* Packs separate Y, U and V planes into U Y V Y byte order, two rows per pass; each U/V sample is shared by a 2x2 pixel block. */
++    if (w<16)    /* narrower than one 16-pixel NEON block: plain C path */
++    {
++        for (y=0; y<h; y+=2)
++        {
++            for (x=0; x<w; x+=2)
++            {
++                /* Emit two 2x1 macroblocks (even and odd row) that share one U/V pair, covering a 2x2 input block */
++                uint8_t u_val = *u_p++;
++                uint8_t v_val = *v_p++;
++
++                /* Even row, first pixel: U then Y */
++                *dest_even++ = u_val;
++                *dest_even++ = *y_p_even++;
++
++                /* Even row, second pixel: V then Y */
++                *dest_even++ = v_val;
++                *dest_even++ = *y_p_even++;
++
++                /* Odd row, first pixel: same U, Y from the odd row */
++                *dest_odd++ = u_val;
++                *dest_odd++ = *y_p_odd++;
++
++                /* Odd row, second pixel: same V, Y from the odd row */
++                *dest_odd++ = v_val;
++                *dest_odd++ = *y_p_odd++;
++            }
++
++            dest_even += w * 2;    /* skip the row just written through dest_odd */
++            dest_odd += w * 2;     /* skip the next row, which dest_even will write */
++
++            u_p += ((uv_pitch << 1) - w) >> 1;    /* (2*uv_pitch - w)/2: move to the start of the next chroma row */
++            v_p += ((uv_pitch << 1) - w) >> 1;
++
++            y_p_even += (y_pitch - w) + y_pitch;    /* finish this luma row, then skip the row handled by the other pointer */
++            y_p_odd += (y_pitch - w) + y_pitch;
++        }
++    }
++    else
++    {
++        for (y=0; y<h; y+=2)
++        {
++            x=w;    /* x counts the pixels still to convert in this row pair */
++            do {
++                // use only d0-d5 (q0-q2); d8-d15 (q4-q7) are AAPCS callee-saved registers and are avoided
++                asm volatile (
++                        "1:\n\t"
++                        "vld1.u8   {d0}, [%[u_p]]!\n\t"    /* d0 = 8 U bytes, pointer post-incremented */
++                        "sub       %[x],%[x],#16\n\t"    /* this iteration converts 16 pixels */
++                        "cmp       %[x],#16\n\t"    /* result is tested by the bhs at the end of the block */
++                        "vld1.u8   {d1}, [%[v_p]]!\n\t"    /* d1 = 8 V bytes */
++                        "vld1.u8   {q1}, [%[y_p_even]]!\n\t"    /* q1 = 16 Y bytes of the even row */
++                        "vzip.u8   d0, d1\n\t"    /* q0 (d0:d1) = U0 V0 U1 V1 ... */
++                        "vld1.u8   {q2}, [%[y_p_odd]]!\n\t"    /* q2 = 16 Y bytes of the odd row */
++                // use 2-element struct stores to interleave the zipped U/V bytes (q0) with Y (q1): U Y V Y ...
++                        "vst2.u8   {q0,q1}, [%[dest_even]]!\n\t"    /* 32 output bytes for the even row */
++                        "vmov.u8   q1, q2\n\t"    /* reuse the same U/V with the odd-row Y */
++                        "vst2.u8   {q0,q1}, [%[dest_odd]]!\n\t"    /* 32 output bytes for the odd row */
++                        "bhs       1b\n\t"    /* repeat while at least 16 pixels remain */
++                        : [u_p] "+r" (u_p), [v_p] "+r" (v_p), [y_p_even] "+r" (y_p_even), [y_p_odd] "+r" (y_p_odd),
++                          [dest_even] "+r" (dest_even), [dest_odd] "+r" (dest_odd),
++                          [x] "+r" (x)    /* every operand is read-write: the asm advances the pointers and decrements x */
++                        :
++                        : "cc", "memory", "d0","d1","d2","d3","d4","d5"    /* d0-d5 are the halves of q0-q2 */
++                        );
++                if (x!=0)
++                {
++                    // fewer than 16 pixels remain: step back so one more 16-pixel block ends exactly at w
++                    x = 16-x;    /* number of already-converted pixels to step back over */
++                    u_p -= x/2;    /* one U/V byte per two pixels */
++                    v_p -= x/2;
++                    y_p_even -= x;
++                    y_p_odd -= x;
++                    dest_even -= x*2;    /* 2 output bytes per pixel */
++                    dest_odd -= x*2;
++                    x = 16;    /* the asm then runs exactly once more and leaves x == 0 */
++                    // do another 16-pixel block; the overlapped pixels are simply converted again
++                }
++            }
++            while (x!=0);
++
++            dest_even += w * 2;    /* skip the row just written through dest_odd */
++            dest_odd += w * 2;     /* skip the next row, which dest_even will write */
++
++            u_p += ((uv_pitch << 1) - w) >> 1;    /* (2*uv_pitch - w)/2: move to the start of the next chroma row */
++            v_p += ((uv_pitch << 1) - w) >> 1;
++
++            y_p_even += (y_pitch - w) + y_pitch;    /* finish this luma row, then skip the row handled by the other pointer */
++            y_p_odd += (y_pitch - w) + y_pitch;
++        }
++    }
++}
++
+--- /tmp/omapfb-xv-generic.c	2009-02-03 10:52:18.000000000 +0100
++++ git/src/omapfb-xv-generic.c	2009-02-03 10:52:24.000000000 +0100
+@@ -240,7 +240,7 @@
+ 			uint8_t *yb = buf;
+ 			uint8_t *ub = yb + (src_y_pitch * src_h);
+ 			uint8_t *vb = ub + (src_uv_pitch * (src_h / 2));
+-			uv12_to_uyvy(src_w & ~15,
++			uv12_to_uyvy_neon(src_w & ~15,
+ 			             src_h & ~15,
+ 				     src_y_pitch,
+ 				     src_uv_pitch,
+@@ -256,7 +256,7 @@
+ 			uint8_t *yb = buf;
+ 			uint8_t *vb = yb + (src_y_pitch * src_h);
+ 			uint8_t *ub = vb + (src_uv_pitch * (src_h / 2));
+-			uv12_to_uyvy(src_w & ~15,
++			uv12_to_uyvy_neon(src_w & ~15,
+ 			             src_h & ~15,
+ 				     src_y_pitch,
+ 				     src_uv_pitch,
diff --git a/packages/xorg-driver/xf86-video-omapfb_git.bb b/packages/xorg-driver/xf86-video-omapfb_git.bb
index e364ecc..f718e76 100644
--- a/packages/xorg-driver/xf86-video-omapfb_git.bb
+++ b/packages/xorg-driver/xf86-video-omapfb_git.bb
@@ -2,13 +2,16 @@ require xorg-driver-video.inc
 
 DESCRIPTION = "X.Org X server -- OMAP display driver"
 
-PR ="r19"
+PR ="r20"
 
 SRCREV = "ef0b41c332a710fb33f996df77bd4a96b56878da"
 PV = "0.0.1+${PR}+gitr${SRCREV}"
 PE = "1"
 
-SRC_URI = "git://git.pingu.fi/xf86-video-omapfb.git;protocol=http"
+SRC_URI = "git://git.pingu.fi/xf86-video-omapfb.git;protocol=http \
+          "
+
+SRC_URI_append_armv7a = " file://omapfb-neon.diff;patch=1"
 
 S = "${WORKDIR}/git"
 





More information about the Openembedded-commits mailing list