[oe-commits] unknown mplayer: add conditional patch to speed up AVR32
koen commit
openembedded-commits at lists.openembedded.org
Wed Jan 23 17:58:31 UTC 2008
mplayer: add conditional patch to speed up AVR32
Author: koen at openembedded.org
Branch: unknown
Revision: 50c56d6b5b2d15f6bc9c32b4f9907bc0d1c3564b
ViewMTN: http://monotone.openembedded.org/revision/info/50c56d6b5b2d15f6bc9c32b4f9907bc0d1c3564b
Files:
1
packages/mplayer/files/mplayer-1.0rc1-atmel.2.patch
packages/mplayer/mplayer_0.0+1.0rc1.bb
Diffs:
#
# mt diff -reef10bc14df32d73fe3daaa34cc453dbe48b9713 -r50c56d6b5b2d15f6bc9c32b4f9907bc0d1c3564b
#
#
#
# add_file "packages/mplayer/files/mplayer-1.0rc1-atmel.2.patch"
# content [876315cc7ea35c174e273748e709d64dc97d68d9]
#
# patch "packages/mplayer/mplayer_0.0+1.0rc1.bb"
# from [9378af60d35a2d2c248760fadc6a2f095991bdca]
# to [c228f31728ecf2e1a347df4bc148917394df34a1]
#
============================================================
--- packages/mplayer/files/mplayer-1.0rc1-atmel.2.patch 876315cc7ea35c174e273748e709d64dc97d68d9
+++ packages/mplayer/files/mplayer-1.0rc1-atmel.2.patch 876315cc7ea35c174e273748e709d64dc97d68d9
@@ -0,0 +1,6444 @@
+ cfg-common.h | 4 +
+ cfg-mencoder.h | 4 +
+ cfg-mplayer.h | 4 +
+ configure | 13 +-
+ libaf/af_format.c | 7 +
+ libavcodec/Makefile | 7 +
+ libavcodec/avr32/dsputil_avr32.c | 2678 ++++++++++++++++++++++++++++++++++++++
+ libavcodec/avr32/fdct.S | 541 ++++++++
+ libavcodec/avr32/h264idct.S | 451 +++++++
+ libavcodec/avr32/idct.S | 829 ++++++++++++
+ libavcodec/avr32/mc.S | 434 ++++++
+ libavcodec/avr32/pico.h | 260 ++++
+ libavcodec/bitstream.h | 77 +-
+ libavcodec/dsputil.c | 3 +
+ libavcodec/h264.c | 15 +
+ libavutil/common.h | 16 +
+ libavutil/internal.h | 9 +
+ libfaad2/common.h | 2 +-
+ libmpcodecs/ad_libmad.c | 5 +
+ libswscale/pico-avr32.h | 137 ++
+ libswscale/swscale_internal.h | 2 +-
+ libswscale/yuv2rgb.c | 14 +
+ libswscale/yuv2rgb_avr32.c | 416 ++++++
+ libvo/vo_fbdev2.c | 101 ++-
+ version.sh | 2 +-
+ 25 files changed, 6011 insertions(+), 20 deletions(-)
+ create mode 100644 libavcodec/avr32/dsputil_avr32.c
+ create mode 100644 libavcodec/avr32/fdct.S
+ create mode 100644 libavcodec/avr32/h264idct.S
+ create mode 100644 libavcodec/avr32/idct.S
+ create mode 100644 libavcodec/avr32/mc.S
+ create mode 100644 libavcodec/avr32/pico.h
+ create mode 100644 libswscale/pico-avr32.h
+ create mode 100644 libswscale/yuv2rgb_avr32.c
+
+diff --git a/cfg-common.h b/cfg-common.h
+index 780df38..7d878a8 100644
+--- a/cfg-common.h
++++ b/cfg-common.h
+@@ -235,6 +235,10 @@
+ {"tsprobe", &ts_probe, CONF_TYPE_POSITION, 0, 0, TS_MAX_PROBE_SIZE, NULL},
+ {"tskeepbroken", &ts_keep_broken, CONF_TYPE_FLAG, 0, 0, 1, NULL},
+
++#ifdef ARCH_AVR32
++ {"use-pico", &avr32_use_pico, CONF_TYPE_FLAG, 0, 0, 1, NULL},
++ {"nouse-pico", &avr32_use_pico, CONF_TYPE_FLAG, 0, 1, 0, NULL},
++#endif
+ // draw by slices or whole frame (useful with libmpeg2/libavcodec)
+ {"slices", &vd_use_slices, CONF_TYPE_FLAG, 0, 0, 1, NULL},
+ {"noslices", &vd_use_slices, CONF_TYPE_FLAG, 0, 1, 0, NULL},
+diff --git a/cfg-mencoder.h b/cfg-mencoder.h
+index 411b748..addf791 100644
+--- a/cfg-mencoder.h
++++ b/cfg-mencoder.h
+@@ -5,6 +5,10 @@
+
+ #include "cfg-common.h"
+
++#ifdef ARCH_AVR32
++extern int avr32_use_pico;
++#endif
++
+ #ifdef USE_FAKE_MONO
+ extern int fakemono; // defined in dec_audio.c
+ #endif
+diff --git a/cfg-mplayer.h b/cfg-mplayer.h
+index 62b6eac..31499c2 100644
+--- a/cfg-mplayer.h
++++ b/cfg-mplayer.h
+@@ -4,6 +4,10 @@
+
+ #include "cfg-common.h"
+
++#ifdef ARCH_AVR32
++extern int avr32_use_pico;
++#endif
++
+ extern int noconsolecontrols;
+
+ #if defined(HAVE_FBDEV)||defined(HAVE_VESA)
+diff --git a/configure b/configure
+index 29002c8..56c6fe4 100755
+--- a/configure
++++ b/configure
+@@ -1203,6 +1203,15 @@ EOF
+ _optimizing="$proc"
+ ;;
+
++ avr32)
++ _def_arch='#define ARCH_AVR32'
++ _target_arch='TARGET_ARCH_AVR32 = yes'
++ iproc='avr32'
++ proc=''
++ _march=''
++ _mcpu=''
++ _optimizing=''
++ ;;
+ arm|armv4l|armv5tel)
+ _def_arch='#define ARCH_ARMV4L 1'
+ _target_arch='TARGET_ARCH_ARMV4L = yes'
+@@ -1533,7 +1542,7 @@ echores $_named_asm_args
+ # Checking for CFLAGS
+ _stripbinaries=yes
+ if test "$_profile" != "" || test "$_debug" != "" ; then
+- CFLAGS="-W -Wall -O2 $_march $_mcpu $_debug $_profile"
++ CFLAGS="-W -Wall -O4 $_march $_mcpu $_debug $_profile"
+ if test "$_cc_major" -ge "3" ; then
+ CFLAGS=`echo "$CFLAGS" | sed -e 's/\(-Wall\)/\1 -Wno-unused-parameter/'`
+ fi
+@@ -3794,7 +3803,7 @@ fi
+
+
+ echocheck "X11 headers presence"
+- for I in `echo $_inc_extra | sed s/-I//g` /usr/X11/include /usr/X11R6/include /usr/include/X11R6 /usr/include /usr/openwin/include ; do
++ for I in `echo $_inc_extra | sed s/-I//g`; do
+ if test -f "$I/X11/Xlib.h" ; then
+ _inc_x11="-I$I"
+ _x11_headers="yes"
+diff --git a/libaf/af_format.c b/libaf/af_format.c
+index e5b7cc9..5d7ea6d 100644
+--- a/libaf/af_format.c
++++ b/libaf/af_format.c
+@@ -20,7 +20,14 @@
+ // Integer to float conversion through lrintf()
+ #ifdef HAVE_LRINTF
+ #include <math.h>
++
++#ifdef ARCH_AVR32
++#define lrintf(x) rint(x)
++#define llrint(x) (long long)rint(x)
++#else
+ long int lrintf(float);
++#endif
++
+ #else
+ #define lrintf(x) ((int)(x))
+ #endif
+diff --git a/libavcodec/Makefile b/libavcodec/Makefile
+index 17b6c45..8e1dc96 100644
+--- a/libavcodec/Makefile
++++ b/libavcodec/Makefile
+@@ -360,6 +360,12 @@ OBJS-$(TARGET_ARCH_SPARC) += sparc/dsputil_vis.o \
+
+ sparc/dsputil_vis.o: CFLAGS += -mcpu=ultrasparc -mtune=ultrasparc
+
++# avr32 specific stuff
++ifeq ($(TARGET_ARCH_AVR32),yes)
++ASM_OBJS += avr32/idct.o avr32/fdct.o avr32/mc.o avr32/h264idct.o
++OBJS += avr32/dsputil_avr32.o
++endif
++
+ # sun mediaLib specific stuff
+ OBJS-$(HAVE_MLIB) += mlib/dsputil_mlib.o \
+
+@@ -419,6 +425,7 @@ tests: apiexample $(TESTS)
+ clean::
+ rm -f \
+ i386/*.o i386/*~ \
++ avr32/*.o avr32/*~ \
+ armv4l/*.o armv4l/*~ \
+ mlib/*.o mlib/*~ \
+ alpha/*.o alpha/*~ \
+diff --git a/libavcodec/avr32/dsputil_avr32.c b/libavcodec/avr32/dsputil_avr32.c
+new file mode 100644
+index 0000000..200284d
+--- /dev/null
++++ b/libavcodec/avr32/dsputil_avr32.c
+@@ -0,0 +1,2678 @@
++/*
++ * Copyright (c) 2007 Atmel Corporation. All rights reserved.
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *
++ * 1. Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ *
++ * 2. Redistributions in binary form must reproduce the above
++ * copyright notice, this list of conditions and the following
++ * disclaimer in the documentation and/or other materials provided
++ * with the distribution.
++ *
++ * 3. The name of ATMEL may not be used to endorse or promote products
++ * derived from this software without specific prior written
++ * permission.
++ *
++ * THIS SOFTWARE IS PROVIDED BY ATMEL ``AS IS'' AND ANY EXPRESS OR
++ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
++ * ARE EXPRESSLY AND SPECIFICALLY DISCLAIMED. IN NO EVENT SHALL ATMEL
++ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
++ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
++ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
++ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
++ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
++ * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
++ * DAMAGE.
++ */
++
++#include "../dsputil.h"
++#include "pico.h"
++
++int avr32_use_pico = 1;
++
++//#define CHECK_DSP_FUNCS_AGAINST_C
++
++#ifdef CHECK_DSP_FUNCS_AGAINST_C
++#define DSP_FUNC_NAME(name) test_ ## name
++#else
++#define DSP_FUNC_NAME(name) name
++#endif
++
++union doubleword {
++ int64_t doubleword;
++ struct {
++ int32_t top;
++ int32_t bottom;
++ } words;
++};
++
++#undef LD16
++#undef LD32
++#undef LD64
++
++#define LD16(a) (*((uint16_t*)(a)))
++#define LD32(a) (*((uint32_t*)(a)))
++#define LD64(a) (*((uint64_t*)(a)))
++#define LD64_UNALIGNED(a) \
++ ({ union doubleword __tmp__; \
++ __tmp__.words.top = LD32(a); \
++ __tmp__.words.bottom = LD32(a + 4); \
++ __tmp__.doubleword; })
++
++#undef ST32
++#undef ST16
++
++#define ST16(a, b) *((uint16_t*)(a)) = (b)
++#define ST32(a, b) *((uint32_t*)(a)) = (b)
++
++#undef rnd_avg32
++#define rnd_avg32(a, b) \
++ ({ uint32_t __tmp__;\
++ asm("pavg.ub\t%0, %1, %2" : "=r"(__tmp__) : "r"(a), "r"(b));\
++ __tmp__;})
++
++void idct_avr32(DCTELEM *data);
++void fdct_avr32(DCTELEM *data);
++
++void idct_put_avr32(uint8_t *dest, int line_size, DCTELEM *data);
++void idct_add_avr32(uint8_t *dest, int line_size, DCTELEM *data);
++
++void h264_idct_add_avr32(uint8_t *dest, DCTELEM *data, int stride);
++void h264_idct8_add_avr32(uint8_t *dest, DCTELEM *data, int stride);
++
++#define extern_dspfunc(PFX, NUM) \
++ void PFX ## _pixels ## NUM ## _avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h ); \
++ void PFX ## _pixels ## NUM ## _h_avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h ); \
++ void PFX ## _pixels ## NUM ## _v_avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h ); \
++ void PFX ## _pixels ## NUM ## _hv_avr32(uint8_t *dst, const uint8_t *pixels, int line_size, int h )
++
++extern_dspfunc(put, 8);
++extern_dspfunc(put_no_rnd, 8);
++extern_dspfunc(avg, 8);
++extern_dspfunc(avg_no_rnd, 8);
++#undef extern_dspfunc
++
++#ifdef CHECK_DSP_FUNCS_AGAINST_C
++#define extern_dspfunc(PFX, NUM) \
++ void PFX ## _pixels ## NUM ## _c(uint8_t *dst, const uint8_t *pixels, int line_size, int h ); \
++ void PFX ## _pixels ## NUM ## _x2_c(uint8_t *dst, const uint8_t *pixels, int line_size, int h ); \
++ void PFX ## _pixels ## NUM ## _y2_c(uint8_t *dst, const uint8_t *pixels, int line_size, int h ); \
++ void PFX ## _pixels ## NUM ## _xy2_c(uint8_t *dst, const uint8_t *pixels, int line_size, int h )
++
++extern_dspfunc(put, 4);
++extern_dspfunc(put_no_rnd, 4);
++extern_dspfunc(put, 8);
++extern_dspfunc(put_no_rnd, 8);
++extern_dspfunc(put, 16);
++extern_dspfunc(put_no_rnd, 16);
++extern_dspfunc(avg, 8);
++extern_dspfunc(avg_no_rnd, 8);
++extern_dspfunc(avg, 16);
++extern_dspfunc(avg_no_rnd, 16);
++
++
++#undef extern_dspfunc
++#define extern_dspfunc(PFX, NUM) \
++void PFX ## NUM ## _mc00_c(uint8_t *dst, uint8_t *src, int stride); \
++void PFX ## NUM ## _mc10_c(uint8_t *dst, uint8_t *src, int stride); \
++void PFX ## NUM ## _mc20_c(uint8_t *dst, uint8_t *src, int stride); \
++void PFX ## NUM ## _mc30_c(uint8_t *dst, uint8_t *src, int stride); \
++void PFX ## NUM ## _mc01_c(uint8_t *dst, uint8_t *src, int stride); \
++void PFX ## NUM ## _mc11_c(uint8_t *dst, uint8_t *src, int stride); \
++void PFX ## NUM ## _mc21_c(uint8_t *dst, uint8_t *src, int stride); \
++void PFX ## NUM ## _mc31_c(uint8_t *dst, uint8_t *src, int stride); \
++void PFX ## NUM ## _mc02_c(uint8_t *dst, uint8_t *src, int stride); \
++void PFX ## NUM ## _mc12_c(uint8_t *dst, uint8_t *src, int stride); \
++void PFX ## NUM ## _mc22_c(uint8_t *dst, uint8_t *src, int stride); \
++void PFX ## NUM ## _mc32_c(uint8_t *dst, uint8_t *src, int stride); \
++void PFX ## NUM ## _mc03_c(uint8_t *dst, uint8_t *src, int stride); \
++void PFX ## NUM ## _mc13_c(uint8_t *dst, uint8_t *src, int stride); \
++void PFX ## NUM ## _mc23_c(uint8_t *dst, uint8_t *src, int stride); \
++void PFX ## NUM ## _mc33_c(uint8_t *dst, uint8_t *src, int stride); \
++
++extern_dspfunc(put_h264_qpel, 16);
++extern_dspfunc(put_h264_qpel, 8);
++extern_dspfunc(put_h264_qpel, 4);
++extern_dspfunc(avg_h264_qpel, 16);
++extern_dspfunc(avg_h264_qpel, 8);
++extern_dspfunc(avg_h264_qpel, 4);
++
++#undef extern_dspfunc
++
++void put_h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
++void put_h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
++void put_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
++
++void avg_h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
++void avg_h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
++void avg_h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y);
++
++
++void dump_block8(uint8_t *block, int line_size, int h);
++void dump_block4(uint8_t *block, int line_size, int h);
++void dump_block(uint8_t *block, int line_size, int h, int w);
++
++void check_block8(uint8_t *test, uint8_t *correct, int line_size_test, int line_size_correct,
++ int h, char *name, int max_dev);
++void check_block4(uint8_t *test, uint8_t *correct, int line_size_test, int line_size_correct,
++ int h, char *name, int max_dev);
++void check_block(uint8_t *test, uint8_t *correct, int line_size_test, int line_size_correct,
++ int h, int width, char *name, int max_dev);
++
++#define PIXOP2( OPNAME, OP ) \
++void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
++ int i;\
++ for(i=0; i<h; i++){\
++ OP(*((uint32_t*)(block )), LD32(pixels ));\
++ pixels+=line_size;\
++ block +=line_size;\
++ }\
++}\
++void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
++ int src_stride1, int src_stride2, int h){\
++ int i;\
++ for(i=0; i<h; i++){\
++ uint32_t a,b;\
++ a= LD32(&src1[i*src_stride1 ]);\
++ b= LD32(&src2[i*src_stride2 ]);\
++ OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
++ a= LD32(&src1[i*src_stride1+4]);\
++ b= LD32(&src2[i*src_stride2+4]);\
++ OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
++ }\
++}\
++\
++void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
++ int src_stride1, int src_stride2, int h){\
++ int i;\
++ for(i=0; i<h; i++){\
++ uint32_t a,b;\
++ a= LD32(&src1[i*src_stride1 ]);\
++ b= LD32(&src2[i*src_stride2 ]);\
++ OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
++ }\
++}\
++\
++void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
++ int src_stride1, int src_stride2, int h){\
++ OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
++ OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
++}\
++
++#else
++#define PIXOP2( OPNAME, OP ) \
++static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
++ int i;\
++ for(i=0; i<h; i++){\
++ OP(*((uint32_t*)(block )), LD32(pixels ));\
++ pixels+=line_size;\
++ block +=line_size;\
++ }\
++}\
++static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
++ int i;\
++ for(i=0; i<h; i++){\
++ OP(*((uint32_t*)(block )), LD32(pixels ));\
++ OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
++ pixels+=line_size;\
++ block +=line_size;\
++ }\
++}\
++static void OPNAME ## _pixels16_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
++ int i;\
++ for(i=0; i<h; i++){\
++ OP(*((uint32_t*)(block )), LD32(pixels ));\
++ OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
++ OP(*((uint32_t*)(block+8)), LD32(pixels+8));\
++ OP(*((uint32_t*)(block+12)), LD32(pixels+12));\
++ pixels+=line_size;\
++ block +=line_size;\
++ }\
++}\
++static void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
++ int src_stride1, int src_stride2, int h){\
++ int i;\
++ for(i=0; i<h; i++){\
++ uint32_t a,b;\
++ a= LD32(&src1[i*src_stride1 ]);\
++ b=%s
>>> DIFF TRUNCATED @ 16K
More information about the Openembedded-commits mailing list