[oe-commits] Mario Domenech Goulart : contrib/tesseract-langs.sh: add script to generate recipes for tesseract languages
git at git.openembedded.org
git at git.openembedded.org
Tue Jun 10 10:30:46 UTC 2014
Module: meta-openembedded.git
Branch: master
Commit: cb41796a5e0573bf3676b5c54fcc12c6dd42f9fb
URL: http://git.openembedded.org/?p=meta-openembedded.git&a=commit;h=cb41796a5e0573bf3676b5c54fcc12c6dd42f9fb
Author: Mario Domenech Goulart <mario at ossystems.com.br>
Date: Mon May 26 09:59:00 2014 -0300
contrib/tesseract-langs.sh: add script to generate recipes for tesseract languages
This script writes language recipes for tesseract. It downloads the
listing of available languages and language tarballs from the official
site and writes language recipes tesseract-lang-<lang>_<version>.bb
for each language.
Signed-off-by: Mario Domenech Goulart <mario at ossystems.com.br>
Signed-off-by: Martin Jansa <Martin.Jansa at gmail.com>
---
contrib/tesseract-langs.sh | 92 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 92 insertions(+)
diff --git a/contrib/tesseract-langs.sh b/contrib/tesseract-langs.sh
new file mode 100755
index 0000000..50873c1
--- /dev/null
+++ b/contrib/tesseract-langs.sh
@@ -0,0 +1,92 @@
+#! /bin/sh
+
+# Copyright (C) 2014, O.S. Systems Software Ltda. All Rights Reserved
+# Released under the MIT license (see meta-openembedded layer's COPYING.MIT)
+
+# Version string used in the names of the language tarballs and recipes.
+PV="3.02"
+
+# The software tarball may carry a minor version that the language
+# tarballs do not have. Example:
+# software package: tesseract-ocr-3.02.02.tar.gz
+# language package: tesseract-ocr-3.02.por.tar.gz
+MINOR_PV="02"
+
+# First positional argument: directory the recipes are written to.
+recipes_dir="$1"
+
+# Print a one-line usage summary to stdout.
+# "$0" is quoted so that a script path containing whitespace reaches
+# basename as a single argument instead of being word-split.
+usage() {
+    echo "Usage: $(basename "$0") <recipes dir> [ <download dir> ]"
+}
+
+# The recipes directory is mandatory: without it print the usage and stop.
+if [ -z "$recipes_dir" ]; then
+    usage
+    exit 1
+fi
+mkdir -p "$recipes_dir" || exit 1
+
+# Page listing the downloadable files; it is saved to a temporary file.
+file_list_uri='https://code.google.com/p/tesseract-ocr/downloads/list'
+file_list=$(mktemp) || exit 1
+
+# Without a second argument the tarballs go to a temporary directory and
+# remove_dl_dir is set so that the directory is deleted at the end of the
+# run. A directory given by the caller is kept.
+remove_dl_dir=
+if [ -z "$2" ]; then
+    remove_dl_dir=1
+    dl_dir=$(mktemp -d) || { rm -f "$file_list"; exit 1; }
+else
+    dl_dir="$2"
+fi
+
+# Quoted so that a download directory containing whitespace is created as
+# one path instead of being word-split into several.
+mkdir -p "$dl_dir" || { rm -f "$file_list"; exit 1; }
+
+tesseract_langs() {
+ wget -q -O "$file_list" "$file_list_uri"
+
+ grep -E 'a href="detail\?name=tesseract-ocr-'${PV}'\.[^\.]+.tar.gz&can=2&q=">' "$file_list" | \
+ sed -r -e 's/.*tesseract-ocr-'${PV}'\.*([^\.]+)\.tar\.gz.*/\1/' | \
+ grep -Ev '('${MINOR_PV}'|'${MINOR_PV}'-doc-html)' | \
+ sort -u
+}
+
+download_lang_files() {
+ local langs="$1"
+ local uri
+ for lang in $langs; do
+ if [ ! -e "$dl_dir/tesseract-ocr-${PV}.${lang}.tar.gz" ]; then
+ uri="https://tesseract-ocr.googlecode.com/files/tesseract-ocr-${PV}.${lang}.tar.gz"
+ echo "Downloading $uri"
+ wget -q -P "$dl_dir" "$uri"
+ fi
+ done
+}
+
+create_recipe() {
+ local lang=$1
+ local tarball
+
+ tarball="$dl_dir/tesseract-ocr-${PV}.${lang}.tar.gz"
+
+ md5sum=`md5sum $tarball | awk '{print $1}'`
+ sha256sum=`sha256sum $tarball | awk '{print $1}'`
+
+ cat > $recipes_dir/tesseract-lang-`echo ${lang} | sed s/_/-/g`_${PV}.bb <<EOF
+# Copyright (C) 2014, O.S. Systems Software Ltda. All Rights Reserved
+# Released under the MIT license (see meta-openembedded layer's COPYING.MIT)
+
+TESSERACT_LANG = "$lang"
+
+require tesseract-lang.inc
+
+SRC_URI[md5sum] = "${md5sum}"
+SRC_URI[sha256sum] = "${sha256sum}"
+EOF
+}
+
+
+LANGS=`tesseract_langs`
+
+download_lang_files "$LANGS"
+
+for lang in $LANGS; do
+ create_recipe $lang
+done
+
+[ -n "$remove_dl_dir" ] && rm -rf $dl_dir
+rm -f $file_list
More information about the Openembedded-commits
mailing list