[oe-commits] Mario Domenech Goulart : contrib/tesseract-langs.sh: add script to generate recipes for tesseract languages
git at git.openembedded.org
git at git.openembedded.org
Fri May 30 10:23:46 UTC 2014
Module: meta-openembedded.git
Branch: master-next
Commit: 70dab0d46f96886e911850bbec708553eae9e2cf
URL: http://git.openembedded.org/?p=meta-openembedded.git&a=commit;h=70dab0d46f96886e911850bbec708553eae9e2cf
Author: Mario Domenech Goulart <mario at ossystems.com.br>
Date: Mon May 26 09:59:00 2014 -0300
contrib/tesseract-langs.sh: add script to generate recipes for tesseract languages
This script writes language recipes for tesseract. It downloads the
listing of available languages and language tarballs from the official
site and writes language recipes tesseract-lang-<lang>_<version>.bb
for each language.
Signed-off-by: Mario Domenech Goulart <mario at ossystems.com.br>
Signed-off-by: Martin Jansa <Martin.Jansa at gmail.com>
---
contrib/tesseract-langs.sh | 92 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 92 insertions(+)
diff --git a/contrib/tesseract-langs.sh b/contrib/tesseract-langs.sh
new file mode 100755
index 0000000..50873c1
--- /dev/null
+++ b/contrib/tesseract-langs.sh
@@ -0,0 +1,92 @@
+#! /bin/sh
+
+# Copyright (C) 2014, O.S. Systems Software Ltda. All Rights Reserved
+# Released under the MIT license (see meta-openembedded layer's COPYING.MIT)
+
+PV='3.02' # version string that appears in the language tarball names
+
+# Sometimes the software package has a minor version that the language
+# packages do not have. Example:
+# software package: tesseract-ocr-3.02.02.tar.gz
+# language package: tesseract-ocr-3.02.por.tar.gz
+MINOR_PV=02
+
+recipes_dir=$1 # first argument: directory the generated recipes are written to
+
+usage() { # Print the command-line synopsis on stdout.
+    echo "Usage: $(basename "$0") <recipes dir> [ <download dir> ]"
+}
+
+if [ -z "$recipes_dir" ]; then # the recipes directory argument is mandatory
+    usage
+    exit 1
+fi
+mkdir -p "$recipes_dir"
+
+file_list_uri='https://code.google.com/p/tesseract-ocr/downloads/list'
+file_list=$(mktemp) || exit 1 # scratch file that receives the downloads page
+
+remove_dl_dir= # becomes 1 only when the download directory is a temporary one
+if [ -z "$2" ]; then
+    remove_dl_dir=1
+    dl_dir=$(mktemp -d) || exit 1 # an empty dl_dir would send downloads to /
+else
+    dl_dir="$2"
+fi
+
+mkdir -p "$dl_dir"
+
+tesseract_langs() { # Print the language codes found on the downloads page, sorted and unique.
+    wget -q -O "$file_list" "$file_list_uri" || return 1
+    # Extract <lang> from each tarball link, then drop the entries matching MINOR_PV (not languages).
+    grep -E 'a href="detail\?name=tesseract-ocr-'"${PV}"'\.[^\.]+\.tar\.gz&can=2&q=">' "$file_list" | \
+        sed -r -e 's/.*tesseract-ocr-'"${PV}"'\.([^\.]+)\.tar\.gz.*/\1/' | \
+        grep -Ev '('"${MINOR_PV}"'|'"${MINOR_PV}"'-doc-html)' | \
+        sort -u
+}
+
+download_lang_files() { # Fetch into $dl_dir the tarball of every language listed in $1.
+    local langs="$1"
+    local uri lang
+    for lang in $langs; do # unquoted on purpose: one word per language code
+        if [ ! -e "$dl_dir/tesseract-ocr-${PV}.${lang}.tar.gz" ]; then # skip tarballs already present
+            uri="https://tesseract-ocr.googlecode.com/files/tesseract-ocr-${PV}.${lang}.tar.gz"
+            echo "Downloading $uri"
+            wget -q -P "$dl_dir" "$uri" || echo "Failed to download $uri" >&2
+        fi
+    done
+}
+
+create_recipe() { # Write the recipe for language code $1 into $recipes_dir; returns 1 if its tarball is missing.
+    local lang=$1
+    local tarball md5sum sha256sum
+    tarball="$dl_dir/tesseract-ocr-${PV}.${lang}.tar.gz"
+    # Without the tarball both checksums would be empty, so no recipe is written.
+    [ -f "$tarball" ] || { echo "Missing $tarball; skipping $lang" >&2; return 1; }
+    md5sum=$(md5sum "$tarball" | awk '{print $1}')
+    sha256sum=$(sha256sum "$tarball" | awk '{print $1}')
+    # The recipe file name uses '-' wherever the language code has '_'.
+    cat > "$recipes_dir/tesseract-lang-$(echo "${lang}" | sed s/_/-/g)_${PV}.bb" <<EOF
+# Copyright (C) 2014, O.S. Systems Software Ltda. All Rights Reserved
+# Released under the MIT license (see meta-openembedded layer's COPYING.MIT)
+
+TESSERACT_LANG = "$lang"
+
+require tesseract-lang.inc
+
+SRC_URI[md5sum] = "${md5sum}"
+SRC_URI[sha256sum] = "${sha256sum}"
+EOF
+}
+
+
+LANGS=$(tesseract_langs)
+
+download_lang_files "$LANGS"
+
+for lang in $LANGS; do # unquoted on purpose: one word per language code
+    create_recipe "$lang"
+done
+# Clean up: the download directory is removed only when this script created it.
+[ -n "$remove_dl_dir" ] && rm -rf -- "${dl_dir:?}"
+rm -f -- "$file_list"
More information about the Openembedded-commits
mailing list