[oe-commits] Mario Domenech Goulart : contrib/tesseract-langs.sh: add script to generate recipes for tesseract languages

git at git.openembedded.org git at git.openembedded.org
Tue Jun 10 10:44:02 UTC 2014


Module: meta-openembedded.git
Branch: master-next
Commit: cb41796a5e0573bf3676b5c54fcc12c6dd42f9fb
URL:    http://git.openembedded.org/?p=meta-openembedded.git&a=commit;h=cb41796a5e0573bf3676b5c54fcc12c6dd42f9fb

Author: Mario Domenech Goulart <mario at ossystems.com.br>
Date:   Mon May 26 09:59:00 2014 -0300

contrib/tesseract-langs.sh: add script to generate recipes for tesseract languages

This script writes language recipes for tesseract.  It downloads the
listing of available languages and language tarballs from the official
site and writes language recipes tesseract-lang-<lang>_<version>.bb
for each language.

Signed-off-by: Mario Domenech Goulart <mario at ossystems.com.br>
Signed-off-by: Martin Jansa <Martin.Jansa at gmail.com>

---

 contrib/tesseract-langs.sh | 92 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)

diff --git a/contrib/tesseract-langs.sh b/contrib/tesseract-langs.sh
new file mode 100755
index 0000000..50873c1
--- /dev/null
+++ b/contrib/tesseract-langs.sh
@@ -0,0 +1,92 @@
+#! /bin/sh
+
+# Copyright (C) 2014, O.S. Systems Software Ltda. All Rights Reserved
+# Released under the MIT license (see meta-openembedded layer's COPYING.MIT)
+
+PV="3.02"
+
+# The software package may carry an extra minor version that the
+# language packages lack.  For instance:
+#   software package: tesseract-ocr-3.02.02.tar.gz
+#   language package: tesseract-ocr-3.02.por.tar.gz
+MINOR_PV="02"
+
+recipes_dir="$1"
+
+usage() {  # Print a one-line synopsis of the command-line arguments.
+    echo "Usage: ${0##*/} <recipes dir> [ <download dir> ]"
+}
+
+if [ -z "$recipes_dir" ]; then
+    usage
+    exit 1
+fi
+mkdir -p "$recipes_dir" || exit 1
+
+file_list_uri='https://code.google.com/p/tesseract-ocr/downloads/list'
+file_list=`mktemp` || exit 1
+# No <download dir> argument: use a temporary one and flag it for removal.
+remove_dl_dir=
+if [ -z "$2" ]; then
+    remove_dl_dir=1
+    dl_dir=`mktemp -d` || exit 1
+else
+    dl_dir="$2"
+fi
+# Quoted so that a download dir containing whitespace stays a single word.
+mkdir -p "$dl_dir" || exit 1
+
+tesseract_langs() {  # Print the available language codes, sorted, one per line.
+    wget -q -O "$file_list" "$file_list_uri" || return 1
+    # Anchored filter: drop only the software/doc packages ("02", "02-...").
+    grep -E 'a href="detail\?name=tesseract-ocr-'"${PV}"'\.[^\.]+.tar.gz&amp;can=2&amp;q=">' "$file_list" | \
+        sed -r -e 's/.*tesseract-ocr-'"${PV}"'\.*([^\.]+)\.tar\.gz.*/\1/' | \
+        grep -Ev "^${MINOR_PV}(-.*)?\$" | \
+        sort -u
+}
+
+download_lang_files() {  # $1: whitespace-separated language codes to fetch
+    local langs="$1" lang uri rc=0
+    for lang in $langs; do
+        if [ ! -e "$dl_dir/tesseract-ocr-${PV}.${lang}.tar.gz" ]; then
+            uri="https://tesseract-ocr.googlecode.com/files/tesseract-ocr-${PV}.${lang}.tar.gz"
+            echo "Downloading $uri"
+            wget -q -P "$dl_dir" "$uri" || { echo "Failed: $uri" >&2; rc=1; }
+        fi
+    done
+    return $rc  # non-zero if any download failed
+}
+
+create_recipe() {  # $1: language code as used in the tarball name
+    local lang="$1" tarball md5sum sha256sum
+
+    tarball="$dl_dir/tesseract-ocr-${PV}.${lang}.tar.gz"
+    # Without the tarball the checksums would be empty; write no recipe then.
+    [ -f "$tarball" ] || { echo "Missing $tarball" >&2; return 1; }
+    md5sum=`md5sum "$tarball" | awk '{print $1}'`
+    sha256sum=`sha256sum "$tarball" | awk '{print $1}'`
+    # '_' in the language code becomes '-', leaving one '_' before the version.
+    cat > "$recipes_dir/tesseract-lang-$(echo "$lang" | sed 's/_/-/g')_${PV}.bb" <<EOF
+# Copyright (C) 2014, O.S. Systems Software Ltda. All Rights Reserved
+# Released under the MIT license (see meta-openembedded layer's COPYING.MIT)
+
+TESSERACT_LANG = "$lang"
+
+require tesseract-lang.inc
+
+SRC_URI[md5sum] = "${md5sum}"
+SRC_URI[sha256sum] = "${sha256sum}"
+EOF
+}
+
+
+LANGS=`tesseract_langs`
+# An empty list means the listing could not be fetched or matched nothing.
+[ -n "$LANGS" ] || echo "No tesseract languages found" >&2
+download_lang_files "$LANGS"
+for lang in $LANGS; do
+    create_recipe "$lang"
+done
+# Only a download directory created by this script itself is removed.
+[ -n "$remove_dl_dir" ] && rm -rf -- "$dl_dir"
+rm -f -- "$file_list"



More information about the Openembedded-commits mailing list