[oe-commits] Mario Domenech Goulart : contrib/tesseract-langs.sh: add script to generate recipes for tesseract languages

git at git.openembedded.org git at git.openembedded.org
Wed Jun 4 09:43:37 UTC 2014


Module: meta-openembedded.git
Branch: master-next
Commit: 4d9c3a8eac013a321d637431029a5f17916d1a00
URL:    http://git.openembedded.org/?p=meta-openembedded.git&a=commit;h=4d9c3a8eac013a321d637431029a5f17916d1a00

Author: Mario Domenech Goulart <mario at ossystems.com.br>
Date:   Mon May 26 09:59:00 2014 -0300

contrib/tesseract-langs.sh: add script to generate recipes for tesseract languages

This script writes language recipes for tesseract.  It downloads the
listing of available languages and language tarballs from the official
site and writes language recipes tesseract-lang-<lang>_<version>.bb
for each language.

Signed-off-by: Mario Domenech Goulart <mario at ossystems.com.br>
Signed-off-by: Martin Jansa <Martin.Jansa at gmail.com>

---

 contrib/tesseract-langs.sh | 92 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)

diff --git a/contrib/tesseract-langs.sh b/contrib/tesseract-langs.sh
new file mode 100755
index 0000000..50873c1
--- /dev/null
+++ b/contrib/tesseract-langs.sh
@@ -0,0 +1,92 @@
+#! /bin/sh
+
+# Copyright (C) 2014, O.S. Systems Software Ltda. All Rights Reserved
+# Released under the MIT license (see meta-openembedded layer's COPYING.MIT)
+
+PV='3.02'
+
+# Sometimes the software package has a minor version, but language
+# packages have not.  Example: 
+#   software package: tesseract-ocr-3.02.02.tar.gz
+#   language package: tesseract-ocr-3.02.por.tar.gz
+MINOR_PV=02
+
+recipes_dir=$1
+# Print the command-line synopsis, naming the script without its directory.
+usage() {
+    echo "Usage: ${0##*/} <recipes dir> [ <download dir> ]"
+}
+# The recipes directory is mandatory: show the synopsis and stop without it.
+if [ -z "$recipes_dir" ]; then
+    usage
+    exit 1
+fi
+mkdir -p "$recipes_dir"
+# Page listing the downloadable files; it is saved to a temporary file.
+file_list_uri='https://code.google.com/p/tesseract-ocr/downloads/list'
+file_list=$(mktemp) || exit 1
+
+# Without a <download dir> argument the tarballs go to a throwaway directory,
+# and remove_dl_dir flags it for deletion at the end of the script.
+remove_dl_dir=
+dl_dir=$2
+if [ -z "$dl_dir" ]; then
+    remove_dl_dir=1
+    dl_dir=$(mktemp -d) || exit 1
+fi
+mkdir -p "$dl_dir"
+
+tesseract_langs() {
+    wget -q -O "$file_list" "$file_list_uri"
+
+    grep -E 'a href="detail\?name=tesseract-ocr-'${PV}'\.[^\.]+.tar.gz&amp;can=2&amp;q=">' "$file_list" | \
+        sed -r -e 's/.*tesseract-ocr-'${PV}'\.*([^\.]+)\.tar\.gz.*/\1/' | \
+        grep -Ev '('${MINOR_PV}'|'${MINOR_PV}'-doc-html)' | \
+        sort -u
+}
+
+# Download into $dl_dir each missing tarball for the codes in $1; returns 1 if any fetch fails.
+download_lang_files() {
+    local langs="$1" lang uri rc=0
+    for lang in $langs; do
+        [ -e "$dl_dir/tesseract-ocr-${PV}.${lang}.tar.gz" ] && continue
+        uri="https://tesseract-ocr.googlecode.com/files/tesseract-ocr-${PV}.${lang}.tar.gz"
+        echo "Downloading $uri"
+        wget -q -P "$dl_dir" "$uri" || { echo "Failed to download $uri" >&2; rc=1; }
+    done
+    return $rc
+}
+
+create_recipe() {
+    local lang=$1
+    local tarball
+
+    tarball="$dl_dir/tesseract-ocr-${PV}.${lang}.tar.gz"
+
+    md5sum=`md5sum $tarball | awk '{print $1}'`
+    sha256sum=`sha256sum $tarball | awk '{print $1}'`
+
+    cat > $recipes_dir/tesseract-lang-`echo ${lang} | sed s/_/-/g`_${PV}.bb <<EOF
+# Copyright (C) 2014, O.S. Systems Software Ltda. All Rights Reserved
+# Released under the MIT license (see meta-openembedded layer's COPYING.MIT)
+
+TESSERACT_LANG = "$lang"
+
+require tesseract-lang.inc
+
+SRC_URI[md5sum] = "${md5sum}"
+SRC_URI[sha256sum] = "${sha256sum}"
+EOF
+}
+
+
+# Discover the available languages, fetch their tarballs, write one recipe each.
+LANGS=$(tesseract_langs)
+download_lang_files "$LANGS"
+for lang in $LANGS; do
+    create_recipe "$lang"
+done
+
+# Clean up: the download directory is removed only if this script created it.
+[ -z "$remove_dl_dir" ] || rm -rf -- "${dl_dir:?}"
+rm -f -- "$file_list"



More information about the Openembedded-commits mailing list