[bitbake-devel] [PATCH 2/2] bitbake: implement checksums for local files in SRC_URI

Paul Eggleton paul.eggleton at linux.intel.com
Tue May 22 23:23:32 UTC 2012


Gathers a list of paths to have checksums calculated at parse time, and
processes these when calculating task hashes. Checksums are cached with
the file's current mtime. Thus, changing any local file in SRC_URI will
now cause the do_fetch taskhash to change, thus forcing a rebuild.

This change adds very roughly about an 8% increase in parse time (a few
seconds) and maybe a few seconds during runqueue generation, so a fairly
moderate performance hit.

Note that since paths are resolved at parse time, this will not force
a rebuild when files are introduced which would cause that resolved path
to be different - for example, where a machine-specific version of a file
was added without otherwise changing the recipe. This will need to be
handled in a future update.

Code to hook this into the signature generator was courtesy of
Richard Purdie <richard.purdie at linuxfoundation.org>.

Implements [YOCTO #2044].

Signed-off-by: Paul Eggleton <paul.eggleton at linux.intel.com>
---
 bitbake/lib/bb/cache.py           |   13 ++++--
 bitbake/lib/bb/checksum.py        |   90 +++++++++++++++++++++++++++++++++++++
 bitbake/lib/bb/cooker.py          |    2 +
 bitbake/lib/bb/fetch2/__init__.py |   85 +++++++++++++++++++++++++++++++++++
 bitbake/lib/bb/siggen.py          |   24 ++++++++++
 5 files changed, 211 insertions(+), 3 deletions(-)
 create mode 100644 bitbake/lib/bb/checksum.py

diff --git a/bitbake/lib/bb/cache.py b/bitbake/lib/bb/cache.py
index 36e6356..dea2a80 100644
--- a/bitbake/lib/bb/cache.py
+++ b/bitbake/lib/bb/cache.py
@@ -43,7 +43,7 @@ except ImportError:
     logger.info("Importing cPickle failed. "
                 "Falling back to a very slow implementation.")
 
-__cache_version__ = "143"
+__cache_version__ = "144"
 
 def getCacheFile(path, filename, data_hash):
     return os.path.join(path, filename + "." + data_hash)
@@ -76,9 +76,13 @@ class RecipeInfoCommon(object):
                     for task in tasks)
 
     @classmethod
-    def flaglist(cls, flag, varlist, metadata):
-        return dict((var, metadata.getVarFlag(var, flag, True))
+    def flaglist(cls, flag, varlist, metadata, squash=False):
+        out_dict = dict((var, metadata.getVarFlag(var, flag, True))
                     for var in varlist)
+        if squash:
+            return dict((k,v) for (k,v) in out_dict.iteritems() if v)
+        else:
+            return out_dict
 
     @classmethod
     def getvar(cls, var, metadata):
@@ -128,6 +132,7 @@ class CoreRecipeInfo(RecipeInfoCommon):
         self.stamp = self.getvar('STAMP', metadata)
         self.stamp_base = self.flaglist('stamp-base', self.tasks, metadata)
         self.stamp_extrainfo = self.flaglist('stamp-extra-info', self.tasks, metadata)
+        self.file_checksums = self.flaglist('file-checksums', self.tasks, metadata, True)
         self.packages_dynamic = self.listvar('PACKAGES_DYNAMIC', metadata)
         self.depends          = self.depvar('DEPENDS', metadata)
         self.provides         = self.depvar('PROVIDES', metadata)
@@ -154,6 +159,7 @@ class CoreRecipeInfo(RecipeInfoCommon):
         cachedata.stamp = {}
         cachedata.stamp_base = {}
         cachedata.stamp_extrainfo = {}
+        cachedata.file_checksums = {}
         cachedata.fn_provides = {}
         cachedata.pn_provides = defaultdict(list)
         cachedata.all_depends = []
@@ -185,6 +191,7 @@ class CoreRecipeInfo(RecipeInfoCommon):
         cachedata.stamp[fn] = self.stamp
         cachedata.stamp_base[fn] = self.stamp_base
         cachedata.stamp_extrainfo[fn] = self.stamp_extrainfo
+        cachedata.file_checksums[fn] = self.file_checksums
 
         provides = [self.pn]
         for provide in self.provides:
diff --git a/bitbake/lib/bb/checksum.py b/bitbake/lib/bb/checksum.py
new file mode 100644
index 0000000..514ff0b
--- /dev/null
+++ b/bitbake/lib/bb/checksum.py
@@ -0,0 +1,90 @@
+# Local file checksum cache implementation
+#
+# Copyright (C) 2012 Intel Corporation
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+import os
+import stat
+import bb.utils
+import logging
+from bb.cache import MultiProcessCache
+
+logger = logging.getLogger("BitBake.Cache")
+
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+    logger.info("Importing cPickle failed. "
+                "Falling back to a very slow implementation.")
+
+
+# mtime cache (non-persistent)
+# based upon the assumption that files do not change during bitbake run
+class FileMtimeCache(object):
+    cache = {}
+
+    def cached_mtime(self, f):
+        if f not in self.cache:
+            self.cache[f] = os.stat(f)[stat.ST_MTIME]
+        return self.cache[f]
+
+    def cached_mtime_noerror(self, f):
+        if f not in self.cache:
+            try:
+                self.cache[f] = os.stat(f)[stat.ST_MTIME]
+            except OSError:
+                return 0
+        return self.cache[f]
+
+    def update_mtime(self, f):
+        self.cache[f] = os.stat(f)[stat.ST_MTIME]
+        return self.cache[f]
+
+    def clear(self):
+        self.cache.clear()
+
+# Checksum + mtime cache (persistent)
+class FileChecksumCache(MultiProcessCache):
+    cache_file_name = "local_file_checksum_cache.dat"
+    CACHE_VERSION = 1
+
+    def __init__(self):
+        self.mtime_cache = FileMtimeCache()
+        MultiProcessCache.__init__(self)
+
+    def get_checksum(self, f):
+        entry = self.cachedata[0].get(f)
+        cmtime = self.mtime_cache.cached_mtime(f)
+        if entry:
+            (mtime, hashval) = entry
+            if cmtime == mtime:
+                return hashval
+            else:
+                bb.debug(2, "file %s changed mtime, recompute checksum" % f)
+
+        hashval = bb.utils.md5_file(f)
+        self.cachedata_extras[0][f] = (cmtime, hashval)
+        return hashval
+
+    def merge_data(self, source, dest):
+        for h in source[0]:
+            if h in dest:
+                (smtime, _) = source[0][h]
+                (dmtime, _) = dest[0][h]
+                if smtime > dmtime:
+                    dest[0][h] = source[0][h]
+            else:
+                dest[0][h] = source[0][h]
diff --git a/bitbake/lib/bb/cooker.py b/bitbake/lib/bb/cooker.py
index dea0aad..8ad4922 100644
--- a/bitbake/lib/bb/cooker.py
+++ b/bitbake/lib/bb/cooker.py
@@ -1570,6 +1570,7 @@ class CookerParser(object):
             def init():
                 Parser.cfg = self.cfgdata
                 multiprocessing.util.Finalize(None, bb.codeparser.parser_cache_save, args=(self.cfgdata,), exitpriority=1)
+                multiprocessing.util.Finalize(None, bb.fetch.fetcher_parse_save, args=(self.cfgdata,), exitpriority=1)
 
             self.feeder_quit = multiprocessing.Queue(maxsize=1)
             self.parser_quit = multiprocessing.Queue(maxsize=self.num_processes)
@@ -1618,6 +1619,7 @@ class CookerParser(object):
         sync.start()
         multiprocessing.util.Finalize(None, sync.join, exitpriority=-100)
         bb.codeparser.parser_cache_savemerge(self.cooker.configuration.data)
+        bb.fetch.fetcher_parse_done(self.cooker.configuration.data)
 
     def load_cached(self):
         for filename, appends in self.fromcache:
diff --git a/bitbake/lib/bb/fetch2/__init__.py b/bitbake/lib/bb/fetch2/__init__.py
index 0b976c4..d4b6c3e 100644
--- a/bitbake/lib/bb/fetch2/__init__.py
+++ b/bitbake/lib/bb/fetch2/__init__.py
@@ -8,6 +8,7 @@ BitBake build tools.
 """
 
 # Copyright (C) 2003, 2004  Chris Larson
+# Copyright (C) 2012  Intel Corporation
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@@ -30,9 +31,11 @@ import os, re
 import logging
 import urllib
 import bb.persist_data, bb.utils
+import bb.checksum
 from bb import data
 
 __version__ = "2"
+_checksum_cache = bb.checksum.FileChecksumCache()
 
 logger = logging.getLogger("BitBake.Fetcher")
 
@@ -233,10 +236,18 @@ def fetcher_init(d):
     else:
         raise FetchError("Invalid SRCREV cache policy of: %s" % srcrev_policy)
 
+    _checksum_cache.init_cache(d)
+
     for m in methods:
         if hasattr(m, "init"):
             m.init(d)
 
+def fetcher_parse_save(d):
+    _checksum_cache.save_extras(d)
+
+def fetcher_parse_done(d):
+    _checksum_cache.save_merge(d)
+
 def fetcher_compare_revisions(d):
     """
     Compare the revisions in the persistant cache with current values and
@@ -553,6 +564,80 @@ def srcrev_internal_helper(ud, d, name):
 
     return rev
 
+
+def get_checksum_file_list(d):
+    """ Get a list of files checksum in SRC_URI
+
+    Returns the all resolved local path of all local file entries in
+    SRC_URI as a space-separated string
+    """
+    fetch = Fetch([], d)
+
+    dl_dir = d.getVar('DL_DIR', True)
+    filelist = []
+    for u in fetch.urls:
+        ud = fetch.ud[u]
+
+        if isinstance(ud.method, local.Local):
+            ud.setup_localpath(d)
+            f = ud.localpath
+            if f.startswith(dl_dir):
+                # The local fetcher's behaviour is to return a path under DL_DIR if it couldn't find the file anywhere else
+                if os.path.exists(f):
+                    bb.warn("Getting checksum for %s SRC_URI entry %s: file not found except in DL_DIR" % (d.getVar('PN', True), os.path.basename(f)))
+                else:
+                    bb.warn("Unable to get checksum for %s SRC_URI entry %s: file could not be found" % (d.getVar('PN', True), os.path.basename(f)))
+                    continue
+            filelist.append(f)
+
+    return " ".join(filelist)
+
+
+def get_file_checksums(filelist, pn):
+    """Get a list of the checksums for a list of local files
+
+    Returns the checksums for a list of local files, caching the results as
+    it proceeds
+
+    """
+
+    def checksum_file(f):
+        try:
+            checksum = _checksum_cache.get_checksum(f)
+        except OSError as e:
+            import traceback
+            bb.warn("Unable to get checksum for %s SRC_URI entry %s: %s" % (pn, os.path.basename(f), e))
+            return None
+        return checksum
+
+    checksums = []
+    for pth in filelist.split():
+        checksum = None
+        if '*' in pth:
+            # Handle globs
+            import glob
+            for f in glob.glob(pth):
+                checksum = checksum_file(f)
+                if checksum:
+                    checksums.append((f, checksum))
+        elif os.path.isdir(pth):
+            # Handle directories
+            for root, dirs, files in os.walk(pth):
+                for name in files:
+                    fullpth = os.path.join(root, name)
+                    checksum = checksum_file(fullpth)
+                    if checksum:
+                        checksums.append((fullpth, checksum))
+        else:
+            checksum = checksum_file(pth)
+
+        if checksum:
+            checksums.append((pth, checksum))
+
+    checksums.sort()
+    return checksums
+
+
 class FetchData(object):
     """
     A class which represents the fetcher state for a given URI.
diff --git a/bitbake/lib/bb/siggen.py b/bitbake/lib/bb/siggen.py
index 5a0b80e..daf5677 100644
--- a/bitbake/lib/bb/siggen.py
+++ b/bitbake/lib/bb/siggen.py
@@ -60,6 +60,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
         self.taskhash = {}
         self.taskdeps = {}
         self.runtaskdeps = {}
+        self.file_checksum_values = {}
         self.gendeps = {}
         self.lookupcache = {}
         self.pkgnameextract = re.compile("(?P<fn>.*)\..*")
@@ -152,6 +153,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
         k = fn + "." + task
         data = dataCache.basetaskhash[k]
         self.runtaskdeps[k] = []
+        self.file_checksum_values[k] = {}
         recipename = dataCache.pkg_fn[fn]
         for dep in sorted(deps, key=clean_basepath):
             depname = dataCache.pkg_fn[self.pkgnameextract.search(dep).group('fn')]
@@ -161,6 +163,12 @@ class SignatureGeneratorBasic(SignatureGenerator):
                 bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?", dep)
             data = data + self.taskhash[dep]
             self.runtaskdeps[k].append(dep)
+
+        if task in dataCache.file_checksums[fn]:
+            checksums = bb.fetch2.get_file_checksums(dataCache.file_checksums[fn][task], recipename)
+            for (f,cs) in checksums:
+               self.file_checksum_values[k][f] = cs
+               data = data + cs
         h = hashlib.md5(data).hexdigest()
         self.taskhash[k] = h
         #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
@@ -197,6 +205,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
 
         if runtime and k in self.taskhash:
             data['runtaskdeps'] = self.runtaskdeps[k]
+            data['file_checksum_values'] = self.file_checksum_values[k]
             data['runtaskhashes'] = {}
             for dep in data['runtaskdeps']:
                 data['runtaskhashes'][dep] = self.taskhash[dep]
@@ -304,6 +313,18 @@ def compare_sigfiles(a, b):
         for dep in changed:
             print "Variable %s value changed from %s to %s" % (dep, a_data['varvals'][dep], b_data['varvals'][dep])
 
+    changed, added, removed = dict_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
+    if changed:
+        for f in changed:
+            print "Checksum for file %s changed from %s to %s" % (f, a_data['file_checksum_values'][f], b_data['file_checksum_values'][f])
+    if added:
+        for f in added:
+            print "Dependency on checksum of file %s was added" % (f)
+    if removed:
+        for f in removed:
+            print "Dependency on checksum of file %s was removed" % (f)
+
+
     if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
         a = clean_basepaths(a_data['runtaskhashes'])
         b = clean_basepaths(b_data['runtaskhashes'])
@@ -353,6 +374,9 @@ def dump_sigfile(a):
     if 'runtaskdeps' in a_data:
         print "Tasks this task depends on: %s" % (a_data['runtaskdeps'])
 
+    if 'file_checksum_values' in a_data:
+        print "This task depends on the checksums of files: %s" % (a_data['file_checksum_values'])
+
     if 'runtaskhashes' in a_data:
         for dep in a_data['runtaskhashes']:
             print "Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep])
-- 
1.7.9.5





More information about the bitbake-devel mailing list