[bitbake-devel] [PATCH 2/2] bitbake: implement checksums for local files in SRC_URI
Mark Hatle
mark.hatle at windriver.com
Tue May 22 23:45:23 UTC 2012
On 5/22/12 6:23 PM, Paul Eggleton wrote:
> Gathers a list of paths to have checksums calculated at parse time, and
> processes these when calculating task hashes. Checksums are cached with
> the file's current mtime. Thus, changing any local file in SRC_URI will
> now cause the do_fetch taskhash to change, thus forcing a rebuild.
Does the mtime change invalidate the checksum, or just cause the checksum to be
recomputed?
The issue I see is that if you share a ccache file with someone else, their files
may simply have a different mtime on them.
From reading the code below, I think the comment is just confusing me. The
checksum is computed and stored based on a hash + mtime. If the mtime changes,
that will cause the system to recalculate the checksum, which may end up being
the same (and if it is, no rebuild), right?
--Mark
> This change adds roughly an 8% increase in parse time (a few
> seconds) and maybe a few seconds during runqueue generation, so a fairly
> moderate performance hit.
>
> Note that since paths are resolved at parse time, this will not force
> a rebuild when files are introduced which would cause that resolved path
> to be different - for example, where a machine-specific version of a file
> was added without otherwise changing the recipe. This will need to be
> handled in a future update.
>
> Code to hook this into the signature generator was courtesy of
> Richard Purdie<richard.purdie at linuxfoundation.org>.
>
> Implements [YOCTO #2044].
>
> Signed-off-by: Paul Eggleton<paul.eggleton at linux.intel.com>
> ---
> bitbake/lib/bb/cache.py | 13 ++++--
> bitbake/lib/bb/checksum.py | 90 +++++++++++++++++++++++++++++++++++++
> bitbake/lib/bb/cooker.py | 2 +
> bitbake/lib/bb/fetch2/__init__.py | 85 +++++++++++++++++++++++++++++++++++
> bitbake/lib/bb/siggen.py | 24 ++++++++++
> 5 files changed, 211 insertions(+), 3 deletions(-)
> create mode 100644 bitbake/lib/bb/checksum.py
>
> diff --git a/bitbake/lib/bb/cache.py b/bitbake/lib/bb/cache.py
> index 36e6356..dea2a80 100644
> --- a/bitbake/lib/bb/cache.py
> +++ b/bitbake/lib/bb/cache.py
> @@ -43,7 +43,7 @@ except ImportError:
> logger.info("Importing cPickle failed. "
> "Falling back to a very slow implementation.")
>
> -__cache_version__ = "143"
> +__cache_version__ = "144"
>
> def getCacheFile(path, filename, data_hash):
> return os.path.join(path, filename + "." + data_hash)
> @@ -76,9 +76,13 @@ class RecipeInfoCommon(object):
> for task in tasks)
>
> @classmethod
> - def flaglist(cls, flag, varlist, metadata):
> - return dict((var, metadata.getVarFlag(var, flag, True))
> + def flaglist(cls, flag, varlist, metadata, squash=False):
> + out_dict = dict((var, metadata.getVarFlag(var, flag, True))
> for var in varlist)
> + if squash:
> + return dict((k,v) for (k,v) in out_dict.iteritems() if v)
> + else:
> + return out_dict
>
> @classmethod
> def getvar(cls, var, metadata):
> @@ -128,6 +132,7 @@ class CoreRecipeInfo(RecipeInfoCommon):
> self.stamp = self.getvar('STAMP', metadata)
> self.stamp_base = self.flaglist('stamp-base', self.tasks, metadata)
> self.stamp_extrainfo = self.flaglist('stamp-extra-info', self.tasks, metadata)
> + self.file_checksums = self.flaglist('file-checksums', self.tasks, metadata, True)
> self.packages_dynamic = self.listvar('PACKAGES_DYNAMIC', metadata)
> self.depends = self.depvar('DEPENDS', metadata)
> self.provides = self.depvar('PROVIDES', metadata)
> @@ -154,6 +159,7 @@ class CoreRecipeInfo(RecipeInfoCommon):
> cachedata.stamp = {}
> cachedata.stamp_base = {}
> cachedata.stamp_extrainfo = {}
> + cachedata.file_checksums = {}
> cachedata.fn_provides = {}
> cachedata.pn_provides = defaultdict(list)
> cachedata.all_depends = []
> @@ -185,6 +191,7 @@ class CoreRecipeInfo(RecipeInfoCommon):
> cachedata.stamp[fn] = self.stamp
> cachedata.stamp_base[fn] = self.stamp_base
> cachedata.stamp_extrainfo[fn] = self.stamp_extrainfo
> + cachedata.file_checksums[fn] = self.file_checksums
>
> provides = [self.pn]
> for provide in self.provides:
> diff --git a/bitbake/lib/bb/checksum.py b/bitbake/lib/bb/checksum.py
> new file mode 100644
> index 0000000..514ff0b
> --- /dev/null
> +++ b/bitbake/lib/bb/checksum.py
> @@ -0,0 +1,90 @@
> +# Local file checksum cache implementation
> +#
> +# Copyright (C) 2012 Intel Corporation
> +#
> +# This program is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License version 2 as
> +# published by the Free Software Foundation.
> +#
> +# This program is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License along
> +# with this program; if not, write to the Free Software Foundation, Inc.,
> +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
> +
> +import os
> +import stat
> +import bb.utils
> +import logging
> +from bb.cache import MultiProcessCache
> +
> +logger = logging.getLogger("BitBake.Cache")
> +
> +try:
> + import cPickle as pickle
> +except ImportError:
> + import pickle
> + logger.info("Importing cPickle failed. "
> + "Falling back to a very slow implementation.")
> +
> +
> +# mtime cache (non-persistent)
> +# based upon the assumption that files do not change during bitbake run
> +class FileMtimeCache(object):
> + cache = {}
> +
> + def cached_mtime(self, f):
> + if f not in self.cache:
> + self.cache[f] = os.stat(f)[stat.ST_MTIME]
> + return self.cache[f]
> +
> + def cached_mtime_noerror(self, f):
> + if f not in self.cache:
> + try:
> + self.cache[f] = os.stat(f)[stat.ST_MTIME]
> + except OSError:
> + return 0
> + return self.cache[f]
> +
> + def update_mtime(self, f):
> + self.cache[f] = os.stat(f)[stat.ST_MTIME]
> + return self.cache[f]
> +
> + def clear(self):
> + self.cache.clear()
> +
> +# Checksum + mtime cache (persistent)
> +class FileChecksumCache(MultiProcessCache):
> + cache_file_name = "local_file_checksum_cache.dat"
> + CACHE_VERSION = 1
> +
> + def __init__(self):
> + self.mtime_cache = FileMtimeCache()
> + MultiProcessCache.__init__(self)
> +
> + def get_checksum(self, f):
> + entry = self.cachedata[0].get(f)
> + cmtime = self.mtime_cache.cached_mtime(f)
> + if entry:
> + (mtime, hashval) = entry
> + if cmtime == mtime:
> + return hashval
> + else:
> + bb.debug(2, "file %s changed mtime, recompute checksum" % f)
> +
> + hashval = bb.utils.md5_file(f)
> + self.cachedata_extras[0][f] = (cmtime, hashval)
> + return hashval
> +
> + def merge_data(self, source, dest):
> + for h in source[0]:
> + if h in dest:
> + (smtime, _) = source[0][h]
> + (dmtime, _) = dest[0][h]
> + if smtime> dmtime:
> + dest[0][h] = source[0][h]
> + else:
> + dest[0][h] = source[0][h]
> diff --git a/bitbake/lib/bb/cooker.py b/bitbake/lib/bb/cooker.py
> index dea0aad..8ad4922 100644
> --- a/bitbake/lib/bb/cooker.py
> +++ b/bitbake/lib/bb/cooker.py
> @@ -1570,6 +1570,7 @@ class CookerParser(object):
> def init():
> Parser.cfg = self.cfgdata
> multiprocessing.util.Finalize(None, bb.codeparser.parser_cache_save, args=(self.cfgdata,), exitpriority=1)
> + multiprocessing.util.Finalize(None, bb.fetch.fetcher_parse_save, args=(self.cfgdata,), exitpriority=1)
>
> self.feeder_quit = multiprocessing.Queue(maxsize=1)
> self.parser_quit = multiprocessing.Queue(maxsize=self.num_processes)
> @@ -1618,6 +1619,7 @@ class CookerParser(object):
> sync.start()
> multiprocessing.util.Finalize(None, sync.join, exitpriority=-100)
> bb.codeparser.parser_cache_savemerge(self.cooker.configuration.data)
> + bb.fetch.fetcher_parse_done(self.cooker.configuration.data)
>
> def load_cached(self):
> for filename, appends in self.fromcache:
> diff --git a/bitbake/lib/bb/fetch2/__init__.py b/bitbake/lib/bb/fetch2/__init__.py
> index 0b976c4..d4b6c3e 100644
> --- a/bitbake/lib/bb/fetch2/__init__.py
> +++ b/bitbake/lib/bb/fetch2/__init__.py
> @@ -8,6 +8,7 @@ BitBake build tools.
> """
>
> # Copyright (C) 2003, 2004 Chris Larson
> +# Copyright (C) 2012 Intel Corporation
> #
> # This program is free software; you can redistribute it and/or modify
> # it under the terms of the GNU General Public License version 2 as
> @@ -30,9 +31,11 @@ import os, re
> import logging
> import urllib
> import bb.persist_data, bb.utils
> +import bb.checksum
> from bb import data
>
> __version__ = "2"
> +_checksum_cache = bb.checksum.FileChecksumCache()
>
> logger = logging.getLogger("BitBake.Fetcher")
>
> @@ -233,10 +236,18 @@ def fetcher_init(d):
> else:
> raise FetchError("Invalid SRCREV cache policy of: %s" % srcrev_policy)
>
> + _checksum_cache.init_cache(d)
> +
> for m in methods:
> if hasattr(m, "init"):
> m.init(d)
>
> +def fetcher_parse_save(d):
> + _checksum_cache.save_extras(d)
> +
> +def fetcher_parse_done(d):
> + _checksum_cache.save_merge(d)
> +
> def fetcher_compare_revisions(d):
> """
> Compare the revisions in the persistant cache with current values and
> @@ -553,6 +564,80 @@ def srcrev_internal_helper(ud, d, name):
>
> return rev
>
> +
> +def get_checksum_file_list(d):
> + """ Get a list of files checksum in SRC_URI
> +
> + Returns the all resolved local path of all local file entries in
> + SRC_URI as a space-separated string
> + """
> + fetch = Fetch([], d)
> +
> + dl_dir = d.getVar('DL_DIR', True)
> + filelist = []
> + for u in fetch.urls:
> + ud = fetch.ud[u]
> +
> + if isinstance(ud.method, local.Local):
> + ud.setup_localpath(d)
> + f = ud.localpath
> + if f.startswith(dl_dir):
> + # The local fetcher's behaviour is to return a path under DL_DIR if it couldn't find the file anywhere else
> + if os.path.exists(f):
> + bb.warn("Getting checksum for %s SRC_URI entry %s: file not found except in DL_DIR" % (d.getVar('PN', True), os.path.basename(f)))
> + else:
> + bb.warn("Unable to get checksum for %s SRC_URI entry %s: file could not be found" % (d.getVar('PN', True), os.path.basename(f)))
> + continue
> + filelist.append(f)
> +
> + return " ".join(filelist)
> +
> +
> +def get_file_checksums(filelist, pn):
> + """Get a list of the checksums for a list of local files
> +
> + Returns the checksums for a list of local files, caching the results as
> + it proceeds
> +
> + """
> +
> + def checksum_file(f):
> + try:
> + checksum = _checksum_cache.get_checksum(f)
> + except OSError as e:
> + import traceback
> + bb.warn("Unable to get checksum for %s SRC_URI entry %s: %s" % (pn, os.path.basename(f), e))
> + return None
> + return checksum
> +
> + checksums = []
> + for pth in filelist.split():
> + checksum = None
> + if '*' in pth:
> + # Handle globs
> + import glob
> + for f in glob.glob(pth):
> + checksum = checksum_file(f)
> + if checksum:
> + checksums.append((f, checksum))
> + elif os.path.isdir(pth):
> + # Handle directories
> + for root, dirs, files in os.walk(pth):
> + for name in files:
> + fullpth = os.path.join(root, name)
> + checksum = checksum_file(fullpth)
> + if checksum:
> + checksums.append((fullpth, checksum))
> + else:
> + checksum = checksum_file(pth)
> +
> + if checksum:
> + checksums.append((pth, checksum))
> +
> + checksums.sort()
> + return checksums
> +
> +
> class FetchData(object):
> """
> A class which represents the fetcher state for a given URI.
> diff --git a/bitbake/lib/bb/siggen.py b/bitbake/lib/bb/siggen.py
> index 5a0b80e..daf5677 100644
> --- a/bitbake/lib/bb/siggen.py
> +++ b/bitbake/lib/bb/siggen.py
> @@ -60,6 +60,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
> self.taskhash = {}
> self.taskdeps = {}
> self.runtaskdeps = {}
> + self.file_checksum_values = {}
> self.gendeps = {}
> self.lookupcache = {}
> self.pkgnameextract = re.compile("(?P<fn>.*)\..*")
> @@ -152,6 +153,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
> k = fn + "." + task
> data = dataCache.basetaskhash[k]
> self.runtaskdeps[k] = []
> + self.file_checksum_values[k] = {}
> recipename = dataCache.pkg_fn[fn]
> for dep in sorted(deps, key=clean_basepath):
> depname = dataCache.pkg_fn[self.pkgnameextract.search(dep).group('fn')]
> @@ -161,6 +163,12 @@ class SignatureGeneratorBasic(SignatureGenerator):
> bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?", dep)
> data = data + self.taskhash[dep]
> self.runtaskdeps[k].append(dep)
> +
> + if task in dataCache.file_checksums[fn]:
> + checksums = bb.fetch2.get_file_checksums(dataCache.file_checksums[fn][task], recipename)
> + for (f,cs) in checksums:
> + self.file_checksum_values[k][f] = cs
> + data = data + cs
> h = hashlib.md5(data).hexdigest()
> self.taskhash[k] = h
> #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
> @@ -197,6 +205,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
>
> if runtime and k in self.taskhash:
> data['runtaskdeps'] = self.runtaskdeps[k]
> + data['file_checksum_values'] = self.file_checksum_values[k]
> data['runtaskhashes'] = {}
> for dep in data['runtaskdeps']:
> data['runtaskhashes'][dep] = self.taskhash[dep]
> @@ -304,6 +313,18 @@ def compare_sigfiles(a, b):
> for dep in changed:
> print "Variable %s value changed from %s to %s" % (dep, a_data['varvals'][dep], b_data['varvals'][dep])
>
> + changed, added, removed = dict_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
> + if changed:
> + for f in changed:
> + print "Checksum for file %s changed from %s to %s" % (f, a_data['file_checksum_values'][f], b_data['file_checksum_values'][f])
> + if added:
> + for f in added:
> + print "Dependency on checksum of file %s was added" % (f)
> + if removed:
> + for f in removed:
> + print "Dependency on checksum of file %s was removed" % (f)
> +
> +
> if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
> a = clean_basepaths(a_data['runtaskhashes'])
> b = clean_basepaths(b_data['runtaskhashes'])
> @@ -353,6 +374,9 @@ def dump_sigfile(a):
> if 'runtaskdeps' in a_data:
> print "Tasks this task depends on: %s" % (a_data['runtaskdeps'])
>
> + if 'file_checksum_values' in a_data:
> + print "This task depends on the checksums of files: %s" % (a_data['file_checksum_values'])
> +
> if 'runtaskhashes' in a_data:
> for dep in a_data['runtaskhashes']:
> print "Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep])
More information about the bitbake-devel
mailing list