[bitbake-devel] [PATCH 1/9] cache: Use configuration's hash value to validate cache

Dongxiao Xu dongxiao.xu at intel.com
Fri Jan 13 08:30:35 UTC 2012


Previously we used the file timestamp to judge whether a cache was
valid. This commit introduces a new method: calculate a hash over all
of a configuration's key/value pairs and append it to the cache
filename, for example bb_cache.dat.xxxyyyzzz.

This mechanism also keeps the cache correct when the user dynamically
sets variables from a frontend GUI such as HOB.
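
For illustration only (not part of the patch), a minimal sketch of the
naming scheme; the cache directory and the sample variable are made up:

    import hashlib
    import os

    def tagged_cache_file(cachedir, data_hash):
        # The configuration hash becomes part of the cache filename,
        # so a changed configuration resolves to a different file
        # instead of silently reusing a stale cache.
        return os.path.join(cachedir, "bb_cache.dat" + "." + data_hash)

    data_hash = hashlib.md5("MACHINE: qemux86\n".encode("utf-8")).hexdigest()
    print(tagged_cache_file("/tmp/cache", data_hash))
    # -> /tmp/cache/bb_cache.dat.<32-hex-digit-md5>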

Signed-off-by: Dongxiao Xu <dongxiao.xu at intel.com>
---
 lib/bb/cache.py      |   32 ++++++++++++--------------------
 lib/bb/cooker.py     |    4 +++-
 lib/bb/data_smart.py |   21 +++++++++++++++++++++
 3 files changed, 36 insertions(+), 21 deletions(-)

diff --git a/lib/bb/cache.py b/lib/bb/cache.py
index 6b7fa6f..955b6df 100644
--- a/lib/bb/cache.py
+++ b/lib/bb/cache.py
@@ -42,10 +42,10 @@ except ImportError:
     logger.info("Importing cPickle failed. "
                 "Falling back to a very slow implementation.")
 
-__cache_version__ = "142"
+__cache_version__ = "143"
 
-def getCacheFile(path, filename):
-    return os.path.join(path, filename)
+def getCacheFile(path, filename, data_hash):
+    return os.path.join(path, filename + "." + data_hash)
 
 # RecipeInfoCommon defines common data retrieving methods
 # from meta data for caches. CoreRecipeInfo as well as other
@@ -254,7 +254,7 @@ class Cache(object):
     BitBake Cache implementation
     """
 
-    def __init__(self, data, caches_array):
+    def __init__(self, data, data_hash, caches_array):
         # Pass caches_array information into Cache Constructor
         # It will be used in later for deciding whether we 
         # need extra cache file dump/load support 
@@ -266,6 +266,7 @@ class Cache(object):
         self.data = None
         self.data_fn = None
         self.cacheclean = True
+        self.data_hash = data_hash
 
         if self.cachedir in [None, '']:
             self.has_cache = False
@@ -274,26 +275,17 @@ class Cache(object):
             return
 
         self.has_cache = True
-        self.cachefile = getCacheFile(self.cachedir, "bb_cache.dat")
+        self.cachefile = getCacheFile(self.cachedir, "bb_cache.dat", self.data_hash)
 
         logger.debug(1, "Using cache in '%s'", self.cachedir)
         bb.utils.mkdirhier(self.cachedir)
 
-        # If any of configuration.data's dependencies are newer than the
-        # cache there isn't even any point in loading it...
-        newest_mtime = 0
-        deps = data.getVar("__base_depends")
-
-        old_mtimes = [old_mtime for _, old_mtime in deps]
-        old_mtimes.append(newest_mtime)
-        newest_mtime = max(old_mtimes)
-
         cache_ok = True
         if self.caches_array:
             for cache_class in self.caches_array:
                 if type(cache_class) is type and issubclass(cache_class, RecipeInfoCommon):
-                    cachefile = getCacheFile(self.cachedir, cache_class.cachefile)
-                    cache_ok = cache_ok and (bb.parse.cached_mtime_noerror(cachefile) >= newest_mtime)
+                    cachefile = getCacheFile(self.cachedir, cache_class.cachefile, self.data_hash)
+                    cache_ok = cache_ok and os.path.exists(cachefile)
                     cache_class.init_cacheData(self)
         if cache_ok:
             self.load_cachefile()
@@ -327,7 +319,7 @@ class Cache(object):
         # Calculate the correct cachesize of all those cache files
         for cache_class in self.caches_array:
             if type(cache_class) is type and issubclass(cache_class, RecipeInfoCommon):
-                cachefile = getCacheFile(self.cachedir, cache_class.cachefile)
+                cachefile = getCacheFile(self.cachedir, cache_class.cachefile, self.data_hash)
                 with open(cachefile, "rb") as cachefile:
                     cachesize += os.fstat(cachefile.fileno()).st_size
 
@@ -335,7 +327,7 @@ class Cache(object):
         
         for cache_class in self.caches_array:
             if type(cache_class) is type and issubclass(cache_class, RecipeInfoCommon):
-                cachefile = getCacheFile(self.cachedir, cache_class.cachefile)
+                cachefile = getCacheFile(self.cachedir, cache_class.cachefile, self.data_hash)
                 with open(cachefile, "rb") as cachefile:
                     pickled = pickle.Unpickler(cachefile)                    
                     while cachefile:
@@ -588,7 +580,7 @@ class Cache(object):
         for cache_class in self.caches_array:
             if type(cache_class) is type and issubclass(cache_class, RecipeInfoCommon):
                 cache_class_name = cache_class.__name__
-                cachefile = getCacheFile(self.cachedir, cache_class.cachefile)
+                cachefile = getCacheFile(self.cachedir, cache_class.cachefile, self.data_hash)
                 file_dict[cache_class_name] = open(cachefile, "wb")
                 pickler_dict[cache_class_name] =  pickle.Pickler(file_dict[cache_class_name], pickle.HIGHEST_PROTOCOL)
                    
@@ -693,7 +685,7 @@ def init(cooker):
     Files causing parsing errors are evicted from the cache.
 
     """
-    return Cache(cooker.configuration.data)
+    return Cache(cooker.configuration.data, cooker.configuration.data_hash)
 
 
 class CacheData(object):
diff --git a/lib/bb/cooker.py b/lib/bb/cooker.py
index 194046e..403aa88 100644
--- a/lib/bb/cooker.py
+++ b/lib/bb/cooker.py
@@ -858,6 +858,7 @@ class BBCooker:
         bb.parse.init_parser(data)
         bb.event.fire(bb.event.ConfigParsed(), data)
         self.configuration.data = data
+        self.configuration.data_hash = data.get_hash()
 
     def handleCollections( self, collections ):
         """Handle collections"""
@@ -1431,6 +1432,7 @@ class CookerParser(object):
         self.filelist = filelist
         self.cooker = cooker
         self.cfgdata = cooker.configuration.data
+        self.cfghash = cooker.configuration.data_hash
 
         # Accounting statistics
         self.parsed = 0
@@ -1446,7 +1448,7 @@ class CookerParser(object):
         self.num_processes = int(self.cfgdata.getVar("BB_NUMBER_PARSE_THREADS", True) or
                                  multiprocessing.cpu_count())
 
-        self.bb_cache = bb.cache.Cache(self.cfgdata, cooker.caches_array)
+        self.bb_cache = bb.cache.Cache(self.cfgdata, self.cfghash, cooker.caches_array)
         self.fromcache = []
         self.willparse = []
         for filename in self.filelist:
diff --git a/lib/bb/data_smart.py b/lib/bb/data_smart.py
index ea13478..9864034 100644
--- a/lib/bb/data_smart.py
+++ b/lib/bb/data_smart.py
@@ -31,6 +31,7 @@ BitBake build tools.
 import copy, re
 from collections import MutableMapping
 import logging
+import hashlib
 import bb, bb.codeparser
 from bb   import utils
 from bb.COW  import COWDictBase
@@ -459,3 +460,23 @@ class DataSmart(MutableMapping):
 
     def __delitem__(self, var):
         self.delVar(var)
+
+    def get_hash(self):
+        data = ""
+        keys = iter(self)
+        for key in keys:
+            if key == "TIME":
+                continue
+            if key == "__depends":
+                deps = list(self.getVar(key, False))
+                deps.sort()
+                value = [deps[i][0] for i in range(len(deps))]
+            elif key == "PATH":
+                path = list(set(self.getVar(key, False).split(':')))
+                path.sort()
+                value = " ".join(path)
+            else:
+                value = self.getVar(key, False) or ""
+            data = data + key + ': ' + str(value) + '\n'
+
+        return hashlib.md5(data).hexdigest()
-- 
1.7.0.4
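
For illustration (not part of the patch), the get_hash() logic above
can be exercised standalone like this; the sample configuration is
invented, and sorted() is used here only to make the sketch
deterministic, whereas the real method iterates the datastore directly:

    import hashlib

    def get_hash(config):
        # config: a dict of variable names to values, where
        # "__depends" holds (filename, mtime) tuples.
        data = ""
        for key in sorted(config):
            if key == "TIME":
                # Skip volatile variables that differ on every run.
                continue
            if key == "__depends":
                # Hash the dependency filenames only, not the mtimes,
                # so a mere touch does not invalidate the cache.
                deps = sorted(config[key])
                value = [dep[0] for dep in deps]
            elif key == "PATH":
                # De-duplicate and sort PATH so reordering entries
                # leaves the hash unchanged.
                value = " ".join(sorted(set(config[key].split(":"))))
            else:
                value = config[key] or ""
            data = data + key + ": " + str(value) + "\n"
        # encode() keeps this runnable on Python 3; the 2012 code
        # targeted Python 2, where str was already a byte string.
        return hashlib.md5(data.encode("utf-8")).hexdigest()

    cfg = {"MACHINE": "qemux86",
           "PATH": "/usr/bin:/bin:/usr/bin",
           "TIME": "083035",
           "__depends": [("conf/local.conf", 1), ("conf/bblayers.conf", 2)]}
    print(get_hash(cfg))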
