[oe-commits] [bitbake] 02/02: siggen: Fix hashequiv performance issues

git at git.openembedded.org git at git.openembedded.org
Thu Dec 12 13:16:11 UTC 2019


This is an automated email from the git hooks/post-receive script.

rpurdie pushed a commit to branch master
in repository bitbake.

commit c4b8440f730c33eaf9f818b856ae81b2f1017fec
Author: Richard Purdie <richard.purdie at linuxfoundation.org>
AuthorDate: Wed Dec 11 23:11:50 2019 +0000

    siggen: Fix hashequiv performance issues
    
    We're seeing huge slowdowns on large builds on the autobuilder. A qemux86 world
    build was running really slowly, a primary feature was lots of rehashing going on
    due to an sstate change which caused a rebuild when all output should be identical.
    
    This was traced to the hundreds of thousands of calls to get_unihash() from
    get_taskhash(). If we simplify the unitaskhashes data structure, we can bypass
    the function call and access the data directly. In local profile charts,
    this significantly sped up process_possible_migrations(), both on profiles
    and visually.
    
    Whilst this change doesn't aid readability, it does solve an otherwise
    huge performance issue.
    
    Signed-off-by: Richard Purdie <richard.purdie at linuxfoundation.org>
---
 lib/bb/siggen.py | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/lib/bb/siggen.py b/lib/bb/siggen.py
index dbf5102..517ec7b 100644
--- a/lib/bb/siggen.py
+++ b/lib/bb/siggen.py
@@ -121,7 +121,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
         else:
             self.checksum_cache = None
 
-        self.unihash_cache = bb.cache.SimpleCache("1")
+        self.unihash_cache = bb.cache.SimpleCache("2")
         self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
 
     def init_rundepcheck(self, data):
@@ -216,7 +216,13 @@ class SignatureGeneratorBasic(SignatureGenerator):
                 continue
             if dep not in self.taskhash:
                 bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
-            data = data + self.get_unihash(dep)
+            # We bypass the fuction and go straight to the cache here
+            # as this was a performance bottleneck otherwise
+            if self.taskhash[dep] in self.unitaskhashes:
+                unihash = self.unitaskhashes[self.taskhash[dep]]
+            else:
+                unihash = self.get_unihash(dep)
+            data = data + unihash
             self.runtaskdeps[tid].append(dep)
 
         if task in dataCache.file_checksums[fn]:
@@ -387,24 +393,19 @@ class SignatureGeneratorUniHashMixIn(object):
             self._client = hashserv.create_client(self.server)
         return self._client
 
-    def __get_task_unihash_key(self, tid):
-        # TODO: The key only *needs* to be the taskhash, the tid is just
-        # convenient
-        return '%s:%s' % (tid.rsplit("/", 1)[1], self.taskhash[tid])
-
     def get_stampfile_hash(self, tid):
         if tid in self.taskhash:
             # If a unique hash is reported, use it as the stampfile hash. This
             # ensures that if a task won't be re-run if the taskhash changes,
             # but it would result in the same output hash
-            unihash = self.unitaskhashes.get(self.__get_task_unihash_key(tid), None)
+            unihash = self.unitaskhashes.get(self.taskhash[tid], None)
             if unihash is not None:
                 return unihash
 
         return super().get_stampfile_hash(tid)
 
     def set_unihash(self, tid, unihash):
-        self.unitaskhashes[self.__get_task_unihash_key(tid)] = unihash
+        self.unitaskhashes[self.taskhash[tid]] = unihash
 
     def get_unihash(self, tid):
         taskhash = self.taskhash[tid]
@@ -413,11 +414,9 @@ class SignatureGeneratorUniHashMixIn(object):
         if self.setscenetasks and tid not in self.setscenetasks:
             return taskhash
 
-        key = self.__get_task_unihash_key(tid)
-
         # TODO: This cache can grow unbounded. It probably only needs to keep
         # for each task
-        unihash = self.unitaskhashes.get(key, None)
+        unihash = self.unitaskhashes.get(taskhash, None)
         if unihash is not None:
             return unihash
 
@@ -449,7 +448,7 @@ class SignatureGeneratorUniHashMixIn(object):
         except hashserv.client.HashConnectionError as e:
             bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
 
-        self.unitaskhashes[key] = unihash
+        self.unitaskhashes[taskhash] = unihash
         return unihash
 
     def report_unihash(self, path, task, d):
@@ -467,7 +466,7 @@ class SignatureGeneratorUniHashMixIn(object):
             return
 
         # Sanity checks
-        cache_unihash = self.unitaskhashes.get(key, None)
+        cache_unihash = self.unitaskhashes.get(taskhash, None)
         if cache_unihash is None:
             bb.fatal('%s not in unihash cache. Please report this error' % key)
 

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.


More information about the Openembedded-commits mailing list