[oe-commits] [openembedded-core] 03/10: gen-lockedsig-cache: Replace glob lookup with hash to filename lookup

git at git.openembedded.org git at git.openembedded.org
Wed Oct 2 13:42:43 UTC 2019


This is an automated email from the git hooks/post-receive script.

rpurdie pushed a commit to branch master-next
in repository openembedded-core.

commit ad36335b8592e0387dd36066920cd5ffefd375f8
Author: Konrad Scherer <Konrad.Scherer at windriver.com>
AuthorDate: Fri Sep 27 14:56:42 2019 -0400

    gen-lockedsig-cache: Replace glob lookup with hash to filename lookup
    
    Using the glob function to map signatures to sstate files is very slow
    when the sstate is large and accessed over nfs. The lookup now only
    loads the necessary prefixes and doesn't use glob at all.
    
    Unfortunately I don't have access to the systems where the performance
    issue was noticed and on my test system the glob is fast enough that
    the performance numbers aren't useful. I could verify that the file list
    returned by the new code is the same.
    
    [YOCTO #13539]
    
    Signed-off-by: Konrad Scherer <Konrad.Scherer at windriver.com>
    Signed-off-by: Richard Purdie <richard.purdie at linuxfoundation.org>
---
 scripts/gen-lockedsig-cache | 47 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 5 deletions(-)

diff --git a/scripts/gen-lockedsig-cache b/scripts/gen-lockedsig-cache
index e3076e1..48cb671 100755
--- a/scripts/gen-lockedsig-cache
+++ b/scripts/gen-lockedsig-cache
@@ -5,9 +5,9 @@
 
 import os
 import sys
-import glob
 import shutil
 import errno
+import time
 
 def mkdir(d):
     try:
@@ -16,6 +16,36 @@ def mkdir(d):
         if e.errno != errno.EEXIST:
             raise e
 
+# extract the hash from past the last colon to last underscore
+def extract_sha(filename):
+    return filename.split(':')[7].split('_')[0]
+
+# get all files in a directory, extract hash and make
+# a map from hash to list of file with that hash
+def map_sha_to_files(dir_, prefix, sha_map):
+    sstate_prefix_path = dir_ + '/' + prefix + '/'
+    sstate_files = os.listdir(sstate_prefix_path)
+    for f in sstate_files:
+        try:
+            sha = extract_sha(f)
+            if sha not in sha_map:
+                sha_map[sha] = []
+            sha_map[sha].append(sstate_prefix_path + f)
+        except IndexError:
+            continue
+
+# given a prefix build a map of hash to list of files
+def build_sha_cache(prefix):
+    sha_map = {}
+
+    sstate_dir = sys.argv[2]
+    map_sha_to_files(sstate_dir, prefix, sha_map)
+
+    native_sstate_dir = sys.argv[2] + '/' + sys.argv[4]
+    map_sha_to_files(native_sstate_dir, prefix, sha_map)
+
+    return sha_map
+
 if len(sys.argv) < 5:
     print("Incorrect number of arguments specified")
     print("syntax: gen-lockedsig-cache <locked-sigs.inc> <input-cachedir> <output-cachedir> <nativelsbstring> [filterfile]")
@@ -41,12 +71,19 @@ with open(sys.argv[1]) as f:
                 sigs.append(sig)
 
 print('Gathering file list')
+start_time = time.perf_counter()
 files = set()
+sstate_content_cache = {}
 for s in sigs:
-    p = sys.argv[2] + "/" + s[:2] + "/*" + s + "*"
-    files |= set(glob.glob(p))
-    p = sys.argv[2] + "/%s/" % sys.argv[4] + s[:2] + "/*" + s + "*"
-    files |= set(glob.glob(p))
+    prefix = s[:2]
+    if prefix not in sstate_content_cache:
+        sstate_content_cache[prefix] = build_sha_cache(prefix)
+
+    for f in sstate_content_cache[prefix][s]:
+        files.add(f)
+
+elapsed = time.perf_counter() - start_time
+print("Gathering file list took %.1fs" % elapsed)
 
 print('Processing files')
 for f in files:

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.


More information about the Openembedded-commits mailing list