[bitbake-devel] [PATCH 7/9] utils: also use mmap for SHA256 and SHA1, for performance

Armin Kuster akuster808 at gmail.com
Sun Nov 24 23:43:20 UTC 2019


From: Ross Burton <ross.burton at intel.com>

md5_file() uses an mmap() window to improve performance when hashing files, so
factor that logic out into a shared helper and use it for SHA1 and SHA256 too.

Signed-off-by: Ross Burton <ross.burton at intel.com>
Signed-off-by: Richard Purdie <richard.purdie at linuxfoundation.org>
Signed-off-by: Armin Kuster <akuster808 at gmail.com>
---
 lib/bb/utils.py | 34 ++++++++++++++--------------------
 1 file changed, 14 insertions(+), 20 deletions(-)

diff --git a/lib/bb/utils.py b/lib/bb/utils.py
index d035949..8d40bcd 100644
--- a/lib/bb/utils.py
+++ b/lib/bb/utils.py
@@ -520,22 +520,26 @@ def unlockfile(lf):
     fcntl.flock(lf.fileno(), fcntl.LOCK_UN)
     lf.close()
 
-def md5_file(filename):
-    """
-    Return the hex string representation of the MD5 checksum of filename.
-    """
-    import hashlib, mmap
+def _hasher(method, filename):
+    import mmap
 
     with open(filename, "rb") as f:
-        m = hashlib.md5()
         try:
             with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
                 for chunk in iter(lambda: mm.read(8192), b''):
-                    m.update(chunk)
+                    method.update(chunk)
         except ValueError:
             # You can't mmap() an empty file so silence this exception
             pass
-    return m.hexdigest()
+    return method.hexdigest()
+
+
+def md5_file(filename):
+    """
+    Return the hex string representation of the MD5 checksum of filename.
+    """
+    import hashlib
+    return _hasher(hashlib.md5(), filename)
 
 def sha256_file(filename):
     """
@@ -543,24 +547,14 @@ def sha256_file(filename):
     filename.
     """
     import hashlib
-
-    s = hashlib.sha256()
-    with open(filename, "rb") as f:
-        for line in f:
-            s.update(line)
-    return s.hexdigest()
+    return _hasher(hashlib.sha256(), filename)
 
 def sha1_file(filename):
     """
     Return the hex string representation of the SHA1 checksum of the filename
     """
     import hashlib
-
-    s = hashlib.sha1()
-    with open(filename, "rb") as f:
-        for line in f:
-            s.update(line)
-    return s.hexdigest()
+    return _hasher(hashlib.sha1(), filename)
 
 def preserved_envvars_exported():
     """Variables which are taken from the environment and placed in and exported
-- 
2.7.4



More information about the bitbake-devel mailing list