[OE-core] [PATCH] insane: improve license checksumming logic

Ross Burton ross.burton at intel.com
Thu Mar 21 12:30:42 UTC 2019


Instead of opening files as bytes and battling decoding to UTF-8 which can throw
exceptions, open directly as strings and replace invalid codepoints.  This
handles licenses in encodings which are not UTF-8 but are based on ASCII much
better.

Also instead of extracting the license lines, writing them to a file, and then
hashing the file, hash the lines directly.

Signed-off-by: Ross Burton <ross.burton at intel.com>
---
 meta/classes/insane.bbclass | 81 ++++++++++++++++++---------------------------
 1 file changed, 32 insertions(+), 49 deletions(-)

diff --git a/meta/classes/insane.bbclass b/meta/classes/insane.bbclass
index a2adfee835e..37b8bb0032d 100644
--- a/meta/classes/insane.bbclass
+++ b/meta/classes/insane.bbclass
@@ -458,7 +458,6 @@ python populate_lic_qa_checksum() {
     """
     Check for changes in the license files.
     """
-    import tempfile
     sane = True
 
     lic_files = d.getVar('LIC_FILES_CHKSUM') or ''
@@ -496,61 +495,45 @@ python populate_lic_qa_checksum() {
 
         if (not beginline) and (not endline):
             md5chksum = bb.utils.md5_file(srclicfile)
-            with open(srclicfile, 'rb') as f:
-                license = f.read()
+            with open(srclicfile, 'r', errors='replace') as f:
+                license = f.read().splitlines()
         else:
-            fi = open(srclicfile, 'rb')
-            fo = tempfile.NamedTemporaryFile(mode='wb', prefix='poky.', suffix='.tmp', delete=False)
-            tmplicfile = fo.name;
-            lineno = 0
-            linesout = 0
-            license = []
-            for line in fi:
-                lineno += 1
-                if (lineno >= beginline):
-                    if ((lineno <= endline) or not endline):
-                        fo.write(line)
-                        license.append(line)
-                        linesout += 1
-                    else:
-                        break
-            fo.flush()
-            fo.close()
-            fi.close()
-            md5chksum = bb.utils.md5_file(tmplicfile)
-            license = b''.join(license)
-            os.unlink(tmplicfile)
-
+            with open(srclicfile, 'rb') as f:
+                import hashlib
+                lineno = 0
+                license = []
+                m = hashlib.md5()
+                for line in f:
+                    lineno += 1
+                    if (lineno >= beginline):
+                        if ((lineno <= endline) or not endline):
+                            m.update(line)
+                            license.append(line.decode('utf-8', errors='replace').rstrip())
+                        else:
+                            break
+                md5chksum = m.hexdigest()
         if recipemd5 == md5chksum:
             bb.note (pn + ": md5 checksum matched for ", url)
         else:
             if recipemd5:
                 msg = pn + ": The LIC_FILES_CHKSUM does not match for " + url
                 msg = msg + "\n" + pn + ": The new md5 checksum is " + md5chksum
-                try:
-                    license_lines = license.decode('utf-8').split('\n')
-                except:
-                    # License text might not be valid UTF-8, in which
-                    # case we don't know how to include it in our output
-                    # and have to skip it.
-                    pass
-                else:
-                    max_lines = int(d.getVar('QA_MAX_LICENSE_LINES') or 20)
-                    if not license_lines or license_lines[-1] != '':
-                        # Ensure that our license text ends with a line break
-                        # (will be added with join() below).
-                        license_lines.append('')
-                    remove = len(license_lines) - max_lines
-                    if remove > 0:
-                        start = max_lines // 2
-                        end = start + remove - 1
-                        del license_lines[start:end]
-                        license_lines.insert(start, '...')
-                    msg = msg + "\n" + pn + ": Here is the selected license text:" + \
-                          "\n" + \
-                          "{:v^70}".format(" beginline=%d " % beginline if beginline else "") + \
-                          "\n" + "\n".join(license_lines) + \
-                          "{:^^70}".format(" endline=%d " % endline if endline else "")
+                max_lines = int(d.getVar('QA_MAX_LICENSE_LINES') or 20)
+                if not license or license[-1] != '':
+                    # Ensure that our license text ends with a line break
+                    # (will be added with join() below).
+                    license.append('')
+                remove = len(license) - max_lines
+                if remove > 0:
+                    start = max_lines // 2
+                    end = start + remove - 1
+                    del license[start:end]
+                    license.insert(start, '...')
+                msg = msg + "\n" + pn + ": Here is the selected license text:" + \
+                        "\n" + \
+                        "{:v^70}".format(" beginline=%d " % beginline if beginline else "") + \
+                        "\n" + "\n".join(license) + \
+                        "{:^^70}".format(" endline=%d " % endline if endline else "")
                 if beginline:
                     if endline:
                         srcfiledesc = "%s (lines %d through to %d)" % (srclicfile, beginline, endline)
-- 
2.11.0



More information about the Openembedded-core mailing list