[OE-core] [PATCH] oe/distro_check.py: Fixes for python3

Aníbal Limón anibal.limon at linux.intel.com
Thu Jun 9 20:15:22 UTC 2016


create_socket: Use urllib, because urllib2 is now urllib in Python 3.
Passing proxies as an argument is deprecated, so export them into the
environment instead.

get_links_from_url: Stop using sgmllib for parsing HTML, because it
is deprecated in Python 3; use bs4 instead, which is already imported
in the bitbake tree.

[YOCTO #9744]

Signed-off-by: Aníbal Limón <anibal.limon at linux.intel.com>
---
 meta/lib/oe/distro_check.py | 82 +++++++++++++++++++--------------------------
 1 file changed, 34 insertions(+), 48 deletions(-)

diff --git a/meta/lib/oe/distro_check.py b/meta/lib/oe/distro_check.py
index f1f1fbb..1d5f1c9 100644
--- a/meta/lib/oe/distro_check.py
+++ b/meta/lib/oe/distro_check.py
@@ -1,53 +1,35 @@
 from contextlib import contextmanager
-@contextmanager
+
+from bb.utils import export_proxies
+
 def create_socket(url, d):
-    import urllib.request, urllib.parse, urllib.error
-    socket = urllib.request.urlopen(url, proxies=get_proxies(d))
+    import urllib
+
+    socket = None
     try:
-        yield socket
-    finally:
-        socket.close()
+        export_proxies(d)
+        socket = urllib.request.urlopen(url)
+    except:
+        bb.warn("distro_check: create_socket url %s can't access" % url)
 
-def get_proxies(d):
-    proxies = {}
-    for key in ['http', 'https', 'ftp', 'ftps', 'no', 'all']:
-        proxy = d.getVar(key + '_proxy', True)
-        if proxy:
-            proxies[key] = proxy
-    return proxies
+    return socket
 
 def get_links_from_url(url, d):
     "Return all the href links found on the web location"
 
-    import sgmllib
-    
-    class LinksParser(sgmllib.SGMLParser):
-        def parse(self, s):
-            "Parse the given string 's'."
-            self.feed(s)
-            self.close()
-    
-        def __init__(self, verbose=0):
-            "Initialise an object passing 'verbose' to the superclass."
-            sgmllib.SGMLParser.__init__(self, verbose)
-            self.hyperlinks = []
-    
-        def start_a(self, attributes):
-            "Process a hyperlink and its 'attributes'."
-            for name, value in attributes:
-                if name == "href":
-                    self.hyperlinks.append(value.strip('/'))
-    
-        def get_hyperlinks(self):
-            "Return the list of hyperlinks."
-            return self.hyperlinks
+    from bs4 import BeautifulSoup, SoupStrainer
 
-    with create_socket(url,d) as sock:
+    hyperlinks = []
+
+    webpage = ''
+    sock = create_socket(url,d)
+    if sock:
         webpage = sock.read()
 
-    linksparser = LinksParser()
-    linksparser.parse(webpage)
-    return linksparser.get_hyperlinks()
+    soup = BeautifulSoup(webpage, "html.parser", parse_only=SoupStrainer("a"))
+    for line in soup.find_all('a', href=True):
+        hyperlinks.append(line['href'].strip('/'))
+    return hyperlinks
 
 def find_latest_numeric_release(url, d):
     "Find the latest listed numeric release on the given url"
@@ -162,14 +144,18 @@ def find_latest_debian_release(url, d):
 
 def get_debian_style_source_package_list(url, section, d):
     "Return the list of package-names stored in the debian style Sources.gz file"
-    with create_socket(url,d) as sock:
-        webpage = sock.read()
-        import tempfile
-        tmpfile = tempfile.NamedTemporaryFile(mode='wb', prefix='oecore.', suffix='.tmp', delete=False)
-        tmpfilename=tmpfile.name
-        tmpfile.write(sock.read())
-        tmpfile.close()
+    import tempfile
     import gzip
+
+    webpage = ''
+    sock = create_socket(url,d)
+    if sock:
+        webpage = sock.read()
+
+    tmpfile = tempfile.NamedTemporaryFile(mode='w', prefix='oecore.', suffix='.tmp', delete=False)
+    tmpfilename=tmpfile.name
+    tmpfile.write(sock.read())
+    tmpfile.close()
     bb.note("Reading %s: %s" % (url, section))
 
     f = gzip.open(tmpfilename)
@@ -266,9 +252,9 @@ def update_distro_data(distro_check_dir, datetime, d):
     import fcntl
     try:
         if not os.path.exists(datetime_file):
-            open(datetime_file, 'w+b').close() # touch the file so that the next open won't fail
+            open(datetime_file, 'w+').close() # touch the file so that the next open won't fail
 
-        f = open(datetime_file, "r+b")
+        f = open(datetime_file, "r+")
         fcntl.lockf(f, fcntl.LOCK_EX)
         saved_datetime = f.read()
         if saved_datetime[0:8] != datetime[0:8]:
-- 
2.1.4




More information about the Openembedded-core mailing list