[bitbake-devel] [PATCH] wget.py: parse only <a> tags
Alexander Kanavin
alexander.kanavin at linux.intel.com
Fri Dec 4 11:00:20 UTC 2015
For two reasons:
1) The important one: we hit the following bug when doing upstream version checks
on some webpages:
https://bugs.launchpad.net/beautifulsoup/+bug/1471755
2) Also, documentation for beautifulsoup states that memory usage and
speed is improved that way.
Signed-off-by: Alexander Kanavin <alexander.kanavin at linux.intel.com>
---
bitbake/lib/bb/fetch2/wget.py | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/bitbake/lib/bb/fetch2/wget.py b/bitbake/lib/bb/fetch2/wget.py
index bd2a897..c185f5b 100644
--- a/bitbake/lib/bb/fetch2/wget.py
+++ b/bitbake/lib/bb/fetch2/wget.py
@@ -38,6 +38,7 @@ from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
from bs4 import BeautifulSoup
+from bs4 import SoupStrainer
class Wget(FetchMethod):
"""Class to fetch urls via 'wget'"""
@@ -367,7 +368,7 @@ class Wget(FetchMethod):
version = ['', '', '']
bb.debug(3, "VersionURL: %s" % (url))
- soup = BeautifulSoup(self._fetch_index(url, ud, d))
+ soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
if not soup:
bb.debug(3, "*** %s NO SOUP" % (url))
return ""
@@ -417,7 +418,7 @@ class Wget(FetchMethod):
ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))
- soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d))
+ soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
if not soup:
return version[1]
--
2.6.2
More information about the bitbake-devel
mailing list