[bitbake-devel] [PATCH 3/3] fetch2/wget: fallback to GET if HEAD is rejected in checkstatus()

Armin Kuster akuster808 at gmail.com
Fri Sep 16 23:36:44 UTC 2016


From: Ross Burton <ross.burton at intel.com>

The core change here is to fall back to GET requests if HEAD is rejected in the
checkstatus() method, as you can't do a HEAD on Amazon S3 (used by Github
archives).  This meant removing the monkey patch that the default method was GET
and adding a fixed redirect handler that doesn't reset to GET.

Also, change the way the opener is constructed from an if/elif cluster to a
conditionally constructed list.

Signed-off-by: Ross Burton <ross.burton at intel.com>
Signed-off-by: Richard Purdie <richard.purdie at linuxfoundation.org>
(cherry picked from commit 6ec70d5d2e330b41b932b0a655b838a5f37df01e)
Signed-off-by: Armin Kuster <akuster at mvista.com>
---
 lib/bb/fetch2/wget.py | 72 +++++++++++++++++++++++++++++++++++----------------
 lib/bb/tests/fetch.py |  2 ++
 2 files changed, 51 insertions(+), 23 deletions(-)

diff --git a/lib/bb/fetch2/wget.py b/lib/bb/fetch2/wget.py
index bd2a897..0ff5b2b 100644
--- a/lib/bb/fetch2/wget.py
+++ b/lib/bb/fetch2/wget.py
@@ -234,38 +234,64 @@ class Wget(FetchMethod):
 
             return exported
 
-        def head_method(self):
-            return "HEAD"
-
+        class HTTPMethodFallback(urllib2.BaseHandler):
+            """
+            Fallback to GET if HEAD is not allowed (405 HTTP error)
+            """
+            def http_error_405(self, req, fp, code, msg, headers):
+                fp.read()
+                fp.close()
+
+                newheaders = dict((k,v) for k,v in req.headers.items()
+                                  if k.lower() not in ("content-length", "content-type"))
+                return self.parent.open(urllib2.Request(req.get_full_url(),
+                                                        headers=newheaders,
+                                                        origin_req_host=req.get_origin_req_host(),
+                                                        unverifiable=True))
+
+            """
+            Some servers (e.g. GitHub archives, hosted on Amazon S3) return 403
+            Forbidden when they actually mean 405 Method Not Allowed.
+            """
+            http_error_403 = http_error_405
+
+            """
+            Some servers (e.g. FusionForge) returns 406 Not Acceptable when they
+            actually mean 405 Method Not Allowed.
+            """
+            http_error_406 = http_error_405
+
+        class FixedHTTPRedirectHandler(urllib2.HTTPRedirectHandler):
+            """
+            urllib2.HTTPRedirectHandler resets the method to GET on redirect,
+            when we want to follow redirects using the original method.
+            """
+            def redirect_request(self, req, fp, code, msg, headers, newurl):
+                newreq = urllib2.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, newurl)
+                newreq.get_method = lambda: req.get_method()
+                return newreq
         exported_proxies = export_proxies(d)
 
+        handlers = [FixedHTTPRedirectHandler, HTTPMethodFallback]
+        if export_proxies:
+            handlers.append(urllib2.ProxyHandler())
+        handlers.append(CacheHTTPHandler())
         # XXX: Since Python 2.7.9 ssl cert validation is enabled by default
         # see PEP-0476, this causes verification errors on some https servers
         # so disable by default.
         import ssl
-        ssl_context = None
         if hasattr(ssl, '_create_unverified_context'):
-            ssl_context = ssl._create_unverified_context()
-
-        if exported_proxies == True and ssl_context is not None:
-            opener = urllib2.build_opener(urllib2.ProxyHandler, CacheHTTPHandler,
-                    urllib2.HTTPSHandler(context=ssl_context))
-        elif exported_proxies == False and ssl_context is not None:
-            opener = urllib2.build_opener(CacheHTTPHandler,
-                    urllib2.HTTPSHandler(context=ssl_context))
-        elif exported_proxies == True and ssl_context is None:
-            opener = urllib2.build_opener(urllib2.ProxyHandler, CacheHTTPHandler)
-        else:
-            opener = urllib2.build_opener(CacheHTTPHandler)
-
-        urllib2.Request.get_method = head_method
-        urllib2.install_opener(opener)
-
-        uri = ud.url.split(";")[0]
+            handlers.append(urllib2.HTTPSHandler(context=ssl._create_unverified_context()))
+        opener = urllib2.build_opener(*handlers)
 
         try:
-            urllib2.urlopen(uri)
-        except:
+            uri = ud.url.split(";")[0]
+            r = urllib2.Request(uri)
+            r.get_method = lambda: "HEAD"
+            opener.open(r)
+        except urllib2.URLError as e:
+            # debug for now to avoid spamming the logs in e.g. remote sstate searches
+            logger.debug(2, "checkstatus() urlopen failed: %s" % e)
             return False
         return True
 
diff --git a/lib/bb/tests/fetch.py b/lib/bb/tests/fetch.py
index 7a08be0..3b74de0 100644
--- a/lib/bb/tests/fetch.py
+++ b/lib/bb/tests/fetch.py
@@ -737,6 +737,8 @@ class FetchCheckStatusTest(FetcherTest):
                       "ftp://ftp.gnu.org/gnu/autoconf/autoconf-2.60.tar.gz",
                       "ftp://ftp.gnu.org/gnu/chess/gnuchess-5.08.tar.gz",
                       "ftp://ftp.gnu.org/gnu/gmp/gmp-4.0.tar.gz",
+                      # GitHub releases are hosted on Amazon S3, which doesn't support HEAD
+                      "https://github.com/kergoth/tslib/releases/download/1.1/tslib-1.1.tar.xz"
                       ]
 
     if os.environ.get("BB_SKIP_NETTESTS") == "yes":
-- 
2.7.4




More information about the bitbake-devel mailing list