[bitbake-devel] [master][PATCH] fetch2/wget: support releases from private github repositories

André Draszik git at andred.net
Thu Nov 14 14:21:31 UTC 2019


The wget / http fetcher currently doesn't support fetching
assets attached to releases on private GitHub repositories,
i.e. release artefacts like
    https://github.com/<user>/<project>/releases/download/v1.0.0/asset1.txt

Those are special in that HTTP basic auth is not possible on the
URL shown in the GitHub UI. Instead, the asset must be downloaded
through the GitHub API (which does support HTTP basic auth),
where the URL is different.

To be able to access the GitHub API, opportunistic authentication
(auth-no-challenge) needs to be enabled. Then the API needs to
be queried for the real URL of the file to be downloaded, and
finally application/octet-stream must be specified explicitly.

Note that there is a slight difference in the location of the
REST API endpoints between GitHub.com and GitHub Enterprise.

    https://developer.github.com/v3/repos/releases/
    https://developer.github.com/enterprise/2.19/v3/enterprise-admin/

As it's impossible to determine whether a repository is hosted
on GitHub (considering GitHub Enterprise), let alone whether it
is private, a new SRC_URI flag, "github_private_asset", is
introduced; it should be set to "1", e.g.

    SRC_URI = "https://github.com/<user>/<project>/releases/download/v1.0.0/asset1.txt;github_private_asset=1"

Some notes:
* --auth-no-challenge is added unconditionally because we know
  username / password will definitely be needed, and they are
  likely to be specified in ~/.netrc, rather than in the recipe
* the release information returned looks something like:
[
    {
        ...
        "assets": [
            {
                ...
                "browser_download_url": "https://github.com/<user>/<project>/releases/download/v1.0.0/asset1.txt",
                "url": "https://api.github.com/repos/<user>/<project>/releases/assets/16146291",
                ...
            },
            ...
        ],
        ...
    },
    ...
]
  the asset's API "url" ends in a numeric ID rather than the file
  name, hence we need to pass -O to wget to explicitly download
  using the original name
* this has been tested with github.com and GitHub Enterprise on
  private repositories, with and without PREMIRRORS

Signed-off-by: André Draszik <git at andred.net>
---
 lib/bb/fetch2/wget.py | 90 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 85 insertions(+), 5 deletions(-)

diff --git a/lib/bb/fetch2/wget.py b/lib/bb/fetch2/wget.py
index 725586d2..90aa9b19 100644
--- a/lib/bb/fetch2/wget.py
+++ b/lib/bb/fetch2/wget.py
@@ -4,6 +4,12 @@ BitBake 'Fetch' implementations
 Classes for obtaining upstream sources for the
 BitBake build tools.
 
+Supported SRC_URI options are:
+
+- github_private_asset
+   Whether or not the URI is pointing to a release artefact
+   in a private GitHub repository. The default is no.
+
 """
 
 # Copyright (C) 2003, 2004  Chris Larson
@@ -23,11 +29,13 @@ import bb.progress
 import socket
 import http.client
 import urllib.request, urllib.parse, urllib.error
+import json
 from   bb.fetch2 import FetchMethod
 from   bb.fetch2 import FetchError
 from   bb.fetch2 import logger
 from   bb.fetch2 import runfetchcmd
 from   bb.fetch2 import FetchConnectionCache
+from   bb.fetch2 import uri_replace
 from   bb.utils import export_proxies
 from   bs4 import BeautifulSoup
 from   bs4 import SoupStrainer
@@ -78,6 +86,8 @@ class Wget(FetchMethod):
         if not ud.localfile:
             ud.localfile = d.expand(urllib.parse.unquote(ud.host + ud.path).replace("/", "."))
 
+        ud.github_private = ud.parm.get("github_private_asset","0") == "1"
+
         self.basecmd = d.getVar("FETCHCMD_wget") or "/usr/bin/env wget -t 2 -T 30 --passive-ftp --no-check-certificate"
 
     def _runwget(self, ud, d, command, quiet, workdir=None):
@@ -93,15 +103,85 @@ class Wget(FetchMethod):
 
         fetchcmd = self.basecmd
 
-        if 'downloadfilename' in ud.parm:
+        uri = ud.url.split(";")[0]
+        gh_asset_uri = None
+
+        if (ud.user and ud.pswd) or ud.github_private:
+            fetchcmd += " --auth-no-challenge"
+            if ud.user and ud.pswd:
+                fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)
+
+        if ud.github_private:
+            # Github private repositories support basic-auth via the API
+            # endpoints only. Using those, the download URL will be
+            # different, and we need to download using application/octet-stream.
+            # The API endpoint mapping is different for github.com and
+            # GitHub Enterprise:
+            #     github.com -> api.github.com
+            #     github.example.com -> github.example.com/api/v3/
+            # The Accept header is used in any case to fix the API version
+            #
+            # To get the download URL when using the API, all the releases
+            # are listed via
+            #     https://api.github.com/repos/<user>/<project>/releases
+            # which returns a JSON message describing all releases and all
+            # their attached artefacts. We can easily search that for
+            # the artefact that we're trying to download, and use
+            # the replacement URL from that response.
+            gh_relcmd = fetchcmd + " --header='Accept: application/vnd.github.v3+json'"
+            api_replacements = ['https?$://github.com/.* TYPE://api.github.com/repos/REPORELEASES',
+                                'https?$://.*/.* TYPE://HOST/api/v3/repos/REPORELEASES']
+            replacements = {}
+            replacements["TYPE"] = ud.type
+            replacements["HOST"] = ud.host
+            # github release artifacts are of the form
+            #     https://github.com/<user>/<project>/releases/download/v1.0.0/asset1.txt
+            # drop everything after .../releases and point to api.github.com
+            replacements["REPORELEASES"] = ud.path.rsplit('/', maxsplit=3)[0]
+            for api_replacement in api_replacements:
+                (find, replace) = api_replacement.split()
+                rel_api_uri = uri_replace(ud, find, replace, replacements, d)
+                if rel_api_uri == None:
+                    continue
+                # uri_replace() keeps the params, and the actual filename.
+                # drop both - we only want
+                #     https://api.github.com/repos/<user>/<project>/releases
+                # from the example above
+                rel_api_uri = rel_api_uri.split(';')[0].rsplit('/', maxsplit=1)[0]
+                with tempfile.TemporaryDirectory(prefix="wget-github-release-") as workdir, \
+                        tempfile.NamedTemporaryFile(mode="w+", dir=workdir, prefix="wget-release-") as f:
+                    gh_relcmd += " -O " + f.name + " '" + rel_api_uri + "'"
+                    try:
+                        self._runwget(ud, d, gh_relcmd, True)
+                    except FetchError as e:
+                        # Accessing a (PRE)MIRROR using the github API
+                        # obviously doesn't work, just ignore
+                        continue
+                    if os.path.getsize(f.name) == 0:
+                        # the fetch resulted in a zero size file, ignore
+                        continue
+                    releases = json.load(f)
+                    # As per https://developer.github.com/v3/repos/releases/#list-releases-for-a-repository
+                    # Each release will have a list of assets, where the 'browser_download_url'
+                    # is what we intended to download, but we need to get it via the 'url',
+                    # which points to the github api and supports username/password
+                    for release in releases:
+                        for asset in release['assets']:
+                            if asset['browser_download_url'] == uri:
+                                gh_asset_uri = asset['url']
+                                break
+                        if gh_asset_uri:
+                            break
+                if gh_asset_uri:
+                    uri = gh_asset_uri
+                    fetchcmd += " --header='Accept: application/octet-stream'"
+                    break
+
+        if 'downloadfilename' in ud.parm or gh_asset_uri:
             dldir = d.getVar("DL_DIR")
             bb.utils.mkdirhier(os.path.dirname(dldir + os.sep + ud.localfile))
             fetchcmd += " -O " + dldir + os.sep + ud.localfile
 
-        if ud.user and ud.pswd:
-            fetchcmd += " --user=%s --password=%s --auth-no-challenge" % (ud.user, ud.pswd)
-
-        uri = ud.url.split(";")[0]
         if os.path.exists(ud.localpath):
             # file exists, but we didnt complete it.. trying again..
             fetchcmd += d.expand(" -c -P ${DL_DIR} '%s'" % uri)
-- 
2.23.0.rc1



More information about the bitbake-devel mailing list