[bitbake-devel] [master][PATCH v2] fetch2/githubprivate: new fetcher for private github repositories

André Draszik git at andred.net
Fri Dec 20 10:08:31 UTC 2019


The wget / http fetcher doesn't support fetching assets
attached to releases on private GitHub repositories, i.e.
release artefacts like
    https://github.com/<user>/<project>/releases/download/v1.0.0/asset1.txt

Those are special, in that HTTP basic auth is not used / possible
on the URL as seen in the GitHub UI, but instead the GitHub API
must be used for downloading (which does support HTTP basic auth)
where the URL will be different.

Implement a new fetcher that:
    * uses the GitHub API to determine the asset URL
    * re-uses the existing wget fetcher to download this URL
      instead
    * supports checkstatus() (bitbake -c checkuri)
    * supports latest_versionstring() (devtool latest-version)
    * supports GitHub.com and GitHub Enterprise for the above

Implementation notes:
To be able to access the GitHub API, opportunistic authentication
(auth-no-challenge) needs to be enabled. Then the API needs to
be queried for the real URL of the file to be downloaded, and
finally application/octet-stream must be specified explicitly.

Note that there is a slight difference in the location of the
REST API endpoints between GitHub.com and GitHub Enterprise.

    https://developer.github.com/v3/repos/releases/
    https://developer.github.com/enterprise/2.19/v3/enterprise-admin/

Some notes:
* --auth-no-challenge is added unconditionally because we know
  username / password will definitely be needed, and they are
  likely specified in ~/.netrc, rather than in the recipe (but
  username / password via recipe is still supported)
* the release information returned looks something like:
[
    {
        ...
        "name": <name of the release>
        "assets": [
            {
                ...
                "browser_download_url": "https://github.com/<user>/<project>/releases/download/v1.0.0/asset1.txt",
                "url": "https://api.github.com/repos/<user>/<project>/releases/assets/16146291",
                ...
            },
            ...
        ],
        ...
    },
    ...
]
  The API 'url' of an asset ends in a numeric id rather than in the
  file name, hence we need to pass -O to wget to explicitly download
  using the original name
* to determine the latest available version, we can simply query
  the API for the version (name) that the SRC_URI entry is
  attached to, and then figure out if there is a more recent
  version available, rather than doing lots of matches using
  regexes
* this has been tested with github.com and GitHub Enterprise on
  private repositories, with and without PREMIRRORS

Signed-off-by: André Draszik <git at andred.net>
---
 bitbake/lib/bb/fetch2/__init__.py      |   6 +-
 bitbake/lib/bb/fetch2/githubprivate.py | 174 +++++++++++++++++++++++++
 2 files changed, 178 insertions(+), 2 deletions(-)
 create mode 100644 bitbake/lib/bb/fetch2/githubprivate.py

diff --git a/bitbake/lib/bb/fetch2/__init__.py b/bitbake/lib/bb/fetch2/__init__.py
index 07de6c2693..5c533cf78e 100644
--- a/bitbake/lib/bb/fetch2/__init__.py
+++ b/bitbake/lib/bb/fetch2/__init__.py
@@ -1238,13 +1238,13 @@ class FetchData(object):
             self.sha256_name = "sha256sum"
         if self.md5_name in self.parm:
             self.md5_expected = self.parm[self.md5_name]
-        elif self.type not in ["http", "https", "ftp", "ftps", "sftp", "s3"]:
+        elif self.type not in ["http", "https", "ftp", "ftps", "githubprivate", "sftp", "s3"]:
             self.md5_expected = None
         else:
             self.md5_expected = d.getVarFlag("SRC_URI", self.md5_name)
         if self.sha256_name in self.parm:
             self.sha256_expected = self.parm[self.sha256_name]
-        elif self.type not in ["http", "https", "ftp", "ftps", "sftp", "s3"]:
+        elif self.type not in ["http", "https", "ftp", "ftps", "githubprivate", "sftp", "s3"]:
             self.sha256_expected = None
         else:
             self.sha256_expected = d.getVarFlag("SRC_URI", self.sha256_name)
@@ -1853,6 +1853,7 @@ from . import osc
 from . import repo
 from . import clearcase
 from . import npm
+from . import githubprivate
 
 methods.append(local.Local())
 methods.append(wget.Wget())
@@ -1871,3 +1872,4 @@ methods.append(osc.Osc())
 methods.append(repo.Repo())
 methods.append(clearcase.ClearCase())
 methods.append(npm.Npm())
+methods.append(githubprivate.Githubprivate())
diff --git a/bitbake/lib/bb/fetch2/githubprivate.py b/bitbake/lib/bb/fetch2/githubprivate.py
new file mode 100644
index 0000000000..5a007c4e69
--- /dev/null
+++ b/bitbake/lib/bb/fetch2/githubprivate.py
@@ -0,0 +1,245 @@
+#
+# SPDX-License-Identifier: GPL-2.0-only
+#
+"""
+Bitbake "Fetch" implementation for assets attached to private
+repositories on GitHub or GitHub Enterprise.
+"""
+
+import os
+import json
+import shlex
+import tempfile
+import bb
+from   bb.fetch2.wget import Wget
+from   bb.fetch2 import FetchError
+from   bb.fetch2 import logger
+from   bb.fetch2 import uri_replace
+
+class Githubprivate(Wget):
+    """Class to fetch an asset from a private repository on GitHub
+       (or GitHub Enterprise).
+
+       The asset is looked up via the GitHub REST API (version 3) and
+       then downloaded using the wget fetcher this class derives from."""
+
+    def supports(self, ud, d):
+        """Return True if ud is of type 'githubprivate'."""
+        return ud.type in ['githubprivate']
+
+    def urldata_init(self, ud, d):
+        """Validate the URL parameters and initialise ud.
+
+        Raises bb.fetch2.ParameterError if the 'protocol' parameter is
+        given and is neither 'http' nor 'https' (the default).
+        """
+        ud.proto = 'https'
+        if 'protocol' in ud.parm:
+            ud.proto = ud.parm['protocol']
+        if ud.proto not in ('http', 'https'):
+            raise bb.fetch2.ParameterError("Invalid protocol type", ud.url)
+
+        if 'downloadfilename' not in ud.parm:
+            # The asset filename determined using the GitHub API will
+            # not match the filename of the release artefact (as in
+            # SRC_URI). Hence we need to unconditionally instruct
+            # wget to download using -O. This can be achieved by
+            # unconditionally setting 'downloadfilename' here.
+            ud.parm['downloadfilename'] = os.path.basename(ud.path)
+        super(Githubprivate, self).urldata_init(ud, d)
+        # To be able to access the GitHub API, opportunistic authentication
+        # needs to be enabled. Also username / password will definitely be
+        # needed, and they are likely specified in ~/.netrc, rather than in
+        # the recipe itself.
+        self.basecmd += " --auth-no-challenge"
+
+    def _get_gh_releases_info(self, uri, ud, d):
+        """Retrieve the release information of the repository in ud.
+
+        uri is only used in log messages. Returns the decoded JSON
+        response of the GitHub API (a list with one entry per release),
+        or an empty list if no API endpoint returned usable data.
+        """
+        fetchcmd = self.basecmd
+        if ud.user and ud.pswd:
+            # The command is a single string that relies on shell quoting
+            # (see the Accept header below), so quote the credentials to
+            # cope with whitespace and shell metacharacters in them.
+            fetchcmd += " --user=%s --password=%s" % (shlex.quote(ud.user),
+                                                      shlex.quote(ud.pswd))
+
+        # Github private repositories support basic-auth via the API
+        # endpoints only. Using those, the download URL will be
+        # different, and we need to download using application/octet-stream.
+        # The API endpoint mapping is different for github.com and
+        # GitHub Enterprise:
+        #     github.com -> api.github.com
+        #     github.example.com -> github.example.com/api/v3/
+        # The Accept header is used in any case to fix the API version to
+        # the supported level (version 3).
+        #
+        # To get the download URL when using the API, all the releases
+        # are listed via
+        #     https://api.github.com/repos/<user>/<project>/releases
+        # which returns a JSON message describing all releases and all
+        # their attached artefacts. We can easily search that for
+        # the artefact that we're trying to download, and use
+        # the replacement URL from that response.
+        assetinfo_basecmd = fetchcmd + " --header='Accept: application/vnd.github.v3+json'"
+        api_replacements = ['githubprivate://github.com/.* TYPE://api.github.com/repos/REPORELEASES',
+                            'githubprivate://.*/.* TYPE://HOST/api/v3/repos/REPORELEASES']
+        replacements = {}
+        replacements["TYPE"] = ud.proto
+        replacements["HOST"] = ud.host
+        # github release artifacts are of the form
+        #     https://github.com/<user>/<project>/releases/download/v1.0.0/asset1.txt
+        # drop everything after .../releases and point to api.github.com
+        replacements["REPORELEASES"] = ud.path.rsplit('/', maxsplit=3)[0]
+        for api_replacement in api_replacements:
+            (find, replace) = api_replacement.split()
+            rel_api_uri = uri_replace(ud, find, replace, replacements, d)
+            if rel_api_uri is None:
+                continue
+            # uri_replace() keeps the params, and the actual filename.
+            # drop both - we only want
+            #     https://api.github.com/repos/<user>/<project>/releases
+            # from the example above
+            rel_api_uri = rel_api_uri.split(';')[0].rsplit('/', maxsplit=1)[0]
+            with tempfile.TemporaryDirectory(prefix="wget-github-release-") as workdir, \
+                    tempfile.NamedTemporaryFile(mode="w+", dir=workdir, prefix="wget-release-") as f:
+                # Build the command afresh for every endpoint. Appending
+                # to a variable shared between iterations would leave the
+                # -O option and URL of a failed attempt in the command.
+                assetinfo_cmd = "%s -O %s %s" % (assetinfo_basecmd,
+                                                 shlex.quote(f.name),
+                                                 shlex.quote(rel_api_uri))
+                logger.debug(2, "For url %s trying to retrieve asset info from %s" % (uri, rel_api_uri))
+                try:
+                    self._runwget(ud, d, assetinfo_cmd, True)
+                except FetchError:
+                    # Accessing a (PRE)MIRROR using the github API
+                    # obviously doesn't work, just ignore
+                    continue
+                if os.path.getsize(f.name) == 0:
+                    # the fetch resulted in a zero size file, ignore
+                    logger.debug(2, "Could not retrieve asset info from %s" % rel_api_uri)
+                    continue
+                try:
+                    releases = json.load(f)
+                except ValueError:
+                    # not JSON at all (json.JSONDecodeError is a ValueError)
+                    logger.debug(2, "Could not parse asset info from %s" % rel_api_uri)
+                    continue
+                if not isinstance(releases, list):
+                    # a list of releases is expected, anything else
+                    # (e.g. an error object) is of no use to the callers
+                    logger.debug(2, "Unexpected asset info from %s" % rel_api_uri)
+                    continue
+                return releases
+
+        return []
+
+    @staticmethod
+    def _find_gh_asset(releases, uri):
+        """Search releases for the asset whose 'browser_download_url' is uri.
+
+        Returns a (release, asset) tuple of the matching entries, or
+        (None, None) if uri is not attached to any of the releases.
+        """
+        # As per https://developer.github.com/v3/repos/releases/#list-releases-for-a-repository
+        # Each release will have a list of assets, where the 'browser_download_url'
+        # is what we intended to download, but we need to get it via the 'url',
+        # which points to the github api and supports username/password
+        for release in releases:
+            for asset in release['assets']:
+                logger.debug(2, "Comparing asset id %u URL %s" \
+                                % (asset['id'], asset['browser_download_url']))
+                if asset['browser_download_url'] == uri:
+                    return (release, asset)
+        return (None, None)
+
+    def _get_gh_asset_uri(self, uri, ud, d):
+        """Return the GitHub API URL of the asset that uri refers to.
+
+        Raises FetchError if the asset is not found in the release
+        information.
+        """
+        uri = uri.replace("githubprivate://", ud.proto + "://", 1)
+        releases = self._get_gh_releases_info(uri, ud, d)
+        (_, asset) = self._find_gh_asset(releases, uri)
+        if asset is None or not asset['url']:
+            raise FetchError("Could not determine the GitHub asset URI for URI %s" % uri, uri)
+
+        gh_asset_uri = asset['url']
+        logger.debug(2, "For URI %s using GitHub asset %s" % (uri, gh_asset_uri))
+        return gh_asset_uri
+
+    def download(self, ud, d):
+        """Fetch urls
+
+        The URL in ud is resolved to the matching GitHub API asset URL
+        for the duration of the download. Raises FetchError if the
+        asset cannot be found.
+        """
+        orig_url = ud.url
+        orig_basecmd = self.basecmd
+        orig_uri = orig_url.split(";")[0]
+        gh_asset_uri = self._get_gh_asset_uri(orig_uri, ud, d)
+        try:
+            ud.url = orig_url.replace(orig_uri, gh_asset_uri, 1)
+            # To be able to download the actual asset, we need to force
+            # the mime-type. Otherwise we'll get the asset info json.
+            self.basecmd += " --header='Accept: application/octet-stream'"
+            return super(Githubprivate, self).download(ud, d)
+        finally:
+            # Undo the temporary changes. Otherwise the extra Accept
+            # header would also be sent with later API queries made
+            # through this object, and the rewritten URL could not be
+            # resolved a second time.
+            self.basecmd = orig_basecmd
+            ud.url = orig_url
+
+    def latest_versionstring(self, ud, d):
+        """
+        Manipulate the URL and try to obtain the latest package version
+        using GitHub API.
+
+        Returns a (version, '') tuple, where version is the name of the
+        release that _vercmp() ranks highest, or '0' if no release name
+        could be determined.
+        """
+        # We first get the release (name) that corresponds to the URL ...
+        uri = ud.url.split(";")[0].replace("githubprivate://", ud.proto + "://", 1)
+        releases = self._get_gh_releases_info(uri, ud, d)
+        current_version = '0'
+        bb.debug(3, "Getting current version info for URL %s" % uri)
+        (release, _) = self._find_gh_asset(releases, uri)
+        if release is not None and release.get('name'):
+            current_version = release['name']
+            bb.debug(3, "Current version info is %s" % current_version)
+
+        # ... and then try to find a newer release (name).
+        for release in releases:
+            name = release.get('name')
+            if not name:
+                # the API may report a release without a name, skip it
+                continue
+            if self._vercmp(['', current_version, ''], ['', name, '']) < 0:
+                current_version = name
+
+        return (current_version, '')
+
+    def checkstatus(self, fetch, urldata, d):
+        """Check if urls are accessible
+
+        Raises FetchError if the asset cannot be found via the API.
+        """
+        orig_url = urldata.url
+        orig_uri = orig_url.split(";")[0]
+        gh_asset_uri = self._get_gh_asset_uri(orig_uri, urldata, d)
+        try:
+            urldata.url = orig_url.replace(orig_uri, gh_asset_uri, 1)
+            return super(Githubprivate, self).checkstatus(fetch, urldata, d)
+        finally:
+            # restore the URL so that it can be resolved again later
+            urldata.url = orig_url
-- 
2.23.0.rc1



More information about the bitbake-devel mailing list