[bitbake-devel] [master][PATCH v2] fetch2/githubprivate: new fetcher for private github repositories
André Draszik
git at andred.net
Tue Jan 14 15:09:18 UTC 2020
ping
On Fri, 2019-12-20 at 10:08 +0000, André Draszik wrote:
> The wget / http fetcher doesn't support fetching assets
> attached to releases on private GitHub repositories, i.e.
> release artefacts like
> https://github.com/<user>/<project>/releases/download/v1.0.0/asset1.txt
>
> Those are special in that HTTP basic auth cannot be used on the
> URL shown in the GitHub UI. Instead, the asset must be downloaded
> via the GitHub API (which does support HTTP basic auth), where the
> URL is different.
>
> Implement a new fetcher that:
> * uses the GitHub API to determine the asset URL
> * re-uses the existing wget fetcher to download this URL
> instead
> * supports checkstatus() (bitbake -c checkuri)
> * supports latest_versionstring() (devtool latest-version)
> * supports GitHub.com and GitHub Enterprise for the above
>
> Implementation notes:
> To be able to access the GitHub API, opportunistic authentication
> (auth-no-challenge) needs to be enabled. Then the API needs to
> be queried for the real URL of the file to be downloaded, and
> finally 'Accept: application/octet-stream' must be sent explicitly.
>
> Note that there is a slight difference in the location of the
> REST API endpoints between GitHub.com and GitHub Enterprise.
>
> https://developer.github.com/v3/repos/releases/
> https://developer.github.com/enterprise/2.19/v3/enterprise-admin/
>
> Some notes:
> * --auth-no-challenge is added unconditionally because we know
> username / password will definitely be needed, and they are
> likely specified in ~/.netrc, rather than in the recipe (but
> username / password via recipe is still supported)
> * the release information returned looks something like:
> [
> {
> ...
> "name": <name of the release>
> "assets": [
> {
> ...
> "browser_download_url": "https://github.com/<user>/<project>/releases/download/v1.0.0/asset1.txt",
> "url": "https://api.github.com/repos/<user>/<project>/releases/assets/16146291",
> ...
> },
> ...
> ],
> ...
> },
> ...
> ]
> hence we need to pass -O to wget to explicitly download using
> the original name
> * to determine the latest available version, we can simply query
> the API for the version (name) that the SRC_URI entry is
> attached to, and then figure out if there is a more recent
> version available, rather than doing lots of matches using
> regexes
> * this has been tested with github.com and GitHub Enterprise on
> private repositories, with and without PREMIRRORS
>
> Signed-off-by: André Draszik <git at andred.net>
> ---
> bitbake/lib/bb/fetch2/__init__.py | 6 +-
> bitbake/lib/bb/fetch2/githubprivate.py | 174 +++++++++++++++++++++++++
> 2 files changed, 178 insertions(+), 2 deletions(-)
> create mode 100644 bitbake/lib/bb/fetch2/githubprivate.py
>
> diff --git a/bitbake/lib/bb/fetch2/__init__.py b/bitbake/lib/bb/fetch2/__init__.py
> index 07de6c2693..5c533cf78e 100644
> --- a/bitbake/lib/bb/fetch2/__init__.py
> +++ b/bitbake/lib/bb/fetch2/__init__.py
> @@ -1238,13 +1238,13 @@ class FetchData(object):
> self.sha256_name = "sha256sum"
> if self.md5_name in self.parm:
> self.md5_expected = self.parm[self.md5_name]
> - elif self.type not in ["http", "https", "ftp", "ftps", "sftp", "s3"]:
> + elif self.type not in ["http", "https", "ftp", "ftps", "githubprivate", "sftp", "s3"]:
> self.md5_expected = None
> else:
> self.md5_expected = d.getVarFlag("SRC_URI", self.md5_name)
> if self.sha256_name in self.parm:
> self.sha256_expected = self.parm[self.sha256_name]
> - elif self.type not in ["http", "https", "ftp", "ftps", "sftp", "s3"]:
> + elif self.type not in ["http", "https", "ftp", "ftps", "githubprivate", "sftp", "s3"]:
> self.sha256_expected = None
> else:
> self.sha256_expected = d.getVarFlag("SRC_URI", self.sha256_name)
> @@ -1853,6 +1853,7 @@ from . import osc
> from . import repo
> from . import clearcase
> from . import npm
> +from . import githubprivate
>
> methods.append(local.Local())
> methods.append(wget.Wget())
> @@ -1871,3 +1872,4 @@ methods.append(osc.Osc())
> methods.append(repo.Repo())
> methods.append(clearcase.ClearCase())
> methods.append(npm.Npm())
> +methods.append(githubprivate.Githubprivate())
> diff --git a/bitbake/lib/bb/fetch2/githubprivate.py b/bitbake/lib/bb/fetch2/githubprivate.py
> new file mode 100644
> index 0000000000..5a007c4e69
> --- /dev/null
> +++ b/bitbake/lib/bb/fetch2/githubprivate.py
> @@ -0,0 +1,174 @@
> +#
> +# SPDX-License-Identifier: GPL-2.0-only
> +#
> +"""
> +Bitbake "Fetch" implementation for assets attached to private
> +repositories on GitHub or GitHub Enterprise.
> +"""
> +
> +import os
> +import json
> +import tempfile
> +import bb
> +from bb.fetch2.wget import Wget
> +from bb.fetch2 import FetchError
> +from bb.fetch2 import logger
> +from bb.fetch2 import uri_replace
> +
> +class Githubprivate(Wget):
> + """Class to fetch an asset from a private repository on GitHub
> + (or GitHub Enterprise)."""
> +
> + def supports(self, ud, d):
> + return ud.type in ['githubprivate']
> +
> + def urldata_init(self, ud, d):
> + ud.proto = 'https'
> + if 'protocol' in ud.parm:
> + ud.proto = ud.parm['protocol']
> + if not ud.proto in ('http', 'https'):
> + raise bb.fetch2.ParameterError("Invalid protocol type", ud.url)
> +
> + if not 'downloadfilename' in ud.parm:
> + # The asset filename determined using the GitHub API will
> + # not match the filename of the release artefact (as in
> + # SRC_URI). Hence we need to unconditionally instruct
> + # wget to download using -O. This can be achieved by
> + # unconditionally setting 'downloadfilename' here.
> + ud.parm['downloadfilename'] = os.path.basename(ud.path)
> + super(Githubprivate, self).urldata_init(ud, d)
> + # To be able to access the GitHub API, opportunistic authentication
> + # needs to be enabled. Also username / password will definitely be
> + # needed, and they are likely specified in ~/.netrc, rather than in
> + # the recipe itself.
> + self.basecmd += " --auth-no-challenge"
> +
> + def _get_gh_releases_info(self, uri, ud, d):
> + fetchcmd = self.basecmd
> + if ud.user and ud.pswd:
> + fetchcmd += " --user=%s --password=%s" % (ud.user, ud.pswd)
> +
> + # Github private repositories support basic-auth via the API
> + # endpoints only. Using those, the download URL will be
> + # different, and we need to download using application/octet-stream.
> + # The API endpoint mapping is different for github.com and
> + # GitHub Enterprise:
> + # github.com -> api.github.com
> + # github.example.com -> github.example.com/api/v3/
> + # The Accept header is used in any case to fix the API version to
> + # the supported level (version 3).
> + #
> + # To get the download URL when using the API, all the releases
> + # are listed via
> + # https://api.github.com/repos/<user>/<project>/releases
> + # which returns a JSON message describing all releases and all
> + # their attached artefacts. We can easily search that for
> + # the artefact that we're trying to download, and use
> + # the replacement URL from that response.
> + assetinfo_cmd = fetchcmd + " --header='Accept: application/vnd.github.v3+json'"
> + api_replacements = ['githubprivate://github.com/.* TYPE://api.github.com/repos/REPORELEASES',
> + 'githubprivate://.*/.* TYPE://HOST/api/v3/repos/REPORELEASES']
> + replacements = {}
> + replacements["TYPE"] = ud.proto
> + replacements["HOST"] = ud.host
> + # github release artifacts are of the form
> + # https://github.com/<user>/<project>/releases/download/v1.0.0/asset1.txt
> + # drop everything after .../releases and point to api.github.com
> + replacements["REPORELEASES"] = ud.path.rsplit('/', maxsplit=3)[0]
> + for api_replacement in api_replacements:
> + (find, replace) = api_replacement.split()
> + rel_api_uri = uri_replace(ud, find, replace, replacements, d)
> + if rel_api_uri == None:
> + continue
> + # uri_replace() keeps the params, and the actual filename.
> + # drop both - we only want
> + # https://api.github.com/repos/<user>/<project>/releases
> + # from the example above
> + rel_api_uri = rel_api_uri.split(';')[0].rsplit('/', maxsplit=1)[0]
> + with tempfile.TemporaryDirectory(prefix="wget-github-release-") as workdir, \
> + tempfile.NamedTemporaryFile(mode="w+", dir=workdir, prefix="wget-release-") as f:
> + assetinfo_cmd += " -O " + f.name + " '" + rel_api_uri + "'"
> + logger.debug(2, "For url %s trying to retrieve asset info from %s" % (uri, assetinfo_cmd))
> + try:
> + self._runwget(ud, d, assetinfo_cmd, True)
> + except FetchError as e:
> + # Accessing a (PRE)MIRROR using the github API
> + # obviously doesn't work, just ignore
> + continue
> + if os.path.getsize(f.name) == 0:
> + # the fetch resulted in a zero size file, ignore
> + logger.debug(2, "Could not retrieve asset info from %s" % rel_api_uri)
> + continue
> + return json.load(f)
> +
> + return []
> +
> + def _get_gh_asset_uri(self, uri, ud, d):
> + uri = uri.replace("githubprivate://", ud.proto + "://", 1)
> + gh_asset_uri = None
> + releases = self._get_gh_releases_info(uri, ud, d)
> + # As per https://developer.github.com/v3/repos/releases/#list-releases-for-a-repository
> + # Each release will have a list of assets, where the 'browser_download_url'
> + # is what we intended to download, but we need to get it via the 'url',
> + # which points to the github api and supports username/password
> + for release in releases:
> + for asset in release['assets']:
> + logger.debug(2, "Comparing asset id %u URL %s" \
> + % (asset['id'], asset['browser_download_url']))
> + if asset['browser_download_url'] == uri:
> + gh_asset_uri = asset['url']
> + logger.debug(2, "For URI %s using GitHub asset %s" % (uri, gh_asset_uri))
> + break
> + if gh_asset_uri:
> + break
> +
> + if not gh_asset_uri:
> + raise FetchError("Could not determine the GitHub asset URI for URI %s" % uri, uri)
> +
> + return gh_asset_uri
> +
> + def download(self, ud, d):
> + """Fetch urls"""
> + orig_uri = ud.url.split(";")[0]
> + gh_asset_uri = self._get_gh_asset_uri(orig_uri, ud, d)
> + ud.url = ud.url.replace(orig_uri, gh_asset_uri, 1)
> + # To be able to download the actual asset, we need to force
> + # the mime-type. Otherwise we'll get the asset info json.
> + self.basecmd += " --header='Accept: application/octet-stream'"
> + return super(Githubprivate, self).download(ud, d)
> +
> + def latest_versionstring(self, ud, d):
> + """
> + Manipulate the URL and try to obtain the latest package version
> + using GitHub API.
> + """
> + # We first get the release (name) that corresponds to the URL ...
> + uri = ud.url.split(";")[0].replace("githubprivate://", ud.proto + "://", 1)
> + releases = self._get_gh_releases_info(uri, ud, d)
> + current_version = '0'
> + for release in releases:
> + bb.debug(3, "Getting current version info for URL %s" % uri)
> + for release in releases:
> + for asset in release['assets']:
> + if asset['browser_download_url'] == uri:
> + current_version = release['name']
> + break
> + if current_version != '0':
> + break
> + if current_version != '0':
> + bb.debug(3, "Current version info is %s" % current_version)
> +
> + # ... and then try to find a newer release (name).
> + for release in releases:
> + this_version = ['', release['name'], '']
> + if self._vercmp(['', current_version, ''], this_version) < 0:
> + current_version = this_version[1]
> +
> + return (current_version, '')
> +
> + def checkstatus(self, fetch, urldata, d):
> + """Check if urls are accessible"""
> + orig_uri = urldata.url.split(";")[0]
> + gh_asset_uri = self._get_gh_asset_uri(orig_uri, urldata, d)
> + urldata.url = urldata.url.replace(orig_uri, gh_asset_uri, 1)
> + return super(Githubprivate, self).checkstatus(fetch, urldata, d)
More information about the bitbake-devel mailing list