[OE-core] [RFC 8/9] hashserver: Add initial reference server

Joshua Watt jpewhacker at gmail.com
Mon Jul 16 20:37:27 UTC 2018


Adds an initial reference implementation of the hash server.

NOTE: This is my first dive into HTTP & REST technologies, so feedback is
appreciated. Also, I don't think it is necessary for this reference
implementation to live in bitbake; it can be moved to its own
independent project if necessary.

Also, this server has some concurrency issues that I haven't tracked
down: it will occasionally fail to record a newly POSTed task, with an
error indicating that the database is locked. Based on some reading, I
believe this is because the server uses a sqlite backend, and that it
would go away with a more production-worthy backend. Anyway, it is good
enough for some preliminary testing.

Starting the server is simple and only requires pipenv to be installed:

 $ pipenv shell
 $ ./app.py

Signed-off-by: Joshua Watt <JPEWhacker at gmail.com>
---
 bitbake/contrib/hashserver/.gitignore |   2 +
 bitbake/contrib/hashserver/Pipfile    |  15 ++
 bitbake/contrib/hashserver/app.py     | 212 ++++++++++++++++++++++++++
 3 files changed, 229 insertions(+)
 create mode 100644 bitbake/contrib/hashserver/.gitignore
 create mode 100644 bitbake/contrib/hashserver/Pipfile
 create mode 100755 bitbake/contrib/hashserver/app.py

diff --git a/bitbake/contrib/hashserver/.gitignore b/bitbake/contrib/hashserver/.gitignore
new file mode 100644
index 00000000000..030640a2b21
--- /dev/null
+++ b/bitbake/contrib/hashserver/.gitignore
@@ -0,0 +1,2 @@
+hashes.db
+Pipfile.lock
diff --git a/bitbake/contrib/hashserver/Pipfile b/bitbake/contrib/hashserver/Pipfile
new file mode 100644
index 00000000000..29cfb41a907
--- /dev/null
+++ b/bitbake/contrib/hashserver/Pipfile
@@ -0,0 +1,15 @@
+[[source]]
+url = "https://pypi.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+flask = "*"
+flask-sqlalchemy = "*"
+marshmallow-sqlalchemy = "*"
+flask-marshmallow = "*"
+
+[dev-packages]
+
+[requires]
+python_version = "3.6"
diff --git a/bitbake/contrib/hashserver/app.py b/bitbake/contrib/hashserver/app.py
new file mode 100755
index 00000000000..4fd2070fe92
--- /dev/null
+++ b/bitbake/contrib/hashserver/app.py
@@ -0,0 +1,212 @@
+#! /usr/bin/env python3
+#
+# BitBake Hash Server Reference Implementation
+#
+# Copyright (C) 2018 Garmin International
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+from flask import Flask, request, jsonify
+from flask_sqlalchemy import SQLAlchemy
+from flask_marshmallow import Marshmallow
+from sqlalchemy import desc, case, func
+import sqlalchemy
+import sqlite3
+import hashlib
+import datetime
+
+app = Flask(__name__)
+app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///hashes.db'
+app.config['SQLALCHEMY_TIMEOUT'] = 15
+
+# Order matters: Initialize SQLAlchemy before Marshmallow
+db = SQLAlchemy(app)
+ma = Marshmallow(app)
+
+ at sqlalchemy.event.listens_for(sqlalchemy.engine.Engine, "connect")
+def set_sqlite_pragma(dbapi_connection, connection_record):
+    cursor = dbapi_connection.cursor()
+    cursor.execute("PRAGMA journal_mode=WAL")
+    cursor.close()
+
+class TaskModel(db.Model):
+    __tablename__ = 'tasks'
+
+    id = db.Column(db.Integer, primary_key=True)
+    taskhash = db.Column(db.String(), nullable=False)
+    method = db.Column(db.String(), nullable=False)
+    outhash = db.Column(db.String(), nullable=False)
+    depid = db.Column(db.String(), nullable=False)
+    owner = db.Column(db.String())
+    created = db.Column(db.DateTime)
+    PN = db.Column(db.String())
+    PV = db.Column(db.String())
+    PR = db.Column(db.String())
+    task = db.Column(db.String())
+    outhash_siginfo = db.Column(db.Text())
+
+    __table_args__ = (
+            db.UniqueConstraint('taskhash', 'method', 'outhash', name='unique_task'),
+            # Make an index on taskhash and method for fast lookup
+            db.Index('lookup_index', 'taskhash', 'method'),
+            )
+
+    def __init__(self, taskhash, method, outhash, depid, owner=None):
+        self.taskhash = taskhash
+        self.method = method
+        self.outhash = outhash
+        self.depid = depid
+        self.owner = owner
+        self.created = datetime.datetime.utcnow()
+        self.task = None
+        self.outhash_siginfo = None
+
+schemas = {}
+
+class TaskFullSchema(ma.ModelSchema):
+    class Meta:
+        model = TaskModel
+
+task_full_schema = TaskFullSchema()
+tasks_full_schema = TaskFullSchema(many=True)
+schemas['full'] = tasks_full_schema
+
+class TaskSchema(ma.ModelSchema):
+    class Meta:
+        fields = ('id', 'taskhash', 'method', 'outhash', 'depid', 'owner', 'created', 'PN', 'PV', 'PR', 'task')
+
+task_schema = TaskSchema()
+tasks_schema = TaskSchema(many=True)
+schemas['default'] = tasks_schema
+
+class DepIDSchema(ma.ModelSchema):
+    class Meta:
+        fields = ('taskhash', 'method', 'depid')
+
+depid_schema = DepIDSchema()
+depids_schema = DepIDSchema(many=True)
+schemas['depid'] = depids_schema
+
+def get_tasks_schema():
+    return schemas.get(request.args.get('output', 'default'), tasks_schema)
+
+def get_count_column(column, min_count):
+    count_column = func.count(column).label('count')
+    query = (db.session.query(TaskModel)
+            .with_entities(column, count_column)
+            .group_by(column))
+
+    if min_count > 1:
+        query = query.having(count_column >= min_count)
+
+    col_name = column.name
+
+    result = [{'count': data.count, col_name: getattr(data, col_name)} for data in query.all()]
+
+    return jsonify(result)
+
+def filter_query_from_request(query, request):
+    for key in request.args:
+        if hasattr(TaskModel, key):
+            vals = request.args.getlist(key)
+            query = (query
+                    .filter(getattr(TaskModel, key).in_(vals))
+                    .order_by(TaskModel.created.asc()))
+    return query
+
+ at app.route("/v1/count/outhashes", methods=["GET"])
+def outhashes():
+    return get_count_column(TaskModel.outhash, int(request.args.get('min', 1)))
+
+ at app.route("/v1/count/taskhashes", methods=["GET"])
+def taskhashes():
+    return get_count_column(TaskModel.taskhash, int(request.args.get('min', 1)))
+
+ at app.route("/v1/equivalent", methods=["GET", "POST"])
+def equivalent():
+    if request.method == 'GET':
+        task = (db.session.query(TaskModel)
+            .filter(TaskModel.method == request.args['method'])
+            .filter(TaskModel.taskhash == request.args['taskhash'])
+            .order_by(
+                # If there are multiple matching task hashes, return the oldest
+                # one
+                TaskModel.created.asc())
+            .limit(1)
+            .one_or_none())
+
+        return depid_schema.jsonify(task)
+
+    # TODO: Handle authentication
+
+    data = request.get_json()
+    # TODO handle when data is None. Currently breaks
+
+    # Find an appropriate task.
+    new_task = (db.session.query(TaskModel)
+            .filter(
+                # The output hash and method must match exactly
+                TaskModel.method == data['method'],
+                TaskModel.outhash == data['outhash'])
+            .order_by(
+                # If there is a matching taskhash, it will be sorted first, and
+                # thus the only row returned.
+                case([(TaskModel.taskhash == data['taskhash'], 1)], else_=2))
+            .order_by(
+                # Sort by date, oldest first. This only really matters if there
+                # isn't an exact match on the taskhash
+                TaskModel.created.asc())
+            # Only return one row
+            .limit(1)
+            .one_or_none())
+
+    # If no task was found that exactly matches this taskhash, create a new one
+    if not new_task or new_task.taskhash != data['taskhash']:
+        # Capture the dependency ID. If a matching task was found, then change
+        # this tasks dependency ID to match.
+        depid = data['depid']
+        if new_task:
+            depid = new_task.depid
+
+        new_task = TaskModel(data['taskhash'], data['method'], data['outhash'], depid)
+        db.session.add(new_task)
+
+    if new_task.taskhash == data['taskhash']:
+        # Add or update optional attributes
+        for o in ('outhash_siginfo', 'owner', 'task', 'PN', 'PV', 'PR'):
+            v = getattr(new_task, o, None)
+            if v is None:
+                setattr(new_task, o, data.get(o, None))
+
+    db.session.commit()
+
+    return depid_schema.jsonify(new_task)
+
+# TODO: Handle errors. Currently, everything is a 500 error
+@app.route("/v1/tasks", methods=["GET"])
+def tasks():
+    """List tasks, filtered by the request's query arguments.
+
+    The output schema is chosen by get_tasks_schema().
+    """
+    query = db.session.query(TaskModel)
+    query = filter_query_from_request(query, request)
+    return get_tasks_schema().jsonify(query.all())
+
+ at app.route("/v1/tasks/overridden", methods=["GET"])
+def overridden():
+    query = db.session.query(TaskModel).filter(TaskModel.depid != TaskModel.taskhash)
+    query = filter_query_from_request(query, request)
+    return get_tasks_schema().jsonify(query.all())
+
+# Script entry point: only runs when app.py is executed directly
+if __name__ == '__main__':
+    # Create the database tables before serving requests
+    db.create_all()
+    # NOTE(review): debug=True enables Flask's debug mode -- confirm this is
+    # only ever used for local testing.
+    app.run(debug=True)
+
-- 
2.17.1




More information about the Openembedded-core mailing list