[bitbake-devel] [PATCH 3/3] runqueue.py: alternative rm_work scheduler

Fri Jan 6 09:50:49 UTC 2017

The idea is that tasks which complete building a recipe (like
do_package_qa) are more important than tasks which start building new
recipes (do_fetch) or those which increase disk usage
(do_compile). Therefore tasks get ordered like this (most important
first, do_rm_work before do_build because the enhanced rm_work.bbclass
was used):

1. ID /work/poky/meta/recipes-support/popt/popt_1.16.bb:do_build
2. ID /work/poky/meta/recipes-core/readline/readline_6.3.bb:do_build
3. ID /work/poky/meta/recipes-connectivity/libnss-mdns/libnss-mdns_0.10.bb:do_build
...
464. ID /work/poky/meta/recipes-sato/images/core-image-sato.bb:do_build
465. ID /work/poky/meta/recipes-graphics/xorg-proto/inputproto_2.3.2.bb:do_rm_work
466. ID /work/poky/meta/recipes-devtools/python/python3_3.5.2.bb:do_rm_work
467. ID /work/poky/meta/recipes-core/packagegroups/packagegroup-base.bb:do_rm_work
...
3620. ID virtual:native:/work/poky/meta/recipes-extended/pbzip2/pbzip2_1.1.13.bb:do_install
3621. ID /work/poky/meta/recipes-devtools/qemu/qemu-helper-native_1.0.bb:do_install
3622. ID /work/poky/meta/recipes-core/zlib/zlib_1.2.8.bb:do_compile_ptest_base
3623. ID /work/poky/meta/recipes-extended/bzip2/bzip2_1.0.6.bb:do_compile_ptest_base
...
3645. ID /work/poky/meta/recipes-support/libevent/libevent_2.0.22.bb:do_compile_ptest_base
3646. ID /work/poky/meta/recipes-core/busybox/busybox_1.24.1.bb:do_compile_ptest_base
3647. ID /work/poky/meta/recipes-kernel/linux/linux-yocto_4.8.bb:do_uboot_mkimage
3648. ID /work/poky/meta/recipes-kernel/linux/linux-yocto_4.8.bb:do_sizecheck
3649. ID /work/poky/meta/recipes-kernel/linux/linux-yocto_4.8.bb:do_strip
3650. ID /work/poky/meta/recipes-kernel/linux/linux-yocto_4.8.bb:do_compile_kernelmodules
3651. ID /work/poky/meta/recipes-kernel/linux/linux-yocto_4.8.bb:do_shared_workdir
3652. ID /work/poky/meta/recipes-kernel/linux/linux-yocto_4.8.bb:do_kernel_link_images
3653. ID /work/poky/meta/recipes-devtools/quilt/quilt-native_0.64.bb:do_compile
3654. ID /work/poky/meta/recipes-extended/texinfo-dummy-native/texinfo-dummy-native.bb:do_compile
...

The order of the same task between different recipes is the same as
with the speed scheduler, i.e. more important recipes come first.

The new scheduler has to be picked explicitly with
   BB_SCHEDULER = "rm_work"

Signed-off-by: Patrick Ohly <patrick.ohly at intel.com>
---
 lib/bb/runqueue.py | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)

diff --git a/lib/bb/runqueue.py b/lib/bb/runqueue.py
index 48c6a79..bf5b65d 100644
--- a/lib/bb/runqueue.py
+++ b/lib/bb/runqueue.py
@@ -183,6 +183,18 @@ class RunQueueScheduler(object):
     def newbuilable(self, task):
         self.buildable.append(task)
 
+    def describe_task(self, taskid):
+        result = 'ID %s' % taskid
+        if self.rev_prio_map:
+            result = result + (' pri %d' % self.rev_prio_map[taskid])
+        return result
+
+    def dump_prio(self, comment):
+        bb.debug(3, '%s (most important first):\n%s' %
+                 (comment,
+                  '\n'.join(['%d. %s' % (index + 1, self.describe_task(taskid)) for
+                             index, taskid in enumerate(self.prio_map)])))
+
 class RunQueueSchedulerSpeed(RunQueueScheduler):
     """
     A scheduler optimised for speed. The priority map is sorted by task weight,
@@ -242,6 +254,101 @@ class RunQueueSchedulerCompletion(RunQueueSchedulerSpeed):
             for idx in todel:
                 del basemap[idx]
 
+class RunQueueSchedulerRmwork(RunQueueSchedulerSpeed):
+    """
+    A scheduler optimised to complete .bb files are quickly as possible. The
+    priority map is sorted by task weight, but then reordered so that once a given
+    .bb file starts to build, it's completed as quickly as possible by
+    running all tasks related to the same .bb file one after the after.
+    """
+    name = "rm_work"
+
+    def __init__(self, runqueue, rqdata):
+        super(RunQueueSchedulerRmwork, self).__init__(runqueue, rqdata)
+
+        # Extract list of tasks for each recipe, with tasks sorted
+        # ascending from "must run first" (typically do_fetch) to
+        # "runs last" (do_rm_work). Both the speed and completion
+        # schedule prioritize tasks that must run first before the ones
+        # that run later; this is what we depend on here.
+        task_lists = {}
+        for taskid in self.prio_map:
+            fn, taskname = taskid.rsplit(':', 1)
+            task_lists.setdefault(fn, []).append(taskname)
+
+        # Now unify the different task lists. The strategy is that
+        # common tasks get skipped and new ones get inserted after the
+        # preceeding common one(s) as they are found. Because task
+        # lists should differ only by their number of tasks, but not
+        # the ordering of the common tasks, this should result in a
+        # deterministic result that is a superset of the individual
+        # task ordering.
+        all_tasks = []
+        for recipe, new_tasks in task_lists.items():
+            index = 0
+            old_task = all_tasks[index] if index < len(all_tasks) else None
+            for new_task in new_tasks:
+                if old_task == new_task:
+                    # Common task, skip it. This is the fast-path which
+                    # avoids a full search.
+                    index += 1
+                    old_task = all_tasks[index] if index < len(all_tasks) else None
+                else:
+                    try:
+                        index = all_tasks.index(new_task)
+                        # Already present, just not at the current
+                        # place. We re-synchronized by changing the
+                        # index so that it matches again. Now
+                        # move on to the next existing task.
+                        index += 1
+                        old_task = all_tasks[index] if index < len(all_tasks) else None
+                    except ValueError:
+                        # Not present. Insert before old_task, which
+                        # remains the same (but gets shifted back).
+                        all_tasks.insert(index, new_task)
+                        index += 1
+        bb.debug(3, 'merged task list: %s'  % all_tasks)
+
+        # Now reverse the order so that tasks that finish the work on one
+        # recipe are considered more imporant (= come first). The ordering
+        # is now so that do_rm_work[_all] is most important.
+        all_tasks.reverse()
+
+        # Group tasks of the same kind before tasks of less important
+        # kinds at the head of the queue (because earlier = lower
+        # priority number = runs earlier), while preserving the
+        # ordering by recipe. If recipe foo is more important than
+        # bar, then the goal is to work on foo's do_populate_sysroot
+        # before bar's do_populate_sysroot and on the more important
+        # tasks of foo before any of the less important tasks in any
+        # other recipe (if those other recipes are more important than
+        # foo).
+        #
+        # All of this only applies when tasks are runable. Explicit
+        # dependencies still override this ordering by priority.
+        #
+        # Here's an example why this priority re-ordering helps with
+        # minimizing disk usage. Consider a recipe foo with a higher
+        # priority than bar where foo DEPENDS on bar. Then the
+        # implicit rule (from base.bbclass) is that foo's do_configure
+        # depends on bar's do_populate_sysroot. This ensures that
+        # bar's do_populate_sysroot gets done first. Normally the
+        # tasks from foo would continue to run once that is done, and
+        # bar only gets completed and cleaned up later. By ordering
+        # bar's task that depend on bar's do_populate_sysroot before foo's
+        # do_configure, that problem gets avoided.
+        task_index = 0
+        self.dump_prio('original priorities')
+        for task in all_tasks:
+            for index in range(task_index, self.numTasks):
+                taskid = self.prio_map[index]
+                taskname = taskid.rsplit(':', 1)[1]
+                if taskname == task:
+                    del self.prio_map[index]
+                    self.prio_map.insert(task_index, taskid)
+                    task_index += 1
+        self.dump_prio('rm_work priorities')
+
 class RunTaskEntry(object):
     def __init__(self):
         self.depends = set()
-- 
2.1.4