[bitbake-devel] [PATCH] runqueue: Optimise task id string manipulations

Fri Oct 7 16:07:09 UTC 2016

On Friday, October 7, 2016 8:26:48 AM EDT Richard Purdie wrote:
> Some task id manipulations were suboptimal:
> 
> * taskfn_fromtid and fn_from_tid were effectively the same function
> * many calls to split_tid(), then taskfn_fromtid()
> * taskfn_fromtid() called split_tid() internally
> 
> This patch adds split_tid_mcfn() to replace split_tid() and returns the
> "taskfn" variant being used in many places. We update all core calls
> to the new function and ignore the return values we don't need since the
> function call overhead of the split_tid wrapper is higher than ignoring
> a return value.
> 
> The one remaining standalone use of taskfn_fromtid is replaced with
> fn_from_tid. I couldn't see any external usage so it was dropped.
> 
> There is external usage of split_tid so a wrapper remains for it.
> 
> Combined together these changes should improve some of the runqueue task
> manipulation performance.
> 
> Signed-off-by: Richard Purdie <richard.purdie at linuxfoundation.org>

Thanks Richard. As per our discussion on IRC yesterday these changes are 
inline with what I was expecting. After sorting out the encoding on this email 
to get the patch applied I was able to confirm that we do get some performance 
improvements. For my specific test configuration of ~4500 tasks I am seeing > 
1 second performance improvement and 1/2million less function calls. (when not 
profiling that will be 1/2second improvement since profiling is doubling my 
runtime). With the cleanup of taskfn_fromtid and fn_from_tid I will be able to 
put together some additional performance improvement patches and send them out 
for discussion in the next few days.

Thanks again,
Mark

> 
> diff --git a/bitbake/lib/bb/cooker.py b/bitbake/lib/bb/cooker.py
> index 934072c..42831e2 100644
> --- a/bitbake/lib/bb/cooker.py
> +++ b/bitbake/lib/bb/cooker.py
> @@ -748,8 +748,7 @@ class BBCooker:
>                      depend_tree['providermap'][name] = (pn, version)
>  
>          for tid in rq.rqdata.runtaskentries:
> -            (mc, fn, taskname) = bb.runqueue.split_tid(tid)
> -            taskfn = bb.runqueue.taskfn_fromtid(tid)
> +            (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
>              pn = self.recipecaches[mc].pkg_fn[taskfn]
>              pn = self.add_mc_prefix(mc, pn)
>              version  = "%s:%s-%s" %
> self.recipecaches[mc].pkg_pepvpr[taskfn] @@ -772,8 +771,7 @@ class
> BBCooker:
>  
>  
>              for dep in rq.rqdata.runtaskentries[tid].depends:
> -                (depmc, depfn, deptaskname) = bb.runqueue.split_tid(dep)
> -                deptaskfn = bb.runqueue.taskfn_fromtid(dep)
> +                (depmc, depfn, deptaskname, deptaskfn) =
> bb.runqueue.split_tid_mcfn(dep) deppn =
> self.recipecaches[mc].pkg_fn[deptaskfn]
>                  dotname = "%s.%s" % (pn,
> bb.runqueue.taskname_from_tid(tid)) if not dotname in
> depend_tree["tdepends"]:
> @@ -843,8 +841,7 @@ class BBCooker:
>                  tids.append(tid)
>  
>          for tid in tids:
> -            (mc, fn, taskname) = bb.runqueue.split_tid(tid)
> -            taskfn = bb.runqueue.taskfn_fromtid(tid)
> +            (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
>  
>              pn = self.recipecaches[mc].pkg_fn[taskfn]
>              pn = self.add_mc_prefix(mc, pn)
> diff --git a/bitbake/lib/bb/runqueue.py b/bitbake/lib/bb/runqueue.py
> index 0483b95..3b674ba 100644
> --- a/bitbake/lib/bb/runqueue.py
> +++ b/bitbake/lib/bb/runqueue.py
> @@ -49,30 +49,30 @@ def taskname_from_tid(tid):
>      return tid.rsplit(":", 1)[1]
>  
>  def split_tid(tid):
> +    (mc, fn, taskname, _) = split_tid_mcfn(tid)
> +    return (mc, fn, taskname)
> +
> +def split_tid_mcfn(tid):
>      if tid.startswith('multiconfig:'):
>          elems = tid.split(':')
>          mc = elems[1]
>          fn = ":".join(elems[2:-1])
>          taskname = elems[-1]
> +        mcfn = "multiconfig:" + mc + ":" + fn
>      else:
>          tid = tid.rsplit(":", 1)
>          mc = ""
>          fn = tid[0]
>          taskname = tid[1]
> +        mcfn = fn
>  
> -    return (mc, fn, taskname)
> +    return (mc, fn, taskname, mcfn)
>  
>  def build_tid(mc, fn, taskname):
>      if mc:
>          return "multiconfig:" + mc + ":" + fn + ":" + taskname
>      return fn + ":" + taskname
>  
> -def taskfn_fromtid(tid):
> -    (mc, fn, taskname) = split_tid(tid)
> -    if mc:
> -        return "multiconfig:" + mc + ":" + fn
> -    return fn
> -
>  class RunQueueStats:
>      """
>      Holds statistics on the tasks handled by the associated runQueue
> @@ -135,8 +135,7 @@ class RunQueueScheduler(object):
>          self.buildable = []
>          self.stamps = {}
>          for tid in self.rqdata.runtaskentries:
> -            (mc, fn, taskname) = split_tid(tid)
> -            taskfn = taskfn_fromtid(tid)
> +            (mc, fn, taskname, taskfn) = split_tid_mcfn(tid)
>              self.stamps[tid] = bb.build.stampfile(taskname,
> self.rqdata.dataCaches[mc], taskfn, noextra=True) if tid in
> self.rq.runq_buildable:
>                  self.buildable.append(tid)
> @@ -289,7 +288,7 @@ class RunQueueData:
>          return tid + task_name_suffix
>  
>      def get_short_user_idstring(self, task, task_name_suffix = ""):
> -        (mc, fn, taskname) = split_tid(task)
> +        (mc, fn, taskname, _) = split_tid_mcfn(task)
>          pn = self.dataCaches[mc].pkg_fn[fn]
>          taskname = taskname_from_tid(task) + task_name_suffix
>          return "%s:%s" % (pn, taskname)
> @@ -511,9 +510,8 @@ class RunQueueData:
>          for mc in taskData:
>              for tid in taskData[mc].taskentries:
>  
> -                (mc, fn, taskname) = split_tid(tid)
> +                (mc, fn, taskname, taskfn) = split_tid_mcfn(tid)
>                  #runtid = build_tid(mc, fn, taskname)
> -                taskfn = taskfn_fromtid(tid)
>  
>                  #logger.debug(2, "Processing %s,%s:%s", mc, fn, taskname)
>  
> @@ -529,7 +527,7 @@ class RunQueueData:
>                  #
>                  # e.g. addtask before X after Y
>                  for t in taskData[mc].taskentries[tid].tdepends:
> -                    (_, depfn, deptaskname) = split_tid(t)
> +                    (_, depfn, deptaskname, _) = split_tid_mcfn(t)
>                      depends.add(build_tid(mc, depfn, deptaskname))
>  
>                  # Resolve 'deptask' dependencies
> @@ -611,7 +609,7 @@ class RunQueueData:
>  
>              def generate_recdeps(t):
>                  newdeps = set()
> -                (mc, fn, taskname) = split_tid(t)
> +                (mc, fn, taskname, _) = split_tid_mcfn(t)
>                  add_resolved_dependencies(mc, fn, tasknames, newdeps)
>                  extradeps[tid].update(newdeps)
>                  seendeps.add(t)
> @@ -774,8 +772,7 @@ class RunQueueData:
>              prov_list = {}
>              seen_fn = []
>              for tid in self.runtaskentries:
> -                (tidmc, fn, taskname) = split_tid(tid)
> -                taskfn = taskfn_fromtid(tid)
> +                (tidmc, fn, taskname, taskfn) = split_tid_mcfn(tid)
>                  if taskfn in seen_fn:
>                      continue
>                  if mc != tidmc:
> @@ -885,14 +882,14 @@ class RunQueueData:
>          self.runq_setscene_tids = []
>          if not self.cooker.configuration.nosetscene:
>              for tid in self.runtaskentries:
> -                (mc, fn, taskname) = split_tid(tid)
> +                (mc, fn, taskname, _) = split_tid_mcfn(tid)
>                  setscenetid = fn + ":" + taskname + "_setscene"
>                  if setscenetid not in taskData[mc].taskentries:
>                      continue
>                  self.runq_setscene_tids.append(tid)
>  
>          def invalidate_task(tid, error_nostamp):
> -            (mc, fn, taskname) = split_tid(tid)
> +            (mc, fn, taskname, _) = split_tid_mcfn(tid)
>              taskdep = self.dataCaches[mc].task_deps[fn]
>              if fn + ":" + taskname not in taskData[mc].taskentries:
>                  logger.warning("Task %s does not exist, invalidating this
> task will have no effect" % taskname) @@ -946,8 +943,7 @@ class
> RunQueueData:
>                      procdep = []
>                      for dep in self.runtaskentries[tid].depends:
>                          procdep.append(fn_from_tid(dep) + "." +
> taskname_from_tid(dep)) -                    (mc, fn, taskname) =
> split_tid(tid)
> -                    taskfn = taskfn_fromtid(tid)
> +                    (mc, fn, taskname, taskfn) = split_tid_mcfn(tid)
>                      self.runtaskentries[tid].hash =
> bb.parse.siggen.get_taskhash(taskfn, taskname, procdep,
> self.dataCaches[mc]) task = self.runtaskentries[tid].task
>  
> @@ -1099,8 +1095,7 @@ class RunQueue:
>              except:
>                  return None
>  
> -        (mc, fn, tn) = split_tid(tid)
> -        taskfn = taskfn_fromtid(tid)
> +        (mc, fn, tn, taskfn) = split_tid_mcfn(tid)
>          if taskname is None:
>              taskname = tn
>  
> @@ -1134,8 +1129,7 @@ class RunQueue:
>          t1 = get_timestamp(stampfile)
>          for dep in self.rqdata.runtaskentries[tid].depends:
>              if iscurrent:
> -                (mc2, fn2, taskname2) = split_tid(dep)
> -                taskfn2 = taskfn_fromtid(dep)
> +                (mc2, fn2, taskname2, taskfn2) = split_tid_mcfn(dep)
>                  stampfile2 = bb.build.stampfile(taskname2,
> self.rqdata.dataCaches[mc2], taskfn2) stampfile3 =
> bb.build.stampfile(taskname2 + "_setscene", self.rqdata.dataCaches[mc2],
> taskfn2) t2 = get_timestamp(stampfile2)
> @@ -1247,7 +1241,7 @@ class RunQueue:
>              if not self.rqdata.taskData[''].tryaltconfigs:
>                  raise bb.runqueue.TaskFailure(self.rqexe.failed_tids)
>              for tid in self.rqexe.failed_tids:
> -                (mc, fn, tn) = split_tid(tid)
> +                (mc, fn, tn, _) = split_tid_mcfn(tid)
>                  self.rqdata.taskData[mc].fail_fn(fn)
>              self.rqdata.reset()
>  
> @@ -1297,7 +1291,7 @@ class RunQueue:
>          bb.note("Reparsing files to collect dependency data")
>          bb_cache = bb.cache.NoCache(self.cooker.databuilder)
>          for tid in self.rqdata.runtaskentries:
> -            fn = taskfn_fromtid(tid)
> +            fn = fn_from_tid(tid)
>              if fn not in done:
>                  the_data = bb_cache.loadDataFull(fn,
> self.cooker.collection.get_file_appends(fn)) done.add(fn)
> @@ -1319,8 +1313,7 @@ class RunQueue:
>          valid_new = set()
>  
>          for tid in self.rqdata.runtaskentries:
> -            (mc, fn, taskname) = split_tid(tid)
> -            taskfn = taskfn_fromtid(tid)
> +            (mc, fn, taskname, taskfn) = split_tid_mcfn(tid)
>              taskdep = self.rqdata.dataCaches[mc].task_deps[taskfn]
>  
>              if 'noexec' in taskdep and taskname in taskdep['noexec']:
> @@ -1408,7 +1401,7 @@ class RunQueue:
>  
>  
>          for tid in invalidtasks:
> -            (mc, fn, taskname) = split_tid(tid)
> +            (mc, fn, taskname, _) = split_tid_mcfn(tid)
>              pn = self.rqdata.dataCaches[mc].pkg_fn[fn]
>              h = self.rqdata.runtaskentries[tid].hash
>              matches = bb.siggen.find_siginfo(pn, taskname, [],
> self.cfgData) @@ -1512,7 +1505,7 @@ class RunQueueExecute:
>          taskdata = {}
>          taskdeps.add(task)
>          for dep in taskdeps:
> -            (mc, fn, taskname) = split_tid(dep)
> +            (mc, fn, taskname, _) = split_tid_mcfn(dep)
>              pn = self.rqdata.dataCaches[mc].pkg_fn[fn]
>              taskdata[dep] = [pn, taskname, fn]
>          call = self.rq.depvalidate + "(task, taskdata, notneeded, d)"
> @@ -1569,8 +1562,7 @@ class RunQueueExecuteTasks(RunQueueExecute):
>              tasknames = {}
>              fns = {}
>              for tid in self.rqdata.runtaskentries:
> -                (mc, fn, taskname) = split_tid(tid)
> -                taskfn = taskfn_fromtid(tid)
> +                (mc, fn, taskname, taskfn) = split_tid_mcfn(tid)
>                  taskdep = self.rqdata.dataCaches[mc].task_deps[taskfn]
>                  fns[tid] = taskfn
>                  tasknames[tid] = taskname
> @@ -1589,9 +1581,8 @@ class RunQueueExecuteTasks(RunQueueExecute):
>              covered_remove = bb.utils.better_eval(call, locs)
>  
>          def removecoveredtask(tid):
> -            (mc, fn, taskname) = split_tid(tid)
> +            (mc, fn, taskname, taskfn) = split_tid_mcfn(tid)
>              taskname = taskname + '_setscene'
> -            taskfn = taskfn_fromtid(tid)
>              bb.build.del_stamp(taskname, self.rqdata.dataCaches[mc],
> taskfn) self.rq.scenequeue_covered.remove(tid)
>  
> @@ -1617,7 +1608,7 @@ class RunQueueExecuteTasks(RunQueueExecute):
>          for mc in self.rqdata.dataCaches:
>              target_pairs = []
>              for tid in self.rqdata.target_tids:
> -                (tidmc, fn, taskname) = split_tid(tid)
> +                (tidmc, fn, taskname, _) = split_tid_mcfn(tid)
>                  if tidmc == mc:
>                      target_pairs.append((fn, taskname))
>  
> @@ -1713,7 +1704,7 @@ class RunQueueExecuteTasks(RunQueueExecute):
>          if self.rqdata.setscenewhitelist:
>              # Check tasks that are going to run against the whitelist
>              def check_norun_task(tid, showerror=False):
> -                (mc, fn, taskname) = split_tid(tid)
> +                (mc, fn, taskname, _) = split_tid_mcfn(tid)
>                  # Ignore covered tasks
>                  if tid in self.rq.scenequeue_covered:
>                      return False
> @@ -1761,8 +1752,7 @@ class RunQueueExecuteTasks(RunQueueExecute):
>  
>          task = self.sched.next()
>          if task is not None:
> -            (mc, fn, taskname) = split_tid(task)
> -            taskfn = taskfn_fromtid(task)
> +            (mc, fn, taskname, taskfn) = split_tid_mcfn(task)
>  
>              if task in self.rq.scenequeue_covered:
>                  logger.debug(2, "Setscene covered task %s", task)
> @@ -1842,8 +1832,7 @@ class RunQueueExecuteTasks(RunQueueExecute):
>          while next:
>              additional = []
>              for revdep in next:
> -                (mc, fn, taskname) = split_tid(revdep)
> -                taskfn = taskfn_fromtid(revdep)
> +                (mc, fn, taskname, taskfn) = split_tid_mcfn(revdep)
>                  pn = self.rqdata.dataCaches[mc].pkg_fn[taskfn]
>                  deps = self.rqdata.runtaskentries[revdep].depends
>                  provides = self.rqdata.dataCaches[mc].fn_provides[taskfn]
> @@ -1999,7 +1988,7 @@ class RunQueueExecuteScenequeue(RunQueueExecute):
>          # e.g. do_sometask_setscene[depends] =
> "targetname:do_someothertask_setscene" # Note that anything explicitly
> depended upon will have its reverse dependencies removed to avoid circular
> dependencies for tid in self.rqdata.runq_setscene_tids:
> -                (mc, fn, taskname) = split_tid(tid)
> +                (mc, fn, taskname, _) = split_tid_mcfn(tid)
>                  realtid = fn + ":" + taskname + "_setscene"
>                  idepends =
> self.rqdata.taskData[mc].taskentries[realtid].idepends for (depname,
> idependtask) in idepends:
> @@ -2063,8 +2052,7 @@ class RunQueueExecuteScenequeue(RunQueueExecute):
>              noexec = []
>              stamppresent = []
>              for tid in self.sq_revdeps:
> -                (mc, fn, taskname) = split_tid(tid)
> -                taskfn = taskfn_fromtid(tid)
> +                (mc, fn, taskname, taskfn) = split_tid_mcfn(tid)
>  
>                  taskdep = self.rqdata.dataCaches[mc].task_deps[fn]
>  
> @@ -2135,7 +2123,7 @@ class RunQueueExecuteScenequeue(RunQueueExecute):
>      def check_taskfail(self, task):
>          if self.rqdata.setscenewhitelist:
>              realtask = task.split('_setscene')[0]
> -            (mc, fn, taskname) = split_tid(realtask)
> +            (mc, fn, taskname, _) = split_tid_mcfn(realtask)
>              pn = self.rqdata.dataCaches[mc].pkg_fn[fn]
>              if not check_setscene_enforce_whitelist(pn, taskname,
> self.rqdata.setscenewhitelist): logger.error('Task %s.%s failed' % (pn,
> taskname + "_setscene")) @@ -2199,8 +2187,7 @@ class
> RunQueueExecuteScenequeue(RunQueueExecute): task = nexttask
>                      break
>          if task is not None:
> -            (mc, fn, taskname) = split_tid(task)
> -            taskfn = taskfn_fromtid(task)
> +            (mc, fn, taskname, taskfn) = split_tid_mcfn(task)
>              taskname = taskname + "_setscene"
>              if self.rq.check_stamp_task(task, taskname_from_tid(task),
> recurse = True, cache=self.stampcache): logger.debug(2, 'Stamp for
> underlying task %s is current, so skipping setscene variant', task)