[bitbake-devel] [PATCH] codeparser/data_smart: Optimise parsing speed
Richard Purdie
richard.purdie at linuxfoundation.org
Tue Dec 3 12:10:05 UTC 2013
The previous "contains" changes caused a ~3% parsing speed impact.
Looking at the cause of that slowdown was interesting:
* Use of defaultdict was slower than just checking for missing entries
and setting them when needed.
* Even the "import collections" adversely affects parsing speed
* There was a missing intern function for the contains cache data
* Setting up a log object for each variable has noticeable overhead.
Due to the changes in the code paths used, we can avoid this.
* We can call getVarFlag on "_content" directly within VariableParse
for a noticeable speed gain since it's a seriously hot code path.
This patch therefore tweaks the code based on the above observations to
get some of the speed back.
Signed-off-by: Richard Purdie <richard.purdie at linuxfoundation.org>
---
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py
index 6e34eff..62b6cf9 100644
--- a/bitbake/lib/bb/codeparser.py
+++ b/bitbake/lib/bb/codeparser.py
@@ -1,7 +1,6 @@
import ast
import codegen
import logging
-import collections
import os.path
import bb.utils, bb.data
from itertools import chain
@@ -65,6 +64,8 @@ class CodeParserCache(MultiProcessCache):
for h in data[0]:
data[0][h]["refs"] = self.internSet(data[0][h]["refs"])
data[0][h]["execs"] = self.internSet(data[0][h]["execs"])
+ for k in data[0][h]["contains"]:
+ data[0][h]["contains"][k] = self.internSet(data[0][h]["contains"][k])
for h in data[1]:
data[1][h]["execs"] = self.internSet(data[1][h]["execs"])
return
@@ -125,6 +126,8 @@ class PythonParser():
if isinstance(node.args[0], ast.Str):
varname = node.args[0].s
if name in self.containsfuncs and isinstance(node.args[1], ast.Str):
+ if varname not in self.contains:
+ self.contains[varname] = set()
self.contains[varname].add(node.args[1].s)
else:
self.references.add(node.args[0].s)
@@ -153,10 +156,10 @@ class PythonParser():
def __init__(self, name, log):
self.var_execs = set()
- self.contains = collections.defaultdict(set)
+ self.contains = {}
self.execs = set()
self.references = set()
- self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
+ self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, log)
self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)
diff --git a/bitbake/lib/bb/data_smart.py b/bitbake/lib/bb/data_smart.py
index 9a6f767..833d9f1 100644
--- a/bitbake/lib/bb/data_smart.py
+++ b/bitbake/lib/bb/data_smart.py
@@ -35,7 +35,6 @@ import hashlib
import bb, bb.codeparser
from bb import utils
from bb.COW import COWDictBase
-import collections
logger = logging.getLogger("BitBake.Data")
@@ -89,7 +88,7 @@ class VariableParse:
self.references = set()
self.execs = set()
- self.contains = collections.defaultdict(set)
+ self.contains = {}
def var_sub(self, match):
key = match.group()[2:-1]
@@ -100,7 +99,7 @@ class VariableParse:
varparse = self.d.expand_cache[key]
var = varparse.value
else:
- var = self.d.getVar(key, True)
+ var = self.d.getVarFlag(key, "_content", True)
self.references.add(key)
if var is not None:
return var
@@ -123,7 +122,10 @@ class VariableParse:
self.execs |= parser.execs
for k in parser.contains:
- self.contains[k].update(parser.contains[k])
+ if k not in self.contains:
+ self.contains[k] = parser.contains[k]
+ else:
+ self.contains[k].update(parser.contains[k])
value = utils.better_eval(codeobj, DataContext(self.d))
return str(value)
More information about the bitbake-devel
mailing list