[oe-commits] [openembedded-core] 24/26: oeqa/logparser: Fix performance issues with ptest log parsing
git at git.openembedded.org
git at git.openembedded.org
Fri Feb 21 09:39:44 UTC 2020
This is an automated email from the git hooks/post-receive script.
rpurdie pushed a commit to branch master
in repository openembedded-core.
commit 01c9f40b979e36a53e789a6bedd89b6d9557dce3
Author: Richard Purdie <richard.purdie at linuxfoundation.org>
AuthorDate: Thu Feb 20 13:57:50 2020 +0000
oeqa/logparser: Fix performance issues with ptest log parsing
On the autobuilder a ptest log with 2.1 million lines took around 18 hours to
process. This is clearly crazy.
We can tweak the processing code to:
a) Stop repeatedly joining large strings together (append to a list instead)
b) Use one startswith expression instead of multiple re.search() operations
With this change it takes 18 hours down to around 12s.
[YOCTO #13696]
Signed-off-by: Richard Purdie <richard.purdie at linuxfoundation.org>
---
meta/lib/oeqa/utils/logparser.py | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/meta/lib/oeqa/utils/logparser.py b/meta/lib/oeqa/utils/logparser.py
index 7313df8..5403721 100644
--- a/meta/lib/oeqa/utils/logparser.py
+++ b/meta/lib/oeqa/utils/logparser.py
@@ -25,13 +25,20 @@ class PtestParser(object):
section_regex['exitcode'] = re.compile(r"^ERROR: Exit status is (.+)")
section_regex['timeout'] = re.compile(r"^TIMEOUT: .*/(.+)/ptest")
+ # Cache markers so we don't take the re.search() hit all the time.
+ markers = ("PASSED", "FAILED", "SKIPPED", "BEGIN:", "END:", "DURATION:", "ERROR: Exit", "TIMEOUT:")
+
def newsection():
- return { 'name': "No-section", 'log': "" }
+ return { 'name': "No-section", 'log': [] }
current_section = newsection()
with open(logfile, errors='replace') as f:
for line in f:
+ if not line.startswith(markers):
+ current_section['log'].append(line)
+ continue
+
result = section_regex['begin'].search(line)
if result:
current_section['name'] = result.group(1)
@@ -61,7 +68,7 @@ class PtestParser(object):
current_section[t] = result.group(1)
continue
- current_section['log'] = current_section['log'] + line
+ current_section['log'].append(line)
for t in test_regex:
result = test_regex[t].search(line)
@@ -70,6 +77,11 @@ class PtestParser(object):
self.results[current_section['name']] = {}
self.results[current_section['name']][result.group(1).strip()] = t
+ # Python performance for repeatedly joining long strings is poor, do it all at once at the end.
+ # For 2.1 million lines in a log this reduces 18 hours to 12s.
+ for section in self.sections:
+ self.sections[section]['log'] = "".join(self.sections[section]['log'])
+
return self.results, self.sections
# Log the results as files. The file name is the section name and the contents are the tests in that section.
--
To stop receiving notification emails like this one, please contact
the administrator of this repository.
More information about the Openembedded-commits
mailing list