oeqa/logparser: Fix performance issues with ptest log parsing

author Richard Purdie <richard.purdie@linuxfoundation.org>

Thu, 20 Feb 2020 13:57:50 +0000 (13:57 +0000)

committer Richard Purdie <richard.purdie@linuxfoundation.org>

Fri, 21 Feb 2020 09:37:04 +0000 (09:37 +0000)
author Richard Purdie <richard.purdie@linuxfoundation.org>
Thu, 20 Feb 2020 13:57:50 +0000 (13:57 +0000)
committer Richard Purdie <richard.purdie@linuxfoundation.org>
Fri, 21 Feb 2020 09:37:04 +0000 (09:37 +0000)
diff --git a/meta/lib/oeqa/utils/logparser.py b/meta/lib/oeqa/utils/logparser.py

index 7313df8ec35b04863f073a4d5ddc15f91f721a74..540372107366940b44f56f589826eeedd3f1d01a 100644 (file)
--- a/meta/lib/oeqa/utils/logparser.py
+++ b/meta/lib/oeqa/utils/logparser.py
@@ -25,13 +25,20 @@ class PtestParser(object):
          section_regex['exitcode'] = re.compile(r"^ERROR: Exit status is (.+)")
          section_regex['timeout'] = re.compile(r"^TIMEOUT: .*/(.+)/ptest")
  
+        # Cache markers so we don't take the re.search() hit all the time.
+        markers = ("PASSED", "FAILED", "SKIPPED", "BEGIN:", "END:", "DURATION:", "ERROR: Exit", "TIMEOUT:")
+
          def newsection():
-            return { 'name': "No-section", 'log': "" }
+            return { 'name': "No-section", 'log': [] }
  
          current_section = newsection()
  
          with open(logfile, errors='replace') as f:
              for line in f:
+                if not line.startswith(markers):
+                    current_section['log'].append(line)
+                    continue
+
                  result = section_regex['begin'].search(line)
                  if result:
                      current_section['name'] = result.group(1)
@@ -61,7 +68,7 @@ class PtestParser(object):
                          current_section[t] = result.group(1)
                          continue
  
-                current_section['log'] = current_section['log'] + line 
+                current_section['log'].append(line)
  
                  for t in test_regex:
                      result = test_regex[t].search(line)
@@ -70,6 +77,11 @@ class PtestParser(object):
                              self.results[current_section['name']] = {}
                          self.results[current_section['name']][result.group(1).strip()] = t
  
+        # Python performance for repeatedly joining long strings is poor, do it all at once at the end.
+        # For 2.1 million lines in a log this reduces 18 hours to 12s.
+        for section in self.sections:
+            self.sections[section]['log'] = "".join(self.sections[section]['log'])
+
          return self.results, self.sections
  
      # Log the results as files. The file name is the section name and the contents are the tests in that section.
author	Richard Purdie <richard.purdie@linuxfoundation.org>
	Thu, 20 Feb 2020 13:57:50 +0000 (13:57 +0000)
committer	Richard Purdie <richard.purdie@linuxfoundation.org>
	Fri, 21 Feb 2020 09:37:04 +0000 (09:37 +0000)