[oe-commits] [openembedded-core] 01/10: qemurunner.py: refactor searching for QEMU PID

git at git.openembedded.org git at git.openembedded.org
Mon Sep 4 19:56:06 UTC 2017


This is an automated email from the git hooks/post-receive script.

rpurdie pushed a commit to branch master-next
in repository openembedded-core.

commit 949387603f7352daecf9eeefaf2ae293f16dc374
Author: Juro Bystricky <juro.bystricky at intel.com>
AuthorDate: Sun Sep 3 11:04:27 2017 -0700

    qemurunner.py: refactor searching for QEMU PID
    
    There are occasional cases observed where QEMU hangs and tests
    are aborted because qemurunner was not able to determine PID
    of the spawned QEMU.
    
    This patch attempts to address the issue with:
    1. Refactoring the code that determines the QEMU PID
    2. In case of error, displaying all running processes in order to
       assist in problem analysis.
    
    While in there, this also fixes the printout of the command used
    (the command had all spaces removed).
    Some other tweaks.
    
    [YOCTO #12001]
    
    Signed-off-by: Juro Bystricky <juro.bystricky at intel.com>
    Signed-off-by: Richard Purdie <richard.purdie at linuxfoundation.org>
---
 meta/lib/oeqa/utils/qemurunner.py | 308 ++++++++++++++++++--------------------
 1 file changed, 145 insertions(+), 163 deletions(-)

diff --git a/meta/lib/oeqa/utils/qemurunner.py b/meta/lib/oeqa/utils/qemurunner.py
index 54312fb..e448676 100644
--- a/meta/lib/oeqa/utils/qemurunner.py
+++ b/meta/lib/oeqa/utils/qemurunner.py
@@ -139,7 +139,11 @@ class QemuRunner:
             logger.error("Failed to create listening socket: %s" % msg[1])
             return False
 
-        bootparams = 'console=tty1 console=ttyS0,115200n8 printk.time=1'
+        # Note that we add an extra "bootparam" qemurunner_pid. This is in order to simplify
+        # finding the spawned QEMU PID, which we will need to determine in order to communicate
+        # with the QEMU. There may be various QEMUs running at this time, but we are only interested
+        # in the one with our PID. This is perfectly harmless.
+        bootparams = 'console=tty1 console=ttyS0,115200n8 printk.time=1 qemurunner_pid='+ str(os.getpid())
         if extra_bootparams:
             bootparams = bootparams + ' ' + extra_bootparams
 
@@ -199,125 +203,14 @@ class QemuRunner:
                     self.stop()
                     logger.info("Output from runqemu:\n%s" % self.getOutput(output))
                     return False
-            time.sleep(1)
+            time.sleep(0.5)
 
-        out = self.getOutput(output)
-        netconf = False # network configuration is not required by default
-        if self.is_alive():
-            logger.info("qemu started in %s seconds - qemu procces pid is %s" % (time.time() - (endtime - self.runqemutime), self.qemupid))
-            if get_ip:
-                cmdline = ''
-                with open('/proc/%s/cmdline' % self.qemupid) as p:
-                    cmdline = p.read()
-                    # It is needed to sanitize the data received
-                    # because is possible to have control characters
-                    cmdline = re_control_char.sub('', cmdline)
-                try:
-                    ips = re.findall("((?:[0-9]{1,3}\.){3}[0-9]{1,3})", cmdline.split("ip=")[1])
-                    self.ip = ips[0]
-                    self.server_ip = ips[1]
-                    logger.info("qemu cmdline used:\n{}".format(cmdline))
-                except (IndexError, ValueError):
-                    # Try to get network configuration from runqemu output
-                    match = re.match('.*Network configuration: ([0-9.]+)::([0-9.]+):([0-9.]+)$.*',
-                                     out, re.MULTILINE|re.DOTALL)
-                    if match:
-                        self.ip, self.server_ip, self.netmask = match.groups()
-                        # network configuration is required as we couldn't get it
-                        # from the runqemu command line, so qemu doesn't run kernel
-                        # and guest networking is not configured
-                        netconf = True
-                    else:
-                        logger.error("Couldn't get ip from qemu command line and runqemu output! "
-                                     "Here is the qemu command line used:\n%s\n"
-                                     "and output from runqemu:\n%s" % (cmdline, out))
-                        self._dump_host()
-                        self.stop()
-                        return False
-
-                logger.info("Target IP: %s" % self.ip)
-                logger.info("Server IP: %s" % self.server_ip)
-
-            self.thread = LoggingThread(self.log, threadsock, logger)
-            self.thread.start()
-            if not self.thread.connection_established.wait(self.boottime):
-                logger.error("Didn't receive a console connection from qemu. "
-                             "Here is the qemu command line used:\n%s\nand "
-                             "output from runqemu:\n%s" % (cmdline, out))
-                self.stop_thread()
-                return False
-
-            logger.info("Output from runqemu:\n%s", out)
-            logger.info("Waiting at most %d seconds for login banner" % self.boottime)
-            endtime = time.time() + self.boottime
-            socklist = [self.server_socket]
-            reachedlogin = False
-            stopread = False
-            qemusock = None
-            bootlog = ''
-            data = b''
-            while time.time() < endtime and not stopread:
-                try:
-                    sread, swrite, serror = select.select(socklist, [], [], 5)
-                except InterruptedError:
-                    continue
-                for sock in sread:
-                    if sock is self.server_socket:
-                        qemusock, addr = self.server_socket.accept()
-                        qemusock.setblocking(0)
-                        socklist.append(qemusock)
-                        socklist.remove(self.server_socket)
-                        logger.info("Connection from %s:%s" % addr)
-                    else:
-                        data = data + sock.recv(1024)
-                        if data:
-                            try:
-                                data = data.decode("utf-8", errors="surrogateescape")
-                                bootlog += data
-                                data = b''
-                                if re.search(".* login:", bootlog):
-                                    self.server_socket = qemusock
-                                    stopread = True
-                                    reachedlogin = True
-                                    logger.info("Reached login banner")
-                            except UnicodeDecodeError:
-                                continue
-                        else:
-                            socklist.remove(sock)
-                            sock.close()
-                            stopread = True
-
-            if not reachedlogin:
-                logger.info("Target didn't reached login boot in %d seconds" % self.boottime)
-                lines = "\n".join(bootlog.splitlines()[-25:])
-                logger.info("Last 25 lines of text:\n%s" % lines)
-                logger.info("Check full boot log: %s" % self.logfile)
-                self._dump_host()
-                self.stop()
-                return False
-
-            # If we are not able to login the tests can continue
-            try:
-                (status, output) = self.run_serial("root\n", raw=True)
-                if re.search("root@[a-zA-Z0-9\-]+:~#", output):
-                    self.logged = True
-                    logger.info("Logged as root in serial console")
-                    if netconf:
-                        # configure guest networking
-                        cmd = "ifconfig eth0 %s netmask %s up\n" % (self.ip, self.netmask)
-                        output = self.run_serial(cmd, raw=True)[1]
-                        if re.search("root@[a-zA-Z0-9\-]+:~#", output):
-                            logger.info("configured ip address %s", self.ip)
-                        else:
-                            logger.info("Couldn't configure guest networking")
-                else:
-                    logger.info("Couldn't login into serial console"
-                            " as root using blank password")
-            except:
-                logger.info("Serial console failed while trying to login")
-
-        else:
+        if not self.is_alive():
             logger.error("Qemu pid didn't appear in %s seconds" % self.runqemutime)
+            # Dump all processes to help us to figure out what is going on...
+            ps = subprocess.Popen(['ps', 'axww', '-o', 'pid,ppid,command '], stdout=subprocess.PIPE).communicate()[0]
+            processes = ps.decode("utf-8")
+            logger.info("Running processes:\n%s" % processes)
             self._dump_host()
             self.stop()
             op = self.getOutput(output)
@@ -327,7 +220,121 @@ class QemuRunner:
                 logger.error("No output from runqemu.\n")
             return False
 
-        return self.is_alive()
+        # We are alive: qemu is running
+        out = self.getOutput(output)
+        netconf = False # network configuration is not required by default
+        logger.info("qemu started in %s seconds - qemu procces pid is %s" % (time.time() - (endtime - self.runqemutime), self.qemupid))
+        if get_ip:
+            cmdline = ''
+            with open('/proc/%s/cmdline' % self.qemupid) as p:
+                cmdline = p.read()
+                # It is needed to sanitize the data received
+                # because is possible to have control characters
+                cmdline = re_control_char.sub(' ', cmdline)
+            try:
+                ips = re.findall("((?:[0-9]{1,3}\.){3}[0-9]{1,3})", cmdline.split("ip=")[1])
+                self.ip = ips[0]
+                self.server_ip = ips[1]
+                logger.info("qemu cmdline used:\n{}".format(cmdline))
+            except (IndexError, ValueError):
+                # Try to get network configuration from runqemu output
+                match = re.match('.*Network configuration: ([0-9.]+)::([0-9.]+):([0-9.]+)$.*',
+                                 out, re.MULTILINE|re.DOTALL)
+                if match:
+                    self.ip, self.server_ip, self.netmask = match.groups()
+                    # network configuration is required as we couldn't get it
+                    # from the runqemu command line, so qemu doesn't run kernel
+                    # and guest networking is not configured
+                    netconf = True
+                else:
+                    logger.error("Couldn't get ip from qemu command line and runqemu output! "
+                                 "Here is the qemu command line used:\n%s\n"
+                                 "and output from runqemu:\n%s" % (cmdline, out))
+                    self._dump_host()
+                    self.stop()
+                    return False
+
+        logger.info("Target IP: %s" % self.ip)
+        logger.info("Server IP: %s" % self.server_ip)
+
+        self.thread = LoggingThread(self.log, threadsock, logger)
+        self.thread.start()
+        if not self.thread.connection_established.wait(self.boottime):
+            logger.error("Didn't receive a console connection from qemu. "
+                         "Here is the qemu command line used:\n%s\nand "
+                         "output from runqemu:\n%s" % (cmdline, out))
+            self.stop_thread()
+            return False
+
+        logger.info("Output from runqemu:\n%s", out)
+        logger.info("Waiting at most %d seconds for login banner" % self.boottime)
+        endtime = time.time() + self.boottime
+        socklist = [self.server_socket]
+        reachedlogin = False
+        stopread = False
+        qemusock = None
+        bootlog = ''
+        data = b''
+        while time.time() < endtime and not stopread:
+            try:
+                sread, swrite, serror = select.select(socklist, [], [], 5)
+            except InterruptedError:
+                continue
+            for sock in sread:
+                if sock is self.server_socket:
+                    qemusock, addr = self.server_socket.accept()
+                    qemusock.setblocking(0)
+                    socklist.append(qemusock)
+                    socklist.remove(self.server_socket)
+                    logger.info("Connection from %s:%s" % addr)
+                else:
+                    data = data + sock.recv(1024)
+                    if data:
+                        try:
+                            data = data.decode("utf-8", errors="surrogateescape")
+                            bootlog += data
+                            data = b''
+                            if re.search(".* login:", bootlog):
+                                self.server_socket = qemusock
+                                stopread = True
+                                reachedlogin = True
+                                logger.info("Reached login banner")
+                        except UnicodeDecodeError:
+                            continue
+                    else:
+                        socklist.remove(sock)
+                        sock.close()
+                        stopread = True
+
+        if not reachedlogin:
+            logger.info("Target didn't reached login boot in %d seconds" % self.boottime)
+            lines = "\n".join(bootlog.splitlines()[-25:])
+            logger.info("Last 25 lines of text:\n%s" % lines)
+            logger.info("Check full boot log: %s" % self.logfile)
+            self._dump_host()
+            self.stop()
+            return False
+
+        # If we are not able to login the tests can continue
+        try:
+            (status, output) = self.run_serial("root\n", raw=True)
+            if re.search("root@[a-zA-Z0-9\-]+:~#", output):
+                self.logged = True
+                logger.info("Logged as root in serial console")
+                if netconf:
+                    # configure guest networking
+                    cmd = "ifconfig eth0 %s netmask %s up\n" % (self.ip, self.netmask)
+                    output = self.run_serial(cmd, raw=True)[1]
+                    if re.search("root@[a-zA-Z0-9\-]+:~#", output):
+                        logger.info("configured ip address %s", self.ip)
+                    else:
+                        logger.info("Couldn't configure guest networking")
+                else:
+                    logger.info("Couldn't login into serial console"
+                                " as root using blank password")
+        except:
+            logger.info("Serial console failed while trying to login")
+        return True
 
     def stop(self):
         self.stop_thread()
@@ -380,55 +387,30 @@ class QemuRunner:
     def is_alive(self):
         if not self.runqemu:
             return False
-        qemu_child = self.find_child(str(self.runqemu.pid))
+        qemu_child = self.find_child()
         if qemu_child:
-            self.qemupid = qemu_child[0]
-            if os.path.exists("/proc/" + str(self.qemupid)):
+            if os.path.exists("/proc/" + str(qemu_child)):
+                self.qemupid = qemu_child
                 return True
         return False
 
-    def find_child(self,parent_pid):
-        #
-        # Walk the process tree from the process specified looking for a qemu-system. Return its [pid'cmd]
-        #
-        ps = subprocess.Popen(['ps', 'axww', '-o', 'pid,ppid,command'], stdout=subprocess.PIPE).communicate()[0]
+    def find_child(self):
+        args = 'ps axwwh -o pid,command | grep qemurunner_pid=' + str(os.getpid())
+        ps = subprocess.Popen(args, shell=True, stdout=subprocess.PIPE).communicate()[0]
         processes = ps.decode("utf-8").split('\n')
-        nfields = len(processes[0].split()) - 1
-        pids = {}
-        commands = {}
-        for row in processes[1:]:
-            data = row.split(None, nfields)
-            if len(data) != 3:
-                continue
-            if data[1] not in pids:
-                pids[data[1]] = []
-
-            pids[data[1]].append(data[0])
-            commands[data[0]] = data[2]
-
-        if parent_pid not in pids:
-            return []
-
-        parents = []
-        newparents = pids[parent_pid]
-        while newparents:
-            next = []
-            for p in newparents:
-                if p in pids:
-                    for n in pids[p]:
-                        if n not in parents and n not in next:
-                            next.append(n)
-                if p not in parents:
-                    parents.append(p)
-                    newparents = next
-        #print("Children matching %s:" % str(parents))
-        for p in parents:
-            # Need to be careful here since runqemu runs "ldd qemu-system-xxxx"
-            # Also, old versions of ldd (2.11) run "LD_XXXX qemu-system-xxxx"
-            basecmd = commands[p].split()[0]
-            basecmd = os.path.basename(basecmd)
-            if "qemu-system" in basecmd and "-serial tcp" in commands[p]:
-                return [int(p),commands[p]]
+
+        for row in processes:
+            if len(row) >= 2:
+                splitted = row.split()
+                pid = splitted[0]
+                firstarg = splitted[1]
+
+                # beware of any /bin/sh, python, grep, etc. commands that may also contain
+                # "qemu-system" somewhere in arguments, we are only interested in qemu-system
+                # in the first argument
+                if "qemu-system" in firstarg:
+                    return int(pid)
+        return 0
 
     def run_serial(self, command, raw=False, timeout=5):
         # We assume target system have echo to get command status

-- 
To stop receiving notification emails like this one, please contact
the administrator of this repository.


More information about the Openembedded-commits mailing list