Sfoglia il codice sorgente

- Catch SIGQUIT.

- Get rid of per-process pidhistory and keep pidhistory around in options.

- Rejigger logic in start_necessary to use get_state to determine state.

- Log less and log better messages.

- Use os.setpgrp() instead of os.setsid() to prevent child processes from
  receiving signals sent to the parent.
Chris McDonough 19 anni fa
parent
commit
8423d00656
1 ha cambiato i file con 61 aggiunte e 46 eliminazioni
  1. 61 46
      src/supervisor/supervisord.py

+ 61 - 46
src/supervisor/supervisord.py

@@ -95,6 +95,7 @@ class Subprocess:
 
     # Initial state; overridden by instance variables
     pid = 0 # Subprocess pid; 0 when not running
+    beenstarted = False # true if has been started at least once
     laststart = 0 # Last time the subprocess was started; 0 if never
     laststop = 0  # Last time the subprocess was stopped; 0 if never
     delay = 0 # If nonzero, delay starting or killing until this time
@@ -120,7 +121,6 @@ class Subprocess:
         """
         self.options = options
         self.config = config
-        self.pidhistory = []
         self.readbuffer = ""
         if config.logfile:
             self.childlog = options.getLogger(config.logfile, 10,
@@ -169,7 +169,7 @@ class Subprocess:
 
     def record_spawnerr(self, msg):
         self.spawnerr = msg
-        self.options.logger.critical(msg)
+        self.options.logger.critical("spawnerr: %s" % msg)
         self.do_backoff()
         self.governor()
 
@@ -183,6 +183,7 @@ class Subprocess:
             self.options.logger.critical(msg)
             return
 
+        self.beenstarted = True
         self.killing = 0
         self.spawnerr = None
         self.exitstatus = None
@@ -273,16 +274,25 @@ class Subprocess:
             os.close(child_stdin)
             os.close(child_stdout)
             os.close(child_stderr)
-            self.options.logger.info('spawned process %r with pid %s' % (
+            self.options.logger.info('spawned: %r with pid %s' % (
                 self.config.name, pid))
             self.spawnerr = None
             self.do_backoff()
+            self.options.pidhistory[pid] = self
             return pid
         
         else:
             # Child
             try:
-                os.setsid()
+                # prevent child from receiving signals sent to the
+                # parent by calling os.setpgrp to create a new process
+                # group for the child; this prevents, for instance,
+                # the case of child processes being sent a SIGINT when
+                # running supervisor in foreground mode and Ctrl-C in
+                # the terminal window running supervisord is pressed.
+                # Presumably it also prevents HUP, etc received by
+                # supervisord from being sent to children.
+                os.setpgrp()
                 os.dup2(child_stdin, 0)
                 os.dup2(child_stdout, 1)
                 os.dup2(child_stderr, 2)
@@ -325,11 +335,10 @@ class Subprocess:
         if not self.pid:
             return "no subprocess running"
         try:
-            self.options.logger.info('killing %s (%s)' % (self.config.name,
-                                                          self.pid))
+            self.options.logger.debug('killing %s (pid %s)' % (self.config.name,
+                                                               self.pid))
             self.killing = 1
             os.kill(self.pid, sig)
-            self.addpidtohistory(self.pid)
         except:
             io = StringIO.StringIO()
             traceback.print_exc(file=io)
@@ -343,16 +352,6 @@ class Subprocess:
             
         return None
 
-    def addpidtohistory(self, pid):
-        self.pidhistory.append(pid)
-        if len(self.pidhistory) > 10: # max pid history to keep around is 10
-            self.pidhistory.pop(0)
-
-    def isoneofmypids(self, pid):
-        if pid == self.pid:
-            return True
-        return pid in self.pidhistory
-
     def governor(self):
         # Back off if respawning too frequently
         now = time.time()
@@ -366,7 +365,7 @@ class Subprocess:
                     self.backoff = self.options.backofflimit
                 else:
                     self.options.logger.critical(
-                        "%s: restarting too frequently; quit" % (
+                        "stopped: %s (restarting too frequently)" % (
                         self.config.name))
                     # stop trying
                     self.system_stop = 1
@@ -374,8 +373,9 @@ class Subprocess:
                     self.delay = 0
                     return
             self.options.logger.info(
-                "%s: sleep %s to avoid rapid restarts" % (self.config.name,
-                                                          self.backoff))
+                "backoff: %s (avoid rapid restarts %s)" % (
+                self.config.name,
+                self.backoff))
             self.delay = now + self.backoff
         else:
             # Reset the backoff timer
@@ -389,9 +389,10 @@ class Subprocess:
         pid, sts = self.waitstatus
         self.waitstatus = None
         es, msg = decode_wait_status(sts)
-        msg = "pid %d: " % pid + msg
-        if not self.isoneofmypids(pid):
-            msg = "unknown " + msg
+        process = self.options.pidhistory.get(pid)
+
+        if process is not self:
+            msg = "stopped: unknown " + msg
             self.options.logger.warn(msg)
         else:
             if self.killing:
@@ -400,15 +401,19 @@ class Subprocess:
             elif not es in self.config.exitcodes:
                 self.governor()
 
-            if self.pid:
-                self.addpidtohistory(self.pid)
             self.pid = 0
-            
             self.stdoutfd = self.stderrfd  = self.stdinfd = None
             self.stdout = self.stderr = self.stdin = None
+            processname = process.config.name
 
             if es in self.config.exitcodes and not self.killing:
-                msg = msg + "; OK"
+                msg = "exited: %s (%s)" % (processname,
+                                           msg + "; expected")
+            elif es != -1:
+                msg = "exited: %s (%s)" % (processname,
+                                           msg + "; not expected")
+            else:
+                msg = "killed: %s (%s)" % (processname, msg)
             self.options.logger.info(msg)
             self.exitstatus = es
         self.reportstatusmsg = msg
@@ -448,7 +453,7 @@ class Subprocess:
             return ProcessStates.STOPPING
         elif self.system_stop:
             return ProcessStates.ERROR
-        if self.administrative_stop:
+        elif self.administrative_stop:
             return ProcessStates.STOPPED
         elif not self.pid and self.delay:
             return ProcessStates.STARTING
@@ -459,7 +464,7 @@ class Subprocess:
                 return ProcessStates.KILLED
             elif self.exitstatus is not None:
                 return ProcessStates.EXITED
-            elif not self.pidhistory:
+            elif not self.beenstarted:
                 return ProcessStates.NOTSTARTED
             else:
                 return ProcessStates.UNKNOWN
@@ -517,17 +522,18 @@ class Supervisor:
         processes.sort() # asc by priority
 
         for p in processes:
-            if not p.pid and not p.delay:
-                if not p.pidhistory:
+            state = p.get_state()
+            if state not in (ProcessStates.STOPPED, ProcessStates.ERROR,
+                             ProcessStates.RUNNING, ProcessStates.STOPPING,
+                             ProcessStates.STARTING):
+                if state == ProcessStates.NOTSTARTED:
                     case = p.config.autostart
                 else:
                     case = p.config.autorestart
-                if case:
-                    if not p.administrative_stop and not p.system_stop:
-                        self.options.logger.info('(Re)starting %s' %
-                                                 p.config.name)
-                        p.spawn()
 
+                if case:
+                    p.spawn()
+            
     def handle_procs_with_waitstatus(self):
         processes = self.processes.values()
         for proc in processes:
@@ -577,21 +583,28 @@ class Supervisor:
                 self.options.logger.debug('EINTR during reap')
             pid, sts = None, None
         if pid:
-            self.options.logger.info('child with pid %s was reaped' % pid)
+            name = '<unknown>'
+            process = self.options.pidhistory.get(pid)
+            if process:
+                name = process.config.name
+            self.options.logger.debug('reaped %s (pid %s)' % (name ,pid))
             self.setwaitstatus(pid, sts)
             self.reap() # keep reaping until no more kids to reap
         return pid, sts
 
     def setwaitstatus(self, pid, sts):
         self.options.logger.debug('setwaitstatus called')
-        for name in self.processes.keys():
-            proc = self.processes[name]
-            if proc.isoneofmypids(pid):
-                self.options.logger.debug('set wait status on %s' % name)
-                proc.finaloutput = _readfd(proc.stdoutfd)
-                proc.waitstatus = pid, sts
-                proc.killing = 0
-                proc.laststop = time.time()
+        proc = self.options.pidhistory.get(pid)
+        if proc is None:
+            # this should never happen
+            self.options.logger.critical('cannot set wait status on pid %s'
+                                         % pid)
+            return
+        self.options.logger.debug('set wait status on %s' % proc.config.name)
+        proc.finaloutput = _readfd(proc.stdoutfd)
+        proc.waitstatus = pid, sts
+        proc.killing = 0
+        proc.laststop = time.time()
 
     def cleanup_fds(self):
         # try to close any unused file descriptors to prevent leakage.
@@ -728,8 +741,9 @@ class Supervisor:
 
     def setsignals(self):
         signal.signal(signal.SIGTERM, self.sigexit)
-        signal.signal(signal.SIGHUP, self.sighup)
         signal.signal(signal.SIGINT, self.sigexit)
+        signal.signal(signal.SIGQUIT, self.sigexit)
+        signal.signal(signal.SIGHUP, self.sighup)
         signal.signal(signal.SIGCHLD, self.sigchild)
         signal.signal(signal.SIGUSR2, self.sigreopenlog)
 
@@ -891,6 +905,7 @@ class Supervisor:
 
             self.handle_procs_with_delay()
             self.reap()
+
             if self.mustreopen:
                 self.logreopen()
                 self.mustreopen = False