Browse Source

Better error handling.

Chris McDonough 17 years ago
parent
commit
07fca695d9
3 changed files with 156 additions and 55 deletions
  1. 35 10
      src/supervisor/memmon.py
  2. 46 40
      src/supervisor/tests/base.py
  3. 75 5
      src/supervisor/tests/test_memmon.py

+ 35 - 10
src/supervisor/memmon.py

@@ -67,6 +67,7 @@ memmon.py -p program1=200MB -p theprog:thegroup=100MB -g thegroup=100MB -a 1GB -
 import os
 import sys
 import time
+import xmlrpclib
 
 from supervisor import childutils
 from supervisor.datatypes import byte_size
@@ -169,24 +170,48 @@ class Memmon:
 
     def restart(self, name, rss):
         self.stderr.write('Restarting %s\n' % name)
-        self.rpc.supervisor.stopProcess(name)
-        self.rpc.supervisor.startProcess(name)
+
+        try:
+            self.rpc.supervisor.stopProcess(name)
+        except xmlrpclib.Fault, what:
+            msg = ('Failed to stop process %s (RSS %s), exiting: %s' %
+                   (name, rss, what))
+            self.stderr.write(str(msg))
+            if self.email:
+                subject = 'memmon: failed to stop process %s, exiting' % name
+                self.mail(self.email, subject, msg)
+            raise
+
+        try:
+            self.rpc.supervisor.startProcess(name)
+        except xmlrpclib.Fault, what:
+            msg = ('Failed to start process %s after stopping it, '
+                   'exiting: %s' % (name, what))
+            self.stderr.write(str(msg))
+            if self.email:
+                subject = 'memmon: failed to start process %s, exiting' % name
+                self.mail(self.email, subject, msg)
+            raise
 
         if self.email:
             now = time.asctime()
             msg = (
                 'memmon.py restarted the process named %s at %s because '
-                'it was consuming too much memory (%s bytes RSS)\n' % (
+                'it was consuming too much memory (%s bytes RSS)' % (
                 name, now, rss)
                 )
             subject = 'memmon: process %s restarted' % name
-            m = os.popen(self.sendmail, 'w')
-            m.write('To: %s\n' % self.email)
-            m.write('Subject: %s\n' % subject)
-            m.write('\n')
-            m.write(msg)
-            m.close()
-            self.mailed = True
+            self.mail(self.email, subject, msg)
+
+    def mail(self, email, subject, msg):
+        body =  'To: %s\n' % self.email
+        body += 'Subject: %s\n' % subject
+        body += '\n'
+        body += msg
+        m = os.popen(self.sendmail, 'w')
+        m.write(body)
+        m.close()
+        self.mailed = body
         
 def parse_namesize(option, value):
     try:

+ 46 - 40
src/supervisor/tests/base.py

@@ -576,6 +576,47 @@ class DummySupervisorRPCNamespace:
     _restarted = False
     _shutdown = False
 
+
+    from supervisor.process import ProcessStates
+    all_process_info = [
+        {
+        'name':'foo',
+        'group':'foo',
+        'pid':11,
+        'state':ProcessStates.RUNNING,
+        'statename':'RUNNING',
+        'start':_NOW - 100,
+        'stop':0,
+        'spawnerr':'',
+        'now':_NOW,
+        'description':'foo description',
+        },
+        {
+        'name':'bar',
+        'group':'bar',
+        'pid':12,
+        'state':ProcessStates.FATAL,
+        'statename':'FATAL',
+        'start':_NOW - 100,
+        'stop':_NOW - 50,
+        'spawnerr':'screwed',
+        'now':_NOW,
+        'description':'bar description',
+        },
+        {
+        'name':'baz_01',
+        'group':'baz',
+        'pid':12,
+        'state':ProcessStates.STOPPED,
+        'statename':'STOPPED',
+        'start':_NOW - 100,
+        'stop':_NOW - 25,
+        'spawnerr':'',
+        'now':_NOW,
+        'description':'baz description',
+        },
+        ]
+
     def getAPIVersion(self):
         return '3.0'
 
@@ -597,46 +638,7 @@ class DummySupervisorRPCNamespace:
     readProcessStderrLog = readProcessStdoutLog
 
     def getAllProcessInfo(self):
-        from supervisor.process import ProcessStates
-        return [
-            {
-            'name':'foo',
-            'group':'foo',
-            'pid':11,
-            'state':ProcessStates.RUNNING,
-            'statename':'RUNNING',
-            'start':_NOW - 100,
-            'stop':0,
-            'spawnerr':'',
-            'now':_NOW,
-            'description':'foo description',
-             },
-            {
-            'name':'bar',
-            'group':'bar',
-            'pid':12,
-            'state':ProcessStates.FATAL,
-            'statename':'FATAL',
-            'start':_NOW - 100,
-            'stop':_NOW - 50,
-            'spawnerr':'screwed',
-            'now':_NOW,
-            'description':'bar description',
-             },
-            {
-            'name':'baz_01',
-            'group':'baz',
-            'pid':12,
-            'state':ProcessStates.STOPPED,
-            'statename':'STOPPED',
-            'start':_NOW - 100,
-            'stop':_NOW - 25,
-            'spawnerr':'',
-            'now':_NOW,
-            'description':'baz description',
-             },
-            ]
-                
+        return self.all_process_info
 
     def getProcessInfo(self, name):
         from supervisor.process import ProcessStates
@@ -656,6 +658,8 @@ class DummySupervisorRPCNamespace:
     def startProcess(self, name):
         from supervisor import xmlrpc
         from xmlrpclib import Fault
+        if name == 'BAD_NAME:BAD_NAME':
+            raise Fault(xmlrpc.Faults.BAD_NAME, 'BAD_NAME:BAD_NAME')
         if name == 'BAD_NAME':
             raise Fault(xmlrpc.Faults.BAD_NAME, 'BAD_NAME')
         if name == 'ALREADY_STARTED':
@@ -703,6 +707,8 @@ class DummySupervisorRPCNamespace:
     def stopProcess(self, name):
         from supervisor import xmlrpc
         from xmlrpclib import Fault
+        if name == 'BAD_NAME:BAD_NAME':
+            raise Fault(xmlrpc.Faults.BAD_NAME, 'BAD_NAME:BAD_NAME')
         if name == 'BAD_NAME':
             raise Fault(xmlrpc.Faults.BAD_NAME, 'BAD_NAME')
         if name == 'NOT_RUNNING':

+ 75 - 5
src/supervisor/tests/test_memmon.py

@@ -6,7 +6,7 @@ class MemmonTests(unittest.TestCase):
     def _getTargetClass(self):
         from supervisor.memmon import Memmon
         return Memmon
-
+    
     def _makeOne(self, *opts):
         return self._getTargetClass()(*opts)
 
@@ -50,7 +50,13 @@ class MemmonTests(unittest.TestCase):
         self.assertEqual(lines[5], 'RSS of baz:baz_01 is 2265088')
         self.assertEqual(lines[6], 'Restarting baz:baz_01')
         self.assertEqual(lines[7], '')
-        self.assertEqual(memmon.mailed, True)
+        mailed = memmon.mailed.split('\n')
+        self.assertEqual(len(mailed), 4)
+        self.assertEqual(mailed[0], 'To: chrism@plope.com')
+        self.assertEqual(mailed[1],
+                         'Subject: memmon: process baz:baz_01 restarted')
+        self.assertEqual(mailed[2], '')
+        self.failUnless(mailed[3].startswith('memmon.py restarted'))
 
     def test_runforever_tick_groups(self):
         programs = {}
@@ -66,7 +72,13 @@ class MemmonTests(unittest.TestCase):
         self.assertEqual(lines[1], 'RSS of foo:foo is 2264064')
         self.assertEqual(lines[2], 'Restarting foo:foo')
         self.assertEqual(lines[3], '')
-        self.assertEqual(memmon.mailed, True)
+        mailed = memmon.mailed.split('\n')
+        self.assertEqual(len(mailed), 4)
+        self.assertEqual(mailed[0], 'To: chrism@plope.com')
+        self.assertEqual(mailed[1],
+          'Subject: memmon: process foo:foo restarted')
+        self.assertEqual(mailed[2], '')
+        self.failUnless(mailed[3].startswith('memmon.py restarted'))
 
     def test_runforever_tick_any(self):
         programs = {}
@@ -86,7 +98,8 @@ class MemmonTests(unittest.TestCase):
         self.assertEqual(lines[5], 'RSS of baz:baz_01 is 2265088')
         self.assertEqual(lines[6], 'Restarting baz:baz_01')
         self.assertEqual(lines[7], '')
-        self.assertEqual(memmon.mailed, True)
+        mailed = memmon.mailed.split('\n')
+        self.assertEqual(len(mailed), 4)
 
     def test_runforever_tick_programs_and_groups(self):
         programs = {'baz_01':0}
@@ -105,7 +118,13 @@ class MemmonTests(unittest.TestCase):
         self.assertEqual(lines[4], 'RSS of baz:baz_01 is 2265088')
         self.assertEqual(lines[5], 'Restarting baz:baz_01')
         self.assertEqual(lines[6], '')
-        self.assertEqual(memmon.mailed, True)
+        mailed = memmon.mailed.split('\n')
+        self.assertEqual(len(mailed), 4)
+        self.assertEqual(mailed[0], 'To: chrism@plope.com')
+        self.assertEqual(mailed[1],
+                         'Subject: memmon: process baz:baz_01 restarted')
+        self.assertEqual(mailed[2], '')
+        self.failUnless(mailed[3].startswith('memmon.py restarted'))
 
     def test_runforever_tick_programs_norestart(self):
         programs = {'foo': sys.maxint}
@@ -122,6 +141,57 @@ class MemmonTests(unittest.TestCase):
         self.assertEqual(lines[2], '')
         self.assertEqual(memmon.mailed, False)
 
+    def test_stopprocess_fault_tick_programs_norestart(self):
+        programs = {'foo': sys.maxint}
+        groups = {}
+        any = None
+        memmon = self._makeOnePopulated(programs, groups, any)
+        memmon.stdin.write('eventname:TICK len:0\n')
+        memmon.stdin.seek(0)
+        memmon.runforever(test=True)
+        lines = memmon.stderr.getvalue().split('\n')
+        self.assertEqual(len(lines), 3)
+        self.assertEqual(lines[0], 'Checking programs foo=%s' % sys.maxint)
+        self.assertEqual(lines[1], 'RSS of foo:foo is 2264064')
+        self.assertEqual(lines[2], '')
+        self.assertEqual(memmon.mailed, False)
+
+    def test_stopprocess_fails_to_stop(self):
+        programs = {'BAD_NAME': 0}
+        groups = {}
+        any = None
+        memmon = self._makeOnePopulated(programs, groups, any)
+        memmon.stdin.write('eventname:TICK len:0\n')
+        memmon.stdin.seek(0)
+        from supervisor.process import ProcessStates
+        memmon.rpc.supervisor.all_process_info =  [ {
+            'name':'BAD_NAME',
+            'group':'BAD_NAME',
+            'pid':11,
+            'state':ProcessStates.RUNNING,
+            'statename':'RUNNING',
+            'start':0,
+            'stop':0,
+            'spawnerr':'',
+            'now':0,
+            'description':'BAD_NAME description',
+             } ]
+        import xmlrpclib
+        self.assertRaises(xmlrpclib.Fault, memmon.runforever, True)
+        lines = memmon.stderr.getvalue().split('\n')
+        self.assertEqual(len(lines), 4)
+        self.assertEqual(lines[0], 'Checking programs BAD_NAME=%s' % 0)
+        self.assertEqual(lines[1], 'RSS of BAD_NAME:BAD_NAME is 2264064')
+        self.assertEqual(lines[2], 'Restarting BAD_NAME:BAD_NAME')
+        self.failUnless(lines[3].startswith('Failed'))
+        mailed = memmon.mailed.split('\n')
+        self.assertEqual(len(mailed), 4)
+        self.assertEqual(mailed[0], 'To: chrism@plope.com')
+        self.assertEqual(mailed[1],
+          'Subject: memmon: failed to stop process BAD_NAME:BAD_NAME, exiting')
+        self.assertEqual(mailed[2], '')
+        self.failUnless(mailed[3].startswith('Failed'))
+
 def test_suite():
     return unittest.findTestCases(sys.modules[__name__])