Ticket #411: 0008-Adding-keepalives-to-the-bitten-client-server-protoc.patch
| File 0008-Adding-keepalives-to-the-bitten-client-server-protoc.patch, 9.8 KB (added by wbell, 3 years ago) |
|---|
-
bitten/__init__.py
From 8e3bf0521b11f9648f566bad52dd1f875b3e61ca Mon Sep 17 00:00:00 2001 From: Walter W. Bell <wwb2@cornell.edu> Date: Wed, 21 Apr 2010 21:33:19 -0400 Subject: [PATCH 8/8] Adding keepalives to the bitten client/server protocol. Keepalives now keep builds running as long as the slave is up, rather than having them time out regardless of whether the slave is still working on them. This requires an update of both the master and the slave; I've updated the PROTOCOL_VERSION field because older masters would kill builds for newer slaves that attempted to send keepalives. Closes #411. --- bitten/__init__.py | 2 +- bitten/master.py | 18 +++++++++++- bitten/slave.py | 76 +++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 90 insertions(+), 6 deletions(-) diff --git a/bitten/__init__.py b/bitten/__init__.py index 50e5f75..670f083 100644
a b 19 19 pass 20 20 21 21 # The master-slave protocol/configuration version 22 PROTOCOL_VERSION = 322 PROTOCOL_VERSION = 4 -
bitten/master.py
diff --git a/bitten/master.py b/bitten/master.py index d6bbcd9..24cbe52 100644
a b 128 128 129 129 if req.args['collection'] == 'steps': 130 130 return self._process_build_step(req, config, build) 131 elif req.args['collection'] == 'keepalive': 132 return self._process_keepalive(req, config, build) 131 133 else: 132 134 self._send_error(req, HTTP_NOT_FOUND, 133 135 "No such collection '%s'" % req.args['collection']) … … 162 164 self.log.error('Error parsing build initialization request: %s', e, 163 165 exc_info=True) 164 166 self._send_error(req, HTTP_BAD_REQUEST, 'XML parser error') 165 167 166 168 slave_version = int(elem.attr.get('version', 1)) 167 169 if slave_version != PROTOCOL_VERSION: 168 170 self._send_error(req, HTTP_BAD_REQUEST, … … 399 401 'Location': req.abs_href.builds( 400 402 build.id, 'steps', stepname)}) 401 403 404 def _process_keepalive(self, req, config, build): 405 build.last_activity = int(time.time()) 406 build.update() 407 408 self.log.info('Slave %s build %d keepalive ("%s" as of [%s])', 409 build.slave, build.id, build.config, build.rev) 410 411 body = 'Keepalive processed' 412 self._send_response(req, 200, body, { 413 'Content-Type': 'text/plain', 414 'Content-Length': str(len(body)), 415 'Location': req.abs_href.builds( 416 build.id, 'keepalive')}) 417 402 418 def _start_new_step(self, build, stepname): 403 419 """Creates the in-memory representation for a newly started 404 420 step, ready to be persisted to the database. -
bitten/slave.py
diff --git a/bitten/slave.py b/bitten/slave.py index 04a8d0f..cb960b0 100755
a b 24 24 import time 25 25 import re 26 26 import cookielib 27 import threading 28 import os 27 29 from ConfigParser import MissingSectionHeaderError 28 30 29 31 from bitten import PROTOCOL_VERSION … … 71 73 self.method = self.has_data() and 'POST' or 'GET' 72 74 return self.method 73 75 76 class KeepAliveThread(threading.Thread): 77 "A thread to periodically send keep-alive messages to the master" 78 79 def __init__(self, opener, build_url, single_build, keepalive_interval): 80 threading.Thread.__init__(self, None, None, "KeepaliveThread") 81 self.build_url = build_url 82 self.keepalive_interval = keepalive_interval 83 self.single_build = single_build 84 self.last_keepalive = int(time.time()) 85 self.kill = False 86 self.opener = opener 87 88 def keepalive(self): 89 log.debug('Sending keepalive') 90 method = 'POST' 91 url = self.build_url + '/keepalive/' 92 body = None 93 shutdown = False 94 headers = { 95 'Content-Type': 'application/x-bitten+xml' 96 } 97 98 log.debug('Sending %s request to %r', method, url) 99 req = SaneHTTPRequest(method, url, body, headers or {}) 100 try: 101 return self.opener.open(req) 102 except urllib2.HTTPError, e: 103 # a conflict error lets us know that we've been 104 # invalidated. Ideally, we'd engineer something to stop any 105 # running steps in progress, but killing threads is tricky 106 # stuff. For now, we'll wait for whatever's going 107 # on to stop, and the main thread'll figure out that we've 108 # been invalidated. 
109 log.warning('Server returned keepalive error %d: %s', e.code, e.msg) 110 except: 111 log.warning('Server returned unknown keepalive error') 112 113 def run(self): 114 log.debug('Keepalive thread starting.') 115 while (not self.kill): 116 now = int(time.time()) 117 if (self.last_keepalive + self.keepalive_interval) < now: 118 self.keepalive() 119 self.last_keepalive = now 120 121 time.sleep(1) 122 log.debug('Keepalive thread exiting.') 123 124 def stop(self): 125 log.debug('Stopping keepalive thread') 126 self.kill = True 127 self.join(30) 128 log.debug('Keepalive thread stopped') 129 74 130 75 131 class BuildSlave(object): 76 132 """HTTP client implementation for the build slave.""" … … 78 134 def __init__(self, urls, name=None, config=None, dry_run=False, 79 135 work_dir=None, build_dir="build_${build}", 80 136 keep_files=False, single_build=False, 81 poll_interval=300, username=None, password=None, 137 poll_interval=300, keepalive_interval = 60, 138 username=None, password=None, 82 139 dump_reports=False, no_loop=False, form_auth=False): 83 140 """Create the build slave instance. 
84 141 … … 98 155 :param poll_interval: the time in seconds to wait between requesting 99 156 builds from the build master (default is five 100 157 minutes) 158 :param keepalive_interval: the time in seconds to wait between sending 159 keepalive heartbeats (default is 60 seconds) 101 160 :param username: the username to use when authentication against the 102 161 build master is requested 103 162 :param password: the password to use when authentication is needed … … 127 186 self.single_build = single_build 128 187 self.no_loop = no_loop 129 188 self.poll_interval = poll_interval 189 self.keepalive_interval = keepalive_interval 130 190 self.dump_reports = dump_reports 131 191 self.cookiejar = cookielib.CookieJar() 132 192 self.username = username \ … … 169 229 ).startswith('text/plain'): 170 230 content = e.read() 171 231 else: 172 content = 'Unknown cause of error' 173 e.msg = '%s (%s)' % (e.msg, content) 232 content = 'no message available' 233 log.debug('Server returned error %d: %s (%s)', 234 e.code, e.msg, content) 174 235 raise 175 236 return e 176 237 … … 294 355 build_id = build_url and int(build_url.split('/')[-1]) or 0 295 356 xml = xmlio.parse(fileobj) 296 357 basedir = '' 358 keepalive_thread = KeepAliveThread(self.opener, build_url, self.single_build, self.keepalive_interval) 297 359 try: 360 if not self.local: 361 keepalive_thread.start() 298 362 recipe = Recipe(xml, os.path.join(self.work_dir, self.build_dir), 299 363 self.config) 300 364 basedir = recipe.ctxt.basedir … … 316 380 if self.dry_run: 317 381 self._cancel_build(build_url) 318 382 finally: 383 keepalive_thread.stop() 319 384 if not self.keep_files and os.path.isdir(basedir): 320 385 log.debug('Removing build directory %s' % basedir) 321 386 _rmtree(basedir) … … 431 496 help='don\'t report results back to master') 432 497 group.add_option('-i', '--interval', dest='interval', metavar='SECONDS', 433 498 type='int', help='time to wait between requesting builds') 499 group.add_option('-b', 
'--keepalive_interval', dest='keepalive_interval', metavar='SECONDS', type='int', help='time to wait between keepalive heartbeats') 434 500 group = parser.add_option_group('logging') 435 501 group.add_option('-l', '--log', dest='logfile', metavar='FILENAME', 436 502 help='write log messages to FILENAME') … … 443 509 444 510 parser.set_defaults(dry_run=False, keep_files=False, 445 511 loglevel=logging.INFO, single_build=False, no_loop=False, 446 dump_reports=False, interval=300, form_auth=False) 512 dump_reports=False, interval=300, keepalive_interval=60, 513 form_auth=False) 447 514 options, args = parser.parse_args() 448 515 449 516 if len(args) < 1: … … 477 544 single_build=options.single_build, 478 545 no_loop=options.no_loop, 479 546 poll_interval=options.interval, 547 keepalive_interval=options.keepalive_interval, 480 548 username=options.username, password=options.password, 481 549 dump_reports=options.dump_reports, 482 550 form_auth=options.form_auth)
