diff options
Diffstat (limited to 'watchdog.rb')
-rwxr-xr-x | watchdog.rb | 121 |
1 files changed, 121 insertions, 0 deletions
diff --git a/watchdog.rb b/watchdog.rb new file mode 100755 index 0000000..e6b82e8 --- /dev/null +++ b/watchdog.rb @@ -0,0 +1,121 @@ +#!/usr/bin/env ruby +# +# watchdog.rb: Watchdog component. +# +# Copyright (C) 2011 VMware, Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
require 'thread'
require 'ffi-rzmq'
require 'socket'
require 'shellwords'

# Exit on Ctrl-C or a termination request.  Children are killed by the
# at_exit hook registered below, so both signals share the same cleanup.
trap("INT") { exit }
trap("TERM") { exit }

# Raises a RuntimeError carrying the ZMQ error text unless +rc+ is a
# successful ZMQ result code.
def raise_if_error(rc)
  return if ZMQ::Util.resultcode_ok?(rc)

  raise "ZMQ Error: #{ZMQ::Util.error_string}"
end

# Starts the executable ./<key> with the arguments found in +command+.
#
# The command line arrives over the network, so it is split into an argv
# array and executed directly instead of being handed to a shell: shell
# metacharacters in the arguments are passed through literally.
#
# Returns the pid of the new process, or nil when the command line is
# malformed (e.g. unbalanced quotes) or the process cannot be started.
def spawn_child(key, command)
  program = "./#{key}"
  args = Shellwords.split(command).drop(1)
  # The [path, argv0] form guarantees that no shell is involved, even when
  # there are no arguments.
  Process.spawn([program, program], *args)
rescue ArgumentError, SystemCallError => e
  warn "watchdog: cannot start #{command.inspect}: #{e.message}"
  nil
end

# Kills the process +pid+ and waits until it has been reaped.  A process that
# has already gone away is not an error.
def stop_child(pid)
  Process.kill("KILL", pid)
  Process.wait(pid)
rescue Errno::ESRCH, Errno::ECHILD
  # Already dead and/or already reaped.
end

# Reaps every child that has exited (without blocking) and removes it from
# +children+, so that it is no longer reported as running and can be started
# again.
def reap_exited(children)
  return if children.empty?

  while (pid = Process.wait(-1, Process::WNOHANG))
    children.delete_if { |_key, child| child[:pid] == pid }
  end
rescue Errno::ECHILD
  # No child processes exist at all; nothing to reap.
end

if ARGV.length != 1
  print "usage: watchdog <notifier-host>\n"
  exit 1
end

notifier_ep = "tcp://#{ARGV[0]}:7770"
control_ep = "tcp://*:7772"

ctx = ZMQ::Context.new
notifier_sock = ctx.socket(ZMQ::PUB)
raise_if_error(notifier_sock.connect(notifier_ep))

control_sock = ctx.socket(ZMQ::PULL)
raise_if_error(control_sock.bind(control_ep))

poller = ZMQ::Poller.new
poller.register_readable(control_sock)

# Running children, keyed by the first word of their command line:
#   key => { pid: Integer, command: String }
children = {}

# Kill any children on exit.
at_exit do
  children.each_value do |child|
    begin
      Process.kill("KILL", child[:pid])
    rescue Errno::ESRCH
      # The child is already gone.
    end
  end
end

#
# Main loop
#
# Control messages understood on the control socket:
#   "START <command>"  start ./<command> unless a child with the same first
#                      word is already running
#   "STOP <command>"   kill the child registered under the first word
# Whenever no control message is pending, status messages are published to
# the notifier.
#
loop do
  # Delete processes that went away since the last iteration.
  reap_exited(children)

  ready = poller.poll(1)

  # Process commands, if any.
  if ready == 1 && poller.readables.include?(control_sock)
    msg = ''
    raise_if_error(control_sock.recv_string(msg))

    case msg
    when /^START (.+)$/
      command = Regexp.last_match(1)
      key = command.split.first
      # key is nil when the command consists of whitespace only.
      if key && !children.key?(key)
        exec_command = "./" + key
        # NOTE(review): key is not checked for "/" or "..", so a sender can
        # name any executable reachable from the working directory - confirm
        # whether that is intended.
        if File.exist?(exec_command) && File.executable?(exec_command)
          pid = spawn_child(key, command)
          children[key] = { pid: pid, command: command } if pid
        end
      end
    when /^STOP (.+)$/
      key = Regexp.last_match(1).split.first
      child = children.delete(key)
      stop_child(child[:pid]) if child
    end
  # Otherwise, send out status messages to notifier.
  else
    host = Socket.gethostname
    status = "WATCHDOG ALIVE #{host} #{Time.now.to_f}"
    raise_if_error(notifier_sock.send_string(status))

    children.each_value do |child|
      status = "WATCHDOG RUNNING #{host} #{Time.now.to_f} #{child[:command]}"
      raise_if_error(notifier_sock.send_string(status))
    end
  end
end