#!/usr/bin/env ruby # # watchdog.rb: Watchdog component. # # Copyright (C) 2011 VMware, Inc. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in # all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
require 'thread'
require 'ffi-rzmq'
require 'socket'
require 'shellwords'

# Registry of supervised processes, keyed by the executable name (the first
# word of the START command). Each value is a hash holding the child's :pid
# and the original :command string that is echoed in status messages.
children = {}

# Raises a RuntimeError carrying ZeroMQ's error text when +rc+ is not a
# successful result code; returns nil otherwise.
def raise_if_error(rc)
  return if ZMQ::Util.resultcode_ok?(rc)

  raise "ZMQ Error: #{ZMQ::Util.error_string}"
end

# Splits a command line into words, honouring shell-style quoting but without
# invoking a shell. Returns an empty array when the quoting is malformed.
def parse_command(command)
  Shellwords.split(command)
rescue ArgumentError
  []
end

# Sends SIGKILL to every registered child, ignoring children that have
# already exited.
def kill_children(children)
  children.each_value do |child|
    begin
      Process.kill('KILL', child[:pid])
    rescue Errno::ESRCH
      # The process is already gone; nothing left to kill.
    end
  end
end

# Reaps every child that has exited so far and drops it from the registry.
# Several exits can be reported by a single SIGCHLD, so keep collecting until
# no further exited child is available.
def reap_children(children)
  loop do
    pid = Process.wait(-1, Process::WNOHANG)
    break if pid.nil?

    children.delete_if { |_key, child| child[:pid] == pid }
  end
rescue Errno::ECHILD
  # No child processes remain to be waited for.
  nil
end

# Handles "START <command>": spawns ./<first word> with the remaining words as
# arguments, unless a child with that key is already registered or the file is
# missing / not executable.
def start_child(children, command)
  argv = parse_command(command)
  key = argv.first
  return if key.nil? || children.key?(key)

  executable = "./#{key}"
  return unless File.exist?(executable) && File.executable?(executable)

  # The command arrives over the network. The [path, argv0] form guarantees
  # that no shell ever interprets it, so metacharacters are passed literally.
  # NOTE(review): +key+ may still contain "../" segments; restrict it if the
  # control socket is reachable by untrusted peers.
  pid = Process.spawn([executable, executable], *argv.drop(1))
  children[key] = { :pid => pid, :command => command }
rescue SystemCallError => e
  # A failed spawn must not take the whole watchdog down.
  warn "watchdog: cannot start #{command}: #{e.message}"
end

# Handles "STOP <command>": kills the child registered under the command's
# first word and blocks until it has been collected.
def stop_child(children, command)
  key = parse_command(command).first
  child = children[key]
  return if child.nil?

  begin
    Process.kill('KILL', child[:pid])
    Process.wait(child[:pid])
  rescue Errno::ESRCH, Errno::ECHILD
    # Either the child had already exited, or the CHLD handler collected it
    # before this wait did.
  end
  children.delete(key)
end

# Both signals simply exit; the at_exit hook then kills the children, so the
# same cleanup runs for signals, the usage error and uncaught exceptions.
trap('INT') { exit }
trap('TERM') { exit }
at_exit { kill_children(children) }

# Drop children from the registry as soon as they go away.
trap('CHLD') { reap_children(children) }

if ARGV.length != 1
  puts 'usage: watchdog <notifier_host>'
  exit 1
end

notifier_ep = "tcp://#{ARGV[0]}:7770"
control_ep = 'tcp://*:7772'

ctx = ZMQ::Context.new

# Status messages are published to the notifier ...
notifier_sock = ctx.socket(ZMQ::PUB)
raise_if_error(notifier_sock.connect(notifier_ep))

# ... and START/STOP commands are pulled from the control endpoint.
control_sock = ctx.socket(ZMQ::PULL)
raise_if_error(control_sock.bind(control_ep))

poller = ZMQ::Poller.new
poller.register_readable(control_sock)

#
# Main loop
#
loop do
  # NOTE(review): the timeout value is passed through unchanged from the
  # original code; confirm its unit against the ffi-rzmq version in use, since
  # it determines how often the status messages below are published.
  ready = poller.poll(1)

  if ready == 1 && poller.readables.include?(control_sock)
    # Process a command.
    msg = ''
    raise_if_error(control_sock.recv_string(msg))

    case msg
    when /^START (.+)$/ then start_child(children, Regexp.last_match(1))
    when /^STOP (.+)$/ then stop_child(children, Regexp.last_match(1))
    end
  else
    # Otherwise, send out status messages to the notifier.
    host = Socket.gethostname
    raise_if_error(notifier_sock.send_string("WATCHDOG ALIVE #{host} #{Time.now.to_f}"))

    # Iterate over a snapshot: the CHLD handler may shrink the registry while
    # the loop is running.
    children.values.each do |child|
      status = "WATCHDOG RUNNING #{host} #{Time.now.to_f} #{child[:command]}"
      raise_if_error(notifier_sock.send_string(status))
    end
  end
end