summary refs log tree commit diff
path: root/watchdog.rb
diff options
context:
space:
mode:
Diffstat (limited to 'watchdog.rb')
-rwxr-xr-x watchdog.rb 121
1 files changed, 121 insertions, 0 deletions
diff --git a/watchdog.rb b/watchdog.rb
new file mode 100755
index 0000000..e6b82e8
--- /dev/null
+++ b/watchdog.rb
@@ -0,0 +1,121 @@
+#!/usr/bin/env ruby
+#
+# watchdog.rb: Watchdog component.
+#
+# Copyright (C) 2011 VMware, Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+require 'thread'
+require 'ffi-rzmq'
+require 'socket'
+
+trap("INT") { exit }
+trap("TERM") { exit }
+# Raise a RuntimeError carrying ZMQ's error string when +rc+ is not a
+# successful result code; otherwise do nothing and return nil.
+def raise_if_error(rc)
+  return if ZMQ::Util.resultcode_ok?(rc)
+
+  raise "ZMQ Error: #{ZMQ::Util.error_string}"
+end
+
+if ARGV.length != 1
+ print "usage: watchdog <notifier-host>\n"
+ exit 1
+end
+
+# Endpoints: status goes to the notifier host named on the command line,
+# control messages arrive on a locally bound port.
+notifier_ep = "tcp://#{ARGV.first}:7770"
+control_ep = "tcp://*:7772"
+
+ctx = ZMQ::Context.new
+
+# PUB socket connected to the notifier endpoint.
+notifier_sock = ctx.socket(ZMQ::PUB)
+raise_if_error(notifier_sock.connect(notifier_ep))
+
+# PULL socket bound to the control endpoint.
+control_sock = ctx.socket(ZMQ::PULL)
+raise_if_error(control_sock.bind(control_ep))
+
+# Poller that reports when the control socket is readable.
+poller = ZMQ::Poller.new
+poller.register_readable(control_sock)
+
+# Tracked child processes, keyed by the first word of their command;
+# each value is a hash holding :pid and :command.
+children = {}
+
+# Delete process if it went away
+trap("CHLD") do
+ pid = Process.wait
+ children.each do |k, v|
+ if v[:pid] == pid
+ children.delete(k)
+ end
+ end
+end
+
+# Kill any children on exit
+trap("TERM") do
+ children.each do |k, v|
+ Process.kill("KILL", v[:pid])
+ end
+end
+
+#
+# Main loop
+#
+loop do
+ ready = poller.poll(1)
+
+ # Process commands, if any.
+ if (ready == 1 && poller.readables.include?(control_sock))
+ msg = ''
+ rc = control_sock.recv_string(msg)
+ raise_if_error(rc)
+ if (msg =~ /^START (.+)$/)
+ command = $1
+ key = command.split()[0]
+ unless children.has_key?(key)
+ exec_command = "./" + key
+ if File.exists?(exec_command) && File.executable?(exec_command)
+ children[key] = {}
+ children[key][:pid] = Process.spawn("./" + command)
+ children[key][:command] = command
+ end
+ end
+ elsif (msg =~ /^STOP (.+)$/)
+ command = $1
+ key = command.split()[0]
+ if children.has_key?(key)
+ Process.kill("KILL", children[key][:pid])
+ while children.has_key?(key)
+ # nothing
+ end
+ end
+ end
+ # Otherwise, send out status messages to notifier.
+ else
+ status = "WATCHDOG ALIVE " + Socket.gethostname + " " + Time.now.to_f.to_s
+ rc = notifier_sock.send_string(status)
+ raise_if_error(rc)
+ children.each do |k, v|
+ status = "WATCHDOG RUNNING " + Socket.gethostname + \
+ " " + Time.now.to_f.to_s + " " + v[:command]
+ rc = notifier_sock.send_string(status)
+ raise_if_error(rc)
+ end
+ end
+end
+