[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Condor-devel] LSB-ish init script for Condor
- Date: Fri, 15 Feb 2008 15:25:59 -0600
- From: Matthew Farrellee <matt@xxxxxxxxxxx>
- Subject: Re: [Condor-devel] LSB-ish init script for Condor
Todd Tannenbaum wrote:
Dan Bradley wrote:
I think it would be better to send the master SIGHUP rather than using
condor_reconfig. The standard security settings do not allow
condor_reconfig to work on most nodes, and there is no error checking
on the client side, so the user would not know that the operation
failed. Of course, those problems should be fixed too, but for the
stable series, I think SIGHUP is better.
All in all, I agree with Dan. On Unix the only advantage of using
condor_reconfig in this script would be you could do condor_reconfig -full.
I think this would be ok to put in 7.0.x.
regards,
Todd
Attached is a new patch that I plan on committing. It sends a -HUP
instead of using condor_reconfig and adds more of a delay between the
-TERM (graceful shutdown) and -KILL (fast shutdown) when stopping the
condor_master.
Best,
matt
diff --git a/src/condor_examples/Imakefile b/src/condor_examples/Imakefile
index 34c85cc..b7ced63 100644
--- a/src/condor_examples/Imakefile
+++ b/src/condor_examples/Imakefile
@@ -9,7 +9,8 @@ FILES = README condor_config.local.central.manager \
condor_vmgahp_config.xen \
condor_vmgahp_config.vmware \
lamscript \
- mp1script
+ mp1script \
+ condor.init
all::
diff --git a/src/condor_examples/condor.init b/src/condor_examples/condor.init
new file mode 100644
index 0000000..1d25a33
--- /dev/null
+++ b/src/condor_examples/condor.init
@@ -0,0 +1,174 @@
+#!/bin/bash
+#
+# condor This script allows for starting and stopping Condor.
+#
+# chkconfig: - 90 10
+# description: Condor is a high throughput computing batch processing
+# platform.
+# processname: condor_master
+# config: /etc/condor/condor_config
+# pidfile: /var/lib/condor/condor_master.pid
+
+### BEGIN INIT INFO
+# Provides: condor
+# Required-Start: $local_fs $network
+# Required-Stop: $local_fs $network
+# Short-Description: start and stop Condor
+# Description: Condor HTC computing platform
+### END INIT INFO
+
+# The program being managed
+prog=condor_master
+
+lockfile=/var/lock/subsys/$prog
+pidfile=/var/lib/condor/$prog.pid
+
+# Source function library
+. /etc/init.d/functions
+
+# Source networking configuration
+[ -f /etc/sysconfig/network ] && . /etc/sysconfig/network
+
+# Source Condor configuration
+[ -f /etc/sysconfig/condor ] && . /etc/sysconfig/condor
+
+# Check that networking is up
+[ "${NETWORKING}" = "no" ] && exit 1
+
+
+#
+# Launch $prog through the function library's daemon helper, telling
+# both the helper and $prog itself where the pid file lives. The subsys
+# lock file is created only when the launch reports success.
+#
+# Result: exit status of the daemon helper (also left in RETVAL)
+#
+start() {
+    echo -n $"Starting Condor daemons: "
+    if daemon --pidfile $pidfile --check $prog $prog -pidfile $pidfile; then
+        RETVAL=0
+    else
+        RETVAL=$?
+    fi
+    echo
+    if [ $RETVAL -eq 0 ]; then
+        touch $lockfile
+    fi
+    return $RETVAL
+}
+
+stop() {
+ echo -n $"Stopping Condor daemons: "
+ killproc -p $pidfile $prog
+ RETVAL=$?
+ echo
+ [ $RETVAL -eq 0 ] && rm -f $lockfile
+ return $RETVAL
+}
+
+reload() {
+ echo -n $"Reloading Condor daemons: "
+ condor_reconfig # Always returns 0?
+ RETVAL=$?
+ echo
+ return $RETVAL
+}
+
+#
+# Determine if a process is running only by looking in a pidfile.
+# There is no use of pidof, which can find processes that are not
+# started by this script.
+#
+# ASSUMPTION: The pidfile will exist if the process does, see false
+# negative warning.
+#
+# WARNING: A false positive is possible if the process that dropped
+# the pid file has crashed and the pid has been recycled. A false
+# negative is possible if the process has not yet dropped the pidfile,
+# or it contains the incorrect pid.
+#
+# Usage: pid_status <pidfile> <lockfile>
+# Result: 0 = pid exists
+#         1 = pid does not exist, but pidfile does
+#         2 = pid does not exist, but lockfile does
+#         3 = pidfile does not exist, thus pid does not exist
+#         4 = status unknown (pidfile unreadable or empty)
+#
+pid_status() {
+    local pid
+
+    # No pidfile at all: by assumption the process does not exist
+    [ -f "$1" ] || return 3
+
+    # this can fail if we're not privileged
+    pid=$(cat "$1" 2>/dev/null)
+    if [ $? -ne 0 ] || [ -z "$pid" ]; then
+        return 4
+    fi
+
+    # -p makes ps treat the pidfile content strictly as a process ID;
+    # without it, stray text in the pidfile would be parsed as ps
+    # options and could be reported as a running process.
+    if ps -p "$pid" &>/dev/null; then
+        return 0
+    fi
+
+    # The process is gone; say which stale artifact was left behind
+    if [ -e "$2" ]; then
+        return 2
+    fi
+
+    return 1
+}
+
+
+pid_status $pidfile $lockfile
+running=$?
+
+if [ "$1" != "status" ]; then
+ # Report that $prog does not exist, or is not executable
+ if [ ! -x /usr/sbin/$prog ]; then
+ echo $"$0: error: program not installed"
+ exit 5
+ fi
+
+ [ $running -eq 4 ] && echo $"$0: error: insufficient privileges" && exit 7
+fi
+
+case "$1" in
+ start)
+ [ $running -eq 0 ] && exit 0
+ start
+ RETVAL=$?
+ ;;
+ stop)
+ [ $running -eq 0 ] || exit 0
+ stop
+ RETVAL=$?
+ ;;
+ restart)
+ [ $running -eq 0 ] && stop
+ start
+ RETVAL=$?
+ ;;
+ try-restart)
+ [ $running -eq 0 ] || exit 0
+ stop
+ start
+ RETVAL=$?
+ ;;
+ reload|force-reload)
+ if [ $running -ne 0 ]; then
+ echo $"$0: error: $prog is not running"
+ exit 7
+ fi
+ reload
+ RETVAL=$?
+ ;;
+ status)
+ if [ $running -ne 0 ]; then
+ case "$running" in
+ 1) echo $"$prog dead but pid file exists" ;;
+ 2) echo $"$prog dead but subsys locked" ;;
+ 3) echo $"$prog is stopped" ;;
+ 4) echo $"$prog status is unknown" ;;
+ esac
+
+ exit $running
+ fi
+
+ # WARNING: status uses pidof and may find more pids than it
+ # should.
+ status -p $pidfile $prog
+ RETVAL=$?
+ ;;
+ *)
+ echo $"Usage: $0 {start|stop|restart|try-restart|reload|force-reload|status}"
+ RETVAL=2
+esac
+
+exit $RETVAL