Fence na: Difference between revisions

Latest revision as of 17:03, 25 August 2010

This is the core fence agent that exists in /sbin/.

Download the source code.

#!/usr/bin/perl
#
# Node Assassin - Fence Agent
# Digimer; digimer@alteeve.com
# Jun. 27, 2010
# Version: 1.1.5
#
# This software is released under the GPL v2. See the LICENSE file in the
# configuration directory for a copy of the GPL v2.
#
# Bugs;
# - None known, many expected
# 

# Play safe!
use strict;
use warnings;

# Load our library.
require '/etc/fence_na/fence_na.lib';

# IO::Handle is used for logging and Net::Telnet is used for communicating with
# the Node Assassin(s).
use IO::Handle;
use Net::Telnet;

# These are the default values and will be over-written by the config file's
# variables which in turn can, in some cases, be over-written by command line
# arguments.
# Please see '/etc/fence_na/fence_na.conf' for details on each option.
#
# Layout:
#  - 'system' holds agent-wide settings and run-time flags.
#  - 'na' holds the values for the Node Assassin being called and the action
#    requested of it.
# Each key is listed exactly once; a repeated key in a hash constructor
# silently replaces the earlier value, which hides the real default.
my $conf={
	'system'	=>	{
		max_valid_state	=>	3,
		conf_file	=>	"/etc/fence_na/fence_na.conf",
		quiet		=>	"",
		version		=>	0,
		list		=>	"",
		monitor		=>	"",
		na_id		=>	0,
		got_cla		=>	0,	# This is set if command line arguments are read.
		debug		=>	0,
	},
	na		=>	{
		ipaddr		=>	"",
		tcp_port	=>	"238",
		port		=>	"",
		login		=>	"",
		passwd		=>	"",
		passwd_script	=>	"",
		action		=>	"",
		agent		=>	"",	# This is only used by 'fenced'
		na_name		=>	"",	# This is used for the 'list' function.
		handle		=>	"",
		max_node	=>	0,	# NOTE(review): kept in case the library reads this spelling; confirm and retire.
		max_nodes	=>	0,	# This is the key the lookup loop below actually fills in.
		set_state	=>	[],	# This array will store the states to set based on the action passed for the proper ports.
	}
};

# Read the configuration file. This call can't pass in the '$log' handle,
# obviously, as it does not yet exist.
read_conf($conf);

# Log file for output. The log path is not among the defaults above, so it
# has to have been set by the time we get here; stop with a clear message if
# it was not, rather than failing on an empty file name.
my $log=IO::Handle->new();
my $log_file=$conf->{'system'}{'log'};
die "No log file is set (system::log is empty); check: [$conf->{'system'}{conf_file}].\n" if ((not defined $log_file) or ($log_file eq ""));
print "Opening: [$log_file] for logging.\n"  if $conf->{'system'}{debug};

# The three-argument form of 'open' is used so that the configured path is
# always treated as a plain file name, whatever characters it contains.
open ($log, ">", $log_file) || die "Failed to open: [$log_file] for writing; Error: $!\n";

# Set STDOUT and $log to hot (unbuffered) output. STDOUT is selected last so
# that it remains the default handle for 'print'.
select $log;
$|=1;
select STDOUT;
$|=1;

# If this gets set in either of the next two functions, the agent will exit.
my $bad=0;

# Read in arguments from the command line.
($bad)=read_cla($conf, $log, $bad);

# Now read in arguments from STDIN, which is how 'fenced' passes arguments.
($bad)=read_stdin($conf, $log, $bad);

# This makes sure the node ID is either zero-padded to two digits or, when no
# port was given (empty or zero), set to '00'. The value is assigned once;
# there is no need to also assign it inside the ternary.
$conf->{na}{port}=$conf->{na}{port} ? sprintf("%02d", $conf->{na}{port}) : "00";
record($conf, $log, "Will use port: [$conf->{na}{port}]\n") if $conf->{'system'}{debug};

# Find the Node Assassin in the config file whose address matches the one
# requested (ignoring case) and copy its settings into the active 'na' values.
for my $id (1..$conf->{'system'}{na_num})
{
	next if (lc($conf->{na}{$id}{ipaddr}) ne lc($conf->{na}{ipaddr}));
	$conf->{'system'}{na_id}=$id;
	record($conf, $log, __LINE__."; system::na_id: [$conf->{'system'}{na_id}]\n") if $conf->{'system'}{debug};
	$conf->{na}{tcp_port}=$conf->{na}{$id}{tcp_port};
	record($conf, $log, __LINE__."; na::tcp_port: [$conf->{na}{tcp_port}]\n") if $conf->{'system'}{debug};
	$conf->{na}{na_name}=$conf->{na}{$id}{na_name} || "Node Assassin #$id";
	record($conf, $log, __LINE__."; na::na_name: [$conf->{na}{na_name}]\n") if $conf->{'system'}{debug};
	$conf->{na}{max_nodes}=$conf->{na}{$id}{max_nodes};
	record($conf, $log, __LINE__."; na::max_nodes: [$conf->{na}{max_nodes}]\n") if $conf->{'system'}{debug};
	# There is no early exit; if several entries match, the last one wins.
}

# Stop here if either of the argument readers flagged a problem.
die "Exiting on errors.\n" if $bad;

# Record the values that will be used for this run.
my @ny=("no", "yes");
record($conf, $log, "Node Assassin: . [$conf->{na}{ipaddr}].\n");
record($conf, $log, "TCP Port: ...... [$conf->{na}{tcp_port}].\n");
record($conf, $log, "Node: .......... [$conf->{na}{port}].\n");
record($conf, $log, "Login: ......... [$conf->{na}{login}].\n");

# The password itself is never written to the log; only whether one was
# supplied. This matches the "Setting node" message later in this script,
# which reports the login alone.
my $passwd_note=((defined $conf->{na}{passwd}) and ($conf->{na}{passwd} ne "")) ? "hidden" : "not set";
record($conf, $log, "Password: ...... [$passwd_note].\n");
record($conf, $log, "Action: ........ [$conf->{na}{action}].\n");

# Reduce the version flag to 0 or 1 before using it as an index so that any
# other true value still reports 'yes' instead of reading past the array.
record($conf, $log, "Version Request: [".$ny[$conf->{'system'}{version} ? 1 : 0]."].\n");
record($conf, $log, "Done reading args.\n");

# A request for version information is answered first; show it and exit with
# a success code.
if ($conf->{'system'}{debug})
{
	record($conf, $log, "Version: ..... [$conf->{'system'}{version}].\n");
}
if ($conf->{'system'}{version})
{
	version($conf, $log);
	do_exit($conf, $log, 0);
}

# Connect to the Node Assassin.
connect_to_na($conf, $log);

# Validate credentials by comparing the supplied login and password against
# the 'system' username and password.
# NOTE: Checking before the telnet fails on the exit, which is why this is
# done after connecting. Also, this will be moved into the Node Assassin soon
# anyway.
my $bad_credentials=(($conf->{na}{login} ne $conf->{'system'}{username}) or ($conf->{na}{passwd} ne $conf->{'system'}{password}));
if ($bad_credentials)
{
	record($conf, $log, "Username and/or password invalid. Did you use the command line switches properly?\n");
	do_exit($conf, $log, 8);
}

###############################################################################
# What do?                                                                    #
###############################################################################

# The 'monitor' and 'list' actions are handled the same way. As this is a
# multi-port device, 'show_list' is called to report the nodes and their
# aliases where found in the config file, and then the agent exits.
if ($conf->{'system'}{debug})
{
	record($conf, $log, "Action: ........ [$conf->{na}{action}].\n");
}
if (grep { $conf->{na}{action} eq $_ } ("monitor", "list"))
{
	if ($conf->{'system'}{debug})
	{
		record($conf, $log, "Calling the 'show_list' function.\n");
	}
	show_list($conf, $log);
	do_exit($conf, $log, 0);
}

# Anything that reaches this point is a request to set a state. The state is
# worked out from the values in the conf->{na} hash.
if ($conf->{'system'}{debug})
{
	record($conf, $log, "Setting node: [$conf->{na}{port}] to action: [$conf->{na}{action}] using the Node Assassin: [$conf->{na}{ipaddr}] using the login: [$conf->{na}{login}]\n");
}

# Convert the action into Node Assassin protocol arguments.
process_action($conf, $log);

# Run the resulting action plan and keep its result to use as the exit code.
my $exit_code=do_actions($conf, $log);
if ($conf->{'system'}{debug})
{
	record($conf, $log, "All calls complete, exiting.\n");
}

# Cleanup and exit.
do_exit($conf, $log, $exit_code);

`Input, advice, complaints and meanderings all welcome!`
`Digimer`	`digimer@alteeve.ca`	`https://alteeve.ca/w`	`legal stuff:`
`All info is provided "As-Is". Do not use anything here unless you are willing and able to take responsibility for your own actions. © 1997-2013`
Naming credits go to Christopher Olah!
In memory of Kettle, Tonia, Josh, Leah and Harvey. In special memory of Hannah, Jack and Riley.

@@ Line 3: / Line 3: @@
 This is the core fence agent that exists in <span class="code">/sbin/</span>.
+* Download the [http://nodeassassin.org/files/sbin/fence_na source code].
 <source lang="perl">
 #!/usr/bin/perl
@@ Line 8: / Line 9: @@
 # Node Assassin - Fence Agent
 # Digimer; digimer@alteeve.com
-# Mar. 07, 2010.
+# Jun. 27, 2010
-# Version: 0.1.004
+# Version: 1.1.5
+#
+# This software is released under the GPL v2. See the LICENSE file in the
+# configuration directory for a copy of the GPL v2.
 #
 # Bugs;
 # - None known, many expected
 #
-=pod
-Changes:
-v0.1.004
- - Fixed the command line argument bug.
- - Updated the 'help' message to be more accurate.
-Given the following:
-<cluster name="an_san" config_version="1">
-	<clusternodes>
-		<clusternode name="an_san01.alteeve.com" nodeid="1">
-			<fence>
-				<method name="node_assassin">
-					<device name="ariel" port="01" action="off"/>
-				</method>
-			</fence>
-		</clusternode>
-		<clusternode name="an_san02.alteeve.com" nodeid="2">
-			<fence>
-				<method name="node_assassin">
-					<device name="ariel" port="02" action="off"/>
-				</method>
-			</fence>
-		</clusternode>
-	</clusternodes>
-	<fencedevices>
-		<fencedevice name="node_assassin" agent="fence_na" ipaddr="ariel.alteeve.com" name="ariel" passwd="gr0tt0"></fencedevice>
-	</fencedevices>
-</cluster>
-Questions:
-- Is there a corelation between 'clusternode -> name', 'device -> name' and
- 'fencedevice -> name'? Which is used when sending 'name' to the fence agent?
- 'fencedevice'?
-When 'fenced' decides to fence "an_san01.alteeve.com", it will:
-- call '/sbin/fence_na' because of the 'fencedevices -> agent' value.
-- It will pass the following arguments to the fence agent, one pair per line:
-    agent=fence_na		# From 'fencedevices -> agent'
-    name=ariel			# From 'fencedevices -> name'
-    ipaddr=ariel.alteeve.com	# From 'fencedevices -> ipaddr'
-    passwd=gr0tt0		# From 'fencedevices -> passwd'
-    port=01			# From 'clusternode "an_san01.alteeve.com" -> port'
-    action=fence_na		# From 'clusternode "an_san01.alteeve.com" -> option'
-    				# This must be 'on', 'off', 'reboot', 'status'
-    				# or 'monitor'. See below for how these terms
-    				# are interpretted by this agent.
-    				# NOTE: If 'option' is passed, it's value will
-    				# be stored in 'action'. That is, 'action' and
-    				# 'option' are synonymous.
-    				#
-- Node Assassin's implentation of options.
-  - 'off'	This sets the node to state '0' on the reset port followed by
-  		state '3' to the power port. State 0 is maintained to prevent
-  		a reboot.
-  - 'on'	This sets the node to state '1' on the reset port followed by
-  		state '2' on the power port to boot the node.
-  - 'reboot'	This sets the node to state '2' on the reset port to quickly
-  		kill the node, then switches to state '3' on the power port,
-  		checks the return value (later, will check the probe pin),
-  		sets state '1' on the reset port, pauses 1 second, and then
-  		sets state '2' on the power port to boot the node.
-  - 'status'	This calls '00:0' and returns the state of the port. Later,
-  		this will return the value from the voltage sensing pin.
-  - 'monitor'	being a multi-port fence device, this should call 'list'.
-  		MADI: Confirm that this is what is meant in "Issues" here:
-  		http://sources.redhat.com/cluster/wiki/FenceAgentAPI
-  - 'list'	No info on this
-Command Line Arguments:
-- Any command line arguments used by this fence agent are not dictated by the
-  Fence Agent API. By convention only, the following command line options are
-  used:
-  -a <ip>	# Maps the value to 'ipaddr'.
-  -h		# Print the help message and then exits.
-  -l <name>	# Maps the value to 'name'.
-  -n <num>	# Maps the value to 'port'.
-  -o <string>	# Maps the value to 'action'.
-  -p <string>	# Maps the value to 'passwd'.
-  -S <path>	# Maps the value to 'passwd_script'. This is not used by Node
-  		# Assassin yet and is simply ignored.
-  -q		# Sets quiet mode. Only errors will be printed. Logging
-  		# proceeds as normal
-  -V		# Prints the 'fence_na' version and the version of any attached
-  		# Node Assassin(s) and exits.
-Note:
-- For now, I will return '0' if the command succeeded, but will add a detection
-  line checks if there is voltage from the node's PSU later.
-=cut
 # Play safe!
 use strict;
 use warnings;
-# Load my library.
-require '/etc/na/fence_na.lib';
+# Load our library.
-# This is how I talk.
+require '/etc/fence_na/fence_na.lib';
+# IO::Handle is used for logging and Net::Telnet is used for communicating with
+# the Node Assassin(s).
 use IO::Handle;
 use Net::Telnet;
-# This will be read in from a config file later.
+# These are the default values and will be over-written by the config file's
+# variables which in turn can, in some cases, be over-written by command line
+# arguments.
+# Please see '/etc/na/fence_na.conf' for details on each option.
 my $conf={
 	'system'	=>	{
 		max_valid_state	=>	3,
-		conf_file	=>	"/etc/na/fence_na.conf",
+		conf_file	=>	"/etc/fence_na/fence_na.conf",
 		quiet		=>	"",
-		version		=>	"",
+		version		=>	0,
-		list_state	=>	"",
 		list		=>	"",
 		monitor		=>	"",
-		node_assassin_id=>	0,
+		na_id		=>	0,
 		got_cla		=>	0,	# This is set if command line arguments are read.
+		debug		=>	0,
 	},
-	node		=>	{
+	na		=>	{
 		ipaddr		=>	"",
-		tcp_port	=>	"",
+		tcp_port	=>	"238",
-		port		=>	"238",
+		port		=>	"",
 		login		=>	"",
 		passwd		=>	"",
@@ Line 142: / Line 58: @@
 		action		=>	"",
 		agent		=>	"",	# This is only used by 'fenced'
+		na_name		=>	"",	# This is used for the 'list' function.
 		handle		=>	"",
-		set_state	=>	[],	# This anon array will store the states to set based on the action passed for the proper ports.
+		max_node	=>	0,
+		set_state	=>	[],	# This array will store the states to set based on the action passed for the proper ports.
 	}
 };
 # This method can't pass in the '$log' handle, obviously, as it does not yet
 # exist.
@@ Line 152: / Line 71: @@
 # Log file for output.
 my $log=IO::Handle->new();
+print "Opening: [$conf->{'system'}{'log'}] for logging.\n"  if $conf->{'system'}{debug};
 open ($log, ">$conf->{'system'}{'log'}") || die "Failed to open: [$conf->{'system'}{'log'}] for writing; Error: $!\n";
+# Set STDOUT and $log to hot (unbuffered) output.
+if (1)
+{
+	select $log;
+	$|=1;
+	select STDOUT;
+	$|=1;
+}
 # If this gets set in the next two function, the agent will exit.
@@ Line 163: / Line 92: @@
 ($bad)=read_stdin($conf, $log, $bad);
-# This makes sure the node ID is zero-padded or '00'.
+# This makes sure the node ID is either zero-padded or '00'.
-$conf->{node}{port}=$conf->{node}{port} ? $conf->{node}{port}=sprintf("%02d", $conf->{node}{port}) : "00";
+$conf->{na}{port}=$conf->{na}{port} ? $conf->{na}{port}=sprintf("%02d", $conf->{na}{port}) : "00";
+record($conf, $log, "Will use port: [$conf->{na}{port}]\n") if $conf->{'system'}{debug};
 # Find the TCP port from the config file.
 foreach my $i (1..$conf->{'system'}{na_num})
 {
-	if ((lc($conf->{node}{$i}{ipaddr}) eq lc($conf->{node}{ipaddr})))
+	if ((lc($conf->{na}{$i}{ipaddr}) eq lc($conf->{na}{ipaddr})))
 	{
-		$conf->{'system'}{node_assassin_id}=$i;
+		$conf->{'system'}{na_id}=$i;
-		$conf->{node}{tcp_port}=$conf->{node}{$i}{tcp_port};
+		record($conf, $log, __LINE__."; system::na_id: [$conf->{'system'}{na_id}]\n") if $conf->{'system'}{debug};
-		last;
+		$conf->{na}{tcp_port}=$conf->{na}{$i}{tcp_port};
+		record($conf, $log, __LINE__."; na::tcp_port: [$conf->{na}{tcp_port}]\n") if $conf->{'system'}{debug};
+		$conf->{na}{na_name}=$conf->{na}{$i}{na_name} ? $conf->{na}{$i}{na_name} : "Node Assassin #$i";
+		record($conf, $log, __LINE__."; na::na_name: [$conf->{na}{na_name}]\n") if $conf->{'system'}{debug};
+		$conf->{na}{max_nodes}=$conf->{na}{$i}{max_nodes};
+		record($conf, $log, __LINE__."; na::max_nodes: [$conf->{na}{max_nodes}]\n") if $conf->{'system'}{debug};
 	}
 }
 die "Exiting on errors.\n" if $bad;
-record($conf, $log, "Node Assassin: [$conf->{node}{ipaddr}].\n");
+my @ny=("no", "yes");
-record($conf, $log, "TCP Port: .... [$conf->{node}{tcp_port}].\n");
+record($conf, $log, "Node Assassin: . [$conf->{na}{ipaddr}].\n");
-record($conf, $log, "Port: ........ [$conf->{node}{port}].\n");
+record($conf, $log, "TCP Port: ...... [$conf->{na}{tcp_port}].\n");
-record($conf, $log, "Login: ....... [$conf->{node}{login}].\n");
+record($conf, $log, "Node: .......... [$conf->{na}{port}].\n");
-record($conf, $log, "Password: .... [$conf->{node}{passwd}].\n");
+record($conf, $log, "Login: ......... [$conf->{na}{login}].\n");
-record($conf, $log, "Action: ...... [$conf->{node}{action}].\n");
+record($conf, $log, "Password: ...... [$conf->{na}{passwd}].\n");
+record($conf, $log, "Action: ........ [$conf->{na}{action}].\n");
+record($conf, $log, "Version Request: [".$ny[$conf->{'system'}{version}]."].\n");
 record($conf, $log, "Done reading args.\n");
+# If I've been asked to show the version information, do so and then exit.
+record($conf, $log, "Version: ..... [$conf->{'system'}{version}].\n") if $conf->{'system'}{debug};
+if ($conf->{'system'}{version})
+{
+	version($conf, $log);
+	do_exit($conf, $log, 0);
+}
 # Connect to the Node Assassin.
-$conf->{node}{handle}=new Net::Telnet(
+connect_to_na($conf, $log);
-	Timeout	=>	10,
-	Errmode	=>	'die',
-	Port	=>	$conf->{node}{tcp_port},
-	Prompt	=>	'/EOM$/',
-	Errmode	=>	'return'
-) or do_exit($conf, $log, 1);
-# print "Handle: [$conf->{node}{handle}]\n";
-$conf->{node}{handle}->open($conf->{node}{ipaddr});
 # Validate credentials.
 # NOTE: Checking before the telnet fails on the exit. Also, this will be moved
 # into the Node Assassin soon anyway.
-if (($conf->{node}{login} ne $conf->{'system'}{username}) or ($conf->{node}{passwd} ne $conf->{'system'}{password}))
+if (($conf->{na}{login} ne $conf->{'system'}{username}) or ($conf->{na}{passwd} ne $conf->{'system'}{password}))
 {
-	record($conf, $log, "Username and/or password failed.\n");
+	record($conf, $log, "Username and/or password invalid. Did you use the command line switches properly?\n");
 	do_exit($conf, $log, 8);
 }
@@ Line 210: / Line 147: @@
 ###############################################################################
-# If I've been asked to show the version information, do so and then exit.
+# When asked to 'monitor' or 'list'. being multi-port, this will return a CSV
-record($conf, $log, "Version: ..... [$conf->{'system'}{version}].\n");
+# of nodes and their aliases where found in the config file.
-if ($conf->{'system'}{version})
+record($conf, $log, "Action: ........ [$conf->{na}{action}].\n") if $conf->{'system'}{debug};
-{
+if (($conf->{na}{action} eq "monitor") or ($conf->{na}{action} eq "list"))
-	version($conf, $log);
-	do_exit($conf, $log, 0);
-}
-# If I've been asked to show the info on the given node assassin, do so and
-# then exit.
-record($conf, $log, "List State: .. [$conf->{'system'}{list_state}].\n");
-if ($conf->{'system'}{list_state})
-{
-	show_state($conf, $log);
-	do_exit($conf, $log, 0);
-}
-# When asked to 'monitor' or 'list', do this... whatever 'this' is. All I know
-# is that it should not generate output.
-record($conf, $log, "Monitor: ..... [$conf->{'system'}{monitor}].\n");
-record($conf, $log, "List: ........ [$conf->{'system'}{list}].\n");
-if (($conf->{node}{monitor}) or ($conf->{node}{list}))
 {
+	record($conf, $log, "Calling the 'show_list' function.\n") if $conf->{'system'}{debug};
 	show_list($conf, $log);
 	do_exit($conf, $log, 0);
@@ Line 238: / Line 158: @@
 # If I made it this far, I am setting a state. Sort out what state from the
-# values in my conf->{node} hash.
+# values in my conf->{na} hash.
-record($conf, $log, "Setting node: [$conf->{node}{port}] to action: [$conf->{node}{action}] using the Node Assassin: [$conf->{node}{ipaddr}] using the login: [$conf->{node}{login}/$conf->{node}{passwd}]\n");
+record($conf, $log, "Setting node: [$conf->{na}{port}] to action: [$conf->{na}{action}] using the Node Assassin: [$conf->{na}{ipaddr}] using the login: [$conf->{na}{login}]\n") if $conf->{'system'}{debug};
 # Convert the action into Node Assassin protocol arguments.
 process_action($conf, $log);
-# In the next step, when a 'check' is seen, the port is analyzed and an exit
+# Now execute the action plan.
-# status is stored here. Exits 0, 1 and 2 have special meaning, so I default to
+my $exit_code=do_actions($conf, $log);
-# 9.
+record($conf, $log, "All calls complete, exiting.\n") if $conf->{'system'}{debug};
-my $exit_code=9;
-# Process the orders.
-foreach my $order (split/,/, $conf->{'system'}{call_order})
-{
-	if ($order=~/^sleep/)
-	{
-		my $time=$order=~/sleep (\d+)/ ? $1 : 1;
-		record ($conf, $log, "Sleeping for: [$time]...\n");
-		sleep $time;
-		next;
-	}
-	record ($conf, $log, "Calling: [$order]\n");
-	if ($order=~/(\d\d):(\D+)/)
-	{
-		my $node=$1;
-		my $check=$2;
-		# Verify the state of the port.
-		record($conf, $log, "Status check on node: [$node] -> [$check]\n");
-		# Get the state.
-		my $states=get_states($conf, $log);
-		if ($states == 1)
-		{
-			# I had a connection problem.
-			do_exit($conf, $log, 1);
-		}
-		my $node_state=$states->{$node};
-		record($conf, $log, "Node: [$node] state: [$node_state]\n");
-		if ($check eq "check")
-		{
-			# Return '2' if the node is off and '0' if it is on.
-			$exit_code=$node_state eq "off" ? 2 : 0;
-		}
-		elsif ($check eq "off")
-		{
-			# 'off' was called, make sure the node is now off. This
-			# may be called by 'reboot' in which case 'exit_code'
-			# will simply be over-written when the final 'reboot'
-			# state check is called.
-			$exit_code=$node_state eq "off" ? 0 : 1;
-		}
-		elsif ($check eq "on")
-		{
-			# 'on' was called, make sure the node is now off.
-			$exit_code=$node_state eq "off" ? 1 : 0;
-		}
-		elsif ($check eq "reboot")
-		{
-			# Make sure that 'exit_code' was set to '0' by the
-			# earlier call. We checked again to make sure the node
-			# came back up, and will log an error if it didn't, but
-			# we return '0' just the same, as per the API.
-			if ($exit_code eq "0")
-			{
-				# The power off portion worked. Check if the
-				# node booted properly and record an error if
-				# not.
-				if ($node_state eq "off")
-				{
-					record($conf, $log, "Node: [$node] failed to boot after a successful power off during a reboot action.\n");
-					record($conf, $log, "This is a non-critical error as the node was fenced successfully but may\n");
-					record($conf, $log, "indicate a hardware failure with the node or with Node Assassin itself.\n");
-				}
-			}
-			else
-			{
-				# The power off portion failed, exit with '1'.
-				$exit_code=1;
-			}
-			$exit_code=$node_state eq "off" ? 1 : 0;
-		}
-		next;
-	}
-	my @set_state=$conf->{node}{handle}->cmd("$order");
-	foreach my $line (@set_state)
-	{
-		record($conf, $log, $line);
-	}
-	record($conf, $log, "Call complete.\n");
-}
-record($conf, $log, "All calls complete, exiting.\n");
-# Now confirm that the requested node is in it's requested state and exit with
-# the appropriate exit code. This function should not return.
-# show_state($conf, $log);
 # Cleanup and exit.

Fence na: Difference between revisions

Latest revision as of 17:03, 25 August 2010

Navigation menu

Search