Fence na.lib: Difference between revisions

Revision as of 06:26, 8 March 2010

Node Assassin :: Fence na.lib
This is the fence agent's function library that exists in /etc/na/.
#!/usr/bin/perl
#
# This is the function library for the Node Assassin fence agent.
# 
# Node Assassin - Fence Agent
# Digimer; digimer@alteeve.com
# Mar. 07, 2010.
# Version: 0.1.004
#


# This cleanly exits the agent.
#
# Parameters:
#   $conf        - The configuration hash reference. If 'node::handle' holds a
#                  connection handle, it is closed before exiting.
#   $log         - The log file handle; closed before exiting when set.
#   $exit_status - The exit code to return to the caller. Defaults to 9 when
#                  not defined.
#
# This function never returns.
sub do_exit
{
	# These must be lexical; without 'my' they overwrite package globals of
	# the same name in the calling program.
	my ($conf, $log, $exit_status)=@_;
	$exit_status=9 if not defined $exit_status;
	
	# The connection handle may not exist yet (for example, when the help
	# message is requested before a connection is made), so only close
	# what was actually opened. Otherwise the method call on an undefined
	# value would kill the agent before it could exit with the requested
	# status.
	if ((ref($conf) eq 'HASH') && (ref($conf->{node}) eq 'HASH') && ($conf->{node}{handle}))
	{
		$conf->{node}{handle}->close;
	}
	$log->close() if $log;
	exit ($exit_status);
}

# This asks the active Node Assassin for its port states using the '00:0' call
# and hands them back as a hash reference keyed by the port number, where each
# value is either "on" or "off".
#
# Parameters:
#   $conf - The configuration hash reference; 'node::handle' must hold the
#           connection handle.
#   $log  - The log file handle, passed on to 'record'.
#
# Returns the hash reference of states, or 1 when the call returned nothing
# (as per the FenceAgentAPI requirements).
sub get_states
{
	my ($conf, $log)=@_;
	
	# Ask for the states; no output at all is treated as a failure.
	my @reply=$conf->{node}{handle}->cmd("00:0");
	return(1) if not @reply;
	
	# Collect the states here.
	my %state_of;
	foreach my $entry (@reply)
	{
		# Drop the newline, then split the entry into port and state.
		chomp $entry;
		my ($port_id, $raw_state)=($entry=~/(\d+): (.*)$/);
		
		# Anything that isn't a status line is ignored.
		next unless (($port_id) && ($raw_state));
		
		# A fenced port is reported as "off", anything else as "on".
		my $simple_state="on";
		$simple_state="off" if $raw_state =~ /fenced/i;
		
		$state_of{$port_id}=$simple_state;
		record($conf, $log, "state->{$port_id}: [$simple_state]\n");
	}
	
	return (\%state_of);
}

# This prints the 'help' message to STDOUT and then exits the agent with a
# status of 0 by way of 'do_exit'. It does not return to the caller.
#
# Parameters:
#   $conf - The configuration hash reference, passed on to 'do_exit'.
#   $log  - The log file handle, passed on to 'do_exit'.
sub help
{
	my ($conf, $log)=@_;
	
	# The message is a single non-interpolating string, printed as-is.
	print q`
Node Assassin Fencing Agent

	This program interfaces with one or more Node Assassin fence devices to
	set one or more nodes to one or more states.

Usage:

	Arguments are read from STDIN as 'variable=value' pairs, one pair per
	new line. This is the method used by 'fenced'.

	For testing, arguments may be passed as command line arguments.
	./fence_na <options>

Overview:

	This takes arguments that defined which Node Assassin to call, what
	node to work on and what action should be taken. These arguments are
	defined by the FenceAgentAPI. These actions are then translated into
	a set of Node Assassin states which then get passed to the device.

Node Assassin Ports:

	The 'fenced' daemon programmatically refers to a given node as a
	"Port". This conflicts with the Node Assassin internal definition of a
	port. Please do not confuse the two!
	
	When the 'fenced' daemon sets the 'port' value, this is understood to
	be the Node ID to be acted on. Internally then, the Node ID is
	converted to two Node Assassin ports; One for "power" and one for
	"reset". The conversion is done this way:
	
	((fenced port * 2) - 1) = Power Port
	(fenced port * 2)       = Reset Port
	
	For example, when fenced sets 'port=4', it is converted to:
	Power Port = ((4*2)-1) = 7
	Reset Port = (4*2)     = 8
	
	With this conversion complete, this fence agent knows what calls to
	make to accomplish the task set in the 'action' argument.

Node Assassin States:

	Each Node Assassin port can be set to one of four states. They are:

	0
		This state will fence the nodes specified by the list. The
		fence will remain active until released.
	
	1
		This will release the fence on the specified NA port.
	
	2
		This will fence the node(s) for one second. This is meant to be
		used on ports connected to a node's power button. If the node
		is alive and supports ACPI, this should start a graceful power
		down of the node. Conversly, if the node was off, this will
		boot the node. When connected to a node's reset switch, this
		will cause a quick reboot without a graceful power off.
	
	3
		This state will fence the node(s) for five seconds. This is
		specifically designed for ports connected to a node's power
		button. It will allow a frozen node to be forced off by holding
		the power button closed long enough to force a power off. This
		state serves no real difference over state 2 when connected to
		a reset switch.

Options:

	Which options are valid depends on how the arguments are passed in. The
	"main" method of passing arguments is via STDIN as a series of 
	'varible=value' pairs, one per line. All arguments accepted this way
	have command-line arguments passed in via single-dashes switches. Each
	option below shows either calling method's arguments, where available.
	
	-h
	
		Prints this help message.
	
	-v, -V
	
		Prints the version of this fence agent and then the version of
		any configured, reachable Node Assassins.
	
	-q
	
		Tells this fence agent to only print critical messages.
	
	-a <val>, ipaddr=<val>
	
		Tells the fence agent which Node Assassin to use. 
		
		NOTE! The name or IP addresses *MUST* match a name or IP
		      address defined in '/etc/na/fence_na.conf'! Specifically,
		      it must match one of the 'node::X::ipaddr' entries where
		      'X' is the ID of the Node Assassin.
	
	-l <val>, login=<val>
	
		This is the login name used to connect to this fence agent. It
		must match the value 'system::username' in
		'/etc/na/fence_na.conf'.
	
	-p <val>, passwd=<val>
	
		This is the login password used to connect to this fence agent.
		It must match the value 'system::password' in
		'/etc/na/fence_na.conf'.
	
	-n <val>, port=<val>
	
		This tells the fence agent which node to act on. 
		
		NOTE! Please do not confuse this with the Node Assassin's
		      internal concept of a port.
	
	-o <val>, action=<val>
	
		This is the action (formerly 'option') to perform on the
		selected node. Valid actions and how Node Assassin implements
		them are:
		
		on
		
			1. Reset pin is opened (fence released)
			2. Power pin is opened
			3. Wait one second
			4. Power pin is closed for one second
			5. Wait one second
			6. Power feed is checked for 'on' state
		
		off
		
			1. Reset pin is closed (fenced), disabling the node
			   immediately.
			2. Wait one second.
			3. Reset pin is opened (fence released). Some machines
			   will not power off is it's reset pin is held closed.
			4. Wait one second.
			5. Power pin is closed
			6. Wait for five seconds to ensure PSU is shut down.
			7. Reset pin is closed
			   Note: With both the power and reset help closed, the
			      node will not boot even when it's front panel
			      power button is pressed.
			8. Power feed is checked for 'off' state
		
		reboot
		
			Note: This will do a hard reboot! Do not use when a
			      normal restart would suffice.
			1.  Reset pin is closed (fenced) to ensure immediate
			    disabling of the node.
			2.  Wait one second.
			3.  Reset pin is opened (fence released)
			4.  Wait one second.
			5.  Power pin is closed (fenced) for five seconds
			6.  Wait five seconds to ensure power down.
			7.  Power feed is checked for 'off' state
			8.  Power pin is closed (fenced) for one second to
			    begin booting the node.
			9.  Wait one second.
			10. Power feed is checked for 'on' state
		
		status
		
			The associated node's power feed is checked and it's
			status is returned.
		
		monitor, list
		
			Unknown what is to be done under these actions.
		
		###############################################################
		NOTE: The following states are supported by Node Assassin only.
		      These states are not used by the 'fenced' daemon and are
		      provided here as a convenience for the user.
		###############################################################
		      
		soft_reboot (not yet implemented, requires NA v1.1.4 or better)
		
			1. Power pin is closed for one second. Nodes who
			   support ACPI should initiate shutdown at the OS
			   level.
			2. A loop starts checking the power feed line. The
			   status is checked once a second until the power feed
			   returns 'off'.
			3. Wait one second
			4. Power pin is closed for one second.
			5. Wait one second.
			6. Power feed is checked for 'on' state.
		
		soft_off (not yet implemented, requires NA v1.1.4 or better)
		
			1. Power pin is closed for one second. Nodes who
			   support ACPI should initiate shutdown at the OS
			   level.
			2. A loop starts checking the power feed line. The
			   status is checked once a second until the power feed
			   returns 'off'.
		
		release
		
			1. Power pin is opened (fence released)
			2. Reset pin is opened (fence released)
		
		all_stop (not yet implemented, requires NA v1.1.4 or better)
		
			1. The '-n/port' and '-a/ipaddr' arguments are ignored.
			2. A loop starts for all Node Assassins configured.
			3. For each NA, a loop is started to step through all
			   supported nodes.
			4. For each node on each Node Assassin:
			4.1. Power feed is checked for 'on' status. If on;
			4.2. Power pin is closed (fenced) for one second,
			     initiating an ACPI induced shut down.
		
		all_start (not yet implemented, requires NA v1.1.4 or better)
		
			1. The '-n/port' and '-a/ipaddr' arguments are ignored.
			2. A loop starts for all Node Assassins configured.
			3. For each NA, a loop is started to step through all
			   supported nodes.
			4. For each node on each Node Assassin:
			4.1. Power feed is checked for 'off' status. If off;
			4.2. Power pin is closed (fenced) for one second,
			     initiating a boot of the node, if there is a node
			     connected.
		
	-S <path>, passwd_script=<path> (Not Implemented)
	
		This is the path to a script that returns the password to use
		when running this fence agent.
	

Examples:

	To simulate how 'fenced' calls the script, create a text file called
	'args.txt' containing:
	
	-----------------------------------------------------------------------
	# Test file used as input for the NA fence agent.
	ipaddr=ariel.alteeve.com
	port=02
	login=ariel
	passwd=gr0tt0
	action=off
	-----------------------------------------------------------------------
	
	Now use 'cat' the pipe the contents into the fence agent:
	
		cat args.txt | ./fence_na
	
	This will call the 'off' function against node #02 connected to the
	Node Assassin at 'ariel.alteeve.com', fencing it. Change the action
	line to 'action=on' and re-run the script again to release the fence
	and boot the node.
	
	To duplicate the same call using command line arguments:
	
		./fence_na -a ariel.alteeve.com -n 2 -l ariel -p gr0tt0 -o off
	
Note:

	An internal pager is not implemented. You may wish to run this via
	'less':
	
	./fence_na | less

Updated:

	Mar. 7, 2010							Digimer
`;
	
	# Nothing more to do once the message has been shown.
	do_exit($conf, $log, 0);
}

# This translates the requested action into the ordered list of Node Assassin
# calls needed to carry it out and stores that list, as a comma-separated
# string, in 'system::call_order'.
#
# Parameters:
#   $conf - The configuration hash reference. Reads 'node::action' and
#           'node::port', writes 'system::call_order'.
#   $log  - The log file handle, passed on to 'record' and 'do_exit'.
#
# The agent exits with status 9 (via 'do_exit') when the action is unknown or
# when an action that works on a node was requested without a valid node
# number.
sub process_action
{
	my ($conf, $log)=@_;
	
	# Make this more readable.
	my $action=$conf->{node}{action};
	my $port=$conf->{node}{port};
	$action="" if not defined $action;
	
	# Only these actions act on a specific node and so need the node number
	# translated into Node Assassin ports.
	my $power_port="";
	my $reset_port="";
	if ($action=~/\A(?:on|off|reboot|status)\z/)
	{
		# A missing, non-numeric or zero node number would otherwise be
		# turned silently into the ports '-1' and '00', and the fence
		# call would not act on the intended node. Fail loudly instead.
		if ((not defined $port) or ($port!~/\A\d+\z/) or ($port < 1))
		{
			my $shown=defined $port ? $port : "";
			record($conf, $log, "Invalid node port: [$shown]! It must be a whole number of 1 or higher.\n");
			do_exit($conf, $log, 9);
		}
		
		# Translate the port passed in by the fence agent into the
		# actual ports in the Node Assassin. Mapping is:
		# Node 01 -> Power = Port 01
		# Node 01 -> Reset = Port 02
		# Node 02 -> Power = Port 03
		# Node 02 -> Reset = Port 04
		# Node 03 -> Power = Port 05
		# Node 03 -> Reset = Port 06
		# Node 04 -> Power = Port 07
		# Node 04 -> Reset = Port 08
		# ...
		$power_port=sprintf("%02d", (($port*2)-1));
		$reset_port=sprintf("%02d", ($port*2));
		record($conf, $log, "Translated node port: [$port] to power port: [$power_port] and reset port: [$reset_port]\n");
	}
	
	if ($action eq "on")
	{
		# Release the fence and boot the node.
		$conf->{'system'}{call_order}="$reset_port:1,$power_port:1,sleep,$power_port:2,sleep,$power_port:on";
	}
	elsif ($action eq "off")
	{
		# Fence the node by pressing and holding the reset to make sure
		# the node immediately dies. Then I release the fence long
		# enough to force a power off, then I re-apply the fence to
		# make sure the node doesn't come back up. This is needed
		# because some machines won't power off if the reset is held
		# high when the power is pressed, even for > 4 seconds.
		$conf->{'system'}{call_order}="$reset_port:0,sleep,$reset_port:1,sleep,$power_port:0,sleep 5,$reset_port:0,$power_port:off";
	}
	elsif ($action eq "reboot")
	{
		# Currently, I don't do this gracefully because, well, if it's
		# being fenced, it's not meant to be graceful.
		# This is a combination of the 'off' -> 'on' actions.
		$conf->{'system'}{call_order}="$reset_port:0,sleep,$reset_port:1,sleep,$power_port:3,sleep 6,$power_port:off,$power_port:2,sleep,$power_port:reboot";
	}
	elsif ($action eq "status")
	{
		# This should check the probe, but for now, it checks the
		# port's state.
		$conf->{'system'}{call_order}="$power_port:check";
	}
	elsif (($action eq "monitor") or ($action eq "list"))
	{
		# Not sure what to do here. No call order is set.
	}
	else
	{
		record($conf, $log, "Unknown action request: [$action]!\n");
		do_exit($conf, $log, 9);
	}
}

# Read in the config file.
#
# The file named in 'system::conf_file' is read line by line. Lines without an
# '=' and lines starting with '#' are skipped. Every other line is split on
# the first '=' into a variable and a value, both are stripped of surrounding
# whitespace, and the pair is stored in the configuration hash by way of
# '_make_hash_reference' (so 'a::b::c = x' becomes $conf->{a}{b}{c}="x").
#
# Parameters:
#   $conf - The configuration hash reference to read 'system::conf_file' from
#           and to store the parsed values in.
#
# Returns 0. Dies when the config file can not be opened.
sub read_conf
{
	my ($conf)=@_;
	$conf={} if not $conf;
	
	# I can't call the 'record' method here because I've not read in the
	# log file and thus don't know where to write the log to yet.
	my $conf_file=$conf->{'system'}{conf_file};
	$conf_file="" if not defined $conf_file;
	
	# Use the three-argument form so that nothing in the file name can be
	# taken as part of the open mode.
	open (my $read, "<", $conf_file) or die "Failed to read: [$conf_file], error was: $!\n";
	
	# A lexical line variable is used so the caller's '$_' is left alone.
	while (my $line=<$read>)
	{
		chomp $line;
		next if not $line;
		next if $line !~ /=/;
		$line=~s/^\s+//;
		$line=~s/\s+$//;
		next if $line =~ /^#/;
		next if not $line;
		
		# Split on the first '=' only, so values may contain '='.
		my ($var, $val)=(split/=/, $line, 2);
		$var=~s/^\s+//;
		$var=~s/\s+$//;
		$val=~s/^\s+//;
		$val=~s/\s+$//;
		next if (not $var);
		_make_hash_reference($conf, $var, $val);
	}
	close ($read);
	
	return (0);
}

# Read in command line arguments.
#
# Each switch that takes a value ('-a', '-l', '-p', '-n', '-o' and '-S') causes
# the argument following it to be stored under the matching 'node::<key>'
# entry in the configuration hash. '-h' prints the help message (which exits),
# '-v'/'-V' sets 'system::version' and '-q' sets 'system::quiet'. When any
# argument is seen at all, 'system::got_cla' is set.
#
# Parameters:
#   $conf - The configuration hash reference to store the values in.
#   $log  - The log file handle, passed on to 'record' and 'help'.
#   $bad  - The current "bad argument" flag from the caller.
#
# Returns the "bad argument" flag; it is set to 1 when an invalid argument, an
# unknown switch or a switch without its value was seen, and is otherwise
# returned as it was passed in.
sub read_cla
{
	my ($conf, $log, $bad)=@_;
	
	# MADI: Remove this before release.
	record($conf, $log, "Got args:\n");
	
	### These are the switches set by Red Hat, mapped to the 'node::X' key
	### that the following argument will be stored under.
	my %key_for=(
		a	=>	"ipaddr",		# IP address or hostname of the Node Assassin to call.
		l	=>	"login",		# The login name.
		p	=>	"passwd",		# The password.
		n	=>	"port",			# The node to work on.
		o	=>	"action",		# The action to take.
		S	=>	"passwd_script",	# Script that prints the password to STDOUT.
	);
	
	# Loop through the passed arguments, if any.
	my $set_next="";
	foreach my $arg (@ARGV)
	{
		$conf->{'system'}{got_cla}=1;
		
		# If 'set_next' has a value, push this argument into the 'conf'
		# hash.
		if ($set_next)
		{
			# It's set, use its contents as the hash key.
			$conf->{node}{$set_next}=$arg;
			
			# MADI: Remove this before release.
			record($conf, $log, "Setting: 'node::$set_next': [$conf->{node}{$set_next}]\n");
			
			# Clear it now for the next go-round.
			$set_next="";
			next;
		}
		
		# The patterns are anchored so that only real switches match,
		# not any argument that merely contains '-h', '-v' or '-q'.
		if ($arg=~/^--?h(?:elp)?$/)
		{
			# Print the help message and then exit.
			help($conf, $log);
		}
		elsif ($arg=~/^--?(?:v|V|version)$/)
		{
			# Print the version information and then exit.
			$conf->{'system'}{version}=1;
		}
		elsif ($arg=~/^--?(?:q|quiet)$/)
		{
			# Suppress all non-critical messages from STDOUT.
			$conf->{'system'}{quiet}=1;
		}
		elsif ($arg=~/^-/)
		{
			# Work on a copy; '$arg' is an alias into '@ARGV' and
			# the caller's argument list should not be altered.
			my $switch=$arg;
			$switch=~s/^-//;
			
			if (exists $key_for{$switch})
			{
				$set_next=$key_for{$switch};
			}
			else
			{
				# Don't silently ignore a switch I don't know.
				record($conf, $log, "Switch: [$arg] is not valid!\n");
				record($conf, $log, "Please run './fence_na --help' to see a list of valid arguments.\n");
				$bad=1;
			}
		}
		else
		{
			### MADI: I might want to pick up arguments via multiple lines.
			# Bad argument.
			record($conf, $log, "Argument: [$arg] is not valid!\n");
			record($conf, $log, "Please run './fence_na --help' to see a list of valid arguments.\n");
			$bad=1;
		}
	}
	
	# A switch at the very end of the list never got its value.
	if ($set_next)
	{
		record($conf, $log, "The last switch is missing its value for: [$set_next]!\n");
		$bad=1;
	}
	
	# Hand the flag back, the same way 'read_stdin' does.
	return ($bad);
}

# Read arguments from STDIN. This is adapted from the 'fence_brocade' agent.
#
# Arguments are expected as 'name=value' pairs, one per line. Blank lines and
# lines starting with '#' are skipped. Recognized names are stored under
# 'node::<key>' in the configuration hash. Nothing is read when command line
# arguments were already seen ('system::got_cla' is set).
#
# Parameters:
#   $conf - The configuration hash reference to store the values in.
#   $log  - The log file handle, passed on to 'record'.
#   $bad  - The current "bad argument" flag from the caller.
#
# Returns 0 when command line arguments were already read. Otherwise returns
# the "bad argument" flag, which is set to 1 when an unknown name was seen.
sub read_stdin
{
	my ($conf, $log, $bad)=@_;
	
	return (0) if $conf->{'system'}{got_cla};
	
	my $line_count=0;
	while(defined (my $option=<>))
	{
		# Count every line read so that the line number reported in
		# warnings matches the input.
		$line_count++;
		
		# Get rid of newlines.
		chomp $option;
		
		# Record the line for now, but comment this out before release.
		record ($conf, $log, "Processing option line: [$option]\n");
		
		# strip leading and trailing whitespace
		$option=~s/^\s*//;
		$option=~s/\s*$//;
		
		# skip comments
		next if ($option=~ /^#/);
		
		# Go to the next line if the option line is empty.
		next if not $option;
		
		# Split the option up into the name and the value. Only the
		# first '=' separates the two, so that values which contain an
		# '=' themselves (a password, for example) are kept whole.
		my ($name, $value)=split /\s*=\s*/, $option, 2;
		$value="" if not defined $value;
		
		# Record the line for now, but comment this out before release.
		record ($conf, $log, "Name: [$name], value: [$value].\n");
		
		# Set my variables depending on the variable name.
		if ($name eq "agent")
		{
			# This is only used by 'fenced', but I record it for
			# potential debugging.
			$conf->{node}{agent}=$value;
		}
		elsif ($name eq "fm")
		{
			# This is a deprecated argument that should no longer
			# be used. Now 'port' should be used.
			if (not $conf->{node}{port})
			{
				# Port isn't set yet, use this value which may
				# be replaced if 'port' is set later.
				(undef, $value) = split /\s+/,$value;
				$conf->{node}{port}=$value;
				warn "Warning! The argument 'fm' is deprecated, use 'port' instead. Value: [$value] set for 'port'\n";
			}
			else
			{
				# Port was already set, so simply ignore this.
				warn "Warning! The argument 'fm' is deprecated, use 'port' instead. Value: [$value] ignored.\n";
			}
		}
		elsif ($name eq "ipaddr") 
		{
			# Record the IP Address or name of the Node Assassin to
			# use.
			$conf->{node}{ipaddr}=$value;
		} 
		elsif ($name eq "login")
		{
			# Record the login name that was passed.
			$conf->{node}{login}=$value;
		} 
		elsif ($name eq "name")
		{
			# Deprecated argument used formerly for login name.
			if (not $conf->{node}{login})
			{
				# Login isn't set yet, use this value which may
				# be replaced if 'login' is seen later.
				$conf->{node}{login}=$value;
				warn "Warning! The argument 'name' is deprecated, use 'login' instead. Value: [$value] set for 'login'.\n";
			}
			else
			{
				# I've already seen the 'login' value so I will
				# ignore this value.
				warn "Warning! The argument 'name' is deprecated, use 'login' instead. Value: [$value] ignored.\n";
			}
		}
		elsif (($name eq "action") or ($name eq "option"))
		{
			# It looks like 'option' is going to be deprecated in
			# favour of 'action'. If/when that happens, add a warn.
			$conf->{node}{action}=$value;
		}
		elsif ($name eq "passwd")
		{
			# This is the login password.
			$conf->{node}{passwd}=$value;
		} 
		elsif ($name eq "passwd_script")
		{
			# This is the path to the script that will return the
			# password to the agent. At this time, this is not
			# implemented.
			$conf->{node}{passwd_script}=$value;
		}
		elsif ($name eq "port")
		{
			# This sets the port number to act on.
			$conf->{node}{port}=$value;
		} 
		else
		{
			warn "Illegal name in option: [$option] at line: [$line_count]\n";
			$bad=1;
		}
	}
	return ($bad);
}

# This writes a message to the log file handle. Echoing the message to STDOUT
# (honouring 'system::quiet') is currently disabled, so '$conf' is accepted
# but not used.
#
# Parameters:
#   $conf - The configuration hash reference (unused at this time).
#   $log  - The file handle to write to.
#   $msg  - The message to write; it is written exactly as passed.
#
# Returns 0.
sub record
{
	my ($conf, $log, $msg)=@_;
	
	# The braces make it explicit that '$log' is the handle, not data.
	print {$log} $msg;
	
	return (0);
}

# When asked to 'monitor' or 'list', do this... whatever 'this' is. All I know
# is that it should not generate output.
#
# At this time, both the state query ('00:0') and the info query ('00:1') are
# sent to the Node Assassin. The info output is written to the log along with
# the node name found in it, and then the agent exits with a status of 0.
#
# Parameters:
#   $conf - The configuration hash reference; 'node::handle' must hold the
#           connection handle.
#   $log  - The log file handle, passed on to 'record' and 'do_exit'.
sub show_list
{
	my ($conf, $log)=@_;
	my $handle=$conf->{node}{handle};
	
	### MADI: No idea what will be needed here, so here are both queries.
	###       Make them available elsewhere if not used here.
	# The states are requested but, for now, nothing is done with them.
	record($conf, $log, "Checking states:\n");
	my @states=$handle->cmd("00:0");
	record($conf, $log, "Done.\n");

	# Query the Node Assassin info, logging each line and remembering the
	# node name when it goes by.
	record($conf, $log, "Checking Node Assassin info:\n");
	my $node_name="";
	my @info=$handle->cmd("00:1");
	foreach my $info_line (@info)
	{
		record($conf, $log, $info_line);
		if (my ($found)=($info_line=~/- Node Name: ..... (.*)/))
		{
			$node_name=$found;
		}
	}
	record($conf, $log, "Node name: [$node_name]\n");
	record($conf, $log, "Done.\n");
	
	do_exit($conf, $log, 0);
}

# This queries the Node Assassin for its states and exits the agent with a
# status that reflects the state of the requested node ('node::port').
#
# As per: http://sources.redhat.com/cluster/wiki/FenceAgentAPI
# The exit state must be:
# 0 = Node is running
# 1 = Failed to contact fence, unknown state.
# 2 = Node is fenced.
#
# Parameters:
#   $conf - The configuration hash reference; 'node::handle' must hold the
#           connection handle and 'node::port' the node to look up.
#   $log  - The log file handle, passed on to 'do_exit'.
#
# This function does not return; it always ends in 'do_exit'.
sub show_state
{
	my ($conf, $log)=@_;
	
	my @state_out=$conf->{node}{handle}->cmd("00:0");
	my $node=$conf->{node}{port};
	
	# Until a status line for the requested node is found, the state is
	# unknown. No match means something went wrong while talking to the
	# Node Assassin (or the node number was not usable).
	my $state=1;
	if ((defined $node) && ($node=~/^\d+$/))
	{
		foreach my $line (@state_out)
		{
			chomp $line;
			
			# NOTE(review): this assumes status lines look like
			# '- Node 02: <state>', which is the form the previous
			# pattern here was written for - confirm against the
			# device's real output.
			my ($this_node, $this_state)=($line=~/- Node (\d+): (.*)$/);
			next if not defined $this_node;
			
			# Compare as numbers so that '2' and '02' both match.
			next if $this_node != $node;
			
			$state=$this_state=~/fenced/i ? 2 : 0;
			last;
		}
	}
	
	do_exit($conf, $log, $state);
}

# This prints the version information of this fence agent and the details of
# each configured fence device, then exits the agent with a status of 0.
#
# Parameters:
#   $conf - The configuration hash reference. Reads 'system::agent_version',
#           'system::nodes' and the 'node::<ID>::...' entries for each ID from
#           1 up to 'system::nodes'.
#   $log  - The log file handle, passed on to 'do_exit'.
sub version
{
	my ($conf, $log)=@_;
	
	# Print the Fence Agent version first.
	print "Fence Agent: ......... Node Assassin ver. $conf->{'system'}{agent_version}\n";
	print "Configured Nodes: .... $conf->{'system'}{nodes}\n";
	for my $node (1..$conf->{'system'}{nodes})
	{
		print " - Node $node Name: .. $conf->{node}{$node}{name}\n";
		print " - Node $node IP: .... $conf->{node}{$node}{ip}\n";
		print " - Node $node Port: .. $conf->{node}{$node}{port}\n";
		print " - Node $node MAC: ... $conf->{node}{$node}{mac}\n";
		# These two lines used to print the IP address again.
		# NOTE(review): the key names 'netmask' and 'gateway' are
		# assumed - confirm them against '/etc/na/fence_na.conf'.
		print " - Node $node Netmask: $conf->{node}{$node}{netmask}\n";
		print " - Node $node Gateway: $conf->{node}{$node}{gateway}\n";
	}
	do_exit($conf, $log, 0);
}


###############################################################################
# Private functions below here.                                               #
###############################################################################

### Contributed by Shaun Fryer and Viktor Pavlenko by way of TPM.
# This merges the second hash reference into the first, in place. For each key
# in the second hash: when the first hash already holds a hash reference under
# that key, the two are merged recursively; in every other case the value from
# the second hash is stored under that key, replacing whatever was there.
sub _add_hash_reference
{
	my ($target, $source)=@_;
	
	foreach my $name (keys %{$source})
	{
		# Anything that isn't already a hash in the target is simply
		# set (or replaced).
		if (ref($target->{$name}) ne 'HASH')
		{
			$target->{$name}=$source->{$name};
			next;
		}
		
		# Both sides have this branch; descend into it.
		_add_hash_reference($target->{$name}, $source->{$name});
	}
}

### Contributed by Shaun Fryer and Viktor Pavlenko by way of TPM.
# This takes a string with double-colon separators and divides on those
# double-colons to create a hash reference where each element is a hash key.
# The result is then merged into the passed hash reference using
# '_add_hash_reference'.
#
# For example, the key string 'node::1::name' with the value 'ariel' ends up
# as: $href->{node}{1}{name}="ariel".
#
# Parameters:
#   $href       - The hash reference to store the value in.
#   $key_string - The double-colon separated list of hash keys.
#   $value      - The value to store under the last key.
sub _make_hash_reference
{
	my ($href, $key_string, $value)=@_;
	
	my @keys = split /::/, $key_string;
	my $last_key = pop @keys;
	my $_href = { $last_key => $value };
	
	# Wrap the value in one hash per remaining key, innermost first. The
	# loop tests the list itself rather than the popped key, because a key
	# of '0' is false and would otherwise end the loop early and store the
	# value at the wrong level.
	while (@keys)
	{
		my $key = pop @keys;
		$_href = { $key => $_href };
	}
	_add_hash_reference($href, $_href);
}

1;
`Input, advice, complaints and meanderings all welcome!`
`Digimer`	`digimer@alteeve.ca`	`https://alteeve.ca/w`	`legal stuff:`
`All info is provided "As-Is". Do not use anything here unless you are willing and able to take responsibility for your own actions. © 1997-2013`
Naming credits go to Christopher Olah!
In memory of Kettle, Tonia, Josh, Leah and Harvey. In special memory of Hannah, Jack and Riley.
Fence na.lib: Difference between revisions

Revision as of 06:26, 8 March 2010

Navigation menu

Search