Fence na.lib: Difference between revisions

From Alteeve Wiki
Jump to navigation Jump to search
No edit summary
No edit summary
 
(2 intermediate revisions by the same user not shown)
Line 1: Line 1:
{{na_header}}
{{na_header}}


This is the fence agent's function library that exists in <span class="code">/etc/na/</span>.
This is the fence agent's function library that exists in <span class="code">/etc/fence_na/</span>.


* Download the [http://nodeassassin.org/files/fence_na/fence_na.lib source code].
<source lang="perl">
<source lang="perl">
#!/usr/bin/perl
#!/usr/bin/perl
Line 10: Line 11:
# Node Assassin - Fence Agent
# Node Assassin - Fence Agent
# Digimer; digimer@alteeve.com
# Digimer; digimer@alteeve.com
# Apr. 06, 2010.
# Jun. 27, 2010.
# Version: 1.1.4
# Version: 1.1.5
#
#
# This software is released under the GPL v2. See the LICENSE file for a copy
# of the GPL v2.




# This connects to a Node Assassin and puts the handle in '$conf->{na}{handle}'.
# This connects to a Node Assassin and puts the handle in
# $conf->{na}{handle}.
sub connect_to_na
sub connect_to_na
{
{
Line 29: Line 33:
if ($conf->{na}{handle}->errmsg)
if ($conf->{na}{handle}->errmsg)
{
{
record($conf, $log, "Connection to Node Assassin: [$conf->{na}{ipaddr}] failed.\nError was: [".$conf->{na}{handle}->errmsg."]\n");
record($conf, $log, "Connection to Node Assassin: [$conf->{na}{ipaddr}] failed.\nError was: [".$conf->{na}{handle}->errmsg."]\n", 1);
$conf->{na}{handle}="";
$conf->{na}{handle}="";
};
};
# record ($conf, $log, "na::handle: [$conf->{na}{handle}]\n");
record($conf, $log, "na::handle: [$conf->{na}{handle}]\n") if $conf->{'system'}{debug};
return ($conf->{na}{handle});
return ($conf->{na}{handle});
}
}


# This handles the actual execution of the action plan.
# This handles the actual execution of an action plan.
sub do_actions
sub do_actions
{
{
my ($conf, $log)=@_;
my ($conf, $log)=@_;
# In the next step, when a 'check' is seen, the port is analyzed and an exit
# In the next step, when a 'check' is seen, the node's power feed is
# status is stored here. Exits 0, 1 and 2 have special meaning, so I default to
# checked and an exit status is stored here. Exits 0, 1 and 2 have
# 9.
# special meaning, so I default to 9 as it has no meaning to the
# FenceAgentAPI.
my $exit_code=9;
my $exit_code=9;
# Process the orders.
# Process the orders.
print "Processing: [$conf->{'system'}{call_order}]\n";
foreach my $order (split/,/, $conf->{'system'}{call_order})
foreach my $order (split/,/, $conf->{'system'}{call_order})
{
{
# record($conf, $log, "Calling: [$order]\n");
record($conf, $log, "Calling: [$order]\n") if $conf->{'system'}{debug};
# Handle a 'release_all' call.
# Handle a 'release_all' call.
Line 66: Line 72:
}
}
# handle a sleep request.
# handle a sleep request. This defaults to one second when no
# integer was included.
if ($order=~/^sleep/)
if ($order=~/^sleep/)
{
{
Line 90: Line 97:
# Handle a status check via Node Assassin.
# Handle a status check via Node Assassin.
# record($conf, $log, "order: [$order]\n");
record($conf, $log, "order: [$order]\n") if $conf->{'system'}{debug};
if ($order=~/(\d\d):(\D+)/)
if ($order=~/(\d\d):(\D+)/)
{
{
my $node=$1;
my $node=$1;
my $check=$2;
my $check=$2;
# Verify the state of the port.
# Verify the state of the port.
# record($conf, $log, "Status check on node: [$node] -> [$check]\n");
record($conf, $log, "Status check on node: [$node] -> [$check]\n") if $conf->{'system'}{debug};
# Get the state.
# Get the state.
my $state=get_states($conf, $log);
my $states=get_states($conf, $log);
if ($state == 1)
if ($states == 1)
{
{
# I had a connection problem. Exit with error
# I had a connection problem. Exit with error
Line 109: Line 117:
# Make the states a bit easier to type.
# Make the states a bit easier to type.
my $power_state=$state->{$node}{power_state};
my $power_state=$states->{$node}{power_state};
my $reset_state=$state->{$node}{reset_state};
my $reset_state=$states->{$node}{reset_state};
my $feed_state=$state->{$node}{feed_state};
my $feed_state=$states->{$node}{feed_state};
# Return the status of the requested node.
# Return the status of the requested node.
# record($conf, $log, "Node Assassin: [#$conf->{'system'}{na_id}/$conf->{na}{na_name}], Node: [$node] Power/Reset/Feed states: [$power_state/$reset_state/$feed_state]\n");
record($conf, $log, "Node Assassin: [#$conf->{'system'}{na_id}/$conf->{na}{na_name}], Node: [$node] Power/Reset/Feed states: [$power_state/$reset_state/$feed_state]\n") if $conf->{'system'}{debug};
if ($check eq "check")
if ($check eq "check")
{
{
Line 149: Line 157:
if (not $feed_state)
if (not $feed_state)
{
{
record($conf, $log, "Node: [$node] failed to boot after a successful power off during a reboot action.\n");
record($conf, $log, "\nWARNING: Node: [$node] failed to boot after a successful power off during a\n", 1);
record($conf, $log, "This is a non-critical error as the node was fenced successfully but may\n");
record($conf, $log, "WARNING: reboot action. This is a non-critical error as the node was fenced\n", 1);
record($conf, $log, "indicate a hardware failure with the node or with Node Assassin itself.\n");
record($conf, $log, "WARNING: successfully but may indicate a hardware failure with the node or\n", 1);
record($conf, $log, "WARNING: with Node Assassin itself.\n\n", 1);
}
}
}
}
Line 172: Line 181:
record($conf, $log, "$line\n");
record($conf, $log, "$line\n");
}
}
record($conf, $log, "Call complete.\n");
record($conf, $log, "Call complete.\n") if $conf->{'system'}{debug};
}
}
Line 184: Line 193:
$exit_status=9 if not defined $exit_status;
$exit_status=9 if not defined $exit_status;
# Close the Node Assassin and log file handle, if they exist.
$conf->{na}{handle}->close() if $conf->{na}{handle};
$conf->{na}{handle}->close() if $conf->{na}{handle};
$log->close() if $log;
$log->close() if $log;
exit ($exit_status);
exit ($exit_status);
}
}
Line 196: Line 207:
# Create the hash reference to store the states in.
# Create the hash reference to store the states in.
my $state={};
my $states={};
# Call '00:0' to get the states. If it fails, return 1 as per
# Call '00:0' to get the states. If it fails, return 1 as per
Line 212: Line 223:
# Convert the state to a simple on/off.
# Convert the state to a simple on/off.
# Store the state.
# Store the state.
$state->{$this_node}{power_state}=$power_state;
$states->{$this_node}{power_state}=$power_state;
$state->{$this_node}{reset_state}=$reset_state;
$states->{$this_node}{reset_state}=$reset_state;
$state->{$this_node}{feed_state}=$feed_state;
$states->{$this_node}{feed_state}=$feed_state;
# record($conf, $log, "Node: [$this_node], Power State: [$state->{$this_node}{power_state}], Reset State: [$state->{$this_node}{reset_state}], Feed State: [$state->{$this_node}{feed_state}].\n");
record($conf, $log, "Node: [$this_node], Power State: [$states->{$this_node}{power_state}], Reset State: [$states->{$this_node}{reset_state}], Feed State: [$states->{$this_node}{feed_state}].\n") if $conf->{'system'}{debug};
}
}
# Return the hash reference.
# Return the hash reference.
return ($state);
return ($states);
}
}


Line 226: Line 237:
{
{
my ($conf, $log)=@_;
my ($conf, $log)=@_;
my $msg=q`
NODE ASSASSIN FENCING AGENT
This program interfaces with one or more Node Assassin fence devices to
set one or more nodes to one or more states.
USAGE:
Arguments are read from STDIN as 'variable=value' pairs, one pair per
new line. This is the method used by 'fenced'.
For testing, arguments may be passed as command line arguments.
./fence_na <options>
OVERVIEW:
This takes arguments that defined which Node Assassin to call, what
node to work on and what action should be taken. These arguments are
defined by the FenceAgentAPI. These actions are then translated into
a set of Node Assassin states which then get passed to the device.
NODE ASSASSIN "PORTS":
The FenceAgentAPI refers to "ports" while the Node Assassin refers to
"nodes". These two terms are thus used interchangably. The reason for
the difference is that, internally, Node Assassin maps each node to
three "ports"; The node's power and reset buttons plus the node's power
feed.
Each node in the Node Assassin can be set to one of four states
described in the next section.
NODE ASSASSIN STATES:
Each Node Assassin port can be set to one of four states. They are:
0
This will release the fence on the specified NA node's power
and reset ports.
1
This state will fence the node specified by the "port"
argument. The fence will remain active until released. Further,
the fence will "lock out" the node's front-panel buttons until
the fence is released.
2
This will fence the node for one second. This is meant to
provide a mechanism to boot or initiate a clean shut down of a
node.
3
This state will fence the node for five seconds to 30 seconds,
depending on how long it takes for the power feed to drop low.
This is meant to provide a way to force crashed nodes to power
off without the front-panel lock.
OPTIONS:
Which options are valid depends on how the arguments are passed in. The
"main" method of passing arguments is via STDIN as a series of
'varible=value' pairs, one per line. All arguments accepted this way
have command-line arguments passed in via single-dashes switches. Each
option below shows either calling method's arguments, where available.
-h
Prints this help message.
-v, -V
Prints the version of this fence agent and then the version of
any configured, reachable Node Assassins.
-q
Tells this fence agent to only print critical messages.
-a <val>, ipaddr=<val>
Tells the fence agent which Node Assassin to use.
NOTE! The name or IP addresses *MUST* match a name or IP
      address defined in '/etc/na/fence_na.conf'! Specifically,
      it must match one of the 'node::X::ipaddr' entries where
      'X' is the ID of the Node Assassin.
-l <val>, login=<val>
This is the login name used to connect to this fence agent. It
must match the value 'system::username' in
'/etc/na/fence_na.conf'.
-p <val>, passwd=<val>
This is the login password used to connect to this fence agent.
It must match the value 'system::password' in
'/etc/na/fence_na.conf'.
-n <val>, port=<val>
This tells the fence agent which node to act on.
NOTE! Please do not confuse this with the Node Assassin's
      internal concept of a port.
-o <val>, action=<val>
This is the action (formerly 'option') to perform on the
selected node. Valid actions and how Node Assassin implements
them are:
on
1. Checks to see if the node is already on. If it is,
  it exits with error code 0 (success).
2. Checks to see if the node is fenced and, if so,
  releases it.
3. Set's the node to state 2 (one second fence of the
  power switch) to initiate boot.
4. Waits one second and then checks that the node feed
  state to ensure that the node is powered on.
off
1. The node is set to state 1, Fenced. Specifically;
2. The Node's reset pin is fenced for one second to
  immediately disable the node.
3. The reset pin is unfenced for one second.
4. The power switch is fenced for five seconds and the
  feed is checked. If it is still high, the Node
  Assassin will wait an additional 25 seconds and
  check the feed state again. If it is still on, an
  error is generated and the fence agent's exit status
  is set to 1.
5. The reset is fenced. At this point, the node's front
  panel buttons are disabled, preventing accidental
  booting of the node before the fence is released.
reboot
Note: This will do a hard reboot! Do not use when a
      normal restart would suffice.
1. This sets the node to state 1 (see "off" above) and
  pauses for one second.
2. The node is set to state 0, releasing the fence and
  pauses for one second.
3. The node's feed is checked to ensure that the node
  is off.
4. The node is set to state 2, booting the node and
  waits one second.
5. The node's feed is checked to see if the node is
  booting.
status
The associated node's power feed is checked and it's
status is returned; 1 for on, 0 for off/disconnected.
monitor, list
Unknown what is to be done under these actions.
###############################################################
NOTE: The following states are supported by Node Assassin only.
      These states are not used by the 'fenced' daemon and are
      provided here as a convenience for the user.
     
      Any commands named "*_all" ignore the node value, if set.
###############################################################
release
1. Power pin is opened (fence released)
2. Reset pin is opened (fence released)
release_all
All closed ports on the specified Node Assassin are
opened (fence released).
fence_all
All nodes are set to state 2 (see "off" above).
boot_all
The power feeds of all nodes on the Node Assassin are
checked. Any found to be off or disconnected are set to
state 2 to initiate boot.
shutdown_all
The power feeds of all nodes on the Node Assassin are
checked. Any found to be on are set to state 2 to
initiate an ACPI-induced soft shutdown.
forcedown_all
The power feeds of all nodes on the Node Assassin are
checked. Any found to be on are set to state 3. This
state is similar to state 1 (see "off" above), except
that the node's front-panel switches are not disabled.
-S <path>, passwd_script=<path> (Not Implemented)
This is the path to a script that returns the password to use
when running this fence agent.
EXAMPLES:
To simulate how 'fenced' calls the script, create a text file called
'args.txt' containing:
-----------------------------------------------------------------------
# Test file used as input for the NA fence agent.
ipaddr=ariel.alteeve.com
port=02
login=motoko
passwd=secret
action=off
-----------------------------------------------------------------------
Now use 'cat' the pipe the contents into the fence agent:
cat args.txt | ./fence_na
This will call the 'off' function against node #02 connected to the
Node Assassin at 'ariel.alteeve.com', fencing it. Change the action
line to 'action=on' and re-run the script again to release the fence
and boot the node.
To duplicate the same call using command line arguments:
./fence_na -a motoko.alteeve.com -n 2 -l motoko -p secret -o off
NOTE:
An internal pager is not implemented. You may wish to run this via
'less':
./fence_na | less
# Point the user at the man page.
 
print "See 'man fence_na' for instructions on using the Node Assassin Fence Agent.\n";
UPDATED:
 
Apr. 6, 2010 Digimer (digimer@alteeve.com)
`;
print $msg;
do_exit($conf, $log, 0);
do_exit($conf, $log, 0);
Line 484: Line 249:
{
{
my ($conf, $log, $na_id)=@_;
my ($conf, $log, $na_id)=@_;
record ($conf, $log, "   - ERROR: Unable to query Node Assassin: [$conf->{na}{$na_id}{na_name}]!\n");
record ($conf, $log, "\nERROR: Unable to query Node Assassin: [$conf->{na}{$na_id}{na_name}]!\n", 1);
record ($conf, $log, "   - ERROR: Please check that it is connected, that the information in\n");
record ($conf, $log, "ERROR: Please check that it is connected, that the information in\n", 1);
record ($conf, $log, "   - ERROR: '/etc/na/fence_na.conf' is accurate and that the proper configuration\n");
record ($conf, $log, "ERROR: '/etc/na/fence_na.conf' is accurate and that the proper configuration\n", 1);
record ($conf, $log, "   - ERROR: has be uploaded to the device.\n");
record ($conf, $log, "ERROR: has be uploaded to the device.\n\n", 1);
return (0);
return (0);
}
}
Line 495: Line 260:
{
{
my ($conf, $log)=@_;
my ($conf, $log)=@_;
record($conf, $log, "In the 'process_action' function.\n");
record($conf, $log, "In the 'process_action' function.\n") if $conf->{'system'}{debug};
# Make this more readable.
# Make this more readable.
my $na_id=$conf->{'system'}{na_id};
my $na_id=$conf->{'system'}{na_id};
my $action=$conf->{na}{action};
my $action=$conf->{na}{action};
my $port=$conf->{na}{port};
my $node=$conf->{na}{port};
# record($conf, $log, "na_id: [$na_id], action: [$action], port: [$port]\n");
record($conf, $log, "na_id: [$na_id], action: [$action], port: [$node]\n") if $conf->{'system'}{debug};
# The following actions require a port. Error if I don't have one.
# The following actions require a port. Error if I don't have one.
if ($port eq "00")
if ($node eq "00")
{
{
# These are the incompatible calls.
# These are the incompatible calls.
if (($action eq "on") || ($action eq "off") || ($action eq "reboot") || ($action eq "status"))
if (($action eq "on") || ($action eq "off") || ($action eq "reboot") || ($action eq "status"))
{
{
record($conf, $log, "\nERROR! Action request: [$action] requires a port number! I got: [$port] which does not seem to be valid.\n\n");
record($conf, $log, "\nERROR! Action request: [$action] requires a port number!\n", 1) if $conf->{'system'}{debug};
record($conf, $log, "ERROR: I got: [$node] which does not seem to be valid.\n\n", 1);
do_exit($conf, $log, 9);
do_exit($conf, $log, 9);
}
}
Line 519: Line 285:
{
{
# Release the fence, if fenced, and boot the node.
# Release the fence, if fenced, and boot the node.
$state=get_states($conf, $log);
$states=get_states($conf, $log);
my $power_state=$state->{$port}{power_state};
my $power_state=$states->{$node}{power_state};
my $reset_state=$state->{$port}{reset_state};
my $reset_state=$states->{$node}{reset_state};
my $feed_state=$state->{$port}{feed_state};
my $feed_state=$states->{$node}{feed_state};
if ($feed_state)
if ($feed_state)
{
{
# Node is already running.
# Node is already running.
record($conf, $log, "Asked to turn on node: [$port], but it's already running. Exiting with code 0.\n");
record($conf, $log, "Asked to turn on node: [$node], but it's already running.\n");
do_exit($conf, $log, 0);
do_exit($conf, $log, 0);
}
}
Line 532: Line 298:
{
{
# Node was fenced, release it first.
# Node was fenced, release it first.
$conf->{'system'}{call_order}="$port:0,sleep,";
$conf->{'system'}{call_order}="$node:0,sleep,";
}
}
$conf->{'system'}{call_order}.="$port:2,sleep,$port:on";
$conf->{'system'}{call_order}.="$node:2,sleep,$node:on";
}
}
elsif ($action eq "off")
elsif ($action eq "off")
{
{
# Fence the node.
# Fence the node.
$conf->{'system'}{call_order}="$port:1,sleep,$port:off";
$conf->{'system'}{call_order}="$node:1,sleep,$node:off";
}
}
elsif ($action eq "reboot")
elsif ($action eq "reboot")
{
{
# I don't do this gracefully because the API says this should
# I don't do this gracefully because the API says this should
# be an "off -> on" process, and "off" is fence...
# be an 'off' -> 'on' process, and 'off' is fence...
$conf->{'system'}{call_order}="$port:1,sleep,$port:0,sleep,$port:off,$port:2,sleep,$port:on";
$conf->{'system'}{call_order}="$node:1,sleep,$node:0,sleep,$node:off,$node:2,sleep,$node:on";
}
}
elsif ($action eq "status")
elsif ($action eq "status")
{
{
# This checks the node's power feed.
# This checks the node's power feed.
$conf->{'system'}{call_order}="$port:check";
$conf->{'system'}{call_order}="$node:check";
}
elsif (($action eq "monitor") or ($action eq "list"))
{
# Not sure what to do here.
}
}
### ALL ACTIONS BELOW HERE ARE OUTSIDE OF THE FenceAgentAPI!
### ALL ACTIONS BELOW HERE ARE OUTSIDE OF THE FenceAgentAPI!
Line 560: Line 322:
{
{
# Release the given node without booting it.
# Release the given node without booting it.
$conf->{'system'}{call_order}="$port:0";
$conf->{'system'}{call_order}="$node:0";
}
}
elsif ($action eq "release_all")
elsif ($action eq "release_all")
Line 571: Line 333:
# Fence all ports.
# Fence all ports.
$conf->{'system'}{call_order}="fence_all";
$conf->{'system'}{call_order}="fence_all";
}
elsif ($action eq "boot")
{
# Boot the specific node if it is off.
$states=get_states($conf, $log);
# Decide how, or if, to proceed based on the current state of
# each node.
$node=sprintf("%02d", $node);
my $power_state=$states->{$node}{power_state};
my $reset_state=$states->{$node}{reset_state};
my $feed_state=$states->{$node}{feed_state};
if (($power_state) || ($reset_state))
{
# Node was fenced, release first.
$conf->{'system'}{call_order}.="$node:0,sleep,";
}
if (not $feed_state)
{
# Boot the node.
$conf->{'system'}{call_order}.="$node:2,sleep,";
}
else
{
record($conf, $log, "WARNING: Node: [$node] seems to be already on, taking no action.\n", 1);
}
$conf->{'system'}{call_order}=~s/,$//;
}
}
elsif ($action eq "boot_all")
elsif ($action eq "boot_all")
{
{
# Boot all nodes that are off.
# Boot all nodes that are off.
$state=get_states($conf, $log);
$states=get_states($conf, $log);
# Decide how, or if, to proceed based on the current state of
# Decide how, or if, to proceed based on the current state of
Line 582: Line 371:
{
{
$node=sprintf("%02d", $node);
$node=sprintf("%02d", $node);
my $power_state=$state->{$node}{power_state};
my $power_state=$states->{$node}{power_state};
my $reset_state=$state->{$node}{reset_state};
my $reset_state=$states->{$node}{reset_state};
my $feed_state=$state->{$node}{feed_state};
my $feed_state=$states->{$node}{feed_state};
if (($power_state) || ($reset_state))
if (($power_state) || ($reset_state))
{
{
Line 595: Line 384:
$conf->{'system'}{call_order}.="$node:2,sleep,";
$conf->{'system'}{call_order}.="$node:2,sleep,";
}
}
}
$conf->{'system'}{call_order}=~s/,$//;
}
elsif ($action eq "shutdown")
{
# Shutdown a specific node that is on cleanly via ACPI.
$states=get_states($conf, $log);
$node=sprintf("%02d", $node);
my $feed_state=$states->{$node}{feed_state};
if ($feed_state)
{
# shutdown the node.
$conf->{'system'}{call_order}.="$node:2";
}
else
{
record($conf, $log, "WARNING: Node: [$node] seems to be already off, taking no action. Is the cable connected?\n", 1);
}
}
$conf->{'system'}{call_order}=~s/,$//;
$conf->{'system'}{call_order}=~s/,$//;
Line 601: Line 407:
{
{
# Shutdown all nodes that are on cleanly via ACPI.
# Shutdown all nodes that are on cleanly via ACPI.
$state=get_states($conf, $log);
$states=get_states($conf, $log);
# Decide how, or if, to proceed based on the current state of
# Decide how, or if, to proceed based on the current state of
Line 608: Line 414:
{
{
$node=sprintf("%02d", $node);
$node=sprintf("%02d", $node);
my $power_state=$state->{$node}{power_state};
my $power_state=$states->{$node}{power_state};
my $reset_state=$state->{$node}{reset_state};
my $reset_state=$states->{$node}{reset_state};
my $feed_state=$state->{$node}{feed_state};
my $feed_state=$states->{$node}{feed_state};
if ($feed_state)
if ($feed_state)
{
{
# Boot the node.
# Shutdown the node.
$conf->{'system'}{call_order}.="$node:2,sleep,";
$conf->{'system'}{call_order}.="$node:2,sleep,";
}
}
Line 623: Line 429:
# Shutdown all nodes that are on by holding the power button
# Shutdown all nodes that are on by holding the power button
# until they go down.
# until they go down.
$state=get_states($conf, $log);
$states=get_states($conf, $log);
# Decide how, or if, to proceed based on the current state of
# Decide how, or if, to proceed based on the current state of
Line 630: Line 436:
{
{
$node=sprintf("%02d", $node);
$node=sprintf("%02d", $node);
my $power_state=$state->{$node}{power_state};
my $power_state=$states->{$node}{power_state};
my $reset_state=$state->{$node}{reset_state};
my $reset_state=$states->{$node}{reset_state};
my $feed_state=$state->{$node}{feed_state};
my $feed_state=$states->{$node}{feed_state};
if ($feed_state)
if ($feed_state)
{
{
Line 643: Line 449:
else
else
{
{
record($conf, $log, "Unknown action request: [$action]!\n");
record($conf, $log, "\nERROR: Unknown action request: [$action]!\n\n", 1);
do_exit($conf, $log, 9);
do_exit($conf, $log, 9);
}
}
Line 659: Line 465:
my $read=IO::Handle->new();
my $read=IO::Handle->new();
my $shell_call="$conf->{'system'}{conf_file}";
my $shell_call="$conf->{'system'}{conf_file}";
# print "Shell call: [$shell_call]\n";
record($conf, $log, "Shell call: [$shell_call]\n") if $conf->{'system'}{debug};
open ($read, "<$shell_call") or die "Failed to read: [$shell_call], error was: $!\n";
open ($read, "<$shell_call") or die "Failed to read: [$shell_call], error was: $!\n";
while (<$read>)
while (<$read>)
Line 677: Line 483:
$val=~s/\s+$//;
$val=~s/\s+$//;
next if (not $var);
next if (not $var);
# print "Storing: [$var] = [$val]\n";
record($conf, $log, "Storing: [$var] = [$val]\n") if $conf->{'system'}{debug};
_make_hash_reference($conf, $var, $val);
_make_hash_reference($conf, $var, $val);
}
}
Line 689: Line 495:
{
{
my ($conf, $log, $bad)=@_;
my ($conf, $log, $bad)=@_;
# MADI: Remove this before release.
# record($conf, $log, "Got args:\n");
# Loop through the passed arguments, if any.
# Loop through the passed arguments, if any.
record($conf, $log, "Got args:\n") if $conf->{'system'}{debug};
my $set_next="";
my $set_next="";
foreach my $arg (@ARGV)
foreach my $arg (@ARGV)
{
{
# MADI: Remove this before release.
record($conf, $log, "[$arg]\n") if $conf->{'system'}{debug};
# record($conf, $log, "[$arg]\n");
$conf->{'system'}{got_cla}=1;
$conf->{'system'}{got_cla}=1;
Line 707: Line 510:
# It's set, use its contents as the hash key.
# It's set, use its contents as the hash key.
$conf->{na}{$set_next}=$arg;
$conf->{na}{$set_next}=$arg;
record($conf, $log, "Setting: 'na::$set_next': [$conf->{na}{$set_next}]\n") if $conf->{'system'}{debug};
# MADI: Remove this before release.
# record($conf, $log, "Setting: 'na::$set_next': [$conf->{na}{$set_next}]\n");
# Clear it now for the next go-round.
# Clear it now for the next go-round.
Line 724: Line 525:
# Print the version information and then exit.
# Print the version information and then exit.
$conf->{'system'}{version}=1;
$conf->{'system'}{version}=1;
# record($conf,$log,"Setting version\n");
record($conf,$log,"Setting version\n") if $conf->{'system'}{debug};
}
}
elsif ($arg=~/-q/)
elsif ($arg=~/-q/)
Line 730: Line 531:
# Suppress all non-critical messages from STDOUT.
# Suppress all non-critical messages from STDOUT.
$conf->{'system'}{quiet}=1;
$conf->{'system'}{quiet}=1;
}
elsif ($arg=~/-d/)
{
# Enable debug mode.
$conf->{'system'}{debug}=1;
}
}
elsif ($arg=~/^-/)
elsif ($arg=~/^-/)
Line 741: Line 547:
# Node Assassin to call.
# Node Assassin to call.
$set_next="ipaddr";
$set_next="ipaddr";
# record ($conf, $log, "Next argument will be stored in: [$set_next]\n");
record ($conf, $log, "Next argument will be stored in: [$set_next]\n") if $conf->{'system'}{debug};
}
}
elsif ($arg eq "l")
elsif ($arg eq "l")
Line 747: Line 553:
# This is the login name.
# This is the login name.
$set_next="login";
$set_next="login";
# record ($conf, $log, "Next argument will be stored in: [$set_next]\n");
record ($conf, $log, "Next argument will be stored in: [$set_next]\n") if $conf->{'system'}{debug};
}
}
elsif ($arg eq "p")
elsif ($arg eq "p")
Line 756: Line 562:
# contents will replace this value.
# contents will replace this value.
$set_next="passwd";
$set_next="passwd";
# record ($conf, $log, "Next argument will be stored in: [$set_next]\n");
record ($conf, $log, "Next argument will be stored in: [$set_next]\n") if $conf->{'system'}{debug};
}
}
elsif ($arg eq "n")
elsif ($arg eq "n")
Line 762: Line 568:
# This is the node to work on.
# This is the node to work on.
$set_next="port";
$set_next="port";
# record ($conf, $log, "Next argument will be stored in: [$set_next]\n");
record ($conf, $log, "Next argument will be stored in: [$set_next]\n") if $conf->{'system'}{debug};
}
}
elsif ($arg eq "o")
elsif ($arg eq "o")
Line 768: Line 574:
# This is the action to take.
# This is the action to take.
$set_next="action";
$set_next="action";
# record ($conf, $log, "Next argument will be stored in: [$set_next]\n");
record ($conf, $log, "Next argument will be stored in: [$set_next]\n") if $conf->{'system'}{debug};
}
}
elsif ($arg eq "S")
elsif ($arg eq "S")
Line 777: Line 583:
# the password to STDOUT.
# the password to STDOUT.
$set_next="passwd_script";
$set_next="passwd_script";
# record ($conf, $log, "Next argument will be stored in: [$set_next]\n");
record ($conf, $log, "Next argument will be stored in: [$set_next]\n") if $conf->{'system'}{debug};
}
}
}
}
Line 784: Line 590:
### MADI: I might want to pick up arguments via multiple lines.
### MADI: I might want to pick up arguments via multiple lines.
# Bad argument.
# Bad argument.
record($conf, $log, "Argument: [$arg] is not valid!\n");
record($conf, $log, "\nERROR: Argument: [$arg] is not valid!\n");
record($conf, $log, "Please run './fence_na --help' to see a list of valid arguments.\n");
record($conf, $log, "ERROR: Please run: [man fence_na] to see a list of valid arguments.\n\n");
$bad=1;
$bad=1;
}
}
Line 806: Line 612:
# Record the line for now, but comment this out before release.
# Record the line for now, but comment this out before release.
# record ($conf, $log, "Processing option line: [$option]\n");
record ($conf, $log, "Processing option line: [$option]\n") if $conf->{'system'}{debug};
# strip leading and trailing whitespace
# strip leading and trailing whitespace
Line 825: Line 631:
# Record the line for now, but comment this out before release.
# Record the line for now, but comment this out before release.
# record ($conf, $log, "Name: [$name], value: [$value].\n");
record ($conf, $log, "Name: [$name], value: [$value].\n") if $conf->{'system'}{debug};
# Set my variables depending on the variable name.
# Set my variables depending on the variable name.
Line 844: Line 650:
(undef, $value) = split /\s+/,$value;
(undef, $value) = split /\s+/,$value;
$conf->{na}{port}=$value;
$conf->{na}{port}=$value;
warn "Warning! The argument 'fm' is deprecated, use 'port' instead. Value: [$value] set for 'port'\n";
record($conf, $log, "Warning! The argument 'fm' is deprecated, use 'port' instead.\n", 1);
record($conf, $log, "Warning! Value: [$value] set for 'port'\n", 1);
}
}
else
else
{
{
# Port was already set, so simply ignore this.
# Port was already set, so simply ignore this.
warn "Warning! The argument 'fm' is deprecated, use 'port' instead. Value: [$value] ignored.\n";
record($conf, $log, "Warning! The argument 'fm' is deprecated, use 'port' instead.\n", 1);
record($conf, $log, "Warning! Value: [$value] ignored.\n", 1);
}
}
}
}
Line 871: Line 679:
# be replaced if 'login' is seen later.
# be replaced if 'login' is seen later.
$conf->{na}{login}=$value;
$conf->{na}{login}=$value;
warn "Warning! The argument 'name' is deprecated, use 'login' instead. Value: [$value] set for 'login'.\n";
record($conf, $log, "Warning! The argument 'name' is deprecated, use 'login' instead.\n", 1);
record($conf, $log, "Warning! Value: [$value] set for 'login'.\n", 1);
}
}
else
else
Line 877: Line 686:
# I've already seen the 'login' value so I will
# I've already seen the 'login' value so I will
# ignore this value.
# ignore this value.
warn "Warning! The argument 'name' is deprecated, use 'login' instead. Value: [$value] ignored.\n";
record($conf, $log, "Warning! The argument 'name' is deprecated, use 'login' instead.\n", 1);
record($conf, $log, "Warning! Value: [$value] ignored.\n", 1);
}
}
}
}
Line 883: Line 693:
{
{
# 'option' is deprecated.
# 'option' is deprecated.
record($conf, $log, "Please use 'action', not 'option', as the later is deprecated.\n") if $name eq "option";
record($conf, $log, "Please use 'action', not 'option', as the later is deprecated.\n", 1) if $name eq "option";
$conf->{na}{action}=$value;
$conf->{na}{action}=$value;
}
}
Line 902: Line 712:
# This sets the port number to act on.
# This sets the port number to act on.
$conf->{na}{port}=$value;
$conf->{na}{port}=$value;
}  
}
elsif ($name eq "nodename")
{
# This is passed by 'fenced' via 'cluster.conf' as of
# cluster version 3, but it's not yet documented.
$conf->{'system'}{nodename}=$value;
}
elsif ($name eq "quiet")
elsif ($name eq "quiet")
{
{
Line 908: Line 724:
# custom argument to suppress output to STDOUT.
# custom argument to suppress output to STDOUT.
$conf->{'system'}{quiet}=1;
$conf->{'system'}{quiet}=1;
}  
}
else
else
{
{
warn "Illegal name in option: [$option] at line: [$line_count]\n";
record($conf, $log, "\nERROR: Illegal name in option: [$option] at line: [$line_count]\n\n", 1);
$bad=1;
# 'rohara' from #linux-cluster suggested it's better to
# simply ignore unknown input, as that is the behaviour
# the fenced authors expect.
#$bad=1;
}
}
}
}
Line 921: Line 740:
sub record
sub record
{
{
my ($conf, $log, $msg)=@_;
my ($conf, $log, $msg, $critical)=@_;
$critical=0 if not $critical;
# The log file gets everything.
print $log $msg;
print $log $msg;
print $msg if not $conf->{'system'}{quiet};
print $msg if not $conf->{'system'}{quiet};
# Critical messages have to print, so this ensure that it gets out
# when 'quiet' is in use.
print $msg if (($critical) && ($conf->{'system'}{quiet}));
return(0);
return(0);
Line 939: Line 764:
{
{
$node=sprintf("%02d", $node).":$state";
$node=sprintf("%02d", $node).":$state";
record ($conf, $log, "Calling: [$node]\n");
record ($conf, $log, "Calling: [$node]\n") if $conf->{'system'}{debug};
my @set_state=$conf->{na}{handle}->cmd("$node");
my @set_state=$conf->{na}{handle}->cmd("$node");
foreach my $line (@set_state)
foreach my $line (@set_state)
Line 952: Line 777:
}
}


# When asked to 'monitor' or 'list', show a CSV of all nodes, node alias and
# When asked to 'monitor' or 'list', show a CSV of all nodes and their aliases,
# their state. The first two are standard, the third is not.
# when found in the config file.
sub show_list
sub show_list
{
{
my ($conf, $log, $mode)=@_;
my ($conf, $log)=@_;
$mode="list" if not "$mode";
record($conf, $log, "In 'show_list' function.\n") if $conf->{'system'}{debug};
record($conf, $log, "In 'show_list' function. Passed: mode: [$mode]\n");
# Get an up to date list of the ports.
# Get an up to date list of the ports.
my $state=get_states($conf, $log);
my $na_id=$conf->{'system'}{na_id};
my $na_id=$conf->{'system'}{na_id};
my $na_name=$conf->{na}{na_name};
record($conf, $log, "na_id: [$na_id], max_node: [$conf->{na}{max_nodes}]\n") if $conf->{'system'}{debug};
my $power_state="";
my $node_id=0;
for (1..$conf->{na}{max_nodes})
foreach my $port (sort {$a cmp $b} keys %{$state})
{
{
# record ($conf, $log, "Modulous of ($port % 2) is: [".($port % 2)."]\n");
my $node=$_;
if ($port % 2)
my $alias=$conf->{na}{$na_id}{alias}{$node} ? $conf->{na}{$na_id}{alias}{$node} : "--";
{
record ($conf, $log, "$node,$alias\n", 1);
$power_state=$state->{$port};
next;
}
else
{
$node_id++;
my $power_feed=$conf->{na}{$na_id}{alias}{$node_id} eq "unused" ? "--" : $power_state;
my $reset_state=$state->{$port};
my $line="";
if ($mode eq "list")
{
# Responding to an 'action=list' call.
$line=$conf->{na}{$na_id}{alias}{$node_id} ? "$node_id,$conf->{na}{$na_id}{alias}{$node_id}" : "$node_id,$na_name-$node_id";
$line.=",power_state:$power_state,reset_state:$reset_state,power_feed:$power_feed";
}
else
{
# Responding to a 'version' call.
my $name=$conf->{na}{$na_id}{alias}{$node_id} ? "$conf->{na}{$na_id}{alias}{$node_id}" : "$na_name-$node_id";
$line ="  - Node $node_id Details;\n";
$line.="    - Name: ..... $name\n";
$line.="     - Power Port: $power_state\n";
$line.="    - Reset Port: $reset_state\n";
$line.="    - Power Feed: $power_feed";
}
record($conf, $log, "$line\n");
$power_state="";
}
}
}
do_exit($conf, $log, 0);
do_exit($conf, $log, 0);
}
# This queries the Node Assassin and returns the state of the requested node.
sub show_state
{
my ($conf, $log)=@_;
my @state_out=$conf->{na}{handle}->cmd("00:0");
my $state="";
my $node=$conf->{na}{port};
foreach my $line (@state_out)
{
chomp;
my $line=$_;
my ($state)=($line=~/- Node $node: (.*?)/);
if ($state)
{
$state=lc($state)=~/fenced/ ? 2 : 0;
last;
}
}
# No state means something went wrong while talking to the Node
# Assassin.
$state=1 if (($state != 0) && ($state != 2));
# As per: http://sources.redhat.com/cluster/wiki/FenceAgentAPI
# The exit state must be:
# 0 = Node is running
# 1 = Failed to contact fence, unknown state.
# 2 = Node is fenced.
do_exit($conf, $log, $state);
}
}


Line 1,042: Line 805:
# Print the Fence Agent version first.
# Print the Fence Agent version first.
record ($conf, $log, "Fence Agent: ..... Node Assassin ver. $conf->{'system'}{agent_version}\n");
record ($conf, $log, "Fence Agent: ..... Node Assassin ver. $conf->{'system'}{agent_version}\n", 1);
record ($conf, $log, "Node Assassins: .. $conf->{'system'}{na_num}\n");
record ($conf, $log, "Node Assassins: .. $conf->{'system'}{na_num}\n", 1);
for my $na_id (1..$conf->{'system'}{na_num})
for my $na_id (1..$conf->{'system'}{na_num})
{
{
Line 1,064: Line 827:
($serial_number)=($line=~/\s(\S+)$/) if ($line =~ /Serial Number/i );
($serial_number)=($line=~/\s(\S+)$/) if ($line =~ /Serial Number/i );
($firmware_ver)=($line=~/\s(\S+)$/) if ($line =~ /NAOS Version/i );
($firmware_ver)=($line=~/\s(\S+)$/) if ($line =~ /NAOS Version/i );
# print "line: [$line]\n";
record($conf, $log, "line: [$line]\n") if $conf->{'system'}{debug};
}
}
}
}
Line 1,073: Line 836:
$firmware_ver="??";
$firmware_ver="??";
}
}
record ($conf, $log, " - Node Assassin $na_id\n");
record ($conf, $log, " - Node Assassin:  #$na_id\n", 1);
record ($conf, $log, "  - Name: ....... $conf->{na}{$na_id}{na_name}\n");
record ($conf, $log, "  - Name: ....... $conf->{na}{$na_id}{na_name}\n", 1);
record ($conf, $log, "  - IP Address: . $conf->{na}{$na_id}{ipaddr}\n");
record ($conf, $log, "  - IP Address: . $conf->{na}{$na_id}{ipaddr}\n", 1);
record ($conf, $log, "  - TCP Port: ... $conf->{na}{$na_id}{tcp_port}\n");
record ($conf, $log, "  - TCP Port: ... $conf->{na}{$na_id}{tcp_port}\n", 1);
record ($conf, $log, "  - MAC Address:  $conf->{na}{$na_id}{mac}\n");
record ($conf, $log, "  - MAC Address:  $conf->{na}{$na_id}{mac}\n", 1);
record ($conf, $log, "  - Netmask: .... $conf->{na}{$na_id}{netmask}\n");
record ($conf, $log, "  - Netmask: .... $conf->{na}{$na_id}{netmask}\n", 1);
record ($conf, $log, "  - Gateway: .... $conf->{na}{$na_id}{gateway}\n");
record ($conf, $log, "  - Gateway: .... $conf->{na}{$na_id}{gateway}\n", 1);
record ($conf, $log, "  - Serial #: ... $serial_number\n");
record ($conf, $log, "  - Serial #: ... $serial_number\n", 1);
record ($conf, $log, "  - Firmware: ... $firmware_ver\n");
record ($conf, $log, "  - Firmware: ... $firmware_ver\n", 1);
record ($conf, $log, "  - Build Date: . $build_date (yyyy-mm-dd)\n");
record ($conf, $log, "  - Build Date: . $build_date (yyyy-mm-dd)\n", 1);
record ($conf, $log, "  - Max Nodes: .. $conf->{na}{$na_id}{max_nodes}\n");
record ($conf, $log, "  - Max Nodes: .. $conf->{na}{$na_id}{max_nodes}\n", 1);
# Get the node states.
$states=get_states($conf, $log);
for (1..$conf->{na}{$na_id}{max_nodes})
{
my $node=sprintf("%02d", $_);
my $power_state=$states->{$node}{power_state};
my $reset_state=$states->{$node}{reset_state};
my $feed_state=$states->{$node}{feed_state};
record ($conf, $log, "    - Node $node: .. p[$power_state], r[$reset_state], f[$feed_state]\n", 1);
}
# Close the handle for the next loop.
if ($conf->{na}{handle})
if ($conf->{na}{handle})
{
{
show_list($conf, $log, "version");
show_list($conf, $log, "version", 1);
$conf->{na}{tcp_port}->close();
$conf->{na}{tcp_port}->close();
}
}

Latest revision as of 17:02, 25 August 2010

 Node Assassin :: Fence na.lib

This is the fence agent's function library that exists in /etc/fence_na/.

#!/usr/bin/perl
#
# This is the function library for the Node Assassin fence agent.
# 
# Node Assassin - Fence Agent
# Digimer; digimer@alteeve.com
# Jun. 27, 2010.
# Version: 1.1.5
#
# This software is released under the GPL v2. See the LICENSE file for a copy
# of the GPL v2.


# This connects to a Node Assassin and puts the handle in
# $conf->{na}{handle}.
#
# Parameters:
#   $conf - Configuration hash reference. 'na::ipaddr' and 'na::tcp_port' name
#           the device to connect to.
#   $log  - Log file handle, passed on to 'record' and 'do_exit'.
#
# Returns the connection handle, or an empty string if the connection could not
# be opened. Exits with status '1' if the Net::Telnet object can't be created.
sub connect_to_na
{
	my ($conf, $log)=@_;
	
	# 'Errmode' is set to 'return' so that a failed 'open' comes back here
	# with 'errmsg' set, letting the failure be logged below.
	$conf->{na}{handle}=Net::Telnet->new(
		Timeout	=>	10,
		Port	=>	$conf->{na}{tcp_port},
		Prompt	=>	'/EOM$/',
		Errmode	=>	'return',
	) or do_exit($conf, $log, 1);
	$conf->{na}{handle}->open($conf->{na}{ipaddr});
	if ($conf->{na}{handle}->errmsg)
	{
		record($conf, $log, "Connection to Node Assassin: [$conf->{na}{ipaddr}] failed.\nError was: [".$conf->{na}{handle}->errmsg."]\n", 1);
		
		# Callers test the handle for truth to see if they are
		# connected, so blank it on failure.
		$conf->{na}{handle}="";
	}
	record($conf, $log, "na::handle: [$conf->{na}{handle}]\n") if $conf->{'system'}{debug};
	
	return ($conf->{na}{handle});
}

# This handles the actual execution of an action plan.
#
# The plan is the comma-separated list in $conf->{'system'}{call_order}. Each
# entry is handled as one of:
#   release_all, fence_all - Set every port via 'set_all_state'.
#   sleep [N]              - Pause; one second when no integer is given.
#   NN:<word>              - Check node NN's states. <word> is 'check', 'off',
#                            'on' or 'reboot' and decides how the feed state
#                            maps to the exit code.
#   anything else          - Sent as-is to the Node Assassin.
#
# Parameters:
#   $conf - Configuration hash reference.
#   $log  - Log file handle.
#
# Returns the exit code set by the last check, or '9' if no check was made.
sub do_actions
{
	my ($conf, $log)=@_;
	
	# In the next step, when a 'check' is seen, the node's power feed is
	# checked and an exit status is stored here. Exits 0, 1 and 2 have
	# special meaning, so I default to 9 as it has no meaning to the
	# FenceAgentAPI.
	my $exit_code=9;
	
	# Process the orders. This goes through 'record' so that it is logged
	# and so that 'quiet' is honoured.
	record($conf, $log, "Processing: [$conf->{'system'}{call_order}]\n");
	foreach my $order (split/,/, $conf->{'system'}{call_order})
	{
		record($conf, $log, "Calling: [$order]\n") if $conf->{'system'}{debug};
		
		# Handle a 'release_all' call.
		if ($order eq "release_all")
		{
			set_all_state($conf, $log, 0);
			next;
		}
		
		# Handle a 'fence_all' call.
		if ($order eq "fence_all")
		{
			set_all_state($conf, $log, 1);
			next;
		}
		
		# handle a sleep request. This defaults to one second when no
		# integer was included.
		if ($order=~/^sleep/)
		{
			my $time=$order=~/sleep (\d+)/ ? $1 : 1;
			record ($conf, $log, "Sleeping: $time, ");
			if ($time == 1)
			{
				sleep 1;
				record ($conf, $log, "Done.\n");
			}
			else
			{
				while ($time)
				{
					$time--;
					sleep 1;
					record ($conf, $log, "$time, ") if $time > 1;
					record ($conf, $log, "$time. Done.\n") if $time == 1;
				}
			}
			next;
		}
		
		# Handle a status check via Node Assassin.
		record($conf, $log, "order: [$order]\n") if $conf->{'system'}{debug};
		if ($order=~/(\d\d):(\D+)/)
		{
			my $node=$1;
			my $check=$2;
			
			# Verify the state of the port.
			record($conf, $log, "Status check on node: [$node] -> [$check]\n") if $conf->{'system'}{debug};
			
			# Get the state. Anything other than a hash reference
			# means 'get_states' failed.
			my $states=get_states($conf, $log);
			if (not ref $states)
			{
				# I had a connection problem. Exit with error
				# code '1' as per:
				# http://sources.redhat.com/cluster/wiki/FenceAgentAPI
				do_exit($conf, $log, 1);
			}
			
			# Make the states a bit easier to type.
			my $power_state=$states->{$node}{power_state};
			my $reset_state=$states->{$node}{reset_state};
			my $feed_state=$states->{$node}{feed_state};
			
			# Return the status of the requested node.
			record($conf, $log, "Node Assassin: [#$conf->{'system'}{na_id}/$conf->{na}{na_name}], Node: [$node] Power/Reset/Feed states: [$power_state/$reset_state/$feed_state]\n") if $conf->{'system'}{debug};
			if ($check eq "check")
			{
				# Return '2' if the node is off and '0' if it
				# is on.
				$exit_code=$feed_state ? 0 : 2;
			}
			elsif ($check eq "off")
			{
				# 'off' was called, make sure the node is now
				# off. This may be called by 'reboot' in which
				# case 'exit_code' will simply be over-written
				# when the final 'reboot' state check is called.
				$exit_code=$feed_state ? 1 : 0;
			}
			elsif ($check eq "on")
			{
				# 'on' was called, make sure the node is now
				# on.
				$exit_code=$feed_state ? 0 : 1;
			}
			elsif ($check eq "reboot")
			{
				# Make sure that 'exit_code' was set to '0' by
				# the earlier call. We checked again to make
				# sure the node came back up, and will log an
				# error if it didn't, but we return '0' just
				# the same, as per the API.
				if (not $exit_code)
				{
					# The power off portion worked. Check if the
					# node booted properly and record an error if
					# not. The exit code stays '0' because the
					# fence itself succeeded.
					if (not $feed_state)
					{
						record($conf, $log, "\nWARNING: Node: [$node] failed to boot after a successful power off during a\n", 1);
						record($conf, $log, "WARNING: reboot action. This is a non-critical error as the node was fenced\n", 1);
						record($conf, $log, "WARNING: successfully but may indicate a hardware failure with the node or\n", 1);
						record($conf, $log, "WARNING: with Node Assassin itself.\n\n", 1);
					}
				}
				else
				{
					# The power off portion failed, exit with '1'
					# regardless of what the feed shows now.
					$exit_code=1;
				}
			}
			next;
		}
		
		# Handle a fence call.
		my @set_state=$conf->{na}{handle}->cmd("$order");
		foreach my $line (@set_state)
		{
			chomp $line;
			next if not $line;
			record($conf, $log, "$line\n");
		}
		record($conf, $log, "Call complete.\n") if $conf->{'system'}{debug};
	}
	
	return ($exit_code);
}

# This cleanly exits the agent.
#
# Parameters:
#   $conf        - Configuration hash reference; 'na::handle' is closed if set.
#   $log         - Log file handle; closed if set.
#   $exit_status - Exit status to use. Defaults to '9' when not defined.
#
# This never returns.
sub do_exit
{
	# These are lexical so that the caller's (or any package-level)
	# variables of the same names are left alone.
	my ($conf, $log, $exit_status)=@_;
	$exit_status=9 if not defined $exit_status;
	
	# Close the Node Assassin and log file handle, if they exist.
	$conf->{na}{handle}->close() if $conf->{na}{handle};
	$log->close() if $log;
	
	exit ($exit_status);
}

# This gets the states for the active node and returns the states in a hash
# reference.
#
# Parameters:
#   $conf - Configuration hash reference; 'na::handle' is the open connection.
#   $log  - Log file handle, used for debug output only.
#
# Returns a hash reference keyed by the node number exactly as the device
# reports it, each holding 'power_state', 'reset_state' and 'feed_state'.
# Returns '1' when there is no connection or the query returned nothing.
sub get_states
{
	my ($conf, $log)=@_;
	
	# Create the hash reference to store the states in.
	my $states={};
	
	# Without a handle there is nothing to query. The handle is blanked
	# when a connection attempt fails, so treat that the same as a failed
	# call.
	return(1) if not $conf->{na}{handle};
	
	# Call '00:0' to get the states. If it fails, return 1 as per
	# FenceAgentAPI requirements.
	my @check_state=$conf->{na}{handle}->cmd("00:0") or return(1);
	
	# Loop through the output.
	foreach my $line (@check_state)
	{
		# Chomp the newline off and then pull the port and state out.
		chomp $line;
		my ($this_node, $power_state, $reset_state, $feed_state)=($line=~/^- Node (\d+): P(\d+), R(\d+), F(\d+)$/);
		
		# Skip if this isn't a status line.
		next if not $this_node;
		
		# Store the state.
		$states->{$this_node}{power_state}=$power_state;
		$states->{$this_node}{reset_state}=$reset_state;
		$states->{$this_node}{feed_state}=$feed_state;
		record($conf, $log, "Node: [$this_node], Power State: [$states->{$this_node}{power_state}], Reset State: [$states->{$this_node}{reset_state}], Feed State: [$states->{$this_node}{feed_state}].\n") if $conf->{'system'}{debug};
	}
	
	# Return the hash reference.
	return ($states);
}

# This prints the 'help' message and then exits with status '0'.
#
# Parameters:
#   $conf - Configuration hash reference, passed on to 'do_exit'.
#   $log  - Log file handle, passed on to 'do_exit'.
sub help
{
	my ($conf, $log)=@_;
	
	# The real documentation lives in the man page, so just point at it.
	my $pointer="See 'man fence_na' for instructions on using the Node Assassin Fence Agent.\n";
	print $pointer;
	
	do_exit($conf, $log, 0);
}

# This error message is printed when there was a connection problem with a
# given Node Assassin.
#
# Parameters:
#   $conf  - Configuration hash reference.
#   $log   - Log file handle.
#   $na_id - ID of the Node Assassin that could not be queried.
#
# Always returns '0'.
sub no_connection_error
{
	my ($conf, $log, $na_id)=@_;
	
	# Point the user at the config file that was actually read, when
	# known, rather than assuming where it lives.
	my $conf_file=$conf->{'system'}{conf_file} ? $conf->{'system'}{conf_file} : "/etc/na/fence_na.conf";
	
	record ($conf, $log, "\nERROR: Unable to query Node Assassin: [$conf->{na}{$na_id}{na_name}]!\n", 1);
	record ($conf, $log, "ERROR: Please check that it is connected, that the information in\n", 1);
	record ($conf, $log, "ERROR: '$conf_file' is accurate and that the proper configuration\n", 1);
	record ($conf, $log, "ERROR: has been uploaded to the device.\n\n", 1);
	return (0);
}

# This turns the requested action into an action plan.
#
# The action is read from 'na::action' and the node from 'na::port'. The plan
# is written to $conf->{'system'}{call_order} as a comma-separated list of
# orders for 'do_actions'. Actions that depend on the current state of a node
# query the Node Assassin first.
#
# Parameters:
#   $conf - Configuration hash reference.
#   $log  - Log file handle.
#
# Exits with '9' on an unknown action or a missing port, with '1' if the node
# states can't be read and with '0' if 'on' is requested for a running node.
sub process_action
{
	my ($conf, $log)=@_;
	record($conf, $log, "In the 'process_action' function.\n") if $conf->{'system'}{debug};
	
	# Make this more readable.
	my $na_id=$conf->{'system'}{na_id};
	my $action=$conf->{na}{action};
	my $node=$conf->{na}{port};
	record($conf, $log, "na_id: [$na_id], action: [$action], port: [$node]\n") if $conf->{'system'}{debug};
	
	# The following actions require a port. Error if I don't have one.
	if ($node eq "00")
	{
		# These are the incompatible calls.
		if (($action eq "on") || ($action eq "off") || ($action eq "reboot") || ($action eq "status"))
		{
			# Both lines are critical, so neither depends on debug.
			record($conf, $log, "\nERROR! Action request: [$action] requires a port number!\n", 1);
			record($conf, $log, "ERROR: I got: [$node] which does not seem to be valid.\n\n", 1);
			do_exit($conf, $log, 9);
		}
	}
	
	# This reads the current states and bails out with '1' (failed to
	# contact the fence device) if anything but a hash reference comes
	# back.
	my $read_states=sub
	{
		my $current=get_states($conf, $log);
		do_exit($conf, $log, 1) if not ref $current;
		return ($current);
	};
	
	# Make sure my call order is clear.
	$conf->{'system'}{call_order}="";
	if ($action eq "on")
	{
		# Release the fence, if fenced, and boot the node.
		my $states=$read_states->();
		my $power_state=$states->{$node}{power_state};
		my $reset_state=$states->{$node}{reset_state};
		my $feed_state=$states->{$node}{feed_state};
		if ($feed_state)
		{
			# Node is already running.
			record($conf, $log, "Asked to turn on node: [$node], but it's already running.\n");
			do_exit($conf, $log, 0);
		}
		elsif (($power_state) || ($reset_state))
		{
			# Node was fenced, release it first.
			$conf->{'system'}{call_order}="$node:0,sleep,";
		}
		$conf->{'system'}{call_order}.="$node:2,sleep,$node:on";
	}
	elsif ($action eq "off")
	{
		# Fence the node.
		$conf->{'system'}{call_order}="$node:1,sleep,$node:off";
	}
	elsif ($action eq "reboot")
	{
		# I don't do this gracefully because the API says this should
		# be an 'off' -> 'on' process, and 'off' is fence...
		$conf->{'system'}{call_order}="$node:1,sleep,$node:0,sleep,$node:off,$node:2,sleep,$node:on";
	}
	elsif ($action eq "status")
	{
		# This checks the node's power feed.
		$conf->{'system'}{call_order}="$node:check";
	}
	### ALL ACTIONS BELOW HERE ARE OUTSIDE OF THE FenceAgentAPI!
	elsif ($action eq "release")
	{
		# Release the given node without booting it.
		$conf->{'system'}{call_order}="$node:0";
	}
	elsif ($action eq "release_all")
	{
		# Release all ports.
		$conf->{'system'}{call_order}="release_all";
	}
	elsif ($action eq "fence_all")
	{
		# Fence all ports.
		$conf->{'system'}{call_order}="fence_all";
	}
	elsif ($action eq "boot")
	{
		# Boot the specific node if it is off.
		my $states=$read_states->();
		
		# Decide how, or if, to proceed based on the current state of
		# the node.
		$node=sprintf("%02d", $node);
		my $power_state=$states->{$node}{power_state};
		my $reset_state=$states->{$node}{reset_state};
		my $feed_state=$states->{$node}{feed_state};
		if (($power_state) || ($reset_state))
		{
			# Node was fenced, release first.
			$conf->{'system'}{call_order}.="$node:0,sleep,";
		}
		if (not $feed_state)
		{
			# Boot the node.
			$conf->{'system'}{call_order}.="$node:2,sleep,";
		}
		else
		{
			record($conf, $log, "WARNING: Node: [$node] seems to be already on, taking no action.\n", 1);
		}
		$conf->{'system'}{call_order}=~s/,$//;
	}
	elsif ($action eq "boot_all")
	{
		# Boot all nodes that are off.
		my $states=$read_states->();
		
		# Decide how, or if, to proceed based on the current state of
		# each node.
		foreach my $node (1..$conf->{na}{max_nodes})
		{
			$node=sprintf("%02d", $node);
			my $power_state=$states->{$node}{power_state};
			my $reset_state=$states->{$node}{reset_state};
			my $feed_state=$states->{$node}{feed_state};
			if (($power_state) || ($reset_state))
			{
				# Node was fenced, release first.
				$conf->{'system'}{call_order}.="$node:0,sleep,";
			}
			if (not $feed_state)
			{
				# Boot the node.
				$conf->{'system'}{call_order}.="$node:2,sleep,";
			}
		}
		$conf->{'system'}{call_order}=~s/,$//;
	}
	elsif ($action eq "shutdown")
	{
		# Shutdown a specific node that is on cleanly via ACPI.
		my $states=$read_states->();
		$node=sprintf("%02d", $node);
		my $feed_state=$states->{$node}{feed_state};
		if ($feed_state)
		{
			# shutdown the node.
			$conf->{'system'}{call_order}.="$node:2";
		}
		else
		{
			record($conf, $log, "WARNING: Node: [$node] seems to be already off, taking no action. Is the cable connected?\n", 1);
		}
		$conf->{'system'}{call_order}=~s/,$//;
	}
	elsif ($action eq "shutdown_all")
	{
		# Shutdown all nodes that are on cleanly via ACPI.
		my $states=$read_states->();
		
		# Only nodes with a live power feed need to be shut down.
		foreach my $node (1..$conf->{na}{max_nodes})
		{
			$node=sprintf("%02d", $node);
			if ($states->{$node}{feed_state})
			{
				# Shutdown the node.
				$conf->{'system'}{call_order}.="$node:2,sleep,";
			}
		}
		$conf->{'system'}{call_order}=~s/,$//;
	}
	elsif ($action eq "forcedown_all")
	{
		# Shutdown all nodes that are on by holding the power button
		# until they go down.
		my $states=$read_states->();
		
		# Only nodes with a live power feed need to be forced off.
		foreach my $node (1..$conf->{na}{max_nodes})
		{
			$node=sprintf("%02d", $node);
			if ($states->{$node}{feed_state})
			{
				# Force the node off.
				$conf->{'system'}{call_order}.="$node:3,sleep,";
			}
		}
		$conf->{'system'}{call_order}=~s/,$//;
	}
	else
	{
		record($conf, $log, "\nERROR: Unknown action request: [$action]!\n\n", 1);
		do_exit($conf, $log, 9);
	}
}

# Read in the config file.
#
# The file named in $conf->{'system'}{conf_file} is read line by line. Blank
# lines, comments and lines without an '=' are skipped. Everything else is
# split on the first '=' into a variable and a value, both trimmed, and handed
# to '_make_hash_reference' to be stored in $conf.
#
# Parameters:
#   $conf - Configuration hash reference.
#
# Returns '0'. Dies if the file can't be opened.
sub read_conf
{
	my ($conf)=@_;
	$conf={} if not $conf;
	
	# NOTE: '$log' is not passed to this function, so the debug calls to
	# 'record' below use whatever the package-level '$log' holds at the
	# time, which may be nothing if the log has not been opened yet.
	my $conf_file="$conf->{'system'}{conf_file}";
	record($conf, $log, "Shell call: [$conf_file]\n") if $conf->{'system'}{debug};
	
	# The three-argument 'open' takes the path literally; whitespace or
	# mode characters in the file name are not interpreted.
	open (my $read, "<", $conf_file) or die "Failed to read: [$conf_file], error was: $!\n";
	while (my $line=<$read>)
	{
		chomp $line;
		next if not $line;
		next if $line !~ /=/;
		$line=~s/^\s+//;
		$line=~s/\s+$//;
		next if $line =~ /^#/;
		next if not $line;
		
		# Only split on the first '=' so values may contain one.
		my ($var, $val)=(split/=/, $line, 2);
		$var=~s/^\s+//;
		$var=~s/\s+$//;
		$val=~s/^\s+//;
		$val=~s/\s+$//;
		next if (not $var);
		record($conf, $log, "Storing: [$var] = [$val]\n") if $conf->{'system'}{debug};
		_make_hash_reference($conf, $var, $val);
	}
	close ($read);
	
	return (0);
}

# Read in command line arguments
#
# Flags ('-h', '-v', '-q', '-d') act on their own. The switches listed in
# '%store_in' below take the following argument as their value and store it
# under the matching 'na::<key>'. Other dash-prefixed arguments are ignored
# and anything else is reported as a bad argument.
#
# Parameters:
#   $conf - Configuration hash reference.
#   $log  - Log file handle.
#   $bad  - Error flag carried in from the caller.
#
# Returns $bad, set to '1' if an invalid argument was seen.
sub read_cla
{
	my ($conf, $log, $bad)=@_;
	
	### These are the switches set by Red Hat. Each one stores the next
	### argument under the named 'na::' key.
	my %store_in=(
		# IP address or hostname of the Node Assassin to call.
		a	=>	"ipaddr",
		# The login name.
		l	=>	"login",
		# The password.
		p	=>	"passwd",
		# The node to work on.
		n	=>	"port",
		# The action to take.
		o	=>	"action",
		# The script to run to retrieve the password when it is not
		# stored in 'cluster.conf'.
		S	=>	"passwd_script",
	);
	
	# Loop through the passed arguments, if any.
	record($conf, $log, "Got args:\n") if $conf->{'system'}{debug};
	my $set_next="";
	foreach my $arg (@ARGV)
	{
		record($conf, $log, "[$arg]\n") if $conf->{'system'}{debug};
		$conf->{'system'}{got_cla}=1;
		
		# If 'set_next' has a value, push this argument into the 'conf'
		# hash.
		if ($set_next)
		{
			# It's set, use it's contents as the hash key.
			$conf->{na}{$set_next}=$arg;
			record($conf, $log, "Setting: 'na::$set_next': [$conf->{na}{$set_next}]\n") if $conf->{'system'}{debug};
			
			# Clear it now for the next go-round.
			$set_next="";
			next;
		}
		
		# The flags are anchored to the start of the argument so that
		# a dash further in (as in a host name) is not mistaken for
		# one. Both '-x' and '--x...' forms match.
		if ($arg=~/^--?h/)
		{
			# Print the help message and then exit.
			help($conf, $log);
		}
		elsif ($arg=~/^--?v/)
		{
			# Print the version information and then exit.
			$conf->{'system'}{version}=1;
			record($conf,$log,"Setting version\n") if $conf->{'system'}{debug};
		}
		elsif ($arg=~/^--?q/)
		{
			# Suppress all non-critical messages from STDOUT.
			$conf->{'system'}{quiet}=1;
		}
		elsif ($arg=~/^--?d/)
		{
			# Enable debug mode.
			$conf->{'system'}{debug}=1;
		}
		elsif ($arg=~/^-/)
		{
			# Work on a copy; '$arg' is an alias into '@ARGV'.
			my $switch=substr($arg, 1);
			if ($store_in{$switch})
			{
				$set_next=$store_in{$switch};
				record ($conf, $log, "Next argument will be stored in: [$set_next]\n") if $conf->{'system'}{debug};
			}
		}
		else
		{
			### MADI: I might want to pick up arguments via multiple lines.
			# Bad argument. These are critical so that they are
			# seen even when 'quiet' is set.
			record($conf, $log, "\nERROR: Argument: [$arg] is not valid!\n", 1);
			record($conf, $log, "ERROR: Please run: [man fence_na] to see a list of valid arguments.\n\n", 1);
			$bad=1;
		}
	}
	
	return ($bad);
}

# Read arguments from STDIN. This is adapted from the 'fence_brocade' agent.
#
# Each line is a 'name=value' pair. Comment lines and blank lines are skipped.
# Known names are stored in $conf; unknown names are reported and otherwise
# ignored. Nothing is read if command line arguments were already seen.
#
# Parameters:
#   $conf - Configuration hash reference.
#   $log  - Log file handle.
#   $bad  - Error flag carried in from the caller.
#
# Returns '0' if command line arguments were used, otherwise $bad.
sub read_stdin
{
	my ($conf, $log, $bad)=@_;
	
	return (0) if $conf->{'system'}{got_cla};
	
	my $line_count=0;
	while(defined (my $option=<>))
	{
		# Get rid of newlines.
		chomp $option;
		
		# Record the line for now, but comment this out before release.
		record ($conf, $log, "Processing option line: [$option]\n") if $conf->{'system'}{debug};
		
		# strip leading and trailing whitespace
		$option=~s/^\s*//;
		$option=~s/\s*$//;
		
		# skip comments
		next if ($option=~ /^#/);
		
		# Increment my option line count.
		$line_count++;
		
		# Go to the next line if the option line is empty.
		next if not $option;
		
		# Split the option up into the name and the value. Only the
		# first '=' separates them so that a value (a password, say)
		# may itself contain '='.
		my ($name, $value)=split /\s*=\s*/, $option, 2;
		
		# Record the line for now, but comment this out before release.
		record ($conf, $log, "Name: [$name], value: [$value].\n") if $conf->{'system'}{debug};
		
		# Set my variables depending on the veriable name.
		if ($name eq "agent")
		{
			# This is only used by 'fenced', but I record it for
			# potential debugging.
			$conf->{na}{agent}=$value;
		}
		elsif ($name eq "fm")
		{
			# This is a deprecated argument that should no longer
			# be used. Now 'port' should be used.
			if (not $conf->{na}{port})
			{
				# Port isn't set yet, use this value which may
				# be replaced if 'port' is set later. The port
				# is the second whitespace-separated field.
				(undef, $value) = split /\s+/,$value;
				$conf->{na}{port}=$value;
				record($conf, $log, "Warning! The argument 'fm' is deprecated, use 'port' instead.\n", 1);
				record($conf, $log, "Warning! Value: [$value] set for 'port'\n", 1);
			}
			else
			{
				# Port was already set, so simply ignore this.
				record($conf, $log, "Warning! The argument 'fm' is deprecated, use 'port' instead.\n", 1);
				record($conf, $log, "Warning! Value: [$value] ignored.\n", 1);
			}
		}
		elsif ($name eq "ipaddr")
		{
			# Record the IP Address or name of the Node Assassin to
			# use.
			$conf->{na}{ipaddr}=$value;
		}
		elsif ($name eq "login")
		{
			# Record the login name that was passed.
			$conf->{na}{login}=$value;
		}
		elsif ($name eq "name")
		{
			# Depricated argument used formerly for login name.
			if (not $conf->{na}{login})
			{
				# Login isn't set yet, use this value which may
				# be replaced if 'login' is seen later.
				$conf->{na}{login}=$value;
				record($conf, $log, "Warning! The argument 'name' is deprecated, use 'login' instead.\n", 1);
				record($conf, $log, "Warning! Value: [$value] set for 'login'.\n", 1);
			}
			else
			{
				# I've already seen the 'login' value so I will
				# ignore this value.
				record($conf, $log, "Warning! The argument 'name' is deprecated, use 'login' instead.\n", 1);
				record($conf, $log, "Warning! Value: [$value] ignored.\n", 1);
			}
		}
		elsif (($name eq "action") or ($name eq "option"))
		{
			# 'option' is deprecated.
			record($conf, $log, "Please use 'action', not 'option', as the later is deprecated.\n", 1) if $name eq "option";
			$conf->{na}{action}=$value;
		}
		elsif ($name eq "passwd")
		{
			# This is the login password.
			$conf->{na}{passwd}=$value;
		}
		elsif ($name eq "passwd_script")
		{
			# This is the path to the script that will return the
			# password to the agent. At this time, this is not
			# implemented.
			$conf->{na}{passwd_script}=$value;
		}
		elsif ($name eq "port")
		{
			# This sets the port number to act on.
			$conf->{na}{port}=$value;
		}
		elsif ($name eq "nodename")
		{
			# This is passed by 'fenced' via 'cluster.conf' as of
			# cluster version 3, but it's not yet documented.
			$conf->{'system'}{nodename}=$value;
		}
		elsif ($name eq "quiet")
		{
			# This is passed by 'fenced' via 'cluster.conf' as a
			# custom argument to supress output to STDOUT.
			$conf->{'system'}{quiet}=1;
		}
		else
		{
			record($conf, $log, "\nERROR: Illegal name in option: [$option] at line: [$line_count]\n\n", 1);
			# 'rohara' from #linux-cluster suggested it's better to
			# simply ignore unknown input, as that is the behaviour
			# the fenced authors expect.
			#$bad=1;
		}
	}
	return ($bad);
}

# This function simply prints messages to both the log and to stdout.
#
# Parameters:
#   $conf     - Configuration hash reference; 'system::quiet' suppresses
#               non-critical output to STDOUT.
#   $log      - Log file handle. May be undefined, in which case nothing is
#               logged.
#   $msg      - The message to record.
#   $critical - When true, the message is printed even in 'quiet' mode.
#
# Always returns '0'.
sub record
{
	my ($conf, $log, $msg, $critical)=@_;
	$critical=0 if not $critical;
	
	# The log file gets everything. Skip it when there is no handle (the
	# log may not be open yet) rather than dying on the print.
	print $log $msg if $log;
	
	# Critical messages have to print, so this ensure that it gets out
	# when 'quiet' is in use.
	print $msg if (($critical) || (not $conf->{'system'}{quiet}));
	
	return(0);
}

# This walks every port of the active Node Assassin, from 1 up to
# 'na::max_nodes', and sends each one the requested state.
#
# Parameters:
#   $conf  - Configuration hash reference; 'na::handle' is the connection.
#   $log   - Log file handle.
#   $state - The state to send to each port. Defaults to '0' if not defined.
#
# Always returns '9'.
sub set_all_state
{
	my ($conf, $log, $state)=@_;
	$state=0 unless defined $state;
	
	for my $port (1..$conf->{na}{max_nodes})
	{
		# Calls take the form 'NN:state' with a zero-padded port.
		my $call=sprintf("%02d", $port).":$state";
		record ($conf, $log, "Calling: [$call]\n") if $conf->{'system'}{debug};
		
		# Pass along whatever the device said, minus empty lines.
		for my $reply ($conf->{na}{handle}->cmd("$call"))
		{
			chomp $reply;
			record($conf, $log, "$reply\n") if $reply;
		}
	}
	
	return (9);
}

# This prints one 'node,alias' line for each node from 1 up to 'na::max_nodes'
# and then exits with status '0'. The alias comes from the active Node
# Assassin's section of the configuration; '--' is shown when there is none.
#
# Parameters:
#   $conf - Configuration hash reference.
#   $log  - Log file handle.
sub show_list
{
	my ($conf, $log)=@_;
	record($conf, $log, "In 'show_list' function.\n") if $conf->{'system'}{debug};
	
	# The aliases are stored per Node Assassin, so find the active one.
	my $na_id=$conf->{'system'}{na_id};
	record($conf, $log, "na_id: [$na_id], max_node: [$conf->{na}{max_nodes}]\n") if $conf->{'system'}{debug};
	
	foreach my $node (1..$conf->{na}{max_nodes})
	{
		my $alias=$conf->{na}{$na_id}{alias}{$node};
		$alias="--" if not $alias;
		
		# The list is the whole point of the call, so it is critical.
		record ($conf, $log, "$node,$alias\n", 1);
	}
	
	do_exit($conf, $log, 0);
}

# This prints the version information of this fence agent and of any configured
# fence devices.
#
# For each configured Node Assassin this connects, reads the firmware details,
# prints the device's configuration and the state of each of its nodes, then
# disconnects before moving on to the next one.
#
# Parameters:
#   $conf - Configuration hash reference.
#   $log  - Log file handle.
#
# Exits with status '0' once all devices have been reported.
sub version
{
	my ($conf, $log)=@_;
	
	# Print the Fence Agent version first.
	record ($conf, $log, "Fence Agent: ..... Node Assassin ver. $conf->{'system'}{agent_version}\n", 1);
	record ($conf, $log, "Node Assassins: .. $conf->{'system'}{na_num}\n", 1);
	for my $na_id (1..$conf->{'system'}{na_num})
	{
		# Make this Node Assassin the active one.
		$conf->{'system'}{na_id}=$na_id;
		$conf->{na}{ipaddr}=     $conf->{na}{$na_id}{ipaddr};
		$conf->{na}{tcp_port}=   $conf->{na}{$na_id}{tcp_port};
		$conf->{na}{na_name}=    $conf->{na}{$na_id}{na_name};
		my $build_date="";
		my $serial_number="";
		my $firmware_ver="";
		connect_to_na($conf, $log);
		if ($conf->{na}{handle})
		{
			# Get the NAOS version and serial numbers. The value is
			# the last whitespace-separated field on each line.
			my @details=$conf->{na}{handle}->cmd("00:1");
			foreach my $line (sort {$a cmp $b} @details)
			{
				chomp $line;
				($build_date)=($line=~/\s(\S+)$/) if ($line =~ /Build Date/i );
				($serial_number)=($line=~/\s(\S+)$/) if ($line =~ /Serial Number/i );
				($firmware_ver)=($line=~/\s(\S+)$/) if ($line =~ /NAOS Version/i );
				record($conf, $log, "line: [$line]\n") if $conf->{'system'}{debug};
			}
		}
		else
		{
			$build_date="??";
			$serial_number="??";
			$firmware_ver="??";
		}
		record ($conf, $log, " - Node Assassin:  #$na_id\n", 1);
		record ($conf, $log, "   - Name: ....... $conf->{na}{$na_id}{na_name}\n", 1);
		record ($conf, $log, "   - IP Address: . $conf->{na}{$na_id}{ipaddr}\n", 1);
		record ($conf, $log, "   - TCP Port: ... $conf->{na}{$na_id}{tcp_port}\n", 1);
		record ($conf, $log, "   - MAC Address:  $conf->{na}{$na_id}{mac}\n", 1);
		record ($conf, $log, "   - Netmask: .... $conf->{na}{$na_id}{netmask}\n", 1);
		record ($conf, $log, "   - Gateway: .... $conf->{na}{$na_id}{gateway}\n", 1);
		record ($conf, $log, "   - Serial #: ... $serial_number\n", 1);
		record ($conf, $log, "   - Firmware: ... $firmware_ver\n", 1);
		record ($conf, $log, "   - Build Date: . $build_date (yyyy-mm-dd)\n", 1);
		record ($conf, $log, "   - Max Nodes: .. $conf->{na}{$na_id}{max_nodes}\n", 1);
		
		if ($conf->{na}{handle})
		{
			# Get the node states. Anything but a hash reference
			# means the query failed.
			my $states=get_states($conf, $log);
			if (ref $states)
			{
				for (1..$conf->{na}{$na_id}{max_nodes})
				{
					my $node=sprintf("%02d", $_);
					my $power_state=$states->{$node}{power_state};
					my $reset_state=$states->{$node}{reset_state};
					my $feed_state=$states->{$node}{feed_state};
					record ($conf, $log, "     - Node $node: .. p[$power_state], r[$reset_state], f[$feed_state]\n", 1);
				}
			}
			else
			{
				no_connection_error($conf, $log, $na_id);
			}
			
			# Close the handle for the next loop. 'show_list' is
			# not called here because it exits the agent, which
			# would stop the report at the first Node Assassin.
			$conf->{na}{handle}->close();
			$conf->{na}{handle}="";
		}
		else
		{
			no_connection_error($conf, $log, $na_id);
		}
	}
	do_exit($conf, $log, 0);
}


###############################################################################
# Private functions below here.                                               #
###############################################################################

### Contributed by Shaun Fryer and Viktor Pavlenko by way of TPM.
# This is a helper to '_make_hash_reference'. It recursively merges the second
# hash reference into the first, modifying the first in place.
#
# Parameters:
#   $href1 - The hash reference being merged into.
#   $href2 - The hash reference whose keys and values are merged in.
#
# For each key in $href2: when both sides hold a hash reference under that key
# the two sub-hashes are merged recursively; in every other case the value
# from $href2 replaces whatever $href1 had.
sub _add_hash_reference
{
	my $href1=shift;
	my $href2=shift;
	
	for my $key (keys %$href2)
	{
		# Only descend when BOTH sides are hashes. Descending with a plain
		# scalar on the right would dereference a non-reference.
		if ((ref $href1->{$key} eq 'HASH') && (ref $href2->{$key} eq 'HASH'))
		{
			_add_hash_reference($href1->{$key}, $href2->{$key});
		}
		else
		{
			$href1->{$key}=$href2->{$key};
		}
	}
}

### Contributed by Shaun Fryer and Viktor Pavlenko by way of TPM.
# This takes a string with double-colon separators and divides on those
# double-colons to create a hash reference where each element is a hash key.
# The resulting nested hash is then merged into the passed hash reference.
#
# Parameters:
#   $href       - The hash reference to merge the new keys into.
#   $key_string - The key path, e.g. 'a::b::c'.
#   $value      - The value stored under the last key of the path.
#
# Example: ($href, 'a::b::c', 1) results in $href->{a}{b}{c} being set to 1.
sub _make_hash_reference
{
	my $href=shift;
	my $key_string=shift;
	my $value=shift;
	
	my @keys = split /::/, $key_string;
	my $last_key = pop @keys;
	my $_href = {};
	$_href->{$last_key}=$value;
	# Wrap the structure from the innermost key outwards. Walk the key list
	# itself rather than testing each popped key for truth, so that a key of
	# '0' (or an empty key) does not end the loop early and drop the outer
	# levels.
	for my $key (reverse @keys)
	{
		my $elem = {};
		$elem->{$key} = $_href;
		$_href = $elem;
	}
	_add_hash_reference($href, $_href);
}

1;

 

Input, advice, complaints and meanderings all welcome!
Digimer digimer@alteeve.ca https://alteeve.ca/w legal stuff:  
All info is provided "As-Is". Do not use anything here unless you are willing and able to take responsibility for your own actions. © 1997-2013
Naming credits go to Christopher Olah!
In memory of Kettle, Tonia, Josh, Leah and Harvey. In special memory of Hannah, Jack and Riley.