Fence na.lib: Difference between revisions

From Alteeve Wiki
Jump to navigation Jump to search
No edit summary
No edit summary
Line 10: Line 10:
# Node Assassin - Fence Agent
# Node Assassin - Fence Agent
# Digimer; digimer@alteeve.com
# Digimer; digimer@alteeve.com
# Mar. 08, 2010.
# Apr. 06, 2010.
# Version: 0.1.005
# Version: 1.1.4
#
#


Line 27: Line 27:
) or do_exit($conf, $log, 1);
) or do_exit($conf, $log, 1);
$conf->{na}{handle}->open($conf->{na}{ipaddr});
$conf->{na}{handle}->open($conf->{na}{ipaddr});
if ($conf->{na}{handle}->errmsg)
{
record($conf, $log, "Connection to Node Assassin: [$conf->{na}{ipaddr}] failed.\nError was: [".$conf->{na}{handle}->errmsg."]\n");
$conf->{na}{handle}="";
};
# record ($conf, $log, "na::handle: [$conf->{na}{handle}]\n");
return ($conf->{na}{handle});
return ($conf->{na}{handle});
}
}
Line 43: Line 50:
foreach my $order (split/,/, $conf->{'system'}{call_order})
foreach my $order (split/,/, $conf->{'system'}{call_order})
{
{
# record ($conf, $log, "Calling: [$order]\n");
# record($conf, $log, "Calling: [$order]\n");
# Handle a 'release_all' call.
# Handle a 'release_all' call.
if ($order eq "release_all")
if ($order eq "release_all")
{
{
set_all_state($conf, $log, 1);
set_all_state($conf, $log, 0);
next;
next;
}
}
Line 55: Line 62:
if ($order eq "fence_all")
if ($order eq "fence_all")
{
{
set_all_state($conf, $log, 0);
set_all_state($conf, $log, 1);
next;
next;
}
}
Line 83: Line 90:
# Handle a status check via Node Assassin.
# Handle a status check via Node Assassin.
# record($conf, $log, "order: [$order]\n");
if ($order=~/(\d\d):(\D+)/)
if ($order=~/(\d\d):(\D+)/)
{
{
Line 88: Line 96:
my $check=$2;
my $check=$2;
# Verify the state of the port.
# Verify the state of the port.
# record($conf, $log, "Status check on node: [$node] -> [$check]\n");
# record($conf, $log, "Status check on node: [$node] -> [$check]\n");
# Get the state.
# Get the state.
my $states=get_states($conf, $log);
my $state=get_states($conf, $log);
if ($states == 1)
if ($state == 1)
{
{
# I had a connection problem.
# I had a connection problem. Exit with error
# code '1' as per:
# http://sources.redhat.com/cluster/wiki/FenceAgentAPI
do_exit($conf, $log, 1);
do_exit($conf, $log, 1);
}
}
my $node_state=$states->{$node};
record($conf, $log, "Node Assassin: [$conf->{'system'}{na_id}/$conf->{na}{na_name}], Node: [$node] state: [$node_state]\n");
# Make the states a bit easier to type.
my $power_state=$state->{$node}{power_state};
my $reset_state=$state->{$node}{reset_state};
my $feed_state=$state->{$node}{feed_state};
# Return the status of the requested node.
# record($conf, $log, "Node Assassin: [#$conf->{'system'}{na_id}/$conf->{na}{na_name}], Node: [$node] Power/Reset/Feed states: [$power_state/$reset_state/$feed_state]\n");
if ($check eq "check")
if ($check eq "check")
{
{
# Return '2' if the node is off and '0' if it is on.
# Return '2' if the node is off and '0' if it
$exit_code=$node_state eq "off" ? 2 : 0;
# is on.
$exit_code=$feed_state ? 0 : 2;
}
}
elsif ($check eq "off")
elsif ($check eq "off")
{
{
# 'off' was called, make sure the node is now off. This
# 'off' was called, make sure the node is now
# may be called by 'reboot' in which case 'exit_code'
# off. This may be called by 'reboot' in which
# will simply be over-written when the final 'reboot'
# case 'exit_code' will simply be over-written
# state check is called.
# when the final 'reboot' state check is called.
$exit_code=$node_state eq "off" ? 0 : 1;
$exit_code=$feed_state ? 1 : 0;
}
}
elsif ($check eq "on")
elsif ($check eq "on")
{
{
# 'on' was called, make sure the node is now off.
# 'on' was called, make sure the node is now
$exit_code=$node_state eq "off" ? 1 : 0;
# off.
$exit_code=$feed_state ? 0 : 1;
}
}
elsif ($check eq "reboot")
elsif ($check eq "reboot")
{
{
# Make sure that 'exit_code' was set to '0' by the
# Make sure that 'exit_code' was set to '0' by
# earlier call. We checked again to make sure the node
# the earlier call. We checked again to make
# came back up, and will log an error if it didn't, but
# sure the node came back up, and will log an
# we return '0' just the same, as per the API.
# error if it didn't, but we return '0' just
if ($exit_code eq "0")
# the same, as per the API.
if (not $exit_code)
{
{
# The power off portion worked. Check if the
# The power off portion worked. Check if the
# node booted properly and record an error if
# node booted properly and record an error if
# not.
# not.
if ($node_state eq "off")
if (not $feed_state)
{
{
record($conf, $log, "Node: [$node] failed to boot after a successful power off during a reboot action.\n");
record($conf, $log, "Node: [$node] failed to boot after a successful power off during a reboot action.\n");
Line 141: Line 159:
$exit_code=1;
$exit_code=1;
}
}
$exit_code=$node_state eq "off" ? 1 : 0;
$exit_code=$feed_state ? 0 : 1;
}
}
next;
next;
Line 154: Line 172:
record($conf, $log, "$line\n");
record($conf, $log, "$line\n");
}
}
# record($conf, $log, "Call complete.\n");
record($conf, $log, "Call complete.\n");
}
}
Line 189: Line 207:
# Chomp the newline off and then pull the port and state out.
# Chomp the newline off and then pull the port and state out.
chomp $line;
chomp $line;
my ($this_node, $this_state)=($line=~/(\d+): (.*)$/);
my ($this_node, $power_state, $reset_state, $feed_state)=($line=~/^- Node (\d+): P(\d+), R(\d+), F(\d+)$/);
# Skip if this isn't a status line.
# Skip if this isn't a status line.
next if ((not $this_node) || (not $this_state));
next if not $this_node;
# Convert the state to a simple on/off.
# Convert the state to a simple on/off.
$this_state=$this_state =~ /fenced/i ? "off" : "on";
# Store the state.
# Store the state.
$state->{$this_node}=$this_state;
$state->{$this_node}{power_state}=$power_state;
# record($conf, $log, "state->{$this_node}: [$state->{$this_node}]\n");
$state->{$this_node}{reset_state}=$reset_state;
$state->{$this_node}{feed_state}=$feed_state;
# record($conf, $log, "Node: [$this_node], Power State: [$state->{$this_node}{power_state}], Reset State: [$state->{$this_node}{reset_state}], Feed State: [$state->{$this_node}{feed_state}].\n");
}
}
Line 208: Line 227:
my ($conf, $log)=@_;
my ($conf, $log)=@_;
my $msg=q`
my $msg=q`
Node Assassin Fencing Agent
NODE ASSASSIN FENCING AGENT


This program interfaces with one or more Node Assassin fence devices to
This program interfaces with one or more Node Assassin fence devices to
set one or more nodes to one or more states.
set one or more nodes to one or more states.


Usage:
USAGE:


Arguments are read from STDIN as 'variable=value' pairs, one pair per
Arguments are read from STDIN as 'variable=value' pairs, one pair per
Line 221: Line 240:
./fence_na <options>
./fence_na <options>


Overview:
OVERVIEW:


This takes arguments that defined which Node Assassin to call, what
This takes arguments that defined which Node Assassin to call, what
Line 228: Line 247:
a set of Node Assassin states which then get passed to the device.
a set of Node Assassin states which then get passed to the device.


Node Assassin Ports:
NODE ASSASSIN "PORTS":
 
The FenceAgentAPI refers to "ports" while the Node Assassin refers to
"nodes". These two terms are thus used interchangeably. The reason for
the difference is that, internally, Node Assassin maps each node to
three "ports": the node's power and reset buttons plus the node's power
feed.


The 'fenced' daemon programmatically refers to a given node as a
Each node in the Node Assassin can be set to one of four states
"Port". This conflicts with the Node Assassin internal definition of a
described in the next section.  
port. Please do not confuse the two!
When the 'fenced' daemon sets the 'port' value, this is understood to
be the Node ID to be acted on. Internally then, the Node ID is
converted to two Node Assassin ports; One for "power" and one for
"reset". The conversion is done this way:
((fenced port * 2) - 1) = Power Port
(fenced port * 2)      = Reset Port
For example, when fenced sets 'port=4', it is converted to:
Power Port = ((4*2)-1) = 7
Reset Port = (4*2)    = 8
With this conversion complete, this fence agent knows what calls to
make to accomplish the task set in the 'action' argument.


Node Assassin States:
NODE ASSASSIN STATES:


Each Node Assassin port can be set to one of four states. They are:
Each Node Assassin port can be set to one of four states. They are:


0
0
This state will fence the nodes specified by the list. The
This will release the fence on the specified NA node's power
fence will remain active until released.
and reset ports.
1
1
This will release the fence on the specified NA port.
This state will fence the node specified by the "port"
argument. The fence will remain active until released. Further,
the fence will "lock out" the node's front-panel buttons until
the fence is released.
2
2
This will fence the node(s) for one second. This is meant to be
This will fence the node for one second. This is meant to
used on ports connected to a node's power button. If the node
provide a mechanism to boot or initiate a clean shut down of a
is alive and supports ACPI, this should start a graceful power
node.
down of the node. Conversly, if the node was off, this will
boot the node. When connected to a node's reset switch, this
will cause a quick reboot without a graceful power off.
3
3
This state will fence the node(s) for five seconds. This is
This state will fence the node for five seconds to 30 seconds,
specifically designed for ports connected to a node's power
depending on how long it takes for the power feed to drop low.
button. It will allow a frozen node to be forced off by holding
This is meant to provide a way to force crashed nodes to power
the power button closed long enough to force a power off. This
off without the front-panel lock.
state serves no real difference over state 2 when connected to
a reset switch.


Options:
OPTIONS:


Which options are valid depends on how the arguments are passed in. The
Which options are valid depends on how the arguments are passed in. The
Line 333: Line 340:
on
on
1. Reset pin is opened (fence released)
1. Checks to see if the node is already on. If it is,
2. Power pin is opened
  it exits with error code 0 (success).
3. Wait one second
2. Checks to see if the node is fenced and, if so,
4. Power pin is closed for one second
  releases it.
5. Wait one second
3. Sets the node to state 2 (one second fence of the
6. Power feed is checked for 'on' state
  power switch) to initiate boot.
4. Waits one second and then checks that the node feed
  state to ensure that the node is powered on.
off
off
1. Reset pin is closed (fenced), disabling the node
1. The node is set to state 1, Fenced. Specifically;
  immediately.
2. The Node's reset pin is fenced for one second to
2. Wait one second.
  immediately disable the node.
3. Reset pin is opened (fence released). Some machines
3. The reset pin is unfenced for one second.
  will not power off is it's reset pin is held closed.
4. The power switch is fenced for five seconds and the
4. Wait one second.
  feed is checked. If it is still high, the Node
5. Power pin is closed
  Assassin will wait an additional 25 seconds and
6. Wait for five seconds to ensure PSU is shut down.
  check the feed state again. If it is still on, an
7. Reset pin is closed
  error is generated and the fence agent's exit status
  Note: With both the power and reset help closed, the
  is set to 1.
      node will not boot even when it's front panel
5. The reset is fenced. At this point, the node's front
      power button is pressed.
  panel buttons are disabled, preventing accidental
8. Power feed is checked for 'off' state
  booting of the node before the fence is released.
reboot
reboot
Line 360: Line 369:
Note: This will do a hard reboot! Do not use when a
Note: This will do a hard reboot! Do not use when a
      normal restart would suffice.
      normal restart would suffice.
1. Reset pin is closed (fenced) to ensure immediate
1. This sets the node to state 1 (see "off" above) and
    disabling of the node.
  pauses for one second.
2.  Wait one second.
2. The node is set to state 0, releasing the fence and
3. Reset pin is opened (fence released)
  pauses for one second.
4.  Wait one second.
3. The node's feed is checked to ensure that the node
5. Power pin is closed (fenced) for five seconds
  is off.
6.  Wait five seconds to ensure power down.
4. The node is set to state 2, booting the node and
7.  Power feed is checked for 'off' state
  waits one second.
8. Power pin is closed (fenced) for one second to
5. The node's feed is checked to see if the node is
    begin booting the node.
  booting.
9.  Wait one second.
10. Power feed is checked for 'on' state
status
status
The associated node's power feed is checked and it's
The associated node's power feed is checked and it's
status is returned.
status is returned; 1 for on, 0 for off/disconnected.
monitor, list
monitor, list
Line 386: Line 393:
      These states are not used by the 'fenced' daemon and are
      These states are not used by the 'fenced' daemon and are
      provided here as a convenience for the user.
      provided here as a convenience for the user.
     
      Any commands named "*_all" ignore the node value, if set.
###############################################################
###############################################################
     
soft_reboot (not yet implemented, requires NA v1.1.4 or better)
1. Power pin is closed for one second. Nodes who
  support ACPI should initiate shutdown at the OS
  level.
2. A loop starts checking the power feed line. The
  status is checked once a second until the power feed
  returns 'off'.
3. Wait one second
4. Power pin is closed for one second.
5. Wait one second.
6. Power feed is checked for 'on' state.
soft_off (not yet implemented, requires NA v1.1.4 or better)
1. Power pin is closed for one second. Nodes who
  support ACPI should initiate shutdown at the OS
  level.
2. A loop starts checking the power feed line. The
  status is checked once a second until the power feed
  returns 'off'.
release
release
Line 419: Line 406:
All closed ports on the specified Node Assassin are
All closed ports on the specified Node Assassin are
opened (fence released).
opened (fence released).
fence_all
All nodes are set to state 2 (see "off" above).
boot_all
boot_all
Boot or begin an ACPI-induced shutdown of all nodes.
The power feeds of all nodes on the Node Assassin are
checked. Any found to be off or disconnected are set to
Until v1.1.4 hardware is released, this action simply
state 2 to initiate boot.
closes all power ports for one second, irregardless of
whether the node is on or off already.
all_stop (not yet implemented, requires NA v1.1.4 or better)
shutdown_all
1. The '-n/port' and '-a/ipaddr' arguments are ignored.
The power feeds of all nodes on the Node Assassin are
2. A loop starts for all Node Assassins configured.
checked. Any found to be on are set to state 2 to
3. For each NA, a loop is started to step through all
initiate an ACPI-induced soft shutdown.
  supported nodes.
4. For each node on each Node Assassin:
4.1. Power feed is checked for 'on' status. If on;
4.2. Power pin is closed (fenced) for one second,
    initiating an ACPI induced shut down.
all_start (not yet implemented, requires NA v1.1.4 or better)
forcedown_all
1. The '-n/port' and '-a/ipaddr' arguments are ignored.
The power feeds of all nodes on the Node Assassin are
2. A loop starts for all Node Assassins configured.
checked. Any found to be on are set to state 3. This
3. For each NA, a loop is started to step through all
state is similar to state 1 (see "off" above), except
  supported nodes.
that the node's front-panel switches are not disabled.
4. For each node on each Node Assassin:
4.1. Power feed is checked for 'off' status. If off;
4.2. Power pin is closed (fenced) for one second,
    initiating a boot of the node, if there is a node
    connected.
-S <path>, passwd_script=<path> (Not Implemented)
-S <path>, passwd_script=<path> (Not Implemented)
Line 457: Line 436:


Examples:
EXAMPLES:


To simulate how 'fenced' calls the script, create a text file called
To simulate how 'fenced' calls the script, create a text file called
Line 466: Line 445:
ipaddr=ariel.alteeve.com
ipaddr=ariel.alteeve.com
port=02
port=02
login=ariel
login=motoko
passwd=gr0tt0
passwd=secret
action=off
action=off
-----------------------------------------------------------------------
-----------------------------------------------------------------------
Line 482: Line 461:
To duplicate the same call using command line arguments:
To duplicate the same call using command line arguments:
./fence_na -a ariel.alteeve.com -n 2 -l ariel -p gr0tt0 -o off
./fence_na -a motoko.alteeve.com -n 2 -l motoko -p secret -o off
 
Note:
NOTE:


An internal pager is not implemented. You may wish to run this via
An internal pager is not implemented. You may wish to run this via
Line 491: Line 470:
./fence_na | less
./fence_na | less


Updated:
UPDATED:


Mar. 7, 2010 Digimer
Apr. 6, 2010 Digimer (digimer@alteeve.com)
`;
`;
print $msg;
print $msg;
do_exit($conf, $log, 0);
do_exit($conf, $log, 0);
}
# This error message is printed when there was a connection problem with a
# given Node Assassin.
sub no_connection_error
{
my ($conf, $log, $na_id)=@_;
record ($conf, $log, "  - ERROR: Unable to query Node Assassin: [$conf->{na}{$na_id}{na_name}]!\n");
record ($conf, $log, "  - ERROR: Please check that it is connected, that the information in\n");
record ($conf, $log, "  - ERROR: '/etc/na/fence_na.conf' is accurate and that the proper configuration\n");
record ($conf, $log, "  - ERROR: has be uploaded to the device.\n");
return (0);
}
}


Line 504: Line 495:
{
{
my ($conf, $log)=@_;
my ($conf, $log)=@_;
record($conf, $log, "In the 'process_action' function.\n");
# Make this more readable.
# Make this more readable.
Line 509: Line 501:
my $action=$conf->{na}{action};
my $action=$conf->{na}{action};
my $port=$conf->{na}{port};
my $port=$conf->{na}{port};
# record($conf, $log, "na_id: [$na_id], action: [$action], port: [$port]\n");
# Translate the port passed in by the fence agent into the actual ports
# The following actions require a port. Error if I don't have one.
# in the Node Assassin. Mapping is:
if ($port eq "00")
# Node 01 -> Power = Port 01
{
# Node 01 -> Reset = Port 02
# These are the incompatible calls.
# Node 02 -> Power = Port 03
if (($action eq "on") || ($action eq "off") || ($action eq "reboot") || ($action eq "status"))
# Node 02 -> Reset = Port 04
{
# Node 03 -> Power = Port 05
record($conf, $log, "\nERROR! Action request: [$action] requires a port number! I got: [$port] which does not seem to be valid.\n\n");
# Node 03 -> Reset = Port 06
do_exit($conf, $log, 9);
# Node 04 -> Power = Port 07
}
# Node 04 -> Reset = Port 08
}
# ...
my $power_port=sprintf("%02d", (($port*2)-1));
my $reset_port=sprintf("%02d", ($port*2));
# record($conf, $log, "Translated node port: [$port] to power port: [$power_port] and reset port: [$reset_port]\n");
# Make sure my call order is clear.
$conf->{'system'}{call_order}="";
if ($action eq "on")
if ($action eq "on")
{
{
# Release the fence and boot the node.
# Release the fence, if fenced, and boot the node.
$conf->{'system'}{call_order}="$reset_port:1,$power_port:1,sleep,$power_port:2,sleep,$power_port:on";
$state=get_states($conf, $log);
my $power_state=$state->{$port}{power_state};
my $reset_state=$state->{$port}{reset_state};
my $feed_state=$state->{$port}{feed_state};
if ($feed_state)
{
# Node is already running.
record($conf, $log, "Asked to turn on node: [$port], but it's already running. Exiting with code 0.\n");
do_exit($conf, $log, 0);
}
elsif (($power_state) || ($reset_state))
{
# Node was fenced, release it first.
$conf->{'system'}{call_order}="$port:0,sleep,";
}
$conf->{'system'}{call_order}.="$port:2,sleep,$port:on";
}
}
elsif ($action eq "off")
elsif ($action eq "off")
{
{
# Fence the node by pressing and holding the reset to make sure
# Fence the node.
# the node immediately dies. Then I release the fence long
$conf->{'system'}{call_order}="$port:1,sleep,$port:off";
# enough to force a power off, then I re-apply then fence to
# make sure the node doesn't come back up. This is needed
# because some machines won't power off if the reset is held
# high when the power is pressed, even for > 4 seconds.
$conf->{'system'}{call_order}="$reset_port:0,sleep,$reset_port:1,sleep,$power_port:0,sleep 5,$reset_port:0,$power_port:off";
}
}
elsif ($action eq "reboot")
elsif ($action eq "reboot")
{
{
# Currently, I don't do this gracefully because, well, if it's
# I don't do this gracefully because the API says this should
# being fenced, it's not meant to be graceful.
# be an "off -> on" process, and "off" is fence...
# This is a combination of the 'off' -> 'on' actions.
$conf->{'system'}{call_order}="$port:1,sleep,$port:0,sleep,$port:off,$port:2,sleep,$port:on";
$conf->{'system'}{call_order}="$reset_port:0,sleep,$reset_port:1,sleep,$power_port:3,sleep 6,$power_port:off,$power_port:2,sleep,$power_port:reboot";
}
}
elsif ($action eq "status")
elsif ($action eq "status")
{
{
# This should check the probe, but for now, it checks the
# This checks the node's power feed.
# port's state.
$conf->{'system'}{call_order}="$port:check";
$conf->{'system'}{call_order}="$power_port:check";
}
}
elsif (($action eq "monitor") or ($action eq "list"))
elsif (($action eq "monitor") or ($action eq "list"))
Line 561: Line 560:
{
{
# Release the given node without booting it.
# Release the given node without booting it.
$conf->{'system'}{call_order}="$reset_port:1,$power_port:1";
$conf->{'system'}{call_order}="$port:0";
}
}
elsif ($action eq "release_all")
elsif ($action eq "release_all")
Line 575: Line 574:
elsif ($action eq "boot_all")
elsif ($action eq "boot_all")
{
{
### This can not be fully implemented until the 'power_feed' is
### available.
# Boot all nodes that are off.
# Boot all nodes that are off.
$state=get_states($conf, $log);
$state=get_states($conf, $log);
$conf->{'system'}{call_order}="";
# Decide how, or if, to proceed based on the current state of
foreach my $port (split/,/, $conf->{na}{power_pins})
# each node.
foreach my $node (1..$conf->{na}{max_nodes})
{
$node=sprintf("%02d", $node);
my $power_state=$state->{$node}{power_state};
my $reset_state=$state->{$node}{reset_state};
my $feed_state=$state->{$node}{feed_state};
if (($power_state) || ($reset_state))
{
# Node was fenced, release first.
$conf->{'system'}{call_order}.="$node:0,sleep,";
}
if (not $feed_state)
{
# Boot the node.
$conf->{'system'}{call_order}.="$node:2,sleep,";
}
}
$conf->{'system'}{call_order}=~s/,$//;
}
elsif ($action eq "shutdown_all")
{
# Shutdown all nodes that are on cleanly via ACPI.
$state=get_states($conf, $log);
# Decide how, or if, to proceed based on the current state of
# each node.
foreach my $node (1..$conf->{na}{max_nodes})
{
{
# record($conf, $log, "state->{$port}: [$state->{$port}]\n");
$node=sprintf("%02d", $node);
# if ($state->{$port} eq "off")
my $power_state=$state->{$node}{power_state};
# {
my $reset_state=$state->{$node}{reset_state};
$conf->{'system'}{call_order}.="$port:0,";
my $feed_state=$state->{$node}{feed_state};
# }
if ($feed_state)
{
# Boot the node.
$conf->{'system'}{call_order}.="$node:2,sleep,";
}
}
}
$conf->{'system'}{call_order}.="sleep,";
$conf->{'system'}{call_order}=~s/,$//;
foreach my $port (split/,/, $conf->{na}{power_pins})
}
elsif ($action eq "forcedown_all")
{
# Shutdown all nodes that are on by holding the power button
# until they go down.
$state=get_states($conf, $log);
# Decide how, or if, to proceed based on the current state of
# each node.
foreach my $node (1..$conf->{na}{max_nodes})
{
{
# record($conf, $log, "state->{$port}: [$state->{$port}]\n");
$node=sprintf("%02d", $node);
# if ($state->{$port} eq "off")
my $power_state=$state->{$node}{power_state};
# {
my $reset_state=$state->{$node}{reset_state};
$conf->{'system'}{call_order}.="$port:1,";
my $feed_state=$state->{$node}{feed_state};
# }
if ($feed_state)
{
# Boot the node.
$conf->{'system'}{call_order}.="$node:3,sleep,";
}
}
}
$conf->{'system'}{call_order}=~s/,$//;
$conf->{'system'}{call_order}=~s/,$//;
Line 897: Line 938:
foreach my $node (1..$max_port)
foreach my $node (1..$max_port)
{
{
my $power_port=sprintf("%02d", (($node*2)-1)).":$state";
$node=sprintf("%02d", $node).":$state";
my $reset_port=sprintf("%02d", ($node*2)).":$state";
record ($conf, $log, "Calling: [$node]\n");
# record ($conf, $log, "Calling: p[$power_port]\n");
my @set_state=$conf->{na}{handle}->cmd("$node");
my @set_state=$conf->{na}{handle}->cmd("$power_port");
foreach my $line (@set_state)
{
chomp $line;
next if not $line;
record($conf, $log, "$line\n");
}
# record ($conf, $log, "Calling: r[$reset_port]\n");
my @set_state=$conf->{na}{handle}->cmd("$reset_port");
foreach my $line (@set_state)
foreach my $line (@set_state)
{
{
Line 926: Line 958:
my ($conf, $log, $mode)=@_;
my ($conf, $log, $mode)=@_;
$mode="list" if not "$mode";
$mode="list" if not "$mode";
record($conf, $log, "In 'show_list' function. Passed: mode: [$mode]\n");
# Get an up to date list of the ports.
# Get an up to date list of the ports.
my $states=get_states($conf, $log);
my $state=get_states($conf, $log);
my $na_id=$conf->{'system'}{na_id};
my $na_id=$conf->{'system'}{na_id};
my $na_name=$conf->{na}{na_name};
my $na_name=$conf->{na}{na_name};
my $power_state="";
my $power_state="";
my $node_id=0;
my $node_id=0;
foreach my $port (sort {$a cmp $b} keys %{$states})
foreach my $port (sort {$a cmp $b} keys %{$state})
{
{
# record ($conf, $log, "Modulous of ($port % 2) is: [".($port % 2)."]\n");
# record ($conf, $log, "Modulous of ($port % 2) is: [".($port % 2)."]\n");
if ($port % 2)
if ($port % 2)
{
{
$power_state=$states->{$port};
$power_state=$state->{$port};
next;
next;
}
}
Line 945: Line 978:
$node_id++;
$node_id++;
my $power_feed=$conf->{na}{$na_id}{alias}{$node_id} eq "unused" ? "--" : $power_state;
my $power_feed=$conf->{na}{$na_id}{alias}{$node_id} eq "unused" ? "--" : $power_state;
my $reset_state=$states->{$port};
my $reset_state=$state->{$port};
my $line="";
my $line="";
if ($mode eq "list")
if ($mode eq "list")
Line 1,021: Line 1,054:
my $firmware_ver="";
my $firmware_ver="";
connect_to_na($conf, $log);
connect_to_na($conf, $log);
# Get the NAOS version and serial numbers.
if ($conf->{na}{handle})
my @details=$conf->{na}{handle}->cmd("00:1");
{
foreach my $line (sort {$a cmp $b} @details)
# Get the NAOS version and serial numbers.
my @details=$conf->{na}{handle}->cmd("00:1");
foreach my $line (sort {$a cmp $b} @details)
{
chomp $line;
($build_date)=($line=~/\s(\S+)$/) if ($line =~ /Build Date/i );
($serial_number)=($line=~/\s(\S+)$/) if ($line =~ /Serial Number/i );
($firmware_ver)=($line=~/\s(\S+)$/) if ($line =~ /NAOS Version/i );
# print "line: [$line]\n";
}
}
else
{
{
chomp $line;
$build_date="??";
($build_date)=($line=~/\s(\S+)$/) if ($line =~ /Build Date/i );
$serial_number="??";
($serial_number)=($line=~/\s(\S+)$/) if ($line =~ /Serial Number/i );
$firmware_ver="??";
($firmware_ver)=($line=~/\s(\S+)$/) if ($line =~ /NAOS Version/i );
# print "line: [$line]\n";
}
}
record ($conf, $log, " - Node Assassin $na_id\n");
record ($conf, $log, " - Node Assassin $na_id\n");
Line 1,042: Line 1,084:
record ($conf, $log, "  - Build Date: . $build_date (yyyy-mm-dd)\n");
record ($conf, $log, "  - Build Date: . $build_date (yyyy-mm-dd)\n");
record ($conf, $log, "  - Max Nodes: .. $conf->{na}{$na_id}{max_nodes}\n");
record ($conf, $log, "  - Max Nodes: .. $conf->{na}{$na_id}{max_nodes}\n");
show_list($conf, $log, "version");
if ($conf->{na}{handle})
$conf->{na}{tcp_port}->close();
{
show_list($conf, $log, "version");
$conf->{na}{tcp_port}->close();
}
else
{
no_connection_error($conf, $log, $na_id);
}
}
}
do_exit($conf, $log, 0);
do_exit($conf, $log, 0);

Revision as of 05:30, 7 April 2010

 Node Assassin :: Fence na.lib

This is the fence agent's function library that exists in /etc/na/.

#!/usr/bin/perl
#
# This is the function library for the Node Assassin fence agent.
# 
# Node Assassin - Fence Agent
# Digimer; digimer@alteeve.com
# Apr. 06, 2010.
# Version: 1.1.4
#


# This connects to a Node Assassin and stores the resulting Net::Telnet handle
# in '$conf->{na}{handle}'.
#
# Parameters:
#   $conf - Configuration hash reference. 'na::tcp_port' and 'na::ipaddr' are
#           read and 'na::handle' is written.
#   $log  - Log handle, passed through to 'record' and 'do_exit'.
#
# Returns the handle on success, or an empty string if the connection attempt
# left an error message on the handle.
sub connect_to_na
{
	my ($conf, $log)=@_;
	
	# 'Errmode' used to be passed twice ('die' and then 'return'). Only one
	# value can take effect for a named argument, so just the intended
	# 'return' mode is kept; errors are then reported via 'errmsg' below
	# instead of killing the agent.
	$conf->{na}{handle}=Net::Telnet->new(
		Timeout	=>	10,
		Port	=>	$conf->{na}{tcp_port},
		Prompt	=>	'/EOM$/',
		Errmode	=>	'return',
	) or do_exit($conf, $log, 1);
	
	$conf->{na}{handle}->open($conf->{na}{ipaddr});
	if ($conf->{na}{handle}->errmsg)
	{
		record($conf, $log, "Connection to Node Assassin: [$conf->{na}{ipaddr}] failed.\nError was: [".$conf->{na}{handle}->errmsg."]\n");
		# Blank the handle so that callers can test it for truth.
		$conf->{na}{handle}="";
	}
	
	return ($conf->{na}{handle});
}

# This handles the actual execution of the action plan.
#
# The plan is the comma-separated list in '$conf->{'system'}{call_order}'. Each
# entry is handled as one of:
#   release_all  - Calls 'set_all_state' with state 0.
#   fence_all    - Calls 'set_all_state' with state 1.
#   sleep [N]    - Sleeps for N seconds (1 if no number is given).
#   NN:<word>    - A status check on node NN, where <word> is 'check', 'off',
#                  'on' or 'reboot'. The node's feed state is read via
#                  'get_states' and used to set the exit code.
#   anything else is sent as-is to the Node Assassin through the handle in
#   '$conf->{na}{handle}' and the reply is logged.
#
# Parameters:
#   $conf - Configuration hash reference.
#   $log  - Log handle, passed through to 'record', 'get_states', etc.
#
# Returns the exit code set by the last status check, or 9 if no status check
# was processed. If 'get_states' reports a connection problem, the agent exits
# immediately with code 1 via 'do_exit'.
sub do_actions
{
	my ($conf, $log)=@_;
	
	# In the next step, when a 'check' is seen, the port is analyzed and an exit
	# status is stored here. Exits 0, 1 and 2 have special meaning, so I default to
	# 9.
	my $exit_code=9;
	
	# Process the orders.
	foreach my $order (split/,/, $conf->{'system'}{call_order})
	{
		# Handle a 'release_all' call.
		if ($order eq "release_all")
		{
			set_all_state($conf, $log, 0);
			next;
		}
		
		# Handle a 'fence_all' call.
		if ($order eq "fence_all")
		{
			set_all_state($conf, $log, 1);
			next;
		}
		
		# Handle a sleep request. A bare 'sleep' means one second.
		if ($order=~/^sleep/)
		{
			my $time=$order=~/sleep (\d+)/ ? $1 : 1;
			record ($conf, $log, "Sleeping: $time, ");
			if ($time == 1)
			{
				sleep 1;
				record ($conf, $log, "Done.\n");
			}
			else
			{
				# Count down one second at a time, logging the
				# remaining time as we go.
				while ($time)
				{
					$time--;
					sleep 1;
					record ($conf, $log, "$time, ") if $time > 1;
					record ($conf, $log, "$time. Done.\n") if $time == 1;
				}
			}
			next;
		}
		
		# Handle a status check via Node Assassin.
		if ($order=~/(\d\d):(\D+)/)
		{
			my $node=$1;
			my $check=$2;
			
			# Get the state. 'get_states' returns a hash reference on
			# success and a plain '1' on failure, so anything that
			# is not a reference is a failure.
			my $state=get_states($conf, $log);
			if (not ref $state)
			{
				# I had a connection problem. Exit with error
				# code '1' as per:
				# http://sources.redhat.com/cluster/wiki/FenceAgentAPI
				do_exit($conf, $log, 1);
			}
			
			# Only the power feed is needed to decide whether the
			# node is on or off.
			my $feed_state=$state->{$node}{feed_state};
			
			# Return the status of the requested node.
			if ($check eq "check")
			{
				# Return '2' if the node is off and '0' if it
				# is on.
				$exit_code=$feed_state ? 0 : 2;
			}
			elsif ($check eq "off")
			{
				# 'off' was called, make sure the node is now
				# off. This may be called by 'reboot' in which
				# case 'exit_code' may be over-written when the
				# final 'reboot' state check is called.
				$exit_code=$feed_state ? 1 : 0;
			}
			elsif ($check eq "on")
			{
				# 'on' was called, make sure the node is now
				# on.
				$exit_code=$feed_state ? 0 : 1;
			}
			elsif ($check eq "reboot")
			{
				# Make sure that 'exit_code' was set to '0' by
				# the earlier 'off' check. We check again to
				# make sure the node came back up, and will log
				# an error if it didn't, but we return '0' just
				# the same, as per the API.
				#
				# Note: The exit code used to be recalculated
				# from the feed state after this test, which
				# both hid a failed power off and turned a
				# failed boot into a failed fence. That
				# recalculation has been removed.
				if (not $exit_code)
				{
					# The power off portion worked. Check if the
					# node booted properly and record an error if
					# not.
					if (not $feed_state)
					{
						record($conf, $log, "Node: [$node] failed to boot after a successful power off during a reboot action.\n");
						record($conf, $log, "This is a non-critical error as the node was fenced successfully but may\n");
						record($conf, $log, "indicate a hardware failure with the node or with Node Assassin itself.\n");
					}
				}
				else
				{
					# The power off portion failed, exit with '1'.
					$exit_code=1;
				}
			}
			next;
		}
		
		# Handle a fence call; pass the order to the device and log
		# every non-empty line it returns.
		my @set_state=$conf->{na}{handle}->cmd("$order");
		foreach my $line (@set_state)
		{
			chomp $line;
			next if not $line;
			record($conf, $log, "$line\n");
		}
		record($conf, $log, "Call complete.\n");
	}
	
	return ($exit_code);
}

# This cleanly exits the agent.
#
# Parameters:
#   $conf        - The configuration hash reference. When 'na::handle' is set,
#                  it is closed before exiting.
#   $log         - The log file handle. It is closed when set.
#   $exit_status - The exit code to pass to 'exit'. Defaults to '9' when it is
#                  not defined.
#
# This function does not return.
sub do_exit
{
	# These are kept lexical so that calling this function neither creates
	# nor overwrites package globals of the same names.
	my ($conf, $log, $exit_status)=@_;
	$exit_status=9 if not defined $exit_status;
	
	$conf->{na}{handle}->close() if $conf->{na}{handle};
	$log->close() if $log;
	exit ($exit_status);
}

# This gets the states of all nodes reported by the connected Node Assassin
# and returns them in a hash reference.
#
# Parameters:
#   $conf - The configuration hash reference; 'na::handle' is the connection
#           used to send the '00:0' status request.
#   $log  - The log file handle (only used by the optional debug line).
#
# Returns:
#   On success, a hash reference in the form:
#     $state->{<node>}{power_state|reset_state|feed_state}
#   where <node> is the node number exactly as printed by the device.
#   On failure (no connection handle, or the status call returned nothing) it
#   returns '1'. Callers must check for this before dereferencing.
sub get_states
{
	my ($conf, $log)=@_;
	
	# The handle is set to an empty string when the connection attempt
	# failed. Calling a method on that would be fatal, so report the
	# failure the same way a failed status call is reported.
	return (1) if not $conf->{na}{handle};
	
	# Create the hash reference to store the states in.
	my $state={};
	
	# Call '00:0' to get the states. If it fails, return 1 as per
	# FenceAgentAPI requirements.
	my @check_state=$conf->{na}{handle}->cmd("00:0") or return(1);
	
	# Loop through the output.
	foreach my $line (@check_state)
	{
		# Chomp the newline off and then pull the node and its three
		# states out.
		chomp $line;
		my ($this_node, $power_state, $reset_state, $feed_state)=($line=~/^- Node (\d+): P(\d+), R(\d+), F(\d+)$/);
		
		# Skip if this isn't a status line. The check is on 'defined'
		# so that a node reported as '0' is not mistaken for a
		# non-matching line.
		next if not defined $this_node;
		
		# Store the state.
		$state->{$this_node}{power_state}=$power_state;
		$state->{$this_node}{reset_state}=$reset_state;
		$state->{$this_node}{feed_state}=$feed_state;
# 		record($conf, $log, "Node: [$this_node], Power State: [$state->{$this_node}{power_state}], Reset State: [$state->{$this_node}{reset_state}], Feed State: [$state->{$this_node}{feed_state}].\n");
	}
	
	# Return the hash reference.
	return ($state);
}

# This prints the 'help' message to STDOUT and then exits the agent with exit
# code '0' by way of 'do_exit'.
#
# Parameters:
#   $conf - The configuration hash reference, passed on to 'do_exit'.
#   $log  - The log file handle, passed on to 'do_exit'.
#
# This function does not return.
#
# The text below is kept in step with the code: 'fence_all' uses state 1 and
# the 'status' action reports through the exit code (0 = on, 2 = off).
sub help
{
	my ($conf, $log)=@_;
	my $msg=q`
NODE ASSASSIN FENCING AGENT

	This program interfaces with one or more Node Assassin fence devices to
	set one or more nodes to one or more states.

USAGE:

	Arguments are read from STDIN as 'variable=value' pairs, one pair per
	new line. This is the method used by 'fenced'.

	For testing, arguments may be passed as command line arguments.
	./fence_na <options>

OVERVIEW:

	This takes arguments that define which Node Assassin to call, what
	node to work on and what action should be taken. These arguments are
	defined by the FenceAgentAPI. These actions are then translated into
	a set of Node Assassin states which then get passed to the device.

NODE ASSASSIN "PORTS":

	The FenceAgentAPI refers to "ports" while the Node Assassin refers to
	"nodes". These two terms are thus used interchangeably. The reason for
	the difference is that, internally, Node Assassin maps each node to
	three "ports"; The node's power and reset buttons plus the node's power
	feed.

	Each node in the Node Assassin can be set to one of four states
	described in the next section.

NODE ASSASSIN STATES:

	Each Node Assassin port can be set to one of four states. They are:

	0
		This will release the fence on the specified NA node's power
		and reset ports.
	
	1
		This state will fence the node specified by the "port"
		argument. The fence will remain active until released. Further,
		the fence will "lock out" the node's front-panel buttons until
		the fence is released.
	
	2
		This will fence the node for one second. This is meant to
		provide a mechanism to boot or initiate a clean shut down of a
		node.
	
	3
		This state will fence the node for five seconds to 30 seconds,
		depending on how long it takes for the power feed to drop low.
		This is meant to provide a way to force crashed nodes to power
		off without the front-panel lock.

OPTIONS:

	Which options are valid depends on how the arguments are passed in. The
	"main" method of passing arguments is via STDIN as a series of
	'variable=value' pairs, one per line. All arguments accepted this way
	have command-line arguments passed in via single-dash switches. Each
	option below shows either calling method's arguments, where available.
	
	-h
	
		Prints this help message.
	
	-v, -V
	
		Prints the version of this fence agent and then the version of
		any configured, reachable Node Assassins.
	
	-q
	
		Tells this fence agent to only print critical messages.
	
	-a <val>, ipaddr=<val>
	
		Tells the fence agent which Node Assassin to use.
		
		NOTE! The name or IP addresses *MUST* match a name or IP
		      address defined in '/etc/na/fence_na.conf'! Specifically,
		      it must match one of the 'node::X::ipaddr' entries where
		      'X' is the ID of the Node Assassin.
	
	-l <val>, login=<val>
	
		This is the login name used to connect to this fence agent. It
		must match the value 'system::username' in
		'/etc/na/fence_na.conf'.
	
	-p <val>, passwd=<val>
	
		This is the login password used to connect to this fence agent.
		It must match the value 'system::password' in
		'/etc/na/fence_na.conf'.
	
	-n <val>, port=<val>
	
		This tells the fence agent which node to act on.
		
		NOTE! Please do not confuse this with the Node Assassin's
		      internal concept of a port.
	
	-o <val>, action=<val>
	
		This is the action (formerly 'option') to perform on the
		selected node. Valid actions and how Node Assassin implements
		them are:
		
		on
		
			1. Checks to see if the node is already on. If it is,
			   it exits with error code 0 (success).
			2. Checks to see if the node is fenced and, if so,
			   releases it.
			3. Sets the node to state 2 (one second fence of the
			   power switch) to initiate boot.
			4. Waits one second and then checks the node's feed
			   state to ensure that the node is powered on.
		
		off
		
			1. The node is set to state 1, Fenced. Specifically;
			2. The Node's reset pin is fenced for one second to
			   immediately disable the node.
			3. The reset pin is unfenced for one second.
			4. The power switch is fenced for five seconds and the
			   feed is checked. If it is still high, the Node
			   Assassin will wait an additional 25 seconds and
			   check the feed state again. If it is still on, an
			   error is generated and the fence agent's exit status
			   is set to 1.
			5. The reset is fenced. At this point, the node's front
			   panel buttons are disabled, preventing accidental
			   booting of the node before the fence is released.
		
		reboot
		
			Note: This will do a hard reboot! Do not use when a
			      normal restart would suffice.
			1. This sets the node to state 1 (see "off" above) and
			   pauses for one second.
			2. The node is set to state 0, releasing the fence and
			   pauses for one second.
			3. The node's feed is checked to ensure that the node
			   is off.
			4. The node is set to state 2, booting the node and
			   waits one second.
			5. The node's feed is checked to see if the node is
			   booting.
		
		status
		
			The associated node's power feed is checked and its
			status is returned as the exit code; 0 for on, 2 for
			off/disconnected.
		
		monitor, list
		
			Unknown what is to be done under these actions.
		
		###############################################################
		NOTE: The following states are supported by Node Assassin only.
		      These states are not used by the 'fenced' daemon and are
		      provided here as a convenience for the user.
		      
		      Any commands named "*_all" ignore the node value, if set.
		###############################################################
		
		release
		
			1. Power pin is opened (fence released)
			2. Reset pin is opened (fence released)
		
		release_all
		
			All closed ports on the specified Node Assassin are
			opened (fence released).
		
		fence_all
		
			All nodes are set to state 1 (see "off" above).
		
		boot_all
		
			The power feeds of all nodes on the Node Assassin are
			checked. Any found to be off or disconnected are set to
			state 2 to initiate boot.
		
		shutdown_all
		
			The power feeds of all nodes on the Node Assassin are
			checked. Any found to be on are set to state 2 to
			initiate an ACPI-induced soft shutdown.
		
		forcedown_all
		
			The power feeds of all nodes on the Node Assassin are
			checked. Any found to be on are set to state 3. This
			state is similar to state 1 (see "off" above), except
			that the node's front-panel switches are not disabled.
		
	-S <path>, passwd_script=<path> (Not Implemented)
	
		This is the path to a script that returns the password to use
		when running this fence agent.
	

EXAMPLES:

	To simulate how 'fenced' calls the script, create a text file called
	'args.txt' containing:
	
	-----------------------------------------------------------------------
	# Test file used as input for the NA fence agent.
	ipaddr=ariel.alteeve.com
	port=02
	login=motoko
	passwd=secret
	action=off
	-----------------------------------------------------------------------
	
	Now use 'cat' to pipe the contents into the fence agent:
	
		cat args.txt | ./fence_na
	
	This will call the 'off' function against node #02 connected to the
	Node Assassin at 'ariel.alteeve.com', fencing it. Change the action
	line to 'action=on' and re-run the script again to release the fence
	and boot the node.
	
	To duplicate the same call using command line arguments:
	
		./fence_na -a ariel.alteeve.com -n 02 -l motoko -p secret -o off

NOTE:

	An internal pager is not implemented. You may wish to run this via
	'less':
	
	./fence_na | less

UPDATED:

	Apr. 6, 2010				Digimer (digimer@alteeve.com)
`;
	print $msg;
	
	do_exit($conf, $log, 0);
}

# This reports, by way of 'record', that the given Node Assassin could not be
# queried, along with a hint on what to check.
#
# Parameters:
#   $conf  - The configuration hash reference; the device's name is read from
#            'na::<na_id>::na_name'.
#   $log   - The log file handle, passed on to 'record'.
#   $na_id - The ID of the Node Assassin that could not be reached.
#
# Always returns '0'.
sub no_connection_error
{
	my ($conf, $log, $na_id)=@_;
	
	# One 'record' call per line of the message, in order.
	foreach my $error_line (
		"   - ERROR: Unable to query Node Assassin: [$conf->{na}{$na_id}{na_name}]!\n",
		"   - ERROR: Please check that it is connected, that the information in\n",
		"   - ERROR: '/etc/na/fence_na.conf' is accurate and that the proper configuration\n",
		"   - ERROR: has be uploaded to the device.\n",
	)
	{
		record($conf, $log, $error_line);
	}
	
	return (0);
}

# This translates the requested action into the list of calls to make against
# the Node Assassin. The result is stored as a comma-separated string in
# 'system::call_order'; nothing is sent to the device's nodes here, though the
# current node states are queried for the actions that depend on them.
#
# Parameters:
#   $conf - The configuration hash reference. Reads 'na::action', 'na::port'
#           and 'na::max_nodes'; writes 'system::call_order'.
#   $log  - The log file handle, passed on to 'record' and 'do_exit'.
#
# Exits by way of 'do_exit' with:
#   0 - when asked to turn on a node whose power feed is already on.
#   1 - when the node states were needed but could not be read.
#   9 - when the action needs a port and the port is '00', or when the action
#       is not known.
sub process_action
{
	my ($conf, $log)=@_;
	record($conf, $log, "In the 'process_action' function.\n");
	
	# Make this more readable.
	my $action=$conf->{na}{action};
	my $port=$conf->{na}{port};
	
	# The following actions require a port. Error if I don't have one.
	if ($port eq "00")
	{
		# These are the incompatible calls.
		if (($action eq "on") || ($action eq "off") || ($action eq "reboot") || ($action eq "status"))
		{
			record($conf, $log, "\nERROR! Action request: [$action] requires a port number! I got: [$port] which does not seem to be valid.\n\n");
			do_exit($conf, $log, 9);
		}
	}
	
	# Make sure my call order is clear.
	$conf->{'system'}{call_order}="";
	if ($action eq "on")
	{
		# Release the fence, if fenced, and boot the node.
		my $state=_get_states_or_exit($conf, $log);
		my $power_state=$state->{$port}{power_state};
		my $reset_state=$state->{$port}{reset_state};
		my $feed_state=$state->{$port}{feed_state};
		if ($feed_state)
		{
			# Node is already running.
			record($conf, $log, "Asked to turn on node: [$port], but it's already running. Exiting with code 0.\n");
			do_exit($conf, $log, 0);
		}
		elsif (($power_state) || ($reset_state))
		{
			# Node was fenced, release it first.
			$conf->{'system'}{call_order}="$port:0,sleep,";
		}
		$conf->{'system'}{call_order}.="$port:2,sleep,$port:on";
	}
	elsif ($action eq "off")
	{
		# Fence the node.
		$conf->{'system'}{call_order}="$port:1,sleep,$port:off";
	}
	elsif ($action eq "reboot")
	{
		# I don't do this gracefully because the API says this should
		# be an "off -> on" process, and "off" is fence...
		$conf->{'system'}{call_order}="$port:1,sleep,$port:0,sleep,$port:off,$port:2,sleep,$port:on";
	}
	elsif ($action eq "status")
	{
		# This checks the node's power feed.
		$conf->{'system'}{call_order}="$port:check";
	}
	elsif (($action eq "monitor") or ($action eq "list"))
	{
		# Not sure what to do here. The call order is left empty.
	}
	### ALL ACTIONS BELOW HERE ARE OUTSIDE OF THE FenceAgentAPI!
	elsif ($action eq "release")
	{
		# Release the given node without booting it.
		$conf->{'system'}{call_order}="$port:0";
	}
	elsif ($action eq "release_all")
	{
		# Release all ports.
		$conf->{'system'}{call_order}="release_all";
	}
	elsif ($action eq "fence_all")
	{
		# Fence all ports.
		$conf->{'system'}{call_order}="fence_all";
	}
	elsif ($action eq "boot_all")
	{
		# Boot all nodes that are off.
		my $state=_get_states_or_exit($conf, $log);
		
		# Decide how, or if, to proceed based on the current state of
		# each node.
		foreach my $node (1..$conf->{na}{max_nodes})
		{
			$node=sprintf("%02d", $node);
			my $power_state=$state->{$node}{power_state};
			my $reset_state=$state->{$node}{reset_state};
			my $feed_state=$state->{$node}{feed_state};
			if (($power_state) || ($reset_state))
			{
				# Node was fenced, release first.
				$conf->{'system'}{call_order}.="$node:0,sleep,";
			}
			if (not $feed_state)
			{
				# Boot the node.
				$conf->{'system'}{call_order}.="$node:2,sleep,";
			}
		}
		# Drop the trailing comma left by the last appended call.
		$conf->{'system'}{call_order}=~s/,$//;
	}
	elsif ($action eq "shutdown_all")
	{
		# Shutdown all nodes that are on cleanly via ACPI.
		my $state=_get_states_or_exit($conf, $log);
		
		# Only nodes whose power feed is on need to be acted on.
		foreach my $node (1..$conf->{na}{max_nodes})
		{
			$node=sprintf("%02d", $node);
			if ($state->{$node}{feed_state})
			{
				# Ask the node to shut down (one second press).
				$conf->{'system'}{call_order}.="$node:2,sleep,";
			}
		}
		$conf->{'system'}{call_order}=~s/,$//;
	}
	elsif ($action eq "forcedown_all")
	{
		# Shutdown all nodes that are on by holding the power button
		# until they go down.
		my $state=_get_states_or_exit($conf, $log);
		
		# Only nodes whose power feed is on need to be acted on.
		foreach my $node (1..$conf->{na}{max_nodes})
		{
			$node=sprintf("%02d", $node);
			if ($state->{$node}{feed_state})
			{
				# Force the node off.
				$conf->{'system'}{call_order}.="$node:3,sleep,";
			}
		}
		$conf->{'system'}{call_order}=~s/,$//;
	}
	else
	{
		record($conf, $log, "Unknown action request: [$action]!\n");
		do_exit($conf, $log, 9);
	}
}

# Private helper for 'process_action'; this reads the node states and exits
# with code '1' when 'get_states' reports a failure instead of returning a
# hash reference.
sub _get_states_or_exit
{
	my ($conf, $log)=@_;
	
	my $state=get_states($conf, $log);
	do_exit($conf, $log, 1) if not ref $state;
	
	return ($state);
}

# Read in the config file.
#
# The file named in 'system::conf_file' is read line by line. Lines without an
# '=' sign, blank lines and lines that start with '#' (after leading
# whitespace is removed) are skipped. Every other line is split on the first
# '=' into a variable and a value, both are trimmed, and the pair is stored in
# the configuration hash by way of '_make_hash_reference'.
#
# Parameters:
#   $conf - The configuration hash reference to read 'system::conf_file' from
#           and to store the parsed values in.
#
# Returns '0' once the file has been read. Dies if the file can not be opened.
sub read_conf
{
	my ($conf)=@_;
	$conf={} if not $conf;
	
	# I can't call the 'record' method here because I've not read in the
	# log file and thus don't know where to write the log to yet. Comment
	# out or delete 'print' statements before release.
	my $conf_file="$conf->{'system'}{conf_file}";
# 	print "Config file: [$conf_file]\n";
	
	# The three-argument form of 'open' is used so that the file name is
	# taken literally; whitespace or mode characters in the path can not
	# change what gets opened.
	open (my $read, "<", $conf_file) or die "Failed to read: [$conf_file], error was: $!\n";
	while (my $line=<$read>)
	{
		chomp $line;
		next if not $line;
		next if $line !~ /=/;
		$line=~s/^\s+//;
		$line=~s/\s+$//;
		next if $line =~ /^#/;
		next if not $line;
		
		# Only split on the first '=' so that values may contain one.
		my ($var, $val)=(split/=/, $line, 2);
		$var=~s/^\s+//;
		$var=~s/\s+$//;
		$val=~s/^\s+//;
		$val=~s/\s+$//;
		next if (not $var);
# 		print "Storing: [$var] = [$val]\n";
		_make_hash_reference($conf, $var, $val);
	}
	close ($read);
	
	return (0);
}

# Read in command line arguments
#
# Switches that take a value ('-a', '-l', '-p', '-n', '-o' and '-S') store the
# argument that follows them under 'na::<key>'. '-h' prints the help message
# (which exits), '-v'/'-V' sets 'system::version' and '-q' sets
# 'system::quiet'. Seeing any argument at all sets 'system::got_cla'.
#
# Parameters:
#   $conf - The configuration hash reference to store the values in.
#   $log  - The log file handle, passed on to 'record' and 'help'.
#   $bad  - The current "bad argument" flag.
#
# Returns the "bad argument" flag; it is set to '1' if any argument was not
# recognized, otherwise it is returned as it was passed in.
sub read_cla
{
	my ($conf, $log, $bad)=@_;
	
	### These are the switches set by Red Hat. Each maps to the 'na' key
	### that the following argument will be stored under.
	my %key_for_switch=(
		a	=>	"ipaddr",		# IP address or hostname of the Node Assassin to call.
		l	=>	"login",		# The login name.
		p	=>	"passwd",		# The login password.
		n	=>	"port",			# The node to work on.
		o	=>	"action",		# The action to take.
		S	=>	"passwd_script",	# Script that prints the password to STDOUT.
	);
	
	# Loop through the passed arguments, if any.
	my $set_next="";
	foreach my $arg (@ARGV)
	{
		$conf->{'system'}{got_cla}=1;
		
		# If 'set_next' has a value, push this argument into the 'conf'
		# hash. This is checked first so that a value is never mistaken
		# for a switch, even if it starts with a dash.
		if ($set_next)
		{
			# It's set, use its contents as the hash key.
			$conf->{na}{$set_next}=$arg;
			
			# Clear it now for the next go-round.
			$set_next="";
			next;
		}
		
		# The patterns below are anchored so that only the switch
		# itself matches, not any argument that merely contains it.
		if ($arg=~/^--?h(?:elp)?$/)
		{
			# Print the help message and then exit.
			help($conf, $log);
		}
		elsif (($arg=~/^-[vV]$/) or ($arg eq "--version"))
		{
			# Print the version information and then exit.
			$conf->{'system'}{version}=1;
		}
		elsif (($arg eq "-q") or ($arg eq "--quiet"))
		{
			# Suppress all non-critical messages from STDOUT.
			$conf->{'system'}{quiet}=1;
		}
		elsif (($arg=~/^-(.+)$/) and (exists $key_for_switch{$1}))
		{
			# The switch is read from a capture rather than by
			# editing '$arg', which is an alias into '@ARGV'.
			$set_next=$key_for_switch{$1};
# 			record ($conf, $log, "Next argument will be stored in: [$set_next]\n");
		}
		else
		{
			### MADI: I might want to pick up arguments via multiple lines.
			# Bad argument. This covers unknown switches as well as
			# stray values.
			record($conf, $log, "Argument: [$arg] is not valid!\n");
			record($conf, $log, "Please run './fence_na --help' to see a list of valid arguments.\n");
			$bad=1;
		}
	}
	
	return ($bad);
}

# Read arguments from STDIN. This is adapted from the 'fence_brocade' agent.
#
# Nothing is read when command line arguments were seen ('system::got_cla').
# Otherwise, each input line is trimmed, comment and empty lines are skipped,
# and the rest are split into a name and a value on the first '='.
#
# Parameters:
#   $conf - The configuration hash reference to store the values in.
#   $log  - The log file handle, passed on to 'record'.
#   $bad  - The current "bad argument" flag.
#
# Returns '0' when command line arguments were used. Otherwise it returns the
# "bad argument" flag, which is set to '1' if an unknown name was seen.
sub read_stdin
{
	my ($conf, $log, $bad)=@_;
	
	return (0) if $conf->{'system'}{got_cla};
	
	# These names are stored as-is under the 'na' key of the same name.
	# 'agent' is only used by 'fenced' but is recorded for potential
	# debugging. 'passwd_script' is stored but not implemented.
	my %is_plain_name=map { $_ => 1 } qw(agent ipaddr login passwd passwd_script port);
	
	my $line_count=0;
	while(defined (my $option=<>))
	{
		# Count every line read so that the line number reported for
		# an illegal name matches the input.
		$line_count++;
		
		# Get rid of newlines.
		chomp $option;
		
		# strip leading and trailing whitespace
		$option=~s/^\s+//;
		$option=~s/\s+$//;
		
		# skip comments
		next if ($option=~ /^#/);
		
		# Go to the next line if the option line is empty.
		next if not $option;
		
		# Split the option up into the name and the value. The limit
		# keeps any '=' in the value (such as in a password) intact.
		my ($name, $value)=split /\s*=\s*/, $option, 2;
		
		# Set my variables depending on the variable name.
		if ($is_plain_name{$name})
		{
			$conf->{na}{$name}=$value;
		}
		elsif ($name eq "fm")
		{
			# This is a deprecated argument that should no longer
			# be used. Now 'port' should be used.
			if (not $conf->{na}{port})
			{
				# Port isn't set yet, use this value which may
				# be replaced if 'port' is set later. Only the
				# second whitespace-separated field is used.
				(undef, $value) = split /\s+/,$value;
				$conf->{na}{port}=$value;
				warn "Warning! The argument 'fm' is deprecated, use 'port' instead. Value: [$value] set for 'port'\n";
			}
			else
			{
				# Port was already set, so simply ignore this.
				warn "Warning! The argument 'fm' is deprecated, use 'port' instead. Value: [$value] ignored.\n";
			}
		}
		elsif ($name eq "name")
		{
			# Deprecated argument used formerly for login name.
			if (not $conf->{na}{login})
			{
				# Login isn't set yet, use this value which may
				# be replaced if 'login' is seen later.
				$conf->{na}{login}=$value;
				warn "Warning! The argument 'name' is deprecated, use 'login' instead. Value: [$value] set for 'login'.\n";
			}
			else
			{
				# I've already seen the 'login' value so I will
				# ignore this value.
				warn "Warning! The argument 'name' is deprecated, use 'login' instead. Value: [$value] ignored.\n";
			}
		}
		elsif (($name eq "action") or ($name eq "option"))
		{
			# 'option' is deprecated.
			record($conf, $log, "Please use 'action', not 'option', as the later is deprecated.\n") if $name eq "option";
			$conf->{na}{action}=$value;
		}
		elsif ($name eq "quiet")
		{
			# This is passed by 'fenced' via 'cluster.conf' as a
			# custom argument to suppress output to STDOUT. The
			# value itself is not looked at.
			$conf->{'system'}{quiet}=1;
		}
		else
		{
			warn "Illegal name in option: [$option] at line: [$line_count]\n";
			$bad=1;
		}
	}
	return ($bad);
}

# This function simply prints messages to both the log and to stdout.
#
# Parameters:
#   $conf - The configuration hash reference; when 'system::quiet' is set, the
#           message is not printed to the currently selected output handle.
#   $log  - The log file handle. When it is not set, the log is skipped
#           instead of the agent dying on an unusable handle.
#   $msg  - The message to print, used exactly as passed.
#
# Always returns '0'.
sub record
{
	my ($conf, $log, $msg)=@_;
	
	print {$log} $msg if $log;
	print $msg if not $conf->{'system'}{quiet};
	
	return(0);
}

# This sets every node of the connected Node Assassin, from '01' up to
# 'na::max_nodes', to the requested state. Each call and each non-empty line
# of the device's reply is passed to 'record'.
#
# Parameters:
#   $conf  - The configuration hash reference; uses 'na::max_nodes' and
#            'na::handle'.
#   $log   - The log file handle, passed on to 'record'.
#   $state - The state to set. Defaults to '0' when not defined.
#
# Always returns '9'.
sub set_all_state
{
	my ($conf, $log, $state)=@_;
	$state=0 if not defined $state;
	
	for my $number (1..$conf->{na}{max_nodes})
	{
		# The call is the zero-padded node number, a colon and the
		# state; for example '03:1'.
		my $call=sprintf("%02d", $number).":$state";
		record ($conf, $log, "Calling: [$call]\n");
		
		# Send the call, then log whatever came back, minus the
		# newlines and the empty lines.
		my @reply=$conf->{na}{handle}->cmd("$call");
		chomp @reply;
		record($conf, $log, "$_\n") for grep { $_ } @reply;
	}
	
	return (9);
}

# When asked to 'monitor' or 'list', show a CSV of all nodes, node alias and
# their state. The first two are standard, the third is not.
#
# The node states come from 'get_states', which returns one entry per node
# holding that node's 'power_state', 'reset_state' and 'feed_state'. One line
# (or, in 'version' mode, one block of lines) is recorded per node.
#
# Parameters:
#   $conf - The configuration hash reference; uses 'system::na_id',
#           'na::na_name' and the aliases under 'na::<na_id>::alias'.
#   $log  - The log file handle, passed on to 'record' and 'do_exit'.
#   $mode - 'list' for the CSV output; anything else prints the per-node
#           details used by the version report. Defaults to 'list'.
#
# This function does not return; it exits with '0' once the list is printed
# or with '1' when the states could not be read.
sub show_list
{
	my ($conf, $log, $mode)=@_;
	$mode="list" if not $mode;
	record($conf, $log, "In 'show_list' function. Passed: mode: [$mode]\n");
	
	# Get an up to date list of the node states. A failure is returned as
	# '1' rather than as a hash reference.
	my $state=get_states($conf, $log);
	do_exit($conf, $log, 1) if not ref $state;
	
	my $na_id=$conf->{'system'}{na_id};
	my $na_name=$conf->{na}{na_name};
	foreach my $node (sort {$a cmp $b} keys %{$state})
	{
		# The states are keyed by the node number as printed by the
		# device; the plain number is used for display and for the
		# alias lookup.
		# NOTE(review): this assumes aliases are keyed by the plain
		# number ('1', not '01') - confirm against the config file.
		my $node_id=$node + 0;
		my $alias=$conf->{na}{$na_id}{alias}{$node_id};
		my $name=$alias ? $alias : "$na_name-$node_id";
		
		my $power_state=$state->{$node}{power_state};
		my $reset_state=$state->{$node}{reset_state};
		
		# Nodes marked as 'unused' have no meaningful power feed.
		my $power_feed=((defined $alias) && ($alias eq "unused")) ? "--" : $state->{$node}{feed_state};
		
		my $line="";
		if ($mode eq "list")
		{
			# Responding to an 'action=list' call.
			$line ="$node_id,$name";
			$line.=",power_state:$power_state,reset_state:$reset_state,power_feed:$power_feed";
		}
		else
		{
			# Responding to a 'version' call.
			$line ="   - Node $node_id Details;\n";
			$line.="     - Name: ..... $name\n";
			$line.="     - Power Port: $power_state\n";
			$line.="     - Reset Port: $reset_state\n";
			$line.="     - Power Feed: $power_feed";
		}
		record($conf, $log, "$line\n");
	}
	
	do_exit($conf, $log, 0);
}

# This queries the Node Assassin and exits with the state of the requested
# node ('na::port').
#
# The node's power feed decides the result, which is the same rule the
# 'check' status call uses: feed on is '0', feed off is '2'.
#
# Parameters:
#   $conf - The configuration hash reference; uses 'na::port'.
#   $log  - The log file handle, passed on to 'get_states' and 'do_exit'.
#
# This function does not return. As per:
# http://sources.redhat.com/cluster/wiki/FenceAgentAPI
# The exit state is:
# 0 = Node is running
# 1 = Failed to contact fence, unknown state.
# 2 = Node is fenced.
sub show_state
{
	my ($conf, $log)=@_;
	
	my $node=$conf->{na}{port};
	
	# 'get_states' returns '1' instead of a hash reference when it could
	# not talk to the Node Assassin.
	my $state=get_states($conf, $log);
	
	# Anything short of a reported state for this node is "unknown".
	my $exit_code=1;
	if ((ref $state) && (defined $node) && (exists $state->{$node}))
	{
		$exit_code=$state->{$node}{feed_state} ? 0 : 2;
	}
	
	do_exit($conf, $log, $exit_code);
}

# This prints the version information of this fence agent and of any configured
# fence devices.
#
# For each Node Assassin from '1' to 'system::na_num', the connection details
# are copied into the active 'na::ipaddr', 'na::tcp_port' and 'na::na_name'
# values, a connection is attempted and, if it works, the device is asked
# ('00:1') for its build date, serial number and firmware version. Values
# that could not be read are shown as '??'.
#
# Parameters:
#   $conf - The configuration hash reference.
#   $log  - The log file handle, passed on to 'record' and 'do_exit'.
#
# This function does not return; it ends with 'do_exit' and exit code '0'.
#
# NOTE(review): 'show_list' itself ends with a call to 'do_exit', so as things
# stand the report stops after the first reachable Node Assassin and the code
# after the 'show_list' call is not reached. Confirm whether that is intended.
sub version
{
	my ($conf, $log)=@_;
	
	# Print the Fence Agent version first.
	record ($conf, $log, "Fence Agent: ..... Node Assassin ver. $conf->{'system'}{agent_version}\n");
	record ($conf, $log, "Node Assassins: .. $conf->{'system'}{na_num}\n");
	for my $na_id (1..$conf->{'system'}{na_num})
	{
		# Make this Node Assassin the active one.
		$conf->{'system'}{na_id}=$na_id;
		$conf->{na}{ipaddr}=     $conf->{na}{$na_id}{ipaddr};
		$conf->{na}{tcp_port}=   $conf->{na}{$na_id}{tcp_port};
		$conf->{na}{na_name}=    $conf->{na}{$na_id}{na_name};
		my $build_date="";
		my $serial_number="";
		my $firmware_ver="";
		connect_to_na($conf, $log);
		if ($conf->{na}{handle})
		{
			# Get the NAOS version and serial numbers. The value is
			# the last whitespace-separated word on its line.
			my @details=$conf->{na}{handle}->cmd("00:1");
			foreach my $line (sort {$a cmp $b} @details)
			{
				chomp $line;
				($build_date)=($line=~/\s(\S+)$/) if ($line =~ /Build Date/i );
				($serial_number)=($line=~/\s(\S+)$/) if ($line =~ /Serial Number/i );
				($firmware_ver)=($line=~/\s(\S+)$/) if ($line =~ /NAOS Version/i );
	# 			print "line: [$line]\n";
			}
		}
		else
		{
			$build_date="??";
			$serial_number="??";
			$firmware_ver="??";
		}
		record ($conf, $log, " - Node Assassin $na_id\n");
		record ($conf, $log, "   - Name: ....... $conf->{na}{$na_id}{na_name}\n");
		record ($conf, $log, "   - IP Address: . $conf->{na}{$na_id}{ipaddr}\n");
		record ($conf, $log, "   - TCP Port: ... $conf->{na}{$na_id}{tcp_port}\n");
		record ($conf, $log, "   - MAC Address:  $conf->{na}{$na_id}{mac}\n");
		record ($conf, $log, "   - Netmask: .... $conf->{na}{$na_id}{netmask}\n");
		record ($conf, $log, "   - Gateway: .... $conf->{na}{$na_id}{gateway}\n");
		record ($conf, $log, "   - Serial #: ... $serial_number\n");
		record ($conf, $log, "   - Firmware: ... $firmware_ver\n");
		record ($conf, $log, "   - Build Date: . $build_date (yyyy-mm-dd)\n");
		record ($conf, $log, "   - Max Nodes: .. $conf->{na}{$na_id}{max_nodes}\n");
		if ($conf->{na}{handle})
		{
			show_list($conf, $log, "version");
			
			# Close the connection itself; 'na::tcp_port' is only
			# the port number and has no 'close' method. The
			# handle is then cleared so that nothing tries to
			# close it a second time.
			$conf->{na}{handle}->close();
			$conf->{na}{handle}="";
		}
		else
		{
			no_connection_error($conf, $log, $na_id);
		}
	}
	do_exit($conf, $log, 0);
}


###############################################################################
# Private functions below here.                                               #
###############################################################################

### Contributed by Shaun Fryer and Viktor Pavlenko by way of TPM.
# This is a helper to the '_make_hash_reference' function. It copies every key
# of the second hash reference into the first. Where the first already holds a
# hash reference under a key, that hash is descended into and merged rather
# than replaced; anything else is overwritten.
#
# Parameters:
#   1st - The hash reference to merge into. It is changed in place.
#   2nd - The hash reference to merge from.
sub _add_hash_reference
{
	my ($target, $source)=@_;
	
	foreach my $name (keys %{$source})
	{
		# Nothing to merge with here; take the new value as it is.
		if (ref($target->{$name}) ne 'HASH')
		{
			$target->{$name}=$source->{$name};
			next;
		}
		
		# Both sides branch here, so merge one level down.
		_add_hash_reference($target->{$name}, $source->{$name});
	}
}

### Contributed by Shaun Fryer and Viktor Pavlenko by way of TPM.
# This takes a string with double-colon separators and divides on those
# double-colons to create a hash reference where each element is a hash key.
#
# For example, the key string 'na::1::ipaddr' with the value '10.0.0.1' sets
# '$href->{na}{1}{ipaddr}' to '10.0.0.1'. Existing branches are kept and
# merged into; an existing non-hash value that sits where a branch is needed
# is replaced by the branch.
#
# Parameters:
#   1st - The hash reference to store the value in. It is changed in place.
#   2nd - The key string, with '::' between each level.
#   3rd - The value to store under the last key.
sub _make_hash_reference
{
	my $href=shift;
	my $key_string=shift;
	my $value=shift;
# 	print "variable: [$key_string], value: [$value]\n";
	
	my @keys = split /::/, $key_string;
	my $last_key = pop @keys;
	
	# Walk down to the hash that will hold the value, creating any missing
	# level on the way. Every key is used as-is, so a key of '0' is a
	# level like any other and does not cut the walk short.
	my $branch = $href;
	foreach my $key (@keys)
	{
		$branch->{$key} = {} if ref($branch->{$key}) ne 'HASH';
		$branch = $branch->{$key};
	}
	$branch->{$last_key}=$value;
}

1;

 

Input, advice, complaints and meanderings all welcome!
Digimer digimer@alteeve.ca https://alteeve.ca/w legal stuff:  
All info is provided "As-Is". Do not use anything here unless you are willing and able to take responsibility for your own actions. © 1997-2013
Naming credits go to Christopher Olah!
In memory of Kettle, Tonia, Josh, Leah and Harvey. In special memory of Hannah, Jack and Riley.