Source AN::Tools::String

From Alteeve Wiki
Jump to navigation Jump to search

 AN!Tools :: AN::Tools :: Source AN::Tools::String

String.pm

package AN::Tools::String;

use strict;
use warnings;

our $VERSION="0.1.001";
my $THIS_FILE="String.pm";


sub new
{
	my $class=shift;
	
	my $self={
		READ		=>	"./tools.xml",
		HASH		=>	{},
		FORCE_UTF8	=>	0,
		DEFAULT_LANG	=>	"en_CA",
	};
	
	bless $self, $class;
	
	return ($self);
}

# Get a handle on the AN::Tools object. I know that technically that is a
# sibling module, but it makes more sense in this case to think of it as a
# parent.
sub parent
{
	my $self=shift;
	my $parent=shift;
	
	$self->{HANDLE}{TOOLS}=$parent if $parent;
	
	return ($self->{HANDLE}{TOOLS});
}

# This forces UTF8 mode when reading a words file. This should not be used
# normally as the words file should already be UTF8 encoded.
sub force_utf8
{
	my $self=shift;
	my $set=defined $_[0] ? shift : undef;
	
	if (defined $set)
	{
		if (($set==0) || ($set==1))
		{
			$self->{FORCE_UTF8}=$set;
		}
		else
		{
			my $an=$self->parent;
			$an->Alert->error({
				fatal	=>	1,
				title	=>	"Invalid argument",
				message	=>	"The invalid argument: [$set] was passed into the 'AN::Tools::String->force_utf8()'. Only 1 or 0 are valid arguments.",
				code	=>	14,
				file	=>	"$THIS_FILE",
				line	=>	__LINE__
			});
		}
	}
	
	return ($self->{FORCE_UTF8});
}

# This takes a word key and, optionally, a hash reference, a language and/or an
# variables array reference. It returns the corresponding string from the hash
# reference data containing the data from a 'read_words()' call.
sub get_string
{
	my $self=shift;
	my $param=shift;
	
	# This just makes the code more consistent.
	my $an=$self->parent;
	
	# Clear any prior errors as I may set one here.
	$an->Alert->_set_error;
	
	my $key;
	my $hash=$an->data;
	my $vars;
	my $lang=$self->{DEFAULT_LANG};
	
	# Now see if the user passed the values in a hash reference or
	# directly.
	if (ref($param) eq "HASH")
	{
		# Values passed in a hash, good.
		$key=$param->{key} if $param->{key};
		$vars=$param->{variable} if $param->{variable};
		$hash=$param->{hash} if $param->{hash};
		$lang=$param->{language} if $param->{language};
	}
	else
	{
		# Values passed directly.
		$key=$param;
		$vars=$_[0] if defined $_[0];
		$hash=$_[1] if defined $_[1];
		$lang=$_[2] if defined $_[2];
	}
	
	# Make sure that 'hash' is a hash reference
	if (ref($hash) ne "HASH")
	{
		$an->Alert->error({
			fatal	=>	1,
			title	=>	"Invalid Argument",
			message	=>	"The 'AN::Tools::String' module's 'get_string' method was passed an invalid 'hash' argument: [$hash]. This must be a hash reference containing data read in from an XML words file by the 'read_words()' method.",
			code	=>	15,
			file	=>	"$THIS_FILE",
			line	=>	__LINE__
		});
		# Return nothing in case the user is blocking fatal
		# errors.
		return (undef);
	}
	
	# Make sure that 'vars' is an array reference, if set.
	if (($vars) && (ref($vars) ne "ARRAY"))
	{
		$an->Alert->error({
			fatal	=>	1,
			title	=>	"Invalid Argument",
			message	=>	"The 'AN::Tools::String' module's 'get_string' method was passed an invalid 'variable' argument: [$vars]. This must be an array reference containing elements intended to replace corresponding #!var!x!# replacement keys in the requested string.",
			code	=>	16,
			file	=>	"$THIS_FILE",
			line	=>	__LINE__
		});
		return (undef);
	}
	
	# Make sure that the request language exists in the hash.
	if (ref($hash->{words}{lang}{$lang}) ne "HASH")
	{
		$an->Alert->error({
			fatal	=>	1,
			title	=>	"Invalid Language",
			message	=>	"The 'AN::Tools::String' module's 'get_string' method was passed an invalid 'language' argument: [$lang]. This must match one of the languages in the words file's <langs>...</langs> block.\n",
			code	=>	17,
			file	=>	"$THIS_FILE",
			line	=>	__LINE__
		});
		return (undef);
	}
	
	# Make sure that the request key is in the language hash.
	if (not exists $hash->{words}{lang}{$lang}{key}{$key}{content})
	{
		$an->Alert->error({
			fatal	=>	1,
			title	=>	"Invalid String Key",
			message	=>	"The 'AN::Tools::String' module's 'get_string' method was passed the 'key' argument: [$key] which was not found in the language: [$lang]. This key must be defined in one of the read in words files.\n",
			code	=>	18,
			file	=>	"$THIS_FILE",
			line	=>	__LINE__
		});
		return (undef);
	}
	
	# Now pick out my actual string.
	my $string=$hash->{words}{lang}{$lang}{key}{$key}{content};
	$string=~s/^\n//;
	
	# This clears off the new-line and trailing white-spaces caused by the
	# indenting of the '</key>' field in the words XML file when printing
	# to the command line.
	$string=~s/\n(\s+)$//;
	
	# Substitute in any variables if needed.
	if ($vars)
	{
		$string=$an->String->_insert_vars({
			  string	=>	$string,
			  variable	=>	$vars,
		});
	}
	
	# Process the just-read string.
	$string=$an->String->_process({
		  string	=>	$string,
		  language	=>	$lang,
		  hash		=>	$hash,
	});
	
	return ($string);
}

# This takes a string and substitutes out the various replacement keys as
# needed until the string is ready for display. The only thing it doesn't
# handle is substituting '#!var!x!#' keys into a string. For that, call the
# 'get_string' method with it's given variable array reference and store the
# results in a string. This is requried because there is currently no way for
# any of the called methods within here to know which string the variables in
# the array reference belong in.
sub _process
{
	my $self=shift;
	my $param=shift;
	
	my $an=$self->parent;
	
	# Start looping through the passed string until all the replacement
	# keys are gone.
	my $i=0;
	while ( $param->{string} =~ /#!(.+?)!#/ )
	{
		# Substitute 'word' keys, but without 'vars'. This has to be
		# first! 'protect' will catch 'word' keys, because no where
		# else are they allowed.
		$param->{string}=$an->String->_insert_word({
			  string	=>	$param->{string},
			  language	=>	$param->{language},
			  hash		=>	$param->{hash},
		});
		
		# Protect unmatchable keys.
		$param->{string}=$an->String->_protect({
			string		=>	$param->{string},
		});
		
		# Inject any 'data' values.
		$param->{string}=$an->String->_insert_data({
			string	=>	$param->{string},
		});
		
		die "Infinite loop detected while processing the string: [$param->{string}], exiting.\n" if $i > $an->_error_limit;
		$i++;
	}
	
	# Restore and unrecognized substitution values.
	$param->{string}=$an->String->_restore_protected({
		string		=>	$param->{string},
	});
	
	# Do any output mode specific formatting.
	$param->{string}=$an->String->_format_mode({
		string		=>	$param->{string},
	});
	
	return ($param->{string});
}

# Do any output mode specific formatting.
sub _format_mode
{
	my $self=shift;
	my $param=shift;
	
	my $an=$self->parent;
	
	# I don't think I need this now as I only wrap the string after it's
	# been processed by 'print_template'. It may have future use though.
	return ($param->{string});
}

# This restores the original key format for keys that were protected by the
# '_protect' method.
sub _restore_protected
{
	my $self=shift;
	my $param=shift;
	
	my $an=$self->parent;
	
	# Restore and unrecognized substitution values.
	my $i=0;
	while ( $param->{string} =~ /!#(.+?)#!/ )
	{
		my $check=$1;
		$param->{string}=~s/!#$check#!/#!$check!#/g;
		
		die "Infinite loop detected while restoring protected replacement keys in the string: [$param->{string}], exiting.\n" if $i > $an->_error_limit;
		$i++;
	}
	
	return ($param->{string});
}

# This does the actual work of substituting 'data' keys.
sub _insert_data
{
	my $self=shift;
	my $param=shift;
	
	my $an=$self->parent;
	
	my $i=0;
	while ($param->{string} =~ /#!data!(.+?)!#/)
	{
		my $id=$1;
		if ( $id =~ /::/ )
		{
			# Multi-dimensional hash.
			my $value=$an->_get_hash_reference({
				key	=>	$id,
			});
			if (not defined $value)
			{
				$param->{string}=~s/#!data!$id!#/!!a[$id]!!/;
			}
			else
			{
				$param->{string}=~s/#!data!$id!#/$value/;
			}
		}
		else
		{
			# One dimension
			if (not defined $an->data->{$id})
			{
				$param->{string}=~s/#!data!$id!#/!!b[$id]!!/;
			}
			else
			{
				my $val=$an->data->{$id};
				$param->{string}=~s/#!data!$id!#/$val/;
			}
		}
		
		die "Infinite loop detected while replacing data keys in the string: [$param->{string}], exiting.\n" if $i > $an->_error_limit;
		$i++;
	}
	
	return ($param->{string});
}

# Protect unrecognized or unused replacement keys. I do this to protect strings
# possibly set or created by a user.
sub _protect
{
	my $self=shift;
	my $param=shift;
	
	my $an=$self->parent;
	
	my $i=0;
	foreach my $check ( $param->{string} =~ /#!(.+?)!#/ )
	{
		if (( $check !~ /^free/ ) &&
		( $check !~ /^replace/ ) &&
		( $check !~ /^data/ ) &&
		( $check !~ /^word/ ) &&
		( $check !~ /^var/ ))
		{
			# Simply invert the '#!...!#' to '!#...#!'.
			$param->{string}=~s/#!($check)!#/!#$1#!/g;
		}
		
		die "Infinite loop detected while protecting replacement keys in the string: [$param->{string}], exiting.\n" if $i > $an->_error_limit;
		$i++;
	}
	
	return ($param->{string});
}

# This is called to process '#!word!...!#' keys in string. It DOES NOT
# support substituting '#!var!x!#' keys found in imported word strings! This
# is meant to insert simple word strings into template files.
sub _insert_word
{
	my $self=shift;
	my $param=shift;
	
	my $an=$self->parent;
	
	# Loop through the string until all '#!word!...!#' keys are gone.
	my $i=0;
	while ($param->{string} =~ /#!word!(.+?)!#/)
	{
		my $key=$1;
		my $say_word=$an->String->get_string({
			  key		=>	$key,
			  language	=>	$param->{language},
			  hash		=>	$param->{hash},
			  variable	=>	undef,
		});
		if ($say_word)
		{
			$param->{string}=~s/#!word!$key!#/$say_word/;
		}
		else
		{
			$param->{string}=~s/#!word!$key!#/!!e[$key]!!/;
		}
		
		die "Infinite loop detected while replacing #!word!...!# keys in the string: [$param->{string}], exiting.\n" if $i > $an->_error_limit;
		$i++;
	}
	return ($param->{string});
}

# This takes a string with '#!var!x!#' keys, where 'x' is an integer matching
# an entry in the passed array reference and uses the data from the array to
# replace the matching '#!var!x!#' entry.
sub _insert_vars
{
	my $self=shift;
	my $param=shift;
	
	my $an=$self->parent;
	
	my $i=0;
	while ( $param->{string} =~ /#!var!(.+?)!#/ )
	{
		my $val=$param->{variable}->[$1];
		if ( not defined $param->{variable}->[$1] )
		{
			# I can't expect there to always be a defined value in
			# the @vals array at any given position so if it's
			# blank I blank the key.
			$param->{string}=~s/#!var!$1!#//;
		}
		else
		{
			chomp $val;
			$param->{string}=~s/#!var!$1!#/$val/;
		}
		die "Infinite loop detected while injecting variables into the string: [$param->{string}], exiting.\n" if $i > $an->_error_limit;
		$i++;
	}
	
	return ($param->{string});
}

# This takes the path/name of an XML file containing AN::Tools type words and
# reads them into a hash reference.
sub read_words
{
	my $self=shift;
	my $param=shift;
	
	# This just makes the code more consistent.
	my $an=$self->parent;
	
	# Clear any prior errors as I may set one here.
	$an->Alert->_set_error;
	
	# Setup my variables.
	my $file=0;
	my $hash=0;
	
	# Now see if the user passed the values in a hash reference or
	# directly.
	if (ref($param) eq "HASH")
	{
		# Values passed in a hash, good.
		$file=$param->{file} ? $param->{file} : $self->{FILE};
		$hash=ref($param->{hash}) eq "HASH" ? $param->{hash} : $self->{HASH};
	}
	else
	{
		# Values passed directly.
		$file=$param;
		$hash=((defined $_[0]) && (ref($_[0]) eq "HASH")) ? shift : $self->{HASH};
	}
	$file=$self->{READ} if not $file;
	print "Reading: [$file] into: [$hash] (\$an->data is: [".$an->data()."]\n";
	
	# Make sure that the 'file' exists and is readable.
	if ( not -e $file )
	{
		$an->Alert->error({
			fatal	=>	1,
			title	=>	"Unable to find the words file.'",
			message	=>	"The words file: [$file] could not be found. Please check that it exists.",
			code	=>	11,
			file	=>	"$THIS_FILE",
			line	=>	__LINE__
		});
		# Return nothing in case the user is blocking fatal
		# errors.
		return (undef);
	}
	if ( not -r $file )
	{
		$an->Alert->error({
			fatal	=>	1,
			title	=>	"Unable to read the words file.'",
			message	=>	"The words file: [$file] was found but could not be read. Please check that it exists.",
			code	=>	12,
			file	=>	"$THIS_FILE",
			line	=>	__LINE__
		});
		# Return nothing in case the user is blocking fatal
		# errors.
		return (undef);
	}
	
	my $in_comment=0;	# Set to '1' when in a comment stanza that spans more than one line.
	my $in_data=0;		# Set to '1' when reading data that spans more than one line.
	my $closing_key="";	# While in_data, look for this key to know when we're done.
	my $xml_version="";	# The XML version of the words file.
	my $encoding="";	# The encoding used in the words file. Should only be UTF-8.
	my $data="";		# The data being read for the given key.
	my $key_name="";	# This is a double-colon list of hash keys used to build each hash element.
	
	# Load IO::Handle if needed.
	$an->_load_io_handle() if not $an->_io_handle_loaded();
	
	# Read in the XML file with the word strings to load.
	my $read=IO::Handle->new;
	my $shell_call="<$file";
	open ($read, $shell_call) || $an->Alert->error({
			fatal	=>	1,
			title	=>	"'AN::Tools::String->read()' was not able to open the words file for reading.",
			message	=>	"The AN::Tools::String methid 'read' was not able to open the words file: [$file]. The error was: $!",
			code	=>	13,
			file	=>	"$THIS_FILE",
			line	=>	__LINE__
		});
	
	# If I have been asked to read in UTF-8 mode, do so.
	if ($an->String->force_utf8)
	{
		binmode $read, "encoding(utf8)";
	}
	
	# Now loop through the XML file, line by line.
	while (<$read>)
	{
		chomp;
		my $line=$_;
		
		### Deal with comments.
		# Look for a clozing stanza if I am (still) in a comment.
		if (($in_comment) && ( $line =~ /-->/ ))
		{
			$line=~s/^(.*?)-->//;
			$in_comment=0;
		}
		next if ($in_comment);
		
		# Strip out in-line comments.
		while ( $line =~ /<!--(.*?)-->/ )
		{
			$line=~s/<!--(.*?)-->//;
		}
		
		# See if there is an comment opening stanza.
		if ( $line =~ /<!--/ )
		{
			$in_comment=1;
			$line=~s/<!--(.*)$//;
		}
		### Comments dealt with.
		
		### Parse data
		# XML data
		if ( $line =~ /<\?xml version="(.*?)" encoding="(.*?)"\?>/ )
		{
			$xml_version=$1;
			$encoding=$2;
			next;
		}
		
		# If I am not "in_data" (looking for more data for a currently in use key).
		if (not $in_data)
		{
			# Skip blank lines.
			next if $line =~ /^\s+$/;
			next if $line eq "";
			$line=~s/^\s+//;
			
			# Look for an inline data-structure.
			if (( $line =~ /<(.*?) (.*?)>/ ) && ( $line =~ /<\/$1>/ ))
			{
				# First, look for CDATA.
				my $cdata="";
				if ( $line=~/<!\[CDATA\[(.*?)\]\]>/ )
				{
					$cdata=$1;
					$line=~s/<!\[CDATA\[$cdata\]\]>/$cdata/;
				}
				
				# Pull out the key and name.
				my $key=$line;
				my $name=$line;
				my $data=$line;
				$key=~s/^<(\w+).*/$1/;
				$name=~s/^<$key name="(\w+).*/$1/;
				$data=~s/^<$key name="$name">(.*?)<\/$key>(.*)/$1/;
				
				# If I picked up data within a CDATA block,
				# push it into 'data' proper.
				$data=$cdata if $cdata;
				
				# No break out the data and push it into the
				# corresponding keyed hash reference
				# '$an->data'.
				$an->_make_hash_reference($an->data, "${key_name}::${key}::${name}::content", $data);
				
				next;
			}
			
			# Look for a self-contained unkeyed structure.
			if (( $line =~ /<(.*?)>/ ) && ( $line =~ /<\/$1>/ ))
			{
				my $key=$line;
				$key=~s/<(.*?)>.*/$1/;
				$data=$line;
				$data=~s/<$key>(.*?)<\/$key>/$1/;
				$an->_make_hash_reference($an->data, "${key_name}::${key}", $data);
				next;
			}
			
			# Look for a line with a closing stanza.
			if ( $line =~ /<\/(.*?)>/ )
			{
				my $closing_key=$line;
				$closing_key=~s/<\/(\w+)>/$1/;
				$key_name=~s/(.*?)::$closing_key(.*)/$1/;
				next;
			}
			
			# Look for a key with an embedded value.
			if ( $line =~ /^<(\w+) name="(.*?)" (\w+)="(.*?)">/ )
			{
				my $key=$1;
				my $name=$2;
				my $key2=$3;
				my $data=$4;
				$key_name.="::${key}::${name}";
				$an->_make_hash_reference($an->data, "${key_name}::${key}::${key2}", $data);
				next;
			}
			
			# Look for a contained value.
			if ( $line =~ /^<(\w+) name="(.*?)">(.*)/ )
			{
				my $key=$1;
				my $name=$2;
				# Don't scope 'data' locally in case it spans
				# multiple lines.
				$data=$3;
				
				# Parse the data now.
				if ( $data =~ /<\/$key>/ )
				{
					# Fully contained data.
					$data=~s/<\/$key>(.*)$//;
					$an->_make_hash_reference($an->data, "${key_name}::${key}::${name}", $data);
				}
				else
				{
					# Element closes later.
					$in_data=1;
					$closing_key=$key;
					
					$name=~s/^<$key name="(\w+).*/$1/;
					$key_name.="::${key}::${name}";
					$data=~s/^<$key name="$name">(.*)/$1/;
					$data.="\n";
				}
				next;
			}
			
			# Look for an opening data structure.
			if ( $line =~ /<(.*?)>/ )
			{
				my $key=$1;
				$key_name.="::$key";
				next;
			}
		}
		else
		{
			### I'm in a multi-line data block.
			# If this line doesn't close the data block, feed it
			# wholesale into 'data'. If it does, see how much of
			# this line, if anything, is pushed into 'data'.
			if ( $line !~ /<\/$closing_key>/ )
			{
				$data.="$line\n";
			}
			else
			{
				# This line closes the data block.
				$in_data=0;
				$line=~s/(.*?)<\/$closing_key>/$1/;
				$data.="$line";
				
				# If this line contain new-line control
				# characters, break the line up into multiple
				# lines and process them seperately.
				my $save_data="";
				my @lines=split/\n/, $data;
				
				# I use this to track CDATA blocks.
				my $in_cdata=0;
				
				# Loop time.
				foreach my $line (@lines)
				{
					# If I am in a CDATA block, check for
					# the closing stanza.
					if (( $in_cdata == 1 ) && ( $line =~/]]>$/ ))
					{
						# CDATA closes here.
						$line =~s/]]>$//;
						$save_data.="\n$line";
						$in_cdata=0;
					}
					
					# If this line is a self-contained
					# CDATA block, pull the data out.
					# Otherwise, check if this line starts
					# a CDATA block.
					if (( $line =~/^<\!\[CDATA\[/ ) && ( $line =~/]]>$/ ))
					{
						# CDATA opens and closes in this line.
						$line=~s/^<\!\[CDATA\[//;
						$line=~s/]]>$//;
						$save_data.="\n$line";
					}
					elsif ( $line =~/^<\!\[CDATA\[/ )
					{
						$line=~s/^<\!\[CDATA\[//;
						$in_cdata=1;
					}
					
					# If I am in a CDATA block, feed the
					# (sub)line into 'save_data' wholesale.
					if ( $in_cdata == 1 )
					{
						# Don't analyze, just store.
						$save_data.="\n$line";
					}
					else
					{
						# Not in CDATA, look for XML data.
						while (( $line =~ /<(.*?)>/ ) && ( $line =~ /<\/$1>/ ))
						{
							# Found a value.
							my $key=$line;
							$key=~s/.*?<(.*?)>.*/$1/;
							$data=$line;
							$data=~s/.*?<$key>(.*?)<\/$key>/$1/;
							
							$an->_make_hash_reference($an->data, "${key_name}::${key}", $data);
							$line =~ s/<$key>(.*?)<\/$key>//
						}
						$save_data.="\n$line";
					}
				}
				
				# Knock out and new-lines and save.
				$save_data=~s/^\n//;
				if ( $save_data =~ /\S/s )
				{
					# Record the data in my '$an->data'
					# hash reference.
					$an->_make_hash_reference($an->data, "${key_name}::content", $save_data);
				}
				
				$key_name=~s/(.*?)::$closing_key(.*)/$1/;
			}
		}
		next if $line eq "";
	}
	$read->close();
	
	# Set a couple values about this file.
	$self->{FILE}->{XML_VERSION}=$xml_version;
	$self->{FILE}->{ENCODING}=$encoding;
	
	# Return the number.
	return (1);
}

1;

 

Any questions, feedback, advice, complaints or meanderings are welcome.
Alteeve's Niche! Enterprise Support:
Alteeve Support
Community Support
© Alteeve's Niche! Inc. 1997-2024   Anvil! "Intelligent Availability®" Platform
legal stuff: All info is provided "As-Is". Do not use anything here unless you are willing and able to take responsibility for your own actions.