Acme-Wabby
view release on metacpan or search on metacpan
eval {
if (!store($self->{'data'}{'hash'}, $self->{'conf'}{'hash_file'})) {
$@ = 1;
}
};
if ($@) {
return undef;
}
return 1;
}
# A method for loading a previously saved state from files using Storable.
# Arguments: None.
# Returns: undef on failure, true on success
sub load {
my $self = shift;
die "Invalid object" unless (ref($self) eq __PACKAGE__);
# Since Storable can die on serious errors, or simply return an undef,
# we need to wrap these calls in evals
my $ref;
eval {
if (!($ref = retrieve($self->{'conf'}{'list_file'}))) {
$@ = "Error retrieving list from " . $self->{'conf'}{'list_file'};
}
};
if ($@) {
return undef;
}
@{$self->{'data'}{'list'}} = @{$ref};
eval {
if (!($ref = retrieve($self->{'conf'}{'hash_file'}))) {
$@ = "Error retrieving hash from " . $self->{'conf'}{'hash_file'};
}
};
if ($@) {
return undef;
}
%{$self->{'data'}{'hash'}} = %{$ref};
return 1;
}
# A method for adding a block of text to the current state.
# Arguments: Takes a scalar containing text to be added. Embedded newlines,
# random crap, et al are fine, they'll just be stripped out anyway.
# Returns: undef on failure, true on success. The only failure condition is
# currently if an invalid parameter is passed in.
sub add {
my $self = shift;
die "Invalid object" unless (ref($self) eq __PACKAGE__);
# Make sure we actually got something to add
my $text = shift;
unless ($text) {
return undef;
}
# If we don't care about case, lowercase the whole thing to start with
unless ($self->{'conf'}{'case_sensitive'}) {
$text = lc($text);
}
# Split the text into component phrases, which we define as being delimited
# by the characters below. I left the comma out because it seems to lead
# to slightly more coherent results.
my @phrases = split /[.!?;]/, $text;
foreach my $phrase (@phrases) {
# First, strip out any characters we don't want to deal with. We
# replace them with a space so that things like "the+dog" gets treated
# as "the dog".
$phrase =~ s/[^-a-zA-Z0-9 ']/ /g;
# Trim leading and trailing whitespace, and see if we still have
# anything left.
$phrase =~ s/^\s+//;
$phrase =~ s/\s+$//;
next if $phrase eq "";
my $last_word = 0;
my $idx = 0;
# Split the phrase into component words. We're splitting on simple
# whitespace here.
my @words = split /\s+/, $phrase;
# First we're going to loop through the words and clean them up a bit.
# While we're at it, we're going to find the index of the last real
# word in this phrase.
foreach my $word (@words) {
# Clean up the word a little bit. We allow hyphens and
# apostrophies to occur within words, but not at the beginning
# or ends of words.
$word =~ s/^\s+//;
$word =~ s/\s+$//;
$word =~ s/^-+//g;
$word =~ s/^'+//g;
$word =~ s/-+$//g;
$word =~ s/'+$//g;
# Only allow the single-character words of 'a' and 'I'.
# FIXME - Need to be able to configure this so that persons with
# non-english texts can pick values that make sense.
if (length($word) == 1 && lc($word) ne "i" && lc($word) ne "a") {
$word = "";
$idx++;
next;
}
# If this is a valid word, then mark this as a possible last word.
if ($word ne "") {
$last_word = $idx;
}
$idx++;
}
$idx = 0;
my $new_index = 0;
# If this word happens to be the last in the phrase, add a -1
# to its possible transitions so that we have the possibility
# of ending sentences here.
if ($idx == $last_word) {
push @{${$self->{'data'}{'list'}}[$new_index]{'num'}}, -1;
}
}
# If we've seen this word before
else {
# Record the index of this word for the next loop iteration,
# and add a transition from the previous word to this one.
$new_index = $self->{'data'}{'hash'}{$word};
push @{${$self->{'data'}{'list'}}[$old_index]{'num'}},
$new_index;
# If this word happens to be the last in the phrase, add a -1
# to its possible transitions so that we have the possibility
# of ending sentences here.
if ($idx == $last_word) {
push @{${$self->{'data'}{'list'}}[$new_index]{'num'}}, -1;
}
}
# Move on to the next word.
$old_index = $new_index;
}
}
return 1;
}
# A function for generating a random line of text.
# Arguments: If no arguments, spew will try to generate a completely random
# sentence. If a string is passed in, spew will try to generate a
# random sentence beginning with the provided text.
# Returns: The generated string, or undef on any of several error conditions.
# Note that these error conditions are not fatal. They are:
# * At least (min_len * 10) words haven't been run through yet.
# (Must ->add() more text before trying again.)
# * A string was passed in containing nothing. (Don't do that.)
# * We don't know the last word in the sentence, and can therefore
# not generate a sentence with it. (Either teach us about it
# with ->add(), or try something else.)
# * A sentence of at least min_len words could not be generated,
# even after max_attempts tries at doing so. (Likely need to
# ->add() more text before trying again.)
#
sub spew {
my $self = shift;
die "Invalid object" unless (ref($self) eq __PACKAGE__);
my $text = shift;
# If we don't have at least 10 * min_len words, we probably don't have a
# very good chance of making a sentence, so let's just return.
if (scalar(keys %{$self->{'data'}{'hash'}}) <
($self->{'conf'}{'min_len'} * 10)) {
return undef;
}
my $directed;
my $start;
# If they passed in an argument, take a look at it.
if ($text) {
$directed = 1;
# If we're case-insensitive, lowercase what they sent us.
unless ($self->{'conf'}{'case_sensitive'}) {
$text = lc($text);
}
# Weed out unsavory characters.
$text =~ s/[^-a-zA-Z0-9 ']/ /gs;
# Clean any long strings of whitespace to single spaces.
$text =~ s/\s+/ /g;
# Remove leading and trailing whitespace.
$text =~ s/^\s+//;
$text =~ s/\s+$//;
# If there's not a word left to talk about, return.
if ($text !~ /([-a-zA-Z0-9']+)$/) {
return undef;
}
# If we don't know anything about this word, return.
if (!exists(${$self->{'data'}{'hash'}}{$1})) {
return undef;
}
# Seems like a good starting place, so let's mark it.
$start = ${$self->{'data'}{'hash'}}{$1};
}
# They didn't pass an argument, so we're on our own.
else {
$directed = 0;
# The 0th element in the list is 'special' in that no hash entry points
# to it, and it only contains pointers to words which are possible
# sentence starting points. Thus, let's grab a random entry out of the
# 0th element in the list and start there.
$start = ${${$self->{'data'}{'list'}}[0]{'num'}}[int rand scalar @{${$self->{'data'}{'list'}}[0]{'num'}}];
$text = ${$self->{'data'}{'list'}}[$start]{'word'};
}
# Since we're dealing with randomness, we can't always be sure that we'll
# be able to make a sentence of min_len, so we just keep retrying up to
# max_attempts times, relying on sheer dumb luck to help us out. On a
# reasonably-sized body of text, this works perfectly fine.
my $attempts = 0;
my $count = 0;
my $final = "";
my $next = $start;
while ($count < $self->{'conf'}{'min_len'} &&
$attempts < $self->{'conf'}{'max_attempts'}) {
# We start out with one word, and uppercase the first character in our
# starting text.
$count = 1;
$final = "\u$text";
$next = $start;
# Keep adding new words to this sentence until we hit an sentence end
# mark, or we hit max_len
while ($next != -1 && ($count < $self->{'conf'}{'max_len'})) {
# If the word we're on has no transitions, count this as a stopping
# point, since we can't go any further.
if (scalar(@{${$self->{'data'}{'list'}}[$next]{'num'}}) < 1) {
$next = -1;
}
# Otherwise, randomly pick the word we'll visit next out of the
# list of possible transitions from our current word.
else {
$next = ${${$self->{'data'}{'list'}}[$next]{'num'}}[int rand scalar @{${$self->{'data'}{'list'}}[$next]{'num'}}];
}
# If we're not at the end yet, add this word to our collected
# string, increment our word count, and do it all again.
if ($next != -1) {
$final .= " " . ${$self->{'data'}{'list'}}[$next]{'word'};
$count++;
}
}
# If we failed to make a long enough sentence, we need to do something.
if ($count < $self->{'conf'}{'min_len'}) {
# If we haven't yet passed our max number of attempts, try again.
if ($attempts < $self->{'conf'}{'max_attempts'}) {
$attempts++;
next;
}
# If we passed our max number of attempts, we can take one of two
# course of action.
else {
# If we're trying to talk about something in particular, we're
# always going to be stuck with the same starting point. Thus,
# there's not the best chance for continued success, so just
# give up and bail.
if ($directed) {
return undef;
}
# If we're talking about random things, we likely just got
# a bad starting point, so we'll pick a new random starting
# point, and do the whole thing over again.
else {
$attempts = 0;
$start = ${${$self->{'data'}{'list'}}[0]{'num'}}[int rand scalar @{${$self->{'data'}{'list'}}[0]{'num'}}];
$text = ${$self->{'data'}{'list'}}[$start]{'word'};
next;
}
}
}
}
# If we're not case sensitive, make sure any I's by themselves are
# capitalized, for aesthetic purposes. If we are, they probably want
# things to come out the way they are.
# FIXME - Need to be able to configure this so that persons with
# non-english texts can pick values that make sense.
unless ($self->{'conf'}{'case_sensitive'}) {
$final =~ s/(^|[^\w-])i($|[^\w-])/$1I$2/g
}
# Pick a random piece of punctuation to add to the end of the sentence.
$final .= ${$self->{'conf'}{'punctuation'}}[int rand scalar @{$self->{'conf'}{'punctuation'}}];
return $final;
}
# A method for getting some basic information about the current state.
# Arguments: None.
# Returns: In a scalar context, this function returns a string describing the
# current state. In a list context, this function returns a list
# containing two numbers -- the first one is the number of words
# that this object knows about, and the second one is the average
# number of transitions between words.
sub stats {
my $self = shift;
die "Invalid object" unless (ref($self) eq __PACKAGE__);
# Get the number of words in our hash.
my $word_count = scalar keys %{$self->{'data'}{'hash'}};
# If we've got no words, just quit now.
if ($word_count == 0) {
return wantarray ? (0,0) : "I don't know anything!";
}
# Iterate over the list, adding up the number of transitions for each word.
my $average = 0;
foreach (@{$self->{'data'}{'list'}}) {
$average += scalar @{$_->{'num'}} if defined($_->{'num'});
}
# Calculate an average, trim it to two decimal points, and return it.
$average /= $word_count;
$average = sprintf "%.2f", $average;
return wantarray ? ($word_count, $average) : "Wabby knows $word_count "
."words, with an average of $average connections between each word.";
}
1;
__END__
=head1 NAME
Acme::Wabby - Create semi-random sentences based upon a body of text.
( run in 2.476 seconds using v1.01-cache-2.11-cpan-cdf2f3d4e48 )