Acme-Wabby

 view release on metacpan or  search on metacpan

Wabby.pm  view on Meta::CPAN

    eval {
        if (!store($self->{'data'}{'hash'}, $self->{'conf'}{'hash_file'})) {
            $@ = 1;
        }
    };
    if ($@) {
        return undef;
    }

    return 1;
}

# A method for loading a previously saved state from files using Storable.
# Arguments: None.
# Returns:   undef on failure, true on success
sub load {
    my $self = shift;
    die "Invalid object" unless (ref($self) eq __PACKAGE__);

    # Since Storable can die on serious errors, or simply return an undef,
    # we need to wrap these calls in evals
    my $ref;
    eval {
        if (!($ref = retrieve($self->{'conf'}{'list_file'}))) {
            $@ = "Error retrieving list from " . $self->{'conf'}{'list_file'};
        }
    };
    if ($@) {
        return undef;
    }
    @{$self->{'data'}{'list'}} = @{$ref};

    eval {
        if (!($ref = retrieve($self->{'conf'}{'hash_file'}))) {
            $@ = "Error retrieving hash from " . $self->{'conf'}{'hash_file'};
        }
    };
    if ($@) {
        return undef;
    }
    %{$self->{'data'}{'hash'}} = %{$ref};

    return 1;
}

# A method for adding a block of text to the current state.
# Arguments: Takes a scalar containing text to be added.  Embedded newlines,
#            random crap, et al are fine, they'll just be stripped out anyway.
# Returns:   undef on failure, true on success.  The only failure condition is
#            currently if an invalid parameter is passed in.
sub add {
    my $self = shift;
    die "Invalid object" unless (ref($self) eq __PACKAGE__);

    # Make sure we actually got something to add
    my $text = shift;
    unless ($text) {
        return undef;
    }

    # If we don't care about case, lowercase the whole thing to start with
    unless ($self->{'conf'}{'case_sensitive'}) {
        $text = lc($text);
    }

    # Split the text into component phrases, which we define as being delimited
    # by the characters below.  I left the comma out because it seems to lead
    # to slightly more coherent results.
    my @phrases = split /[.!?;]/, $text;
    foreach my $phrase (@phrases) {

        # First, strip out any characters we don't want to deal with.  We
        # replace them with a space so that things like "the+dog" gets treated
        # as "the dog".
        $phrase =~ s/[^-a-zA-Z0-9 ']/ /g;

        # Trim leading and trailing whitespace, and see if we still have
        # anything left.
        $phrase =~ s/^\s+//;
        $phrase =~ s/\s+$//;
        next if $phrase eq "";

        my $last_word = 0;
        my $idx = 0;
        # Split the phrase into component words.  We're splitting on simple
        # whitespace here.
        my @words = split /\s+/, $phrase;

        # First we're going to loop through the words and clean them up a bit.
        # While we're at it, we're going to find the index of the last real
        # word in this phrase.
        foreach my $word (@words) {

            # Clean up the word a little bit.  We allow hyphens and
            # apostrophies to occur within words, but not at the beginning
            # or ends of words.
            $word =~ s/^\s+//;
            $word =~ s/\s+$//;
            $word =~ s/^-+//g;
            $word =~ s/^'+//g;
            $word =~ s/-+$//g;
            $word =~ s/'+$//g;

            # Only allow the single-character words of 'a' and 'I'.
            # FIXME - Need to be able to configure this so that persons with
            # non-english texts can pick values that make sense.
            if (length($word) == 1 && lc($word) ne "i" && lc($word) ne "a") {
                $word = "";
                $idx++;
                next;
            }

            # If this is a valid word, then mark this as a possible last word.
            if ($word ne "") {
                $last_word = $idx;
            }
            $idx++;
        }

        $idx = 0;
        my $new_index = 0;

Wabby.pm  view on Meta::CPAN

                # If this word happens to be the last in the phrase, add a -1
                # to its possible transitions so that we have the possibility
                # of ending sentences here.
                if ($idx == $last_word) {
                    push @{${$self->{'data'}{'list'}}[$new_index]{'num'}}, -1;
                }
            }

            # If we've seen this word before
            else {
                # Record the index of this word for the next loop iteration,
                # and add a transition from the previous word to this one.
                $new_index = $self->{'data'}{'hash'}{$word};
                push @{${$self->{'data'}{'list'}}[$old_index]{'num'}},
                    $new_index;

                # If this word happens to be the last in the phrase, add a -1
                # to its possible transitions so that we have the possibility
                # of ending sentences here.
                if ($idx == $last_word) {
                    push @{${$self->{'data'}{'list'}}[$new_index]{'num'}}, -1;
                }
            }
            # Move on to the next word.
            $old_index = $new_index;
        }
    }

    return 1;
}

# A function for generating a random line of text.
# Arguments: If no arguments, spew will try to generate a completely random
#            sentence.  If a string is passed in, spew will try to generate a
#            random sentence beginning with the provided text.
# Returns:   The generated string, or undef on any of several error conditions.
#            Note that these error conditions are not fatal.  They are:
#               * At least (min_len * 10) words haven't been run through yet.
#                 (Must ->add() more text before trying again.)
#               * A string was passed in containing nothing. (Don't do that.)
#               * We don't know the last word in the sentence, and can therefore
#                 not generate a sentence with it. (Either teach us about it
#                 with ->add(), or try something else.)
#               * A sentence of at least min_len words could not be generated,
#                 even after max_attempts tries at doing so. (Likely need to
#                 ->add() more text before trying again.)
#               
sub spew {
    my $self = shift;
    die "Invalid object" unless (ref($self) eq __PACKAGE__);
    my $text = shift;

    # If we don't have at least 10 * min_len words, we probably don't have a
    # very good chance of making a sentence, so let's just return.
    if (scalar(keys %{$self->{'data'}{'hash'}}) <
            ($self->{'conf'}{'min_len'} * 10)) {
        return undef;
    }

    my $directed;
    my $start;

    # If they passed in an argument, take a look at it.
    if ($text) {
        $directed = 1;

        # If we're case-insensitive, lowercase what they sent us.
        unless ($self->{'conf'}{'case_sensitive'}) {
            $text = lc($text);
        }

        # Weed out unsavory characters.
        $text =~ s/[^-a-zA-Z0-9 ']/ /gs;

        # Clean any long strings of whitespace to single spaces.
        $text =~ s/\s+/ /g;

        # Remove leading and trailing whitespace.
        $text =~ s/^\s+//;
        $text =~ s/\s+$//;

        # If there's not a word left to talk about, return.
        if ($text !~ /([-a-zA-Z0-9']+)$/) {
            return undef;
        }

        # If we don't know anything about this word, return.
        if (!exists(${$self->{'data'}{'hash'}}{$1})) {
            return undef;
        }

        # Seems like a good starting place, so let's mark it.
        $start = ${$self->{'data'}{'hash'}}{$1};
    }
    # They didn't pass an argument, so we're on our own.
    else {
        $directed = 0;

        # The 0th element in the list is 'special' in that no hash entry points
        # to it, and it only contains pointers to words which are possible
        # sentence starting points.  Thus, let's grab a random entry out of the
        # 0th element in the list and start there.
        $start = ${${$self->{'data'}{'list'}}[0]{'num'}}[int rand scalar @{${$self->{'data'}{'list'}}[0]{'num'}}];
        $text = ${$self->{'data'}{'list'}}[$start]{'word'};
    }

    # Since we're dealing with randomness, we can't always be sure that we'll
    # be able to make a sentence of min_len, so we just keep retrying up to
    # max_attempts times, relying on sheer dumb luck to help us out.  On a
    # reasonably-sized body of text, this works perfectly fine.
    my $attempts = 0;
    my $count = 0;
    my $final = "";
    my $next = $start;

    while ($count < $self->{'conf'}{'min_len'} &&
            $attempts < $self->{'conf'}{'max_attempts'}) {

        # We start out with one word, and uppercase the first character in our
        # starting text.
        $count = 1;
        $final = "\u$text";
        $next = $start;

        # Keep adding new words to this sentence until we hit an sentence end
        # mark, or we hit max_len
        while ($next != -1 && ($count < $self->{'conf'}{'max_len'})) {

            # If the word we're on has no transitions, count this as a stopping
            # point, since we can't go any further.
            if (scalar(@{${$self->{'data'}{'list'}}[$next]{'num'}}) < 1) {
                $next = -1;
            }
            # Otherwise, randomly pick the word we'll visit next out of the
            # list of possible transitions from our current word.
            else {
                $next = ${${$self->{'data'}{'list'}}[$next]{'num'}}[int rand scalar @{${$self->{'data'}{'list'}}[$next]{'num'}}];
            }

            # If we're not at the end yet, add this word to our collected
            # string, increment our word count, and do it all again. 
            if ($next != -1) {
                $final .= " " . ${$self->{'data'}{'list'}}[$next]{'word'};
                $count++;
            }
        }

        # If we failed to make a long enough sentence, we need to do something.
        if ($count < $self->{'conf'}{'min_len'}) {

            # If we haven't yet passed our max number of attempts, try again.
            if ($attempts < $self->{'conf'}{'max_attempts'}) {
                $attempts++;
                next;
            }
            # If we passed our max number of attempts, we can take one of two
            # course of action.
            else {
                # If we're trying to talk about something in particular, we're
                # always going to be stuck with the same starting point.  Thus,
                # there's not the best chance for continued success, so just
                # give up and bail.
                if ($directed) {
                    return undef;
                }
                # If we're talking about random things, we likely just got
                # a bad starting point, so we'll pick a new random starting
                # point, and do the whole thing over again.
                else {
                    $attempts = 0;
                    $start = ${${$self->{'data'}{'list'}}[0]{'num'}}[int rand scalar @{${$self->{'data'}{'list'}}[0]{'num'}}];
                    $text = ${$self->{'data'}{'list'}}[$start]{'word'};
                    next;
                }
            }
        }
    }

    # If we're not case sensitive, make sure any I's by themselves are
    # capitalized, for aesthetic purposes.  If we are, they probably want
    # things to come out the way they are.
    # FIXME - Need to be able to configure this so that persons with
    # non-english texts can pick values that make sense.
    unless ($self->{'conf'}{'case_sensitive'}) {
        $final =~ s/(^|[^\w-])i($|[^\w-])/$1I$2/g
    }

    # Pick a random piece of punctuation to add to the end of the sentence.
    $final .= ${$self->{'conf'}{'punctuation'}}[int rand scalar @{$self->{'conf'}{'punctuation'}}];

    return $final;
}

# A method for getting some basic information about the current state.
# Arguments: None.
# Returns:   In a scalar context, this function returns a string describing the
#            current state.  In a list context, this function returns a list
#            containing two numbers -- the first one is the number of words
#            that this object knows about, and the second one is the average
#            number of transitions between words.
sub stats {
    my $self = shift;
    die "Invalid object" unless (ref($self) eq __PACKAGE__);

    # Get the number of words in our hash.
    my $word_count = scalar keys %{$self->{'data'}{'hash'}};

    # If we've got no words, just quit now.
    if ($word_count == 0) {
        return wantarray ? (0,0) : "I don't know anything!";
    }

    # Iterate over the list, adding up the number of transitions for each word.
    my $average = 0;
    foreach (@{$self->{'data'}{'list'}}) {
        $average += scalar @{$_->{'num'}} if defined($_->{'num'});
    }

    # Calculate an average, trim it to two decimal points, and return it.
    $average /= $word_count;
    $average = sprintf "%.2f", $average;
    return wantarray ? ($word_count, $average) : "Wabby knows $word_count "
        ."words, with an average of $average connections between each word.";
}

1;

__END__

=head1 NAME

Acme::Wabby - Create semi-random sentences based upon a body of text.



( run in 2.476 seconds using v1.01-cache-2.11-cpan-cdf2f3d4e48 )