Acme-Lingua-ZH-Remix
view release on metacpan or search on metacpan
The default values of min, max are 0 and 140, respectively.
The implementation is based on a randomized algorithm, so it may take an
indefinite amount of time to generate the result. If it takes more than
1000 iterations, it aborts and returns the result anyway, regardless of
the length constraint. This can happen when the lengths of phrases from
the corpus do not add up to a value within the given range.
The returned scalar is the generated sentence, a string of wide characters.
(Which makes Encode::is_utf8 return true.)
AUTHOR
Kang-min Liu <gugod@gugod.org>
COPYRIGHT
Copyright 2010- by Kang-min Liu, <gugod@gugod.org>
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
lib/Acme/Lingua/ZH/Remix.pm view on Meta::CPAN
package Acme::Lingua::ZH::Remix;
use v5.10;
our $VERSION = "0.99";
=pod
=encoding utf8
=head1 NAME
Acme::Lingua::ZH::Remix - The Chinese sentence generator.
=head1 SYNOPSIS
use Acme::Lingua::ZH::Remix;
my $x = Acme::Lingua::ZH::Remix->new;
lib/Acme/Lingua/ZH/Remix.pm view on Meta::CPAN
my $x = Acme::Lingua::ZH::Remix->new;
$x->feed($my_corpus);
# Say something based on $my_corpus
say $x->random_sentence;
The corpus should use full-width punctuation characters.
=cut
use utf8;
use Moo;
use Types::Standard qw(HashRef Int);
use List::MoreUtils qw(uniq);
use Hash::Merge qw(merge);
# Phrase store: a read-write hash reference, built lazily by
# _build_phrases() the first time the attribute is read.
has phrases => (
    is      => "rw",
    isa     => HashRef,
    lazy    => 1,
    builder => "_build_phrases",
);
sub _build_phrases {
my $self = shift;
local $/ = undef;
lib/Acme/Lingua/ZH/Remix.pm view on Meta::CPAN
values are invalid, they are treated as if they were not passed.
The default values of min, max are 0 and 140, respectively.
The implementation is based on a randomized algorithm, so it may take an
indefinite amount of time to generate the result. If it takes more than 1000
iterations, it aborts and returns the result anyway, regardless of the length
constraint. This can happen when the lengths of phrases from the corpus do not
add up to a value within the given range.
The returned scalar is the generated sentence, a string of wide characters. (Which
makes Encode::is_utf8 return true.)
=cut
sub random_sentence {
my ($self, %options) = @_;
for my $p (qw(min max)) {
my $x = $options{$p};
unless (defined($x) && int($x) eq $x && $x >= 0) {
delete $options{$p}
#!/usr/bin/env perl
use strict;
use utf8;
use Test::More 0.98;
use Acme::Lingua::ZH::Remix;
# Verify that split_corpus() breaks a corpus into phrases, each phrase keeping
# the full-width punctuation mark that terminates it.
subtest 'split_corpus method' => sub {
    my $r = Acme::Lingua::ZH::Remix->new;

    # The literals must be genuine wide characters (this file is under
    # "use utf8"). They were previously stored as mis-decoded UTF-8 bytes,
    # which contain no full-width punctuation and so cannot be split.
    # NOTE(review): the exact punctuation marks were reconstructed from the
    # damaged text -- confirm against the upstream test file.
    my @phrases = sort $r->split_corpus(q(還不賴!總之,很好。如何?));
    my @answer  = sort qw(還不賴! 總之, 很好。 如何?);

    # Both lists are sorted, so only the set of phrases is compared and the
    # order in which split_corpus() returns them does not matter.
    is_deeply(\@phrases, \@answer, 'corpus is split into punctuation-terminated phrases');
    done_testing;
};
# Smoke test: a freshly constructed object (no corpus fed explicitly) reports
# more than four phrases and yields a true-valued sentence on every call.
subtest 'a simple one' => sub {
    my $remix = Acme::Lingua::ZH::Remix->new;

    my $has_enough_phrases = $remix->phrase_count > 4;
    ok($has_enough_phrases, "phrase_count seems to be correct");

    foreach my $round (1 .. 100) {
        my $sentence = $remix->random_sentence;

        # Encode the wide-character string to UTF-8 bytes before it is
        # interpolated into the test description.
        utf8::encode($sentence);
        ok($sentence, "something is generated: $sentence");
    }

    done_testing;
};
# Verify that a user-supplied corpus passed to feed() is what phrase_count
# reports, and that sentences can be generated afterwards.
subtest 'custom phrase materials' => sub {
    my $r = Acme::Lingua::ZH::Remix->new;

    # Four phrases, each terminated by a full-width punctuation mark. The
    # literal must be genuine wide characters (this file is under "use utf8");
    # it was previously stored as mis-decoded UTF-8 bytes.
    # NOTE(review): the exact punctuation marks were reconstructed from the
    # damaged text -- confirm against the upstream test file.
    $r->feed("還不賴! 總之, 很好。 如何?");
    is($r->phrase_count, 4, "phrase_count is correct");

    for my $round (1 .. 100) {
        my $s = $r->random_sentence;

        # Encode to UTF-8 bytes so the sentence prints cleanly in the
        # test description.
        utf8::encode($s);
        ok $s, "something is generated: $s";
    }

    done_testing;
};
done_testing;
t/03-constrain.t view on Meta::CPAN
#!/usr/bin/env perl
use strict;
use utf8;
use Test::More 0.98;
use Acme::Lingua::ZH::Remix;
# Constraint test: every sentence generated with explicit min/max options
# must have a character length inside the inclusive range [min, max].
my $r = Acme::Lingua::ZH::Remix->new;
my ($min, $max) = (5, 8);

for my $round (1 .. 100) {
    my $sentence = $r->random_sentence(min => $min, max => $max);

    # Measure before encoding: on the wide-character string length() counts
    # characters, whereas after utf8::encode() it would count bytes.
    my $length = length($sentence);

    # Encode to UTF-8 bytes only so the offending sentence can be printed
    # when the assertion fails.
    utf8::encode($sentence);
    ok($length >= $min && $length <= $max, "length: $min <= $length <= $max")
        or diag("out-of-range sentence: $sentence");
}

done_testing;
( run in 1.219 second using v1.01-cache-2.11-cpan-49f99fa48dc )