BusyBird
view release on metacpan or search on metacpan
t/Util_split.t view on Meta::CPAN
use strict;
use warnings;
use Test::More;
use Test::Fatal;
use utf8;
# Load the module under test during compilation and record, as a test
# result, whether it (and the requested import) could be loaded.
BEGIN {
    my @imports = ('split_with_entities');
    use_ok('BusyBird::Util', @imports);
}
{
    # Worked example: entities of three types ('a', 'b', 'c') are spread
    # over one string, plus a type ('d') that has no entities at all.
    # The expected result lists one segment per entity and one untyped
    # segment (type and entity both undef) for each stretch of text lying
    # between entities, ordered by start position.
    note("--- for example");

    # Build a single expected-segment record from positional fields.
    my $segment = sub {
        my ($text, $start, $end, $type, $entity) = @_;
        return {
            text   => $text,
            start  => $start,
            end    => $end,
            type   => $type,
            entity => $entity,
        };
    };

    my $input = 'aaa --- bb ---- ccaa -- ccccc';

    my %entities_of = (
        a => [
            {indices => [0, 3],   url => 'http://hoge.com/a/1'},
            {indices => [18, 20], url => 'http://hoge.com/a/2'},
        ],
        b => [
            {indices => [8, 10], style => "bold"},
        ],
        c => [
            {indices => [16, 18], footnote => 'first c'},
            {indices => [24, 29], some => {complex => 'structure'}},
        ],
        d => [],
    );

    # Expected entities are written out again (not shared with the input
    # structure) so the comparison is made against independent data.
    my @expected = (
        $segment->('aaa', 0, 3, 'a',
                   {indices => [0, 3], url => 'http://hoge.com/a/1'}),
        $segment->(' --- ', 3, 8, undef, undef),
        $segment->('bb', 8, 10, 'b',
                   {indices => [8, 10], style => "bold"}),
        $segment->(' ---- ', 10, 16, undef, undef),
        $segment->('cc', 16, 18, 'c',
                   {indices => [16, 18], footnote => 'first c'}),
        $segment->('aa', 18, 20, 'a',
                   {indices => [18, 20], url => 'http://hoge.com/a/2'}),
        $segment->(' -- ', 20, 24, undef, undef),
        $segment->('ccccc', 24, 29, 'c',
                   {indices => [24, 29], some => {complex => 'structure'}}),
    );

    my $actual = split_with_entities($input, \%entities_of);

    # On mismatch, dump what was actually returned to ease debugging.
    unless (is_deeply($actual, \@expected, "example split OK")) {
        diag(explain($actual));
    }
}
{
note("--- other cases");
foreach my $case (
{label => "utf8 text", text => 'ããã¯ï¼µï¼´ï¼¦ã¼ï¼ããã¹ã',
entities => { alphanum => [{indices => [3, 8], alpha => 1, num => 1}] },
exp_segments => [
{text => 'ããã¯', start => 0, end => 3, type => undef, entity => undef},
{text => 'UTFã¼ï¼', start => 3, end => 8, type => 'alphanum', entity => {
indices => [3, 8], alpha => 1, num => 1
}},
{text => 'ããã¹ã', start => 8, end => 12, type => undef, entity => undef},
]},
{label => '0-length entity', text => 'aaaBBBccc',
entities => {
boundary => [
{indices => [0,0], desc => 'before a'},
{indices => [3,3], desc => 'before B'},
{indices => [3,3], desc => 'before B 2'},
{indices => [6,6], desc => 'before c'},
{indices => [9,9], desc => 'tail'},
]
},
exp_segments => [
{text => '', start => 0, end => 0, type => 'boundary', entity => {
indices => [0,0], desc => 'before a'
}},
{text => 'aaa', start => 0, end => 3, type => undef, entity => undef},
{text => '', start => 3, end => 3, type => 'boundary', entity => {
indices => [3,3], desc => 'before B'
}},
{text => '', start => 3, end => 3, type => 'boundary', entity => {
indices => [3,3], desc => 'before B 2'
}},
{text => 'BBB', start => 3, end => 6, type => undef, entity => undef},
{text => '', start => 6, end => 6, type => 'boundary', entity => {
indices => [6,6], desc => 'before c'
}},
{text => 'ccc', start => 6, end => 9, type => undef, entity => undef},
{text => '', start => 9, end => 9, type => 'boundary', entity => {
indices => [9,9], desc => 'tail'
}},
]},
{label => "no entities", text => "hoge hoge hoge",
entities => {},
( run in 1.755 second using v1.01-cache-2.11-cpan-39bf76dae61 )