BusyBird
view release on metacpan or search on metacpan
lib/BusyBird/Util.pm view on Meta::CPAN
package BusyBird::Util;
use v5.8.0;
use strict;
use warnings;
use Scalar::Util qw(blessed weaken);
use Carp;
use Exporter 5.57 qw(import);
use BusyBird::DateTime::Format;
use BusyBird::Log qw(bblog);
use BusyBird::SafeData qw(safed);
use DateTime;
use Future::Q 0.040;
use File::HomeDir;
use File::Spec;
our @EXPORT_OK =
qw(set_param expand_param config_directory config_file_path sort_statuses
split_with_entities future_of make_tracking vivifiable_as);
our @CARP_NOT = qw(Future::Q);
sub set_param {
my ($hashref, $params_ref, $key, $default, $is_mandatory) = @_;
if($is_mandatory && !defined($params_ref->{$key})) {
my $classname = blessed $hashref;
croak "ERROR: set_param in $classname: Parameter for '$key' is mandatory, but not supplied.";
}
$hashref->{$key} = (defined($params_ref->{$key}) ? $params_ref->{$key} : $default);
}
sub export_ok_all_tags {
no strict "refs";
my ($caller_package) = caller;
my $export_ok = \@{"${caller_package}::EXPORT_OK"};
my $export_tags = \%{"${caller_package}::EXPORT_TAGS"};
my @all = @$export_ok;
foreach my $tag (keys %$export_tags) {
my $exported = $export_tags->{$tag};
push(@all, @$exported);
push(@$export_ok, @$exported);
}
$export_tags->{all} = \@all;
}
sub expand_param {
my ($param, @names) = @_;
my $refparam = ref($param);
my @result = ();
if($refparam eq 'ARRAY') {
@result = @$param;
}elsif($refparam eq 'HASH') {
@result = @{$param}{@names};
}else {
$result[0] = $param;
}
return wantarray ? @result : $result[0];
}
sub config_directory {
return File::Spec->catfile(File::HomeDir->my_home, ".busybird");
}
sub config_file_path {
my (@paths) = @_;
return File::Spec->catfile(config_directory, @paths);
}
sub vivifiable_as {
return !defined($_[0]) || ref($_[0]) eq $_[1];
}
sub _epoch_undef {
my ($datetime_str) = @_;
my $dt = BusyBird::DateTime::Format->parse_datetime($datetime_str);
return defined($dt) ? $dt->epoch : undef;
}
sub _sort_compare {
my ($a, $b) = @_;
if(defined($a) && defined($b)) {
return $b <=> $a;
}elsif(!defined($a) && defined($b)) {
return -1;
}elsif(defined($a) && !defined($b)) {
return 1;
}else {
return 0;
}
}
sub sort_statuses {
my ($statuses) = @_;
use sort 'stable';
my @dt_statuses = map {
my $safe_status = safed($_);
[
$_,
_epoch_undef($safe_status->val("busybird", "acked_at")),
_epoch_undef($safe_status->val("created_at")),
];
} @$statuses;
return [ map { $_->[0] } sort {
foreach my $sort_key (1, 2) {
my $ret = _sort_compare($a->[$sort_key], $b->[$sort_key]);
return $ret if $ret != 0;
}
return 0;
} @dt_statuses];
}
sub _create_text_segment {
return {
text => substr($_[0], $_[1], $_[2] - $_[1]),
start => $_[1],
end => $_[2],
type => $_[3],
entity => $_[4],
};
}
sub split_with_entities {
my ($text, $entities_hashref) = @_;
use sort 'stable';
if(!defined($text)) {
croak "text must not be undef";
}
if(ref($entities_hashref) ne "HASH") {
return [_create_text_segment($text, 0, length($text))];
}
## create entity segments
my @entity_segments = ();
foreach my $entity_type (keys %$entities_hashref) {
my $entities = $entities_hashref->{$entity_type};
next if ref($entities) ne "ARRAY";
foreach my $entity (@$entities) {
my $se = safed($entity);
my $start = $se->val("indices", 0);
my $end = $se->val("indices", 1);
if(defined($start) && defined($end) && $start <= $end) {
push(@entity_segments, _create_text_segment(
$text, $start, $end, $entity_type, $entity
));
}
}
}
@entity_segments = sort { $a->{start} <=> $b->{start} } @entity_segments;
## combine entity_segments with non-entity segments
my $pos = 0;
my @final_segments = ();
foreach my $entity_segment (@entity_segments) {
if($pos < $entity_segment->{start}) {
push(@final_segments, _create_text_segment(
$text, $pos, $entity_segment->{start}
));
}
push(@final_segments, $entity_segment);
lib/BusyBird/Util.pm view on Meta::CPAN
return;
}
$track->contains(query => $statuses, callback => sub {
my ($error, $contained, $not_contained) = @_;
if(defined($error)) {
bblog("error", "tracking timeline '$name_tracking' contains() error: $error");
$done->($statuses);
return;
}
$main_timeline->add($not_contained, sub {
my ($error, $count) = @_;
if(defined($error)) {
bblog("error", "main timeline '$name_main' add() error: $error");
}
$done->($statuses);
});
});
});
return $tracking_timeline;
}
1;
__END__
=pod
=head1 NAME
BusyBird::Util - utility functions for BusyBird
=head1 SYNOPSIS
use BusyBird::Util qw(sort_statuses split_with_entities future_of);
future_of($timeline, "get_statuses", count => 100)->then(sub {
my ($statuses) = @_;
my $sorted_statuses = sort_statuses($statuses);
my $status = $sorted_statuses->[0];
my $segments_arrayref = split_with_entities($status->{text}, $status->{entities});
return $segments_arrayref;
})->catch(sub {
my ($error, $is_normal_error) = @_;
warn $error;
});
=head1 DESCRIPTION
This module provides some utility functions useful in L<BusyBird>.
=head1 EXPORTABLE FUNCTIONS
The following functions are exported only by request.
=head2 $sorted = sort_statuses($statuses)
Sorts an array of status objects appropriately. Argument C<$statuses> is an array-ref of statuses.
Return value C<$sorted> is an array-ref of sorted statuses.
The sort refers to C<< $status->{created_at} >> and C<< $status->{busybird}{acked_at} >> fields.
See L<BusyBird::StatusStorage/Order_of_Statuses> section.
=head2 $segments_arrayref = split_with_entities($text, $entities_hashref)
Splits the given C<$text> with the "entities" and returns the split segments.
C<$text> is a string to be split. C<$entities_hashref> is a hash-ref which has the same stucture as
L<Twitter Entities|https://dev.twitter.com/docs/platform-objects/entities>.
Each entity object annotates a part of C<$text> with such information as linked URLs, mentioned users,
mentioned hashtags, etc.
If C<$entities_hashref> doesn't conform to the said structure, it is ignored.
The return value C<$segments_arrayref> is an array-ref of "segment" objects.
A "segment" is a hash-ref containing a part of C<$text> and the entity object (if any) attached to it.
Note that C<$segments_arrayref> has segments that no entity is attached to.
C<$segments_arrayref> is sorted, so you can assemble the complete C<$text> by concatenating all the segments.
Example:
my $text = 'aaa --- bb ---- ccaa -- ccccc';
my $entities = {
a => [
{indices => [0, 3], url => 'http://hoge.com/a/1'},
{indices => [18, 20], url => 'http://hoge.com/a/2'},
],
b => [
{indices => [8, 10], style => "bold"},
],
c => [
{indices => [16, 18], footnote => 'first c'},
{indices => [24, 29], some => {complex => 'structure'}},
],
d => []
};
my $segments = split_with_entities($text, $entities);
## $segments = [
## { text => 'aaa', start => 0, end => 3, type => 'a',
## entity => {indices => [0, 3], url => 'http://hoge.com/a/1'} },
## { text => ' --- ', start => 3, end => 8, type => undef,
## entity => undef},
## { text => 'bb', start => 8, end => 10, type => 'b',
## entity => {indices => [8, 10], style => "bold"} },
## { text => ' ---- ', start => 10, end => 16, type => undef,
## entity => undef },
## { text => 'cc', start => 16, end => 18, type => 'c',
## entity => {indices => [16, 18], footnote => 'first c'} },
## { text => 'aa', start => 18, end => 20, type => 'a',
## entity => {indices => [18, 20], url => 'http://hoge.com/a/2'} },
## { text => ' -- ', start => 20, end => 24, type => undef,
## entity => undef },
## { text => 'ccccc', start => 24, end => 29, type => 'c',
## entity => {indices => [24, 29], some => {complex => 'structure'}} }
## ];
Any entity object is required to have C<indices> field, which is an array-ref
of starting and ending indices of the text part.
The ending index must be greater than or equal to the starting index.
If an entitiy object does not meet this condition, that entity object is ignored.
( run in 0.595 second using v1.01-cache-2.11-cpan-39bf76dae61 )