BusyBird

 view release on metacpan or  search on metacpan

lib/BusyBird/Util.pm  view on Meta::CPAN

package BusyBird::Util;
use v5.8.0;
use strict;
use warnings;
use Scalar::Util qw(blessed weaken);
use Carp;
use Exporter 5.57 qw(import);
use BusyBird::DateTime::Format;
use BusyBird::Log qw(bblog);
use BusyBird::SafeData qw(safed);
use DateTime;
use Future::Q 0.040;
use File::HomeDir;
use File::Spec;

our @EXPORT_OK =
    qw(set_param expand_param config_directory config_file_path sort_statuses
       split_with_entities future_of make_tracking vivifiable_as);
our @CARP_NOT = qw(Future::Q);

sub set_param {
    my ($hashref, $params_ref, $key, $default, $is_mandatory) = @_;
    if($is_mandatory && !defined($params_ref->{$key})) {
        my $classname = blessed $hashref;
        croak "ERROR: set_param in $classname: Parameter for '$key' is mandatory, but not supplied.";
    }
    $hashref->{$key} = (defined($params_ref->{$key}) ? $params_ref->{$key} : $default);
}

sub export_ok_all_tags {
    no strict "refs";
    my ($caller_package) = caller;
    my $export_ok = \@{"${caller_package}::EXPORT_OK"};
    my $export_tags = \%{"${caller_package}::EXPORT_TAGS"};
    my @all = @$export_ok;
    foreach my $tag (keys %$export_tags) {
        my $exported = $export_tags->{$tag};
        push(@all, @$exported);
        push(@$export_ok, @$exported);
    }
    $export_tags->{all} = \@all;
}

sub expand_param {
    my ($param, @names) = @_;
    my $refparam = ref($param);
    my @result = ();
    if($refparam eq 'ARRAY') {
        @result = @$param;
    }elsif($refparam eq 'HASH') {
        @result = @{$param}{@names};
    }else {
        $result[0] = $param;
    }
    return wantarray ? @result : $result[0];
}

sub config_directory {
    return File::Spec->catfile(File::HomeDir->my_home, ".busybird");
}

sub config_file_path {
    my (@paths) = @_;
    return File::Spec->catfile(config_directory, @paths);
}

sub vivifiable_as {
    return !defined($_[0]) || ref($_[0]) eq $_[1];
}

sub _epoch_undef {
    my ($datetime_str) = @_;
    my $dt = BusyBird::DateTime::Format->parse_datetime($datetime_str);
    return defined($dt) ? $dt->epoch : undef;
}

sub _sort_compare {
    my ($a, $b) = @_;
    if(defined($a) && defined($b)) {
        return $b <=> $a;
    }elsif(!defined($a) && defined($b)) {
        return -1;
    }elsif(defined($a) && !defined($b)) {
        return 1;
    }else {
        return 0;
    }
}

sub sort_statuses {
    my ($statuses) = @_;
    use sort 'stable';
    
    my @dt_statuses = map {
        my $safe_status = safed($_);
        [
            $_,
            _epoch_undef($safe_status->val("busybird", "acked_at")),
            _epoch_undef($safe_status->val("created_at")),
        ];
    } @$statuses;
    return [ map { $_->[0] } sort {
        foreach my $sort_key (1, 2) {
            my $ret = _sort_compare($a->[$sort_key], $b->[$sort_key]);
            return $ret if $ret != 0;
        }
        return 0;
    } @dt_statuses];
}

sub _create_text_segment {
    return {
        text => substr($_[0], $_[1], $_[2] - $_[1]),
        start => $_[1],
        end => $_[2],
        type => $_[3],
        entity => $_[4],
    };
}

sub split_with_entities {
    my ($text, $entities_hashref) = @_;
    use sort 'stable';
    if(!defined($text)) {
        croak "text must not be undef";
    }
    if(ref($entities_hashref) ne "HASH") {
        return [_create_text_segment($text, 0, length($text))];
    }

    ## create entity segments
    my @entity_segments = ();
    foreach my $entity_type (keys %$entities_hashref) {
        my $entities = $entities_hashref->{$entity_type};
        next if ref($entities) ne "ARRAY";
        foreach my $entity (@$entities) {
            my $se = safed($entity);
            my $start = $se->val("indices", 0);
            my $end = $se->val("indices", 1);
            if(defined($start) && defined($end) && $start <= $end) {
                push(@entity_segments, _create_text_segment(
                    $text, $start, $end, $entity_type, $entity
                ));
            }
        }
    }
    @entity_segments = sort { $a->{start} <=> $b->{start} } @entity_segments;

    ## combine entity_segments with non-entity segments
    my $pos = 0;
    my @final_segments = ();
    foreach my $entity_segment (@entity_segments) {
        if($pos < $entity_segment->{start}) {
            push(@final_segments, _create_text_segment(
                $text, $pos, $entity_segment->{start}
            ));
        }
        push(@final_segments, $entity_segment);

lib/BusyBird/Util.pm  view on Meta::CPAN

            return;
        }
        $track->contains(query => $statuses, callback => sub {
            my ($error, $contained, $not_contained) = @_;
            if(defined($error)) {
                bblog("error", "tracking timeline '$name_tracking' contains() error: $error");
                $done->($statuses);
                return;
            }
            $main_timeline->add($not_contained, sub {
                my ($error, $count) = @_;
                if(defined($error)) {
                    bblog("error", "main timeline '$name_main' add() error: $error");
                }
                $done->($statuses);
            });
        });
    });
    return $tracking_timeline;
}

1;

__END__

=pod

=head1 NAME

BusyBird::Util - utility functions for BusyBird

=head1 SYNOPSIS

    use BusyBird::Util qw(sort_statuses split_with_entities future_of);
    
    future_of($timeline, "get_statuses", count => 100)->then(sub {
        my ($statuses) = @_;
        my $sorted_statuses = sort_statuses($statuses);
        my $status = $sorted_statuses->[0];
        my $segments_arrayref = split_with_entities($status->{text}, $status->{entities});
        return $segments_arrayref;
    })->catch(sub {
        my ($error, $is_normal_error) = @_;
        warn $error;
    });

=head1 DESCRIPTION

This module provides some utility functions useful in L<BusyBird>.

=head1 EXPORTABLE FUNCTIONS

The following functions are exported only by request.

=head2 $sorted = sort_statuses($statuses)

Sorts an array of status objects appropriately. Argument C<$statuses> is an array-ref of statuses.

Return value C<$sorted> is an array-ref of sorted statuses.

The sort refers to C<< $status->{created_at} >> and C<< $status->{busybird}{acked_at} >> fields.
See L<BusyBird::StatusStorage/Order_of_Statuses> section.

=head2 $segments_arrayref = split_with_entities($text, $entities_hashref)

Splits the given C<$text> with the "entities" and returns the split segments.

C<$text> is a string to be split. C<$entities_hashref> is a hash-ref which has the same stucture as
L<Twitter Entities|https://dev.twitter.com/docs/platform-objects/entities>.
Each entity object annotates a part of C<$text> with such information as linked URLs, mentioned users,
mentioned hashtags, etc.
If C<$entities_hashref> doesn't conform to the said structure, it is ignored.

The return value C<$segments_arrayref> is an array-ref of "segment" objects.
A "segment" is a hash-ref containing a part of C<$text> and the entity object (if any) attached to it.
Note that C<$segments_arrayref> has segments that no entity is attached to.
C<$segments_arrayref> is sorted, so you can assemble the complete C<$text> by concatenating all the segments.

Example:

    my $text = 'aaa --- bb ---- ccaa -- ccccc';
    my $entities = {
        a => [
            {indices => [0, 3],   url => 'http://hoge.com/a/1'},
            {indices => [18, 20], url => 'http://hoge.com/a/2'},
        ],
        b => [
            {indices => [8, 10], style => "bold"},
        ],
        c => [
            {indices => [16, 18], footnote => 'first c'},
            {indices => [24, 29], some => {complex => 'structure'}},
        ],
        d => []
    };
    my $segments = split_with_entities($text, $entities);
    
    ## $segments = [
    ##     { text => 'aaa', start => 0, end => 3, type => 'a',
    ##       entity => {indices => [0, 3], url => 'http://hoge.com/a/1'} },
    ##     { text => ' --- ', start => 3, end => 8, type => undef,
    ##       entity => undef},
    ##     { text => 'bb', start => 8, end => 10, type => 'b',
    ##       entity => {indices => [8, 10], style => "bold"} },
    ##     { text => ' ---- ', start => 10, end =>  16, type => undef,
    ##       entity => undef },
    ##     { text => 'cc', start => 16, end => 18, type => 'c',
    ##       entity => {indices => [16, 18], footnote => 'first c'} },
    ##     { text => 'aa', start => 18, end => 20, type => 'a',
    ##       entity => {indices => [18, 20], url => 'http://hoge.com/a/2'} },
    ##     { text => ' -- ', start => 20, end => 24, type => undef,
    ##       entity => undef },
    ##     { text => 'ccccc', start => 24, end => 29, type => 'c',
    ##       entity => {indices => [24, 29], some => {complex => 'structure'}} }
    ## ];

Any entity object is required to have C<indices> field, which is an array-ref
of starting and ending indices of the text part.
The ending index must be greater than or equal to the starting index.
If an entitiy object does not meet this condition, that entity object is ignored.



( run in 0.595 second using v1.01-cache-2.11-cpan-39bf76dae61 )