App-GoogleSearchUtils

 view release on metacpan or search on metacpan

lib/App/GoogleSearchUtils.pm  view on Meta::CPAN

            test => 0,
            'x.doc.show_result' => 0,
        },
        {
            summary => 'Get the IMDB URL for Lee Mack',
            src => '[[prog]] "lee mack imdb" --print-result-link | grep-url --host-contains imdb.com | head -n1',
            src_plang => 'bash',
            test => 0,
            'x.doc.show_result' => 0,
        },
    ],
    links => [
        {url=>'prog:firefox-container'},
        {url=>'pm:App::FirefoxMultiAccountContainersUtils'},
    ],
};
sub google_search {
    require Browser::Open;
    require URI::Escape;

    my %args = @_;
    # XXX schema
    my $num = defined($args{num}) ? $args{num} + 0 : 100;
    my $action = $args{action} // 'web';
    my $type = $args{type} // 'web';

    my @queries;
    if (defined $args{queries_from}) {
        require File::Slurper::Dash;
        my $content = File::Slurper::Dash::read_text($args{queries_from});
        @queries = map { chomp(my $line = $_); $line } split /^/m, $content;
    } elsif ($args{queries} && @{ $args{queries} }) {
        @queries = @{ $args{queries} };
    } else {
        return [400, "Please specify either queries or queries_from"];
    }

    my @rows;
    my $envres = envresmulti();
    my $i = -1;
    for my $query0 (@queries) {
        $i++;
        if ($i > 0) {
            if ($args{delay}) {
                log_trace "Sleeping %s second(s) ...", $args{delay};
                sleep $args{delay};
            } elsif ($args{min_delay} && $args{max_delay}) {
                my $delay = $args{min_delay} +
                    int(rand($args{max_delay} - $args{min_delay} + 1));
                log_trace "Sleeping between %s and %s second(s): %s second(s) ...",
                    $args{min_delay}, $args{max_delay}, $delay;
                sleep $delay;
            }
        }
        my $query = join(
            "",
            defined($args{prepend}) ? $args{prepend} : "",
            $query0,
            defined($args{append}) ? $args{append} : "",
        );
        my $query_esc = URI::Escape::uri_escape($query);

        my $time_param = '';
        if (my $p = $args{time_past}) {
            if ($p eq 'h' || $p eq 'hour') {
                $time_param = 'tbs=qdr:h';
            } elsif ($p eq '24hour' || $p eq 'day') {
                $time_param = 'tbs=qdr:d';
            } elsif ($p eq 'w' || $p eq 'week') {
                $time_param = 'tbs=qdr:w';
            } elsif ($p eq 'm' || $p eq 'month') {
                $time_param = 'tbs=qdr:m';
            } elsif ($p eq 'y' || $p eq 'year') {
                $time_param = 'tbs=qdr:y';
            } else {
                return [400, "Invalid time_past value '$p'"];
            }
        } elsif ($args{time_start} && $args{time_end}) {
            my ($t1, $t2) = ($args{time_start}, $args{time_end});
            $time_param = "tbs=".URI::Escape::uri_escape(
                "cdr:1,cd_min:".
                ($args{time_start}->strftime("%m/%d/%Y")).
                ",cd_max:".($args{time_end}->strftime("%m/%d/%Y"))
            );
        }

        my $url;
        if ($type eq 'web') {
            $url = "https://www.google.com/search?num=$num&q=$query_esc" .
                ($time_param ? "&$time_param" : "");
        } elsif ($type eq 'image') {
            $url = "https://www.google.com/search?num=$num&q=$query_esc&tbm=isch" .
                ($time_param ? "&$time_param" : "");
        } elsif ($type eq 'video') {
            $url = "https://www.google.com/search?num=$num&q=$query_esc&tbm=isch" .
                ($time_param ? "&$time_param" : "");
        } elsif ($type eq 'news') {
            $url = "https://www.google.com/search?num=$num&q=$query_esc&tbm=nws" .
                ($time_param ? "&$time_param" : "");
        } elsif ($type eq 'map') {
            return [409, "Can't specify time period for map search"] if length $time_param;
            $url = "https://www.google.com/maps/search/$query_esc/";
        } else {
            return [400, "Unknown type '$type'"];
        }

        if ($action eq 'open_url') {
            my $res = Browser::Open::open_browser($url);
            $envres->add_result(
                ($res ? (500, "Failed") : (200, "OK")), {item_id=>$i});
        } elsif ($action eq 'print_url') {
            push @rows, $url;
        } elsif ($action eq 'print_html_link') {
            push @rows, _fmt_html_link($url, $query);
        } elsif ($action eq 'print_org_link') {
            push @rows, _fmt_org_link($url, $query);
        } elsif ($action =~ /\A(save_html|(print_result_(|html_|org_)link))\z/) {
            state $ff1 = do {
                require Firefox::Marionette;
                log_trace "Instantiating Firefox::Marionette instance ...";
                Firefox::Marionette->new;
            };
            log_trace "Retrieving URL $url ...";
            my $ff2 = $ff1->go($url);
            if ($action eq 'save_html') {
                require File::Slurper;
                (my $query_save = $query) =~ s/[^A-Za-z0-9_-]+/_/g;
                my $filename0 = sprintf "%d-%s.%s.html", $i+1, $query_save, $type;
                my $filename;
                my $j = -1;
                while (1) {
                    $j++;
                    $filename = $filename0 . ($j ? ".$j" : "");
                    last unless -f $filename;
                }
                log_trace "Saving query[%d] result to %s ...", $i, $filename;
                File::Slurper::write_text($filename, $ff2->html);
            } else {
                # extract links first
                my @links = $ff2->links;



( run in 0.540 second using v1.01-cache-2.11-cpan-39bf76dae61 )