App-GoogleSearchUtils
view release on metacpan or search on metacpan
lib/App/GoogleSearchUtils.pm view on Meta::CPAN
test => 0,
'x.doc.show_result' => 0,
},
{
summary => 'Get the IMDB URL for Lee Mack',
src => '[[prog]] "lee mack imdb" --print-result-link | grep-url --host-contains imdb.com | head -n1',
src_plang => 'bash',
test => 0,
'x.doc.show_result' => 0,
},
],
links => [
{url=>'prog:firefox-container'},
{url=>'pm:App::FirefoxMultiAccountContainersUtils'},
],
};
# google_search - build Google search URLs for one or more queries and
# either open them in a browser, print them, or fetch the result pages.
#
# Takes a flat hash of named arguments (%args). The keys read below are:
# queries / queries_from, num, action, type, prepend, append, delay,
# min_delay, max_delay, time_past, time_start, time_end.
# Returns enveloped results in the [status, message, ...] convention used
# by the explicit `return [400, ...]` branches below. NOTE(review):
# envresmulti(), log_trace(), _fmt_html_link() and _fmt_org_link() are not
# defined in this chunk — presumably imported/defined elsewhere in the file
# (e.g. Perinci::Sub::Util, Log::ger); confirm.
sub google_search {
require Browser::Open;
require URI::Escape;
my %args = @_;
# XXX schema
# Results-per-page; defaults to 100. "+ 0" forces numeric coercion.
my $num = defined($args{num}) ? $args{num} + 0 : 100;
my $action = $args{action} // 'web';
my $type = $args{type} // 'web';
# Collect queries either from a file (File::Slurper::Dash treats "-" as
# stdin) or from the `queries` array ref; exactly one source is required.
my @queries;
if (defined $args{queries_from}) {
require File::Slurper::Dash;
my $content = File::Slurper::Dash::read_text($args{queries_from});
# Split at line starts, then strip each line's trailing newline.
@queries = map { chomp(my $line = $_); $line } split /^/m, $content;
} elsif ($args{queries} && @{ $args{queries} }) {
@queries = @{ $args{queries} };
} else {
return [400, "Please specify either queries or queries_from"];
}
my @rows;
my $envres = envresmulti();
my $i = -1;
for my $query0 (@queries) {
$i++;
# Throttle between queries (never before the first): a fixed `delay`,
# or a random delay in [min_delay, max_delay] when both bounds are set.
if ($i > 0) {
if ($args{delay}) {
log_trace "Sleeping %s second(s) ...", $args{delay};
sleep $args{delay};
} elsif ($args{min_delay} && $args{max_delay}) {
my $delay = $args{min_delay} +
int(rand($args{max_delay} - $args{min_delay} + 1));
log_trace "Sleeping between %s and %s second(s): %s second(s) ...",
$args{min_delay}, $args{max_delay}, $delay;
sleep $delay;
}
}
# Optionally wrap each raw query with fixed prefix/suffix text.
my $query = join(
"",
defined($args{prepend}) ? $args{prepend} : "",
$query0,
defined($args{append}) ? $args{append} : "",
);
my $query_esc = URI::Escape::uri_escape($query);
# Build the `tbs` time-restriction query parameter: either a "past
# period" restriction (qdr:...) or a custom date range (cdr:...).
my $time_param = '';
if (my $p = $args{time_past}) {
if ($p eq 'h' || $p eq 'hour') {
$time_param = 'tbs=qdr:h';
} elsif ($p eq '24hour' || $p eq 'day') {
$time_param = 'tbs=qdr:d';
} elsif ($p eq 'w' || $p eq 'week') {
$time_param = 'tbs=qdr:w';
} elsif ($p eq 'm' || $p eq 'month') {
$time_param = 'tbs=qdr:m';
} elsif ($p eq 'y' || $p eq 'year') {
$time_param = 'tbs=qdr:y';
} else {
return [400, "Invalid time_past value '$p'"];
}
} elsif ($args{time_start} && $args{time_end}) {
# NOTE(review): $t1/$t2 are assigned but never used below — the
# strftime calls go through $args{...} directly. Dead code.
my ($t1, $t2) = ($args{time_start}, $args{time_end});
# time_start/time_end must be objects providing strftime() (e.g.
# DateTime) — TODO confirm against the argument schema.
$time_param = "tbs=".URI::Escape::uri_escape(
"cdr:1,cd_min:".
($args{time_start}->strftime("%m/%d/%Y")).
",cd_max:".($args{time_end}->strftime("%m/%d/%Y"))
);
}
# Build the search URL for the requested search type.
my $url;
if ($type eq 'web') {
$url = "https://www.google.com/search?num=$num&q=$query_esc" .
($time_param ? "&$time_param" : "");
} elsif ($type eq 'image') {
$url = "https://www.google.com/search?num=$num&q=$query_esc&tbm=isch" .
($time_param ? "&$time_param" : "");
} elsif ($type eq 'video') {
# FIXME: this branch duplicates the 'image' branch and uses
# tbm=isch (image search); video search should presumably use
# tbm=vid. Looks like a copy-paste bug — confirm and fix.
$url = "https://www.google.com/search?num=$num&q=$query_esc&tbm=isch" .
($time_param ? "&$time_param" : "");
} elsif ($type eq 'news') {
$url = "https://www.google.com/search?num=$num&q=$query_esc&tbm=nws" .
($time_param ? "&$time_param" : "");
} elsif ($type eq 'map') {
# Maps URLs carry no tbs parameter, so a time restriction conflicts.
return [409, "Can't specify time period for map search"] if length $time_param;
$url = "https://www.google.com/maps/search/$query_esc/";
} else {
return [400, "Unknown type '$type'"];
}
# Perform the requested action on the built URL.
if ($action eq 'open_url') {
# Browser::Open::open_browser returns false-ish on success, hence
# the inverted status mapping below.
my $res = Browser::Open::open_browser($url);
$envres->add_result(
($res ? (500, "Failed") : (200, "OK")), {item_id=>$i});
} elsif ($action eq 'print_url') {
push @rows, $url;
} elsif ($action eq 'print_html_link') {
push @rows, _fmt_html_link($url, $query);
} elsif ($action eq 'print_org_link') {
push @rows, _fmt_org_link($url, $query);
} elsif ($action =~ /\A(save_html|(print_result_(|html_|org_)link))\z/) {
# Actions that need the rendered result page drive a real browser
# via Firefox::Marionette. `state` makes the instance a lazily
# created singleton reused across all queries in this call.
state $ff1 = do {
require Firefox::Marionette;
log_trace "Instantiating Firefox::Marionette instance ...";
Firefox::Marionette->new;
};
log_trace "Retrieving URL $url ...";
my $ff2 = $ff1->go($url);
if ($action eq 'save_html') {
require File::Slurper;
# Sanitize the query into a filename-safe slug.
(my $query_save = $query) =~ s/[^A-Za-z0-9_-]+/_/g;
my $filename0 = sprintf "%d-%s.%s.html", $i+1, $query_save, $type;
# Avoid clobbering an existing file by appending .1, .2, ...
# NOTE(review): the suffix goes AFTER ".html" (e.g. "foo.html.1").
my $filename;
my $j = -1;
while (1) {
$j++;
$filename = $filename0 . ($j ? ".$j" : "");
last unless -f $filename;
}
log_trace "Saving query[%d] result to %s ...", $i, $filename;
File::Slurper::write_text($filename, $ff2->html);
} else {
# extract links first
my @links = $ff2->links;
( run in 0.540 second using v1.01-cache-2.11-cpan-39bf76dae61 )