App-CekBpom
view release on metacpan or search on metacpan
lib/App/CekBpom.pm view on Meta::CPAN
tags => ['category:logging'],
},
},
examples => [
{
summary => 'By default search against name (nama_produk) and brand (merk)',
argv => ["hichew", "hi-chew", "hi chew"],
test => 0,
'x.doc.show_result' => 0,
},
{
summary => 'Adding --trace will show query details, --format html+datatables is convenient to see/search/sort results in browser',
src => "[[prog]] hichew hi-chew 'hi chew' --trace --format html+datatables",
src_plang => "bash",
test => 0,
'x.doc.show_result' => 0,
},
],
};
sub cek_bpom_products {
require HTTP::CookieJar::LWP;
require LWP::UserAgent::Plugin;
my $time_start = time();
my %args = @_;
defined(my $queries = $args{queries}) or return [400, "Please specify queries"];
my $search_types = $args{search_types} // ['nama_produk', 'merk'];
my $jar = HTTP::CookieJar::LWP->new;
my $ua = LWP::UserAgent::Plugin->new(
cookie_jar => $jar,
);
# first get the front page so we get the session ID
log_trace "Requesting cekbpom front page ...";
my $res = $ua->get($url_prefix);
unless ($res->is_success) {
return [$res->code, "Can't get front page ($url_prefix): ".$res->message];
}
my $ct = $res->content;
unless ($ct =~ m!/home/produk/(\w{26})"!) {
return [543, "Can't extract session ID from front page"];
}
my $session_id = $1;
my %reg_ids;
my @all_rows;
my $time_before_query = time();
QUERY:
for my $query (@$queries) {
SEARCH_TYPE:
for my $search_type (@$search_types) {
my $search_type_num = $known_search_types{$search_type}[0];
unless (defined $search_type_num) {
return [400, "Unknown search_type '$search_type'"];
}
require URI::Escape;
my $query_enc = URI::Escape::uri_escape($query);
my @rows;
my $page_num = 0;
my $num_results = 100;
my ($result_start, $result_end);
while (1) {
log_trace "Querying cekbpom ($search_type=$query, $num_results result(s)) ...";
$res = $ua->get("$url_prefix/home/produk/$session_id/all/row/$num_results/page/$page_num/order/4/DESC/search/$search_type_num/$query_enc");
unless ($res->is_success) {
return [$res->code, "Can't get result page: ".$res->message];
}
my $ct = $res->content;
unless ($ct =~ m!(\d+) - (\d+) Dari (\d+)!) {
return [543, "Can't find signature in result page"];
}
($result_start, $result_end, $num_results) = ($1, $2, $3);
if ($result_end < $num_results && $result_end < 5000) {
redo;
}
if ($ENV{CEK_BPOM_TRACE}) {
log_trace $ct;
}
while ($ct =~ m!
<tr\stitle.+?\surldetil="/(?P<reg_id>[^"]+)">
<td[^>]*>\s* (?P<nomor_registrasi>[^<]+?)\s* (?:<div>Terbit:(?P<tanggal_terbit>[^<]+?))?\s* </div></td>
<td[^>]*>\s* (?P<nama>[^<]+?)\s*<div>Merk:\s* (?P<merk>[^<]+)<br>Kemasan:(?P<kemasan>[^<]+?)\s* </div></td>
<td[^>]*>\s* (?P<pendaftar>[^<]+?)\s*<div>\s* (?P<kota_pendaftar>[^<]+?)\s* </div></td>
!sgx) {
my $row = {%+};
for (qw/kemasan/) { $row->{$_} =~ s/\R+//g }
push @rows, $row;
}
last;
}
if (@rows < $num_results) {
# XXX should've been a fatal error
log_warn "Some results cannot be parsed (only got %d out of %d)", scalar(@rows), $num_results;
} else {
log_trace "Got $num_results result(s)";
}
# add to final result
for (@rows) {
push @all_rows, $_ unless $reg_ids{ $_->{reg_id} }++;
}
} # for SEARCH_TYPE
} # for QUERY
my $time_after_query = time();
if (@$search_types > 1 || @$queries > 1) {
log_trace "Got a total of %d result(s)", scalar(@all_rows);
}
GET_PRODUCT_DETAIL: {
last unless $args{get_product_detail};
my $i = 0;
( run in 2.761 seconds using v1.01-cache-2.11-cpan-8f98c5d2c55 )