App-CekBpom

 view release on metacpan or  search on metacpan

lib/App/CekBpom.pm  view on Meta::CPAN

            tags => ['category:logging'],
        },
    },
    examples => [
        {
            summary => 'By default search against name (nama_produk) and brand (merk)',
            argv => ["hichew", "hi-chew", "hi chew"],
            test => 0,
            'x.doc.show_result' => 0,
        },
        {
            summary => 'Adding --trace will show query details, --format html+datatables is convenient to see/search/sort results in browser',
            src => "[[prog]] hichew hi-chew 'hi chew' --trace --format html+datatables",
            src_plang => "bash",
            test => 0,
            'x.doc.show_result' => 0,
        },
    ],
};
sub cek_bpom_products {
    require HTTP::CookieJar::LWP;
    require LWP::UserAgent::Plugin;

    my $time_start = time();

    my %args = @_;
    defined(my $queries = $args{queries}) or return [400, "Please specify queries"];
    my $search_types = $args{search_types} // ['nama_produk', 'merk'];

    my $jar = HTTP::CookieJar::LWP->new;
    my $ua = LWP::UserAgent::Plugin->new(
        cookie_jar => $jar,
    );

    # first get the front page so we get the session ID
    log_trace "Requesting cekbpom front page ...";
    my $res = $ua->get($url_prefix);
    unless ($res->is_success) {
        return [$res->code, "Can't get front page ($url_prefix): ".$res->message];
    }
    my $ct = $res->content;
    unless ($ct =~ m!/home/produk/(\w{26})"!) {
        return [543, "Can't extract session ID from front page"];
    }
    my $session_id = $1;

    my %reg_ids;
    my @all_rows;

    my $time_before_query = time();
  QUERY:
    for my $query (@$queries) {
      SEARCH_TYPE:
        for my $search_type (@$search_types) {
            my $search_type_num = $known_search_types{$search_type}[0];
            unless (defined $search_type_num) {
                return [400, "Unknown search_type '$search_type'"];
            }

            require URI::Escape;
            my $query_enc = URI::Escape::uri_escape($query);

            my @rows;
            my $page_num = 0;
            my $num_results = 100;
            my ($result_start, $result_end);
            while (1) {
                log_trace "Querying cekbpom ($search_type=$query, $num_results result(s)) ...";
                $res = $ua->get("$url_prefix/home/produk/$session_id/all/row/$num_results/page/$page_num/order/4/DESC/search/$search_type_num/$query_enc");
                unless ($res->is_success) {
                    return [$res->code, "Can't get result page: ".$res->message];
                }
                my $ct = $res->content;
                unless ($ct =~ m!(\d+) - (\d+) Dari (\d+)!) {
                    return [543, "Can't find signature in result page"];
                }
                ($result_start, $result_end, $num_results) = ($1, $2, $3);

                if ($result_end < $num_results && $result_end < 5000) {
                    redo;
                }

                if ($ENV{CEK_BPOM_TRACE}) {
                    log_trace $ct;
                }

                while ($ct =~ m!
                                   <tr\stitle.+?\surldetil="/(?P<reg_id>[^"]+)">
                                   <td[^>]*>\s* (?P<nomor_registrasi>[^<]+?)\s*   (?:<div>Terbit:(?P<tanggal_terbit>[^<]+?))?\s*    </div></td>
                                   <td[^>]*>\s* (?P<nama>[^<]+?)\s*<div>Merk:\s*  (?P<merk>[^<]+)<br>Kemasan:(?P<kemasan>[^<]+?)\s* </div></td>
                                   <td[^>]*>\s* (?P<pendaftar>[^<]+?)\s*<div>\s*  (?P<kota_pendaftar>[^<]+?)\s*                     </div></td>
                               !sgx) {
                    my $row = {%+};
                    for (qw/kemasan/) { $row->{$_} =~ s/\R+//g }
                    push @rows, $row;
                }
                last;
            }

            if (@rows < $num_results) {
                # XXX should've been a fatal error
                log_warn "Some results cannot be parsed (only got %d out of %d)", scalar(@rows), $num_results;
            } else {
                log_trace "Got $num_results result(s)";
            }

            # add to final result
            for (@rows) {
                push @all_rows, $_ unless $reg_ids{ $_->{reg_id} }++;
            }
        } # for SEARCH_TYPE
    } # for QUERY
    my $time_after_query = time();

    if (@$search_types > 1 || @$queries > 1) {
        log_trace "Got a total of %d result(s)", scalar(@all_rows);
    }

  GET_PRODUCT_DETAIL: {
        last unless $args{get_product_detail};
        my $i = 0;



( run in 2.761 seconds using v1.01-cache-2.11-cpan-8f98c5d2c55 )