Bundle-WWW-Scraper-Job

 view release on metacpan or  search on metacpan

lib/WWW/Scraper/BAJobs.pm  view on Meta::CPAN


package WWW::Scraper::BAJobs;

#####################################################################
# Scraper subclass for BAJobs (www.bajobs.com) job searches.
#####################################################################

use strict;

# Package globals are declared with `our` rather than the obsolete
# `use vars` pragma; the variables and their values are unchanged.

# Inherit from the generic WWW::Scraper base class.
our @ISA = qw(WWW::Scraper);

# Build "major.minor" out of the revision-control keyword string
# (q$Revision: 1.00 $ yields "1.00").  Kept as a single statement on one
# line so tools that extract $VERSION by evaluating that line still work.
our $VERSION = sprintf("%d.%02d", q$Revision: 1.00 $ =~ /(\d+)\.(\d+)/);

use Carp ();
use WWW::Scraper(qw(1.48 generic_option addURL trimTags));
use WWW::Scraper::FieldTranslation;

use LWP::UserAgent;
use HTML::Form;
use HTTP::Cookies;

# As of 2002.01.26, this is what BAJobs "Refine your search" <FORM> looks like.
#<form action="/jobseeker/usersearch.jsp" method=post>
#  <input type="hidden" name="searchKeywordsMethod" value=1>
#  <input type="hidden" name="wholeWord" value="true">
#  <input type="hidden" name="displayResultsPerPage" value="20">
#  <input type="hidden" name="displaySortOrder" value="1">
#  <input type="hidden" name="postingAge" value="7">
#  <input type="hidden" name="countyList" value="">
#  <input type="hidden" name="workTermTypeList" value="">
#  <input type="hidden" name="jobPostingCategoryList" value="">
#  <input type="hidden" name="industryCategoryList" value="">
#  <p><b><font color=006699 face="arial,helvetica,sans-serif">Refine Your Search</font></b>
#  <br>
#  <input type=text name="searchKeywords" value=" Perl " size=40> &nbsp; &nbsp; <input type=submit value="Search">
#</form>

# Request description for BAJobs searches.
# NOTE(review): the keys below are presumably interpreted by WWW::Scraper;
# their precise semantics are defined there, not in this file - confirm
# against the WWW::Scraper documentation before changing them.
my $scraperRequest =
   {
      # 'POST' - we used to use 'FORM', which works fine, too, but this way's a little faster.
      'type' => 'POST'

     # This is the basic URL on which to build the query.
     ,'url' => 'http://www.bajobs.com/jobseeker/usersearch.jsp?'
     #,'url' => 'http://www.bajobs.com/jobseeker/search.jsp' # This one is the location of the <FORM>

     # Same name as the free-text <input> of the BAJobs form shown above.
     ,'nativeQuery' => 'searchKeywords'

     # Same names as the hidden <input> fields of the BAJobs form shown above.
     # Values match that form except displayResultsPerPage, which is 100 here
     # where the captured form carried 20.
     ,'nativeDefaults' =>
                            {
                                 'searchKeywordsMethod' => 1
                                ,'wholeWord' => 'true'
                                ,'displayResultsPerPage' => '100'
                                ,'displaySortOrder' => 1
                                ,'postingAge' => '7'
                                ,'countyList' => ''
                                ,'workTermTypeList' => ''
                                ,'jobPostingCategoryList' => ''
                                ,'industryCategoryList' => ''
                            }
     ,'defaultRequestClass' => 'Job'

     # NOTE(review): presumably maps canonical request field names (left) to
     # native BAJobs form fields (right), with undef meaning "no equivalent"
     # and '*' acting as a wildcard - verify against WWW::Scraper.
     ,'fieldTranslations' =>
                      { '*' =>
                              {    'skills'    => 'searchKeywords'
                                  ,'payrate'   => undef
                                  # Arrow-form constructor call: unlike the indirect-object
                                  # form ("new Class(...)"), it cannot be misparsed as a call
                                  # to a local sub named "new".
                                  ,'locations' => WWW::Scraper::FieldTranslation->new('BAJobs', 'Job', 'locations')
                                  ,'*'         => '*'
                              }
                      }
      # Some more options for the Scraper operation.
     ,'cookies' => 1
   };

my $scraperFrame =
        [ 'HTML', 
           [ 
               [ 'COUNT', 'Job Postings.*?[- 0-9]+.*?of.*?<b>([,0-9]+)</b></font> total']
              ,[ 'BODY', '<!-- top prev/next -->', '<!-- end top prev/next -->',
                 [ 
               [ 'NEXT', 1, '<b>NEXT</b>' ]
                ] #, \&fixNext ] ]



( run in 0.525 second using v1.01-cache-2.11-cpan-39bf76dae61 )