Link_Controller
view release on metacpan or search on metacpan
testing-links/test-link.pl view on Meta::CPAN
#the link has probably been deleted from the database
warn "non existant link scheduled for checking";
#what happens if you remove the cursor item?? oh well
($link, $time)=next_link();
last CASE;
};
$::untested && $link->is_not_checked() and do {
($link, $time)=next_link();
last CASE;
}
}
return $link, $time; #can be undefined..
}
# =head2 next_link
# Returns the next link that needs to be checked.
# =cut
sub DAY () {60*60*24;}
sub next_link {
my $self=shift;
my $time;
my $name;
my $link;
while () {
($time, $name)=$::sched->next_item();
return undef unless defined $time; # out of items
$link=$::links{$name};
$::untested && (! $link->is_not_checked() ) and do {
print STDERR "link " . $link->url() . " already tested.. skipping\n"
if $::verbose;
next;
};
defined $::refresh_time and
$link->last_refresh() < ( $::refresh_time - $::max_link_age * DAY)
and do {
print STDERR "deleting old link " . $link->url() . " from database";
delete $::links{$name};
next;
};
last if defined $link;
#the link has probably been deleted from the database
warn "non existant link " . $link->url() . " scheduled for checking";
#what happens if you remove the cursor item?? oh well
}
print STDERR "Next link: " . $link . " URL: " . $link->url() . " for time " .
$time . "\n" if $::verbose;
return $link, $time;
}
# =head2 update_link
# Updates the link in the database.
# =cut
sub update_link {
my $link=shift;
# here be danger.. this could cause multiplication of links in
# the database if the url function returns variable values..
# it `shouldn´t´
print STDERR "New link is " , $link, Dumper ( $link )
if $::verbose & 16;
print STDERR "Before " , Dumper( $::links{$link->url()} ), "\n"
if $::verbose & 256;
WWW::Link_Controller::Lock::checklock();
$::links{$link->url()} = $link;
print STDERR "After " , Dumper( $::links{$link->url()} ), "\n"
if $::verbose & 128;
#FIXME Fsync???
}
# =head2 auto_schedule_link
# Schedules a link according to when it wants to be scheduled. We actually
# schedule the URI which should be a unique identifier.
# N.B. this is for use after a link has been tested and checks that
# the link wants to be tested in the future.
# =cut
sub auto_schedule_link {
my $link=shift;
die "usage auto_schedule_link(<link>)" unless ref $link;
my ($sched_time,$vary)=$link->time_want_test();
die "link failed to return sched time" unless $sched_time;
my $time=time;
if ( $::sequential ) {
$sched_time=$::next_sched;
} else {
warn "time logic wrong; just tested ($time) but wants tested at $sched_time"
if $sched_time < $time;
print STDERR "Link wants test between $sched_time and "
. ( $sched_time + $vary ) . "\n" if $::verbose;
my $earliest=time() + $::min_delay;
if ( $sched_time < $earliest ) {
$sched_time = $earliest ;
print STDERR "forcing link to test at $earliest for minimum delay\n";
}
$sched_time += rand($vary);
if ( $sched_time < $::last_queue_time ) {
$sched_time=$::last_queue_time+1 ;
print STDERR "forcing link after end of queue at $::last_queue_time\n";
}
print STDERR " will schedule at $sched_time\n" if $::verbose;
}
$::sched->schedule(int($sched_time) , $link->url() );
}
=head2 status log handling
During it's operation, test-link can write a log file (to a file given
in the $::link_stat_log configuration variable). This can be used to
alerts to the webmaster about newly broken links.
( run in 1.187 second using v1.01-cache-2.11-cpan-39bf76dae61 )