WWW-RobotRules-Extended
view release on metacpan or search on metacpan
Revision history for WWW-RobotRules-Extended
0.02 2011-12-24
improved documentation near by exported functions
if a robots.txt line is only "Disallow:", don't consider it as "Disallow: /"
0.01 Date/time
First version, released on an unsuspecting world.
Changes
lib/WWW/RobotRules/Extended.pm
Makefile.PL
MANIFEST This list of files
README
README_FR.htm
t/00-load.t
t/manifest.t
t/pod-coverage.t
t/pod.t
Makefile.PL view on Meta::CPAN
use 5.006;
use strict;
use warnings;
use ExtUtils::MakeMaker;
WriteMakefile(
NAME => 'WWW::RobotRules::Extended',
AUTHOR => q{Yannick Simon <yannick.simon@gmail.com>},
VERSION_FROM => 'lib/WWW/RobotRules/Extended.pm',
ABSTRACT_FROM => 'lib/WWW/RobotRules/Extended.pm',
($ExtUtils::MakeMaker::VERSION >= 6.3002
? ('LICENSE'=> 'perl')
: ()),
PL_FILES => {},
PREREQ_PM => {
'Test::More' => 0,
},
dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', },
clean => { FILES => 'WWW-RobotRules-Extended-*' },
);
Makefile.old view on Meta::CPAN
# This Makefile is for the WWW::RobotRules::Extended extension to perl.
#
# It was generated automatically by MakeMaker version
# 6.62 (Revision: 66200) from the contents of
# Makefile.PL. Don't edit this file, edit Makefile.PL instead.
#
# ANY CHANGES MADE HERE WILL BE LOST!
#
# MakeMaker ARGV: ()
#
# MakeMaker Parameters:
# ABSTRACT_FROM => q[lib/WWW/RobotRules/Extended.pm]
# AUTHOR => [q[Yannick Simon <yannick.simon@gmail.com>]]
# BUILD_REQUIRES => { }
# CONFIGURE_REQUIRES => { }
# LICENSE => q[perl]
# NAME => q[WWW::RobotRules::Extended]
# PL_FILES => { }
# PREREQ_PM => { Test::More=>q[0] }
# VERSION_FROM => q[lib/WWW/RobotRules/Extended.pm]
# clean => { FILES=>q[WWW-RobotRules-Extended-*] }
# dist => { COMPRESS=>q[gzip -9f], SUFFIX=>q[gz] }
# --- MakeMaker post_initialize section:
# --- MakeMaker const_config section:
# These definitions are from config.sh (via /usr/lib/perl/5.10/Config.pm).
# They may have been overridden via Makefile.PL or on the command line.
AR = ar
CC = cc
CCCDLFLAGS = -fPIC
CCDLFLAGS = -Wl,-E
DLEXT = so
DLSRC = dl_dlopen.xs
EXE_EXT =
FULL_AR = /usr/bin/ar
LD = cc
LDDLFLAGS = -shared -O2 -g -L/usr/local/lib -fstack-protector
LDFLAGS = -fstack-protector -L/usr/local/lib
LIBC = /lib/libc-2.12.1.so
LIB_EXT = .a
OBJ_EXT = .o
OSNAME = linux
OSVERS = 2.6.24-28-server
RANLIB = :
SITELIBEXP = /usr/local/share/perl/5.10.1
SITEARCHEXP = /usr/local/lib/perl/5.10.1
SO = so
VENDORARCHEXP = /usr/lib/perl5
VENDORLIBEXP = /usr/share/perl5
# --- MakeMaker constants section:
AR_STATIC_ARGS = cr
DIRFILESEP = /
DFSEP = $(DIRFILESEP)
NAME = WWW::RobotRules::Extended
NAME_SYM = WWW_RobotRules_Extended
VERSION = 0.02
VERSION_MACRO = VERSION
VERSION_SYM = 0_02
DEFINE_VERSION = -D$(VERSION_MACRO)=\"$(VERSION)\"
XS_VERSION = 0.02
XS_VERSION_MACRO = XS_VERSION
XS_DEFINE_VERSION = -D$(XS_VERSION_MACRO)=\"$(XS_VERSION)\"
INST_ARCHLIB = blib/arch
INST_SCRIPT = blib/script
INST_BIN = blib/bin
INST_LIB = blib/lib
INST_MAN1DIR = blib/man1
INST_MAN3DIR = blib/man3
MAN1EXT = 1p
MAN3EXT = 3pm
INSTALLDIRS = site
DESTDIR =
PREFIX = $(SITEPREFIX)
PERLPREFIX = /usr
SITEPREFIX = /usr/local
VENDORPREFIX = /usr
INSTALLPRIVLIB = /usr/share/perl/5.10
DESTINSTALLPRIVLIB = $(DESTDIR)$(INSTALLPRIVLIB)
INSTALLSITELIB = /usr/local/share/perl/5.10.1
DESTINSTALLSITELIB = $(DESTDIR)$(INSTALLSITELIB)
INSTALLVENDORLIB = /usr/share/perl5
DESTINSTALLVENDORLIB = $(DESTDIR)$(INSTALLVENDORLIB)
INSTALLARCHLIB = /usr/lib/perl/5.10
DESTINSTALLARCHLIB = $(DESTDIR)$(INSTALLARCHLIB)
INSTALLSITEARCH = /usr/local/lib/perl/5.10.1
DESTINSTALLSITEARCH = $(DESTDIR)$(INSTALLSITEARCH)
INSTALLVENDORARCH = /usr/lib/perl5
DESTINSTALLVENDORARCH = $(DESTDIR)$(INSTALLVENDORARCH)
INSTALLBIN = /usr/bin
DESTINSTALLBIN = $(DESTDIR)$(INSTALLBIN)
INSTALLSITEBIN = /usr/local/bin
DESTINSTALLSITEBIN = $(DESTDIR)$(INSTALLSITEBIN)
INSTALLVENDORBIN = /usr/bin
DESTINSTALLVENDORBIN = $(DESTDIR)$(INSTALLVENDORBIN)
INSTALLSCRIPT = /usr/bin
DESTINSTALLSCRIPT = $(DESTDIR)$(INSTALLSCRIPT)
INSTALLSITESCRIPT = /usr/local/bin
DESTINSTALLSITESCRIPT = $(DESTDIR)$(INSTALLSITESCRIPT)
INSTALLVENDORSCRIPT = /usr/bin
DESTINSTALLVENDORSCRIPT = $(DESTDIR)$(INSTALLVENDORSCRIPT)
INSTALLMAN1DIR = /usr/share/man/man1
DESTINSTALLMAN1DIR = $(DESTDIR)$(INSTALLMAN1DIR)
INSTALLSITEMAN1DIR = /usr/local/man/man1
DESTINSTALLSITEMAN1DIR = $(DESTDIR)$(INSTALLSITEMAN1DIR)
INSTALLVENDORMAN1DIR = /usr/share/man/man1
DESTINSTALLVENDORMAN1DIR = $(DESTDIR)$(INSTALLVENDORMAN1DIR)
INSTALLMAN3DIR = /usr/share/man/man3
DESTINSTALLMAN3DIR = $(DESTDIR)$(INSTALLMAN3DIR)
INSTALLSITEMAN3DIR = /usr/local/man/man3
DESTINSTALLSITEMAN3DIR = $(DESTDIR)$(INSTALLSITEMAN3DIR)
INSTALLVENDORMAN3DIR = /usr/share/man/man3
DESTINSTALLVENDORMAN3DIR = $(DESTDIR)$(INSTALLVENDORMAN3DIR)
PERL_LIB = /usr/share/perl/5.10
PERL_ARCHLIB = /usr/lib/perl/5.10
LIBPERL_A = libperl.a
FIRST_MAKEFILE = Makefile
MAKEFILE_OLD = Makefile.old
MAKE_APERL_FILE = Makefile.aperl
PERLMAINCC = $(CC)
PERL_INC = /usr/lib/perl/5.10/CORE
PERL = /usr/bin/perl
FULLPERL = /usr/bin/perl
ABSPERL = $(PERL)
PERLRUN = $(PERL)
FULLPERLRUN = $(FULLPERL)
ABSPERLRUN = $(ABSPERL)
PERLRUNINST = $(PERLRUN) "-I$(INST_ARCHLIB)" "-I$(INST_LIB)"
FULLPERLRUNINST = $(FULLPERLRUN) "-I$(INST_ARCHLIB)" "-I$(INST_LIB)"
ABSPERLRUNINST = $(ABSPERLRUN) "-I$(INST_ARCHLIB)" "-I$(INST_LIB)"
PERL_CORE = 0
PERM_DIR = 755
PERM_RW = 644
PERM_RWX = 755
MAKEMAKER = /usr/local/share/perl/5.10.1/ExtUtils/MakeMaker.pm
MM_VERSION = 6.62
MM_REVISION = 66200
# FULLEXT = Pathname for extension directory (eg Foo/Bar/Oracle).
# BASEEXT = Basename part of FULLEXT. May be just equal FULLEXT. (eg Oracle)
# PARENT_NAME = NAME without BASEEXT and no trailing :: (eg Foo::Bar)
# DLBASE = Basename part of dynamic library. May be just equal BASEEXT.
MAKE = make
FULLEXT = WWW/RobotRules/Extended
BASEEXT = Extended
PARENT_NAME = WWW::RobotRules
DLBASE = $(BASEEXT)
VERSION_FROM = lib/WWW/RobotRules/Extended.pm
OBJECT =
LDFROM = $(OBJECT)
LINKTYPE = dynamic
BOOTDEP =
# Handy lists of source code files:
XS_FILES =
C_FILES =
O_FILES =
H_FILES =
MAN1PODS =
MAN3PODS = lib/WWW/RobotRules/Extended.pm
# Where is the Config information that we are using/depend on
CONFIGDEP = $(PERL_ARCHLIB)$(DFSEP)Config.pm $(PERL_INC)$(DFSEP)config.h
# Where to build things
INST_LIBDIR = $(INST_LIB)/WWW/RobotRules
INST_ARCHLIBDIR = $(INST_ARCHLIB)/WWW/RobotRules
INST_AUTODIR = $(INST_LIB)/auto/$(FULLEXT)
INST_ARCHAUTODIR = $(INST_ARCHLIB)/auto/$(FULLEXT)
INST_STATIC =
INST_DYNAMIC =
INST_BOOT =
# Extra linker info
EXPORT_LIST =
PERL_ARCHIVE =
PERL_ARCHIVE_AFTER =
TO_INST_PM = lib/WWW/RobotRules/Extended.pm
PM_TO_BLIB = lib/WWW/RobotRules/Extended.pm \
blib/lib/WWW/RobotRules/Extended.pm
# --- MakeMaker platform_constants section:
MM_Unix_VERSION = 6.62
PERL_MALLOC_DEF = -DPERL_EXTMALLOC_DEF -Dmalloc=Perl_malloc -Dfree=Perl_mfree -Drealloc=Perl_realloc -Dcalloc=Perl_calloc
# --- MakeMaker tool_autosplit section:
# Usage: $(AUTOSPLITFILE) FileToSplit AutoDirToSplitInto
AUTOSPLITFILE = $(ABSPERLRUN) -e 'use AutoSplit; autosplit($$ARGV[0], $$ARGV[1], 0, 1, 1)' --
# --- MakeMaker tool_xsubpp section:
# --- MakeMaker tools_other section:
SHELL = /bin/sh
CHMOD = chmod
CP = cp
MV = mv
NOOP = $(TRUE)
NOECHO = @
RM_F = rm -f
RM_RF = rm -rf
TEST_F = test -f
TOUCH = touch
UMASK_NULL = umask 0
DEV_NULL = > /dev/null 2>&1
MKPATH = $(ABSPERLRUN) -MExtUtils::Command -e 'mkpath' --
EQUALIZE_TIMESTAMP = $(ABSPERLRUN) -MExtUtils::Command -e 'eqtime' --
FALSE = false
TRUE = true
ECHO = echo
ECHO_N = echo -n
UNINST = 0
VERBINST = 0
MOD_INSTALL = $(ABSPERLRUN) -MExtUtils::Install -e 'install([ from_to => {@ARGV}, verbose => '\''$(VERBINST)'\'', uninstall_shadows => '\''$(UNINST)'\'', dir_mode => '\''$(PERM_DIR)'\'' ]);' --
DOC_INSTALL = $(ABSPERLRUN) -MExtUtils::Command::MM -e 'perllocal_install' --
UNINSTALL = $(ABSPERLRUN) -MExtUtils::Command::MM -e 'uninstall' --
WARN_IF_OLD_PACKLIST = $(ABSPERLRUN) -MExtUtils::Command::MM -e 'warn_if_old_packlist' --
MACROSTART =
MACROEND =
USEMAKEFILE = -f
FIXIN = $(ABSPERLRUN) -MExtUtils::MY -e 'MY->fixin(shift)' --
# --- MakeMaker makemakerdflt section:
makemakerdflt : all
$(NOECHO) $(NOOP)
# --- MakeMaker dist section:
TAR = tar
TARFLAGS = cvf
ZIP = zip
ZIPFLAGS = -r
COMPRESS = gzip -9f
SUFFIX = gz
SHAR = shar
PREOP = $(NOECHO) $(NOOP)
POSTOP = $(NOECHO) $(NOOP)
TO_UNIX = $(NOECHO) $(NOOP)
CI = ci -u
RCS_LABEL = rcs -Nv$(VERSION_SYM): -q
DIST_CP = best
DIST_DEFAULT = tardist
DISTNAME = WWW-RobotRules-Extended
DISTVNAME = WWW-RobotRules-Extended-0.02
# --- MakeMaker macro section:
# --- MakeMaker depend section:
# --- MakeMaker cflags section:
# --- MakeMaker const_loadlibs section:
# --- MakeMaker const_cccmd section:
# --- MakeMaker post_constants section:
# --- MakeMaker pasthru section:
PASTHRU = LIBPERL_A="$(LIBPERL_A)"\
LINKTYPE="$(LINKTYPE)"\
PREFIX="$(PREFIX)"
# --- MakeMaker special_targets section:
.SUFFIXES : .xs .c .C .cpp .i .s .cxx .cc $(OBJ_EXT)
.PHONY: all config static dynamic test linkext manifest blibdirs clean realclean disttest distdir
# --- MakeMaker c_o section:
# --- MakeMaker xs_c section:
# --- MakeMaker xs_o section:
# --- MakeMaker top_targets section:
all :: pure_all manifypods
$(NOECHO) $(NOOP)
pure_all :: config pm_to_blib subdirs linkext
$(NOECHO) $(NOOP)
subdirs :: $(MYEXTLIB)
$(NOECHO) $(NOOP)
config :: $(FIRST_MAKEFILE) blibdirs
$(NOECHO) $(NOOP)
help :
perldoc ExtUtils::MakeMaker
# --- MakeMaker blibdirs section:
blibdirs : $(INST_LIBDIR)$(DFSEP).exists $(INST_ARCHLIB)$(DFSEP).exists $(INST_AUTODIR)$(DFSEP).exists $(INST_ARCHAUTODIR)$(DFSEP).exists $(INST_BIN)$(DFSEP).exists $(INST_SCRIPT)$(DFSEP).exists $(INST_MAN1DIR)$(DFSEP).exists $(INST_MAN3DIR)$(DFSEP)....
$(NOECHO) $(NOOP)
# Backwards compat with 6.18 through 6.25
blibdirs.ts : blibdirs
$(NOECHO) $(NOOP)
$(INST_LIBDIR)$(DFSEP).exists :: Makefile.PL
$(NOECHO) $(MKPATH) $(INST_LIBDIR)
$(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_LIBDIR)
$(NOECHO) $(TOUCH) $(INST_LIBDIR)$(DFSEP).exists
$(INST_ARCHLIB)$(DFSEP).exists :: Makefile.PL
$(NOECHO) $(MKPATH) $(INST_ARCHLIB)
$(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_ARCHLIB)
$(NOECHO) $(TOUCH) $(INST_ARCHLIB)$(DFSEP).exists
$(INST_AUTODIR)$(DFSEP).exists :: Makefile.PL
$(NOECHO) $(MKPATH) $(INST_AUTODIR)
$(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_AUTODIR)
$(NOECHO) $(TOUCH) $(INST_AUTODIR)$(DFSEP).exists
$(INST_ARCHAUTODIR)$(DFSEP).exists :: Makefile.PL
$(NOECHO) $(MKPATH) $(INST_ARCHAUTODIR)
$(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_ARCHAUTODIR)
$(NOECHO) $(TOUCH) $(INST_ARCHAUTODIR)$(DFSEP).exists
$(INST_BIN)$(DFSEP).exists :: Makefile.PL
$(NOECHO) $(MKPATH) $(INST_BIN)
$(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_BIN)
$(NOECHO) $(TOUCH) $(INST_BIN)$(DFSEP).exists
$(INST_SCRIPT)$(DFSEP).exists :: Makefile.PL
$(NOECHO) $(MKPATH) $(INST_SCRIPT)
$(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_SCRIPT)
$(NOECHO) $(TOUCH) $(INST_SCRIPT)$(DFSEP).exists
$(INST_MAN1DIR)$(DFSEP).exists :: Makefile.PL
$(NOECHO) $(MKPATH) $(INST_MAN1DIR)
$(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_MAN1DIR)
$(NOECHO) $(TOUCH) $(INST_MAN1DIR)$(DFSEP).exists
$(INST_MAN3DIR)$(DFSEP).exists :: Makefile.PL
$(NOECHO) $(MKPATH) $(INST_MAN3DIR)
$(NOECHO) $(CHMOD) $(PERM_DIR) $(INST_MAN3DIR)
$(NOECHO) $(TOUCH) $(INST_MAN3DIR)$(DFSEP).exists
# --- MakeMaker linkext section:
linkext :: $(LINKTYPE)
$(NOECHO) $(NOOP)
# --- MakeMaker dlsyms section:
# --- MakeMaker dynamic section:
dynamic :: $(FIRST_MAKEFILE) $(INST_DYNAMIC) $(INST_BOOT)
$(NOECHO) $(NOOP)
# --- MakeMaker dynamic_bs section:
BOOTSTRAP =
# --- MakeMaker dynamic_lib section:
# --- MakeMaker static section:
## $(INST_PM) has been moved to the all: target.
## It remains here for awhile to allow for old usage: "make static"
static :: $(FIRST_MAKEFILE) $(INST_STATIC)
$(NOECHO) $(NOOP)
# --- MakeMaker static_lib section:
# --- MakeMaker manifypods section:
POD2MAN_EXE = $(PERLRUN) "-MExtUtils::Command::MM" -e pod2man "--"
POD2MAN = $(POD2MAN_EXE)
manifypods : pure_all \
lib/WWW/RobotRules/Extended.pm
$(NOECHO) $(POD2MAN) --section=3 --perm_rw=$(PERM_RW) \
lib/WWW/RobotRules/Extended.pm $(INST_MAN3DIR)/WWW::RobotRules::Extended.$(MAN3EXT)
# --- MakeMaker processPL section:
# --- MakeMaker installbin section:
# --- MakeMaker subdirs section:
# none
# --- MakeMaker clean_subdirs section:
clean_subdirs :
$(NOECHO) $(NOOP)
# --- MakeMaker clean section:
# Delete temporary files but do not touch installed files. We don't delete
# the Makefile here so a later make realclean still has a makefile to use.
clean :: clean_subdirs
- $(RM_F) \
*$(LIB_EXT) core \
core.[0-9] $(INST_ARCHAUTODIR)/extralibs.all \
core.[0-9][0-9] $(BASEEXT).bso \
pm_to_blib.ts MYMETA.json \
core.[0-9][0-9][0-9][0-9] MYMETA.yml \
$(BASEEXT).x $(BOOTSTRAP) \
perl$(EXE_EXT) tmon.out \
*$(OBJ_EXT) pm_to_blib \
$(INST_ARCHAUTODIR)/extralibs.ld blibdirs.ts \
core.[0-9][0-9][0-9][0-9][0-9] *perl.core \
core.*perl.*.? $(MAKE_APERL_FILE) \
$(BASEEXT).def perl \
core.[0-9][0-9][0-9] mon.out \
lib$(BASEEXT).def perlmain.c \
perl.exe so_locations \
$(BASEEXT).exp
- $(RM_RF) \
WWW-RobotRules-Extended-* blib
- $(MV) $(FIRST_MAKEFILE) $(MAKEFILE_OLD) $(DEV_NULL)
# --- MakeMaker realclean_subdirs section:
realclean_subdirs :
$(NOECHO) $(NOOP)
# --- MakeMaker realclean section:
# Delete temporary files (via clean) and also delete dist files
realclean purge :: clean realclean_subdirs
- $(RM_F) \
$(MAKEFILE_OLD) $(FIRST_MAKEFILE)
- $(RM_RF) \
$(DISTVNAME)
# --- MakeMaker metafile section:
metafile : create_distdir
$(NOECHO) $(ECHO) Generating META.yml
$(NOECHO) $(ECHO) '---' > META_new.yml
$(NOECHO) $(ECHO) 'abstract: '\''database of robots.txt-derived permissions.'\''' >> META_new.yml
$(NOECHO) $(ECHO) 'author:' >> META_new.yml
$(NOECHO) $(ECHO) ' - '\''Yannick Simon <yannick.simon@gmail.com>'\''' >> META_new.yml
$(NOECHO) $(ECHO) 'build_requires:' >> META_new.yml
$(NOECHO) $(ECHO) ' ExtUtils::MakeMaker: 0' >> META_new.yml
$(NOECHO) $(ECHO) 'configure_requires:' >> META_new.yml
$(NOECHO) $(ECHO) ' ExtUtils::MakeMaker: 0' >> META_new.yml
$(NOECHO) $(ECHO) 'dynamic_config: 1' >> META_new.yml
$(NOECHO) $(ECHO) 'generated_by: '\''ExtUtils::MakeMaker version 6.62, CPAN::Meta::Converter version 2.112621'\''' >> META_new.yml
$(NOECHO) $(ECHO) 'license: perl' >> META_new.yml
$(NOECHO) $(ECHO) 'meta-spec:' >> META_new.yml
$(NOECHO) $(ECHO) ' url: http://module-build.sourceforge.net/META-spec-v1.4.html' >> META_new.yml
$(NOECHO) $(ECHO) ' version: 1.4' >> META_new.yml
$(NOECHO) $(ECHO) 'name: WWW-RobotRules-Extended' >> META_new.yml
$(NOECHO) $(ECHO) 'no_index:' >> META_new.yml
$(NOECHO) $(ECHO) ' directory:' >> META_new.yml
$(NOECHO) $(ECHO) ' - t' >> META_new.yml
$(NOECHO) $(ECHO) ' - inc' >> META_new.yml
$(NOECHO) $(ECHO) 'requires:' >> META_new.yml
$(NOECHO) $(ECHO) ' Test::More: 0' >> META_new.yml
$(NOECHO) $(ECHO) 'version: 0.02' >> META_new.yml
-$(NOECHO) $(MV) META_new.yml $(DISTVNAME)/META.yml
$(NOECHO) $(ECHO) Generating META.json
$(NOECHO) $(ECHO) '{' > META_new.json
$(NOECHO) $(ECHO) ' "abstract" : "database of robots.txt-derived permissions.",' >> META_new.json
$(NOECHO) $(ECHO) ' "author" : [' >> META_new.json
$(NOECHO) $(ECHO) ' "Yannick Simon <yannick.simon@gmail.com>"' >> META_new.json
$(NOECHO) $(ECHO) ' ],' >> META_new.json
$(NOECHO) $(ECHO) ' "dynamic_config" : 1,' >> META_new.json
$(NOECHO) $(ECHO) ' "generated_by" : "ExtUtils::MakeMaker version 6.62, CPAN::Meta::Converter version 2.112621",' >> META_new.json
$(NOECHO) $(ECHO) ' "license" : [' >> META_new.json
$(NOECHO) $(ECHO) ' "perl_5"' >> META_new.json
$(NOECHO) $(ECHO) ' ],' >> META_new.json
$(NOECHO) $(ECHO) ' "meta-spec" : {' >> META_new.json
$(NOECHO) $(ECHO) ' "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",' >> META_new.json
$(NOECHO) $(ECHO) ' "version" : "2"' >> META_new.json
$(NOECHO) $(ECHO) ' },' >> META_new.json
$(NOECHO) $(ECHO) ' "name" : "WWW-RobotRules-Extended",' >> META_new.json
$(NOECHO) $(ECHO) ' "no_index" : {' >> META_new.json
$(NOECHO) $(ECHO) ' "directory" : [' >> META_new.json
$(NOECHO) $(ECHO) ' "t",' >> META_new.json
$(NOECHO) $(ECHO) ' "inc"' >> META_new.json
$(NOECHO) $(ECHO) ' ]' >> META_new.json
$(NOECHO) $(ECHO) ' },' >> META_new.json
$(NOECHO) $(ECHO) ' "prereqs" : {' >> META_new.json
$(NOECHO) $(ECHO) ' "build" : {' >> META_new.json
$(NOECHO) $(ECHO) ' "requires" : {' >> META_new.json
$(NOECHO) $(ECHO) ' "ExtUtils::MakeMaker" : 0' >> META_new.json
$(NOECHO) $(ECHO) ' }' >> META_new.json
$(NOECHO) $(ECHO) ' },' >> META_new.json
$(NOECHO) $(ECHO) ' "configure" : {' >> META_new.json
$(NOECHO) $(ECHO) ' "requires" : {' >> META_new.json
$(NOECHO) $(ECHO) ' "ExtUtils::MakeMaker" : 0' >> META_new.json
$(NOECHO) $(ECHO) ' }' >> META_new.json
$(NOECHO) $(ECHO) ' },' >> META_new.json
$(NOECHO) $(ECHO) ' "runtime" : {' >> META_new.json
$(NOECHO) $(ECHO) ' "requires" : {' >> META_new.json
$(NOECHO) $(ECHO) ' "Test::More" : 0' >> META_new.json
$(NOECHO) $(ECHO) ' }' >> META_new.json
$(NOECHO) $(ECHO) ' }' >> META_new.json
$(NOECHO) $(ECHO) ' },' >> META_new.json
$(NOECHO) $(ECHO) ' "release_status" : "stable",' >> META_new.json
$(NOECHO) $(ECHO) ' "version" : "0.02"' >> META_new.json
$(NOECHO) $(ECHO) '}' >> META_new.json
-$(NOECHO) $(MV) META_new.json $(DISTVNAME)/META.json
# --- MakeMaker signature section:
signature :
cpansign -s
# --- MakeMaker dist_basics section:
distclean :: realclean distcheck
$(NOECHO) $(NOOP)
distcheck :
$(PERLRUN) "-MExtUtils::Manifest=fullcheck" -e fullcheck
skipcheck :
$(PERLRUN) "-MExtUtils::Manifest=skipcheck" -e skipcheck
manifest :
$(PERLRUN) "-MExtUtils::Manifest=mkmanifest" -e mkmanifest
veryclean : realclean
$(RM_F) *~ */*~ *.orig */*.orig *.bak */*.bak *.old */*.old
# --- MakeMaker dist_core section:
dist : $(DIST_DEFAULT) $(FIRST_MAKEFILE)
$(NOECHO) $(ABSPERLRUN) -l -e 'print '\''Warning: Makefile possibly out of date with $(VERSION_FROM)'\''' \
-e ' if -e '\''$(VERSION_FROM)'\'' and -M '\''$(VERSION_FROM)'\'' < -M '\''$(FIRST_MAKEFILE)'\'';' --
tardist : $(DISTVNAME).tar$(SUFFIX)
$(NOECHO) $(NOOP)
uutardist : $(DISTVNAME).tar$(SUFFIX)
uuencode $(DISTVNAME).tar$(SUFFIX) $(DISTVNAME).tar$(SUFFIX) > $(DISTVNAME).tar$(SUFFIX)_uu
$(DISTVNAME).tar$(SUFFIX) : distdir
$(PREOP)
$(TO_UNIX)
$(TAR) $(TARFLAGS) $(DISTVNAME).tar $(DISTVNAME)
$(RM_RF) $(DISTVNAME)
$(COMPRESS) $(DISTVNAME).tar
$(POSTOP)
zipdist : $(DISTVNAME).zip
$(NOECHO) $(NOOP)
$(DISTVNAME).zip : distdir
$(PREOP)
$(ZIP) $(ZIPFLAGS) $(DISTVNAME).zip $(DISTVNAME)
$(RM_RF) $(DISTVNAME)
$(POSTOP)
shdist : distdir
$(PREOP)
$(SHAR) $(DISTVNAME) > $(DISTVNAME).shar
$(RM_RF) $(DISTVNAME)
$(POSTOP)
# --- MakeMaker distdir section:
create_distdir :
$(RM_RF) $(DISTVNAME)
$(PERLRUN) "-MExtUtils::Manifest=manicopy,maniread" \
-e "manicopy(maniread(),'$(DISTVNAME)', '$(DIST_CP)');"
distdir : create_distdir distmeta
$(NOECHO) $(NOOP)
# --- MakeMaker dist_test section:
disttest : distdir
cd $(DISTVNAME) && $(ABSPERLRUN) Makefile.PL
cd $(DISTVNAME) && $(MAKE) $(PASTHRU)
cd $(DISTVNAME) && $(MAKE) test $(PASTHRU)
# --- MakeMaker dist_ci section:
ci :
$(PERLRUN) "-MExtUtils::Manifest=maniread" \
-e "@all = keys %{ maniread() };" \
-e "print(qq{Executing $(CI) @all\n}); system(qq{$(CI) @all});" \
-e "print(qq{Executing $(RCS_LABEL) ...\n}); system(qq{$(RCS_LABEL) @all});"
# --- MakeMaker distmeta section:
distmeta : create_distdir metafile
$(NOECHO) cd $(DISTVNAME) && $(ABSPERLRUN) -MExtUtils::Manifest=maniadd -e 'exit unless -e q{META.yml};' \
-e 'eval { maniadd({q{META.yml} => q{Module YAML meta-data (added by MakeMaker)}}) }' \
-e ' or print "Could not add META.yml to MANIFEST: $${'\''@'\''}\n"' --
$(NOECHO) cd $(DISTVNAME) && $(ABSPERLRUN) -MExtUtils::Manifest=maniadd -e 'exit unless -f q{META.json};' \
-e 'eval { maniadd({q{META.json} => q{Module JSON meta-data (added by MakeMaker)}}) }' \
-e ' or print "Could not add META.json to MANIFEST: $${'\''@'\''}\n"' --
# --- MakeMaker distsignature section:
distsignature : create_distdir
$(NOECHO) cd $(DISTVNAME) && $(ABSPERLRUN) -MExtUtils::Manifest=maniadd -e 'eval { maniadd({q{SIGNATURE} => q{Public-key signature (added by MakeMaker)}}) } ' \
-e ' or print "Could not add SIGNATURE to MANIFEST: $${'\''@'\''}\n"' --
$(NOECHO) cd $(DISTVNAME) && $(TOUCH) SIGNATURE
cd $(DISTVNAME) && cpansign -s
# --- MakeMaker install section:
install :: pure_install doc_install
$(NOECHO) $(NOOP)
install_perl :: pure_perl_install doc_perl_install
$(NOECHO) $(NOOP)
install_site :: pure_site_install doc_site_install
$(NOECHO) $(NOOP)
install_vendor :: pure_vendor_install doc_vendor_install
$(NOECHO) $(NOOP)
pure_install :: pure_$(INSTALLDIRS)_install
$(NOECHO) $(NOOP)
doc_install :: doc_$(INSTALLDIRS)_install
$(NOECHO) $(NOOP)
pure__install : pure_site_install
$(NOECHO) $(ECHO) INSTALLDIRS not defined, defaulting to INSTALLDIRS=site
doc__install : doc_site_install
$(NOECHO) $(ECHO) INSTALLDIRS not defined, defaulting to INSTALLDIRS=site
pure_perl_install :: all
$(NOECHO) $(MOD_INSTALL) \
read $(PERL_ARCHLIB)/auto/$(FULLEXT)/.packlist \
write $(DESTINSTALLARCHLIB)/auto/$(FULLEXT)/.packlist \
$(INST_LIB) $(DESTINSTALLPRIVLIB) \
$(INST_ARCHLIB) $(DESTINSTALLARCHLIB) \
$(INST_BIN) $(DESTINSTALLBIN) \
$(INST_SCRIPT) $(DESTINSTALLSCRIPT) \
$(INST_MAN1DIR) $(DESTINSTALLMAN1DIR) \
$(INST_MAN3DIR) $(DESTINSTALLMAN3DIR)
$(NOECHO) $(WARN_IF_OLD_PACKLIST) \
$(SITEARCHEXP)/auto/$(FULLEXT)
pure_site_install :: all
$(NOECHO) $(MOD_INSTALL) \
read $(SITEARCHEXP)/auto/$(FULLEXT)/.packlist \
write $(DESTINSTALLSITEARCH)/auto/$(FULLEXT)/.packlist \
$(INST_LIB) $(DESTINSTALLSITELIB) \
$(INST_ARCHLIB) $(DESTINSTALLSITEARCH) \
$(INST_BIN) $(DESTINSTALLSITEBIN) \
$(INST_SCRIPT) $(DESTINSTALLSITESCRIPT) \
$(INST_MAN1DIR) $(DESTINSTALLSITEMAN1DIR) \
$(INST_MAN3DIR) $(DESTINSTALLSITEMAN3DIR)
$(NOECHO) $(WARN_IF_OLD_PACKLIST) \
$(PERL_ARCHLIB)/auto/$(FULLEXT)
pure_vendor_install :: all
$(NOECHO) $(MOD_INSTALL) \
read $(VENDORARCHEXP)/auto/$(FULLEXT)/.packlist \
write $(DESTINSTALLVENDORARCH)/auto/$(FULLEXT)/.packlist \
$(INST_LIB) $(DESTINSTALLVENDORLIB) \
$(INST_ARCHLIB) $(DESTINSTALLVENDORARCH) \
$(INST_BIN) $(DESTINSTALLVENDORBIN) \
$(INST_SCRIPT) $(DESTINSTALLVENDORSCRIPT) \
$(INST_MAN1DIR) $(DESTINSTALLVENDORMAN1DIR) \
$(INST_MAN3DIR) $(DESTINSTALLVENDORMAN3DIR)
doc_perl_install :: all
$(NOECHO) $(ECHO) Appending installation info to $(DESTINSTALLARCHLIB)/perllocal.pod
-$(NOECHO) $(MKPATH) $(DESTINSTALLARCHLIB)
-$(NOECHO) $(DOC_INSTALL) \
"Module" "$(NAME)" \
"installed into" "$(INSTALLPRIVLIB)" \
LINKTYPE "$(LINKTYPE)" \
VERSION "$(VERSION)" \
EXE_FILES "$(EXE_FILES)" \
>> $(DESTINSTALLARCHLIB)/perllocal.pod
doc_site_install :: all
$(NOECHO) $(ECHO) Appending installation info to $(DESTINSTALLARCHLIB)/perllocal.pod
-$(NOECHO) $(MKPATH) $(DESTINSTALLARCHLIB)
-$(NOECHO) $(DOC_INSTALL) \
"Module" "$(NAME)" \
"installed into" "$(INSTALLSITELIB)" \
LINKTYPE "$(LINKTYPE)" \
VERSION "$(VERSION)" \
EXE_FILES "$(EXE_FILES)" \
>> $(DESTINSTALLARCHLIB)/perllocal.pod
doc_vendor_install :: all
$(NOECHO) $(ECHO) Appending installation info to $(DESTINSTALLARCHLIB)/perllocal.pod
-$(NOECHO) $(MKPATH) $(DESTINSTALLARCHLIB)
-$(NOECHO) $(DOC_INSTALL) \
"Module" "$(NAME)" \
"installed into" "$(INSTALLVENDORLIB)" \
LINKTYPE "$(LINKTYPE)" \
VERSION "$(VERSION)" \
EXE_FILES "$(EXE_FILES)" \
>> $(DESTINSTALLARCHLIB)/perllocal.pod
uninstall :: uninstall_from_$(INSTALLDIRS)dirs
$(NOECHO) $(NOOP)
uninstall_from_perldirs ::
$(NOECHO) $(UNINSTALL) $(PERL_ARCHLIB)/auto/$(FULLEXT)/.packlist
uninstall_from_sitedirs ::
$(NOECHO) $(UNINSTALL) $(SITEARCHEXP)/auto/$(FULLEXT)/.packlist
uninstall_from_vendordirs ::
$(NOECHO) $(UNINSTALL) $(VENDORARCHEXP)/auto/$(FULLEXT)/.packlist
# --- MakeMaker force section:
# Phony target to force checking subdirectories.
FORCE :
$(NOECHO) $(NOOP)
# --- MakeMaker perldepend section:
# --- MakeMaker makefile section:
# We take a very conservative approach here, but it's worth it.
# We move Makefile to Makefile.old here to avoid gnu make looping.
$(FIRST_MAKEFILE) : Makefile.PL $(CONFIGDEP)
$(NOECHO) $(ECHO) "Makefile out-of-date with respect to $?"
$(NOECHO) $(ECHO) "Cleaning current config before rebuilding Makefile..."
-$(NOECHO) $(RM_F) $(MAKEFILE_OLD)
-$(NOECHO) $(MV) $(FIRST_MAKEFILE) $(MAKEFILE_OLD)
- $(MAKE) $(USEMAKEFILE) $(MAKEFILE_OLD) clean $(DEV_NULL)
$(PERLRUN) Makefile.PL
$(NOECHO) $(ECHO) "==> Your Makefile has been rebuilt. <=="
$(NOECHO) $(ECHO) "==> Please rerun the $(MAKE) command. <=="
$(FALSE)
# --- MakeMaker staticmake section:
# --- MakeMaker makeaperl section ---
MAP_TARGET = perl
FULLPERL = /usr/bin/perl
$(MAP_TARGET) :: static $(MAKE_APERL_FILE)
$(MAKE) $(USEMAKEFILE) $(MAKE_APERL_FILE) $@
$(MAKE_APERL_FILE) : $(FIRST_MAKEFILE) pm_to_blib
$(NOECHO) $(ECHO) Writing \"$(MAKE_APERL_FILE)\" for this $(MAP_TARGET)
$(NOECHO) $(PERLRUNINST) \
Makefile.PL DIR= \
MAKEFILE=$(MAKE_APERL_FILE) LINKTYPE=static \
MAKEAPERL=1 NORECURS=1 CCCDLFLAGS=
# --- MakeMaker test section:
TEST_VERBOSE=0
TEST_TYPE=test_$(LINKTYPE)
TEST_FILE = test.pl
TEST_FILES = t/*.t
TESTDB_SW = -d
testdb :: testdb_$(LINKTYPE)
test :: $(TEST_TYPE) subdirs-test
subdirs-test ::
$(NOECHO) $(NOOP)
test_dynamic :: pure_all
PERL_DL_NONLAZY=1 $(FULLPERLRUN) "-MExtUtils::Command::MM" "-e" "test_harness($(TEST_VERBOSE), '$(INST_LIB)', '$(INST_ARCHLIB)')" $(TEST_FILES)
testdb_dynamic :: pure_all
PERL_DL_NONLAZY=1 $(FULLPERLRUN) $(TESTDB_SW) "-I$(INST_LIB)" "-I$(INST_ARCHLIB)" $(TEST_FILE)
test_ : test_dynamic
test_static :: test_dynamic
testdb_static :: testdb_dynamic
# --- MakeMaker ppd section:
# Creates a PPD (Perl Package Description) for a binary distribution.
ppd :
$(NOECHO) $(ECHO) '<SOFTPKG NAME="$(DISTNAME)" VERSION="0.02">' > $(DISTNAME).ppd
$(NOECHO) $(ECHO) ' <ABSTRACT>database of robots.txt-derived permissions.</ABSTRACT>' >> $(DISTNAME).ppd
$(NOECHO) $(ECHO) ' <AUTHOR>Yannick Simon <yannick.simon@gmail.com></AUTHOR>' >> $(DISTNAME).ppd
$(NOECHO) $(ECHO) ' <IMPLEMENTATION>' >> $(DISTNAME).ppd
$(NOECHO) $(ECHO) ' <REQUIRE NAME="Test::More" />' >> $(DISTNAME).ppd
$(NOECHO) $(ECHO) ' <ARCHITECTURE NAME="i686-linux-gnu-thread-multi-5.10" />' >> $(DISTNAME).ppd
$(NOECHO) $(ECHO) ' <CODEBASE HREF="" />' >> $(DISTNAME).ppd
$(NOECHO) $(ECHO) ' </IMPLEMENTATION>' >> $(DISTNAME).ppd
$(NOECHO) $(ECHO) '</SOFTPKG>' >> $(DISTNAME).ppd
# --- MakeMaker pm_to_blib section:
pm_to_blib : $(FIRST_MAKEFILE) $(TO_INST_PM)
$(NOECHO) $(ABSPERLRUN) -MExtUtils::Install -e 'pm_to_blib({@ARGV}, '\''$(INST_LIB)/auto'\'', q[$(PM_FILTER)], '\''$(PERM_DIR)'\'')' -- \
lib/WWW/RobotRules/Extended.pm blib/lib/WWW/RobotRules/Extended.pm
$(NOECHO) $(TOUCH) pm_to_blib
# --- MakeMaker selfdocument section:
# --- MakeMaker postamble section:
# End.
WWW-RobotRules-Extended
The README is used to introduce the module and provide instructions on
how to install the module, any machine dependencies it may have (for
example C compilers and installed libraries) and any other information
that should be provided before the module is installed.
A README file is required for CPAN modules since CPAN extracts the README
file from a module distribution so that people browsing the archive
can use it to get an idea of the module's uses. It is usually a good idea
to provide version information here so that people can decide whether
fixes for the module are worth downloading.
INSTALLATION
To install this module, run the following commands:
perl Makefile.PL
make
make test
make install
SUPPORT AND DOCUMENTATION
After installing, you can find documentation for this module with the
perldoc command.
perldoc WWW::RobotRules::Extended
You can also look for information at:
RT, CPAN's request tracker (report bugs here)
http://rt.cpan.org/NoAuth/Bugs.html?Dist=WWW-RobotRules-Extended
AnnoCPAN, Annotated CPAN documentation
http://annocpan.org/dist/WWW-RobotRules-Extended
CPAN Ratings
http://cpanratings.perl.org/d/WWW-RobotRules-Extended
Search CPAN
http://search.cpan.org/dist/WWW-RobotRules-Extended/
LICENSE AND COPYRIGHT
Copyright (C) 2011 Yannick Simon
This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.
See http://dev.perl.org/licenses/ for more information.
README_FR.htm view on Meta::CPAN
<html>
<head>
<title>Yannick Simon - Information</title>
</head>
<body>
<h1>Yannick Simon</h1>
<p>Voir mon profil Google + <a href="https://plus.google.com/110523371936012102424">Yannick Simon</a><br>
mon profil Twitter <a href="http://twitter.com/ysimonx">@ysimonx</a></p>
<p>Ingenieur informaticien, je suis diplomé de l'<a href="http://www.ensam.fr/">ENSAM</a> (les Arts et métiers)
et pratique l'internet depuis 1995.
</p>
<p>Je fais des outils en "perl" :
<ul>
<li><a href="https://metacpan.org/module/WWW::RobotRules::Extended">WWW::RobotsRules::Extended</a> : qui
permet de verifier son robots.txt comme le fait googlebot : regles "allow", wildcard "*"...</li>.
</ul>
</p>
<p>Je suis actuellement directeur general adjoint de la societe
<a href="http://www.rueducommerce.fr/">RueDuCommerce</a>
societe de commerce en ligne en France, qui vend
des <a href="http://www.rueducommerce.fr/TV-Hifi-Home-Cinema/85-TV-LCD/">tv lcd</a> de grande marque (<a href="http://www.rueducommerce.fr/~samsung">samsung</a>,<a href="http://www.rueducommerce.fr/~lg">LG</a>...),
et nous sommes leaders de la vente d'<a href="http://www.rueducommerce.fr/Ordinateurs/3-Ordinateur-Portable/">ordinateur portable</a>.
</p>
</body>
</head>
blib*
Makefile
Makefile.old
Build
Build.bat
_build*
pm_to_blib*
*.tar.gz
.lwpcookies
cover_db
pod2htm*.tmp
WWW-RobotRules-Extended-*
lib/WWW/RobotRules/Extended.pm view on Meta::CPAN
package WWW::RobotRules::Extended;
use URI;
# The following methods must be provided by the subclass.
sub agent;
sub is_me;
sub visit;
sub no_visits;
sub last_visits;
sub fresh_until;
sub push_rules;
sub clear_rules;
sub rules;
sub dump;
=head1 NAME
WWW::RobotRules::Extended - database of robots.txt-derived permissions.
This is a fork of WWW::RobotRules
You should use WWW::RobotsRules::Extended if you want
to act as Googlebot : Google accept some improvments like "allow" directives
or wildcards "*" into rules
=head1 VERSION
Version 0.02
=cut
our $VERSION = '0.02';
=head1 SYNOPSIS
Quick summary of what the module does.
use WWW::RobotRules::Extended;
use LWP::Simple qw(get);
my $rules = WWW::RobotRules::Extended->new('MOMspider/1.0');
{
my $url = "http://some.place/robots.txt";
my $robots_txt = get $url;
$rules->parse($url, $robots_txt) if defined $robots_txt;
}
{
my $url = "http://some.other.place/robots.txt";
my $robots_txt = get $url;
$rules->parse($url, $robots_txt) if defined $robots_txt;
}
# Now we can check if a URL is valid for those servers
# whose "robots.txt" files we've gotten and parsed:
if($rules->allowed($url)) {
$c = get $url;
...
}
=head1 DESCRIPTION
This module parses F</robots.txt> files as specified in
"A Standard for Robot Exclusion", at
<http://www.robotstxt.org/wc/norobots.html>
It also parses rules that contains wildcards '*' and allow directives
like Google does.
Webmasters can use the F</robots.txt> file to forbid conforming
robots from accessing parts of their web site.
The parsed files are kept in a WWW::RobotRules::Extended object, and this object
provides methods to check if access to a given URL is prohibited. The
same WWW::RobotRules::Extended object can be used for one or more parsed
F</robots.txt> files on any number of hosts.
=head1 EXPORT
A list of functions that can be exported. You can delete this section
if you don't export anything, such as for a purely object-oriented module.
=head2 new
This is the constructor for WWW::RobotRules::Extended objects. The first
argument given to new() is the name of the robot.
=cut
sub new {
my($class, $ua) = @_;
# This ugly hack is needed to ensure backwards compatibility.
# The "WWW::RobotRules::Extended" class is now really abstract.
$class = "WWW::RobotRules::Extended::InCore" if $class eq "WWW::RobotRules::Extended";
my $self = bless { }, $class;
$self->agent($ua);
$self;
}
=head2 parse
The parse() method takes as arguments the URL that was used to
retrieve the F</robots.txt> file, and the contents of the file.
$rules->allowed($uri)
Returns TRUE if this robot is allowed to retrieve this URL.
=cut
sub parse {
my($self, $robot_txt_uri, $txt, $fresh_until) = @_;
$robot_txt_uri = URI->new("$robot_txt_uri");
my $netloc = $robot_txt_uri->host . ":" . $robot_txt_uri->port;
$self->clear_rules($netloc);
$self->fresh_until($netloc, $fresh_until || (time + 365*24*3600));
my $ua;
my $is_me = 0; # 1 iff this record is for me
my $is_anon = 0; # 1 iff this record is for *
my @me_alloweddisallowed = (); # rules allowed or disallowed for me
my @anon_alloweddisallowed = (); # rules allowed or disallowed for *
# blank lines are significant, so turn CRLF into LF to avoid generating
# false ones
$txt =~ s/\015\012/\012/g;
# split at \012 (LF) or \015 (CR) (Mac text files have just CR for EOL)
for(split(/[\012\015]/, $txt)) {
# Lines containing only a comment are discarded completely, and
# therefore do not indicate a record boundary.
next if /^\s*\#/;
s/\s*\#.*//; # remove comments at end-of-line
if (/^\s*User-Agent\s*:\s*(.*)/i) {
$ua = $1;
$ua =~ s/\s+$//;
if ($ua eq '*') { # if it's directive for all bots
$is_anon = 1;
}
elsif($self->is_me($ua)) { #if it's directives for this bot
$is_me = 1;
}
else {
$is_me = 0;
$is_anon = 0;
}
}
elsif (/^\s*(Disallow|Allow)\s*:\s*(.*)/i) {
unless (defined $ua) {
warn "RobotRules <$robot_txt_uri>: Disallow without preceding User-agent\n" if $^W;
$is_anon = 1; # assume that User-agent: * was intended
}
my $verb = $1;
my $allowdisallow = $2;
$allowdisallow =~ s/\s+$//;
if (length $allowdisallow) {
my $ignore;
eval {
my $u = URI->new_abs($allowdisallow, $robot_txt_uri);
$ignore++ if $u->scheme ne $robot_txt_uri->scheme;
$ignore++ if lc($u->host) ne lc($robot_txt_uri->host);
$ignore++ if $u->port ne $robot_txt_uri->port;
$allowdisallow = $u->path_query;
$allowdisallow = "/" unless length $allowdisallow;
};
next if $@;
next if $ignore;
}
# transform rules into regexp
# for instance : /shared/* => ^\/shared\/.*
my $rule = "^".$allowdisallow;
$rule=~ s/\//\\\//g;
$rule=~ s/\*/\.*/g;
$rule=~ s/\[/\\[/g;
$rule=~ s/\]/\\]/g;
$rule=~ s/\?/\\?/g;
$rule=~ s/\./\\./g;
if (length $allowdisallow) {
if ($is_me) {
push(@me_alloweddisallowed, $verb." ".$rule);
}
elsif ($is_anon) {
push(@anon_alloweddisallowed, $verb." ".$rule);
}
}
}
elsif (/\S\s*:/) {
# ignore
}
else {
warn "RobotRules <$robot_txt_uri>: Malformed record: <$_>\n" if $^W;
}
}
if ($is_me) {
$self->push_rules($netloc, @me_alloweddisallowed);
}
else {
$self->push_rules($netloc, @anon_alloweddisallowed);
}
}
=head2 is_me
=cut
=head2 allowed
Returns TRUE if this robot is allowed to retrieve this URL.
=cut
sub allowed {
my($self, $uri) = @_;
$uri = URI->new("$uri");
return 1 unless $uri->scheme eq 'http' or $uri->scheme eq 'https';
# Robots.txt applies to only those schemes.
my $netloc = $uri->host . ":" . $uri->port;
my $fresh_until = $self->fresh_until($netloc);
return -1 if !defined($fresh_until) || $fresh_until < time;
my $str = $uri->path_query;
my $rule;
my $verb;
my $rline;
my $result=1; # by default, all is allowed
for $rline ($self->rules($netloc)) {
if ($rline =~ /^(Disallow|Allow)\s*(.*)/i) {
$verb=lc($1);
$rule=$2;
if ($str =~ /$rule/) {
if ($verb eq "allow") { # here, the rule allows, so i return now
return 1;
};
if ($verb eq "disallow") { # here, the rule is disallowed, but we need to verify further
# if another "allow" rule is present for this url
$result=0;
}
}
$rule="";
}
}
return $result; # the rules have all been verified, if there is a matching disallow rule, $result should be 0
}
=head1 SUBROUTINES/METHODS
=head1 AUTHOR
Yannick Simon, C<< <yannick.simon at gmail.com> >>
=head1 BUGS
Please report any bugs or feature requests to C<bug-www-robotrules-extended at rt.cpan.org>, or through
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=WWW-RobotRules-Extended>. I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.
=head1 SUPPORT
You can find documentation for this module with the perldoc command.
perldoc WWW::RobotRules::Extended
You can also look for information at:
=over 4
=item * RT: CPAN's request tracker (report bugs here)
L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=WWW-RobotRules-Extended>
=item * AnnoCPAN: Annotated CPAN documentation
L<http://annocpan.org/dist/WWW-RobotRules-Extended>
=item * CPAN Ratings
L<http://cpanratings.perl.org/d/WWW-RobotRules-Extended>
=item * Search CPAN
L<http://search.cpan.org/dist/WWW-RobotRules-Extended/>
=back
=head1 Extended ROBOTS.TXT EXAMPLES
The following example "/robots.txt" file specifies that no robots
should visit any URL starting with "/cyberworld/map/" or "/tmp/":
User-agent: *
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
Disallow: /tmp/ # these will soon disappear
This example "/robots.txt" file specifies that no robots should visit
any URL starting with "/cyberworld/map/", except the robot called
"cybermapper":
User-agent: *
Disallow: /cyberworld/map/ # This is an infinite virtual URL space
# Cybermapper knows where to go.
User-agent: cybermapper
Disallow:
This example indicates that no robots should visit this site further:
# go away
User-agent: *
Disallow: /
This is an example of a malformed robots.txt file.
# robots.txt for ancientcastle.example.com
# I've locked myself away.
User-agent: *
Disallow: /
# The castle is your home now, so you can go anywhere you like.
User-agent: Belle
Disallow: /west-wing/ # except the west wing!
# It's good to be the Prince...
User-agent: Beast
Disallow:
This file is missing the required blank lines between records.
However, the intention is clear.
This is an example of an extended robots.txt file
tou have a real example of this kind of rules on http://www.google.com/robots.txt
# Block every url that contains &p=
User-agent: *
Disallow: /*&p=
This is an example of an extended robots.txt file.
# Block every url but the ones that begin with /shared
User-agent: *
Disallow: /
Allow: /shared/
=head1 SEE ALSO
L<LWP::RobotUA>, L<WWW::RobotRules::AnyDBM_File>, L<WWW::RobotRules>
=head1 ACKNOWLEDGEMENTS
=head1 LICENSE AND COPYRIGHT
Copyright 2011, Yannick Simon
Copyright 1995-2009, Gisle Aas
Copyright 1995, Martijn Koster
This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.
See http://dev.perl.org/licenses/ for more information.
=cut
1; # End of WWW::RobotRules::Extended
package WWW::RobotRules::Extended::InCore;
use vars qw(@ISA);
@ISA = qw(WWW::RobotRules::Extended);
sub is_me {
my($self, $ua_line) = @_;
my $me = $self->agent;
# See whether my short-name is a substring of the
# "User-Agent: ..." line that we were passed:
if(index(lc($me), lc($ua_line)) >= 0) {
return 1;
}
else {
return '';
}
}
sub agent {
my ($self, $name) = @_;
my $old = $self->{'ua'};
if ($name) {
# Strip it so that it's just the short name.
# I.e., "FooBot" => "FooBot"
# "FooBot/1.2" => "FooBot"
# "FooBot/1.2 [http://foobot.int; foo@bot.int]" => "FooBot"
$name = $1 if $name =~ m/(\S+)/; # get first word
$name =~ s!/.*!!; # get rid of version
unless ($old && $old eq $name) {
delete $self->{'loc'}; # all old info is now stale
$self->{'ua'} = $name;
}
}
$old;
}
sub visit {
my($self, $netloc, $time) = @_;
return unless $netloc;
$time ||= time;
$self->{'loc'}{$netloc}{'last'} = $time;
my $count = \$self->{'loc'}{$netloc}{'count'};
if (!defined $$count) {
$$count = 1;
}
else {
$$count++;
}
}
sub no_visits {
my ($self, $netloc) = @_;
$self->{'loc'}{$netloc}{'count'};
}
sub last_visit {
my ($self, $netloc) = @_;
$self->{'loc'}{$netloc}{'last'};
}
sub fresh_until {
my ($self, $netloc, $fresh_until) = @_;
my $old = $self->{'loc'}{$netloc}{'fresh'};
if (defined $fresh_until) {
$self->{'loc'}{$netloc}{'fresh'} = $fresh_until;
}
$old;
}
sub push_rules {
my($self, $netloc, @rules) = @_;
push (@{$self->{'loc'}{$netloc}{'rules'}}, @rules);
}
sub clear_rules {
my($self, $netloc) = @_;
delete $self->{'loc'}{$netloc}{'rules'};
}
sub rules {
my($self, $netloc) = @_;
if (defined $self->{'loc'}{$netloc}{'rules'}) {
return @{$self->{'loc'}{$netloc}{'rules'}};
}
else {
return ();
}
}
sub dump
{
my $self = shift;
for (keys %$self) {
next if $_ eq 'loc';
print "$_ = $self->{$_}\n";
}
for (keys %{$self->{'loc'}}) {
my @rules = $self->rules($_);
print "$_: ", join("; ", @rules), "\n";
}
}
t/00-load.t view on Meta::CPAN
#!perl -T
use Test::More tests => 1;
BEGIN {
use_ok( 'WWW::RobotRules::Extended' ) || print "Bail out!\n";
}
diag( "Testing WWW::RobotRules::Extended $WWW::RobotRules::Extended::VERSION, Perl $], $^X" );
t/boilerplate.t view on Meta::CPAN
#!perl -T
use 5.006;
use strict;
use warnings;
use Test::More tests => 3;
sub not_in_file_ok {
my ($filename, %regex) = @_;
open( my $fh, '<', $filename )
or die "couldn't open $filename for reading: $!";
my %violated;
while (my $line = <$fh>) {
while (my ($desc, $regex) = each %regex) {
if ($line =~ $regex) {
push @{$violated{$desc}||=[]}, $.;
}
}
}
if (%violated) {
fail("$filename contains boilerplate text");
diag "$_ appears on lines @{$violated{$_}}" for keys %violated;
} else {
pass("$filename contains no boilerplate text");
}
}
sub module_boilerplate_ok {
my ($module) = @_;
not_in_file_ok($module =>
'the great new $MODULENAME' => qr/ - The great new /,
'boilerplate description' => qr/Quick summary of what the module/,
'stub function definition' => qr/function[12]/,
);
}
TODO: {
local $TODO = "Need to replace the boilerplate text";
not_in_file_ok(README =>
"The README is used..." => qr/The README is used/,
"'version information here'" => qr/to provide version information/,
);
not_in_file_ok(Changes =>
"placeholder date/time" => qr(Date/time)
);
module_boilerplate_ok('lib/WWW/RobotRules/Extended.pm');
}
t/manifest.t view on Meta::CPAN
#!perl -T
use strict;
use warnings;
use Test::More;
unless ( $ENV{RELEASE_TESTING} ) {
plan( skip_all => "Author tests not required for installation" );
}
eval "use Test::CheckManifest 0.9";
plan skip_all => "Test::CheckManifest 0.9 required" if $@;
ok_manifest();
t/pod-coverage.t view on Meta::CPAN
use strict;
use warnings;
use Test::More;
# Ensure a recent version of Test::Pod::Coverage
my $min_tpc = 1.08;
eval "use Test::Pod::Coverage $min_tpc";
plan skip_all => "Test::Pod::Coverage $min_tpc required for testing POD coverage"
if $@;
# Test::Pod::Coverage doesn't require a minimum Pod::Coverage version,
# but older versions don't recognize some common documentation styles
my $min_pc = 0.18;
eval "use Pod::Coverage $min_pc";
plan skip_all => "Pod::Coverage $min_pc required for testing POD coverage"
if $@;
all_pod_coverage_ok();
#!perl -T
use strict;
use warnings;
use Test::More;
# Ensure a recent version of Test::Pod
my $min_tp = 1.22;
eval "use Test::Pod $min_tp";
plan skip_all => "Test::Pod $min_tp required for testing POD" if $@;
all_pod_files_ok();
( run in 0.408 second using v1.01-cache-2.11-cpan-e9daa2b36ef )