Catalyst-Plugin-HTML-Scrubber

 view release on metacpan or  search on metacpan

lib/Catalyst/Plugin/HTML/Scrubber.pm  view on Meta::CPAN

package Catalyst::Plugin::HTML::Scrubber;
$Catalyst::Plugin::HTML::Scrubber::VERSION = '0.09';
use Moose;
use namespace::autoclean;

with 'Catalyst::ClassData';

use MRO::Compat;
use HTML::Scrubber;

__PACKAGE__->mk_classdata('_scrubber');

sub setup {
    my $c = shift;

    my $conf = $c->config->{scrubber};
    if (ref $conf eq 'ARRAY') {
        $c->_scrubber(HTML::Scrubber->new(@$conf));
    } elsif (ref $conf eq 'HASH') {
        $c->config->{scrubber}{auto} = 1
            unless defined $c->config->{scrubber}{auto};
        $c->_scrubber(HTML::Scrubber->new(@{$conf->{params}}));
    } else {
        $c->_scrubber(HTML::Scrubber->new());
    }

    return $c->maybe::next::method(@_);
}

sub execute {
    my $c = shift;

    $c->maybe::next::method(@_);

    my $conf = $c->config->{scrubber};

    # There are two ways to configure the plugin, it seems; giving a hashref
    # of params under `scrubber`, with any params intended for HTML::Scrubber
    # under the vaguely-named `params` key, or an arrayref of params intended
    # to be passed straight to HTML::Scrubber - save html_scrub() from knowing
    # about that by abstracting that nastyness away:
    if (ref $conf ne 'HASH' || $conf->{auto}) {
        $c->html_scrub(ref($conf) eq 'HASH' ? $conf : {});
    }
}

sub html_scrub {
    my ($c, $conf) = @_;

    # Firstly, if an entry in ignore_urls matches, then we don't want to
    # scrub anything for this request...
    return if ($c->_req_path_exempt_from_scrubbing($conf));

    # If there's body_data - for e.g. a POSTed JSON body that was decoded -
    # then we need to walk through it, scrubbing as appropriate; don't call
    # body_data unless the content type is one there's a data handler for
    # though, otherwise we'll trigger an exception (see GH#4)
    if (exists $c->req->data_handlers->{ $c->req->content_type }) {
        if (my $body_data = $c->request->body_data) {
            $c->_scrub_recurse($conf, $c->request->body_data);
        }
    }

    # And if Catalyst::Controller::REST is in use so we have $req->data,
    # then scrub that too
    if ($c->request->can('data')) {
        my $data = $c->request->data;
        if ($data) {
            $c->_scrub_recurse($conf, $c->request->data);
        }
    }

    # Normal query/POST body parameters:
    $c->_scrub_recurse($conf, $c->request->parameters);

}

# Recursively scrub param values...
sub _scrub_recurse {
    my ($c, $conf, $data) = @_;

    # If the thing we've got is a hashref, walk over its keys, checking
    # whether we should ignore, otherwise, do the needful
    if (ref $data eq 'HASH') {
        for my $key (keys %$data) {
            if (!$c->_should_scrub_param($conf, $key, $data->{$key})) {
                next;
            }

            # OK, it's fine to fettle with this key - if its value is
            # a ref, recurse, otherwise, scrub
            if (my $ref = ref $data->{$key}) {
                $c->_scrub_recurse($conf, $data->{$key})
                    if defined $data->{$key};
            } else {
                # Alright, non-ref value, so scrub it
                # FIXME why did we have to have this ref-ref handling fun?
                #$_ = $c->_scrubber->scrub($_) for (ref($$value) ? @{$$value} : $$value);
                $data->{$key} = $c->_scrub_value($conf, $data->{$key})
                    if defined $data->{$key};
            }
        }
    } elsif (ref $data eq 'ARRAY') {
        for (@$data) {
            if (ref $_) {
                $c->_scrub_recurse($conf, $_);
            } else {
                $_ = $c->_scrub_value($conf, $_) if defined $_;
            }
        }
    } elsif (ref $data eq 'CODE') {
        $c->log->debug("Can't scrub a coderef!");
    } else {
        # Note that at this point we don't know what the param was called



( run in 0.559 second using v1.01-cache-2.11-cpan-524268b4103 )