Mojo-DOM-Role-Restrict
view release on metacpan or search on metacpan
lib/Mojo/DOM/Role/Restrict.pm view on Meta::CPAN
my $type = $tree->[0];
if ($type eq 'tag') {
# Start tag
my ($tag, $attrs) = _valid_tag($spec, $tree->[1], {%{$tree->[2]}});
return '' unless $tag;
my $result = "<$tag";
# Attributes
for (sort keys %{$attrs}) {
my ($key, $value) = _valid_attribute($spec, $tag, $_, $attrs->{$_});
$result .= defined $value
? qq{ $key="} . xml_escape($value) . '"'
: $xml
? qq{ $key="$key"}
: " $key"
if $key;
}
# No children
return $xml ? "$result />" : $EMPTY{$tag} ? "$result>" : "$result></$tag>" unless $tree->[4];
# Children
no warnings 'recursion';
$result .= '>' . join '', map { _render($_, $xml, $spec) } @$tree[4 .. $#$tree];
# End tag
return "$result</$tag>";
}
# Text (escaped)
return xml_escape $tree->[1] if $type eq 'text';
# Raw text
return $tree->[1] if $type eq 'raw';
# Root
return join '', map { _render($_, $xml, $spec) } @$tree[1 .. $#$tree] if $type eq 'root';
# DOCTYPE
return '<!DOCTYPE' . $tree->[1] . '>' if $type eq 'doctype';
# Comment
return '<!--' . $tree->[1] . '-->' if $type eq 'comment';
# CDATA
return '<![CDATA[' . $tree->[1] . ']]>' if $type eq 'cdata';
# Processing instruction
return '<?' . $tree->[1] . '?>' if $type eq 'pi';
# Everything else
return '';
}
sub _valid_tag {
my ($spec, $tag, $attrs) = @_;
my $valid = $spec->{$tag} // $spec->{'*'};
return ref $valid && $valid->{validate_tag}
? $valid->{validate_tag}($tag, $attrs)
: $valid
? ($tag, $attrs)
: 0;
}
sub _valid_attribute {
my ($spec, $tag, $attr, $value) = @_;
my $valid = $spec->{$tag}->{$attr} // $spec->{$tag}->{'*'} // $spec->{'*'}->{$attr} // $spec->{'*'}->{'*'};
return ref $valid
? $valid->($attr, $value)
: ($valid and $valid =~ m/1/ || $value =~ m/$valid/)
? ( $attr, $value )
: 0;
}
sub _valid {
my ($tree, $spec) = @_;
if ($tree->[0] eq 'tag') {
my ($tag, $attrs) = _valid_tag($spec, $tree->[1], {%{$tree->[2]}});
return 0 unless $tag;
_valid_attribute($spec, $tag, $_, $attrs->{$_}) or return 0
for (sort keys %{$attrs});
if ($tree->[4]) {
_valid($_, $spec) or return 0 for ( @$tree[4 .. $#$tree] );
}
} elsif ($tree->[0] eq 'root') {
_valid($_, $spec) or return 0 for ( @$tree[1 .. $#$tree] );
}
return 1;
}
sub _restrict {
my ($tree, $spec) = @_;
if ($tree->[0] eq 'tag') {
my ($tag, $attrs) = _valid_tag($spec, $tree->[1], $tree->[2]);
return 0 unless $tag;
$tree->[1] = $tag;
for (sort keys %{$attrs}) {
my ($key, $value) = _valid_attribute($spec, $tag, $_, delete $attrs->{$_});
$attrs->{$key} = $value if $key;
}
if ($tree->[4]) {
my $i = 4;
_restrict($_, $spec) ? $i++ : splice(@{$tree}, $i, 1)
for ( @$tree[$i .. $#$tree] );
}
} elsif ($tree->[0] eq 'root') {
my $i = 1;
_restrict($_, $spec) ? $i++ : splice(@{$tree}, $i, 1)
for ( @$tree[$i .. $#$tree] );
}
return 1;
}
1;
# TODO pretty print (for diff) and minmize.
__END__
=encoding UTF-8
=head1 NAME
Mojo::DOM::Role::Restrict - Restrict tags and attributes
=head1 VERSION
Version 0.06
=cut
=head1 SYNOPSIS
use Mojo::DOM;
my $html = q|<html><head><script>...</script></head><body><p class="okay" id="allow" onclick="not-allow">Restrict <span class="not-okay">HTML</span></p></body></html>|;
my $spec = {
script => 0, # remove all script tags
'*' => { #Â apply to all tags
'*' => 1, # allow all attributes by default
'onclick' => 0 # disable onclick attributes
},
span => {
class => 0 # disable class attributes on span's
}
};
#<html><head></head><body><p class="okay" id="allow">Restrict <span>HTML</span></p></body></html>
print Mojo::DOM->with_roles('+Restrict')->new($html, $spec);
.....
my $dom = Mojo::DOM->with_roles('+Restrict')->new;
my $html = q|<html><head><script>...</script></head><body><p class="okay" id="allow" onclick="not-allow">Restrict <span class="not-okay">HTML</span></p></body></html>|;
my $spec = {
script => 0, # no script tags
'*' => { # allow all tags
'*' => 1, # allow all attributes
onclick => sub { 0 }, # disable onclick attributes
id => sub { return @_ }, # enable id attributes
class => sub { # allow only 1 class 'okay'
my ($attr, $val) = @_;
my $match = $val =~ m/^okay$/;
return $match ? ($attr, $val) : 0;
}
},
span => {
validate_tag => sub { # replace span tags with b tags
return ('b', $_[1]);
}
},
p => {
validate_tag => sub {
$_[1]->{id} = "prefixed-" . $_[1]->{id}; # prefix all p tag IDs
$_[1]->{'data-unknown'} = 'abc'; # extend all p tags with a data-unknown attribute
return @_;
}
},
};
$dom->parse($html, $spec);
# <html><head></head><body><p class="okay" data-unknown="abc" id="prefixed-allow">Restrict <b>HTML</b></p></body></html>
$dom->to_string;
# you can change the spec and then re-render
$spec = {
'*' => { # allow all tags
'*' => '^not', # where any attr value matches the regex
},
};
$dom->restrict_spec($spec);
# <html><head><script>...</script></head><body><p onclick="not-allow">Restrict <span class="not-okay">HTML</span></p></body></html>
$dom->to_string;
# check whether the spec is valid
$dom->valid; # 0
# apply spec changess to the Mojo::DOM object
$dom->restrict;
# re-check whether the spec is valid
$dom->valid; # 1
# render using original render function (Mojo::DOM::HTML::render)
# <html><head><script>...</script></head><body><p onclick="not-allow">Restrict <span class="not-okay">HTML</span></p></body></html>
$dom->to_string(1);
$dom->parse(q|<p class="okay" data-unknown="abc" id="prefixed-allow" onclick="not-allow">Restrict <span class="not-okay">HTML</span></p>|);
# <p onclick="not-allow">Restrict <span class="not-okay">HTML</span></p>
$dom->to_string;
=head1 SUBROUTINES/METHODS
=head2 restrict_spec
Retrieve/Set the specification used to restrict the HTML.
my $spec = $self->restrict_spec;
$dom->restrict_spec($spec);
=cut
=head2 valid
Validate the current DOM against the specification. Returns true(1) if valud returns false(0) if invalid.
my $html = q|<html><head><script>...</script></head><body><p class="okay" id="allow" onclick="not-allow">Restrict <span class="not-okay">HTML</span></p></body></html>|;
my $spec = {
html => 1,
head => 1,
script => 1,
body => 1,
p => 1,
span => 1
};
my $dom = Mojo::DOM->with_roles('+Restrict')->new($html, $spec);
$dom->valid; # 1;
$spec = {
html => 1,
head => 1,
script => 1,
body => 1,
p => 1,
span => 0
};
$dom->valid($spec); # 0;
=cut
=head2 restrict
Restrict the current DOM against the specification, after calling restrict the specification changes applied become irreversible.
my $html = q|<html><head><script>...</script></head><body><p class="okay" id="allow" onclick="not-allow">Restrict <span class="not-okay">HTML</span></p></body></html>|;
my $spec = {
script => 0, # no script tags
'*' => { # allow all tags
'*' => 1, # allow all attributes
onclick => sub { 0 }, # disable onclick attributes
id => sub { return @_ }, # enable id attributes
class => sub { # allow only 1 class 'okay'
my ($attr, $val) = @_;
my $match = $val =~ m/^okay$/;
return $match ? ($attr, $val) : 0;
}
},
span => {
validate_tag => sub { # replace span tags with b tags
return ('b', $_[1]);
}
},
p => {
validate_tag => sub {
$_[1]->{id} = "prefixed-" . $_[1]->{id}; # prefix all p tag IDs
$_[1]->{'data-unknown'} = 'abc'; # extend all p tags with a data-unknown attribute
return @_;
}
},
};
$dom->parse($html, $spec);
# render without spec validation
# <html><head><script>...</script></head><body><p class="okay" id="allow" onclick="not-allow">Restrict <span class="not-okay">HTML</span></p></body></html>
$dom->to_string(1);
# restrict the DOM
$dom->restrict;
# render without spec validation
# <html><head></head><body><p class="okay" data-unknown="abc" id="prefixed-allow">Restrict <b>HTML</b></p></body></html>
$dom->to_string(1);
=cut
=head2 diff
Perform a diff comparing the original HTML and the restricted HTML.
my $html = q|<html>
<head>
<script>...</script>
</head>
<body>
<p class="okay" id="allow" onclick="not-allow">
Restrict
<span class="not-okay">HTML</span>
</p>
</body>
</html>|;
my $spec = {
script => 0, # remove all script tags
'*' => { #Â apply to all tags
'*' => 1, # allow all attributes by default
'onclick' => 0 # disable onclick attributes
},
span => {
class => 0 # disable class attributes on span's
}
};
my $dom = Mojo::DOM->with_roles('+Restrict')->new($html, $spec);
#@@ -1,11 +1,11 @@
# <html>
# <head>
#- <script>...</script>
#+
# </head>
# <body>
#- <p class="okay" id="allow" onclick="not-allow">
#+ <p class="okay" id="allow">
( run in 1.678 second using v1.01-cache-2.11-cpan-d06a3f9ecfd )