HTML-Copy

 view release on metacpan or  search on metacpan

lib/HTML/Copy.pm  view on Meta::CPAN

sub encoding {
    my ($self) = @_;
    if ($self->{'encoding'}) {
        return $self->{'encoding'};
    }
    my $in = $self->source_handle;
    my $data = do {local $/; <$in>;};
    my $p = HTML::HeadParser->new;
    $p->utf8_mode(1);
    $p->parse($data);
    my $content_type = $p->header('content-type');
    my $encoding = '';
    if ($content_type) {
        if ($content_type =~ /charset\s*=(.+)/) {
            $encoding = $1;
        }
    }
    
    unless ($encoding) {
        my $decoder;
        if (my @suspects = $self->encode_suspects) {

t/parse.t  view on Meta::CPAN

    read_and_unlink($destination, $p);
};

# Compare $copy_html with $result_html_nocharset by string equality; both
# values are assigned outside this excerpt.
ok($copy_html eq $result_html_nocharset, "copy_to no charset shift_jis");

##== HTML with charset utf-8
# Source fixture for the utf-8 case. The <meta> tag declares charset=utf-8,
# and the heredoc interpolates $linked_file_name into five link-bearing
# elements (a, frame, img, script, link). The whole text is passed through
# encode_utf8() before it is stored.
my $src_html_utf8 = encode_utf8(<<EOT);
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
</head>
<body>
ああ
<a href="$linked_file_name"></a>
<frame src="$linked_file_name">
<img src="$linked_file_name">
<script src="$linked_file_name"></script>
<link href="$linked_file_name">
</body>
</html>
EOT

my $result_html_utf8 = encode_utf8(<<EOT);
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
</head>
<body>
ああ
<a href="../$linked_file_name"></a>
<frame src="../$linked_file_name">
<img src="../$linked_file_name">
<script src="../$linked_file_name"></script>
<link href="../$linked_file_name">
</body>
</html>

t/parse.t  view on Meta::CPAN

    read_and_unlink($destination, $p);
};

# Compare $copy_html with $result_html_utf8 by string equality.
ok($copy_html eq $result_html_utf8, "copy_to with a directory destination");

##== HTML with charset shift_jis
# Source fixture whose <meta> tag declares charset=shift_jis. The heredoc
# interpolates $linked_file_name into five link-bearing elements and is
# stored as written, with no explicit encoding call around it.
# NOTE(review): presumably the non-ASCII text is meant to be shift_jis bytes
# in the test file itself -- confirm against the file's actual encoding.
my $src_html_shiftjis = <<EOT;
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=shift_jis">
</head>
ああ
<a href="$linked_file_name"></a>
<frame src="$linked_file_name">
<img src="$linked_file_name">
<script src="$linked_file_name"></script>
<link href="$linked_file_name">
</html>
EOT

# Expected result for the shift_jis case: the same text as
# $src_html_shiftjis, except that every interpolated $linked_file_name is
# prefixed with "../".
my $result_html_shiftjis = <<EOT;
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=shift_jis">
</head>
ああ
<a href="../$linked_file_name"></a>
<frame src="../$linked_file_name">
<img src="../$linked_file_name">
<script src="../$linked_file_name"></script>
<link href="../$linked_file_name">
</html>
EOT

t/parse.t  view on Meta::CPAN


# Compare $copy_html with $result_html_shiftjis by string equality.
ok($copy_html eq $result_html_shiftjis, "htmlcopy");

# Delete the file named by $destination.
unlink($destination);

##== Test with base url
# Source fixture that declares charset=utf-8 and carries a <base href> with
# an absolute http URL in its <head>. The heredoc interpolates
# $linked_file_name into five link-bearing elements and is stored as
# written, with no explicit encoding call around it.
my $src_html_base = <<EOT;
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=utf-8">
<base href="http://homepage.mac.com/tkurita/scriptfactory/">
</head>
ああ
<a href="$linked_file_name"></a>
<frame src="$linked_file_name">
<img src="$linked_file_name">
<script src="$linked_file_name"></script>
<link href="$linked_file_name">
</html>
EOT



( run in 1.392 second using v1.01-cache-2.11-cpan-524268b4103 )