cdata results from the CPAN

cdata

XML-Writer

view release on metacpan or search on metacpan

};

# Exercise nesting and namespaces
TEST: {
	initEnv(DATA_MODE => 1, DATA_INDENT => 1);
	$w->startTag(['a', 'element']);
	$w->startTag(['a', 'element']);
	$w->startTag(['b', 'element']);
	$w->startTag(['b', 'element']);
	$w->startTag(['c', 'element']);
	$w->startTag(['d', 'element']);
	$w->endTag(['d', 'element']);
	$w->startTag(['d', 'element']);
	$w->endTag(['d', 'element']);
	$w->endTag(['c', 'element']);
	$w->endTag(['b', 'element']);
	$w->endTag(['b', 'element']);
	$w->endTag(['a', 'element']);
	$w->endTag(['a', 'element']);
	$w->end();

	checkResult(<<"EOS", "Deep-nesting, to exercise prefix management");
<__NS1:element xmlns:__NS1="a">
 <__NS1:element>
  <__NS2:element xmlns:__NS2="b">
   <__NS2:element>
    <__NS3:element xmlns:__NS3="c">
     <__NS4:element xmlns:__NS4="d"></__NS4:element>
     <__NS4:element xmlns:__NS4="d"></__NS4:element>
    </__NS3:element>
   </__NS2:element>
  </__NS2:element>
 </__NS1:element>
</__NS1:element>
EOS
};

# Raw output.
TEST: {
	initEnv(UNSAFE => 1);
	$w->startTag("foo");
	$w->raw("<bar/>");
	$w->endTag("foo");
	$w->end();
	checkResult("<foo><bar/></foo>\n", 'raw() should pass text through without escaping it');
};

# Attempting raw output in safe mode
TEST: {
	initEnv();
	$w->startTag("foo");
	expectError('raw\(\) is only available when UNSAFE is set', eval {
		$w->raw("<bar/>");
	});
}

# Inserting a CDATA section.
TEST: {
	initEnv();
	$w->startTag("foo");
	$w->cdata("cdata testing - test");
	$w->endTag("foo");
	$w->end();
	checkResult("<foo><![CDATA[cdata testing - test]]></foo>\n",
		'cdata() should create CDATA sections');
};

# Inserting CDATA containing CDATA delimeters ']]>'.
TEST: {
	initEnv();
	$w->startTag("foo");
	$w->cdata("This is a CDATA section <![CDATA[text]]>");
	$w->endTag("foo");
	$w->end();
	checkResult("<foo><![CDATA[This is a CDATA section <![CDATA[text]]]]><![CDATA[>]]></foo>\n", 'If a CDATA section would be invalid, it should be split up');
};

# cdataElement().
TEST: {
	initEnv();
	$w->cdataElement("foo", "hello", a => 'b');
	$w->end();
	checkResult(qq'<foo a="b"><![CDATA[hello]]></foo>\n',
		'cdataElement should produce a valid element containing a CDATA section');
};

# Verify that writing characters using CDATA outside of an element fails
TEST: {
	initEnv();
	expectError('Attempt to insert characters outside of document element',
		eval {
			$w->cdata('Test');
		});
};

# Expect to break on mixed content in data mode
TEST: {
	initEnv();
	$w->setDataMode(1);
	$w->startTag('x');
	$w->cdata('Text');
	expectError("Mixed content not allowed in data mode: element x", eval {
		$w->startTag('x');
	});
};

# Break with mixed content when the element is written before the characters
TEST: {
	initEnv();
	$w->setDataMode(1);
	$w->startTag('x');
	$w->emptyTag('empty');
	expectError("Mixed content not allowed in data mode: characters", eval {
		$w->cdata('Text');
	});
};

# Make sure addPrefix-caused clashes are resolved
TEST: {
	initEnv();

	$w->addPrefix('a', '');
	$w->addPrefix('b', '');

	$w->startTag(['a', 'doc']);
	$w->emptyTag(['b', 'elem']);
	$w->endTag(['a', 'doc']);
	$w->end();

	checkResult(<<"EOS", 'Later addPrefix()s should override earlier ones');
<__NS1:doc xmlns:__NS1="a"><elem xmlns="b" /></__NS1:doc>
EOS
};

# addPrefix should work in the middle of a document
TEST: {
	initEnv();

	$w->addPrefix('a', '');
	$w->startTag(['a', 'doc']);

	$w->addPrefix('b', '');
	$w->emptyTag(['b', 'elem']);
	$w->endTag(['a', 'doc']);
	$w->end();

	checkResult(<<"EOS", 'addPrefix should work in the middle of a document');
<doc xmlns="a"><elem xmlns="b" /></doc>
EOS
};

# Verify changing the default namespace
TEST: {
	initEnv(
		DATA_MODE => 1,
		DATA_INDENT => 1
	);

	$w->addPrefix('a', '');

	$w->startTag(['a', 'doc']);

	$w->startTag(['b', 'elem1']);
	$w->emptyTag(['b', 'elem1']);
	$w->emptyTag(['a', 'elem2']);
	$w->endTag(['b', 'elem1']);
	
	$w->addPrefix('b', '');

	$w->startTag(['b', 'elem1']);
	$w->emptyTag(['b', 'elem1']);
	$w->emptyTag(['a', 'elem2']);
	$w->endTag(['b', 'elem1']);

t/01_main.t view on Meta::CPAN

	initEnv(PREFIX_MAP => {'uri:test', ''},
		FORCED_NS_DECLS => ['uri:test']
	);

	$w->emptyTag(['uri:test2', 'document']);

	$w->end();

	checkResult(<<"EOS", 'The default namespace declaration should be present and correct when the document element belongs to a different namespace');
<__NS1:document xmlns:__NS1="uri:test2" xmlns="uri:test" />
EOS
};

# Without namespaces, addPrefix and removePrefix should be safe NOPs
TEST: {
	initEnv(NAMESPACES => 0);

	$w->addPrefix('these', 'arguments', 'are', 'ignored');
	$w->removePrefix('as', 'are', 'these');

	wasNoWarning('Prefix manipulation on a namespace-unaware instance should not warn');
};

# Make sure that getting and setting the output stream behaves as expected
TEST: {
	initEnv();

	my $out = $w->getOutput();

	isnt($out, undef, 'Output for this fixture must be defined');

	$w->setOutput(\*STDERR);
	is($w->getOutput(), \*STDERR, 'Changing output should be reflected in a subsequent get');

	$w->setOutput($out);
	is ($w->getOutput(), $out, 'Changing output back should succeed');

	$w->emptyTag('x');
	$w->end();
	checkResult("<x />\n", 'After changing the output a document should still be generated');
};

# Make sure that undef implies STDOUT for setOutput
TEST: {
	initEnv();

	$w->setOutput();

	wasNoWarning('setOutput without a defined argument should not cause warnings');

	is($w->getOutput(), \*STDOUT, 'If no output is given, STDOUT should be used');
};

# Create an ill-formed document using unsafe mode
TEST: {
	initEnv(UNSAFE => 1);

	$w->xmlDecl('us-ascii');
	$w->comment("--");
	$w->characters("Test\n");
	$w->cdata("Test\n");
	$w->doctype('y', undef, '/');
	$w->emptyTag('x');
	$w->end();
	checkResult(<<EOR, 'Unsafe mode should not enforce validity tests.');
<?xml version="1.0" encoding="us-ascii"?>
<!-- -- -->
Test
<![CDATA[Test
]]><!DOCTYPE y SYSTEM "/">
<x />
EOR

};

# Ensure that newlines in attributes are escaped
TEST: {
	initEnv();

	$w->emptyTag('x', 'a' => "A\nB");
	$w->end();

	checkResult("<x a=\"A&#10;B\" />\n", 'Newlines in attribute values should be escaped');
};

# Make sure UTF-8 is written properly
TEST: {
	initEnv(ENCODING => 'utf-8', DATA_MODE => 1);

	$w->xmlDecl();
	$w->comment("\$ \x{A3} \x{20AC}");
	$w->startTag('a');
	$w->dataElement('b', '$');

	# I need U+00A3 as an is_utf8 string; I want to keep the source ASCII.
	# There must be a better way to do this.
	require Encode;
	my $text = Encode::decode('iso-8859-1', "\x{A3}");
	$w->dataElement('b', $text);

	$w->dataElement('b', "\x{20AC}");
	$w->startTag('c');
	$w->cdata(" \$ \x{A3} \x{20AC} ");
	$w->endTag('c');
	$w->endTag('a');
	$w->end();

	checkResult(<<EOR, 'When requested, output should be UTF-8 encoded');
<?xml version="1.0" encoding="utf-8"?>
<!-- \$ \x{C2}\x{A3} \x{E2}\x{82}\x{AC} -->

<a>
<b>\x{24}</b>
<b>\x{C2}\x{A3}</b>
<b>\x{E2}\x{82}\x{AC}</b>
<c><![CDATA[ \$ \x{C2}\x{A3} \x{E2}\x{82}\x{AC} ]]></c>
</a>
EOR
};

# Test UTF-8 element name
TEST: {
	# I need U+00E9 as an is_utf8 string; I want to keep the source ASCII.
	# There must be a better way to do this.
	require Encode;
	my $text = Encode::decode('iso-8859-1', "\x{E9}");

	initEnv(ENCODING => 'utf-8');
	$w->emptyTag("r${text}sum${text}");
	checkResult("<r\x{C3}\x{A9}sum\x{C3}\x{A9} />", 'E-acute element name permitted');
};

# Test UTF-8 attribute name
TEST: {
	# I need U+00E9 as an is_utf8 string; I want to keep the source ASCII.
	# There must be a better way to do this.
	require Encode;
	my $text = Encode::decode('iso-8859-1', "\x{E9}");

	initEnv(ENCODING => 'utf-8');
	$w->emptyTag("foo", "fianc${text}" => 'true');
	checkResult("<foo fianc\x{C3}\x{A9}=\"true\" />", 'E-acute attribute name permitted');
};

# Capture generated XML in a scalar
TEST: {
	initEnv();
	my $s;

	$w = XML::Writer->new(OUTPUT => \$s);
	$w->emptyTag('x');
	$w->end();

	wasNoWarning('Capturing in a scalar should not cause warnings');
	is($s, "<x />\n", "Output should be stored in a scalar, if one is passed");
};

# Modify the scalar during capture
TEST: {
	initEnv();
	my $s;

	$w = XML::Writer->new(OUTPUT => \$s);

t/01_main.t view on Meta::CPAN

	initEnv();
	my $s;

	$w = XML::Writer->new(OUTPUT => \$s);

	my $x = 'x';
	utf8::upgrade($x);

	$w->emptyTag($x);
	$w->end();

	ok(utf8::is_utf8($s), 'A storage scalar should preserve utf8-ness');


	undef($s);
	$w = XML::Writer->new(OUTPUT => \$s);
	$w->startTag('a');
	$w->dataElement('x', "\$");
	$w->dataElement('x', "\x{A3}");
	$w->dataElement('x', "\x{20AC}");
	$w->endTag('a');
	$w->end();

	is($s, "<a><x>\$</x><x>\x{A3}</x><x>\x{20AC}</x></a>\n",
		'A storage scalar should work with utf8 strings');
}

# Test US-ASCII encoding
TEST: {
	initEnv(ENCODING => 'us-ascii', DATA_MODE => 1);

	$w->xmlDecl();
	$w->startTag('a');
	$w->dataElement('x', "\$", 'a' => "\$");
	$w->dataElement('x', "\x{A3}", 'a' => "\x{A3}");
	$w->dataElement('x', "\x{20AC}", 'a' => "\x{20AC}");
	$w->endTag('a');
	$w->end();

	checkResult(<<'EOR', 'US-ASCII support should cover text and attributes');
<?xml version="1.0" encoding="us-ascii"?>

<a>
<x a="$">$</x>
<x a="&#xA3;">&#xA3;</x>
<x a="&#x20AC;">&#x20AC;</x>
</a>
EOR


	# Make sure non-ASCII characters that can't be represented
	#  as references cause failure

	# I need U+00A3 as an is_utf8 string; I want to keep the source ASCII.
	# There must be a better way to do this.
	require Encode;
	my $text = Encode::decode('iso-8859-1', "\x{A3}");

	initEnv(ENCODING => 'us-ascii', DATA_MODE => 1);
	$w->startTag('a');
	$w->cdata('Text');
	expectError('ASCII', eval {
		$w->cdata($text);
	});


	initEnv(ENCODING => 'us-ascii', DATA_MODE => 1);
	$w->startTag('a');
	$w->comment('Text');
	expectError('ASCII', eval {
		$w->comment($text);
	});


	initEnv(ENCODING => 'us-ascii', DATA_MODE => 1);
	expectError('ASCII', eval {
		$w->emptyTag("\x{DC}berpr\x{FC}fung");
	});


	initEnv(ENCODING => 'us-ascii', DATA_MODE => 1);
	expectError("Non-ASCII characters are not permitted in this part of ", eval {
		$w->emptyTag("r\x{E9}sum\x{E9}");
	});


	initEnv(ENCODING => 'us-ascii', DATA_MODE => 1);
	expectError("Non-ASCII characters are not permitted in this part of ", eval {
		$w->emptyTag("foo", "fianc\x{E9}" => 'true');
	});


	# Make sure Unicode generates warnings when it makes it through
	#  to a US-ASCII-encoded stream
	initEnv(ENCODING => 'us-ascii', DATA_MODE => 1, UNSAFE => 1);
	$w->startTag('a');
	$w->cdata($text);
	$w->endTag('a');
	$w->end();

	$outputFile->flush();
	ok($warning && $warning =~ /does not map to ascii/,
		'Perl IO should warn about non-ASCII characters in output');
	

	initEnv(ENCODING => 'us-ascii', DATA_MODE => 1, UNSAFE => 1);
	$w->startTag('a');
	$w->comment($text);
	$w->endTag('a');
	$w->end();

	$outputFile->flush();
	ok($warning && $warning =~ /does not map to ascii/,
		'Perl IO should warn about non-ASCII characters in output');

}

# Make sure comments are formatted in data mode
TEST: {
	initEnv(DATA_MODE => 1, DATA_INDENT => 1);

	$w->xmlDecl();
	$w->comment("Test");
	$w->comment("Test");
	$w->startTag("x");
	$w->comment("Test 2");
	$w->startTag("y");
	$w->comment("Test 3");
	$w->endTag("y");
	$w->comment("Test 4");
	$w->startTag("y");
	$w->endTag("y");
	$w->endTag("x");
	$w->end();
	$w->comment("Test 5");

	checkResult(<<'EOR', 'Comments should be formatted like elements when in data mode');
<?xml version="1.0"?>
<!-- Test -->
<!-- Test -->

<x>
 <!-- Test 2 -->
 <y>
  <!-- Test 3 -->
 </y>
 <!-- Test 4 -->
 <y></y>
</x>
<!-- Test 5 -->
EOR
}

# Test characters outside the BMP
TEST: {
	my $s = "\x{10480}"; # U+10480 OSMANYA LETTER ALEF

	initEnv(ENCODING => 'utf-8');

	$w->dataElement('x', $s);
	$w->end();

	checkResult(<<"EOR", 'Characters outside the BMP should be encoded correctly in UTF-8');
<x>\xF0\x90\x92\x80</x>
EOR

	initEnv(ENCODING => 'us-ascii');

	$w->dataElement('x', $s);
	$w->end();

	checkResult(<<'EOR', 'Characters outside the BMP should be encoded correctly in US-ASCII');
<x>&#x10480;</x>
EOR
}


# Ensure 'ancestor' returns undef beyond the document
TEST: {
	initEnv();

	is($w->ancestor(0), undef, 'With no document, ancestors should be undef');

	$w->startTag('x');
	is($w->ancestor(0), 'x', 'ancestor(0) should return the current element');
	is($w->ancestor(1), undef, 'ancestor should return undef beyond the document');
}

# Don't allow undefined Unicode characters, but do allow whitespace
TEST: {
	# Test characters

	initEnv();

	$w->startTag('x');
	expectError('\u0000', eval {
		$w->characters("\x00");
	});

	initEnv();

	$w->dataElement('x', "\x09\x0A\x0D ");
	$w->end();

	checkResult(<<"EOR", 'Whitespace below \u0020 is valid.');
<x>\x09\x0A\x0D </x>
EOR


	# CDATA

	initEnv();
	$w->startTag('x');
	expectError('\u0000', eval {
		$w->cdata("\x00");
	});

	initEnv();

	$w->startTag('x');
	$w->cdata("\x09\x0A\x0D ");
	$w->endTag('x');
	$w->end();

	checkResult(<<"EOR", 'Whitespace below \u0020 is valid.');
<x><![CDATA[\x09\x0A\x0D ]]></x>
EOR


	# Attribute values

	initEnv();
	expectError('\u0000', eval {
		$w->emptyTag('x', 'a' => "\x00");
	});

	initEnv();
	$w->emptyTag('x', 'a' => "\x09\x0A\x0D ");
	$w->end();

	# \u0009, \u000A and \u000D are escaped. This test is for lack of errors,
	#  not exact serialisation, so change it if necessary.
	checkResult(<<"EOR", 'Whitespace below \u0020 is valid.');
<x a="&#9;&#10;&#13; " />
EOR
}

# Unsafe mode should not enforce character validity tests
TEST: {
	initEnv(UNSAFE => 1);

	$w->dataElement('x', "\x00");
	$w->end();
	checkResult(<<"EOR", 'Unsafe mode should not enforce character validity tests');
<x>\x00</x>
EOR

	initEnv(UNSAFE => 1);
	$w->startTag('x');
	$w->cdata("\x00");
	$w->endTag('x');
	$w->end();
	checkResult(<<"EOR", 'Unsafe mode should not enforce character validity tests');
<x><![CDATA[\x00]]></x>
EOR

	initEnv(UNSAFE => 1);
	$w->emptyTag('x', 'a' => "\x00");
	$w->end();
	checkResult(<<"EOR", 'Unsafe mode should not enforce character validity tests');
<x a="\x00" />
EOR
}

# Cover XML declaration encoding cases
TEST: {
	# No declaration unless specified
	initEnv();
	$w->xmlDecl();
	$w->emptyTag('x');
	$w->end();

	checkResult(<<"EOR", 'When no encoding is specified, the declaration should not include one');
<?xml version="1.0"?>
<x />
EOR

	# An encoding specified in the constructor carries across to the declaration
	initEnv(ENCODING => 'us-ascii');
	$w->xmlDecl();
	$w->emptyTag('x');
	$w->end();

	checkResult(<<"EOR", 'If an encoding is specified for the document, it should appear in the declaration');
<?xml version="1.0" encoding="us-ascii"?>
<x />
EOR

	# Anything passed in the xmlDecl call should override
	initEnv(ENCODING => 'us-ascii');
	$w->xmlDecl('utf-8');
	$w->emptyTag('x');
	$w->end();
	checkResult(<<"EOR", 'An encoding passed to xmlDecl should override any other encoding');
<?xml version="1.0" encoding="utf-8"?>
<x />
EOR

	# The empty string should force the omission of the decl
	initEnv(ENCODING => 'us-ascii');
	$w->xmlDecl('');
	$w->emptyTag('x');
	$w->end();
	checkResult(<<"EOR", 'xmlDecl should treat the empty string as instruction to omit the encoding from the declaration');
<?xml version="1.0"?>
<x />
EOR
}

# Bug report: [cpan #14854] Broken namespace report

( run in 1.247 second using v1.01-cache-2.11-cpan-483215c6ad5 )