Data-Edit-Xml-Lint
view release on metacpan or search on metacpan
lib/Data/Edit/Xml/Lint.pm view on Meta::CPAN
#!/usr/bin/perl -I/home/phil/perl/cpan/DataTableText/lib/ -I/home/phil/perl/cpan/DataEditXml/lib/
#-------------------------------------------------------------------------------
# Lint xml files in parallel using xmllint and report the pass/failure rate.
# Philip R Brenan at gmail dot com, Appa Apps Ltd Inc, 2016-2019
#-------------------------------------------------------------------------------
# podDocumentation
# Id definitions should be processed independently of labels
# What sort of tag is on the end of the link?
# Report resolved, unresolved, missing links - difficult because of forking
# Check that the file actually has a lint section in read and do something about it if it does not
# Separate reference fixup into a separate framework (like Dita::Conversion)
# Show number of compressed errors on Lint summary line
# Highlight error counts in bold using boldText() or perhaps enclosed alphanumerics
# Relint load data in parallel
# Option to print the xmllint command
# inputFile=>name unicode seems to be failing
# Lots more tests needed
package Data::Edit::Xml::Lint;
our $VERSION = 20200108;
use warnings FATAL => qw(all);
use strict;
use Carp qw(cluck confess);
use Data::Dump qw(dump);
use Data::Table::Text qw(:all);
use Time::HiRes qw(time);
use Encode;
sub maxLintMsgLength {return 128} # Truncate xml lint messages longer than this
sub maxExampleFiles {return 3} # Maximum number of example files
#D1 Constructor # Construct a new linter
sub new #S Create a new, empty xml linter - call this method statically as in L<Data::Edit::Xml::Lint|/new> and then fill in the relevant L<Attributes>.
 {my %linter;                                                                   # A new linter starts with no attributes set
  return bless \%linter, __PACKAGE__;                                           # Always blessed into this package regardless of how we were called
 }
#D2 Attributes # Attributes describing a lint.
genLValueScalarMethods(qw(author)); # Optional author of the xml - only needed if you want to generate an SDL file map.
genLValueScalarMethods(qw(catalog)); # Optional catalog file containing the locations of the DTDs used to validate the xml or use L<dtds|/dtds> to supply a B<DTD> instead.
genLValueScalarMethods(qw(compressedErrors)); # Number of compressed errors discovered.
genLValueScalarMethods(qw(compressedErrorText)); # Text of compressed errors.
genLValueScalarMethods(qw(ditaType)); # Optional Dita topic type (concept|task|troubleshooting|reference) of the xml - only needed if you want to generate an SDL file map.
genLValueScalarMethods(qw(docType)); # The second line: the document type extracted from the L<source|/source>.
genLValueScalarMethods(qw(dtds)); # Optional directory containing the DTDs used to validate the xml.
genLValueScalarMethods(qw(errors)); # Total number of uncompressed lint errors detected by xmllint over all files.
genLValueScalarMethods(qw(errorText)); # Text of uncompressed lint errors detected by xmllint over all files.
genLValueScalarMethods(qw(file)); # File that the xml should be written to or read from by L<lint|/lint>, L<read|/read> or L<relint|/relint>.
genLValueScalarMethods(qw(fileNumber)); # File number - assigned by the caller to help debugging transformations.
genLValueScalarMethods(qw(lineNumber)); # The file and line number of the caller so we can identify which request for lint gave rise to a particular file.
genLValueScalarMethods(qw(guid)); # Guid or id of the outermost tag - if not supplied the first definition encountered in each file will be used on the basis that all Dita topics require an id.
genLValueScalarMethods(qw(header)); # The first line: the xml header extracted from L<source|/source>.
genLValueScalarMethods(qw(idDefs)); # {id} = count - the number of times this id is defined in the xml contained in this L<file|/file>.
genLValueScalarMethods(qw(inputFile)); # The file from which this xml was obtained.
genLValueScalarMethods(qw(labelDefs)); # {label or id} = id - the id of the node containing a L<label|Data::Edit::Xml/Labels> defined on the xml.
genLValueScalarMethods(qw(labels)); # Optional parse tree to supply L<labels|Data::Edit::Xml/Labels> for the current L<source|/source> as the labels are present in the parse tree not in the string represent...
genLValueScalarMethods(qw(linted)); # Date the lint was performed by L<lint|/lint>. We avoid adding a time as well because this then induces much longer sync times with AWS S3.
genLValueScalarMethods(qw(preferredSource)); # Preferred representation of the xml source, used by L<relint|/relint> to supply a preferred representation for the source.
genLValueScalarMethods(qw(processes)); # Maximum number of xmllint processes to run in parallel - 8 by default if linting in parallel is being used. Linting in parallel is pointless if each file is already bei...
genLValueScalarMethods(qw(project)); # Optional L<project|/project> name to allow error counts to be aggregated by L<project|/project> and to allow L<id and labels|Data::Edit::Xml/Labels> to be scoped to the...
genLValueArrayMethods(qw(reusedInProject)); # List of projects in which this file is reused, which can be set via L<reuseFileInProject|/reuseFileInProject> every time you discover another project in which a file is...
genLValueScalarMethods(qw(source)); # The source Xml to be written to L<file|/file> and linted.
genLValueScalarMethods(qw(title)); # Optional title of the xml - only needed if you want to generate an SDL file map.
#D1 Lint # Lint xml L<files|/file> in parallel
sub lint($@) #P Lint a L<files|/file>, using xmllint and update the source file with the results in text format so as to be be easy to search with grep.
{my ($lint, %attributes) = @_; # Linter, attributes to be recorded as xml comments
$lint->lineNumber = join ' ', caller; # Calling context
my $source = $lint->source;
$source or confess "Use the source() method to provide the source xml"; # Check that we have some source
$lint->file or confess "Use the ->file method to provide the target file"; # Check that we have an output file
$source =~ s/\s+\Z//gs; # Xml text to be written minus trailing blanks
my @lines = split /\n/, $source; # Split source into lines
( run in 1.125 second using v1.01-cache-2.11-cpan-39bf76dae61 )