Data-Edit-Xml-Lint

 view release on metacpan or  search on metacpan

lib/Data/Edit/Xml/Lint.pm  view on Meta::CPAN

#!/usr/bin/perl -I/home/phil/perl/cpan/DataTableText/lib/ -I/home/phil/perl/cpan/DataEditXml/lib/
#-------------------------------------------------------------------------------
# Lint xml files in parallel using xmllint and report the pass/failure rate.
# Philip R Brenan at gmail dot com, Appa Apps Ltd Inc, 2016-2019
#-------------------------------------------------------------------------------
# podDocumentation
# Id definitions should be processed independently of labels
# What sort of tag is on the end of the link?
# Report resolved, unresolved, missing links - difficult because of forking
# Check that the file actually has a lint section in read and do something about it if it does not
# Separate reference fixup into a separate framework (like Dita::Conversion)
# Show number of compressed errors on Lint summary line
# Highlight error counts in bold using boldText() or perhaps enclosed alphanumerics
# Relint load data in parallel
# option to print xmllint command
# inputFile=>name unicode seems to be failing
# Lots more tests needed

package Data::Edit::Xml::Lint;
our $VERSION = 20200108;
use warnings FATAL => qw(all);
use strict;
use Carp qw(cluck confess);
use Data::Dump qw(dump);
use Data::Table::Text qw(:all);
use Time::HiRes qw(time);
use Encode;

sub maxLintMsgLength {return 128}                                               # Xml lint messages longer than this many characters are truncated
sub maxExampleFiles  {return 3}                                                 # Upper limit on the number of example files

#D1 Constructor                                                                 # Construct a new linter

sub new                                                                         #S Construct an empty xml linter - call this method statically as in L<Data::Edit::Xml::Lint|/new> and then populate the relevant L<Attributes>.
 {my $linter = {};                                                              # No attributes are set at construction time
  return bless $linter, __PACKAGE__;                                            # Bless into the current package - the same package that one argument bless uses
 }

#D2 Attributes                                                                  # Attributes describing a lint.

# Each call below requests generation of an accessor method named after the attribute; lint() assigns through one of them ($lint->lineNumber = ...).  NOTE(review): the generators are presumably imported from Data::Table::Text via :all - confirm.
genLValueScalarMethods(qw(author));                                             # Optional author of the xml - only needed if you want to generate an SDL file map.
genLValueScalarMethods(qw(catalog));                                            # Optional catalog file containing the locations of the DTDs used to validate the xml or use L<dtds|/dtds> to supply a B<DTD> instead.
genLValueScalarMethods(qw(compressedErrors));                                   # Number of compressed errors discovered.
genLValueScalarMethods(qw(compressedErrorText));                                # Text of compressed errors.
genLValueScalarMethods(qw(ditaType));                                           # Optional Dita topic type (concept|task|troubleshooting|reference) of the xml - only needed if you want to generate an SDL file map.
genLValueScalarMethods(qw(docType));                                            # The second line: the document type extracted from the L<source|/source>.
genLValueScalarMethods(qw(dtds));                                               # Optional directory containing the DTDs used to validate the xml.
genLValueScalarMethods(qw(errors));                                             # Total number of uncompressed lint errors detected by xmllint over all files.
genLValueScalarMethods(qw(errorText));                                          # Text of uncompressed lint errors detected by xmllint over all files.
genLValueScalarMethods(qw(file));                                               # File that the xml should be written to or read from by L<lint|/lint>, L<read|/read> or L<relint|/relint>.
genLValueScalarMethods(qw(fileNumber));                                         # File number - assigned by the caller to help debugging transformations.
genLValueScalarMethods(qw(lineNumber));                                         # The file and line number of the caller so we can identify which request for lint gave rise to a particular file.
genLValueScalarMethods(qw(guid));                                               # Guid or id of the outermost tag - if not supplied the first definition encountered in each file will be used on the basis that all Dita topics require an id.
genLValueScalarMethods(qw(header));                                             # The first line: the xml header extracted from L<source|/source>.
genLValueScalarMethods(qw(idDefs));                                             # {id} = count - the number of times this id is defined in the xml contained in this L<file|/file>.
genLValueScalarMethods(qw(inputFile));                                          # The file from which this xml was obtained.
genLValueScalarMethods(qw(labelDefs));                                          # {label or id} = id - the id of the node containing a L<label|Data::Edit::Xml/Labels> defined on the xml.
genLValueScalarMethods(qw(labels));                                             # Optional parse tree to supply L<labels|Data::Edit::Xml/Labels> for the current L<source|/source> as the labels are present in the parse tree not in the string represent...
genLValueScalarMethods(qw(linted));                                             # Date the lint was performed by L<lint|/lint>.  We avoid adding a time as well because this then induces much longer sync times with AWS S3.
genLValueScalarMethods(qw(preferredSource));                                    # Preferred representation of the xml source, used by L<relint|/relint> to supply a preferred representation for the source.
genLValueScalarMethods(qw(processes));                                          # Maximum number of xmllint processes to run in parallel - 8 by default if linting in parallel is being used. Linting in parallel is pointless if each file is already bei...
genLValueScalarMethods(qw(project));                                            # Optional L<project|/project> name to allow error counts to be aggregated by L<project|/project> and to allow L<id and labels|Data::Edit::Xml/Labels> to be scoped to the...
genLValueArrayMethods(qw(reusedInProject));                                     # List of projects in which this file is reused, which can be set via L<reuseFileInProject|/reuseFileInProject> every time you discover another project in which a file is...
genLValueScalarMethods(qw(source));                                             # The source Xml to be written to L<file|/file> and linted.
genLValueScalarMethods(qw(title));                                              # Optional title of the xml - only needed if you want to generate an SDL file map.

#D1 Lint                                                                        # Lint xml L<files|/file> in parallel

sub lint($@)                                                                    #P Lint a L<files|/file>, using xmllint and update the source file with the results in text format so as to be be easy to search with grep.
 {my ($lint, %attributes) = @_;                                                 # Linter, attributes to be recorded as xml comments
      $lint->lineNumber = join ' ', caller;                                     # Calling context
  my $source = $lint->source;
  $source or confess "Use the source() method to provide the source xml";       # Check that we have some source
  $lint->file or confess "Use the ->file method to provide the target file";    # Check that we have an output file

  $source =~ s/\s+\Z//gs;                                                       # Xml text to be written minus trailing blanks
  my @lines = split /\n/, $source;                                              # Split source into lines



( run in 1.125 second using v1.01-cache-2.11-cpan-39bf76dae61 )