Data-Edit-Xml-Xref
view release on metacpan or search on metacpan
lib/Data/Edit/Xml/Xref.pm view on Meta::CPAN
#!/usr/bin/perl -I/home/phil/perl/cpan/DataEditXml/lib/ -I/home/phil/perl/cpan/DataTableText/lib/ -I/home/phil/perl/cpan/DitaGBStandard/lib/
#-------------------------------------------------------------------------------
# Cross reference Dita XML, match topics and ameliorate missing references.
# Philip R Brenan at gmail dot com, Appa Apps Ltd Inc, 2016-2019
# Improvements and maxzoomin
#-------------------------------------------------------------------------------
# Check for image formats that will not display in a browser
# Do not consider companion files!
# Images that are referenced by topics which are not referenced by bookmaps show up as referenced
# It should be possible to remove reportImages by using generic references instead
# Conref processing in reportReferencesFromBookmaps
# Fix xref external/scope and eliminate error count if fixbadrefs in operation.
# Add labels to ditaRefs processing so that references to labels are also fixed
# Add xref expansion from id in file as it is a pain to code up the full details by hand
# Find topics that have no text in them per: PS2-617
# Need test for changeBadXrefToPh
# Unique target needs tests
# Create list of images found in input folder
# Conrefs report should use targets/ to update the conref file so conrefs fixed by fixDitaRefs are considered
package Data::Edit::Xml::Xref;
our $VERSION = 20200424;
use v5.26;
use warnings FATAL => qw(all);
use strict;
use Carp qw(confess cluck);
use Data::Dump qw(dump);
use Data::Edit::Xml;
use Data::Table::Text qw(:all);
use Dita::GB::Standard;
use Storable qw(store retrieve);
use Time::HiRes qw(time);
use utf8;
#sub improvementLength {80} #P Maximum length of the test of an improvement suggestion
sub classificationMapSuffix #P Suffix to add to map files to create corresponding classification map file
 {return '_classification.ditamap'; # Constant suffix, takes no parameters
 }
#D1 Cross reference # Check the cross references in a set of Dita files and report the results.
sub newXref(%) #P Create a new cross referencer
{my (%attributes) = @_; # Attributes
my $xref = genHash(__PACKAGE__, # Attributes used by the Xref cross referencer.
addNavTitles => undef, #I If true, add navtitle to outgoing bookmap references to show the title of the target topic.
allowUniquePartialMatches => undef, # Allow unique partial matches - i.e ignore the stuff to the right of the # in a reference if doing so produces a unique result. This feature has been explicitly disabled...
attributeCount => {}, # {file}{attribute name} == count of the different xml attributes found in the xml files.
attributeNamesAndValuesCount => {}, # {file}{attribute name}{value} = count
author => {}, # {file} = author of this file.
badGuidHrefs => {}, # Bad conrefs - all.
badNavTitles => {}, # Details of nav titles that were not resolved
badReferencesCount => 0, # The number of bad references at the start of the run - however depending on what options were chosen Xref might ameliorate these bad references and thereby reduce this ...
badTables => [], # Array of tables that need fixing.
badXml1 => {}, # [Files] with a bad xml encoding header on the first line.
badXml2 => {}, # [Files] with a bad xml doc type on the second line.
baseFiles => {}, # {base of file name}{full file name}++ Current location of the file via uniqueness guaranteed by the GB standard
baseTag => {}, # Base Tag for each file
bookMapRefs => {}, # {bookmap full file name}{href}{navTitle}++ References from bookmaps to topics via appendix, chapter, bookmapref.
changeBadXrefToPh => undef, #I Change xrefs being placed in B<M3> by L<fixBadRefs> to B<ph>.
classificationMaps => undef, #I Create classification maps if true
conRefs => {}, # {file}{href}{tag}++ : conref source detail
createReports1 => [], # Reports requested before references fixed
createReports2 => [], # Reports requested after references fixed
currentFolder => currentDirectory, # The current working folder used to make absolute file names from relative ones
deleteUnusedIds => 0, #I Delete ids (except on topics) that are not referenced in any reference in the corpus regardless of the file component of any such reference.
deguidize => undef, #I Set true to replace guids in dita references with file name. Given reference B<g1#g2/id> convert B<g1> to a file name by locating the topic with topicId B<g2>. This r...
docType => {}, # {file} == docType: the docType for each xml file.
duplicateIds => {}, # [file, id] Duplicate id definitions within each file.
duplicateTopicIds => {}, # Duplicate topic ids
duplicateTopicIds => {}, # [topicId, [files]] Files with duplicate topic ids - the id on the outermost tag.
emptyTopics => {}, # {file} : topics where the *body is empty.
errors => 0, # Number of significant errors as reported in L<statusLine> or 0 if no such errors found
exteriorMaps => {}, # {exterior map} : maps that are not referenced by another map
fileExtensions => [qw(.dita .ditamap .xml .fodt)], # Default file extensions to load
fixBadRefs => undef, #I Fix any remaining bad references after any all allowed attempts have been made to fix failing references by moving the failing reference to the B<xtrf> attribute i.e. ...
fixDitaRefs => undef, #I Fix references in a corpus of L<Dita> documents that have been converted to the L<GBStandard> and whose target structure has been written to the named folder.
fixedFolder => undef, #I Fixed files are placed in this folder.
fixedFolderTemp => undef, #I Fixed files are placed in this folder if we are on aws but nit the session leader - this folder is then copied back to L<fixedFolder> on the session leader.
fixedRefsBad => [], # [] hrefs and conrefs from L<fixRefs|/fixRefs> which were moved to the "xtrf" attribute as requested by the L<fixBadHrefs|/fixBadHrefs> attribute because the reference w...
fixedRefsGB => [], # [] files fixed to the Gearhart-Brenan file naming standard
fixedRefsGood => [], # [] hrefs and conrefs from L<fixRefs|/fixRefs> which were invalid but have been fixed by L<deguidizing|/deguidize> them to a valid file name.
fixedRefsNoAction => [], # [] hrefs and conrefs from L<fixRefs|/fixRefs> for which no action was taken.
fixRefs => {}, # {file}{ref} where the href or conref target is not valid.
fixRelocatedRefs => undef, #I Fix references to topics that have been moved around in the out folder structure assuming that all file names are unique which they will be if they have been renamed t...
fixXrefsByTitle => undef, #I Try to fix invalid xrefs by the Gearhart Title Method enhanced by the Monroe map method if true
flattenFiles => {}, # {old full file name} = file renamed to Gearhart-Brenan file naming standard
flattenFolder => undef, #I Files are renamed to the Gearhart standard and placed in this folder if set. References to the unflattened files are updated to references to the flattened files. Th...
getFileUrl => qq(/cgi-bin/uiSelfServiceXref/client.pl?getFile=), #I A url to retrieve a specified file from the server running xref used in generating html reports. The complete url is obtained by appending the fully qualified file nam...
goodImageFiles => {}, # {file}++ : number of references to each good image
goodNavTitles => {}, # Details of nav titles that were resolved.
guidHrefs => {}, # {file}{href} = location where href starts with GUID- and is thus probably a guid.
guidToFile => {}, # {topic id which is a guid} = file defining topic id.
hrefUrlEncoding => {}, # Hrefs that need url encoding because they contain white space.
html => undef, #I Generate html version of reports in this folder if supplied
idNotReferenced => {}, # {file}{id}++ - id in a file that is not referenced
idReferencedCount => {}, # {file}{id}++ - the number of times this id in this file is referenced from the rest of the corpus
ids => {}, # {file}{id} - id definitions across all files.
idsRemoved => {}, # {id}++ : Ids removed from all files
idTags => {}, # {file}{id}[tag] The tags associated with each id in a file - there might be more than one if the id is duplicated
images => {}, # {file}{href} Count of image references in each file.
imagesReferencedFromBookMaps => {}, # {bookmap full file name}{full name of image referenced from topic referenced from bookmap}++
imagesReferencedFromTopics => {}, # {topic full file name}{full name of image referenced from topic}++
imagesToRefferingBookMaps => {}, # {image full file name}{bookmap full file name}++ : images to referring bookmaps
indexWords => undef, #I Index words to topics and topics to words if true.
indexWordsFolder => undef, #I Folder into which to save words to topic and topics to word indexes if L<indexWords> is true.
indexedWords => {}, # {word}{full file name of topic the words occurs in}.
inputFiles => [], # Input files from L<inputFolder|/inputFolder>.
inputFileToTargetTopics => {}, # {input file}{target file}++ : Tells us the topics an input file was split into
inputFolderImages => {}, # {full image file name} for all files in input folder thus including any images resent
inputFolder => undef, #I A folder containing the dita and ditamap files to be cross referenced.
ltgt => {}, # {text between < and >}{filename} = count giving the count of text items found between < and >
matchTopics => undef, #I Match topics by title and by vocabulary to the specified confidence level between 0 and 1. This operation might take some time to complete on a large corpus.
maximumNumberOfProcesses => numberOfCpus(8), #I Maximum number of processes to run in parallel at any one time with a sensible default.
maxZoomIn => undef, #I Optional hash of names to regular expressions to look for in each file
maxZoomOut => {}, # Results from L<maxZoomIn|/maxZoomIn> where {file name}{regular expression key name in L<maxZoomIn|/maxZoomIn>}++
md5Sum => {}, # MD5 sum for each input file.
md5SumDuplicates => {}, # {md5sum}{file}++ : md5 sums with more than one file
missingImageFiles => {}, # [file, href] == Missing images in each file.
missingTopicIds => {}, # Missing topic ids.
noHref => {}, # Tags that should have an href but do not have one.
notReferenced => {}, # {file name} Files in input area that are not referenced by a conref, image, bookmapref or xref tag and are not a bookmap.
olBody => {}, # The number of ol under body by file
originalSourceFileAndIdToNewFile => {}, # {original file}{id} = new file: Record mapping from original source file and id to the new file containing the id
otherMeta => {}, # {original file}{othermeta name}{othermeta content}++ : the contents of the other meta tags
otherMetaDuplicatesSeparately => [], # Duplicate othermeta in bookmaps and topics considered separately
otherMetaDuplicatesCombined => [], # Duplicate othermeta in bookmaps with called topics othermeta included
otherMetaRemainWithTopic => [], # Othermeta that must stay in the topic
otherMetaPushToBookMap => [], # Othermeta that can be pushed to the calling book map
otherMetaBookMapsBeforeTopicIncludes=> [], # Bookmap othermeta before topic othermeta has been included
otherMetaBookMapsAfterTopicIncludes => [], # Bookmap othermeta after topic othermeta has been included
otherMetaConsolidated => {}, # {Name}{Content}++ : consolidated other meta data across entire corpus
oxygenProjects => undef, #I Create oxygen project files for each map - the project file will have an extension of .xpr and the same name and path as the map file or the name return by your implem...
parseFailed => {}, # {file} files that failed to parse.
publicId => {}, # {file} = Public id on Doctype
references => {}, # {file}{reference}++ - the various references encountered
relocatedReferencesFailed => [], # Failing references that were not fixed by relocation
relocatedReferencesFixed => [], # Relocated references fixed
requestAttributeNameAndValueCounts => undef, #I Report attribute name and value counts
requiredCleanUp => undef, # {full file name}{cleanup} = number of required-cleanups
reports => undef, #I Reports folder: Xref will write text versions of the generated reports to files in this folder.
results => [], # Summary of results table.
# sourceFile => undef, # The source file from whic#h this structure was generated.
sourceTopicToTargetBookMap => {}, # {input topic cut into multiple pieces} = output bookmap representing pieces
statusLine => undef, # Status line summarizing the cross reference.
statusTable => undef, # Status table summarizing the cross reference.
subjectSchemeMap => undef, #I Create a subject scheme map in the named file
suppressReferenceChecks => undef, #I Suppress reference checking - which normally happens by default - but which takes time and might be irrelevant if an earlier xref has already checked all the reference...
lib/Data/Edit/Xml/Xref.pm view on Meta::CPAN
my $target = fpf($xref->flattenFolder, $xref->flattenFiles->{$file}); # Previously assigned GB name. We cannot use the very latest name because other files have to be told about it and in changing them to reflect the latest name we would c...
editXml($file, $target, -p $x); # Edit xml
\@r # Return report of items fixed
}
sub fixFilesGB($) #P Rename files to the L<GBStandard>
 {my ($xref) = @_; # Xref results
  my $failed = $xref->parseFailed; # Files recorded as having failed to parse
  my @parsed = grep {!$failed->{$_}} sort $xref->inputFiles->@*; # Input files that parsed, in sorted order
  my @fixes; # Fixes accumulated across all files

  processFilesInParallel(
    sub # Per file: fix that one file
     {my $file = $_[0]; # File to fix
      $xref->fixOneFileGB($file);
     },
    sub # Results: flatten whatever is passed in onto the accumulated fixes
     {push @fixes, deSquareArray(@_);
     },
    @parsed);

  my $columns = <<END; # Column descriptions for the report
Href The href being fixed
Source The source file containing the href
END

  my $head = <<END; # Heading line for the report
Xref failed to fix NNNN hrefs to the Gearhart-Brenan file naming standard
END

  my $reportFile = fpe(qw(bad fixedRefsGB txt)); # Report file name built from these components

  $xref->fixedRefsGB = \@fixes; # Record the fixes on the cross referencer before reporting them
  formatTables($xref, $xref->fixedRefsGB, # Report results
    columns   => $columns,
    summarize => 1,
    title     => qq(Hrefs that can not be renamed to the Gearhart-Brenan file naming standard),
    head      => $head,
    file      => $reportFile);

  return {fixedRefsGB => $xref->fixedRefsGB}; # Hand the fixes back to the caller keyed by field name
 } # fixFilesGB
sub analyzeOneFileParallel($$) #P Analyze one input file
{my ($Xref, $iFile) = @_; # Xref request, File to analyze
my $xref = newXref(); # Cross referencer for this file
# $xref->sourceFile = $iFile; # File analyzed
my %maxZoomIn = $Xref->maxZoomIn ? %{$Xref->maxZoomIn} : (); # Regular expressions from maxZoomIn to look for text
my %maxZoomOut; # Text elements that match a maxZoomIn regular expression
my %countAttrNames; # Attribute names
my %countAttrNamesAndValues; # Attribute names and values
my %countTagNames; # Tag names
my $changes; # Changes made to the file
my $tags; my $texts; # Number of tags and text elements
my $source = readFile($iFile); # Source of file so we can gets its GB Standard name
my $x = eval {Data::Edit::Xml::new($iFile)}; # Parse xml - at this point if the caller is interested in line numbers they should have added them.
if ($@) # Check we were able to parse the xml
{$xref->parseFailed->{$iFile}++;
return $xref;
}
my $md5 = $xref->md5Sum->{$iFile} = -M $x; # Md5 sum for parse tree
if ($Xref->flattenFolder)
{$xref->flattenFiles->{$iFile} = # Record correspondence between existing file and its GB Standard file name
Dita::GB::Standard::gbStandardFileName($source, fe($iFile), md5=>$md5);
}
my $saveReference = sub # Save a reference so it can be integrity checked later
{my ($ref) = @_; # Reference
return if externalReference($ref); # Looks like an external reference
$xref->references->{$iFile}{$ref}++; # Save reference
};
my $isADitaMap = $x->isADitaMap; # Map
$x->by(sub # Each node
{my ($o) = @_;
# my $content = sub #P First few characters of content on one line to avoid triggering multi table layouts
# {my ($o) = @_; # String
# nws($o->stringContent, improvementLength); # Length of improvement
# };
my $tag = -t $o; # Element tag
if ($tag eq q(CDATA)) {++$texts} else {++$tags} # Count texts and tags
if (my $h = $o->href) # Check href
{if ($h =~ m(\s)s and externalReference($h)) # Check href for url encoding needed
{$xref->{hrefUrlEncoding}{$iFile}{$o->lineLocation} = $h;
}
if ($xref->{deguidize} and $h =~ m(\bguid-)is) # Deguidizing a href that looks as if it might have a guid in it
{$xref->{fixRefs}{$iFile}{$h}++
}
&$saveReference($h);
}
if (my $conref = $o->attr(q(conref))) # Conref
{my $saveConRef = sub # Save a conref
{my ($conRef) = @_; # Conref
$xref->{conRefs}{$iFile}{$conRef}{$tag}++;
};
&$saveConRef($conref);
&$saveReference($conref);
if (my $conref = $o->attr(q(conrefend))) # Conref end
{&$saveConRef($conref);
&$saveReference($conref);
}
}
if (my $i = $o->id) # Id definitions
{$xref->{ids}{$iFile}{$i}++;
push $xref->{idTags}{$iFile}{$i}->@*, $tag; # Tags for each id in the file
}
if ($tag eq q(xref)) # Xrefs but not to the web
{if (my $h = $o->href)
{if (externalReference($h)) # Check attributes on external links
{if ($o->attrX(q(scope)) !~ m(\Aexternal\Z)s)
{$xref->{xrefBadScope}{$iFile}{$h} = -A $o;
}
if ($o->attrX(q(format)) !~ m(\Ahtml\Z)s)
{$xref->{xrefBadFormat}{$iFile}{$h} = -A $o;
}
lib/Data/Edit/Xml/Xref.pm view on Meta::CPAN
=head1 Cross reference
Check the cross references in a set of Dita files and report the results.
=head2 xref(%attributes)
Check the cross references in a set of Dita files held in L<inputFolder|/inputFolder> and report the results in the L<reports|/reports> folder. The possible attributes are defined in L<Data::Edit::Xml::Xref|/Data::Edit::Xml::Xref>.
Parameter Description
1 %attributes Cross referencer attribute value pairs
B<Example:>
lll "Test 011";
clearFolder(tests, 111);
createSampleInputFilesForFixDitaRefsImproved3(tests);
my $y = 𝘅𝗿𝗲𝗳(inputFolder => out, reports => reportFolder); # Check results without fixes
ok $y->statusLine eq q(Xref: 1 ref);
my $x = 𝘅𝗿𝗲𝗳
(inputFolder => out,
reports => reportFolder,
fixBadRefs => 1,
fixDitaRefs => targets,
fixedFolder => outFixed);
ok !$x->errors;
=head1 Create test data
Create files to test the various capabilities provided by Xref
=head2 Data::Edit::Xml::Xref Definition
Attributes used by the Xref cross referencer.
=head3 Input fields
B<addNavTitles> - If true, add navtitle to outgoing bookmap references to show the title of the target topic.
B<changeBadXrefToPh> - Change xrefs being placed in B<M3> by L<fixBadRefs> to B<ph>.
B<classificationMaps> - Create classification maps if true
B<deguidize> - Set true to replace guids in dita references with file name. Given reference B<g1#g2/id> convert B<g1> to a file name by locating the topic with topicId B<g2>. This requires the guids to be genuinely unique. SDL guids are thought to b...
B<deleteUnusedIds> - Delete ids (except on topics) that are not referenced in any reference in the corpus regardless of the file component of any such reference.
B<fixBadRefs> - Fix any remaining bad references after all allowed attempts have been made to fix failing references by moving the failing reference to the B<xtrf> attribute i.e. placing it in B<M3> possibly renaming the tag to B<ph> if L<changeB...
B<fixDitaRefs> - Fix references in a corpus of L<Dita|http://docs.oasis-open.org/dita/dita/v1.3/os/part2-tech-content/dita-v1.3-os-part2-tech-content.html> documents that have been converted to the L<GB Standard|http://metacpan.org/pod/Dita::GB::Stan...
B<fixRelocatedRefs> - Fix references to topics that have been moved around in the out folder structure assuming that all file names are unique which they will be if they have been renamed to the GB Standard.
B<fixXrefsByTitle> - Try to fix invalid xrefs by the Gearhart Title Method enhanced by the Monroe map method if true
B<fixedFolder> - Fixed files are placed in this folder.
B<fixedFolderTemp> - Fixed files are placed in this folder if we are on aws but not the session leader - this folder is then copied back to L<fixedFolder> on the session leader.
B<flattenFolder> - Files are renamed to the Gearhart standard and placed in this folder if set. References to the unflattened files are updated to references to the flattened files. This option will eventually be deprecated as the Dita::GB::Standar...
B<getFileUrl> - A url to retrieve a specified file from the server running xref used in generating html reports. The complete url is obtained by appending the fully qualified file name to this value.
B<html> - Generate html version of reports in this folder if supplied
B<indexWords> - Index words to topics and topics to words if true.
B<indexWordsFolder> - Folder into which to save words to topic and topics to word indexes if L<indexWords> is true.
B<inputFolder> - A folder containing the dita and ditamap files to be cross referenced.
B<matchTopics> - Match topics by title and by vocabulary to the specified confidence level between 0 and 1. This operation might take some time to complete on a large corpus.
B<maxZoomIn> - Optional hash of names to regular expressions to look for in each file
B<maximumNumberOfProcesses> - Maximum number of processes to run in parallel at any one time with a sensible default.
B<oxygenProjects> - Create oxygen project files for each map - the project file will have an extension of .xpr and the same name and path as the map file or the name returned by your implementation of: Data::Edit::Xml::Xref::xprName($map) if present.
B<reports> - Reports folder: Xref will write text versions of the generated reports to files in this folder.
B<requestAttributeNameAndValueCounts> - Report attribute name and value counts
B<subjectSchemeMap> - Create a subject scheme map in the named file
B<suppressReferenceChecks> - Suppress reference checking - which normally happens by default - but which takes time and might be irrelevant if an earlier xref has already checked all the references.
B<validateUrls> - Validate urls if true by fetching their headers with L<curl|https://linux.die.net/man/1/curl>
=head3 Output fields
B<allowUniquePartialMatches> - Allow unique partial matches - i.e ignore the stuff to the right of the # in a reference if doing so produces a unique result. This feature has been explicitly disabled for conrefs (PS2-561) and might need to be disable...
B<attributeCount> - {file}{attribute name} == count of the different xml attributes found in the xml files.
B<attributeNamesAndValuesCount> - {file}{attribute name}{value} = count
B<author> - {file} = author of this file.
B<badGuidHrefs> - Bad conrefs - all.
B<badNavTitles> - Details of nav titles that were not resolved
B<badReferencesCount> - The number of bad references at the start of the run - however depending on what options were chosen Xref might ameliorate these bad references and thereby reduce this count.
B<badTables> - Array of tables that need fixing.
B<badXml1> - [Files] with a bad xml encoding header on the first line.
B<badXml2> - [Files] with a bad xml doc type on the second line.
B<baseFiles> - {base of file name}{full file name}++ Current location of the file via uniqueness guaranteed by the GB standard
B<baseTag> - Base Tag for each file
B<bookMapRefs> - {bookmap full file name}{href}{navTitle}++ References from bookmaps to topics via appendix, chapter, bookmapref.
lib/Data/Edit/Xml/Xref.pm view on Meta::CPAN
Parameter Description
1 $hash Hash to be converted
=head2 reportGuidsToFiles($xref)
Map and report guids to files
Parameter Description
1 $xref Xref results
=head2 editXml($in, $out, $source)
Edit an xml file retaining any existing XML headers and lint trailers
Parameter Description
1 $in Input file
2 $out Output file
3 $source Source to write
=head2 fixReferencesInOneFile($xref, $sourceFile)
Fix one file by moving unresolved references to the xtrf attribute
Parameter Description
1 $xref Xref results
2 $sourceFile Source file to fix
=head2 fixReferencesParallel($xref, $file)
Fix the references in one file
Parameter Description
1 $xref Cross referencer
2 $file File to fix
=head2 fixReferencesResults($xref, @results)
Consolidate the results of fixing references.
Parameter Description
1 $xref Cross referencer
2 @results Results from fixReferencesInParallel
=head2 fixReferences($xref)
Fix just the file containing references using a number of techniques and report those references that cannot be so fixed.
Parameter Description
1 $xref Xref results
=head2 fixOneFileGB($xref, $file)
Fix one file to the Gearhart-Brenan standard
Parameter Description
1 $xref Xref results
2 $file File to fix
=head2 fixFilesGB($xref)
Rename files to the L<GB Standard|http://metacpan.org/pod/Dita::GB::Standard>
Parameter Description
1 $xref Xref results
=head2 analyzeOneFileParallel($Xref, $iFile)
Analyze one input file
Parameter Description
1 $Xref Xref request
2 $iFile File to analyze
=head2 analyzeOneFileResults($xref, @x)
Merge a list of cross reference results into the first cross referencer in the list
Parameter Description
1 $xref Cross referencer to merge into
2 @x Other cross referencers
=head2 analyzeInputFiles($xref)
Analyze the input files
Parameter Description
1 $xref Cross referencer
=head2 reportIdRefs($xref)
Report the number of times each id is referenced
Parameter Description
1 $xref Cross referencer
=head2 removeUnusedIds($xref)
Remove ids that are not mentioned in any href or conref in the corpus regardless of the file component of any such reference. This is a very conservative approach which acknowledges that writers might be looking for an id if they mention it in a r...
Parameter Description
1 $xref Cross referencer
=head2 reportEmptyTopics($xref)
Report empty topics
Parameter Description
1 $xref Cross referencer
=head2 reportDuplicateIds($xref)
Report duplicate ids
Parameter Description
1 $xref Cross referencer
=head2 reportDuplicateTopicIds($xref)
Report duplicate topic ids
Parameter Description
lib/Data/Edit/Xml/Xref.pm view on Meta::CPAN
18 L<createReportsInParallel2|/createReportsInParallel2> - Create reports in parallel that require fixed references
19 L<createRequiredCleanUps|/createRequiredCleanUps> - Required clean ups report
20 L<createSampleConRefMatching|/createSampleConRefMatching> - Create sample input files for matching conref source and targets
21 L<createSampleConRefs|/createSampleConRefs> - Create sample input files for fixing a conref
22 L<createSampleDuplicateMd5Sum|/createSampleDuplicateMd5Sum> - Create sample input files with duplicate md5 sums
23 L<createSampleImageReferences|/createSampleImageReferences> - Good and bad image references
24 L<createSampleImageTest|/createSampleImageTest> - Create sample input files for fixing bookmap reference to a topic that did not get cut into multiple pieces
25 L<createSampleInputFilesBaseCase|/createSampleInputFilesBaseCase> - Create sample input files for testing.
26 L<createSampleInputFilesFixFolder|/createSampleInputFilesFixFolder> - Create sample input files for testing fixFolder
27 L<createSampleInputFilesForFixDitaRefs|/createSampleInputFilesForFixDitaRefs> - Create sample input files for fixing renamed topic refs
28 L<createSampleInputFilesForFixDitaRefsImproved1|/createSampleInputFilesForFixDitaRefsImproved1> - Create sample input files for fixing references via the targets/ folder
29 L<createSampleInputFilesForFixDitaRefsImproved2|/createSampleInputFilesForFixDitaRefsImproved2> - Create sample input files for fixing conref references via the targets/ folder
30 L<createSampleInputFilesForFixDitaRefsImproved3|/createSampleInputFilesForFixDitaRefsImproved3> - Create sample input files for fixing bookmap references to topics that get cut into multiple pieces
31 L<createSampleInputFilesForFixDitaRefsImproved4|/createSampleInputFilesForFixDitaRefsImproved4> - Create sample input files for fixing bookmap reference to a topic that did not get cut into multiple pieces
32 L<createSampleInputFilesForFixDitaRefsXref|/createSampleInputFilesForFixDitaRefsXref> - Create sample input files for fixing references into renamed topics by xref
33 L<createSampleInputFilesLtGt|/createSampleInputFilesLtGt> - Create sample input files for testing items between < and >
34 L<createSampleOtherMeta|/createSampleOtherMeta> - Create sample data for othermeta reports
35 L<createSampleTopicsReferencedFromBookMaps|/createSampleTopicsReferencedFromBookMaps> - The number of times a topic is referenced from a bookmap
36 L<createSampleUnreferencedIds|/createSampleUnreferencedIds> - Create sample input files with unreferenced ids
37 L<createSoftConrefs|/createSoftConrefs> - Fix file part of conref even if the rest is invalid
38 L<createSubjectSchemeMap|/createSubjectSchemeMap> - Create a subject scheme map from othermeta
39 L<createTestOneNotRef|/createTestOneNotRef> - One topic referenced and the other not
40 L<createTestReferencedToFlattenedTopic|/createTestReferencedToFlattenedTopic> - Full reference to a topic that has been flattened
41 L<createTestReferenceToCutOutTopic|/createTestReferenceToCutOutTopic> - References from a topic that has been cut out to a topic that has been cut out
42 L<createTestTopicFlattening|/createTestTopicFlattening> - Create sample input files for testing topic flattening ratio reporting
43 L<createUrlTests|/createUrlTests> - Check urls
44 L<createWordsToFilesTest|/createWordsToFilesTest> - Index words to file
45 L<deleteVariableFields|/deleteVariableFields> - Remove time and other fields that do not affect the end results
46 L<editXml|/editXml> - Edit an xml file retaining any existing XML headers and lint trailers
47 L<externalReference|/externalReference> - Check for an external reference
48 L<fixFilesGB|/fixFilesGB> - Rename files to the L<GB Standard|http://metacpan.org/pod/Dita::GB::Standard>
49 L<fixingRun|/fixingRun> - A fixing run fixes problems where it can and thus induces changes which might make the updated output different from the incoming source.
50 L<fixOneFileGB|/fixOneFileGB> - Fix one file to the Gearhart-Brenan standard
51 L<fixReferences|/fixReferences> - Fix just the file containing references using a number of techniques and report those references that cannot be so fixed.
52 L<fixReferencesInOneFile|/fixReferencesInOneFile> - Fix one file by moving unresolved references to the xtrf attribute
53 L<fixReferencesParallel|/fixReferencesParallel> - Fix the references in one file
54 L<fixReferencesResults|/fixReferencesResults> - Consolidate the results of fixing references.
55 L<formatTables|/formatTables> - Using cross reference B<$xref> options and an array of arrays B<$data> format a report as a table using B<%options> as described in L<Data::Table::Text::formatTable> and L<Data::Table::Text::formatHtmlTable>.
56 L<hashOfCountsToArray|/hashOfCountsToArray> - Convert a B<$hash> of {key} = count to an array so it can be formatted with L<formatTables>
57 L<loadInputFiles|/loadInputFiles> - Load the names of the files to be processed
58 L<newXref|/newXref> - Create a new cross referencer
59 L<oneBadRef|/oneBadRef> - Check one reference and return the first error encountered or B<undef> if no errors encountered.
60 L<oxygenProjectFileMetaData|/oxygenProjectFileMetaData> - Meta data for the oxygen project files
61 L<removeUnusedIds|/removeUnusedIds> - Remove ids that are not mentioned in any href or conref in the corpus regardless of the file component of any such reference.
62 L<reportAttributeCount|/reportAttributeCount> - Report attribute counts
63 L<reportAttributeNameAndValueCounts|/reportAttributeNameAndValueCounts> - Report attribute value counts
64 L<reportConRefMatching|/reportConRefMatching> - Report conref matching
65 L<reportDocTypeCount|/reportDocTypeCount> - Report doc type count
66 L<reportDuplicateIds|/reportDuplicateIds> - Report duplicate ids
67 L<reportDuplicateTopicIds|/reportDuplicateTopicIds> - Report duplicate topic ids
68 L<reportEmptyTopics|/reportEmptyTopics> - Report empty topics
69 L<reportExteriorMaps|/reportExteriorMaps> - Maps that are not referenced by any other map
70 L<reportExternalXrefs|/reportExternalXrefs> - Report external xrefs missing other attributes
71 L<reportFileExtensionCount|/reportFileExtensionCount> - Report file extension counts
72 L<reportFileTypes|/reportFileTypes> - Report file type counts - takes too long in series
73 L<reportFixRefs|/reportFixRefs> - Report of hrefs that need to be fixed
74 L<reportGuidHrefs|/reportGuidHrefs> - Report on guid hrefs
75 L<reportGuidsToFiles|/reportGuidsToFiles> - Map and report guids to files
76 L<reportHrefUrlEncoding|/reportHrefUrlEncoding> - href needs url encoding
77 L<reportIdRefs|/reportIdRefs> - Report the number of times each id is referenced
78 L<reportImages|/reportImages> - Reports on images and references to images
( run in 0.344 second using v1.01-cache-2.11-cpan-00829025b61 )