Data-Edit-Xml-Xref

 view release on metacpan or  search on metacpan

lib/Data/Edit/Xml/Xref.pm  view on Meta::CPAN

#!/usr/bin/perl -I/home/phil/perl/cpan/DataEditXml/lib/ -I/home/phil/perl/cpan/DataTableText/lib/ -I/home/phil/perl/cpan/DitaGBStandard/lib/
#-------------------------------------------------------------------------------
# Cross reference Dita XML, match topics and ameliorate missing references.
# Philip R Brenan at gmail dot com, Appa Apps Ltd Inc, 2016-2019
# Improvements and maxzoomin
#-------------------------------------------------------------------------------
# Check for image formats that will not display in a browser
# Do not consider companion files!
# Images that are referenced by topics which are not referenced by bookmaps show up as referenced
# It should be possible to remove reportImages by using generic references instead
# Conref processing in reportReferencesFromBookmaps
# Fix xref external/scope and eliminate error count if fixbadrefs in operation.
# Add labels to ditaRefs processing so that references to labels are also fixed
# Add xref expansion from id in file as it is a pain to code up the full details by hand
# Find topics that have no text in them per: PS2-617
# Need test for changeBadXrefToPh
# Unique target needs tests
# Create list of images found in input folder
# Conrefs report should use targets/ to update the conref file so conrefs fixed by fixDitaRefs are considered

package Data::Edit::Xml::Xref;
our $VERSION = 20200424;
use v5.26;
use warnings FATAL => qw(all);
use strict;
use Carp qw(confess cluck);
use Data::Dump qw(dump);
use Data::Edit::Xml;
use Data::Table::Text qw(:all);
use Dita::GB::Standard;
use Storable qw(store retrieve);
use Time::HiRes qw(time);
use utf8;

#sub improvementLength      {80}                                                 #P Maximum length of the test of an improvement suggestion
sub classificationMapSuffix                                                     #P Suffix to add to a map file name to obtain the name of the corresponding classification map file
 {return q(_classification.ditamap);                                            # Constant suffix
 }

#D1 Cross reference                                                             # Check the cross references in a set of Dita files and report the results.

sub newXref(%)                                                                  #P Create a new cross referencer
 {my (%attributes) = @_;                                                        # Attributes

  my $xref = genHash(__PACKAGE__,                                               # Attributes used by the Xref cross referencer.
    addNavTitles                        => undef,                               #I If true, add navtitle to outgoing bookmap references to show the title of the target topic.
    allowUniquePartialMatches           => undef,                               # Allow unique partial matches - i.e ignore the stuff to the right of the # in a reference if doing so produces a unique result. This feature has been explicitly disabled...
    attributeCount                      => {},                                  # {file}{attribute name} == count of the different xml attributes found in the xml files.
    attributeNamesAndValuesCount        => {},                                  # {file}{attribute name}{value} = count
    author                              => {},                                  # {file} = author of this file.
    badGuidHrefs                        => {},                                  # Bad conrefs - all.
    badNavTitles                        => {},                                  # Details of nav titles that were not resolved
    badReferencesCount                  => 0,                                   # The number of bad references at the start of the run - however depending on what options were chosen Xref might ameliorate these bad references and thereby reduce this ...
    badTables                           => [],                                  # Array of tables that need fixing.
    badXml1                             => {},                                  # [Files] with a bad xml encoding header on the first line.
    badXml2                             => {},                                  # [Files] with a bad xml doc type on the second line.
    baseFiles                           => {},                                  # {base of file name}{full file name}++ Current location of the file via uniqueness guaranteed by the GB standard
    baseTag                             => {},                                  # Base Tag for each file
    bookMapRefs                         => {},                                  # {bookmap full file name}{href}{navTitle}++ References from bookmaps to topics via appendix, chapter, bookmapref.
    changeBadXrefToPh                   => undef,                               #I Change xrefs being placed in B<M3> by L<fixBadRefs> to B<ph>.
    classificationMaps                  => undef,                               #I Create classification maps if true
    conRefs                             => {},                                  # {file}{href}{tag}++ : conref source detail
    createReports1                      => [],                                  # Reports requested before references fixed
    createReports2                      => [],                                  # Reports requested after references fixed
    currentFolder                       => currentDirectory,                    # The current working folder used to make absolute file names from relative ones
    deleteUnusedIds                     => 0,                                   #I Delete ids (except on topics) that are not referenced in any reference in the corpus regardless of the file component of any such reference.
    deguidize                           => undef,                               #I Set true to replace guids in dita references with file name. Given reference B<g1#g2/id> convert B<g1> to a file name by locating the topic with topicId B<g2>.  This r...
    docType                             => {},                                  # {file} == docType:  the docType for each xml file.
    duplicateIds                        => {},                                  # [file, id]     Duplicate id definitions within each file.
    duplicateTopicIds                   => {},                                  # Duplicate topic ids
    duplicateTopicIds                   => {},                                  # [topicId, [files]] Files with duplicate topic ids - the id on the outermost tag.
    emptyTopics                         => {},                                  # {file} : topics where the *body is empty.
    errors                              => 0,                                   # Number of significant errors as reported in L<statusLine> or 0 if no such errors found
    exteriorMaps                        => {},                                  # {exterior map} : maps that are not referenced by another map
    fileExtensions                      => [qw(.dita .ditamap .xml .fodt)],     # Default file extensions to load
    fixBadRefs                          => undef,                               #I Fix any remaining bad references after any all allowed attempts have been made to fix failing references by moving the failing reference to the B<xtrf> attribute i.e. ...
    fixDitaRefs                         => undef,                               #I Fix references in a corpus of L<Dita> documents that have been converted to the L<GBStandard> and whose target structure has been written to the named folder.
    fixedFolder                         => undef,                               #I Fixed files are placed in this folder.
    fixedFolderTemp                     => undef,                               #I Fixed files are placed in this folder if we are on aws but nit the session leader - this folder is then copied back to L<fixedFolder> on the session leader.
    fixedRefsBad                        => [],                                  # [] hrefs and conrefs from L<fixRefs|/fixRefs> which were moved to the "xtrf" attribute as requested by the L<fixBadHrefs|/fixBadHrefs> attribute because the reference w...
    fixedRefsGB                         => [],                                  # [] files fixed to the Gearhart-Brenan file naming standard
    fixedRefsGood                       => [],                                  # [] hrefs and conrefs from L<fixRefs|/fixRefs> which were invalid but have been fixed by L<deguidizing|/deguidize> them to a valid file name.
    fixedRefsNoAction                   => [],                                  # [] hrefs and conrefs from L<fixRefs|/fixRefs> for which no action was taken.
    fixRefs                             => {},                                  # {file}{ref} where the href or conref target is not valid.
    fixRelocatedRefs                    => undef,                               #I Fix references to topics that have been moved around in the out folder structure assuming that all file names are unique which they will be if they have been renamed t...
    fixXrefsByTitle                     => undef,                               #I Try to fix invalid xrefs by the Gearhart Title Method enhanced by the Monroe map method if true
    flattenFiles                        => {},                                  # {old full file name} = file renamed to Gearhart-Brenan file naming standard
    flattenFolder                       => undef,                               #I Files are renamed to the Gearhart standard and placed in this folder if set.  References to the unflattened files are updated to references to the flattened files.  Th...
    getFileUrl => qq(/cgi-bin/uiSelfServiceXref/client.pl?getFile=),            #I A url to retrieve a specified file from the server running xref used in generating html reports. The complete url is obtained by appending the fully qualified file nam...
    goodImageFiles                      => {},                                  # {file}++ : number of references to each good image
    goodNavTitles                       => {},                                  # Details of nav titles that were resolved.
    guidHrefs                           => {},                                  # {file}{href} = location where href starts with GUID- and is thus probably a guid.
    guidToFile                          => {},                                  # {topic id which is a guid} = file defining topic id.
    hrefUrlEncoding                     => {},                                  # Hrefs that need url encoding because they contain white space.
    html                                => undef,                               #I Generate html version of reports in this folder if supplied
    idNotReferenced                     => {},                                  # {file}{id}++ - id in a file that is not referenced
    idReferencedCount                   => {},                                  # {file}{id}++ - the number of times this id in this file is referenced from the rest of the corpus
    ids                                 => {},                                  # {file}{id}   - id definitions across all files.
    idsRemoved                          => {},                                  # {id}++ : Ids removed from all files
    idTags                              => {},                                  # {file}{id}[tag] The tags associated with each id in a file - there might be more than one if the id is duplicated
    images                              => {},                                  # {file}{href}   Count of image references in each file.
    imagesReferencedFromBookMaps        => {},                                  # {bookmap full file name}{full name of image referenced from topic referenced from bookmap}++
    imagesReferencedFromTopics          => {},                                  # {topic full file name}{full name of image referenced from topic}++
    imagesToRefferingBookMaps           => {},                                  # {image full file name}{bookmap full file name}++ : images to referring bookmaps
    indexWords                          => undef,                               #I Index words to topics and topics to words if true.
    indexWordsFolder                    => undef,                               #I Folder into which to save words to topic and topics to word indexes if L<indexWords> is true.
    indexedWords                        => {},                                  # {word}{full file name of topic the words occurs in}.
    inputFiles                          => [],                                  # Input files from L<inputFolder|/inputFolder>.
    inputFileToTargetTopics             => {},                                  # {input file}{target file}++ : Tells us the topics an input file was split into
    inputFolderImages                   => {},                                  # {full image file name} for all files in input folder thus including any images resent
    inputFolder                         => undef,                               #I A folder containing the dita and ditamap files to be cross referenced.
    ltgt                                => {},                                  # {text between &lt; and &gt}{filename} = count giving the count of text items found between &lt; and &gt;
    matchTopics                         => undef,                               #I Match topics by title and by vocabulary to the specified confidence level between 0 and 1.  This operation might take some time to complete on a large corpus.
    maximumNumberOfProcesses            => numberOfCpus(8),                     #I Maximum number of processes to run in parallel at any one time with a sensible default.
    maxZoomIn                           => undef,                               #I Optional hash of names to regular expressions to look for in each file
    maxZoomOut                          => {},                                  # Results from L<maxZoomIn|/maxZoomIn>  where {file name}{regular expression key name in L<maxZoomIn|/maxZoomIn>}++
    md5Sum                              => {},                                  # MD5 sum for each input file.
    md5SumDuplicates                    => {},                                  # {md5sum}{file}++ : md5 sums with more than one file
    missingImageFiles                   => {},                                  # [file, href] == Missing images in each file.
    missingTopicIds                     => {},                                  # Missing topic ids.
    noHref                              => {},                                  # Tags that should have an href but do not have one.
    notReferenced                       => {},                                  # {file name} Files in input area that are not referenced by a conref, image, bookmapref or xref tag and are not a bookmap.
    olBody                              => {},                                  # The number of ol under body by file
    originalSourceFileAndIdToNewFile    => {},                                  # {original file}{id} = new file: Record mapping from original source file and id to the new file containing the id
    otherMeta                           => {},                                  # {original file}{othermeta name}{othermeta content}++ : the contents of the other meta tags
    otherMetaDuplicatesSeparately       => [],                                  # Duplicate othermeta in bookmaps and topics considered separately
    otherMetaDuplicatesCombined         => [],                                  # Duplicate othermeta in bookmaps with called topics othermeta included
    otherMetaRemainWithTopic            => [],                                  # Othermeta that must stay in the topic
    otherMetaPushToBookMap              => [],                                  # Othermeta that can be pushed to the calling book map
    otherMetaBookMapsBeforeTopicIncludes=> [],                                  # Bookmap othermeta before topic othermeta has been included
    otherMetaBookMapsAfterTopicIncludes => [],                                  # Bookmap othermeta after  topic othermeta has been included
    otherMetaConsolidated               => {},                                  # {Name}{Content}++ : consolidated other meta data across entire corpus
    oxygenProjects                      => undef,                               #I Create oxygen project files for each map - the project file will have an extension of .xpr and the same name and path as the map file or the name return by your implem...
    parseFailed                         => {},                                  # {file} files that failed to parse.
    publicId                            => {},                                  # {file} = Public id on Doctype
    references                          => {},                                  # {file}{reference}++ - the various references encountered
    relocatedReferencesFailed           => [],                                  # Failing references that were not fixed by relocation
    relocatedReferencesFixed            => [],                                  # Relocated references fixed
    requestAttributeNameAndValueCounts  => undef,                               #I Report attribute name and value counts
    requiredCleanUp                     => undef,                               # {full file name}{cleanup} = number of required-cleanups
    reports                             => undef,                               #I Reports folder: Xref will write text versions of the generated reports to files in this folder.
    results                             => [],                                  # Summary of results table.
#   sourceFile                          => undef,                               # The source file from whic#h this structure was generated.
    sourceTopicToTargetBookMap          => {},                                  # {input topic cut into multiple pieces} = output bookmap representing pieces
    statusLine                          => undef,                               # Status line summarizing the cross reference.
    statusTable                         => undef,                               # Status table summarizing the cross reference.
    subjectSchemeMap                    => undef,                               #I Create a subject scheme map in the named file
    suppressReferenceChecks             => undef,                               #I Suppress reference checking - which normally happens by default - but which takes time and might be irrelevant if an earlier xref has already checked all the reference...

lib/Data/Edit/Xml/Xref.pm  view on Meta::CPAN

  my $target = fpf($xref->flattenFolder, $xref->flattenFiles->{$file});         # Previously assigned GB name.  We cannot use the very latest name because other files have to be told about it and in changing them to reflect the latest name we would c...
  editXml($file, $target, -p $x);                                               # Edit xml

  \@r                                                                           # Return report of items fixed
 }

sub fixFilesGB($)                                                               #P Rename files to the L<GBStandard>
 {my ($xref) = @_;                                                              # Xref results

  my @parsed = sort @{$xref->inputFiles};                                       # Input files in sorted order
  @parsed    = grep {!$xref->parseFailed->{$_}} @parsed;                        # Only fix the files that parsed

  my @fixes;                                                                    # Fixes made across all the files
  processFilesInParallel(                                                       # Fix each file, then merge the per file results
    sub                                                                         # Fix one file
     {my ($file) = @_;                                                          # File to fix
      $xref->fixOneFileGB($file)
     },
    sub                                                                         # Flatten the results into a single list
     {push @fixes, deSquareArray @_
     },
    @parsed);

  $xref->fixedRefsGB = \@fixes;                                                 # Record the fixes on the cross referencer

  my $reportFile = fpe(qw(bad fixedRefsGB txt));                                # File the report is written to
  my $columns    = <<END;                                                       # Column descriptions for the report
Href           The href being fixed
Source         The source file containing the href
END
  my $head       = <<END;                                                       # Heading for the report
Xref failed to fix NNNN hrefs to the Gearhart-Brenan file naming standard
END

  formatTables($xref, $xref->fixedRefsGB,                                       # Report results
    columns   => $columns,
    summarize => 1,
    title     => qq(Hrefs that can not be renamed to the Gearhart-Brenan file naming standard),
    head      => $head,
    file      => $reportFile);

  return +{fixedRefsGB => $xref->fixedRefsGB};                                  # From multiverse to universe: hash reference holding the fixes made
 } # fixFilesGB

sub analyzeOneFileParallel($$)                                                  #P Analyze one input file
 {my ($Xref, $iFile) = @_;                                                      # Xref request, File to analyze

  my $xref = newXref();                                                         # Cross referencer for this file
#    $xref->sourceFile = $iFile;                                                # File analyzed
  my %maxZoomIn = $Xref->maxZoomIn ?  %{$Xref->maxZoomIn} : ();                 # Regular expressions from maxZoomIn to look for text
  my %maxZoomOut;                                                               # Text elements that match a maxZoomIn regular expression
  my %countAttrNames;                                                           # Attribute names
  my %countAttrNamesAndValues;                                                  # Attribute names and values
  my %countTagNames;                                                            # Tag names
  my $changes;                                                                  # Changes made to the file
  my $tags; my $texts;                                                          # Number of tags and text elements

  my $source = readFile($iFile);                                                # Source of file so we can gets its GB Standard name

  my $x = eval {Data::Edit::Xml::new($iFile)};                                  # Parse xml - at this point if the caller is interested in line numbers they should have added them.

  if ($@)                                                                       # Check we were able to parse the xml
   {$xref->parseFailed->{$iFile}++;
    return $xref;
   }

  my $md5 = $xref->md5Sum->{$iFile} = -M $x;                                    # Md5 sum for parse tree

  if ($Xref->flattenFolder)
   {$xref->flattenFiles->{$iFile} =                                             # Record correspondence between existing file and its GB Standard file name
      Dita::GB::Standard::gbStandardFileName($source, fe($iFile), md5=>$md5);
   }

  my $saveReference = sub                                                       # Save a reference so it can be integrity checked later
   {my ($ref) = @_;                                                             # Reference
    return if externalReference($ref);                                          # Looks like an external reference
    $xref->references->{$iFile}{$ref}++;                                        # Save reference
   };

  my $isADitaMap = $x->isADitaMap;                                              # Map

  $x->by(sub                                                                    # Each node
   {my ($o) = @_;

#    my $content = sub                                                          #P First few characters of content on one line to avoid triggering multi table layouts
#     {my ($o) = @_;                                                            # String
#      nws($o->stringContent, improvementLength);                               # Length of improvement
#     };

    my $tag = -t $o;                                                            # Element tag
    if ($tag eq q(CDATA)) {++$texts} else {++$tags}                             # Count texts and tags

    if (my $h = $o->href)                                                       # Check href
     {if ($h =~ m(\s)s and externalReference($h))                               # Check href for url encoding needed
       {$xref->{hrefUrlEncoding}{$iFile}{$o->lineLocation} = $h;
       }
      if ($xref->{deguidize} and $h =~ m(\bguid-)is)                            # Deguidizing a href that looks as if it might have a guid in it
       {$xref->{fixRefs}{$iFile}{$h}++
       }
      &$saveReference($h);
     }

    if (my $conref = $o->attr(q(conref)))                                       # Conref
     {my $saveConRef = sub                                                      # Save a conref
       {my ($conRef) = @_;                                                      # Conref
        $xref->{conRefs}{$iFile}{$conRef}{$tag}++;
       };

      &$saveConRef($conref);
      &$saveReference($conref);

      if (my $conref = $o->attr(q(conrefend)))                                  # Conref end
       {&$saveConRef($conref);
        &$saveReference($conref);
       }
     }

    if (my $i = $o->id)                                                         # Id definitions
     {$xref->{ids}{$iFile}{$i}++;
      push $xref->{idTags}{$iFile}{$i}->@*, $tag;                               # Tags for each id in the file
     }

    if ($tag eq q(xref))                                                        # Xrefs but not to the web
     {if (my $h = $o->href)
       {if (externalReference($h))                                              # Check attributes on external links
         {if ($o->attrX(q(scope)) !~ m(\Aexternal\Z)s)
           {$xref->{xrefBadScope}{$iFile}{$h} = -A $o;
           }
          if ($o->attrX(q(format)) !~ m(\Ahtml\Z)s)
           {$xref->{xrefBadFormat}{$iFile}{$h} = -A $o;
           }

lib/Data/Edit/Xml/Xref.pm  view on Meta::CPAN


=head1 Cross reference

Check the cross references in a set of Dita files and report the results.

=head2 xref(%attributes)

Check the cross references in a set of Dita files held in L<inputFolder|/inputFolder> and report the results in the L<reports|/reports> folder. The possible attributes are defined in L<Data::Edit::Xml::Xref|/Data::Edit::Xml::Xref>.

     Parameter    Description
  1  %attributes  Cross referencer attribute value pairs

B<Example:>


  lll "Test 011";
    clearFolder(tests, 111);
    createSampleInputFilesForFixDitaRefsImproved3(tests);

    my $y = 𝘅𝗿𝗲𝗳(inputFolder => out, reports => reportFolder);                    # Check results without fixes
    ok $y->statusLine eq q(Xref: 1 ref);

    my $x = 𝘅𝗿𝗲𝗳
     (inputFolder => out,
      reports     => reportFolder,
      fixBadRefs  => 1,
      fixDitaRefs => targets,
      fixedFolder => outFixed);

    ok !$x->errors;


=head1 Create test data

Create files to test the various capabilities provided by Xref


=head2 Data::Edit::Xml::Xref Definition


Attributes used by the Xref cross referencer.




=head3 Input fields


B<addNavTitles> - If true, add navtitle to outgoing bookmap references to show the title of the target topic.

B<changeBadXrefToPh> - Change xrefs being placed in B<M3> by L<fixBadRefs> to B<ph>.

B<classificationMaps> - Create classification maps if true

B<deguidize> - Set true to replace guids in dita references with file name. Given reference B<g1#g2/id> convert B<g1> to a file name by locating the topic with topicId B<g2>.  This requires the guids to be genuinely unique. SDL guids are thought to b...

B<deleteUnusedIds> - Delete ids (except on topics) that are not referenced in any reference in the corpus regardless of the file component of any such reference.

B<fixBadRefs> - Fix any remaining bad references after any all allowed attempts have been made to fix failing references by moving the failing reference to the B<xtrf> attribute i.e. placing it in B<M3> possibly renaming the tag to B<ph> if L<changeB...

B<fixDitaRefs> - Fix references in a corpus of L<Dita|http://docs.oasis-open.org/dita/dita/v1.3/os/part2-tech-content/dita-v1.3-os-part2-tech-content.html> documents that have been converted to the L<GB Standard|http://metacpan.org/pod/Dita::GB::Stan...

B<fixRelocatedRefs> - Fix references to topics that have been moved around in the out folder structure assuming that all file names are unique which they will be if they have been renamed to the GB Standard.

B<fixXrefsByTitle> - Try to fix invalid xrefs by the Gearhart Title Method enhanced by the Monroe map method if true

B<fixedFolder> - Fixed files are placed in this folder.

B<fixedFolderTemp> - Fixed files are placed in this folder if we are on aws but not the session leader - this folder is then copied back to L<fixedFolder> on the session leader.

B<flattenFolder> - Files are renamed to the Gearhart standard and placed in this folder if set.  References to the unflattened files are updated to references to the flattened files.  This option will eventually be deprecated as the Dita::GB::Standar...

B<getFileUrl> - A url to retrieve a specified file from the server running xref used in generating html reports. The complete url is obtained by appending the fully qualified file name to this value.

B<html> - Generate html version of reports in this folder if supplied

B<indexWords> - Index words to topics and topics to words if true.

B<indexWordsFolder> - Folder into which to save words to topic and topics to word indexes if L<indexWords> is true.

B<inputFolder> - A folder containing the dita and ditamap files to be cross referenced.

B<matchTopics> - Match topics by title and by vocabulary to the specified confidence level between 0 and 1.  This operation might take some time to complete on a large corpus.

B<maxZoomIn> - Optional hash of names to regular expressions to look for in each file

B<maximumNumberOfProcesses> - Maximum number of processes to run in parallel at any one time with a sensible default.

B<oxygenProjects> - Create oxygen project files for each map - the project file will have an extension of .xpr and the same name and path as the map file or the name returned by your implementation of: Data::Edit::Xml::Xref::xprName($map) if present.

B<reports> - Reports folder: Xref will write text versions of the generated reports to files in this folder.

B<requestAttributeNameAndValueCounts> - Report attribute name and value counts

B<subjectSchemeMap> - Create a subject scheme map in the named file

B<suppressReferenceChecks> - Suppress reference checking - which normally happens by default - but which takes time and might be irrelevant if an earlier xref has already checked all the references.

B<validateUrls> - Validate urls if true by fetching their headers with L<curl|https://linux.die.net/man/1/curl>



=head3 Output fields


B<allowUniquePartialMatches> - Allow unique partial matches - i.e ignore the stuff to the right of the # in a reference if doing so produces a unique result. This feature has been explicitly disabled for conrefs (PS2-561) and might need to be disable...

B<attributeCount> - {file}{attribute name} == count of the different xml attributes found in the xml files.

B<attributeNamesAndValuesCount> - {file}{attribute name}{value} = count

B<author> - {file} = author of this file.

B<badGuidHrefs> - Bad conrefs - all.

B<badNavTitles> - Details of nav titles that were not resolved

B<badReferencesCount> - The number of bad references at the start of the run - however depending on what options were chosen Xref might ameliorate these bad references and thereby reduce this count.

B<badTables> - Array of tables that need fixing.

B<badXml1> - [Files] with a bad xml encoding header on the first line.

B<badXml2> - [Files] with a bad xml doc type on the second line.

B<baseFiles> - {base of file name}{full file name}++ Current location of the file via uniqueness guaranteed by the GB standard

B<baseTag> - Base Tag for each file

B<bookMapRefs> - {bookmap full file name}{href}{navTitle}++ References from bookmaps to topics via appendix, chapter, bookmapref.

lib/Data/Edit/Xml/Xref.pm  view on Meta::CPAN

     Parameter  Description
  1  $hash      Hash to be converted

=head2 reportGuidsToFiles($xref)

Map and report guids to files

     Parameter  Description
  1  $xref      Xref results

=head2 editXml($in, $out, $source)

Edit an xml file retaining any existing XML headers and lint trailers

     Parameter  Description
  1  $in        Input file
  2  $out       Output file
  3  $source    Source to write

=head2 fixReferencesInOneFile($xref, $sourceFile)

Fix one file by moving unresolved references to the xtrf attribute

     Parameter    Description
  1  $xref        Xref results
  2  $sourceFile  Source file to fix

=head2 fixReferencesParallel($xref, $file)

Fix the references in one file

     Parameter  Description
  1  $xref      Cross referencer
  2  $file      File to fix

=head2 fixReferencesResults($xref, @results)

Consolidate the results of fixing references.

     Parameter  Description
  1  $xref      Cross referencer
  2  @results   Results from fixReferencesParallel

=head2 fixReferences($xref)

Fix just the file containing references using a number of techniques and report those references that cannot be so fixed.

     Parameter  Description
  1  $xref      Xref results

=head2 fixOneFileGB($xref, $file)

Fix one file to the Gearhart-Brenan standard

     Parameter  Description
  1  $xref      Xref results
  2  $file      File to fix

=head2 fixFilesGB($xref)

Rename files to the L<GB Standard|http://metacpan.org/pod/Dita::GB::Standard>

     Parameter  Description
  1  $xref      Xref results

=head2 analyzeOneFileParallel($Xref, $iFile)

Analyze one input file

     Parameter  Description
  1  $Xref      Xref request
  2  $iFile     File to analyze

=head2 analyzeOneFileResults($xref, @x)

Merge a list of cross reference results into the first cross referencer in the list

     Parameter  Description
  1  $xref      Cross referencer to merge into
  2  @x         Other cross referencers

=head2 analyzeInputFiles($xref)

Analyze the input files

     Parameter  Description
  1  $xref      Cross referencer

=head2 reportIdRefs($xref)

Report the number of times each id is referenced

     Parameter  Description
  1  $xref      Cross referencer

=head2 removeUnusedIds($xref)

Remove ids that are not mentioned in any href or conref in the corpus regardless of the file component of any such reference. This is a very conservative approach which acknowledges that writers might be looking for an id if they mention it in a r...

     Parameter  Description
  1  $xref      Cross referencer

=head2 reportEmptyTopics($xref)

Report empty topics

     Parameter  Description
  1  $xref      Cross referencer

=head2 reportDuplicateIds($xref)

Report duplicate ids

     Parameter  Description
  1  $xref      Cross referencer

=head2 reportDuplicateTopicIds($xref)

Report duplicate topic ids

     Parameter  Description

lib/Data/Edit/Xml/Xref.pm  view on Meta::CPAN

18 L<createReportsInParallel2|/createReportsInParallel2> - Create reports in parallel that require fixed references

19 L<createRequiredCleanUps|/createRequiredCleanUps> - Required clean ups report

20 L<createSampleConRefMatching|/createSampleConRefMatching> - Create sample input files for matching conref source and targets

21 L<createSampleConRefs|/createSampleConRefs> - Create sample input files for fixing a conref

22 L<createSampleDuplicateMd5Sum|/createSampleDuplicateMd5Sum> - Create sample input files with duplicate md5 sums

23 L<createSampleImageReferences|/createSampleImageReferences> - Good and bad image references

24 L<createSampleImageTest|/createSampleImageTest> - Create sample input files for fixing bookmap reference to a topic that did not get cut into multiple pieces

25 L<createSampleInputFilesBaseCase|/createSampleInputFilesBaseCase> - Create sample input files for testing.

26 L<createSampleInputFilesFixFolder|/createSampleInputFilesFixFolder> - Create sample input files for testing fixFolder

27 L<createSampleInputFilesForFixDitaRefs|/createSampleInputFilesForFixDitaRefs> - Create sample input files for fixing renamed topic refs

28 L<createSampleInputFilesForFixDitaRefsImproved1|/createSampleInputFilesForFixDitaRefsImproved1> - Create sample input files for fixing references via the targets/ folder

29 L<createSampleInputFilesForFixDitaRefsImproved2|/createSampleInputFilesForFixDitaRefsImproved2> - Create sample input files for fixing conref references via the targets/ folder

30 L<createSampleInputFilesForFixDitaRefsImproved3|/createSampleInputFilesForFixDitaRefsImproved3> - Create sample input files for fixing bookmap references to topics that get cut into multiple pieces

31 L<createSampleInputFilesForFixDitaRefsImproved4|/createSampleInputFilesForFixDitaRefsImproved4> - Create sample input files for fixing bookmap reference to a topic that did not get cut into multiple pieces

32 L<createSampleInputFilesForFixDitaRefsXref|/createSampleInputFilesForFixDitaRefsXref> - Create sample input files for fixing references into renamed topics by xref

33 L<createSampleInputFilesLtGt|/createSampleInputFilesLtGt> - Create sample input files for testing items between &lt; and &gt;

34 L<createSampleOtherMeta|/createSampleOtherMeta> - Create sample data for othermeta reports

35 L<createSampleTopicsReferencedFromBookMaps|/createSampleTopicsReferencedFromBookMaps> - The number of times a topic is referenced from a bookmap

36 L<createSampleUnreferencedIds|/createSampleUnreferencedIds> - Create sample input files with unreferenced ids

37 L<createSoftConrefs|/createSoftConrefs> - Fix file part of conref even if the rest is invalid

38 L<createSubjectSchemeMap|/createSubjectSchemeMap> - Create a subject scheme map from othermeta

39 L<createTestOneNotRef|/createTestOneNotRef> - One topic referenced and the other not

40 L<createTestReferencedToFlattenedTopic|/createTestReferencedToFlattenedTopic> - Full reference to a topic that has been flattened

41 L<createTestReferenceToCutOutTopic|/createTestReferenceToCutOutTopic> - References from a topic that has been cut out to a topic that has been cut out

42 L<createTestTopicFlattening|/createTestTopicFlattening> - Create sample input files for testing topic flattening ratio reporting

43 L<createUrlTests|/createUrlTests> - Check urls

44 L<createWordsToFilesTest|/createWordsToFilesTest> - Index words to file

45 L<deleteVariableFields|/deleteVariableFields> - Remove time and other fields that do not affect the end results

46 L<editXml|/editXml> - Edit an xml file retaining any existing XML headers and lint trailers

47 L<externalReference|/externalReference> - Check for an external reference

48 L<fixFilesGB|/fixFilesGB> - Rename files to the L<GB Standard|http://metacpan.org/pod/Dita::GB::Standard>

49 L<fixingRun|/fixingRun> - A fixing run fixes problems where it can and thus induces changes which might make the updated output different from the incoming source.

50 L<fixOneFileGB|/fixOneFileGB> - Fix one file to the Gearhart-Brenan standard

51 L<fixReferences|/fixReferences> - Fix just the file containing references using a number of techniques and report those references that cannot be so fixed.

52 L<fixReferencesInOneFile|/fixReferencesInOneFile> - Fix one file by moving unresolved references to the xtrf attribute

53 L<fixReferencesParallel|/fixReferencesParallel> - Fix the references in one file

54 L<fixReferencesResults|/fixReferencesResults> - Consolidate the results of fixing references.

55 L<formatTables|/formatTables> - Using cross reference B<$xref> options and an array of arrays B<$data> format a report as a table using B<%options> as described in L<Data::Table::Text::formatTable> and L<Data::Table::Text::formatHtmlTable>.

56 L<hashOfCountsToArray|/hashOfCountsToArray> - Convert a B<$hash> of {key} = count to an array so it can be formatted with L<formatTables>

57 L<loadInputFiles|/loadInputFiles> - Load the names of the files to be processed

58 L<newXref|/newXref> - Create a new cross referencer

59 L<oneBadRef|/oneBadRef> - Check one reference and return the first error encountered or B<undef> if no errors encountered.

60 L<oxygenProjectFileMetaData|/oxygenProjectFileMetaData> - Meta data for the oxygen project files

61 L<removeUnusedIds|/removeUnusedIds> - Remove ids that are not mentioned in any href or conref in the corpus regardless of the file component of any such reference.

62 L<reportAttributeCount|/reportAttributeCount> - Report attribute counts

63 L<reportAttributeNameAndValueCounts|/reportAttributeNameAndValueCounts> - Report attribute value counts

64 L<reportConRefMatching|/reportConRefMatching> - Report conref matching

65 L<reportDocTypeCount|/reportDocTypeCount> - Report doc type count

66 L<reportDuplicateIds|/reportDuplicateIds> - Report duplicate ids

67 L<reportDuplicateTopicIds|/reportDuplicateTopicIds> - Report duplicate topic ids

68 L<reportEmptyTopics|/reportEmptyTopics> - Report empty topics

69 L<reportExteriorMaps|/reportExteriorMaps> - Maps that are not referenced by any other map

70 L<reportExternalXrefs|/reportExternalXrefs> - Report external xrefs missing other attributes

71 L<reportFileExtensionCount|/reportFileExtensionCount> - Report file extension counts

72 L<reportFileTypes|/reportFileTypes> - Report file type counts - takes too long in series

73 L<reportFixRefs|/reportFixRefs> - Report of hrefs that need to be fixed

74 L<reportGuidHrefs|/reportGuidHrefs> - Report on guid hrefs

75 L<reportGuidsToFiles|/reportGuidsToFiles> - Map and report guids to files

76 L<reportHrefUrlEncoding|/reportHrefUrlEncoding> - href needs url encoding

77 L<reportIdRefs|/reportIdRefs> - Report the number of times each id is referenced

78 L<reportImages|/reportImages> - Reports on images and references to images



( run in 0.344 second using v1.01-cache-2.11-cpan-00829025b61 )