Data-Edit-Xml-Xref

 view release on metacpan or  search on metacpan

lib/Data/Edit/Xml/Xref.pm  view on Meta::CPAN

    fixDitaRefs                         => undef,                               #I Fix references in a corpus of L<Dita> documents that have been converted to the L<GBStandard> and whose target structure has been written to the named folder.
    fixedFolder                         => undef,                               #I Fixed files are placed in this folder.
    fixedFolderTemp                     => undef,                               #I Fixed files are placed in this folder if we are on aws but not the session leader - this folder is then copied back to L<fixedFolder> on the session leader.
    fixedRefsBad                        => [],                                  # [] hrefs and conrefs from L<fixRefs|/fixRefs> which were moved to the "xtrf" attribute as requested by the L<fixBadHrefs|/fixBadHrefs> attribute because the reference w...
    fixedRefsGB                         => [],                                  # [] files fixed to the Gearhart-Brenan file naming standard
    fixedRefsGood                       => [],                                  # [] hrefs and conrefs from L<fixRefs|/fixRefs> which were invalid but have been fixed by L<deguidizing|/deguidize> them to a valid file name.
    fixedRefsNoAction                   => [],                                  # [] hrefs and conrefs from L<fixRefs|/fixRefs> for which no action was taken.
    fixRefs                             => {},                                  # {file}{ref} where the href or conref target is not valid.
    fixRelocatedRefs                    => undef,                               #I Fix references to topics that have been moved around in the out folder structure assuming that all file names are unique which they will be if they have been renamed t...
    fixXrefsByTitle                     => undef,                               #I Try to fix invalid xrefs by the Gearhart Title Method enhanced by the Monroe map method if true
    flattenFiles                        => {},                                  # {old full file name} = file renamed to Gearhart-Brenan file naming standard
    flattenFolder                       => undef,                               #I Files are renamed to the Gearhart standard and placed in this folder if set.  References to the unflattened files are updated to references to the flattened files.  Th...
    getFileUrl => qq(/cgi-bin/uiSelfServiceXref/client.pl?getFile=),            #I A url to retrieve a specified file from the server running xref used in generating html reports. The complete url is obtained by appending the fully qualified file nam...
    goodImageFiles                      => {},                                  # {file}++ : number of references to each good image
    goodNavTitles                       => {},                                  # Details of nav titles that were resolved.
    guidHrefs                           => {},                                  # {file}{href} = location where href starts with GUID- and is thus probably a guid.
    guidToFile                          => {},                                  # {topic id which is a guid} = file defining topic id.
    hrefUrlEncoding                     => {},                                  # Hrefs that need url encoding because they contain white space.
    html                                => undef,                               #I Generate html version of reports in this folder if supplied
    idNotReferenced                     => {},                                  # {file}{id}++ - id in a file that is not referenced
    idReferencedCount                   => {},                                  # {file}{id}++ - the number of times this id in this file is referenced from the rest of the corpus
    ids                                 => {},                                  # {file}{id}   - id definitions across all files.
    idsRemoved                          => {},                                  # {id}++ : Ids removed from all files
    idTags                              => {},                                  # {file}{id}[tag] The tags associated with each id in a file - there might be more than one if the id is duplicated
    images                              => {},                                  # {file}{href}   Count of image references in each file.
    imagesReferencedFromBookMaps        => {},                                  # {bookmap full file name}{full name of image referenced from topic referenced from bookmap}++
    imagesReferencedFromTopics          => {},                                  # {topic full file name}{full name of image referenced from topic}++
    imagesToRefferingBookMaps           => {},                                  # {image full file name}{bookmap full file name}++ : images to referring bookmaps
    indexWords                          => undef,                               #I Index words to topics and topics to words if true.
    indexWordsFolder                    => undef,                               #I Folder into which to save words to topic and topics to word indexes if L<indexWords> is true.
    indexedWords                        => {},                                  # {word}{full file name of topic the words occurs in}.
    inputFiles                          => [],                                  # Input files from L<inputFolder|/inputFolder>.
    inputFileToTargetTopics             => {},                                  # {input file}{target file}++ : Tells us the topics an input file was split into
    inputFolderImages                   => {},                                  # {full image file name} for all files in input folder thus including any images resent
    inputFolder                         => undef,                               #I A folder containing the dita and ditamap files to be cross referenced.
    ltgt                                => {},                                  # {text between &lt; and &gt}{filename} = count giving the count of text items found between &lt; and &gt;
    matchTopics                         => undef,                               #I Match topics by title and by vocabulary to the specified confidence level between 0 and 1.  This operation might take some time to complete on a large corpus.
    maximumNumberOfProcesses            => numberOfCpus(8),                     #I Maximum number of processes to run in parallel at any one time with a sensible default.
    maxZoomIn                           => undef,                               #I Optional hash of names to regular expressions to look for in each file
    maxZoomOut                          => {},                                  # Results from L<maxZoomIn|/maxZoomIn>  where {file name}{regular expression key name in L<maxZoomIn|/maxZoomIn>}++
    md5Sum                              => {},                                  # MD5 sum for each input file.
    md5SumDuplicates                    => {},                                  # {md5sum}{file}++ : md5 sums with more than one file
    missingImageFiles                   => {},                                  # [file, href] == Missing images in each file.
    missingTopicIds                     => {},                                  # Missing topic ids.
    noHref                              => {},                                  # Tags that should have an href but do not have one.
    notReferenced                       => {},                                  # {file name} Files in input area that are not referenced by a conref, image, bookmapref or xref tag and are not a bookmap.
    olBody                              => {},                                  # The number of ol under body by file
    originalSourceFileAndIdToNewFile    => {},                                  # {original file}{id} = new file: Record mapping from original source file and id to the new file containing the id
    otherMeta                           => {},                                  # {original file}{othermeta name}{othermeta content}++ : the contents of the other meta tags
    otherMetaDuplicatesSeparately       => [],                                  # Duplicate othermeta in bookmaps and topics considered separately
    otherMetaDuplicatesCombined         => [],                                  # Duplicate othermeta in bookmaps with called topics othermeta included
    otherMetaRemainWithTopic            => [],                                  # Othermeta that must stay in the topic
    otherMetaPushToBookMap              => [],                                  # Othermeta that can be pushed to the calling book map
    otherMetaBookMapsBeforeTopicIncludes=> [],                                  # Bookmap othermeta before topic othermeta has been included
    otherMetaBookMapsAfterTopicIncludes => [],                                  # Bookmap othermeta after  topic othermeta has been included
    otherMetaConsolidated               => {},                                  # {Name}{Content}++ : consolidated other meta data across entire corpus
    oxygenProjects                      => undef,                               #I Create oxygen project files for each map - the project file will have an extension of .xpr and the same name and path as the map file or the name return by your implem...
    parseFailed                         => {},                                  # {file} files that failed to parse.
    publicId                            => {},                                  # {file} = Public id on Doctype
    references                          => {},                                  # {file}{reference}++ - the various references encountered
    relocatedReferencesFailed           => [],                                  # Failing references that were not fixed by relocation
    relocatedReferencesFixed            => [],                                  # Relocated references fixed
    requestAttributeNameAndValueCounts  => undef,                               #I Report attribute name and value counts
    requiredCleanUp                     => undef,                               # {full file name}{cleanup} = number of required-cleanups
    reports                             => undef,                               #I Reports folder: Xref will write text versions of the generated reports to files in this folder.
    results                             => [],                                  # Summary of results table.
#   sourceFile                          => undef,                               # The source file from whic#h this structure was generated.
    sourceTopicToTargetBookMap          => {},                                  # {input topic cut into multiple pieces} = output bookmap representing pieces
    statusLine                          => undef,                               # Status line summarizing the cross reference.
    statusTable                         => undef,                               # Status table summarizing the cross reference.
    subjectSchemeMap                    => undef,                               #I Create a subject scheme map in the named file
    suppressReferenceChecks             => undef,                               #I Suppress reference checking - which normally happens by default - but which takes time and might be irrelevant if an earlier xref has already checked all the reference...
    tableDimensions                     => {},                                  # {file}{columns}{rows} == count
    tagCount                            => {},                                  # {file}{tags} == count of the different tag names found in the xml files.
    tagsTextsRatio                      => undef,                               # Ratio of tags to text encountered
    tags                                => undef,                               # Number of tags encountered
    targetFolderContent                 => {},                                  # {file} = bookmap file name : the target folder content which shows us where an input file went
    targetTopicToInputFiles             => {},                                  # {current file} = the source file from which the current file was obtained
    texts                               => undef,                               # Number of texts encountered
    timeEnded                           => undef,                               # Time the run ended
    timeStart                           => undef,                               # Time the run started
    title                               => {},                                  # {full file name} = title of file.
    titleToFile                         => {},                                  # {title}{file}++ if L<fixXrefsByTitle> is in effect
    topicFlatteningFactor               => {},                                  # Topic flattening factor - higher is better
    topicFlattening                     => {},                                  # {topic}{sources}++ : the source files for each topic that was flattened
    topicIds                            => {},                                  # {file} = topic id - the id on the outermost tag.
    topicsFlattened                     => undef,                               # Number of topics flattened
    topicsNotReferencedFromBookMaps     => {},                                  # {topic file not referenced from any bookmap} = 1
    topicsReferencedFromBookMaps        => {},                                  # {bookmap full file name}{topic full file name}++ : bookmaps to topics
    topicsToReferringBookMaps           => {},                                  # {topic full file name}{bookmap full file name}++ : topics to referring bookmaps
    urls                                => {},                                  # {topic full file name}{url}++ : urls found in each file
    urlsBad                             => {},                                  # {url}{topic full file name}++ : failing urls found in each file
    urlsGood                            => {},                                  # {url}{topic full file name}++ : passing urls found in each file
    validateUrls                        => undef,                               #I Validate urls if true by fetching their headers with L<curl>
    validationErrors                    => {},                                  # True means that Lint detected errors in the xml contained in the file.
    vocabulary                          => {},                                  # The text of each topic shorn of attributes for vocabulary comparison.
    xrefBadFormat                       => {},                                  # External xrefs with no format=html.
    xrefBadScope                        => {},                                  # External xrefs with no scope=external.
    xRefs                               => {},                                  # {file}{href}++ Xrefs references.
    xrefsFixedByTitle                   => [],                                  # Xrefs fixed by locating a matching topic title from their text content.
   );

  loadHash($xref, @_);                                                          # Load attributes complaining about any invalid ones
 } # newXref

sub xref2(%)                                                                    #P Check the cross references in a set of Dita files held in  L<inputFolder|/inputFolder> and report the results in the L<reports|/reports> folder. The possible attribute...
 {my (%attributes) = @_;                                                        # Attributes of cross referencer
  my ($xref) = newXref(@_);                                                     # Cross referencer
  $xref->timeStart = time;                                                      # Start time

  $xref->inputFolder or confess "Please supply a value for: inputFolder";
  $xref->inputFolder =~ s(\/+\Z) (\/)gs;                                        # Cleanup path names
  $xref->inputFolder =                                                          # Make input folder absolute
    absFromAbsPlusRel($xref->currentFolder, $xref->inputFolder)
    if $xref->inputFolder !~ m(\A/);

  $xref->reports or confess "Please supply a value for: reports";

  if (1)                                                                        # Write title and some of the parameters
   {my $r = $xref->reports;
    owf(fpe($r, qw(xref_parameter_settings txt)), dump($xref)) if $r;           # Print all parameters

lib/Data/Edit/Xml/Xref.pm  view on Meta::CPAN

 {my ($xref) = @_;                                                              # Xref results
  my @r;
  if (my $xrefTopicIds = $xref->topicIds)
   {for   my $file(sort keys %{$xrefTopicIds})                                  # Each input file which will be absolute
     {if (my $topicId = $xrefTopicIds->{$file})                                 # Topic Id for file - we report missing topicIds in: reportDuplicateTopicIds
       {next unless $topicId =~ m(\AGUID-)is;
        $xref->guidToFile->{$topicId} = $file;                                  # Guid Topic Id to file
        push @r, [$topicId, $file];
       }
     }
   }

  formatTables($xref, \@r,
    columns  => <<END,
Guid The guid being defined
File The file that defines the guid
END
    title    =>qq(Guid topic definitions),
    head     =>qq(Xref found NNNN guid topic definitions on DDDD),
    summarize=>1,
    file     =>fpe(q(lists), qw(guidsToFiles txt)));
 }

sub editXml($$$)                                                                #P Write new source to an xml file while keeping any XML headers and lint trailers found in the existing file
 {my ($in, $out, $source) = @_;                                                 # Input file, output file, source to write

  my @lines = readFile($in);                                                    # Existing content - presumably one element per line with line terminators retained as the pieces are rejoined with an empty separator below (confirm against readFile)

  my @header;                                                                   # Leading header lines carried over unchanged
  if (@lines and $lines[0] =~ m(\A\<\?xml)is)                                   # Headers are only recognized when the file opens with an xml declaration - a deliberately basic parse, not a general solution
   {push @header, shift @lines;                                                 # The xml declaration line
    if (@lines and $lines[0] =~ m(\A<!DOCTYPE)s)                                # A doctype is only looked for directly after the xml declaration
     {push @header, shift @lines;                                               # First line of the doctype
      push @header, shift @lines                                                # Continuation lines of the doctype up to, but not including, the first line that looks like an opening tag
        while @lines and $lines[0] !~ m(\A\s*<[a-z])i;
     }
   }

  my @trailer;                                                                  # Lint lines carried over unchanged
  my $inLint;                                                                   # Becomes true at the first lint comment and stays true to the end of the file
  for my $line(@lines)                                                          # Remaining lines after the headers have been removed
   {$inLint ||= $line =~ m(\A\<\!\-\-linted\:)s;                                # Start of the lint trailer
    push @trailer, $line if $inLint;                                            # Keep the lint comment and everything after it
   }

  owf($out, join '', @header, $source, @trailer)                                # Old headers, then the new source, then the old lint trailer; the result of owf is the result of this sub
 }

# Fix a file by moving its hrefs and conrefs to the xtrf attribute unless
# deguidization is in effect and the guid can be converted into a valid Dita
# reference accessing a file in the input corpus.
#
# If fixRelocatedRefs is in effect: such references are fixed by assuming that
# the files mentioned in broken links have been relocated elsewhere in the
# folder structure and can be located by base file name alone.
#
# If fixXrefsByTitle is in effect apply the Gearhart Title Method: fix broken
# xrefs by looking for a unique topic with the same title text as the content of
# the xref.
#
# If fixDitaRefs is in effect we are converting Dita to Dita: relink Dita
# references that were valid in the input corpus to make them valid again in the
# output corpus even after files have been cut out and renamed to the GB Standard.
# The targets/ folder provides the mapping between the input and output corpora.

sub fixReferencesInOneFile($$)                                                  #P Fix one file by moving unresolved references to the xtrf attribute
 {my ($xref, $sourceFile) = @_;                                                 # Xref results, source file to fix
  my $fixed = newXref();                                                        # Fix results
  my $node;                                                                     # The current node we are working with
  my $attr;                                                                     # The current attribute we are working with
  my $ref;                                                                      # The current reference we are working with
  my @bad;  $fixed->fixedRefsBad  = \@bad;                                      # Hrefs that could not be fixed and so were ameliorated by moving them to @xtrf
  my @good; $fixed->fixedRefsGood = \@good;                                     # Hrefs that were fixed by resolving a Guid

  my $refDetails = sub                                                          # Save details of a reference
   {my ($r) = @_;
    my $s = $xref->targetTopicToInputFiles->{$sourceFile};                      # The source file(s) from which each target was obtained
    [$r, $node->tag, $attr, $ref, $sourceFile, sort keys %$s]                   # Construct reference details
   };

  my $bad = sub                                                                 # Save details of a bad reference
   {my ($r) = @_;
    push @bad, my $R = &$refDetails($r);
    $R
   };

  my $good = sub                                                                # Save details of a good reference
   {my ($target, $r) = @_;                                                      # Target file, reason
    my $R = &$refDetails($r);
    push @good, [@$R[0..3], $target, @$R[4..$#$R]];                             # Insert target at correct location
    $R
   };

  my $fixXrefByTitle = sub                                                      # Attempt to fix an xref by using its text content to search for a matching title
   {return undef unless -t $node eq q(xref);                                    # Only works for xrefs

    my $xTitle      = nws($node->stringContent);                                # Normalized title from xref node
    if (my $topics  = $xref->titleToFile->{$xTitle})                            # Find the topics that match the title text content
     {my $N         = keys %$topics;                                            # Matching topics

      if ($N == 1)                                                              # Unique matching topic - the original Gearhart Title Method
       {my ($path)  = keys %$topics;
        my $rel     = relFromAbsAgainstAbs($path, $sourceFile);                 # Relative file name
        $node->href = $rel;                                                     # Update xref
        return &$good($path, q(Fixed by Gearhart Title Method));                # Report the fix made
       }
      elsif ($N > 1)                                                            # Multiple matches
       {if (my $l = fileLargestSize(sort keys %$topics))                        # Boldly choose the topic with the largest size to resolve the ambiguity on the basis that it is probably the most interesting
         {my $rel = relFromAbsAgainstAbs($l, $sourceFile);                      # File name of target topic relative to source file
          $node->href = $rel;                                                   # Update reference
          return &$good($l, q(Fixed by Gearhart Bold Title Method));            # Report the fix made
         }
       }
     }

lib/Data/Edit/Xml/Xref.pm  view on Meta::CPAN

     }
    else
     {lll "No source for $sourceFile\n";
     }

    undef                                                                       # Failed
   };

  my $checkImageRef = sub                                                       # Check whether an image exists or not
   {my $i = absFromAbsPlusRel($sourceFile, $ref);                               # Local file name
    return 1 if -e $i;                                                          # Local file exists
    return 2 if -e wwwDecode($i);                                               # Local file exists
    undef                                                                       # Local file exists after decoding % signs
   };

  my $fixOnePartialDitaRef = sub                                                # Fix a partial dita reference to an externally cut out topic renamed to the GB Standard where such a reference is just a file name as used in a bookmapref.
   {my ($ref) = @_;                                                             # Partial reference
    return undef unless $xref->fixDitaRefs;                                     # Fixing dita references not requested

    my $topicSource = &$locateUniqueTopicSourceForTargetFile($sourceFile);      # Unique source file corresponding to the target file else undef
    return undef unless $topicSource;                                           # The references can not be resolved without a unique source file.

    my $refIn = absFromAbsPlusRel($topicSource, $ref);                          # The referenced input file that was present in the input being transformed because we assume that (most of) the input Dita refs were valid

    if (my $new = $xref->inputFileToTargetTopics->{$refIn})                     # The target files new files that were cut out of the referenced input file - there might several such
     {if (my $referencedTarget = fileLargestSize(sort keys %$new))              # Boldly assume that the largest possible target is the one we want
       {my $link = relFromAbsAgainstAbs($referencedTarget, $sourceFile);        # Create relative link from book map
        $node->set($attr=>$link);# if $xref->fixBadRefs;                        # Reset reference - we know fixDitaRefs is true.
        &$good($link, q(unique target));                                        # Record successful fix
        return 1;                                                               # Success
       }
     }
    undef                                                                       # Failed
   };

  my $fixOneFullDitaRef = sub                                                   # Fix a full dita reference to an externally cut out topic renamed to the GB Standard where such a reference is: file#topicId/label
   {return undef unless $xref->fixDitaRefs;                                     # Fixing dita references not requested
    return &$fixOnePartialDitaRef($ref) unless $ref =~ m(#);                    # Confirm it is a full reference else fix it as a partial reference

    my $topicSource    = &$locateUniqueTopicSourceForTargetFile($sourceFile);   # Unique source file corresponding to the target file  else undef
    return undef unless $topicSource;                                           # The references can not be resolved without a unique source file.
    my ($rf, $rt, $ri) = parseDitaRef($ref, $topicSource);                      # Parse the dita ref

    if (my $new        = $xref->originalSourceFileAndIdToNewFile->{$rf}{$ri})   # The new files cut out of the original topic source file
     {my $targetFile   = relFromAbsAgainstAbs($new, $sourceFile);               # Create relative link from current file
      if (my $topicId  = $xref->topicIds->{$new})                               # Topic id for target file
       {my $href       = qq($targetFile#$topicId/$ri);                          # New href
        $node->set($attr=>$href);# if $xref->fixBadRefs;                        # Reset href - we know fixDitaRefs is true.
        &$good($new, q(Unique target for file ref));                            # Record the fix made
        return 1;                                                               # Record the fix made
       }
     }

    if ($xref->allowUniquePartialMatches && $attr !~ m(\Aconref)s)              # Partial matching - i.e ignoring the stuff to the right of the # in the reference sometimes produces a unique result.
     {return &$fixOnePartialDitaRef($ref =~ s(#.*\Z) ()rs);                     # Try to resolve reference as a partial re
     }

    undef                                                                       # Failed
   };

  my $fixRelRef = sub                                                           # Attempt to fix a reference broken by relocation
   {my ($R, $rest) = split m(#)s, $ref, 2;                                      # Get referenced file name
    if ($R)
     {my $r = fne($R);                                                          # Href file base name
      if (my $F = $xref->baseFiles->{$r})                                       # Relocated else where
       {my @targets = sort keys(%$F);                                           # Relocation targets
        if (@targets == 1)                                                      # Just one such relocation
         {my $f = relFromAbsAgainstAbs($targets[0], $sourceFile);               # Link to it
          if ($f ne $R)
           {my $newLink;                                                        # Fix if the target is else where
            if ($rest)                                                          # Link has more than one component
             {$node->set($attr=>($newLink = $f.q(#).$rest));                    # Reset link
             }
            else                                                                # Link has just one component
             {$node->set($attr=>($newLink = $f));                               # Reset link
             }
            my $saveRef = $ref; $ref = $newLink;                                # Try fixing the relocated reference as a dita reference.
            my $r = &$fixOneFullDitaRef;
            $ref = $saveRef;
            return $r;
           }
         }
       }
     }
    undef                                                                       # Failed
   };

  # Closure over $xref, $sourceFile, $node, $attr and $ref.  The fixing methods are tried in this order: deguidize, relocate, fix by title, move to xtrf.  The first branch whose condition holds is the only one executed.
  my $fixOneRef = sub                                                           # Fix one unresolved reference either by ameliorating it or by moving it to the xtrf attribute thereby putting it in M3.
   {return unless $xref->fixRefs->{$sourceFile}{$ref};                          # Fix not requested for this reference

    if ($xref->deguidize and $ref =~ m(GUID-)is)                                # On a guid and deguidization allowed so given g1#g2/id convert g1 to a file name by looking g1 up in guidToFile. Once this branch is taken the relocation and title methods below are not tried even if no guid resolves.
     {my @refs = split /\s+/, $ref;                                             # There might be multiple references in the href
      my @unresolved;                                                           # Unresolved targets
      my @resolved;                                                             # Resolved targets

      for my $subRef(@refs)                                                     # Each reference in the reference
       {my ($guid, $rest) = split /#/, $subRef;
        if (my $target = $xref->guidToFile->{$guid})                            # Target file associated with guid
         {my $link = relFromAbsAgainstAbs($target, $sourceFile);                # Relative link
          $link .= q(#).$rest if $rest;                                         # Remainder of reference which does not change as it is not file related
          if (!@resolved)                                                       # First resolution - only the first resolvable sub reference is written to the attribute and reported
           {$node->set($attr=>$link);                                           # New href or conref
            &$good($target, q(Deguidized reference));                           # Report fix made
           }
          push @resolved, $subRef;
         }
        else
         {push @unresolved, $subRef;
         }
       }

      if (@unresolved and $xref->fixBadRefs)                                    # Unresolved - transfer all references to xtrf so some-one else can try. This renames the attribute even if a link was set above. Without fixBadRefs unresolved guids are not reported here.
       {$node->renameAttr($attr, q(xtrf));                                      # No target file for guid
        &$bad(q(No file for guid));                                             # Report failure
       }
     }
    elsif ($xref->fixRelocatedRefs and &$fixRelRef)                             # Try to fix as a relocated ref if possible. Called with & and no parentheses so the current @_ is passed through.
     {                                                                          # Success: nothing more to do here
     }
    elsif ($xref->fixXrefsByTitle  and &$fixXrefByTitle)                        # Try to fix a missing xref by title
     {                                                                          # Success: nothing more to do here
     }
    elsif ($xref->fixBadRefs)                                                   # Move href to xtrf as no other fix seems possible given that we have already tried to fix it as a guid and it was reportedly not working as a standard dita reference.
     {$node->renameAttr($attr, q(xtrf));                                        # Move the failing reference to the xtrf attribute

      if ($xref->changeBadXrefToPh)                                             # Change bad xref to ph if requested
       {if ($node->at_xref)
         {$node->change_ph;
         }
       }
      &$bad(q(No such target));                                                 # Report failure
     }
    else                                                                        # No method fixed the reference and fixBadRefs is not set so the reference is left in place
     {&$bad(q(Not fixable));                                                    # Unable to fix the reference using any known method
     }
   };

  my $x = Data::Edit::Xml::new($sourceFile);                                    # Parse xml - should parse OK else otherwise how did we find out that this file needed to be fixed
  my $s = -p $x;                                                                # Source before any changes

  $x->by(sub                                                                    # Check any references encountered on each node, Ameliorate some specific cases. If the reference is still invalid report the discrepancy.
   {my ($o) = @_;                                                               # Current node
    $node   = $o;                                                               # Make current node available globally
    my $t   = $node->tag;                                                       # Tag
    if ($t  =~  m(\A(appendix|chapter|image|link|mapref|topicref|xref)\Z)is)    # Hrefs that need to be fixed
     {if ($ref = $node->attr($attr = q(href)))                                  # The attribute and reference to ameliorate or fix
       #if ($t =~  m(\A(appendix|chapter|topicref)\Z)is)                        # Fix bookmap hrefs
       {if ($t =~  m(\A(appendix|chapter|mapref|topicref)\Z)is)                 # Fix bookmap hrefs
         {&$fixBookMapDitaRef or &$fixOneRef;                                   # Fix references to topics cut into multiple pieces and now represented by a bookmap
         }
        elsif ($t =~ m(\Aimage\Z)is)                                            # Check image references
         {&$checkImageRef or &$fixOneRef;                                       # No additional fixes available yet for images, as so far, the resolution of images is done in thee calling frame work.  Hence we only need to check whether the reference...
         }
        else                                                                    # Fix hrefs without the benefit of the targets/ folder
         {&$fixOneFullDitaRef or &$fixOneRef;                                   # Fix references not in a bookmap
         }
       }
      elsif ($t =~ m(\Axref\Z)s and $xref->fixXrefsByTitle and &$fixXrefByTitle)# Try to fix a missing xref by title
       {
       }
     }
    if ($ref = $node->attr($attr = q(conref)))                                  # Fix a conref
     {&$fixOneFullDitaRef or &$fixOneRef;
     }
    if ($ref = $node->attr($attr = q(conrefend)))                               # Fix a conrefend
     {&$fixOneFullDitaRef or &$fixOneRef;
     }
   });

  if (my $S = -p $x)                                                            # Source after any changes
   {if ($S ne $s)                                                               # Write any changes - seems to be slightly faster than not checking
     {if (onAwsSecondary)                                                       # Write output to temporary folder regardless so it can be copied enmasse back to the session leader
       {my $f = swapFolderPrefix($sourceFile,                                   # Output file name
         $xref->inputFolder, $xref->fixedFolderTemp);
        editXml($sourceFile, $f, $S);                                           # Write the fixed file to the fixedFolder retaining headers and trailers
       }
      elsif (my $fixedFolder = $xref->fixedFolder)                              # New output file in fixedFolder

lib/Data/Edit/Xml/Xref.pm  view on Meta::CPAN

         }
       }
     }

    $xref->inputFileToTargetTopics    = \%sourceToTarget;                       # The targets for each input file
    $xref->targetTopicToInputFiles    = \%targetToSource;                       # The source file from which each target was obtained
    $xref->sourceTopicToTargetBookMap = \%sourceTopicToTargetBookMap;           # The bookmap representing a cut up topic
    $xref->topicFlattening            = \%targetToSourceDuplicated;             # Topics that arose from flattening several source files
    $xref->originalSourceFileAndIdToNewFile = \%si;                             # Record mapping from original source file and id to the new file containing the id

    formatTables($xref, \@r,
      columns => <<END,
Type    The type of reference
DocType Document type of the source file
Source  Source file
Target  Cut out file
END
      summarize=>1,
      title=>qq(The target topics cut out of the source documents),
      head=><<END,
Xref noted NNNN cut out topics on DDDD
END
      file=>(fpe(qw(lists source_to_targets txt))));

    if (1)                                                                      # Report topic flattening
     {my @r;
      my $s = 0; my $t = 0;
      for my $target(sort keys %targetToSourceDuplicated)                       # Each of the target topics that were derived from this source file
       {my @s = @{$targetToSourceDuplicated{$target}};                          # Each source input file
        push @r, [scalar(@s), $target];
        push @r, [q(), q(  ).$_] for @s;
        push @r, [q()];
        ++$t; $s += @s;
       }
      $xref->topicsFlattened = $s;                                              # Record the number of topics flattened
      my $F = $xref->topicFlatteningFactor = $t ? $s / $t : 0;                  # Topic flattening factor - higher is better
      my $f = sprintf("%7.4f", $F);
      my $n = @{$xref->inputFiles};                                             # Number of topics
      my $p = sprintf("%7.4f", $n ? 100*$t/$n : 0);                             # Percentage topics flattened versus total number of topics

      formatTables($xref, \@r,
        columns => <<END,
Count   Number of sources that created this target
Target  The target file flattened out from multiple source files
END
        summarize => 1,
        title     => qq(Topic files flattened from multiple sources),
        head      => <<END,
Xref noted that $s source topics were reduced to $t target topics on DDDD

This represents a flattening factor of:  $f  (higher is better) in the topics that got flattened

Total number of topics    : $n
Number of topics flattened: $t
Percent topics   flattened: $p
END
        file      => fpe(qw(lists topic_flattening txt)));
     }
   }

  if ($xref->fixRelocatedRefs)                                                  # Load base file name to full name but if needed to do relocation fixes
   {my %baseFiles;                                                              # Map base files back to full files. The base file is the file name shorn of the path - the reason the GB Standard is so important
    for my $file(searchDirectoryTreesForMatchingFiles($xref->inputFolder))      # All input files
     {my $base = fne $file;                                                     # Base file name - the GB Standard name for the file
      $baseFiles{$base}{$file}++;                                               # Current location of the file
     }
    $xref->baseFiles = \%baseFiles;
   }

  my @bad;                                                                      # Hrefs that could not be fixed and so were ameliorated by moving them to @xtrf
  my @good;                                                                     # Hrefs that were fixed by resolving a Guid
  if (my @files = sort keys %{$xref->fixRefs})                                  # Fix files if requested
   {awsParallelProcessFiles $xref,                                              # Fix files in parallel
      \&fixReferencesParallel,                                                  # Fix one file
      \&fixReferencesResults,                                                   # Consolidate results
      [@files];

    @good = $xref->fixedRefsGood->@*;                                           # Results from fixReferencesResults
    @bad  = $xref->fixedRefsBad ->@*;
   }

  @good = sort {join(' ', @$a) cmp join(' ', @$b)} @good;
  @bad  = sort {join(' ', @$a) cmp join(' ', @$b)} @bad;

  my $fbr   = $xref->fixBadRefs;                                                # Are we fixing bad refs?
  my $facet = q(Dita references);

  formatTables($xref, $xref->fixedRefsBad = \@bad,                              # Report references we cannot fix
    columns   => <<END,
Reason         The reason the reference was not fixed
Tag            The tag of the node in which the reference failure occurs
Attr           The attribute of the node in which the reference failure occurs
Reference      The reference not being fixed
File           The file in which the reference appears
Source_Files   One or more source files that from which this file was derived
END
    summarize => 1,
    title     => q(Invalid references),
    facet     => $facet,  aspectColor => q(red),
    head      => $fbr ? <<END : <<END2,
Xref moved NNNN invalid references to M3 on DDDD as fixBadRefs=>$fbr was specified
END
Xref was unable to resolve NNNN failing references on DDDD, fixBadRefs=> was not specified
END2
    zero      => 1,
    file      => fpe(qw(bad failing_references txt)));

  formatTables($xref, $xref->fixedRefsGood = \@good,                            # Report hrefs which were failing but were successfully resolved by ingenuity.
    columns   => <<END,
Method         The way that the reference was fixed
Tag            The tag of the node on which the reference was fixed
Attr           The attribute being fixed - normally href
Ref            The reference that is being resolved
Target_File    The file the reference resolves to.
File           The file in which the reference appears
Source_Files   The source files that gave rise to the file containing the reference after file flattening
END
    summarize => 1,
    title     => qq(These failing references were successfully resolved),
    facet     => $facet, aspectColor => q(green),
    head      => <<END,

lib/Data/Edit/Xml/Xref.pm  view on Meta::CPAN


  addNavTitle => 1

Reports of successful updates will be written to:

  reports/good/navTitles.txt

Reports of unsuccessful updates will be written to:

  reports/bad/navTitles.txt

=head2 Fix bad references

It is often desirable to ameliorate unresolved Dita href attributes so that
incomplete content can be loaded into a content management system.  The:

  fixBadRefs => 1

attribute requests that the:

 conref and href

attributes be renamed to:

 xtrf

if the B<conref> or B<href> attribute specification cannot be resolved in the
current corpus by other methods of fixing failing references such as:
L<fixDitaRefs>, L<fixRelocatedRefs> or L<fixXrefsByTitle>.

This feature was designed by L<mailto:mim@cpan.org>.

=head2 Deguidize

Some content management systems use guids, some content management systems use
file names as their means of identifying content. When moving from a guid to a
file name content management system it might be necessary to replace the guids
representing file names with the actual underlying file names.  If the

  deguidize => 1

parameter is set to true, Xref will replace any such file guids with the
underlying file name if it is present in the content being cross referenced.

=head2 File flattening

It is often desirable to flatten or reflatten the topic files in a corpus so
that they can coexist in a single folder of a content management system without
colliding with each other.

The presence of the input attribute:

 flattenFolder => folder-to-flatten-files-into

causes topic files to be flattened into the named folder using the
L<GBStandard> to generate the flattened file names.  Xref will then update all
L<Dita> references to match these new file names.  If the L<flattenFolder>
folder is the same as the L<inputFolder> then the input files are flattened in
place.

=head2 Locating relocated files

File references in B<conref> or B<href> attributes that have a unique valid
base file name and an invalid path can be fixed by setting the input attribute:

 fixRelocatedRefs => 1

to a true value to request that Xref should replace the incorrect paths to the
unique base file names with the correct paths.

If coded in conjunction with the B<fixBadRefs> input attribute this will cause
Xref to first try to fix any missing xrefs; any that still fail to resolve
will then be ameliorated by moving them to the B<xtrf> attribute.

=head2 Fix Xrefs by Title

L<Dita> B<xref> tags with broken or missing B<href> attributes can sometimes be
fixed by matching the text content of the B<xref> with the titles of topics.

If:

  fixXrefsByTitle => 1

is specified, L<Xref> will locate possible targets for a broken B<href> by
matching the white space normalized L<Data::Table::Text::nws> of the text
content of the B<xref> with the similarly normalized title of each topic that
is referenced by any book map that refers to the topic containing the B<xref>.

If a single matching candidate is located then it will be used to update the
B<href> attribute of the B<xref>.

=head2 Fix References in Dita To Dita Conversions

When converting a L<Dita> input source corpus to L<Dita> the referenced topics
are usually renamed and flattened via the L<GBStandard>. If enabled:

  fixDitaRefs => targets/

updates valid L<Dita> references in the input corpus with the latest name for
the referenced topic to make links that were valid in the input corpus valid in
the output corpus as well.

The B<targets/> folder should contain the same set of file names as the
original input corpus, each such file should contain the name of a B<bookmap>
topic present in the B<inputFolder=> whose B<chapter> and B<topicref>s identify
the new names of the files cut out and flattened from the existing input
corpus.

The creation of the B<targets/> folder is usually done by some other piece of
software such as L<Data::Edit::Xml::To::Dita> as it is too complex and
laborious to be performed reliably by hand.  No validation of the contents of
this folder is performed as it is assumed that it has been created reliably in
software.

=head2 Topic Matching

Topics can be matched on title and vocabulary to assist authors in finding
similar topics by specifying the:

  matchTopics => 0.9

lib/Data/Edit/Xml/Xref.pm  view on Meta::CPAN

B<idsRemoved> - {id}++ : Ids removed from all files

B<images> - {file}{href}   Count of image references in each file.

B<imagesReferencedFromBookMaps> - {bookmap full file name}{full name of image referenced from topic referenced from bookmap}++

B<imagesReferencedFromTopics> - {topic full file name}{full name of image referenced from topic}++

B<imagesToRefferingBookMaps> - {image full file name}{bookmap full file name}++ : images to referring bookmaps

B<indexedWords> - {word}{full file name of topic the words occurs in}.

B<inputFileToTargetTopics> - {input file}{target file}++ : Tells us the topics an input file was split into

B<inputFiles> - Input files from L<inputFolder|/inputFolder>.

B<inputFolderImages> - {full image file name} for all files in input folder thus including any images present

B<ltgt> - {text between &lt; and &gt;}{filename} = count giving the count of text items found between &lt; and &gt;

B<maxZoomOut> - Results from L<maxZoomIn|/maxZoomIn>  where {file name}{regular expression key name in L<maxZoomIn|/maxZoomIn>}++

B<md5Sum> - MD5 sum for each input file.

B<md5SumDuplicates> - {md5sum}{file}++ : md5 sums with more than one file

B<missingImageFiles> - [file, href] == Missing images in each file.

B<missingTopicIds> - Missing topic ids.

B<noHref> - Tags that should have an href but do not have one.

B<notReferenced> - {file name} Files in input area that are not referenced by a conref, image, bookmapref or xref tag and are not a bookmap.

B<olBody> - The number of ol under body by file

B<originalSourceFileAndIdToNewFile> - {original file}{id} = new file: Record mapping from original source file and id to the new file containing the id

B<otherMeta> - {original file}{othermeta name}{othermeta content}++ : the contents of the other meta tags

B<otherMetaBookMapsAfterTopicIncludes> - Bookmap othermeta after  topic othermeta has been included

B<otherMetaBookMapsBeforeTopicIncludes> - Bookmap othermeta before topic othermeta has been included

B<otherMetaConsolidated> - {Name}{Content}++ : consolidated other meta data across entire corpus

B<otherMetaDuplicatesCombined> - Duplicate othermeta in bookmaps with called topics othermeta included

B<otherMetaDuplicatesSeparately> - Duplicate othermeta in bookmaps and topics considered separately

B<otherMetaPushToBookMap> - Othermeta that can be pushed to the calling book map

B<otherMetaRemainWithTopic> - Othermeta that must stay in the topic

B<parseFailed> - {file} files that failed to parse.

B<publicId> - {file} = Public id on Doctype

B<references> - {file}{reference}++ - the various references encountered

B<relocatedReferencesFailed> - Failing references that were not fixed by relocation

B<relocatedReferencesFixed> - Relocated references fixed

B<requiredCleanUp> - {full file name}{cleanup} = number of required-cleanups

B<results> - Summary of results table.

B<sourceTopicToTargetBookMap> - {input topic cut into multiple pieces} = output bookmap representing pieces

B<statusLine> - Status line summarizing the cross reference.

B<statusTable> - Status table summarizing the cross reference.

B<tableDimensions> - {file}{columns}{rows} == count

B<tagCount> - {file}{tags} == count of the different tag names found in the xml files.

B<tags> - Number of tags encountered

B<tagsTextsRatio> - Ratio of tags to text encountered

B<targetFolderContent> - {file} = bookmap file name : the target folder content which shows us where an input file went

B<targetTopicToInputFiles> - {current file} = the source file from which the current file was obtained

B<texts> - Number of texts encountered

B<timeEnded> - Time the run ended

B<timeStart> - Time the run started

B<title> - {full file name} = title of file.

B<titleToFile> - {title}{file}++ if L<fixXrefsByTitle> is in effect

B<topicFlattening> - {topic}{sources}++ : the source files for each topic that was flattened

B<topicFlatteningFactor> - Topic flattening factor - higher is better

B<topicIds> - {file} = topic id - the id on the outermost tag.

B<topicsFlattened> - Number of topics flattened

B<topicsNotReferencedFromBookMaps> - {topic file not referenced from any bookmap} = 1

B<topicsReferencedFromBookMaps> - {bookmap full file name}{topic full file name}++ : bookmaps to topics

B<topicsToReferringBookMaps> - {topic full file name}{bookmap full file name}++ : topics to referring bookmaps

B<urls> - {topic full file name}{url}++ : urls found in each file

B<urlsBad> - {url}{topic full file name}++ : failing urls found in each file

B<urlsGood> - {url}{topic full file name}++ : passing urls found in each file

B<validationErrors> - True means that Lint detected errors in the xml contained in the file.

B<vocabulary> - The text of each topic shorn of attributes for vocabulary comparison.

B<xRefs> - {file}{href}++ Xrefs references.

B<xrefBadFormat> - External xrefs with no format=html.

lib/Data/Edit/Xml/Xref.pm  view on Meta::CPAN

END

  owf(fpe($in, qw(good2 png)), <<END);
<image/>
END
 }

sub createRequiredCleanUps($)                                                   #P Create two sample concepts containing required-cleanup tags to exercise the required clean ups report
 {my ($in) = @_;                                                                # Folder to create the files in

  owf(fpe($in, qq(c1), q(dita)), <<END);                                        # First concept: one "aaa" and a repeated "bbb"
<concept id="c1">
  <title>C1_</title>
  <conbody>
    <required-cleanup>aaa</required-cleanup>
    <required-cleanup>bbb</required-cleanup>
    <required-cleanup>bbb</required-cleanup>
  </conbody>
</concept>
END

  owf(fpe($in, qq(c2), q(dita)), <<END);                                        # Second concept: shares "aaa" and "bbb" with the first and adds "ccc" in both lower and upper case
<concept id="c2">
  <title>C2_</title>
  <conbody>
    <required-cleanup>aaa</required-cleanup>
    <required-cleanup>bbb</required-cleanup>
    <required-cleanup>ccc</required-cleanup>
    <required-cleanup>CCC</required-cleanup>
  </conbody>
</concept>
END
 }

sub createSoftConrefs($)                                                        #P Create sample files to test fixing the file part of a conref even if the rest of the conref is invalid
 {my ($in) = @_;                                                                # Folder to create the files in

  my $r = fpe(qw(c_12345678123456781234567812345678 dita));                     # Relocatable: base file name of the concept written into the sub folder and referenced without a path from the second concept
  owf(fpf($in, q(folder), $r), <<END);                                          # Target concept placed in a sub folder of the input folder
$conceptHeader
<concept id="c">
  <title>C1</title>
  <conbody>
    <p id="p1">aaa</p>
    <p id="p1">bbb</p>
    <p conref="#c/p1"/>    <!-- FAILS -->
    <p conref="#c/pp"/>    <!-- FAILS: No such id -->
  </conbody>
</concept>
END

  owf(fpe($in, qw(c dita)), <<END);                                             # Referring concept in the input folder whose conrefs name the target by base file name only
$conceptHeader
<concept id="c">
  <title>C2</title>
  <conbody>
    <p conref="$r#c/p1"/>
    <p conref="$r#c1/p1"/>   <!-- PASSES: wrong topic id but we ignore topic ids-->
    <p conref="$r#c/bad"/>   <!-- PASSES: no such id - SHOULD FAIL even though we are relocating -->
    <p conref="$r"/>
    <p conref="c.dta"/>      <!-- FAILS: no such file -->
    <p id="q1">aaa</p>
    <p conref="#c/q1"/>
  </conbody>
</concept>
END
 }

sub checkXrefStructure($$@)                                                     #P Check an output structure produced by Xref: dump the named field, remove the input folder and the specified folder names from the dump and return the structure recreated from the edited dump
 {my ($x, $field, @folders) = @_;                                               # Cross references, field to check, folders to suppress
  my $s = nws dump($x->{$field});                                               # Structure to be tested
  for my $folder($x->inputFolder, @folders)                                     # Remove specified folder names from structure to be tested
   {next unless defined $folder and length $folder;                             # Skip empty names: an empty pattern would make Perl reuse the last successfully matched regular expression
    $s =~ s(\Q$folder\E) ()gs;                                                  # Remove folder name from structure to be tested, matching it literally as file names can contain regular expression meta characters such as "."
   }
  eval $s;                                                                      # Recreate structure - the string evaluated is our own dump, a failure to evaluate yields undef
 }

sub writeXrefStructure($$@)                                                     #P Write the test for an Xref structure: print on STDERR an is_deeply statement comparing checkXrefStructure of the field with the current content of the field
 {my ($x, $field, @folders) = @_;                                               # Cross referencer, field, names of the folders to suppress

  my $in = $x->inputFolder;                                                     # Input folder name to be removed from the dump

  my $s = dump($x->{$field});                                                   # Field to be tested
     $s =~ s(\Q$in\E) ()gs if defined $in and length $in;                       # Remove input folder, matched literally, skipping an empty name which would otherwise reuse the last successful pattern
     $s = nws($s);                                                              # Normalize white space
     $s =~ s(\],\s+\[) (],\n    [)gs;                                           # Start each array in a list of arrays on a new line
     $s =~ s(\},\s+\{) (},\n    {)gs;                                           # Start each hash in a list of hashes on a new line

  for my $folderName(@folders)                                                  # Remove specified folder names from structure to be tested
   {no strict qw(refs);                                                         # Each folder is named by the sub that returns it so we call the sub by name
    my $folder = &{$folderName}();                                              # Folder name - called with an explicit empty argument list so that our own @_ is not passed on
    next unless defined $folder and length $folder;                             # Skip empty names: an empty pattern would make Perl reuse the last successfully matched regular expression
    $s =~ s(\Q$folder\E) ()gs;                                                  # Remove folder name from structure to be tested, matching it literally
   }

  my $f = join ', ', @folders;                                                  # Folders to remove
  my $t = <<END;                                                                # Format test
  is_deeply checkXrefStructure(\$x, q($field), $f), $s;
END

  say STDERR $t;                                                                # Write test
 }

sub deleteVariableFields($)                                                     #P Remove fields whose values vary from run to run and so do not affect the end results
 {my ($x) = @_;                                                                 # Cross referencer
  my @variable =                                                                # Fields to remove
    qw(timeEnded timeStart maximumNumberOfProcesses                             # Time and process count fields
       tagsTextsRatio);                                                         # Floating point field
  delete @{$x}{@variable};                                                      # Remove all the variable fields in one hash slice
  removeFilePathsFromStructure($x);                                             # Remove file paths - the result of this call is the result of this sub
 }

sub testReferenceChecking                                                       #P Test reference checking
 {my $folder = q(/home/phil/);
  my @names  = qw(aaa bbb ccc);
  my @ids    = map {q(p).$_}                   @names;
  my @files  = map {fpe($folder, $_, q(dita))} @names;

  my $xref = newXref
   (currentFolder  => q(/aaa),
    reports        => fpd(currentDirectory, qw(test resports)),
    topicIds       => {map {$files[$_]=>$names[$_]}      0..$#names},
    ids            => {map {$files[$_]=>{$ids[$_]=>1}}   0..$#names},
   );

lib/Data/Edit/Xml/Xref.pm  view on Meta::CPAN

    missingTopicIds                      => {},
    noHref                               => {},
    notReferenced                        => {},
    olBody                               => {},
    originalSourceFileAndIdToNewFile     => {
                                              "a.xml" => {
                                                           "GUID-400c2c59-95e1-7bf3-4647-3a135281bfaf" => "c_aaaa_cccc_a91633094220d068c453eecae1726eff.dita",
                                                           "GUID-68822563-d568-f418-38ae-f1c62cb4ac8d" => "c_aaaa_dddd_914b8e11993908497768c50d992ea0f0.dita",
                                                           "GUID-c67821ef-3da2-c89f-0fc9-9fba3937f368" => "c_aaaa_121939eab89cd7d2c3eb4c4189772a1f.dita",
                                                           "GUID-f0c0e170-8128-10ef-045d-97602fdde76f" => "c_aaaa_bbbb_55baefe9258538b26a95b0015a8d5a2b.dita",
                                                         },
                                              "b.xml" => {
                                                           "GUID-2b6aab4f-9328-e326-f55f-160771a8c3dd" => "c_bbbb_cccc_d1c80714275637cde524bdfa1304a8f3.dita",
                                                           "GUID-86a684b0-1a0b-4c30-6da9-24c74ff1f0cc" => "c_bbbb_aaaa_cfd3a140e06a914fc8469583ad87829d.dita",
                                                           "GUID-96a20d7f-bbaf-deef-55ef-e09a0a059251" => "c_bbbb_6100b51ca1f789836cd4f31893ed67d2.dita",
                                                           "GUID-cfe7cb3d-05e7-a147-db10-dcbacaeecef7" => "c_bbbb_bbbb_c90ebf976073b2a3f7a8dc27a3c8254b.dita",
                                                           "p1" => "c_bbbb_6100b51ca1f789836cd4f31893ed67d2.dita",
                                                           "p2" => "c_bbbb_bbbb_c90ebf976073b2a3f7a8dc27a3c8254b.dita",
                                                           "p3" => "c_bbbb_cccc_d1c80714275637cde524bdfa1304a8f3.dita",
                                                         },
                                            },
    otherMeta                            => {},
    otherMetaBookMapsAfterTopicIncludes  => [],
    otherMetaBookMapsBeforeTopicIncludes => [],
    otherMetaConsolidated                => {},
    otherMetaDuplicatesCombined          => [],
    otherMetaDuplicatesSeparately        => [],
    otherMetaPushToBookMap               => [],
    otherMetaRemainWithTopic             => [],
    oxygenProjects                       => undef,
    parseFailed                          => {},
    publicId                             => {
                                              "bm_a_9d0a9f8e0ac234de9e22c19054b6e455.ditamap"     => "EN",
                                              "bm_b_d2806ba589f908da1106574afd9db642.ditamap"     => "EN",
                                              "c_aaaa_121939eab89cd7d2c3eb4c4189772a1f.dita"      => "EN",
                                              "c_aaaa_bbbb_55baefe9258538b26a95b0015a8d5a2b.dita" => "EN",
                                              "c_aaaa_cccc_a91633094220d068c453eecae1726eff.dita" => "EN",
                                              "c_aaaa_dddd_914b8e11993908497768c50d992ea0f0.dita" => "EN",
                                              "c_bbbb_6100b51ca1f789836cd4f31893ed67d2.dita"      => "EN",
                                              "c_bbbb_aaaa_cfd3a140e06a914fc8469583ad87829d.dita" => "EN",
                                              "c_bbbb_bbbb_c90ebf976073b2a3f7a8dc27a3c8254b.dita" => "EN",
                                              "c_bbbb_cccc_d1c80714275637cde524bdfa1304a8f3.dita" => "EN",
                                            },
    references                           => {
                                              "bm_a_9d0a9f8e0ac234de9e22c19054b6e455.ditamap"     => {
                                                                                                       "c_aaaa_121939eab89cd7d2c3eb4c4189772a1f.dita"      => 1,
                                                                                                       "c_aaaa_bbbb_55baefe9258538b26a95b0015a8d5a2b.dita" => 1,
                                                                                                       "c_aaaa_cccc_a91633094220d068c453eecae1726eff.dita" => 1,
                                                                                                       "c_aaaa_dddd_914b8e11993908497768c50d992ea0f0.dita" => 1,
                                                                                                     },
                                              "bm_b_d2806ba589f908da1106574afd9db642.ditamap"     => {
                                                                                                       "c_bbbb_6100b51ca1f789836cd4f31893ed67d2.dita"      => 1,
                                                                                                       "c_bbbb_aaaa_cfd3a140e06a914fc8469583ad87829d.dita" => 1,
                                                                                                       "c_bbbb_bbbb_c90ebf976073b2a3f7a8dc27a3c8254b.dita" => 1,
                                                                                                       "c_bbbb_cccc_d1c80714275637cde524bdfa1304a8f3.dita" => 1,
                                                                                                     },
                                              "c_aaaa_bbbb_55baefe9258538b26a95b0015a8d5a2b.dita" => { p1 => 1 },
                                              "c_aaaa_cccc_a91633094220d068c453eecae1726eff.dita" => { p2 => 1 },
                                              "c_aaaa_dddd_914b8e11993908497768c50d992ea0f0.dita" => { p3 => 1 },
                                            },
    relocatedReferencesFailed            => [],
    relocatedReferencesFixed             => [],
    reports                              => '',
    requestAttributeNameAndValueCounts   => undef,
    requiredCleanUp                      => {},
    results                              => [[1, "ref"]],
    sourceTopicToTargetBookMap           => {
                                              "a.xml" => bless({
                                                           source => "a.xml",
                                                           sourceDocType => "concept",
                                                           target => "bm_a_9d0a9f8e0ac234de9e22c19054b6e455.ditamap",
                                                           targetType => "bookmap",
                                                         }, "Bookmap"),
                                              "b.xml" => bless({
                                                           source => "b.xml",
                                                           sourceDocType => "concept",
                                                           target => "bm_b_d2806ba589f908da1106574afd9db642.ditamap",
                                                           targetType => "bookmap",
                                                         }, "Bookmap"),
                                            },
    statusLine                           => "Xref: 1 ref",
    statusTable                          => "   Count  Condition\n1      1  ref\n",
    subjectSchemeMap                     => undef,
    suppressReferenceChecks              => undef,
    tableDimensions                      => {},
    tagCount                             => {
                                              "bm_a_9d0a9f8e0ac234de9e22c19054b6e455.ditamap"     => {
                                                                                                       appendices        => 1,
                                                                                                       approved          => 1,
                                                                                                       author            => 1,
                                                                                                       bookchangehistory => 1,
                                                                                                       booklists         => 1,
                                                                                                       bookmap           => 1,
                                                                                                       bookmeta          => 1,
                                                                                                       bookowner         => 1,
                                                                                                       bookrights        => 1,
                                                                                                       booktitle         => 1,
                                                                                                       brand             => 1,
                                                                                                       category          => 1,
                                                                                                       CDATA             => 1,
                                                                                                       chapter           => 1,
                                                                                                       copyrfirst        => 1,
                                                                                                       frontmatter       => 1,
                                                                                                       keyword           => 1,
                                                                                                       keywords          => 1,
                                                                                                       mainbooktitle     => 1,
                                                                                                       notices           => 1,
                                                                                                       preface           => 1,
                                                                                                       prodinfo          => 1,
                                                                                                       prodname          => 1,
                                                                                                       prognum           => 1,
                                                                                                       relcell           => 4,
                                                                                                       relcolspec        => 2,
                                                                                                       relheader         => 1,
                                                                                                       relrow            => 2,
                                                                                                       reltable          => 1,
                                                                                                       revisionid        => 1,
                                                                                                       shortdesc         => 1,
                                                                                                       source            => 1,
                                                                                                       toc               => 1,
                                                                                                       topicref          => 3,
                                                                                                       vrm               => 1,

lib/Data/Edit/Xml/Xref.pm  view on Meta::CPAN

  fixedRefsGood                        => [],
  fixedRefsNoAction                    => [],
  fixRefs                              => {},
  fixRelocatedRefs                     => undef,
  fixXrefsByTitle                      => undef,
  flattenFiles                         => {},
  flattenFolder                        => undef,
  getFileUrl                           => "client.pl?getFile=",
  goodImageFiles                       => {},
  goodNavTitles                        => {},
  guidHrefs                            => {},
  guidToFile                           => {},
  hrefUrlEncoding                      => {},
  html                                 => undef,
  idNotReferenced                      => { "c1.dita" => { c1 => 1 }, "c2.dita" => { c2 => 1 } },
  idReferencedCount                    => {},
  ids                                  => { "c1.dita" => { c1 => 1 }, "c2.dita" => { c2 => 1 } },
  idsRemoved                           => { c1 => 1, c2 => 1 },
  idTags                               => {
                                            "c1.dita" => { c1 => ["concept"] },
                                            "c2.dita" => { c2 => ["concept"] },
                                          },
  images                               => {},
  imagesReferencedFromBookMaps         => {},
  imagesReferencedFromTopics           => {},
  imagesToRefferingBookMaps            => {},
  indexedWords                         => {},
  indexWords                           => undef,
  indexWordsFolder                     => undef,
  inputFiles                           => ["c1.dita", "c2.dita"],
  inputFileToTargetTopics              => {},
  inputFolder                          => "",
  inputFolderImages                    => { c1 => "c1.dita", c2 => "c2.dita" },
  ltgt                                 => {},
  matchTopics                          => undef,
  maxZoomIn                            => undef,
  maxZoomOut                           => { "c1.dita" => {}, "c2.dita" => {} },
  md5Sum                               => {
                                            "c1.dita" => "92ab49a6d97f749545ec5dc873f53bdb",
                                            "c2.dita" => "a3df8bdda952294d6a533b7ff4f6faeb",
                                          },
  md5SumDuplicates                     => {},
  missingImageFiles                    => {},
  missingTopicIds                      => {},
  noHref                               => {},
  notReferenced                        => {},
  olBody                               => {},
  originalSourceFileAndIdToNewFile     => {},
  otherMeta                            => {},
  otherMetaBookMapsAfterTopicIncludes  => [],
  otherMetaBookMapsBeforeTopicIncludes => [],
  otherMetaConsolidated                => {},
  otherMetaDuplicatesCombined          => [],
  otherMetaDuplicatesSeparately        => [],
  otherMetaPushToBookMap               => [],
  otherMetaRemainWithTopic             => [],
  oxygenProjects                       => undef,
  parseFailed                          => {},
  publicId                             => { "c1.dita" => undef, "c2.dita" => undef },
  references                           => {},
  relocatedReferencesFailed            => [],
  relocatedReferencesFixed             => [],
  reports                              => '',
  requestAttributeNameAndValueCounts   => undef,
  requiredCleanUp                      => {
                                            "c1.dita" => { aaa => 1, bbb => 2 },
                                            "c2.dita" => { aaa => 1, bbb => 1, ccc => 1, CCC => 1 },
                                          },
  results                              => [[2, "first lines"], [2, "second lines"]],
  sourceTopicToTargetBookMap           => {},
  statusLine                           => "Xref: 2 first lines, 2 second lines",
  statusTable                          => "   Count  Condition\n1      2  first lines\n2      2  second lines\n",
  subjectSchemeMap                     => undef,
  suppressReferenceChecks              => undef,
  tableDimensions                      => {},
  tagCount                             => {
                                            "c1.dita" => {
                                                           "CDATA" => 4,
                                                           "conbody" => 1,
                                                           "concept" => 1,
                                                           "required-cleanup" => 3,
                                                           "title" => 1,
                                                         },
                                            "c2.dita" => {
                                                           "CDATA" => 5,
                                                           "conbody" => 1,
                                                           "concept" => 1,
                                                           "required-cleanup" => 4,
                                                           "title" => 1,
                                                         },
                                          },
  tags                                 => { "c1.dita" => 6, "c2.dita" => 7 },
  targetFolderContent                  => {},
  targetTopicToInputFiles              => {},
  texts                                => { "c1.dita" => 4, "c2.dita" => 5 },
  title                                => { "c1.dita" => "C1_", "c2.dita" => "C2_" },
  titleToFile                          => { C1_ => { "c1.dita" => 1 }, C2_ => { "c2.dita" => 1 } },
  topicFlattening                      => {},
  topicFlatteningFactor                => {},
  topicIds                             => { "c1.dita" => "c1", "c2.dita" => "c2" },
  topicsFlattened                      => undef,
  topicsNotReferencedFromBookMaps      => { "c1.dita" => 1, "c2.dita" => 1 },
  topicsReferencedFromBookMaps         => {},
  topicsToReferringBookMaps            => {},
  urls                                 => {},
  urlsBad                              => {},
  urlsGood                             => {},
  validateUrls                         => undef,
  validationErrors                     => {},
  vocabulary                           => {},
  xrefBadFormat                        => {},
  xrefBadScope                         => {},
  xRefs                                => {},
  xrefsFixedByTitle                    => [],
};

  #say STDERR writeStructureTest($x->requiredCleanUp, q($x->requiredCleanUp));
  is_deeply removeFilePathsFromStructure($x->requiredCleanUp),
   { "c1.dita" => { aaa => 1, bbb => 2 },
     "c2.dita" => { aaa => 1, bbb => 1, ccc => 1, CCC => 1 },
   };
 }

lib/Data/Edit/Xml/Xref.pm  view on Meta::CPAN

                                          },
  ids                                  => {
                                            "c.dita" => { c => 1, q1 => 1 },
                                            "c_12345678123456781234567812345678.dita" => { c => 1, p1 => 2 },
                                          },
  idsRemoved                           => { c => 2 },
  idTags                               => {
                                            "c.dita" => { c => ["concept"], q1 => ["p"] },
                                            "c_12345678123456781234567812345678.dita" => { c => ["concept"], p1 => ["p", "p"] },
                                          },
  images                               => {},
  imagesReferencedFromBookMaps         => {},
  imagesReferencedFromTopics           => {},
  imagesToRefferingBookMaps            => {},
  indexedWords                         => {},
  indexWords                           => undef,
  indexWordsFolder                     => undef,
  inputFiles                           => ["c.dita", "c_12345678123456781234567812345678.dita"],
  inputFileToTargetTopics              => {},
  inputFolder                          => "",
  inputFolderImages                    => {
                                            c => "c.dita",
                                            c_12345678123456781234567812345678 => "c_12345678123456781234567812345678.dita",
                                          },
  ltgt                                 => {},
  matchTopics                          => undef,
  maxZoomIn                            => undef,
  maxZoomOut                           => { "c.dita" => {}, "c_12345678123456781234567812345678.dita" => {} },
  md5Sum                               => {
                                            "c.dita" => "c7c95918b94057943d448ca99e5424cc",
                                            "c_12345678123456781234567812345678.dita" => "d3d1c1ce281895768bd92f27fd492191",
                                          },
  md5SumDuplicates                     => {},
  missingImageFiles                    => {},
  missingTopicIds                      => {},
  noHref                               => {},
  notReferenced                        => {},
  olBody                               => {},
  originalSourceFileAndIdToNewFile     => {},
  otherMeta                            => {},
  otherMetaBookMapsAfterTopicIncludes  => [],
  otherMetaBookMapsBeforeTopicIncludes => [],
  otherMetaConsolidated                => {},
  otherMetaDuplicatesCombined          => [],
  otherMetaDuplicatesSeparately        => [],
  otherMetaPushToBookMap               => [],
  otherMetaRemainWithTopic             => [],
  oxygenProjects                       => undef,
  parseFailed                          => {},
  publicId                             => { "c.dita" => "EN", "c_12345678123456781234567812345678.dita" => "EN" },
  references                           => {
                                            "c.dita" => {
                                              "bad" => 1,
                                              "c.dta" => 1,
                                              "c_12345678123456781234567812345678.dita" => 1,
                                              "p1" => 1,
                                              "q1" => 1,
                                            },
                                            "c_12345678123456781234567812345678.dita" => { p1 => 1, pp => 1 },
                                          },
  relocatedReferencesFailed            => [],
  relocatedReferencesFixed             => [],
  reports                              => '',
  requestAttributeNameAndValueCounts   => undef,
  requiredCleanUp                      => {},
  results                              => [[1, "duplicate id"], [6, "refs"], [1, "duplicate topic id"]],
  sourceTopicToTargetBookMap           => {},
  statusLine                           => "Xref: 6 refs, 1 duplicate id, 1 duplicate topic id",
  statusTable                          => "   Count  Condition\n1      6  refs\n2      1  duplicate id\n3      1  duplicate topic id\n",
  subjectSchemeMap                     => undef,
  suppressReferenceChecks              => undef,
  tableDimensions                      => {},
  tagCount                             => {
                                            "c.dita" => { CDATA => 2, conbody => 1, concept => 1, p => 7, title => 1 },
                                            "c_12345678123456781234567812345678.dita" => { CDATA => 3, conbody => 1, concept => 1, p => 4, title => 1 },
                                          },
  tags                                 => { "c.dita" => 10, "c_12345678123456781234567812345678.dita" => 7 },
  targetFolderContent                  => {},
  targetTopicToInputFiles              => {},
  texts                                => { "c.dita" => 2, "c_12345678123456781234567812345678.dita" => 3 },
  title                                => { "c.dita" => "C2", "c_12345678123456781234567812345678.dita" => "C1" },
  titleToFile                          => {
                                            C1 => { "c_12345678123456781234567812345678.dita" => 1 },
                                            C2 => { "c.dita" => 1 },
                                          },
  topicFlattening                      => {},
  topicFlatteningFactor                => {},
  topicIds                             => { "c.dita" => "c", "c_12345678123456781234567812345678.dita" => "c" },
  topicsFlattened                      => undef,
  topicsNotReferencedFromBookMaps      => { "c.dita" => 1, "c_12345678123456781234567812345678.dita" => 1 },
  topicsReferencedFromBookMaps         => {},
  topicsToReferringBookMaps            => {},
  urls                                 => {},
  urlsBad                              => {},
  urlsGood                             => {},
  validateUrls                         => undef,
  validationErrors                     => {},
  vocabulary                           => {},
  xrefBadFormat                        => {},
  xrefBadScope                         => {},
  xRefs                                => {},
  xrefsFixedByTitle                    => [],
};
 }

#latestTest:;
if (1) {                                                                        # Oxygen project files
# Test 031: run xref with the oxygenProjects option set and compare the resulting status line with the expected summary.
lll "Test 031";
  # Clear the folder named by tests() before building fresh input. NOTE(review): 111 is presumably a limit on how much clearFolder may remove - confirm.
  clearFolder(tests, 111);
  # Build the base case sample input. NOTE(review): the meaning of the second argument (8) is not visible here - confirm against createSampleInputFilesBaseCase.
  createSampleInputFilesBaseCase(&in, 8);

  # Cross reference the input folder with reports written to the report folder and oxygenProjects switched on.
  my $x = xref(inputFolder => in, reports => reportFolder, oxygenProjects=>1);
  # The status line must equal the expected one line summary exactly (string comparison).
  ok $x->statusLine eq q(Xref: 104 refs, 21 image refs, 14 first lines, 14 second lines, 8 duplicate ids, 4 duplicate topic ids, 4 invalid guid hrefs, 2 duplicate files, 2 tables, 1 External xrefs with no format=html, 1 External xrefs with no scope=e...
 }

#latestTest:;
if (0) {                                                                        # Performance tests 1.419
  # Test 032: disabled by default - cross references a local samples folder
  # purely to observe run time; no assertions are made on the result.
  lll("Test 032");
  xref(inputFolder => q(/home/phil/perl/cpan/DataEditXmlXref/lib/Data/Edit/Xml/samples/));
 }

lib/Data/Edit/Xml/Xref.pm  view on Meta::CPAN

  fixBadRefs                           => undef,
  fixDitaRefs                          => undef,
  fixedFolder                          => undef,
  fixedFolderTemp                      => "",
  fixedRefsBad                         => [],
  fixedRefsGB                          => [],
  fixedRefsGood                        => [],
  fixedRefsNoAction                    => [],
  fixRefs                              => {},
  fixRelocatedRefs                     => undef,
  fixXrefsByTitle                      => undef,
  flattenFiles                         => {},
  flattenFolder                        => undef,
  getFileUrl                           => "client.pl?getFile=",
  goodImageFiles                       => {},
  goodNavTitles                        => {},
  guidHrefs                            => {},
  guidToFile                           => {},
  hrefUrlEncoding                      => {},
  html                                 => undef,
  idNotReferenced                      => { "concept.dita" => { c => 1 } },
  idReferencedCount                    => {},
  ids                                  => { "concept.dita" => { c => 1 } },
  idsRemoved                           => { c => 1 },
  idTags                               => { "concept.dita" => { c => ["concept"] } },
  images                               => {},
  imagesReferencedFromBookMaps         => {},
  imagesReferencedFromTopics           => {},
  imagesToRefferingBookMaps            => {},
  indexedWords                         => {},
  indexWords                           => undef,
  indexWordsFolder                     => undef,
  inputFiles                           => ["concept.dita"],
  inputFileToTargetTopics              => {},
  inputFolder                          => "",
  inputFolderImages                    => { concept => "concept.dita" },
  ltgt                                 => {},
  matchTopics                          => undef,
  maxZoomIn                            => undef,
  maxZoomOut                           => { "concept.dita" => {} },
  md5Sum                               => { "concept.dita" => "f38f3212622c0fd073b213176a045e47" },
  md5SumDuplicates                     => {},
  missingImageFiles                    => {},
  missingTopicIds                      => {},
  noHref                               => {},
  notReferenced                        => {},
  olBody                               => {},
  originalSourceFileAndIdToNewFile     => {},
  otherMeta                            => {},
  otherMetaBookMapsAfterTopicIncludes  => [],
  otherMetaBookMapsBeforeTopicIncludes => [],
  otherMetaConsolidated                => {},
  otherMetaDuplicatesCombined          => [],
  otherMetaDuplicatesSeparately        => [],
  otherMetaPushToBookMap               => [],
  otherMetaRemainWithTopic             => [],
  oxygenProjects                       => undef,
  parseFailed                          => {},
  publicId                             => { "concept.dita" => "EN" },
  references                           => {},
  relocatedReferencesFailed            => [],
  relocatedReferencesFixed             => [],
  reports                              => "",
  requestAttributeNameAndValueCounts   => undef,
  requiredCleanUp                      => {},
  results                              => [[2, "urls"]],
  sourceTopicToTargetBookMap           => {},
  statusLine                           => "Xref: 2 urls",
  statusTable                          => "   Count  Condition\n1      2  urls\n",
  subjectSchemeMap                     => undef,
  suppressReferenceChecks              => undef,
  tableDimensions                      => {},
  tagCount                             => {
                                            "concept.dita" => { CDATA => 3, conbody => 1, concept => 1, p => 2, title => 1, xref => 2 },
                                          },
  tags                                 => { "concept.dita" => 7 },
  targetFolderContent                  => {},
  targetTopicToInputFiles              => {},
  texts                                => { "concept.dita" => 3 },
  title                                => { "concept.dita" => "Urls" },
  titleToFile                          => { Urls => { "concept.dita" => 1 } },
  topicFlattening                      => {},
  topicFlatteningFactor                => {},
  topicIds                             => { "concept.dita" => "c" },
  topicsFlattened                      => undef,
  topicsNotReferencedFromBookMaps      => { "concept.dita" => 1 },
  topicsReferencedFromBookMaps         => {},
  topicsToReferringBookMaps            => {},
  urls                                 => {
                                            "concept.dita" => { "ww2.appaapps.com" => 1, "www.appaapps.com" => 1 },
                                          },
  urlsBad                              => {
                                            "ww2.appaapps.com" => { "concept.dita" => 1 },
                                            "www.appaapps.com" => { "concept.dita" => 1 },
                                          },
  urlsGood                             => {},
  validateUrls                         => 1,
  validationErrors                     => {},
  vocabulary                           => {},
  xrefBadFormat                        => {},
  xrefBadScope                         => {},
  xRefs                                => {},
  xrefsFixedByTitle                    => [],
}, "Data::Edit::Xml::Xref")
 }

# Clear each working folder used by the tests, then finish the test run.
for my $folder (in, out, outFixed, reportFolder, tests, targets, q(zzzParseErrors)) {
  clearFolder($folder, 1e3);
 }

done_testing();

lll("Tests finished:");  # 16.212



( run in 0.627 second using v1.01-cache-2.11-cpan-71847e10f99 )