view release on metacpan or search on metacpan
lib/Data/Edit/Xml/Xref.pm view on Meta::CPAN
fixDitaRefs => undef, #I Fix references in a corpus of L<Dita> documents that have been converted to the L<GBStandard> and whose target structure has been written to the named folder.
fixedFolder => undef, #I Fixed files are placed in this folder.
fixedFolderTemp => undef, #I Fixed files are placed in this folder if we are on aws but nit the session leader - this folder is then copied back to L<fixedFolder> on the session leader.
fixedRefsBad => [], # [] hrefs and conrefs from L<fixRefs|/fixRefs> which were moved to the "xtrf" attribute as requested by the L<fixBadHrefs|/fixBadHrefs> attribute because the reference w...
fixedRefsGB => [], # [] files fixed to the Gearhart-Brenan file naming standard
fixedRefsGood => [], # [] hrefs and conrefs from L<fixRefs|/fixRefs> which were invalid but have been fixed by L<deguidizing|/deguidize> them to a valid file name.
fixedRefsNoAction => [], # [] hrefs and conrefs from L<fixRefs|/fixRefs> for which no action was taken.
fixRefs => {}, # {file}{ref} where the href or conref target is not valid.
fixRelocatedRefs => undef, #I Fix references to topics that have been moved around in the out folder structure assuming that all file names are unique which they will be if they have been renamed t...
fixXrefsByTitle => undef, #I Try to fix invalid xrefs by the Gearhart Title Method enhanced by the Monroe map method if true
flattenFiles => {}, # {old full file name} = file renamed to Gearhart-Brenan file naming standard
flattenFolder => undef, #I Files are renamed to the Gearhart standard and placed in this folder if set. References to the unflattened files are updated to references to the flattened files. Th...
getFileUrl => qq(/cgi-bin/uiSelfServiceXref/client.pl?getFile=), #I A url to retrieve a specified file from the server running xref used in generating html reports. The complete url is obtained by appending the fully qualified file nam...
goodImageFiles => {}, # {file}++ : number of references to each good image
goodNavTitles => {}, # Details of nav titles that were resolved.
guidHrefs => {}, # {file}{href} = location where href starts with GUID- and is thus probably a guid.
guidToFile => {}, # {topic id which is a guid} = file defining topic id.
hrefUrlEncoding => {}, # Hrefs that need url encoding because they contain white space.
html => undef, #I Generate html version of reports in this folder if supplied
idNotReferenced => {}, # {file}{id}++ - id in a file that is not referenced
idReferencedCount => {}, # {file}{id}++ - the number of times this id in this file is referenced from the rest of the corpus
ids => {}, # {file}{id} - id definitions across all files.
idsRemoved => {}, # {id}++ : Ids removed from all files
idTags => {}, # {file}{id}[tag] The tags associated with each id in a file - there might be more than one if the id is duplicated
images => {}, # {file}{href} Count of image references in each file.
imagesReferencedFromBookMaps => {}, # {bookmap full file name}{full name of image referenced from topic referenced from bookmap}++
imagesReferencedFromTopics => {}, # {topic full file name}{full name of image referenced from topic}++
imagesToRefferingBookMaps => {}, # {image full file name}{bookmap full file name}++ : images to referring bookmaps
indexWords => undef, #I Index words to topics and topics to words if true.
indexWordsFolder => undef, #I Folder into which to save words to topic and topics to word indexes if L<indexWords> is true.
indexedWords => {}, # {word}{full file name of topic the words occurs in}.
inputFiles => [], # Input files from L<inputFolder|/inputFolder>.
inputFileToTargetTopics => {}, # {input file}{target file}++ : Tells us the topics an input file was split into
inputFolderImages => {}, # {full image file name} for all files in input folder thus including any images resent
inputFolder => undef, #I A folder containing the dita and ditamap files to be cross referenced.
ltgt => {}, # {text between < and >}{filename} = count giving the count of text items found between < and >
matchTopics => undef, #I Match topics by title and by vocabulary to the specified confidence level between 0 and 1. This operation might take some time to complete on a large corpus.
maximumNumberOfProcesses => numberOfCpus(8), #I Maximum number of processes to run in parallel at any one time with a sensible default.
maxZoomIn => undef, #I Optional hash of names to regular expressions to look for in each file
maxZoomOut => {}, # Results from L<maxZoomIn|/maxZoomIn> where {file name}{regular expression key name in L<maxZoomIn|/maxZoomIn>}++
md5Sum => {}, # MD5 sum for each input file.
md5SumDuplicates => {}, # {md5sum}{file}++ : md5 sums with more than one file
missingImageFiles => {}, # [file, href] == Missing images in each file.
missingTopicIds => {}, # Missing topic ids.
noHref => {}, # Tags that should have an href but do not have one.
notReferenced => {}, # {file name} Files in input area that are not referenced by a conref, image, bookmapref or xref tag and are not a bookmap.
olBody => {}, # The number of ol under body by file
originalSourceFileAndIdToNewFile => {}, # {original file}{id} = new file: Record mapping from original source file and id to the new file containing the id
otherMeta => {}, # {original file}{othermeta name}{othermeta content}++ : the contents of the other meta tags
otherMetaDuplicatesSeparately => [], # Duplicate othermeta in bookmaps and topics considered separately
otherMetaDuplicatesCombined => [], # Duplicate othermeta in bookmaps with called topics othermeta included
otherMetaRemainWithTopic => [], # Othermeta that must stay in the topic
otherMetaPushToBookMap => [], # Othermeta that can be pushed to the calling book map
otherMetaBookMapsBeforeTopicIncludes=> [], # Bookmap othermeta before topic othermeta has been included
otherMetaBookMapsAfterTopicIncludes => [], # Bookmap othermeta after topic othermeta has been included
otherMetaConsolidated => {}, # {Name}{Content}++ : consolidated other meta data across entire corpus
oxygenProjects => undef, #I Create oxygen project files for each map - the project file will have an extension of .xpr and the same name and path as the map file or the name return by your implem...
parseFailed => {}, # {file} files that failed to parse.
publicId => {}, # {file} = Public id on Doctype
references => {}, # {file}{reference}++ - the various references encountered
relocatedReferencesFailed => [], # Failing references that were not fixed by relocation
relocatedReferencesFixed => [], # Relocated references fixed
requestAttributeNameAndValueCounts => undef, #I Report attribute name and value counts
requiredCleanUp => undef, # {full file name}{cleanup} = number of required-cleanups
reports => undef, #I Reports folder: Xref will write text versions of the generated reports to files in this folder.
results => [], # Summary of results table.
# sourceFile => undef, # The source file from whic#h this structure was generated.
sourceTopicToTargetBookMap => {}, # {input topic cut into multiple pieces} = output bookmap representing pieces
statusLine => undef, # Status line summarizing the cross reference.
statusTable => undef, # Status table summarizing the cross reference.
subjectSchemeMap => undef, #I Create a subject scheme map in the named file
suppressReferenceChecks => undef, #I Suppress reference checking - which normally happens by default - but which takes time and might be irrelevant if an earlier xref has already checked all the reference...
tableDimensions => {}, # {file}{columns}{rows} == count
tagCount => {}, # {file}{tags} == count of the different tag names found in the xml files.
tagsTextsRatio => undef, # Ratio of tags to text encountered
tags => undef, # Number of tags encountered
targetFolderContent => {}, # {file} = bookmap file name : the target folder content which shows us where an input file went
targetTopicToInputFiles => {}, # {current file} = the source file from which the current file was obtained
texts => undef, # Number of texts encountered
timeEnded => undef, # Time the run ended
timeStart => undef, # Time the run started
title => {}, # {full file name} = title of file.
titleToFile => {}, # {title}{file}++ if L<fixXrefsByTitle> is in effect
topicFlatteningFactor => {}, # Topic flattening factor - higher is better
topicFlattening => {}, # {topic}{sources}++ : the source files for each topic that was flattened
topicIds => {}, # {file} = topic id - the id on the outermost tag.
topicsFlattened => undef, # Number of topics flattened
topicsNotReferencedFromBookMaps => {}, # {topic file not referenced from any bookmap} = 1
topicsReferencedFromBookMaps => {}, # {bookmap full file name}{topic full file name}++ : bookmaps to topics
topicsToReferringBookMaps => {}, # {topic full file name}{bookmap full file name}++ : topics to referring bookmaps
urls => {}, # {topic full file name}{url}++ : urls found in each file
urlsBad => {}, # {url}{topic full file name}++ : failing urls found in each file
urlsGood => {}, # {url}{topic full file name}++ : passing urls found in each file
validateUrls => undef, #I Validate urls if true by fetching their headers with L<curl>
validationErrors => {}, # True means that Lint detected errors in the xml contained in the file.
vocabulary => {}, # The text of each topic shorn of attributes for vocabulary comparison.
xrefBadFormat => {}, # External xrefs with no format=html.
xrefBadScope => {}, # External xrefs with no scope=external.
xRefs => {}, # {file}{href}++ Xrefs references.
xrefsFixedByTitle => [], # Xrefs fixed by locating a matching topic title from their text content.
);
loadHash($xref, @_); # Load attributes complaining about any invalid ones
} # newXref
sub xref2(%) #P Check the cross references in a set of Dita files held in L<inputFolder|/inputFolder> and report the results in the L<reports|/reports> folder. The possible attribute...
{my (%attributes) = @_; # Attributes of cross referencer
my ($xref) = newXref(@_); # Cross referencer
$xref->timeStart = time; # Start time
$xref->inputFolder or confess "Please supply a value for: inputFolder";
$xref->inputFolder =~ s(\/+\Z) (\/)gs; # Cleanup path names
$xref->inputFolder = # Make input folder absolute
absFromAbsPlusRel($xref->currentFolder, $xref->inputFolder)
if $xref->inputFolder !~ m(\A/);
$xref->reports or confess "Please supply a value for: reports";
if (1) # Write title and some of the parameters
{my $r = $xref->reports;
owf(fpe($r, qw(xref_parameter_settings txt)), dump($xref)) if $r; # Print all parameters
lib/Data/Edit/Xml/Xref.pm view on Meta::CPAN
{my ($xref) = @_; # Xref results
my @r;
if (my $xrefTopicIds = $xref->topicIds)
{for my $file(sort keys %{$xrefTopicIds}) # Each input file which will be absolute
{if (my $topicId = $xrefTopicIds->{$file}) # Topic Id for file - we report missing topicIds in: reportDuplicateTopicIds
{next unless $topicId =~ m(\AGUID-)is;
$xref->guidToFile->{$topicId} = $file; # Guid Topic Id to file
push @r, [$topicId, $file];
}
}
}
formatTables($xref, \@r,
columns => <<END,
Guid The guid being defined
File The file that defines the guid
END
title =>qq(Guid topic definitions),
head =>qq(Xref found NNNN guid topic definitions on DDDD),
summarize=>1,
file =>fpe(q(lists), qw(guidsToFiles txt)));
}
sub editXml($$$)                                                                #P Write new source to an xml file while carrying over the XML headers and lint trailers found in an existing file
 {my ($in, $out, $source) = @_;                                                 # Input file, output file, source to write
  my @lines = readFile($in);                                                    # Existing source, consumed from the front as headers are recognized
  my @headers;                                                                  # Leading header lines to be preserved, if any

# Headers are recognized by a deliberately simple line based scan, not a real
# parse: the first line must be an xml declaration, optionally followed by a
# DOCTYPE line plus every following line up to the first one that looks like
# the start of a tag.
  if (@lines and $lines[0] =~ m(\A\<\?xml)is)                                   # Xml declaration on first line
   {push @headers, shift @lines;
    if (@lines and $lines[0] =~ m(\A<!DOCTYPE)s)                                # DOCTYPE starts on the next line
     {push @headers, shift @lines;
      push @headers, shift @lines                                               # Remainder of the DOCTYPE up to the root tag
        while @lines and $lines[0] !~ m(\A\s*<[a-z])i;
     }
   }

  my @lint;                                                                     # Trailing lint lines to be preserved, if any
  my $inLint;                                                                   # Becomes true at the first lint comment line and stays true thereafter
  for my $line(@lines)                                                          # Remaining lines after the headers have been removed
   {$inLint ||= $line =~ m(\A\<\!\-\-linted\:)s;                                # Start of the lint trailer
    push @lint, $line if $inLint;                                               # Keep the lint start line and everything that follows it
   }

  owf($out, join '', @headers, $source, @lint)                                  # New source placed between the old headers and the old lint trailer
 }
# Fix a file by moving its hrefs and conrefs to the xtrf attribute unless
# deguidization is in effect and the guid can be converted into a valid Dita
# reference accessing a file in the input corpus.
#
# If fixRelocatedRefs is in effect: such references are fixed by assuming that
# the files mentioned in broken links have been relocated elsewhere in the
# folder structure and can be located by base file name alone.
#
# If fixXrefsByTitle is in effect apply the Gearhart Title Method: fix broken
# xrefs by looking for a unique topic with the same title text as the content of
# the xref.
#
# If fixDitaRefs is in effect we are converting Dita to Dita: relink Dita
# references that were valid in the input corpus to make them valid again in the
# output corpus even after files have been cut out and renamed to the GB Standard.
# The targets/ folder provides the mapping between the input and output corpora.
sub fixReferencesInOneFile($$) #P Fix one file by moving unresolved references to the xtrf attribute
{my ($xref, $sourceFile) = @_; # Xref results, source file to fix
my $fixed = newXref(); # Fix results
my $node; # The current node we are working with
my $attr; # The current attribute we are working with
my $ref; # The current reference we are working with
my @bad; $fixed->fixedRefsBad = \@bad; # Hrefs that could not be fixed and so were ameliorated by moving them to @xtrf
my @good; $fixed->fixedRefsGood = \@good; # Hrefs that were fixed by resolving a Guid
my $refDetails = sub # Save details of a reference
{my ($r) = @_;
my $s = $xref->targetTopicToInputFiles->{$sourceFile}; # The source file(s) from which each target was obtained
[$r, $node->tag, $attr, $ref, $sourceFile, sort keys %$s] # Construct reference details
};
my $bad = sub # Save details of a bad reference
{my ($r) = @_;
push @bad, my $R = &$refDetails($r);
$R
};
my $good = sub # Save details of a good reference
{my ($target, $r) = @_; # Target file, reason
my $R = &$refDetails($r);
push @good, [@$R[0..3], $target, @$R[4..$#$R]]; # Insert target at correct location
$R
};
my $fixXrefByTitle = sub # Attempt to fix an xref by using its text content to search for a matching title
{return undef unless -t $node eq q(xref); # Only works for xrefs
my $xTitle = nws($node->stringContent); # Normalized title from xref node
if (my $topics = $xref->titleToFile->{$xTitle}) # Find the topics that match the title text content
{my $N = keys %$topics; # Matching topics
if ($N == 1) # Unique matching topic - the original Gearhart Title Method
{my ($path) = keys %$topics;
my $rel = relFromAbsAgainstAbs($path, $sourceFile); # Relative file name
$node->href = $rel; # Update xref
return &$good($path, q(Fixed by Gearhart Title Method)); # Report the fix made
}
elsif ($N > 1) # Multiple matches
{if (my $l = fileLargestSize(sort keys %$topics)) # Boldly choose the topic with the largest size to resolve the ambiguity on the basis that it is probably the most interesting
{my $rel = relFromAbsAgainstAbs($l, $sourceFile); # File name of target topic relative to source file
$node->href = $rel; # Update reference
return &$good($l, q(Fixed by Gearhart Bold Title Method)); # Report the fix made
}
}
}
lib/Data/Edit/Xml/Xref.pm view on Meta::CPAN
}
else
{lll "No source for $sourceFile\n";
}
undef # Failed
};
my $checkImageRef = sub # Check whether an image exists or not
{my $i = absFromAbsPlusRel($sourceFile, $ref); # Local file name
return 1 if -e $i; # Local file exists
return 2 if -e wwwDecode($i); # Local file exists
undef # Local file exists after decoding % signs
};
my $fixOnePartialDitaRef = sub # Fix a partial dita reference to an externally cut out topic renamed to the GB Standard where such a reference is just a file name as used in a bookmapref.
{my ($ref) = @_; # Partial reference
return undef unless $xref->fixDitaRefs; # Fixing dita references not requested
my $topicSource = &$locateUniqueTopicSourceForTargetFile($sourceFile); # Unique source file corresponding to the target file else undef
return undef unless $topicSource; # The references can not be resolved without a unique source file.
my $refIn = absFromAbsPlusRel($topicSource, $ref); # The referenced input file that was present in the input being transformed because we assume that (most of) the input Dita refs were valid
if (my $new = $xref->inputFileToTargetTopics->{$refIn}) # The target files new files that were cut out of the referenced input file - there might several such
{if (my $referencedTarget = fileLargestSize(sort keys %$new)) # Boldly assume that the largest possible target is the one we want
{my $link = relFromAbsAgainstAbs($referencedTarget, $sourceFile); # Create relative link from book map
$node->set($attr=>$link);# if $xref->fixBadRefs; # Reset reference - we know fixDitaRefs is true.
&$good($link, q(unique target)); # Record successful fix
return 1; # Success
}
}
undef # Failed
};
my $fixOneFullDitaRef = sub # Fix a full dita reference to an externally cut out topic renamed to the GB Standard where such a reference is: file#topicId/label
{return undef unless $xref->fixDitaRefs; # Fixing dita references not requested
return &$fixOnePartialDitaRef($ref) unless $ref =~ m(#); # Confirm it is a full reference else fix it as a partial reference
my $topicSource = &$locateUniqueTopicSourceForTargetFile($sourceFile); # Unique source file corresponding to the target file else undef
return undef unless $topicSource; # The references can not be resolved without a unique source file.
my ($rf, $rt, $ri) = parseDitaRef($ref, $topicSource); # Parse the dita ref
if (my $new = $xref->originalSourceFileAndIdToNewFile->{$rf}{$ri}) # The new files cut out of the original topic source file
{my $targetFile = relFromAbsAgainstAbs($new, $sourceFile); # Create relative link from current file
if (my $topicId = $xref->topicIds->{$new}) # Topic id for target file
{my $href = qq($targetFile#$topicId/$ri); # New href
$node->set($attr=>$href);# if $xref->fixBadRefs; # Reset href - we know fixDitaRefs is true.
&$good($new, q(Unique target for file ref)); # Record the fix made
return 1; # Record the fix made
}
}
if ($xref->allowUniquePartialMatches && $attr !~ m(\Aconref)s) # Partial matching - i.e ignoring the stuff to the right of the # in the reference sometimes produces a unique result.
{return &$fixOnePartialDitaRef($ref =~ s(#.*\Z) ()rs); # Try to resolve reference as a partial re
}
undef # Failed
};
my $fixRelRef = sub # Attempt to fix a reference broken by relocation
{my ($R, $rest) = split m(#)s, $ref, 2; # Get referenced file name
if ($R)
{my $r = fne($R); # Href file base name
if (my $F = $xref->baseFiles->{$r}) # Relocated else where
{my @targets = sort keys(%$F); # Relocation targets
if (@targets == 1) # Just one such relocation
{my $f = relFromAbsAgainstAbs($targets[0], $sourceFile); # Link to it
if ($f ne $R)
{my $newLink; # Fix if the target is else where
if ($rest) # Link has more than one component
{$node->set($attr=>($newLink = $f.q(#).$rest)); # Reset link
}
else # Link has just one component
{$node->set($attr=>($newLink = $f)); # Reset link
}
my $saveRef = $ref; $ref = $newLink; # Try fixing the relocated reference as a dita reference.
my $r = &$fixOneFullDitaRef;
$ref = $saveRef;
return $r;
}
}
}
}
undef # Failed
};
my $fixOneRef = sub # Fix one unresolved reference either by ameliorating it or by moving it to the xtrf attribute thereby putting it in M3.
{return unless $xref->fixRefs->{$sourceFile}{$ref}; # Fix not requested for this reference
if ($xref->deguidize and $ref =~ m(GUID-)is) # On a guid and deguidization allowed so given g1#g2/id convert g1 to a file name by locating the topic with topicId g2.
{my @refs = split /\s+/, $ref; # There might be multiple references in the href
my @unresolved; # Unresolved targets
my @resolved; # Resolved targets
for my $subRef(@refs) # Each reference in the reference
{my ($guid, $rest) = split /#/, $subRef;
if (my $target = $xref->guidToFile->{$guid}) # Target file associated with guid
{my $link = relFromAbsAgainstAbs($target, $sourceFile); # Relative link
$link .= q(#).$rest if $rest; # Remainder of reference which does not change as it is not file related
if (!@resolved) # First resolution
{$node->set($attr=>$link); # New href or conref
&$good($target, q(Deguidized reference)); # Report fix made
}
push @resolved, $subRef;
}
else
{push @unresolved, $subRef;
}
}
if (@unresolved and $xref->fixBadRefs) # Unresolved - transfer all references to xtrf so some-one else can try
{$node->renameAttr($attr, q(xtrf)); # No target file for guid
&$bad(q(No file for guid)); # Report failure
}
}
elsif ($xref->fixRelocatedRefs and &$fixRelRef) # Try to fix as a relocated ref if possible
{
}
elsif ($xref->fixXrefsByTitle and &$fixXrefByTitle) # Try to fix a missing xref by title
{
}
elsif ($xref->fixBadRefs) # Move href to xtrf as no other fix seems possible given that we have already tried to fix it as a guid and it was reportedly not working as a standard dita reference.
{$node->renameAttr($attr, q(xtrf)); # No target file for guid
if ($xref->changeBadXrefToPh) # Change bad xref to ph if requested
{if ($node->at_xref)
{$node->change_ph;
}
}
&$bad(q(No such target)); # Report failure
}
else # ffff - Fix not requested so href left alone
{&$bad(q(Not fixable)); # Unable to fix the reference using any known method
}
};
my $x = Data::Edit::Xml::new($sourceFile); # Parse xml - should parse OK else otherwise how did we find out that this file needed to be fixed
my $s = -p $x; # Source before any changes
$x->by(sub # Check any references encountered on each node, Ameliorate some specific cases. If the reference is still invalid report the discrepancy.
{my ($o) = @_; # Current node
$node = $o; # Make current node available globally
my $t = $node->tag; # Tag
if ($t =~ m(\A(appendix|chapter|image|link|mapref|topicref|xref)\Z)is) # Hrefs that need to be fixed
{if ($ref = $node->attr($attr = q(href))) # The attribute and reference to ameliorate or fix
#if ($t =~ m(\A(appendix|chapter|topicref)\Z)is) # Fix bookmap hrefs
{if ($t =~ m(\A(appendix|chapter|mapref|topicref)\Z)is) # Fix bookmap hrefs
{&$fixBookMapDitaRef or &$fixOneRef; # Fix references to topics cut into multiple pieces and now represented by a bookmap
}
elsif ($t =~ m(\Aimage\Z)is) # Check image references
{&$checkImageRef or &$fixOneRef; # No additional fixes available yet for images, as so far, the resolution of images is done in thee calling frame work. Hence we only need to check whether the reference...
}
else # Fix hrefs without the benefit of the targets/ folder
{&$fixOneFullDitaRef or &$fixOneRef; # Fix references not in a bookmap
}
}
elsif ($t =~ m(\Axref\Z)s and $xref->fixXrefsByTitle and &$fixXrefByTitle)# Try to fix a missing xref by title
{
}
}
if ($ref = $node->attr($attr = q(conref))) # Fix a conref
{&$fixOneFullDitaRef or &$fixOneRef;
}
if ($ref = $node->attr($attr = q(conrefend))) # Fix a conrefend
{&$fixOneFullDitaRef or &$fixOneRef;
}
});
if (my $S = -p $x) # Source after any changes
{if ($S ne $s) # Write any changes - seems to be slightly faster than not checking
{if (onAwsSecondary) # Write output to temporary folder regardless so it can be copied enmasse back to the session leader
{my $f = swapFolderPrefix($sourceFile, # Output file name
$xref->inputFolder, $xref->fixedFolderTemp);
editXml($sourceFile, $f, $S); # Write the fixed file to the fixedFolder retaining headers and trailers
}
elsif (my $fixedFolder = $xref->fixedFolder) # New output file in fixedFolder
lib/Data/Edit/Xml/Xref.pm view on Meta::CPAN
}
}
}
$xref->inputFileToTargetTopics = \%sourceToTarget; # The targets for each input file
$xref->targetTopicToInputFiles = \%targetToSource; # The source file from which each target was obtained
$xref->sourceTopicToTargetBookMap = \%sourceTopicToTargetBookMap; # The bookmap representing a cut up topic
$xref->topicFlattening = \%targetToSourceDuplicated; # Topics that arose from flattening several source files
$xref->originalSourceFileAndIdToNewFile = \%si; # Record mapping from original source file and id to the new file containing the id
formatTables($xref, \@r,
columns => <<END,
Type The type of reference
DocType Document type of the source file
Source Source file
Target Cut out file
END
summarize=>1,
title=>qq(The target topics cut out of the source documents),
head=><<END,
Xref noted NNNN cut out topics on DDDD
END
file=>(fpe(qw(lists source_to_targets txt))));
if (1) # Report topic flattening
{my @r;
my $s = 0; my $t = 0;
for my $target(sort keys %targetToSourceDuplicated) # Each of the target topics that were derived from this source file
{my @s = @{$targetToSourceDuplicated{$target}}; # Each source input file
push @r, [scalar(@s), $target];
push @r, [q(), q( ).$_] for @s;
push @r, [q()];
++$t; $s += @s;
}
$xref->topicsFlattened = $s; # Record the number of topics flattened
my $F = $xref->topicFlatteningFactor = $t ? $s / $t : 0; # Topic flattening factor - higher is better
my $f = sprintf("%7.4f", $F);
my $n = @{$xref->inputFiles}; # Number of topics
my $p = sprintf("%7.4f", $n ? 100*$t/$n : 0); # Percentage topics flattened versus total number of topics
formatTables($xref, \@r,
columns => <<END,
Count Number of sources that created this target
Target The target file flattened out from multiple source files
END
summarize => 1,
title => qq(Topic files flattened from multiple sources),
head => <<END,
Xref noted that $s source topics were reduced to $t target topics on DDDD
This represents a flattening factor of: $f (higher is better) in the topics that got flattened
Total number of topics : $n
Number of topics flattened: $t
Percent topics flattened: $p
END
file => fpe(qw(lists topic_flattening txt)));
}
}
if ($xref->fixRelocatedRefs) # Load base file name to full name but if needed to do relocation fixes
{my %baseFiles; # Map base files back to full files. The base file is the file name shorn of the path - the reason the GB Standard is so important
for my $file(searchDirectoryTreesForMatchingFiles($xref->inputFolder)) # All input files
{my $base = fne $file; # Base file name - the GB Standard name for the file
$baseFiles{$base}{$file}++; # Current location of the file
}
$xref->baseFiles = \%baseFiles;
}
my @bad; # Hrefs that could not be fixed and so were ameliorated by moving them to @xtrf
my @good; # Hrefs that were fixed by resolving a Guid
if (my @files = sort keys %{$xref->fixRefs}) # Fix files if requested
{awsParallelProcessFiles $xref, # Fix files in parallel
\&fixReferencesParallel, # Fix one file
\&fixReferencesResults, # Consolidate results
[@files];
@good = $xref->fixedRefsGood->@*; # Results from fixReferencesResults
@bad = $xref->fixedRefsBad ->@*;
}
@good = sort {join(' ', @$a) cmp join(' ', @$b)} @good;
@bad = sort {join(' ', @$a) cmp join(' ', @$b)} @bad;
my $fbr = $xref->fixBadRefs; # Are we fixing bad refs?
my $facet = q(Dita references);
formatTables($xref, $xref->fixedRefsBad = \@bad, # Report references we cannot fix
columns => <<END,
Reason The reason the reference was not fixed
Tag The tag of the node in which the reference failure occurs
Attr The attribute of the node in which the reference failure occurs
Reference The reference not being fixed
File The file in which the reference appears
Source_Files One or more source files that from which this file was derived
END
summarize => 1,
title => q(Invalid references),
facet => $facet, aspectColor => q(red),
head => $fbr ? <<END : <<END2,
Xref moved NNNN invalid references to M3 on DDDD as fixBadRefs=>$fbr was specified
END
Xref was unable to resolve NNNN failing references on DDDD, fixBadRefs=> was not specified
END2
zero => 1,
file => fpe(qw(bad failing_references txt)));
formatTables($xref, $xref->fixedRefsGood = \@good, # Report hrefs which were failing but were successfully resolved by ingenuity.
columns => <<END,
Method The way that the reference was fixed
Tag The tag of the node on which the reference was fixed
Attr The attribute being fixed - normally href
Ref The reference that is being resolved
Target_File The file the reference resolves to.
File The file in which the reference appears
Source_Files The source files that gave rise to the file containing the reference after file flattening
END
summarize => 1,
title => qq(These failing references were successfully resolved),
facet => $facet, aspectColor => q(green),
head => <<END,
lib/Data/Edit/Xml/Xref.pm view on Meta::CPAN
addNavTitle => 1
Reports of successful updates will be written to:
reports/good/navTitles.txt
Reports of unsuccessful updates will be written to:
reports/bad/navTitles.txt
=head2 Fix bad references
It is often desirable to ameliorate unresolved Dita href attributes so that
incomplete content can be loaded into a content management system. The:
fixBadRefs => 1
attribute requests that the:
conref and href
attributes be renamed to:
xtrf
if the B<conref> or B<href> attribute specification cannot be resolved in the
current corpus by other methods of fixing failing references such as:
L<fixDitaRefs>, L<fixRelocatedRefs> or L<fixXrefsByTitle>.
This feature was designed by L<mailto:mim@cpan.org>.
=head2 Deguidize
Some content management systems use guids, some content management systems use
file names as their means of identifying content. When moving from a guid to a
file name content management system it might be necessary to replace the guids
representing file names with the actual underlying file names. If the
deguidize => 1
parameter is set to true, Xref will replace any such file guids with the
underlying file name if it is present in the content being cross referenced.
=head2 File flattening
It is often desirable to flatten or reflatten the topic files in a corpus so
that they can coexist in a single folder of a content management system without
colliding with each other.
The presence of the input attribute:
flattenFolder => folder-to-flatten-files-into
causes topic files to be flattened into the named folder using the
L<GBStandard> to generate the flattened file names. Xref will then update all
L<Dita> references to match these new file names. If the L<flattenFolder>
folder is the same as the L<inputFolder> then the input files are flattened in
place.
=head2 Locating relocated files
File references in B<conref> or B<href> attributes that have a unique valid base file
name and an invalid path can be fixed by setting the input attribute:
fixRelocatedRefs => 1
to a true value to request that Xref should replace the incorrect paths to the
unique base file names with the correct path.
If coded in conjunction with the B<fixBadRefs> input attribute this will cause
Xref to first try to fix any missing xrefs; any that still fail to resolve
will then be ameliorated by moving them to the B<xtrf> attribute.
=head2 Fix Xrefs by Title
L<Dita> B<xref> tags with broken or missing B<href> attributes can sometimes be
fixed by matching the text content of the B<xref> with the titles of topics.
If:
fixXrefsByTitle => 1
is specified, L<Xref> will locate possible targets for a broken B<href> by
matching the white space normalized L<Data::Table::Text::nws> of the text
content of the B<xref> with the similarly normalized title of each topic that
is referenced by any book map that refers to the topic containing the B<xref>.
If a single matching candidate is located then it will be used to update the
B<href> attribute of the B<xref>.
=head2 Fix References in Dita To Dita Conversions
When converting a L<Dita> input source corpus to L<Dita> the referenced topics
are usually renamed and flattened via the L<GBStandard>. If enabled:
fixDitaRefs => targets/
updates valid L<Dita> references in the input corpus with the latest name for
the referenced topic to make links that were valid in the input corpus valid in
the output corpus as well.
The B<targets/> folder should contain the same set of file names as the
original input corpus, each such file should contain the name of a B<bookmap>
topic present in the B<inputFolder=> whose B<chapter> and B<topicref>s identify
the new names of the files cut out and flattened from the existing input
corpus.
The creation of the B<targets/> folder is usually done by some other piece of
software such as L<Data::Edit::Xml::To::Dita> as it is too complex and
laborious to be performed reliably by hand. No validation of the contents of
this folder is performed as it is assumed that it has been created reliably in
software.
=head2 Topic Matching
Topics can be matched on title and vocabulary to assist authors in finding
similar topics by specifying the:
matchTopics => 0.9
lib/Data/Edit/Xml/Xref.pm view on Meta::CPAN
B<idsRemoved> - {id}++ : Ids removed from all files
B<images> - {file}{href} Count of image references in each file.
B<imagesReferencedFromBookMaps> - {bookmap full file name}{full name of image referenced from topic referenced from bookmap}++
B<imagesReferencedFromTopics> - {topic full file name}{full name of image referenced from topic}++
B<imagesToRefferingBookMaps> - {image full file name}{bookmap full file name}++ : images to referring bookmaps
B<indexedWords> - {word}{full file name of topic the word occurs in}.
B<inputFileToTargetTopics> - {input file}{target file}++ : Tells us the topics an input file was split into
B<inputFiles> - Input files from L<inputFolder|/inputFolder>.
B<inputFolderImages> - {full image file name} for all files in input folder thus including any images present
B<ltgt> - {text between < and >}{filename} = count giving the count of text items found between < and >
B<maxZoomOut> - Results from L<maxZoomIn|/maxZoomIn> where {file name}{regular expression key name in L<maxZoomIn|/maxZoomIn>}++
B<md5Sum> - MD5 sum for each input file.
B<md5SumDuplicates> - {md5sum}{file}++ : md5 sums with more than one file
B<missingImageFiles> - [file, href] == Missing images in each file.
B<missingTopicIds> - Missing topic ids.
B<noHref> - Tags that should have an href but do not have one.
B<notReferenced> - {file name} Files in input area that are not referenced by a conref, image, bookmapref or xref tag and are not a bookmap.
B<olBody> - The number of ol under body by file
B<originalSourceFileAndIdToNewFile> - {original file}{id} = new file: Record mapping from original source file and id to the new file containing the id
B<otherMeta> - {original file}{othermeta name}{othermeta content}++ : the contents of the other meta tags
B<otherMetaBookMapsAfterTopicIncludes> - Bookmap othermeta after topic othermeta has been included
B<otherMetaBookMapsBeforeTopicIncludes> - Bookmap othermeta before topic othermeta has been included
B<otherMetaConsolidated> - {Name}{Content}++ : consolidated other meta data across entire corpus
B<otherMetaDuplicatesCombined> - Duplicate othermeta in bookmaps with called topics othermeta included
B<otherMetaDuplicatesSeparately> - Duplicate othermeta in bookmaps and topics considered separately
B<otherMetaPushToBookMap> - Othermeta that can be pushed to the calling book map
B<otherMetaRemainWithTopic> - Othermeta that must stay in the topic
B<parseFailed> - {file} files that failed to parse.
B<publicId> - {file} = Public id on Doctype
B<references> - {file}{reference}++ - the various references encountered
B<relocatedReferencesFailed> - Failing references that were not fixed by relocation
B<relocatedReferencesFixed> - Relocated references fixed
B<requiredCleanUp> - {full file name}{cleanup} = number of required-cleanups
B<results> - Summary of results table.
B<sourceTopicToTargetBookMap> - {input topic cut into multiple pieces} = output bookmap representing pieces
B<statusLine> - Status line summarizing the cross reference.
B<statusTable> - Status table summarizing the cross reference.
B<tableDimensions> - {file}{columns}{rows} == count
B<tagCount> - {file}{tags} == count of the different tag names found in the xml files.
B<tags> - Number of tags encountered
B<tagsTextsRatio> - Ratio of tags to text encountered
B<targetFolderContent> - {file} = bookmap file name : the target folder content which shows us where an input file went
B<targetTopicToInputFiles> - {current file} = the source file from which the current file was obtained
B<texts> - Number of texts encountered
B<timeEnded> - Time the run ended
B<timeStart> - Time the run started
B<title> - {full file name} = title of file.
B<titleToFile> - {title}{file}++ if L<fixXrefsByTitle> is in effect
B<topicFlattening> - {topic}{sources}++ : the source files for each topic that was flattened
B<topicFlatteningFactor> - Topic flattening factor - higher is better
B<topicIds> - {file} = topic id - the id on the outermost tag.
B<topicsFlattened> - Number of topics flattened
B<topicsNotReferencedFromBookMaps> - {topic file not referenced from any bookmap} = 1
B<topicsReferencedFromBookMaps> - {bookmap full file name}{topic full file name}++ : bookmaps to topics
B<topicsToReferringBookMaps> - {topic full file name}{bookmap full file name}++ : topics to referring bookmaps
B<urls> - {topic full file name}{url}++ : urls found in each file
B<urlsBad> - {url}{topic full file name}++ : failing urls found in each file
B<urlsGood> - {url}{topic full file name}++ : passing urls found in each file
B<validationErrors> - True means that Lint detected errors in the xml contained in the file.
B<vocabulary> - The text of each topic shorn of attributes for vocabulary comparison.
B<xRefs> - {file}{href}++ Xrefs references.
B<xrefBadFormat> - External xrefs with no format=html.
lib/Data/Edit/Xml/Xref.pm view on Meta::CPAN
END
owf(fpe($in, qw(good2 png)), <<END);
<image/>
END
}
sub createRequiredCleanUps($) #P Write two sample concept topics containing required-cleanup tags for the required clean ups report
 {my $in = $_[0]; # Folder to create the files in
  my $here = fpd(currentDirectory, $in); # Computed as before; the value is not used in this sub

  my $firstConcept = <<END; # Topic c1: "aaa" once and "bbb" twice
<concept id="c1">
<title>C1_</title>
<conbody>
<required-cleanup>aaa</required-cleanup>
<required-cleanup>bbb</required-cleanup>
<required-cleanup>bbb</required-cleanup>
</conbody>
</concept>
END

  my $secondConcept = <<END; # Topic c2: "aaa", "bbb", "ccc" and "CCC" once each
<concept id="c2">
<title>C2_</title>
<conbody>
<required-cleanup>aaa</required-cleanup>
<required-cleanup>bbb</required-cleanup>
<required-cleanup>ccc</required-cleanup>
<required-cleanup>CCC</required-cleanup>
</conbody>
</concept>
END

  owf(fpe($in, q(c1), q(dita)), $firstConcept); # Write c1.dita into the input folder
  owf(fpe($in, q(c2), q(dita)), $secondConcept); # Write c2.dita - the result of this write is the result of the sub
 }
sub createSoftConrefs($) #P Write sample topics that test fixing the file part of a conref even if the rest is invalid
 {my $in = $_[0]; # Folder to create the files in
  my $here = fpd(currentDirectory, $in); # Computed as before; the value is not used in this sub
  my $r = fpe(qw(c_12345678123456781234567812345678 dita)); # Relocatable file name referenced by the conrefs in the second topic

  my $relocatedTopic = <<END; # Topic with a duplicated id and conrefs to its own ids
$conceptHeader
<concept id="c">
<title>C1</title>
<conbody>
<p id="p1">aaa</p>
<p id="p1">bbb</p>
<p conref="#c/p1"/> <!-- FAILS -->
<p conref="#c/pp"/> <!-- FAILS: No such id -->
</conbody>
</concept>
END

  my $referringTopic = <<END; # Topic whose conrefs name the relocatable file without its folder
$conceptHeader
<concept id="c">
<title>C2</title>
<conbody>
<p conref="$r#c/p1"/>
<p conref="$r#c1/p1"/> <!-- PASSES: wrong topic id but we ignore topic ids-->
<p conref="$r#c/bad"/> <!-- PASSES: no such id - SHOULD FAIL even though we are relocating -->
<p conref="$r"/>
<p conref="c.dta"/> <!-- FAILS: no such file -->
<p id="q1">aaa</p>
<p conref="#c/q1"/>
</conbody>
</concept>
END

  owf(fpf($in, q(folder), $r), $relocatedTopic); # The relocatable topic goes into the sub folder "folder"
  owf(fpe($in, qw(c dita)), $referringTopic); # The referring topic goes directly into the input folder
 }
sub checkXrefStructure($$@) #P Check an output structure produced by Xref: dump the named field, strip folder names from the dump and re-evaluate it
 {my ($x, $field, @folders) = @_; # Cross referencer, field to check, folders to suppress
  my $s = nws dump($x->{$field}); # White space normalized dump of the structure to be tested
  for my $folder($x->inputFolder, @folders) # Remove the input folder and the specified folder names from the structure to be tested
   {next unless defined($folder) and length($folder); # An empty pattern would make Perl reuse the last successfully matched pattern instead of matching nothing
    $s =~ s(\Q$folder\E) ()gs; # Remove the folder name, matched literally so that characters such as "." or "+" in a path are not treated as regular expression syntax
   }
  eval $s; # Recreate structure from the edited dump - NOTE(review): string eval, only safe because the text comes from a dump of our own data
 }
sub writeXrefStructure($$@) #P Write the test for an Xref structure: print to STDERR an is_deeply statement that checks the named field via checkXrefStructure
 {my ($x, $field, @folders) = @_; # Cross referencer, field, names of the folders to suppress
  my $in = $x->inputFolder;
  my $dumped = dump($x->{$field}); # Field to be tested
  $dumped =~ s(\Q$in\E) ()gs if defined($in) and length($in); # Remove the input folder, matched literally; skipped when empty because an empty pattern reuses the last successfully matched pattern
  my $s = nws($dumped); # Normalize white space after the input folder has been removed
  $s =~ s(\],\s+\[) (],\n [)gs; # Start each array in a list of arrays on a new line
  $s =~ s(\},\s+\{) (},\n {)gs; # Start each hash in a list of hashes on a new line
  for my $folderName(@folders) # Remove specified folder names from structure to be tested
   {no strict qw(refs); # The folder is supplied as the name of a sub that returns the folder
    my $folder = &{$folderName}; # Folder name - this call form passes the current @_ to the named sub
    next unless defined($folder) and length($folder); # Nothing to remove and an empty pattern would reuse the last successfully matched pattern
    $s =~ s(\Q$folder\E) ()gs; # Remove folder name, matched literally, from structure to be tested
   }
  my $f = join ', ', @folders; # Folders to remove, written into the generated test as sub names
  my $t = <<END; # Format test
is_deeply checkXrefStructure(\$x, q($field), $f), $s;
END
  say STDERR $t; # Write test
 }
sub deleteVariableFields($) #P Remove time and other fields that do not affect the end results
 {my ($xref) = @_; # Cross referencer
  my @timeFields = qw(timeEnded timeStart maximumNumberOfProcesses); # Time fields
  my @floatingFields = qw(tagsTextsRatio); # Floating fields
  delete @{$xref}{@timeFields, @floatingFields}; # Drop all of the variable fields with one hash slice
  removeFilePathsFromStructure($xref); # The result of this call is the result of the sub
 }
sub testReferenceChecking #P Test reference checking
{my $folder = q(/home/phil/);
my @names = qw(aaa bbb ccc);
my @ids = map {q(p).$_} @names;
my @files = map {fpe($folder, $_, q(dita))} @names;
my $xref = newXref
(currentFolder => q(/aaa),
reports => fpd(currentDirectory, qw(test resports)),
topicIds => {map {$files[$_]=>$names[$_]} 0..$#names},
ids => {map {$files[$_]=>{$ids[$_]=>1}} 0..$#names},
);
lib/Data/Edit/Xml/Xref.pm view on Meta::CPAN
missingTopicIds => {},
noHref => {},
notReferenced => {},
olBody => {},
originalSourceFileAndIdToNewFile => {
"a.xml" => {
"GUID-400c2c59-95e1-7bf3-4647-3a135281bfaf" => "c_aaaa_cccc_a91633094220d068c453eecae1726eff.dita",
"GUID-68822563-d568-f418-38ae-f1c62cb4ac8d" => "c_aaaa_dddd_914b8e11993908497768c50d992ea0f0.dita",
"GUID-c67821ef-3da2-c89f-0fc9-9fba3937f368" => "c_aaaa_121939eab89cd7d2c3eb4c4189772a1f.dita",
"GUID-f0c0e170-8128-10ef-045d-97602fdde76f" => "c_aaaa_bbbb_55baefe9258538b26a95b0015a8d5a2b.dita",
},
"b.xml" => {
"GUID-2b6aab4f-9328-e326-f55f-160771a8c3dd" => "c_bbbb_cccc_d1c80714275637cde524bdfa1304a8f3.dita",
"GUID-86a684b0-1a0b-4c30-6da9-24c74ff1f0cc" => "c_bbbb_aaaa_cfd3a140e06a914fc8469583ad87829d.dita",
"GUID-96a20d7f-bbaf-deef-55ef-e09a0a059251" => "c_bbbb_6100b51ca1f789836cd4f31893ed67d2.dita",
"GUID-cfe7cb3d-05e7-a147-db10-dcbacaeecef7" => "c_bbbb_bbbb_c90ebf976073b2a3f7a8dc27a3c8254b.dita",
"p1" => "c_bbbb_6100b51ca1f789836cd4f31893ed67d2.dita",
"p2" => "c_bbbb_bbbb_c90ebf976073b2a3f7a8dc27a3c8254b.dita",
"p3" => "c_bbbb_cccc_d1c80714275637cde524bdfa1304a8f3.dita",
},
},
otherMeta => {},
otherMetaBookMapsAfterTopicIncludes => [],
otherMetaBookMapsBeforeTopicIncludes => [],
otherMetaConsolidated => {},
otherMetaDuplicatesCombined => [],
otherMetaDuplicatesSeparately => [],
otherMetaPushToBookMap => [],
otherMetaRemainWithTopic => [],
oxygenProjects => undef,
parseFailed => {},
publicId => {
"bm_a_9d0a9f8e0ac234de9e22c19054b6e455.ditamap" => "EN",
"bm_b_d2806ba589f908da1106574afd9db642.ditamap" => "EN",
"c_aaaa_121939eab89cd7d2c3eb4c4189772a1f.dita" => "EN",
"c_aaaa_bbbb_55baefe9258538b26a95b0015a8d5a2b.dita" => "EN",
"c_aaaa_cccc_a91633094220d068c453eecae1726eff.dita" => "EN",
"c_aaaa_dddd_914b8e11993908497768c50d992ea0f0.dita" => "EN",
"c_bbbb_6100b51ca1f789836cd4f31893ed67d2.dita" => "EN",
"c_bbbb_aaaa_cfd3a140e06a914fc8469583ad87829d.dita" => "EN",
"c_bbbb_bbbb_c90ebf976073b2a3f7a8dc27a3c8254b.dita" => "EN",
"c_bbbb_cccc_d1c80714275637cde524bdfa1304a8f3.dita" => "EN",
},
references => {
"bm_a_9d0a9f8e0ac234de9e22c19054b6e455.ditamap" => {
"c_aaaa_121939eab89cd7d2c3eb4c4189772a1f.dita" => 1,
"c_aaaa_bbbb_55baefe9258538b26a95b0015a8d5a2b.dita" => 1,
"c_aaaa_cccc_a91633094220d068c453eecae1726eff.dita" => 1,
"c_aaaa_dddd_914b8e11993908497768c50d992ea0f0.dita" => 1,
},
"bm_b_d2806ba589f908da1106574afd9db642.ditamap" => {
"c_bbbb_6100b51ca1f789836cd4f31893ed67d2.dita" => 1,
"c_bbbb_aaaa_cfd3a140e06a914fc8469583ad87829d.dita" => 1,
"c_bbbb_bbbb_c90ebf976073b2a3f7a8dc27a3c8254b.dita" => 1,
"c_bbbb_cccc_d1c80714275637cde524bdfa1304a8f3.dita" => 1,
},
"c_aaaa_bbbb_55baefe9258538b26a95b0015a8d5a2b.dita" => { p1 => 1 },
"c_aaaa_cccc_a91633094220d068c453eecae1726eff.dita" => { p2 => 1 },
"c_aaaa_dddd_914b8e11993908497768c50d992ea0f0.dita" => { p3 => 1 },
},
relocatedReferencesFailed => [],
relocatedReferencesFixed => [],
reports => '',
requestAttributeNameAndValueCounts => undef,
requiredCleanUp => {},
results => [[1, "ref"]],
sourceTopicToTargetBookMap => {
"a.xml" => bless({
source => "a.xml",
sourceDocType => "concept",
target => "bm_a_9d0a9f8e0ac234de9e22c19054b6e455.ditamap",
targetType => "bookmap",
}, "Bookmap"),
"b.xml" => bless({
source => "b.xml",
sourceDocType => "concept",
target => "bm_b_d2806ba589f908da1106574afd9db642.ditamap",
targetType => "bookmap",
}, "Bookmap"),
},
statusLine => "Xref: 1 ref",
statusTable => " Count Condition\n1 1 ref\n",
subjectSchemeMap => undef,
suppressReferenceChecks => undef,
tableDimensions => {},
tagCount => {
"bm_a_9d0a9f8e0ac234de9e22c19054b6e455.ditamap" => {
appendices => 1,
approved => 1,
author => 1,
bookchangehistory => 1,
booklists => 1,
bookmap => 1,
bookmeta => 1,
bookowner => 1,
bookrights => 1,
booktitle => 1,
brand => 1,
category => 1,
CDATA => 1,
chapter => 1,
copyrfirst => 1,
frontmatter => 1,
keyword => 1,
keywords => 1,
mainbooktitle => 1,
notices => 1,
preface => 1,
prodinfo => 1,
prodname => 1,
prognum => 1,
relcell => 4,
relcolspec => 2,
relheader => 1,
relrow => 2,
reltable => 1,
revisionid => 1,
shortdesc => 1,
source => 1,
toc => 1,
topicref => 3,
vrm => 1,
lib/Data/Edit/Xml/Xref.pm view on Meta::CPAN
fixedRefsGood => [],
fixedRefsNoAction => [],
fixRefs => {},
fixRelocatedRefs => undef,
fixXrefsByTitle => undef,
flattenFiles => {},
flattenFolder => undef,
getFileUrl => "client.pl?getFile=",
goodImageFiles => {},
goodNavTitles => {},
guidHrefs => {},
guidToFile => {},
hrefUrlEncoding => {},
html => undef,
idNotReferenced => { "c1.dita" => { c1 => 1 }, "c2.dita" => { c2 => 1 } },
idReferencedCount => {},
ids => { "c1.dita" => { c1 => 1 }, "c2.dita" => { c2 => 1 } },
idsRemoved => { c1 => 1, c2 => 1 },
idTags => {
"c1.dita" => { c1 => ["concept"] },
"c2.dita" => { c2 => ["concept"] },
},
images => {},
imagesReferencedFromBookMaps => {},
imagesReferencedFromTopics => {},
imagesToRefferingBookMaps => {},
indexedWords => {},
indexWords => undef,
indexWordsFolder => undef,
inputFiles => ["c1.dita", "c2.dita"],
inputFileToTargetTopics => {},
inputFolder => "",
inputFolderImages => { c1 => "c1.dita", c2 => "c2.dita" },
ltgt => {},
matchTopics => undef,
maxZoomIn => undef,
maxZoomOut => { "c1.dita" => {}, "c2.dita" => {} },
md5Sum => {
"c1.dita" => "92ab49a6d97f749545ec5dc873f53bdb",
"c2.dita" => "a3df8bdda952294d6a533b7ff4f6faeb",
},
md5SumDuplicates => {},
missingImageFiles => {},
missingTopicIds => {},
noHref => {},
notReferenced => {},
olBody => {},
originalSourceFileAndIdToNewFile => {},
otherMeta => {},
otherMetaBookMapsAfterTopicIncludes => [],
otherMetaBookMapsBeforeTopicIncludes => [],
otherMetaConsolidated => {},
otherMetaDuplicatesCombined => [],
otherMetaDuplicatesSeparately => [],
otherMetaPushToBookMap => [],
otherMetaRemainWithTopic => [],
oxygenProjects => undef,
parseFailed => {},
publicId => { "c1.dita" => undef, "c2.dita" => undef },
references => {},
relocatedReferencesFailed => [],
relocatedReferencesFixed => [],
reports => '',
requestAttributeNameAndValueCounts => undef,
requiredCleanUp => {
"c1.dita" => { aaa => 1, bbb => 2 },
"c2.dita" => { aaa => 1, bbb => 1, ccc => 1, CCC => 1 },
},
results => [[2, "first lines"], [2, "second lines"]],
sourceTopicToTargetBookMap => {},
statusLine => "Xref: 2 first lines, 2 second lines",
statusTable => " Count Condition\n1 2 first lines\n2 2 second lines\n",
subjectSchemeMap => undef,
suppressReferenceChecks => undef,
tableDimensions => {},
tagCount => {
"c1.dita" => {
"CDATA" => 4,
"conbody" => 1,
"concept" => 1,
"required-cleanup" => 3,
"title" => 1,
},
"c2.dita" => {
"CDATA" => 5,
"conbody" => 1,
"concept" => 1,
"required-cleanup" => 4,
"title" => 1,
},
},
tags => { "c1.dita" => 6, "c2.dita" => 7 },
targetFolderContent => {},
targetTopicToInputFiles => {},
texts => { "c1.dita" => 4, "c2.dita" => 5 },
title => { "c1.dita" => "C1_", "c2.dita" => "C2_" },
titleToFile => { C1_ => { "c1.dita" => 1 }, C2_ => { "c2.dita" => 1 } },
topicFlattening => {},
topicFlatteningFactor => {},
topicIds => { "c1.dita" => "c1", "c2.dita" => "c2" },
topicsFlattened => undef,
topicsNotReferencedFromBookMaps => { "c1.dita" => 1, "c2.dita" => 1 },
topicsReferencedFromBookMaps => {},
topicsToReferringBookMaps => {},
urls => {},
urlsBad => {},
urlsGood => {},
validateUrls => undef,
validationErrors => {},
vocabulary => {},
xrefBadFormat => {},
xrefBadScope => {},
xRefs => {},
xrefsFixedByTitle => [],
};
#say STDERR writeStructureTest($x->requiredCleanUp, q($x->requiredCleanUp));
is_deeply removeFilePathsFromStructure($x->requiredCleanUp),
{ "c1.dita" => { aaa => 1, bbb => 2 },
"c2.dita" => { aaa => 1, bbb => 1, ccc => 1, CCC => 1 },
};
}
lib/Data/Edit/Xml/Xref.pm view on Meta::CPAN
},
ids => {
"c.dita" => { c => 1, q1 => 1 },
"c_12345678123456781234567812345678.dita" => { c => 1, p1 => 2 },
},
idsRemoved => { c => 2 },
idTags => {
"c.dita" => { c => ["concept"], q1 => ["p"] },
"c_12345678123456781234567812345678.dita" => { c => ["concept"], p1 => ["p", "p"] },
},
images => {},
imagesReferencedFromBookMaps => {},
imagesReferencedFromTopics => {},
imagesToRefferingBookMaps => {},
indexedWords => {},
indexWords => undef,
indexWordsFolder => undef,
inputFiles => ["c.dita", "c_12345678123456781234567812345678.dita"],
inputFileToTargetTopics => {},
inputFolder => "",
inputFolderImages => {
c => "c.dita",
c_12345678123456781234567812345678 => "c_12345678123456781234567812345678.dita",
},
ltgt => {},
matchTopics => undef,
maxZoomIn => undef,
maxZoomOut => { "c.dita" => {}, "c_12345678123456781234567812345678.dita" => {} },
md5Sum => {
"c.dita" => "c7c95918b94057943d448ca99e5424cc",
"c_12345678123456781234567812345678.dita" => "d3d1c1ce281895768bd92f27fd492191",
},
md5SumDuplicates => {},
missingImageFiles => {},
missingTopicIds => {},
noHref => {},
notReferenced => {},
olBody => {},
originalSourceFileAndIdToNewFile => {},
otherMeta => {},
otherMetaBookMapsAfterTopicIncludes => [],
otherMetaBookMapsBeforeTopicIncludes => [],
otherMetaConsolidated => {},
otherMetaDuplicatesCombined => [],
otherMetaDuplicatesSeparately => [],
otherMetaPushToBookMap => [],
otherMetaRemainWithTopic => [],
oxygenProjects => undef,
parseFailed => {},
publicId => { "c.dita" => "EN", "c_12345678123456781234567812345678.dita" => "EN" },
references => {
"c.dita" => {
"bad" => 1,
"c.dta" => 1,
"c_12345678123456781234567812345678.dita" => 1,
"p1" => 1,
"q1" => 1,
},
"c_12345678123456781234567812345678.dita" => { p1 => 1, pp => 1 },
},
relocatedReferencesFailed => [],
relocatedReferencesFixed => [],
reports => '',
requestAttributeNameAndValueCounts => undef,
requiredCleanUp => {},
results => [[1, "duplicate id"], [6, "refs"], [1, "duplicate topic id"]],
sourceTopicToTargetBookMap => {},
statusLine => "Xref: 6 refs, 1 duplicate id, 1 duplicate topic id",
statusTable => " Count Condition\n1 6 refs\n2 1 duplicate id\n3 1 duplicate topic id\n",
subjectSchemeMap => undef,
suppressReferenceChecks => undef,
tableDimensions => {},
tagCount => {
"c.dita" => { CDATA => 2, conbody => 1, concept => 1, p => 7, title => 1 },
"c_12345678123456781234567812345678.dita" => { CDATA => 3, conbody => 1, concept => 1, p => 4, title => 1 },
},
tags => { "c.dita" => 10, "c_12345678123456781234567812345678.dita" => 7 },
targetFolderContent => {},
targetTopicToInputFiles => {},
texts => { "c.dita" => 2, "c_12345678123456781234567812345678.dita" => 3 },
title => { "c.dita" => "C2", "c_12345678123456781234567812345678.dita" => "C1" },
titleToFile => {
C1 => { "c_12345678123456781234567812345678.dita" => 1 },
C2 => { "c.dita" => 1 },
},
topicFlattening => {},
topicFlatteningFactor => {},
topicIds => { "c.dita" => "c", "c_12345678123456781234567812345678.dita" => "c" },
topicsFlattened => undef,
topicsNotReferencedFromBookMaps => { "c.dita" => 1, "c_12345678123456781234567812345678.dita" => 1 },
topicsReferencedFromBookMaps => {},
topicsToReferringBookMaps => {},
urls => {},
urlsBad => {},
urlsGood => {},
validateUrls => undef,
validationErrors => {},
vocabulary => {},
xrefBadFormat => {},
xrefBadScope => {},
xRefs => {},
xrefsFixedByTitle => [],
};
}
#latestTest:;
if (1) { # Oxygen project files
lll "Test 031";
clearFolder(tests, 111);
createSampleInputFilesBaseCase(&in, 8);
my $x = xref(inputFolder => in, reports => reportFolder, oxygenProjects=>1);
ok $x->statusLine eq q(Xref: 104 refs, 21 image refs, 14 first lines, 14 second lines, 8 duplicate ids, 4 duplicate topic ids, 4 invalid guid hrefs, 2 duplicate files, 2 tables, 1 External xrefs with no format=html, 1 External xrefs with no scope=e...
}
#latestTest:;
if (0) # Performance tests 1.419 - disabled by the constant false condition
 {lll "Test 032";
  xref(inputFolder => q(/home/phil/perl/cpan/DataEditXmlXref/lib/Data/Edit/Xml/samples/)); # Cross reference the samples folder
 }
lib/Data/Edit/Xml/Xref.pm view on Meta::CPAN
fixBadRefs => undef,
fixDitaRefs => undef,
fixedFolder => undef,
fixedFolderTemp => "",
fixedRefsBad => [],
fixedRefsGB => [],
fixedRefsGood => [],
fixedRefsNoAction => [],
fixRefs => {},
fixRelocatedRefs => undef,
fixXrefsByTitle => undef,
flattenFiles => {},
flattenFolder => undef,
getFileUrl => "client.pl?getFile=",
goodImageFiles => {},
goodNavTitles => {},
guidHrefs => {},
guidToFile => {},
hrefUrlEncoding => {},
html => undef,
idNotReferenced => { "concept.dita" => { c => 1 } },
idReferencedCount => {},
ids => { "concept.dita" => { c => 1 } },
idsRemoved => { c => 1 },
idTags => { "concept.dita" => { c => ["concept"] } },
images => {},
imagesReferencedFromBookMaps => {},
imagesReferencedFromTopics => {},
imagesToRefferingBookMaps => {},
indexedWords => {},
indexWords => undef,
indexWordsFolder => undef,
inputFiles => ["concept.dita"],
inputFileToTargetTopics => {},
inputFolder => "",
inputFolderImages => { concept => "concept.dita" },
ltgt => {},
matchTopics => undef,
maxZoomIn => undef,
maxZoomOut => { "concept.dita" => {} },
md5Sum => { "concept.dita" => "f38f3212622c0fd073b213176a045e47" },
md5SumDuplicates => {},
missingImageFiles => {},
missingTopicIds => {},
noHref => {},
notReferenced => {},
olBody => {},
originalSourceFileAndIdToNewFile => {},
otherMeta => {},
otherMetaBookMapsAfterTopicIncludes => [],
otherMetaBookMapsBeforeTopicIncludes => [],
otherMetaConsolidated => {},
otherMetaDuplicatesCombined => [],
otherMetaDuplicatesSeparately => [],
otherMetaPushToBookMap => [],
otherMetaRemainWithTopic => [],
oxygenProjects => undef,
parseFailed => {},
publicId => { "concept.dita" => "EN" },
references => {},
relocatedReferencesFailed => [],
relocatedReferencesFixed => [],
reports => "",
requestAttributeNameAndValueCounts => undef,
requiredCleanUp => {},
results => [[2, "urls"]],
sourceTopicToTargetBookMap => {},
statusLine => "Xref: 2 urls",
statusTable => " Count Condition\n1 2 urls\n",
subjectSchemeMap => undef,
suppressReferenceChecks => undef,
tableDimensions => {},
tagCount => {
"concept.dita" => { CDATA => 3, conbody => 1, concept => 1, p => 2, title => 1, xref => 2 },
},
tags => { "concept.dita" => 7 },
targetFolderContent => {},
targetTopicToInputFiles => {},
texts => { "concept.dita" => 3 },
title => { "concept.dita" => "Urls" },
titleToFile => { Urls => { "concept.dita" => 1 } },
topicFlattening => {},
topicFlatteningFactor => {},
topicIds => { "concept.dita" => "c" },
topicsFlattened => undef,
topicsNotReferencedFromBookMaps => { "concept.dita" => 1 },
topicsReferencedFromBookMaps => {},
topicsToReferringBookMaps => {},
urls => {
"concept.dita" => { "ww2.appaapps.com" => 1, "www.appaapps.com" => 1 },
},
urlsBad => {
"ww2.appaapps.com" => { "concept.dita" => 1 },
"www.appaapps.com" => { "concept.dita" => 1 },
},
urlsGood => {},
validateUrls => 1,
validationErrors => {},
vocabulary => {},
xrefBadFormat => {},
xrefBadScope => {},
xRefs => {},
xrefsFixedByTitle => [],
}, "Data::Edit::Xml::Xref")
}
for my $folder(in, out, outFixed, reportFolder, tests, targets, q(zzzParseErrors)) # Each folder used by the tests
 {clearFolder($folder, 1e3); # Clear the folder, passing 1e3 as the second argument
 }
done_testing; # Tell the test harness that all tests have been run
lll "Tests finished:"; # 16.212