Data-Table-Text

 view release on metacpan or  search on metacpan

lib/Data/Table/Text.pm  view on Meta::CPAN

  $file
 }

sub quoteFile($)                                                                # Quote a file name.
 {my ($file) = @_;                                                              # File name
  $file or confess "Undefined file to quote";
  $file =~ s(")  (\\\")gs;
  $file =~ s(\$) (\\\$)gs;
  qq(\"$file\")
 }

sub removeFilePrefix($@)                                                        # Removes a file B<$prefix> from an array of B<@files>.
 {my ($prefix, @files) = @_;                                                    # File prefix, array of file names
  my @f = map {s(\A$prefix) ()r} @files;
  return $f[0] if @f == 1 and !wantarray;                                       # Special case of wanting one file in scalar context
  @f
 }

sub swapFilePrefix($$;$)                                                        # Swaps the start of a B<$file> name from a B<$known> name to a B<$new> one if the file does in fact start with the $known name otherwise returns the original file name a...
 {my ($file, $known, $new) = @_;                                                # File name, existing prefix, optional new prefix defaults to q()
  my $L = length($file);
  my $l = length($known);
  if ($L >= $l)
   {if (substr($file, 0, $l) eq $known)
     {return ($new//q()).substr($file, $l);
     }
    return $file;
   }
  confess "Known $l longer than file name $L:\n$known\n$file\n";
 } # swapFilePrefix

sub setFileExtension($;$)                                                       # Given a B<$file>, change its extension to B<$extension>. Removes the extension if no $extension is specified.
 {my ($file, $extension) = @_;                                                  # File name, optional new extension
  return $file =~ s(\.\w+\Z) ()sr unless defined $extension;                    # Remove extension
  my $ext = $extension =~ s(\A\.+) ()gsr;                                       # Remove leading dots
  return $file                    unless $ext;                                  # No extension after dot removal
  ($file =~ s(\.\w+\Z) ()gsr).q(.).$ext;                                        # Change extension
 } # setFileExtension

sub swapFolderPrefix($$$)                                                       # Given a B<$file>, swap the folder name of the $file from B<$known> to B<$new> if the file $file starts with the $known folder name else return the $file as it is.
 {my ($file, $known, $new) = @_;                                                # File name, existing prefix, new prefix
  swapFilePrefix($file, fpd($known), fpd($new));
 } # swapFolderPrefix

sub fullyQualifiedFile($;$)                                                     # Check whether a B<$file> name is fully qualified or not and, optionally, whether it is fully qualified with a specified B<$prefix> or not.
 {my ($file, $prefix) = @_;                                                     # File name to test, file name prefix
  return $file =~ m(\A/)s unless $prefix;                                       # Check against /
  index($file, $prefix) == 0                                                    # Check against  supplied prefix
 } # fullyQualifiedFile

sub fullyQualifyFile($)                                                         # Return the fully qualified name of a file.
 {my ($file) = @_;                                                              # File name
  return $file if fullyQualifiedFile($file);                                    # File is already fully qualified
  absFromAbsPlusRel(&currentDirectory, $file);                                  # Fully qualify file name
 } # fullyQualifyFile

sub removeDuplicatePrefixes($)                                                  # Remove duplicated leading directory names from a file name.
 {my ($file) = @_;                                                              # File name
  return $file unless $file =~ m(/)s;                                           # No path to deduplicate
  return $file if $file =~ m(\A[/.]);                                           # Later
  my ($p, @p) = split m(/), $file;
  shift @p while @p && $p[0] eq $p;
  join "/", $p, @p;
 } # removeDuplicatePrefixes

sub containingFolderName($)                                                     # The name of a folder containing a file.
 {my ($file) = @_;                                                              # File name
  my @p = split m(/), $file;
  return $p[-2] if @p > 1;
  confess "No folder name provided";
 } # containingFolderName
#D2 Position                                                                    # Position in the file system.

sub currentDirectory                                                            # Get the current working directory.
 {renormalizeFolderName(getcwd)
 } # currentDirectory

sub currentDirectoryAbove                                                       # Get the path to the folder above the current working folder.
 {my $path = currentDirectory;
  my @path = split m(/)s, $path;
  shift @path if @path and $path[0] =~ m/\A\s*\Z/;
  @path or confess "No directory above\n:".currentDirectory, "\n";
  pop @path;
  my $r = shift @path;
  filePathDir("/$r", @path);
 } # currentDirectoryAbove

sub parseFileName($)                                                            # Parse a file name into (path, name, extension) considering .. to be always part of the path and using B<undef> to mark missing components.  This differs from (fp, fn, f...
 {my ($file) = @_;                                                              # File name to parse
  defined($file) or confess "File required";
  return ($file) if $file =~ m{\/\Z}s or $file =~ m/\.\.\Z/s;                   # Its a folder
  if ($file =~ m/\.[^\/]+?\Z/s)                                                 # The file name has an extension
   {if ($file =~ m/\A.+[\/]/s)                                                  # The file name has a preceding path
     {my @f = $file =~ m/(\A.+[\/])([^\/]*)\.([^\/]+?)\Z/s;                     # File components
      return @f;
     }
    else                                                                        # There is no preceding path
     {my @f = $file =~ m/(\A.+)\.([^\/]+?)\Z/s;                                 # File components
      return (undef, @f)
     }
   }
  else                                                                          # The file name has no extension
   {if ($file =~ m/\A.+[\/]/s)                                                  # The file name has a preceding path
     {my @f = $file =~ m/(\A.+\/)([^\/]+?)\Z/s;                                 # File components
      return @f;
     }
    elsif ($file =~ m/\A[\/]./s)                                                # The file name has a single preceding /
     {return (q(/), substr($file, 1));
     }
    elsif ($file =~ m/\A[\/]\Z/s)                                               # The file name is a single /
     {return (q(/));
     }
    else                                                                        # There is no preceding path
     {return (undef, $file)
     }
   }
 } # parseFileName

sub fullFileName                                                                # Full name of a file.
 {my ($file) = @_;                                                              # File name
  return $file if fullyQualifiedFile $file;                                     # Already a full file name
  absFromAbsPlusRel(currentDirectory, $file);                                   # Relative to current folder
 } # fullFileName

sub relFromAbsAgainstAbs($$)                                                    # Relative file from one absolute file B<$a> against another B<$b>.
 {my ($a, $b) = @_;                                                             # Absolute file to be made relative, against this absolute file.

  my $m = length($a) < length($b) ? length($a) : length($b);                    # Shortest length

  $a =~ m(\A/) or confess "$a is not absolute";                                 # Require absolute file names
  $b =~ m(\A/) or confess "$b is not absolute";
  $b =~ s([^/]+\Z) ();                                                          # Make the against file into a folder

  my $s = 0;                                                                    # Position of last matching /

  for my $i(1..$m-1)                                                            # Locate first non matching character - the first character of both file names is / which matches
   {if (substr($a, $i, 1) ne substr($b, $i, 1))                                 # First mismatch
     {my $u = 0;                                                                # Number of jumps up from $b
      my $p = $s;                                                               # Last /
      ++$u while(($p = index($b, q(/), $p+1)) > -1);                            # Number of / to jump up

lib/Data/Table/Text.pm  view on Meta::CPAN

   }
  @files
 } # findFiles

sub findDirs($;$)                                                               # Find all the folders under a B<$folder> and optionally B<$filter> the selected folders with a regular expression.
 {my ($folder, $filter) = @_;                                                   # Folder to start the search with, optional regular expression to filter files
  return findAllFilesAndFolders($folder, 1) if onWindows;                       # All folders if on windows

  my @dir;                                                                      # Directories
  for(findAllFilesAndFolders($folder, 1))                                       # All files and folders
   {next unless -d $_;                                                          # Include only folders
    next if $filter and $filter and !m($filter)s;                               # Filter out directories that do not match the regular expression
    push @dir, fpd($_);
   }
  @dir
 } # findDirs

sub fileList($)                                                                 # Files that match a given search pattern interpreted by L<perlfunc/bsd_glob>.
 {my ($pattern) = @_;                                                           # Search pattern
  bsd_glob($pattern, GLOB_MARK | GLOB_TILDE)
 } # fileList

sub searchDirectoryTreesForMatchingFiles(@)                                     #I Search the specified directory trees for the files (not folders) that match the specified extensions. The argument list should include at least one path name to be use...
 {my (@FoldersandExtensions) = @_;                                              # Mixture of folder names and extensions
  my (@foldersandExtensions) = map {ref($_) ? @$_ : $_} @_;

  my  @extensions = grep {$_ and !-d $_ and !m([\/])} @_;                       # Extensions are not directories
  for(@extensions)                                                              # Prefix period to extension of not all ready there - however this can lead to errors if there happens to be a folder with the same name as an undotted extension.
   {$_ = qq(\.$_) unless m(\A\.)s
   }

  my $ext = @extensions ? join '|', @extensions : undef;                        # Extensions
  my @file;                                                                     # Files

  for my $dir(@_)                                                               # Directories
   {next unless $dir && -d $dir;                                                # Do not include folder names

    my @f = findAllFilesAndFolders($dir, 0);                                    # All files and folders beneath each folder

    for my $d(@f)
     {next if -d $d;                                                            # Do not include folder names
      push @file, $d if !$ext or $d =~ m(($ext)\Z)is;                           # Filter by extension if requested.
     }
   }
  @file                                                                         # Return files
 } # searchDirectoryTreesForMatchingFiles

sub searchDirectoryTreeForSubFolders($)                                         # Search the specified directory under the specified folder for sub folders.
 {my ($folder) = @_;                                                            # The folder at which to start the search
  my @f;                                                                        # Folders found
  for my $d(findAllFilesAndFolders($folder, 0))                                 # All files and folders beneath the start folder
   {push @f, $d if -d $d;                                                       # Do not include file names
   }
  @f                                                                            # Return folder names
 } # searchDirectoryTreeForSubFolders

sub hashifyFolderStructure(@)                                                   # Hashify a list of file names to get the corresponding folder structure.
 {my (@files) = @_;                                                             # File names
  my %h;
  for my $f(@files)                                                             # Map each file
   {my @f = split m(/), $f;
    my $s = join '', map {q({).dump($_).q(})} @f;                               # Hashify directory structure
    my $c = "\$h$s = ".dump($f);                                                # Load targets
    eval $c;
    confess $@ if $@;
   }
  \%h
 } # hashifyFolderStructure

sub countFileExtensions(@)                                                      # Return a hash which counts the file extensions in and below the folders in the specified list.
 {my (@folders) = @_;                                                           # Folders to search
  my %ext;
  for my $dir(@folders)                                                         # Directories
   {next unless -d $dir;
    for my $file(findAllFilesAndFolders($dir, 0))                               # All files and folders under the current folder
     {next if -d $file;                                                         # Do not include folder names
      $ext{fe $file}++;
     }
   }
  \%ext                                                                         # Return extension counts
 } # countFileExtensions

sub countFileTypes($@)                                                          # Return a hash which counts, in parallel with a maximum number of processes: B<$maximumNumberOfProcesses>, the results of applying the B<file> command to each file in an...
 {my ($maximumNumberOfProcesses, @folders) = @_;                                # Maximum number of processes to run in parallel, Folders to search

  return undef unless confirmHasCommandLineCommand(q(file));                    # Confirm we have file command

  my %ext;
  my @files = squareArray(searchDirectoryTreesForMatchingFiles(@folders));      # Find files

  my $p = newProcessStarter($maximumNumberOfProcesses);                         # Process starter
     $p->totalToBeStarted  = scalar @files;

  for my $block(@files)                                                         # Apply file to each file
   {$p->start(sub
     {my @r;
      for my $file(@$block)
       {my $f = quoteFile($file);
        my $r = qx(file $f);
        push @r, trim(swapFilePrefix($r, $file.q(:), q()));                     # Remove file name from output
       }
      [@r]
     });
   }

  for my $type(deSquareArray($p->finish))                                       # Consolidate results
   {$ext{$type}++;
   }

  \%ext
 } # countFileTypes

sub matchPath($)                                                                # Return the deepest folder that exists along a given file name path.
 {my ($file) = @_;                                                              # File name
  return $file if -e $file;                                                     # File exists so nothing more to match
  my @path = split /[\/\\]/, $file;                                             # Split path into components
  while(@path)                                                                  # Remove components one by one
   {pop @path;                                                                  # Remove deepest component and try again
    my $path = join filePathSeparatorChar, @path, '';                           # Containing folder
    return $path if -d $path;                                                   # Containing folder exists
   }

lib/Data/Table/Text.pm  view on Meta::CPAN

sub appendFile($$)                                                              # Append to B<$file> a B<$string> of L<unicode> content encoded with L<utf8>, creating the $file first if necessary. Return the name of the $file on success else confess....
 {my ($file, $string) = @_;                                                     # File to append to, string to append
  $file or confess "No file name supplied\n";
  $string or carp "No string for file:\n$file\n";
  makePath($file);
  open my $F, ">>$file" or
    confess "Cannot open file for write file:\n$file\n$!\n";
  binmode($F, ":utf8");
  flock($F, 2);
  print  {$F} $string;
  close  ($F);
  -e $file or confess "Failed to write to file:\n$file\n";
  $file
 } # appendFile

sub createEmptyFile($)                                                          # Create an empty file unless the file already exists and return the name of the file else confess if the file cannot be created.
 {my ($file) = @_;                                                              # File to create or B<undef> for a temporary file
  $file //= temporaryFile;
  return $file if -e $file;                                                     # Return file name as proxy for success if file already exists
  makePath($file);
  open my $F, ">$file" or confess "Cannot create empty file:\n$file\n$!\n";
  binmode($F);
  print  {$F} '';
  close  ($F);
  -e $file or confess "Failed to create empty file:\n$file\n";
  $file                                                                         # Return file name on success
 } # createEmptyFile

sub binModeAllUtf8                                                              #P Set STDOUT and STDERR to accept utf8 without complaint.
 {binmode $_, ":utf8" for *STDOUT, *STDERR;
 }

sub setPermissionsForFile($$)                                                   # Apply L<chmod> to a B<$file> to set its B<$permissions>.
 {my ($file, $permissions) = @_;                                                # File, permissions settings per chmod
  return undef unless confirmHasCommandLineCommand(q(chmod));                   # Confirm we have chmod
  qx(chmod $permissions $file);                                                 # Use chmod to set permissions
 }

sub numberOfLinesInFile($)                                                      # Return the number of lines in a file.
 {my ($file) = @_;                                                              # File
  scalar split /\n/, readFile($file);                                           # Number of lines
 } # numberOfLinesInFile

sub overWriteHtmlFile($$)                                                       # Write an L<html> file to /var/www/html and make it readable.
 {my ($file, $data) = @_;                                                       # Target file relative to /var/www/html, data to write
  my $s = writeTempFile($data);
  my $t = fpf(q(/var/www/html/), $file);
  xxx qq(sudo mv $s $t; chmod o+r $t);
  unlink $s;
 }

sub overWritePerlCgiFile($$)                                                    # Write a L<Perl> file to /usr/lib/cgi-bin and make it executable after checking it for syntax errors.
 {my ($file, $data) = @_;                                                       # Target file relative to /var/www/html, data to write
  my $s = writeTempFile($data);
  my $r = qx(perl -c $s 2>&1);
  if ($r =~ m(syntax OK)si)
   {my $t = fpf(q(/usr/lib/cgi-bin/), $file);
    say STDERR qx(sudo mv $s $t; chmod o+rx $t);
   }
  else
   {my @data = map {[$_]} split m/\n/, $data;
    say STDERR formatTable([@data]);
    confess "Perl error:\n$r\n";
   }
  unlink $s;
 }

#D2 Copy                                                                        # Copy files and folders. The B<\Acopy.*Md5Normalized.*\Z> methods can be used to ensure that files have collision proof names that collapse duplicate content even when c...

sub copyFile($$)                                                                # Copy the B<$source> file encoded in utf8 to the specified B<$target> file in and return $target.
 {my ($source, $target) = @_;                                                   # Source file, target file
  owf($target, readFile($source));
  my $s = fileSize($source);
  my $t = fileSize($target);
  $s eq $t or lll
    "Copied file has a different size\n".formatTable
    ([[$s, $source], [$t, $target]], <<END);
Size Size of file
File Name of file
END
  $target                                                                       # Return target file name
 }

sub moveFileNoClobber($$)                                                       # Rename the B<$source> file, which must exist, to the B<$target> file but only if the $target file does not exist already.  Returns 1 if the $source file was successfull...
 {my ($source, $target) = @_;                                                   # Source file, target file
  if (-e $source and !-e $target)                                               # Rename possible
   {rename $source, $target;
    return 1;
   }
  0                                                                             # Rename not possible
 }

sub moveFileWithClobber($$)                                                     # Rename the B<$source> file, which must exist, to the B<$target> file but only if the $target file does not exist already.  Returns 1 if the $source file was successfull...
 {my ($source, $target) = @_;                                                   # Source file, target file
  if (-e $source)                                                               # Source file exists so rename
   {unlink $target;
    rename $source, $target;
    return 1;
   }
  0                                                                             # No such source file
 }

sub copyFileToFolder($$)                                                        # Copy the file named in B<$source> to the specified B<$targetFolder/> or if $targetFolder/ is in fact a file into the folder containing this file and return the target f...
 {my ($source, $targetFolder) = @_;                                             # Source file, target folder
  writeFile fpf(fp($targetFolder), fne($source)), readFile $source;
 }

sub nameFromStringMaximumLength {128}                                           #P Maximum length of a name generated from a string.

sub nameFromString($%)                                                          # Create a readable name from an arbitrary string of text.
 {my ($string, %options) = @_;                                                  # String, options

  my @name;
  if ($string =~ m(<(bookmap))s)                                                # The ghastly compromise
   {push @name, q(bm);
   }
  elsif ($string =~ m(<(bookmap|concept|glossentry|html|map|reference|task))s)  # The correct solution
   {push @name, substr($1, 0, 1);
   }

  $string =~ s(<[^>]*>) (_)gs;                                                  # Remove xml/html tags
  $string =~ s([^a-z0-9_])(_)gis;                                               # Reduce character set to produce a readable name
  push @name, $string;

  my $name = join q(_), @name;
     $name =~ s(_+)(_)gs;                                                       # Remove runs of underscores
     $name =~ s((\A_+|_+\Z)) ()gs;                                              # Remove leading and trailing underscores

  firstNChars($name, $options{maximumLength} // nameFromStringMaximumLength);   # Limit the name length
 }

sub nameFromStringRestrictedToTitle($%)                                         # Create a readable name from a string of text that might contain a title tag - fall back to L<nameFromString|/nameFromString> if that is not possible.
 {my ($string, %options) = @_;                                                  # String, options
  my @name;
  if ($string =~ m(<(bookmap))s)                                                # The ghastly compromise
   {push @name, q(bm);
   }
  elsif ($string =~ m(<(bookmap|concept|glossentry|html|map|reference|task))s)  # The correct solution
   {push @name, substr($1, 0, 1);
   }

  for my $t(qw(title mainbooktitle booktitlealt ))                              # Various title tags
   {if ($string =~ m(<$t[^>]*>([^<]*)</$t>)is)
     {push @name, $1;
     }
   }

  my $name = lc join '_', @name;                                                # Mim believes in lc
  $name =~ s(<[^>]*>) (_)gs;                                                    # Remove xml/html tags
  $name =~ s([^a-z0-9_])(_)gis;                                                 # Reduce character set to produce a readable name
  $name =~ s(_+)(_)gs;                                                          # Remove runs of underscores
  $name =~ s((\A_+|_+\Z)) ()gs;                                                 # Remove leading and trailing underscores

  firstNChars($name, $options{maximumLength} // nameFromStringMaximumLength);   # Limit the name length
 }

sub uniqueNameFromFile($)                                                       # Create a unique name from a file name and the md5 sum of its content.
 {my ($source) = @_;                                                            # Source file
  my $sourceFile = fn $source;                                                  # File name component
  return fne($source) if $sourceFile =~ m([0-9a-z]{32}\Z)is;                    # Name already normalized
  my $sourceFileLimited = nameFromString($sourceFile);                          # File name with limited character set
  my $md5 = fileMd5Sum($source);                                                # Normalizing Md5 sum
  fpe($sourceFileLimited.q(_).$md5, fe $source);                                # Normalized name
 }

sub nameFromFolder($)                                                           # Create a name from the last folder in the path of a file name.  Return undef if the file does not have a path.
 {my ($file) = @_;                                                              # File name
  my $p = fp $file;
  my @p = onWindows ? split m(\\), $p : split m(/), $p;
  return $p[-1] if @p;
  undef
 }

sub copyBinaryFile($$)                                                          # Copy the binary file B<$source> to a file named <%target> and return the target file name,.
 {my ($source, $target) = @_;                                                   # Source file, target file
  overWriteBinaryFile($target, readBinaryFile($source));
  $target
 }

sub copyFileToRemote($;$)                                                       # Copy the specified local B<$file> to the server whose ip address is specified by B<$ip> or returned by L<awsIp>.
 {my ($file, $ip) = @_;                                                         # Source file, optional ip address
  my $f = fullyQualifyFile($file);                                              # Fully qualify source file
  -f $file or confess "No such file:\n$file\n";                                 # Check source file exists
  -f $f or confess "No such file:\n$f\n";                                       # Check source file exists
  my $i = $ip // &awsIp;                                                        # Ip of server
  my $d = fp $f;                                                                # Folder to create if necessary
  makePathRemote($f, $i);                                                       # Create folder on remote
  my $c = qq(rsync -mpqrt --del $f $i:$f);                                      # Transfer file
# lll $c;
  xxx $c, qr(\A\s*\Z);                                                          # Execute and expect no messages
 }

sub copyFileFromRemote($;$)                                                     # Copy the specified B<$file> from the server whose ip address is specified by B<$ip> or returned by L<awsIp>.
 {my ($file, $ip) = @_;                                                         # Source file, optional ip address
  my $f = fullyQualifyFile($file);                                              # Fully qualify source file
  my $i = $ip // &awsIp;                                                        # Ip of server
  my $d = fp $f;                                                                # Folder to create if necessary
  makePath($d);                                                                 # Create folder
  my $c = qq(rsync -mpqrt $i:$f $f);                                            # Transfer file
 #lll $c;
  xxx $c, qr(\A\s*\Z);
 }

sub copyFolder($$)                                                              # Copy the B<$source> folder to the B<$target> folder after clearing the $target folder.
 {my ($source, $target) = @_;                                                   # Source file, target file
  -d $source or confess "No such folder:\n$source\n";
  my $s = fpd($source);
  my $t = fpd($target);
  makePath($t);
  my $c = qq(rsync -r --del $s $t), qr(\A\s*\Z);                                # Suppress command printing by supplying a regular expression to test the command output
 #lll $c;
  xxx $c, qr(\A\s*\Z);
 }

sub mergeFolder($$)                                                             # Copy the B<$source> folder into the B<$target> folder retaining any existing files not replaced by copied files.
 {my ($source, $target) = @_;                                                   # Source file, target file
  -d $source or confess "No such folder:\n$source\n";
  my $s = fpd($source);
  my $t = fpd($target);
  makePath($t);
  my $c = qq(rsync -r $s $t);
 #lll $c;
  xxx $c, qr(\A\s*\Z);
 }

sub copyFolderToRemote($;$)                                                     # Copy the specified local B<$Source> folder to the corresponding remote folder on the server whose ip address is specified by B<$ip> or returned by L<awsIp>. The default...
 {my ($Source, $ip) = @_;                                                       # Source file, optional ip address of server
  my $source = fullyQualifyFile($Source);                                       # Fully qualify source folder
  -d $Source or confess "No such folder:\n$Source\n";                           # Check source exists

lib/Data/Table/Text.pm  view on Meta::CPAN

     {push @partition, @$p;
     }
   }, @square);

  my @P;                                                                        # Partition of strings
  for my $partition(@partition)                                                 # Each partition
   {my @p;
    my %p;                                                                      # If n sets are identical we get n repetitions - this hash prevents that.
    for my $set(@$partition)                                                    # Each set in the current partition
     {if (my $u = $u{$set})
       {for my $U(@$u)
         {push @p, $U unless $p{$U}++;
         }
       }
     }

    push @P, [sort @p];
   }
  sort {scalar(@$b) <=> scalar(@$a)} @P
 }

sub contains($@)                                                                # Returns the indices at which an B<$item> matches elements of the specified B<@array>. If the item is a regular expression then it is matched as one, else it is a number...
 {my ($item, @array) = @_;                                                      # Item, array
  my @r;
  if (ref($item) =~ m(Regexp))                                                  # Match via a regular expression
   {for(keys @array)
     {push @r, $_ if $array[$_] =~ m($item)s;
     }
   }
  elsif (looks_like_number($item))                                              # Match as a number
   {for(keys @array)
     {push @r, $_ if $array[$_]+0 == $item;
     }
   }
  else                                                                          # Match as a string
   {for(keys @array)
     {push @r, $_ if $array[$_] eq $item;
     }
   }
  @r
 }

sub countOccurencesInString($$)                                                 # Returns the number of occurrences in B<$inString> of B<$searchFor>.
 {my ($inString, $searchFor) = @_;                                              # String to search in, string to search for.
  my $n = 0;
  length($inString) >= length($searchFor) or
    confess "String to search must be longer than string to look for";
  my $p = -1;
  ++$n while(($p = index($inString, $searchFor, $p+1)) > -1);
  $n
 }

sub partitionStringsOnPrefixBySize                                              # Partition a hash of strings and associated sizes into partitions with either a maximum size B<$maxSize> or only one element; the hash B<%Sizes> consisting of a mapping ...
 {my ($maxSize, %Sizes) = @_;                                                   # Maximum size of a partition, {string=>size}... hash to be partitioned

  my %paths;                                                                    # Path to each character in each string
  my %sizes;                                                                    # Size associate with each path
  for my $string(sort keys %Sizes)                                              # Create a path of hashes with single character keys
   {my $size = $Sizes{$string};                                                 # Size associated with the string
    my $paths = '';
    my @s = split m(), $string;                                                 # String as single characters
    while(@s)                                                                   # Shorten path
     {my $k = join '', map {qq({'$_'})} @s;                                     # Path of hashes with single character keys
      $paths .= qq(\$paths$k //= {};\n);                                        # Auto vivify
      my $d =  join '', @s;                                                     # Path name
      $sizes{$d} += $size;                                                      # Aggregate size
      pop @s;                                                                   # Move up one level
     }
    $sizes{q()} += $size;                                                       # Total size
    eval $paths;                                                                # Create paths - this level of aggregation seems to give the fastest overall response
    confess "$paths\n$@\n" if $@;                                               # Unable to create path
   }

  my %partition;                                                                # Partition the paths

  my $partition; $partition = sub                                               # Partition paths at the current level
   {my ($paths, @path) = @_;                                                    # Path at this level, path to this level

    my $p = join '', @path;                                                     # Path name
    my $s = $sizes{$p};                                                         # Size of path

    if ($s <= $maxSize or !keys %$paths)                                        # Small enough or complete path
     {$partition{$p} = $s;                                                      # Path => size
     }
    else                                                                        # Still too big
     {for my $d(sort keys %$paths)                                              # Next level
       {&$partition($$paths{$d}, @path, $d);                                    # Try at the next level
       }
     }
   };

  &$partition(\%paths) if keys %paths;                                          # Partition from the top

  %partition
 }

sub transitiveClosure($)                                                        # Transitive closure of a hash of hashes.
 {my ($h) = @_;                                                                 # Hash of hashes

  my %keys = arrayToHash(keys %$h)->%*;                                         # Find all the keys to consider
  for my $i(keys %$h)
   {my $value = $$h{$i};
    if (reftype($value) =~ m(hash)i)
     {%keys = (%keys, arrayToHash(keys %$value)->%*);                           # Just the sub keys
     }
   }

  my %t;                                                                        # Transitive closure
  for   my $a(keys %keys)
   {my $i = $$h{$a};
    if ($i and reftype($i) =~ m(hash)i)
     {for my $b(keys %keys)
       {$t{$a}{$b} = 1 if $$i{$b}
       }
     }
   }

  for(1..100)
   {my $changes = 0;
    for   my $a(keys %keys)
     {for my $b(keys %keys)

lib/Data/Table/Text.pm  view on Meta::CPAN

     foot      => <<'END',
Footer text which will follow the table
END
     summarize => <<'END',
If true, each column of an array of arrays will be summarized by printing its
distinct values and a count of how often each value occurs in a series of
smaller tables following the main table.
END
     clearUpLeft => <<'END',
If numeric +/-\$N, replace any left hand column values repeated in the
following row with white space to make it easier to follow the range of keys.
If a positive count is given the clearing will always be stopped after the
numbered column (based from 1) if negative, then clearing will be stopped after
the column obtained by counting back counting 1-\$N columns from the last
column. Thus a value of -1 will stop clearing after the final column which
could potentially produce a blank row if there are two duplicate rows in
sequence.
END
     file   => q(The name of a file to which to write the formatted table.),
     rows   => q(The number of rows in the report),
     zero   => q(Write the report even if the table is empty.),
     wide   => q(Write a note explaining the need to scroll to the right if true),
     msg    => q(Write a message to STDERR summarizing the situation if true),
     csv    => q(Write a csv version of the report if true),
     indent => q(Number of spaces to be used to indent the table, defaults to zero),
     debug  => q(Debug table processing),
     facet  => <<END,
Counts in html reports with the same facet will be plotted on the same chart to
provide a visual indication of their relative sizes.
END
     aspectColor => <<END,
The color in which to draw this aspect on charts and graphs.
END
     maximumColumnWidth => <<END,
The maximum width permitted for a column, defaults to unlimited.
END
 }} # formatTableCheckKeys

sub formatTable($;$%)                                                           #I Format various B<$data> structures as a table with titles as specified by B<$columnTitles>: either a reference to an array of column titles or a string each line of wh...
 {my ($data, $columnTitles, @options) = @_;                                     # Data to be formatted, optional reference to an array of titles or string of column descriptions, options

  my %options = sub                                                             # Make column titles an option so that the options list is easily reused. The original arrangement where column titles were a separate (optional) parameter will eventuall...
   {if ($columnTitles and !ref($columnTitles) and
        $columnTitles eq q(columns) and scalar(@options) % 2 == 1)
     {my %o = ($columnTitles, @options);
      $columnTitles = $o{columns};
      return %o;
     }
    scalar(@options) % 2 and confess "Options fail to pair";
    @options
   }->();

  checkKeys(\%options, formatTableCheckKeys);                                   # Check report options

  my ($titleString, $title) = sub                                               # Title string, column headers
   {return (undef, undef) unless defined $columnTitles;                         # No titles
    if (my $r = reftype $columnTitles)                                          # Array of column titles
     {return (undef, $columnTitles) if $r =~ m(\Aarray\Z)si;
     }
    return (q(), q()) unless $columnTitles;                                     # Column titles are not required for hash of hashes
    my @c = map {[split m(\s+), $_, 2]} split "\n", $columnTitles;              # Column definitions
    my $s = &formatTable(\@c, [qw(Column Description)]);                        # Column definitions descriptions table
   ($s, [map {$$_[0]} @c])
   }->();

  my ($a, $h, $o) = (0, 0, 0);                                                  # Check structure of input data tttt
  my $checkStructure = sub
   {for(@_)
     {my $r = reftype($_);                                                      # Process arrays and hashes or objects built on them
      if ($r)
       {if ($r =~ /array/i)   {++$a}
        elsif ($r =~ /hash/i) {++$h}
        else                  {++$o}
       }
      else                    {++$o}
     }
   };

  my $formattedTable = sub                                                      # Format table
   {if    (reftype($data) =~ /array/i)
     {$checkStructure->(       @$data);
      return formatTableAA($data, $title, %options) if  $a and !$h and !$o;
      return formatTableAH($data)                   if !$a and  $h and !$o;
      return formatTableA ($data, $title);
     }
    elsif (reftype($data) =~ /hash/i)
     {$checkStructure->(values %$data);
      return formatTableHA($data, $title) if  $a and !$h and !$o;
      return formatTableHH($data)         if !$a and  $h and !$o;
      return formatTableH ($data, $title);
     }
   }->();

  return $formattedTable unless keys %options;                                  # Return table as is unless report requested

  my ($Title, $head, $foot, $file, $zero, $summarize, $wide, $msg, $csv, $zwsp, $indent) = map{$options{$_}}
    qw(title   head   foot   file   zero   summarize   wide   msg   csv   zwsp   indent);

  my @report;
  my $date = dateTimeStamp;
  my $N    = keyCount(1, $data);
  my $H    = ($head//'') =~ s(DDDD) ($date)gr =~ s(NNNN) ($N)gr;
     $H    =~ s(TTTT) ($title)gs          if $Title;
  push @report, $Title                    if $Title;
  push @report, $H                        if $head;
  push @report, qq(This file: $file)      if $file;
  push @report, $titleString              if $titleString;
  push @report, <<END                     if $wide;
Please note that this is a wide report: you might have to scroll
a long way to the right to see all the columns of data available!
END
  push @report, <<END                     if $summarize;
Summary_of_column                - Count of unique values found in each column                     Use the Geany flick capability by placing your cursor on the first word
Comma_Separated_Values_of_column - Comma separated list of the unique values found in each column  of these lines and pressing control + down arrow to see each sub report.
END

  push @report, $formattedTable;
  push @report, $foot                     if $foot;

  push @formatTables, [$N, $Title//nws($H, 80), $file];                         # Report of all the reports created

lib/Data/Table/Text.pm  view on Meta::CPAN

 {my (@options) = @_;                                                           # Options

  formatTable([sort {($a->[1]//'') cmp ($b->[1]//'')} @formatTables], <<END,
Rows   Number of entries in table
Title  Title of the report
File   File containing the report
END
    @options);
 }

sub summarizeColumn($$)                                                         # Count the number of unique instances of each value a column in a table assumes.
 {my ($data, $column) = @_;                                                     # Table == array of arrays, column number to summarize.
  my @data = map {$$_[$column]} @$data;
  my %data;
  for my $d(@data)
   {$data{$d}++ if defined $d;
   }
  sort {return $$a[1] cmp $$b[1] if $$b[0] == $$a[0];                           # Return array of [count, key]
        return $$b[0] <=> $$a[0]} map {[$data{$_}, $_]} sort keys %data;
 }

sub keyCount($$)                                                                # Count keys down to the specified level.
 {my ($maxDepth, $ref) = @_;                                                    # Maximum depth to count to, reference to an array or a hash
  my $n = 0;
  my $count;
  $count = sub
   {my ($ref, $currentDepth) = @_;
    if (ref($ref) =~ /array/i)
     {if ($maxDepth == $currentDepth) {$n += scalar(@$ref)}
      else {$count->($_, ++$currentDepth)       for @$ref}
     }
    elsif (ref($ref) =~ /hash/i)
     {if ($maxDepth == $currentDepth)   {$n += scalar(keys %$ref)}
      else {$count->($ref->{$_}, ++$currentDepth) for keys %$ref}
     }
    else {++$n}
   };
  $count->($ref, 1);
  $n
 }

sub formatHtmlTable($%)                                                         # Format an array of arrays of scalars as an html table using the  B<%options> described in L<formatTableCheckKeys>.
 {my ($data, %options) = @_;                                                    # Data to be formatted, options
  my $rows = $data ? scalar(@$data) : 0;                                        # The number of rows in the report

  checkKeys(\%options, formatTableCheckKeys);                                   # Check report options

  if (!$options{zero} and $data and ref($data) =~ m(array)i and !@$data)        # Return empty string if the table is empty unless the zero option has been supplied
   {return q()
   }

  my @html;                                                                     # Generated html
  my $cl = q();                                                                 # Table column names
  my $ct = q();                                                                 # Columns description table if present

  if (my $columns = $options{columns})                                          # Column headers
   {ref($columns) and confess <<END;                                            # Describe column option
Expected one line per column wiith the forst weor dbeing teh column name and
the remainder being a comment describing the comment.
END
    my @c = map {[split m(\s+), $_, 2]} split "\n", $columns;                   # Parse column headers
    $cl = join '', q(<tr><th>), join q(<th>),
      map {my ($c, $d) = @$_; qq(<span title="$d">$c</span>)} @c;               # Column line with tool tips
    $ct = join "\n", q(<p><pre>), formatTableBasic([@c]), qq(</pre></p>\n);     # Column format
   }

  if (my $title = $options{title})                                              # Title
   {push @html, <<END;
<h1>$title</h1>
END
   }

  my $hf = sub                                                                  # Header / Footer
   {my ($text) = @_;                                                            # Text of header or footer
    my $d = dateTimeStamp;
    my $t = ($text//'') =~ s(DDDD) ($d)gr =~ s(NNNN) ($rows)gr;                 # Edit in NNNN and DDDD fields

    push @html, <<END;
<p>$t</p>
END
   };

  if (my $head = $options{head})                                                # Header
   {&$hf($head);
   }

  push @html, <<END;                                                            # Table start
<p><table borders="0" cellpadding="10" cellspacing="5">
END

  push @html, $cl if $cl;                                                       # Column headers

  if ($data)                                                                    # Table data
   {for my $data(@$data)
     {push @html, join '', q(<tr><td>), join q(<td>), map {$_//q()} @$data;
     }
   }

  push @html, <<END;                                                            # Table end
</table></p>
END

  push @html, $ct if $ct;                                                       # Column descriptions block

  if (my $foot = $options{foot})                                                # Footer
   {&$hf($foot);
   }

  if (1)                                                                        # Record options invisibly
   {my $options = dump({%options, rows=>$rows});
    push @html, qq(<span class="options" style="display: none">$options</span>);
   }

  my $html = join "\n", @html;                                                  # Create html
  if (my $file = $options{file})
   {my $html = join "\n", @html;
    overWriteFile($file, $html);
   }

  $html
 } # formatHtmlTable

lib/Data/Table/Text.pm  view on Meta::CPAN

   }
  1
 }

sub arrayToHash(@)                                                              # Create a hash reference from an array.
 {my (@array) = @_;                                                             # Array
 +{map{$_=>1} @array}
 }

sub flattenArrayAndHashValues(@)                                                # Flatten an array of scalars, array and hash references to make an array of scalars by flattening the array references and hash values.
 {my (@array) = @_;                                                             # Array to flatten
  my @a;
  for my $a(@array)
   {if    (ref($a) =~ m(\Aarray\Z)i)
     {push @a, &flattenArrayAndHashValues(@$a);
     }
    elsif (ref($a) =~ m(\Ahash\Z)i)
     {push @a, &flattenArrayAndHashValues(map {$$a{$_}} sort keys %$a);
     }
    else
     {push @a, $a;
     }
   }
  @a                                                                            # Flattened array
 }

sub getSubName($)                                                               # Returns the (package, name, file, line) of a perl B<$sub> reference.
 {my ($sub) = @_;                                                               # Reference to a sub with a name.
  if (my $b = B::svref_2object($sub))
   {my $r = ref($b);
    if ($r =~ m(B::CV)i)
     {if (my $g = $b->GV)
       {return ($g->STASH->NAME, $g->NAME, $g->FILE, $g->LINE);                 # Package, name, file, line in file
       }
     }
   }
  confess "Unable to get name of sub referenced by $sub";
 }

#D1 Strings                                                                     # Actions on strings.

sub stringMd5Sum($)                                                             # Get the Md5 sum of a B<$string> that might contain L<utf8> code points.
 {my ($string) = @_;                                                            # String
  my $f = writeFile(undef, $string);                                            # Write into a file
  my $s = readBinaryFile($f);                                                   # Read as binary
  my $m = md5_hex($s);                                                          # Md5sum of bytes
  unlink $f;
  $m;
 }

sub stringSha256($)                                                             # Get the Sha256 of a string
 {my ($string) = @_;                                                            # String
  if (!defined($string))
    {confess "Undefined string";
    }
  sha256_hex $string;                                                           # Sha256 of string assuming it is ascii only
 }

sub indentString($$)                                                            # Indent lines contained in a string or formatted table by the specified string.
 {my ($string, $indent) = @_;                                                   # The string of lines to indent, the indenting string
  join "\n", map {$indent.$_} split m(\n+), (ref($string) ? $$string  : $string)
 }

sub replaceStringWithString($$$)                                                # Replace all instances in B<$string> of B<$source> with B<$target>.
 {my ($string, $source, $target) = @_;                                          # String in which to replace substrings, the string to be replaced, the replacement string
  for(1..(1+length($string) / (length($source)+1)))                             # Avoid too much recursive expansion
   {my $i = index($string, $source);
    if ($i >= 0)
     {substr($string, $i, length($source)) = $target;
      next;
     }
    last;
   }
  $string
 }

sub formatString($$)                                                            # Format the specified B<$string> so it can be displayed in B<$width> columns.
 {my ($string, $width) = @_;                                                    # The string of text to format, the formatted width.

  $string =~ s(\\m) (\n\n)gs;                                                   # Expand \m introduced by update documentation

  for(1..9)
   {if ($string =~ m((B<([^>]*)>))s)
     {$string = replaceStringWithString(my $s = $string, $1, boldString($2));
      last if $s eq $string;
     }
   }

  my @f;
  my @w = split m/\s+/, $string;                                                # Parse string into words
  for my $w(@w)                                                                 # Bold B<string>
   {if (!$f[-1]) {push @f, $w}
    else
     {my $l = $f[-1].qq( $w);
      if (length($l) > $width)
       {push @f, $w;
       }
      else
       {$f[-1] = $l;
       }
     }
   }

  my $t = join "\n", @f;                                                        # Format punctuation
     $t =~ s(\s*([,;.!?]))           ($1)gs;
     $t =~ s(\s*\Z)                  ()s;

  "$t\n"
 }

sub isBlank($)                                                                  # Test whether a string is blank.
 {my ($string) = @_;                                                            # String
  $string =~ m/\A\s*\Z/
 }

sub trim($)                                                                     # Remove any white space from the front and end of a string.
 {my ($string) = @_;                                                            # String
  $string =~ s/\A\s+//r =~ s/\s+\Z//r
 }

sub pad($$;$)                                                                   # Pad the specified B<$string> to a multiple of the specified B<$length>  with blanks or the specified padding character to a multiple of a specified length.
 {my ($string, $length, $padding) = @_;                                         # String, tab width, padding string
  defined($string) or confess "String required\n";
  $string =~ s/\s+\Z//;
  $padding //= q( );
  my $l = length($string);
  return $string if $l % $length == 0;
  my $p = $length - $l % $length;
  $string .= $padding x $p;
 }

sub lpad($$;$)                                                                  # Left Pad the specified B<$string> to a multiple of the specified B<$length>  with blanks or the specified padding character to a multiple of a specified length.
 {my ($string, $length, $padding) = @_;                                         # String, tab width, padding string
  defined($string) or confess "String required\n";
  $string =~ s/\s+\Z//;
  $padding //= q( );
  my $l = length($string);
  return $string if $l % $length == 0;
  my $p = $length - $l % $length;
  ($padding x $p).$string;
 }

sub ppp($$;$)                                                                   # Pad the specified B<$string> to a multiple of the specified B<$length>  with blanks or the specified padding character to a multiple of a specified length.
 {my ($length, $string, $padding) = @_;                                         # Tab width, string, padding string
  defined($string) or confess "String required\n";
  $string =~ s/\s+\Z//;
  $padding //= q( );
  my $l = length($string);
  return $string if $l % $length == 0;
  my $p = $length - $l % $length;
  $string .= $padding x $p;
 }

sub firstNChars($$)                                                             # First N characters of a string.
 {my ($string, $length) = @_;                                                   # String, length
  return $string if !$length or length($string) < $length;
  substr($string, 0, $length);
 }

sub nws($;$)                                                                    # Normalize white space in a string to make comparisons easier. Leading and trailing white space is removed; blocks of white space in the interior are reduced to a single...
 {my ($string, $length) = @_;                                                   # String to normalize, maximum length of result
  my $s = $string =~ s((\x{200b}|\A\s+|\s+\Z)) ()gr =~ s/\s+/ /gr;
  firstNChars($s, $length)                                                      # Apply maximum length if requested
 }

sub deduplicateSequentialWordsInString($)                                       # Remove sequentially duplicate words in a string.
 {my ($s) = @_;                                                                 # String to deduplicate
  my %a = map {$_=>1} grep {$_} split /\W+/, $s;                                # Split into words
  for my $w(sort keys %a)
   {1 while $s =~ s($w\s+$w) ($w)gs;
   }
  $s
 }

sub detagString($)                                                              # Remove L<html> or L<xml> tags from a string.
 {my ($string) = @_;                                                            # String to detag
  $string =~ s(<[^>]*>) ()gsr                                                   # Remove xml/html tags
 }

sub parseIntoWordsAndStrings($)                                                 # Parse a B<$string> into words and quoted strings. A quote following a space introduces a string, else a quote is just part of the containing word.
 {my ($string) = @_;                                                            # String to parse
  return () unless $string;

  my $s = 0;                                                                    # 0 - look for word or quote, 1 in word, 2 in ' string, 3 - in " string
  my @r;
  my $r;

  my $accept = sub                                                              # Accept a word or string
   {push @r, $r; $s = 0;
   };

  for my $c(split m//, $string)                                                 # Each character in the string
   {next if $s == 0 and $c =~ m(\s);                                            # Skip spaces while looking for a word or string

    if ($s == 0)                                                                # String
     {if    ($c =~ m('))                                                        # Single quoted ' string
       {$r = ''; $s = 2;
       }
      elsif ($c =~ m("))                                                        # Double quoted " string
       {$r = ''; $s = 3;
       }
      else                                                                      # Word
       {$r = $c; $s = 1;
       }
     }
    elsif ($s == 1)                                                             # In word
     {if ($c =~ m(\s))
       {&$accept;
       }
      else
       {$r .=  $c;
       }
     }
    elsif ($s == 2)                                                             # In ' string
     {if ($c =~ m('))
       {&$accept;
       }
      else
       {$r .=  $c;
       }
     }
    elsif ($s == 3)                                                             # In " string
     {if ($c =~ m("))
       {&$accept;
       }
      else
       {$r .=  $c;
       }
     }
   }
  &$accept;
  @r
 } # parseIntoWordsAndStrings

sub stringsAreNotEqual($$)                                                      # Return the common start followed by the two non equal tails of two non equal strings or an empty list if the strings are equal.
 {my ($a, $b) = @_;                                                             # First string, second string
  my @a = split //, $a;
  my @b = split //, $b;
  my @c;
  while(@a and @b and $a[0] eq $b[0])
   {shift @a; push @c, shift @b;
   }
  (join(q(), @c), join(q(), @a), join(q(), @b))
 }

sub showGotVersusWanted($$)                                                     # Show the difference between the wanted string and the wanted string.
 {my ($g, $e) = @_;                                                             # First string, second string
  my @s;
  if ($g ne $e)
   {my ($s, $G, $E) = stringsAreNotEqual($g, $e);
    if (length($s))
     {my $line = 1 + length($s =~ s([^\n])  ()gsr);

lib/Data/Table/Text.pm  view on Meta::CPAN

       },
      sub
       {&$results($userData, @_);
       },
      @$files,
     );
   }
 } # awsParallelProcessFiles

sub awsParallelProcessFilesTestParallel($$)                                     #P Test running on L<AWS> in parallel.
 {my ($userData, $file) = @_;                                                   # User data, file to process.
  my $i = &awsCurrentIp||q(localHost);
  $userData->{files}{$file} = fileMd5Sum($file);
  $userData->{ip}    {$i}   = 1;                                                # UserData is reused each time so we cannot ++
  $userData->{ipFile}{$i}{$file}++;
  $userData;
 }

sub awsParallelProcessFilesTestResults($@)                                      #P Test results of running on L<AWS> in parallel.
 {my ($userData, @results) = @_;                                                # User data from primary instance instance or process, results from each parallel instance or process

  for   my $x(@results)
   {for my $f(sort keys $x->{files}->%*)
     {$userData->{files}{$f} = $x->{files}{$f};
     }
    for my $i(sort keys $x->{ip}->%*)
     {$userData->{ip}{$i}   += $x->{ip}{$i};
     }
    for   my $i(sort keys $x->{ipFile}    ->%*)
     {for my $f(sort keys $x->{ipFile}{$i}->%*)
       {$userData->{ipFile}{$i}{$f} = $x->{ipFile}{$i}{$f};
       }
     }
    $userData->{merge} += $x->{merge}//0;                                       # Merges done else where
   }

  $userData->{merge}++;                                                         # This merge
  $userData
 }

#D1 S3                                                                          # Work with S3 as if it were a file system.

sub s3Profile(%)                                                                #P Return an S3 profile keyword from an S3 option set.
 {my (%options) = @_;                                                           # Options
  my $p = $options{profile};                                                    # Profile option
  $p ? qq( --profile $p) : q()                                                  # Return profile keyword if profile specified
 }

sub s3Delete(%)                                                                 #P Return an S3 --delete keyword from an S3 option set.
 {my (%options) = @_;                                                           # Options
  my $p = $options{delete};                                                     # Delete option
  $p ? qq( --delete) : q()                                                      # Return delete keyword if profile specified
 }

sub s3ListFilesAndSizes($%)                                                     # Return {file=>size} for all the files in a specified B<$folderOrFile> on S3 using the specified B<%options> if any.
 {my ($folderOrFile, %options) = @_;                                            # Source on S3 - which will be truncated to a folder name, options
  my ($bucket, $folder) = parseS3BucketAndFolderName($folderOrFile);            # Parse an L<s3> bucket/folder name into a bucket and a folder name removing any initial s3://.
  my $profile = s3Profile(%options);                                            # Add profile if specified
  my $getCmd  = qq(aws s3 ls s3://$bucket/$folder $profile --recursive);        # Command to get the sizes of the files to download
  my $files   = qx($getCmd);                                                    # Get the sizes of the files to download
  my @files   = map {my @a = split m/\s+/, $_, 4; [@a[-1, -2, 0, 1]]}           # Files and sizes
                split m/\n/, $files;
  {map {q(s3://).fpf($bucket, $$_[0]) => $_} @files}                            # Hash {file=>[name, size, modified date, modified time]}
 }

sub s3FileExists($%)                                                            # Return (name, size, date, time) for a B<$file> that exists on S3 else () using the specified B<%options> if any.
 {my ($file, %options) = @_;                                                    # File on S3 - which will be truncated to a folder name, options
  my %files = s3ListFilesAndSizes($file, %options);                             # Details of files with that prefix
  return () unless keys %files == 1;                                            # Only one file expected
  my ($f)   = keys %files;                                                      # File name
  my $d     = $files{$f};                                                       # Details of the one file
  return () unless $$d[3];                                                      # All details present
  @$d                                                                           # Return details of one file
 }

sub s3WriteFile($$%)                                                            # Write to a file B<$fileS3> on S3 the contents of a local file B<$fileLocal> using the specified B<%options> if any.  $fileLocal will be removed if %options contains a k...
 {my ($fileS3, $fileLocal, %options) = @_;                                      # File to write to on S3, string to write into file,  options
  my ($bucket, $folder) = parseS3BucketAndFolderName($fileS3);                  # Parse an L<s3> bucket/folder name into a bucket and a folder name removing any initial s3://.
  my $profile = s3Profile(%options);                                            # Add profile if specified
  my $f       = pad($fileLocal,               32);
  my $s       = pad(qq(s3://$bucket/$folder), 32);
  my $cmd     = qq(aws s3 cp $f $s $profile --quiet);                           # Command to write the temporary file into S3 with the specified file name
  xxx $cmd;                                                                     # Execute and print command
# unlink $fileLocal if $options{cleanUp};                                       # Remove local file after upload if requested
 }

sub s3WriteString($$%)                                                          # Write to a B<$file> on S3 the contents of B<$string> using the specified B<%options> if any.
 {my ($file, $string, %options) = @_;                                           # File to write to on S3, string to write into file,  options
  my ($bucket, $folder) = parseS3BucketAndFolderName($file);                    # Parse an L<s3> bucket/folder name into a bucket and a folder name removing any initial s3://.
  my $profile = s3Profile(%options);                                            # Add profile if specified
  my $temp    = writeFile(undef, $string);                                      # Write the string to a temporary file
  my $f       = pad($temp,                    32);
  my $s       = pad(qq(s3://$bucket/$folder), 32);
  my $cmd     = qq(aws s3 cp $f $s $profile --quiet);                           # Command to write the temporary file into S3 with the specified file name
  xxx $cmd;                                                                     # Execute and print command
  unlink $temp;
 }

sub s3ReadFile($$%)                                                             # Read from a B<$file> on S3 and write the contents to a local file B<$local> using the specified B<%options> if any.  Any pre existing version of the local file $local w...
 {my ($file, $local, %options) = @_;                                            # File to read from on S3, local file to write to,  options
  my ($bucket, $folder) = parseS3BucketAndFolderName($file);                    # Parse an L<s3> bucket/folder name into a bucket and a folder name removing any initial s3://.
  my $profile = s3Profile(%options);                                            # Add profile if specified
  my $quiet   = $file =~ m(pcd\Z)i ? q() : q( --quiet);                         # Watch certain important files
  my $d       = temporaryFolder;
  my $F       = fpe(temporaryFile, qw(download txt));
  my $f       = pad($F,                       32);
  my $s       = pad(qq(s3://$bucket/$folder), 32);
  my $cmd     = qq(aws s3 cp $s $f $profile $quiet);                            # Command to write the temporary file into S3 with the specified file name
  lll $cmd;
  xxx $cmd;                                                                     # Download
  moveFileWithClobber($f, $local);                                              # Update local file if a file was in fact downloaded
  clearFolder($d, 11);
  -f $local
 }

sub s3ReadString($%)                                                            # Read from a B<$file> on S3 and return the contents as a string using specified B<%options> if any.  Any pre existing version of $local will be deleted.  Returns whether...
 {my ($file, %options) = @_;                                                    # File to read from on S3, options
  my ($bucket, $folder) = parseS3BucketAndFolderName($file);                    # Parse an L<s3> bucket/folder name into a bucket and a folder name removing any initial s3://.
  my $profile = s3Profile(%options);                                            # Add profile if specified
  my $local   = temporaryFile;                                                  # Temporary file to hold download
  my $f       = pad($local,                   32);
  my $s       = pad(qq(s3://$bucket/$folder), 32);

lib/Data/Table/Text.pm  view on Meta::CPAN

   {confess "No such folder: $source";
   }
  return undef unless confirmHasCommandLineCommand(q(zip));                     # Confirm we have zip
  my $z = fpe(temporaryFile, q(zip));                                           # Local zip file
  my $c = qq(cd $source; zip -qr $z .);                                         # Zip command
  xxx $c, qr(\A\s*\Z);
  my $r = s3WriteFile($target, $z, %options);                                   # Upload to S3
  unlink $z;
  $r
 }

sub s3ZipFolders($%)                                                            # Zip local folders and upload them to S3 in parallel.  B<$map> maps source folder names on the local machine to target folders on S3. B<%options> contains any additional...
 {my ($map, %options) = @_;                                                     # Source folder to S3 mapping, S3 options

  &runInParallel(&numberOfCpus(8), sub                                          # Upload in parallel
   {my ($r) = @_;
    &s3ZipFolder(@$r, %options);
   },
  sub {},
  map{[$_, $$map{$_}]} sort keys %$map);
 }

#D1 GitHub                                                                      # Simple interactions with L<GitHub> - for more complex interactions please use L<GitHub::Crud>.

sub downloadGitHubPublicRepo($$)                                                # Get the contents of a public repo on GitHub and place them in a temporary folder whose name is returned to the caller or confess if no such repo exists.
 {my ($user, $repo) = @_;                                                       # GitHub user, GitHub repo
  my $t = temporaryFolder;                                                      # Folder to download to
  my $z = fpe($t, qw(gh zip));                                                  # Zip file
  my $s = fpe(q(https://github.com/), $user, $repo, qw(archive master zip));    # L<url> to GitHub to retrieve zipped repository
  confirmHasCommandLineCommand(q(wget));                                        # Conform we have wget
  my $d = xxx qq(wget -O $z $s), qr(200 OK);                                    # Run download
     $d =~ m(ERROR 404: Not Found)s || !-e $z || fileSize($z) < 1e2 and         # Make sure we got a zip file
     confess "No such user/repo on GitHub or repo too small:\n$d\n";
  xxx qq(cd $t; unzip $z; rm $z; ls -lah), qr();                                # Unzip the zip file
  $t                                                                            # Return the folder containing the unzipped files
 }

sub downloadGitHubPublicRepoFile($$$)                                           # Get the contents of a B<$user> B<$repo> B<$file> from  a public repo on GitHub and return them as a string.
 {my ($user, $repo, $file) = @_;                                                # GitHub user, GitHub repository, file name in repository
  my $s = fpf(q(https://raw.githubusercontent.com/), $user, $repo, q(master), $file);
  my $t = temporaryFile;                                                        # File to download into
  my $d = xxx qq(wget -O $t $s), qr(200 OK);                                    # Run download
     $d =~ m(ERROR 404: Not Found)s and                                         # Make sure we got the file
     confess "No such user/repo/file on GitHub:\n$d\n";
  -f $t or confess "No output from user/repo/file on GitHub";                   # Check we got a result
  my $r = readFile($t);                                                         # Read results
  unlink $t;                                                                    # Remove temporary output file
  $r                                                                            # Return data read from github
 }

sub postProcessImagesForDocumentation(%)                                        # Post process svg images into png and reload into repo for use by documentation. Useful for detailed svg images which can take a long time to load into a browser - it tr...
 {my (%options) = @_;                                                           # Options
  my $log   = $options{log}  // 1;                                              # Show actions if true
  my $size  = $options{size} // 4096;                                           # Longest size of png images to produce from svg
  my $home  = currentDirectory;                                                 # Home folder
  my $dir   = $options{target}  // fpd qw(lib Silicon Chip);                    # Target folder for images
  my $imgs  = fpd $home, $dir;                                                  # Images source folder
     $imgs  = $home if $ENV{GITHUB_TOKEN};                                      # Change folders for github
  my $svg   = fpd $imgs, qw(svg);                                               # Svg folder
  my $png   = fpd $imgs, qw(png);                                               # Png folder
  my ($user, $repo) =  split m(/), $ENV{GITHUB_REPOSITORY}//'';                 # Userid and repo from github

  makePath($png);                                                               # Make png folder

  my @f = searchDirectoryTreesForMatchingFiles $svg, qw(.svg);                  # Svg files from which we make png files

  my @r;                                                                        # Results
  for my $s(@f)                                                                 # Svg files
   {my $t = setFileExtension $s, q(png);
       $t = swapFilePrefix $t, $svg, $png;                                      # Matching png
    my $x = readFile $s;
    push @r, [$s];                                                              # Record file being processed
    if ($x =~ m(viewBox="0 0\s+(\d+)\s+(\d+)"))                                 # Dimensions of image
     {my ($x, $y) = ($1, $2);
      my $m = maximum $x, $y;                                                   # Scale image to maximum requested size
      $x *= int($size / $m);
      $y *= int($size / $m);
      say STDERR sprintf "Convert svg: x=%5d, y=%5d  ".$s, $x, $y if $log;      # Log change
      my $c = qq(cairosvg -o $t --output-width $x --output-height $y $s);       # Convert svg to png
      my $r = qx($c);
      push $r[-1]->@*, $r;                                                      # Save result
      say STDERR $r if $r =~ m(\S);
     }
   }
  for my $x(qw(gds png svg))                                                    # Move images to target location
   {my $s = fpd $imgs, $x;
    next unless -e $s;
    my $t = fpd $dir,  $x;
    copyFolder($s, $t);
    clearFolder($s, undef);
   }
  @r                                                                            # Results of each upload
 }

#D1 Processes                                                                   # Start processes, wait for them to terminate and retrieve their results

sub startProcess(&\%$)                                                          # Start new processes while the number of child processes recorded in B<%$pids> is less than the specified B<$maximum>.  Use L<waitForAllStartedProcessesToFinish|/waitFor...
 {my ($sub, $pids, $maximum) = @_;                                              # Sub to start, hash in which to record the process ids, maximum number of processes to run at a time
  warn "Deprecated in favor of newProcessStarter";
  while(keys(%$pids) >= $maximum)                                               # Wait for enough processes to terminate to bring us below the maximum number of processes allowed.
   {my $p = waitpid 0,0;
#   $$pids{$p} or confess "Pid $p not defined in ".dump($pids)."\n";
    delete $$pids{$p}
   }

  if (my $pid = fork)                                                           # Create new process
   {$$pids{$pid}++                                                              # Update pids
   }
  else                                                                          # Run sub in new process
   {&$sub;
    exit;
   }
 }

sub waitForAllStartedProcessesToFinish(\%)                                      # Wait until all the processes started by L<startProcess|/startProcess> have finished.
 {my ($pids) = @_;                                                              # Hash of started process ids
  warn "Deprecated in favor of newProcessStarter";
  while(keys %$pids)                                                            # Remaining processes
   {my $p = waitpid 0,0;
#   $$pids{$p} or cluck "Pid $p not defined in ".dump($pids)."\n";
    delete $$pids{$p}

lib/Data/Table/Text.pm  view on Meta::CPAN

#    for(keys @buckets)                                                          # Find smallest bucket - sort in place is slower
#     {$ms = $bucketSizes[$mb = $_] if $bucketSizes[$_] < $ms;                   # Smallest bucket so far
#     }
#    $bucketSizes[$mb]   += $$size[0];                                           # Update bucket size
#    push @{$buckets[$mb]}, $$size[1];                                           # Add file to bucket
#   }

  my @buckets = packBySize($N, @sizes);                                         # Pack files by size
  my $p = newProcessStarter($N);                                                # Process starter
  for my $bucket(@buckets)                                                      # Process each bucket
   {$p->start(sub                                                               # Multiverse
     {my @r;
      for my $file(@$bucket)                                                    # Process each element of each row and consolidate the results
       {push @r, &$parallel($file);
       }
      [@r]
     });
   }

  my @p = $p->finish;                                                           # Consolidate results in universe
  my @r = deSquareArray @p;

  return &$results(@r) if $results;                                             # Post process results
  @r                                                                            # Return results if no post processor
 } # processSizesInParallel

sub processSizesInParallel($$@)                                                 # Process items of known size in parallel using (8 * the number of CPUs) processes with the process each item is assigned to depending on the size of the item so that eac...
 {my ($parallel, $results, @sizes) = @_;                                        # Parallel sub, results sub, array of [size; item] to process by size
  my $N = sub                                                                   # Heuristically scale the number of cpus by the instance type
   {return  4 unless onAws;
    my $i = awsCurrentInstanceType;
    return  4 if $i =~ m(\Am)i;
    return  8 if $i =~ m(\Ar)i;
    return 16 if $i =~ m(\Ax)i;
            2
   }->();
  processSizesInParallelN(numberOfCpus($N), $parallel, $results, @sizes);       # Process in parallel
 } # processSizesInParallel

sub processFilesInParallel($$@)                                                 # Process files in parallel using (8 * the number of CPUs) processes with the process each file is assigned to depending on the size of the file so that each process is l...
 {my ($parallel, $results, @files) = @_;                                        # Parallel sub, results sub, array of files to process by size
  processSizesInParallel $parallel, $results, map {[fileSize($_), $_]} @files;  # Process in parallel packing files to achieve as equal as possibly sized processes
 } # processFilesInParallel

sub processJavaFilesInParallel($$@)                                             # Process java files of known size in parallel using (the number of CPUs) processes with the process each item is assigned to depending on the size of the java item so th...
 {my ($parallel, $results, @files) = @_;                                        # Parallel sub, results sub, array of [size; java item] to process by size
  my @sizes = map {[fileSize($_), $_]} @files;                                  # Process in parallel packing files to achieve as equal as possibly sized processes
  processSizesInParallelN(numberOfCpus(1/2), $parallel, $results, @sizes);      # Process in parallel
 } # processJavaFilesInParallel

sub syncFromS3InParallel($$$;$$)                                                # Download from L<S3> by using "aws s3 sync --exclude '*' --include '...'" in parallel to sync collections of two or more files no greater then B<$maxSize> or single file...
 {my ($maxSize, $source, $target, $Profile, $options) = @_;                     # The maximum collection size, the source folder on S3, the target folder locally, aws cli profile, aws cli options
                                                                                # See: /home/phil/r/z/partitionStrings.pl for standalone tests
  my ($bucket, $folder) = parseS3BucketAndFolderName($source);                  # Parse an L<s3> bucket/folder name into a bucket and a folder name removing any initial s3://.

  my $profile = $Profile ? qq( --profile $Profile) : q();                       # Add profile if specified
  $options  //= q();                                                            # Default options

  my $getCmd  = qq(aws s3 ls s3://$bucket/$folder $profile --recursive);        # Command to get the sizes of the files to download
  my $files   = qx($getCmd);                                                    # Get the sizes of the files to download
  my @files   = map {my @a = split m/\s+/, $_, 4; [@a[-1, -2]]}                 # Files and sizes
                split m/\n/, $files;
  return unless @files;                                                         # No files to download

  call sub                                                                      # Partition likely to cause a lot of memory fragmentation
   {my %partition = partitionStringsOnPrefixBySize($maxSize, map {@$_} @files); # Partition the download into collections no larger than the specified size

    processSizesInParallel(                                                     # Download folders packing by size
      sub
       {my ($P) = @_;                                                           # Path to folder to download
        return unless keys %partition > 1;                                      # Process in parallel only if there is more than one partition
        my $p = swapFilePrefix($P, $folder);                                    # Remove the folder because it will be added back by the sync command, see:
        my $c   = join ' ', map {pad($_, 32)}                                   # Download in parallel command
                  qq(aws s3 sync "s3://$bucket/$folder"), qq("$target"),
                  qq(--exclude "*" --include "$p*"),
                  $options, $profile, q(--quiet);
        #lll $c;
        xxx $c, qr(\A\s*\Z);
       },
      sub                                                                       # Now execute the original command which should require less processing because of the prior downloads in parallel
       {my $c   = join ' ', map {pad($_, 32)}                                   # Down load in series command
                  qq(aws s3 sync "s3://$bucket/$folder"), qq("$target"),
                  $options, $profile, q(--quiet);
        #lll $c;
        xxx $c, qr(\A\s*\Z);
       }, map {[$partition{$_}, $_]} sort keys %partition);
   };
 } # syncFromS3InParallel

sub syncToS3InParallel($$$;$$)                                                  # Upload to L<S3> by using "aws s3 sync --exclude '*' --include '...'" in parallel to sync collections of two or more files no greater then B<$maxSize> or single files gr...
 {my ($maxSize, $source, $target, $Profile, $options) = @_;                     # The maximum collection size, the target folder locally, the source folder on S3, aws cli profile, aws cli options

  $target =~ s(\As3://) ();                                                     # Remove S3 prefix if present

  my $profile = $Profile ? qq( --profile $Profile) : q();                       # Add profile if specified
  $options  //= q();                                                            # Default options

  my @files   = map {[$_=>fileSize $_]}                                         # Files and sizes
                searchDirectoryTreesForMatchingFiles($source);
  return unless @files;                                                         # No files to download

  $$_[0] = swapFilePrefix($$_[0], $source) for @files;                          # Remove folder prefix

  call sub                                                                      # Partition likely to cause a lot of memory fragmentation
   {my %partition = partitionStringsOnPrefixBySize($maxSize, map {@$_} @files); # Partition the download into collections no larger than the specified size

    processSizesInParallel(                                                     # Download folders packing by size
      sub
       {my ($p) = @_;                                                           # Path to folder to download
        return unless keys %partition > 1;                                      # Process in parallel only if there is more than one partition
        my $c   = join ' ', map {pad($_, 32)}
                  qq(aws s3 sync "$source"), qq("s3://$target"),
                  qq(--exclude "*" --include "$p*"),
                  $options, $profile, q(--quiet);
        #lll $c;
        xxx $c, qr(\A\s*\Z);
       },
      sub                                                                       # Now execute the original command which should require less processing because of the prior downloads in parallel
       {my $c   = join ' ', map {pad($_, 32)}
                  qq(aws s3 sync "$source"), qq("s3://$target"),
                  $options, $profile, q(--quiet);
        #lll $c;

lib/Data/Table/Text.pm  view on Meta::CPAN

     {s(->\[) ([)gs;
      s(->)   (\.)gs;
     }
   }

  if (1)                                                                        # Specifics
   {for(@lines)
     {s(\.\.)  (.)gs;
      s(\$ssv) (ditaJs.ssv)gs;
     }
   }

  my @comments = split /\n/, join '', @lines;                                   # Reparse
  if (1)                                                                        # Comment position
   {for my $i(keys @comments)
     {next if $comments[$i] =~ m(\A//);
      if ($comments[$i] =~ m(\A(.*)(//.*)\Z))
       {my ($code, $comment) = ($1, $2);
        if (length($code) > 80)
         {my $a = substr($code, 0, 80);
          my $b = substr($code, 80);
             $b =~ s(\s+\Z) ();
          $code = qq($a$b);
         }
        elsif (length($code) < 80)
         {$code = substr($code.(' ' x 80), 0, 80);
         }
        $comments[$i] = qq($code$comment)
       }
     }
   }

  my $text = join "\n", @comments, '';
     $text =~ s((\n=pod\n.*?\n=cut\n)) (`$1`)gs;                                # Pod as comment string

  $out ? owf($out, $text) : (say STDOUT $text)                                  # Write results to file or STDOUT
 } # convertPerlToJavaScript

#D1 Documentation                                                               # Extract, format and update documentation for a perl module.

sub parseDitaRef($;$$)                                                          # Parse a dita reference B<$ref> into its components (file name, topic id, id) . Optionally supply a base file name B<$File>> to make the the file component absolute and/...
 {my ($ref, $File, $TopicId) = @_;                                              # Reference to parse, default absolute file, default topic id
  return (q()) x 3 unless $ref and $ref =~ m(\S)s;

  my ($file, $rest)  = split /#/, $ref, 2;

  $file    = $File && $file ? sumAbsAndRel($File, $file) : $File || $file||q(); # Full file path if possible

  if (!$rest)                                                                   # File
   {return ($file, q(), q())
   }

  if ($rest !~ m(/)s)                                                           # File#id
   {return ($file, q(), $rest)
   }

  if ($rest =~ m(\A\./)s)                                                       # File#./id
   {return ($file, $TopicId || q(), $rest =~ s(\A\./) ()r)
   }

  my ($topicId, $id) = split m(/), $rest, 2;
  $topicId = $topicId || $TopicId || q();
  $topicId = $TopicId if $TopicId and $topicId =~ m(\A(\s*|\.)\Z);
  $id    ||= q();

  ($file, $topicId, $id)
 }

sub parseXmlDocType($)                                                          # Parse an L<xml> DOCTYPE and return a hash indicating its components.
 {my ($string) = @_;                                                            # String containing a DOCTYPE

  if ($string =~ m(<!DOCTYPE\s+(\w+)\s+PUBLIC\s+"([^"]+)"\s+"([^"]+)")s)        # Parse DOCTYPE PUBLIC
   {return genHash(q(DocType),
      root     => $1,
      public   => 1,
      publicId => $2,
      localDtd => $3);
   }
  elsif ($string =~ m(<!DOCTYPE\s+(\w+)\s+SYSTEM\s+([a-z0-9.]+)?)s)             # Parse DOCTYPE SYSTEM
   {return genHash(q(DocType),
      root     => $1,
      public   => 0,
      localDtd => $2);
   }
  undef
 }

sub reportSettings($;$)                                                         # Report the current values of parameterless subs.
 {my ($sourceFile, $reportFile) = @_;                                           # Source file, optional report file
  warn "Deprecated, please use reportAttributeSettings instead";
  my $s = readFile($sourceFile);

  my %s;
  for my $l(split /\n/, $s)                                                     # Find the attribute subs
   {if ($l =~ m(\Asub\s*(\w+)\s*\{.*?#\s+(.*)\Z))
     {$s{$1} = $2;
     }
   }

  my @r;
  for my $s(sort keys %s)                                                       # Evaluate each sub
   {my ($package, $filename, $line) = caller;                                   # Callers context
    my $v = eval q(&).$package.q(::).$s;                                        # Current value in callers context
    my $r = $@ // '';                                                           # Failure description
    push @r, [$s, $v, $r, $s{$s}];                                              # Table entry of sub name, sub value, reason why there is no value, comment
   }

  formatTable(\@r, <<END,                                                       # Format table
Attribute The name of the program attribute
Value     The current value of the program attribute
END
    head      => qq(Found NNNN parameters on DDDD),
    title     => qq(Attributes in program: $sourceFile),
    summarize => 1,
    $reportFile ? (file=>$reportFile) : ());
 }

sub reportAttributes($)                                                         # Report the attributes present in a B<$sourceFile>.
 {my ($sourceFile) = @_;                                                        # Source file
  my $s = readFile($sourceFile);
  my %s;

lib/Data/Table/Text.pm  view on Meta::CPAN

    yum             => [q(Yum),                                                 "https://en.wikipedia.org/wiki/Yum_(software)"                                                                                    ],
    zap             => [q(Zap),                                                 "https://www.sciencedirect.com/science/article/pii/B978008025697950052X."                                                         ],
    zeasl           => [q(Zero assembler programming language),                 "https://github.com/philiprbrenan/zero"                                                                                           ],
    zerowidthspace  => [q(zero width space),                                    "https://en.wikipedia.org/wiki/Zero-width_space"                                                                                  ],
    zip             => [q(zip),                                                 "https://linux.die.net/man/1/zip"                                                                                                 ],
    zoom            => [q(Zoom),                                                "https://zoom.us/"                                                                                                                ],
    zynq            => [q(Zynq),                                                "https://www.xilinx.com/products/som/kria/k26c-commercial.html"                                                                   ],
   );
 } # wellKnownUrls

sub spellCheck($%)                                                              #P Spell checker.
 {my ($Text, %options) = @_;                                                    # Text to check, options
  eval "use Text::SpellChecker";
  binModeAllUtf8;

  my $text = $Text;
     $text =~ s(<head>.*?</head>)     ()gs;                                     # Remove head section
     $text =~ s(<script>.*?</script>) ()gs;                                     # Remove script sections
     $text =~ s(<pre>.*?</pre>)       ()gs;                                     # Remove pre sections
     $text =~ s(<style>.*?</style>)   ()gs;                                     # Remove style sections
     $text =~ s(<b>.*?</b>)           ()gs;                                     # Remove quotes as they are probably spelled incorrectly
     $text =~ s(<q>.*?</q>)           ()gs;                                     # Remove quotes as they are probably spelled correctly as far as their authors care
     $text =~ s(<.*?>) ()gs;                                                    # Remove tags
     $text =~ s(&nbsp;) ()gs;                                                   # Remove non blank space
     $text =~ s(\b[A-Z𝗔-𝗭𝗮-𝘇].*\b)   ()gs;                                      # Remove words that start with capital letters
     $text =~ s(philip|brenan)       ()gs;                                      # Remove names of famous people who choose to spell their names in lower case
  my $c = Text::SpellChecker->new(text=>$text, lang => q(en_US));
  while(my $word = $c->next_word)                                               # Each error
   {say STDERR  pad($word, 16), " : ", (join ", ", @{$c->suggestions});
   }

  $Text                                                                         # Text to allow chaining
 }

sub expandWellKnownWordsAsUrlsInHtmlFormat($)                                   # Expand words found in a string using the html B<a> tag to supply a definition of that word.
 {my ($string)  = @_;                                                           # String containing url names to expand
  my $wellKnown = wellKnownUrls;                                                # Well known urls to expand

  for my $w(sort keys %$wellKnown)                                              # Expand well known words (lowercased) as html links
   {my ($t, $u) = @{$$wellKnown{$w}};
    $string =~ s(L\[$w\]) (<a href="$u">$t</a>)gis;                             # Explicit link
    $string =~ s((\s|>)$w([.,;:'"s]*)(\s)) ($1<a href="$u">$t</a>$2$3)gs;       # Word that matches - possibly plural
   }

  $string =~ s(W\[(\w+)\]) (<code>$1</code>)gs;                                 # Use W[...] to wraps words with definitions we wish to stress
  $string =~ s(w\[(\w+)\]) ($1)gsr;                                             # Use w[...] to wraps words with definitions we wish to keep as is
 }

sub expandWellKnownWordsAsUrlsAndAddTocToMakeANewHtmlFile($)                    # Expand well known words found in a '.htm' file of html and add a table of contents to create a new '.html' file.
 {my ($file) = @_;                                                              # File
  -e $file or confess "No such file: $file";                                    # Check file exists
  fe($file) eq "htm" or confess "htm file expected not $file\n";                # Check extension
  my $o = setFileExtension $file, q(html);                                      # Change extension to html
  owf $o, expandWellKnownWordsAsUrlsInHtmlFormat htmlToc file=>$file;           # Expand urls and add toc to create new file
 }

sub expandWellKnownWordsAsUrlsInMdFormat($)                                     # Expand words found in a string using the md url to supply a definition of that word.
 {my ($string)  = @_;                                                           # String containing url names to expand
  my $wellKnown = wellKnownUrls;                                                # Well known urls to expand

  my @s = split m/```/, $string;                                                # Separate code from text
  my $i = 0;

  for my $s(@s)
   {next  unless ++$i % 2;                                                      # Code is in odd sections
    for my $w(sort keys %$wellKnown)                                            # Expand well known words (lowercased) as html links
     {my ($t, $u) = @{$$wellKnown{$w}};
      $s =~ s(L\[$w\])            ([$t]($u))gis;                                # Explicit link
      $s =~ s(\s$w([.,;:'"]*)\s) ( [$t]($u)$1 )gs;                              # Word that matches
     }
   }

  $string = join '```', @s;

  $string =~ s(W\[(\w+)\]) (```$1```)gs;                                        # Use W[...] wraps words with definitions we wish to stress
  $string =~ s(w\[(\w+)\]) ($1)gsr;                                             # Use w[...] wraps words with definitions we wish to keep as is
 }

sub reinstateWellKnown($)                                                       #P Contract references to well known Urls to their abbreviated form.
 {my ($string)  = @_;                                                           # Source string
  my $wellKnown = wellKnownUrls;                                                # Well known urls to contract

  for my $w(sort keys %$wellKnown)
   {my ($t, $u) = @{$$wellKnown{$w}};
    $string =~ s(L<$t\|$u>) (L<$t>)gis;
   }

  $string                                                                       # Result
 }

sub expandWellKnownUrlsInPerlFormat($)                                          # Expand short L<url> names found in a string in the format LE<lt>url-nameE<gt> using the Perl POD syntax.
 {my ($string)  = @_;                                                           # String containing url names to expand
  my $wellKnown = wellKnownUrls;                                                # Well known urls to expand

  for my $w(sort keys %$wellKnown)
   {my ($t, $u) = @{$$wellKnown{$w}};
    $string =~ s(L\<$w\>) (L<$t|$u>)gis;
   }

  $string                                                                       # Result
 }

sub expandWellKnownUrlsInHtmlFormat($)                                          # Expand short L<url> names found in a string in the format L[url-name] using the html B<a> tag.
 {my ($string)  = @_;                                                           # String containing url names to expand
  my $wellKnown = wellKnownUrls;                                                # Well known urls to expand

  for my $w(sort keys %$wellKnown)                                              # Expand well known urls as html a links
   {my ($t, $u) = @{$$wellKnown{$w}};
    $string =~ s(L\[$w\]) (<a format="html" href="$u">$t</a>)gis;
   }

  if (my @e = $string =~ m(L\[(\w+)\])gs)                                       # Check for expansion failures
   {say STDERR "Failed to find url expansions for these words:\n", dump(\@e);
   }

  $string                                                                       # Result
 }

sub expandWellKnownUrlsInHtmlFromPerl($)                                        # Expand short L<url> names found in a string in the format L[url-name] using the html B<a> tag.
 {my ($string)  = @_;                                                           # String containing url names to expand
  my $wellKnown = wellKnownUrls;                                                # Well known urls to expand

lib/Data/Table/Text.pm  view on Meta::CPAN

    $s =~ s(\.?\s*\Z) (.)s;
    $s
   };

  my %parameters;                                                               # Parameters for each def
  my %comments;                                                                 # Comments for each def
  my %tests;                                                                    # Tests for each def
  my %testsCommon;                                                              # Common line for tests
  my %classDefinitions;                                                         # Class definitions
  my %classFiles;                                                               # Class files
  my %errors;                                                                   # Errors by source file

  for my $source(@sources)                                                      # Each source file
   {my @text = readFile($source);                                               # Read source file
    my $lines = @text;
    my $class = fne $source;

    my sub currentLine {$lines - @text};                                        # Current line number

    my sub getDocString                                                         # Get a doc string
     {my @c;

      my sub strip                                                              # Strip leading and trailing quotes
       {return unless @c;
        $c[0]  =~ s(\A\s*$docRe) ();
        $c[-1] =~ s($docRe\s*\Z) ();
        $c[-1] =~ s(\.?\s*\Z) (.);
        join "\n", @c
       };

      if (my $c = shift @text)                                                  # Doc string
       {if ($c =~ m(\A\s*$docRe.*\S))                                           # Quotes and text on same line
         {@c = $c;
          while(@text and $c !~ m($docRe\s*\Z)i)
           {push @c, $c = shift @text;
           }
          return strip
         }
        elsif ($c =~ m(\A\s*$docRe\s*\Z))                                       # Just quotes
         {@c = $c;
          while(@text and $text[0] !~ m($docRe\s*\Z)i)
           {push @c, shift @text;
           }
          return strip
         }
       }
      q()
     };

    my sub error(@)                                                             # Record an error
     {my (@e) = @_;                                                             # Error strings
      push $errors{$source}->@*, join ' ', @e;
     };

    while(@text)                                                                # Parse text of module
     {my $text = shift @text;

      if ($text =~ m(\A\s*def\s+(.*?)\((.*?)\)\s*:.*?#(\w*)\s+(.*))i)           # Def  function(parameter1 =1, parameter2 = 2) :  # first, second
       {my ($def, $parameters, $attributes, $parameterDefinitions) = @{^CAPTURE};

        my @p = split m/\s*,\s*/, $parameters;
        my @d = split m/\s*,\s*/, $parameterDefinitions;
        my $p = @p; my $d = @d;
        if ($p != $d)
         {my $l = currentLine;
          error qq(Number of parameters specified: $d does not equal),
                qq(number of parameters documented: $d on line: $l)
         }
        else
         {for my $p(@p)
           {my $c = ucfirst shift @d;
               $c =~ s(\.?\s*\Z) ()s;
            push $parameters{$class}{$def}->@*, [$p, $c];
           }
         }

        $comments{$class}{$def} = getDocString
       }
      elsif ($text =~ m(\A\s*def\s+(.*?)\((.*?)\)\s*:)i)                        # Def  function(parameter1 =1, parameter2 = 2) :
       {my ($def, $parameters) = @{^CAPTURE};
        my $doc = $comments{$class}{$def} = getDocString;

        my @p = split m/\s*,\s*/, $parameters;                                  # Parameters defined by a Python subroutine
        my %p;

        for my $line(split m/\n/, $doc)                                         # Check for parameter definitions
         {if ($line =~ m(\A\s*:\s*param\s*(.*?)\s*:\s*(.*?)\s*\Z))
           {my ($parm, $comment) = @{^CAPTURE};
            push $parameters{$class}{$def}->@*, [$parm, $comment];
            $parm =~ s(\A\s*(bool|str)\s*) ()s;                                 # Remove parameter type when present to get parameter name
            $p{$parm} = $comment;
           }
         }

        if (keys %p)                                                            # Use parameter definitions if present
         {if (@p != keys %p)
           {error q(Differing numbers of parameters described in comment and code);
           }
          for my $p(@p)
           {if (!$p{$p})
             {error qq(Parameter $p not described by :param);
             }
            delete $p{$p}
           }
          if (keys %p)
           {my $b = join ', ', sort keys %p;
            error qq(Parameters $b defined by :param but not present in defn);
           }
         }
        else                                                                    # Use parameter definitions from a Python subroutine
         {push $parameters{$class}{$def}->@*, [@p];
         }
        error qq(No parameter definitions for $class.$def)
       }
      elsif ($text =~ m(\A\s*class\s+(.*?)\s*:))                                # Class - assume there is no more than one class per file for the moment
       {$classFiles{$class}       = $class = $1;
        $classDefinitions{$class} = getDocString
       }
      elsif ($text =~ m(\A\s*if\s+1\s*:\s*#[T#](\w+)))                          # Test as if 1: statement
       {my $test = $1;
        my @test;
        while(@text and $text[0] !~ m(\A\s*\Z))
         {push @test, trim shift @text;
         }
        push $tests{$class}{$test}->@*, @test;
       }
      elsif ($text =~ m(\A(.*?)#[T#](\w+)))                                     # Test on a single line
       {my ($text, $test) = @{^CAPTURE};;
        push @{$testsCommon{$test}}, $text;
       }
     }
    error qq(No class in file $source) unless $class
   }

  my $d = genHash(q(Data::Table::Text::Python::Documentation),                  # Documentation extracted from Python source files
    parameters       => \%parameters,                                           # Parameters for each def
    comments         => \%comments,                                             # Comments for each def
    tests            => \%tests,                                                # Tests for each def
    testsCommon      => \%testsCommon,                                          # Common line for tests
    classDefinitions => \%classDefinitions,                                     # Class definitions
    classFiles       => \%classFiles,                                           # Class files
    errors           => \%errors,                                               # Errors encountered
   );

  my %opCodes =                                                                 # Translate these opcodes
   (neg      => q(- ) ,

lib/Data/Table/Text.pm  view on Meta::CPAN


  This file: ${d}report.txt

        BC
  1  a
  2     b
        c
  END
    clearFolder($d, 2);


=head2 formattedTablesReport   (@options)

Report of all the reports created. The optional parameters are the same as for L<formatTable|/formatTable>.

     Parameter  Description
  1  @options   Options

B<Example:>


    @formatTables = ();

    for my $m(2..8)
     {formatTable([map {[$_, $_*$m]} 1..$m], [q(Single), qq(* $m)],
        title=>qq(Multiply by $m));
     }


    ok nws(formattedTablesReport) eq nws(<<END);  # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲

     Rows  Title          File
  1     2  Multiply by 2
  2     3  Multiply by 3
  3     4  Multiply by 4
  4     5  Multiply by 5
  5     6  Multiply by 6
  6     7  Multiply by 7
  7     8  Multiply by 8
  END


=head2 summarizeColumn ($data, $column)

Count the number of unique instances of each value a column in a table assumes.

     Parameter  Description
  1  $data      Table == array of arrays
  2  $column    Column number to summarize.

B<Example:>


    is_deeply

     [summarizeColumn([map {[$_]} qw(A B D B C D C D A C D C B B D)], 0)],  # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲

     [[5, "D"], [4, "B"], [4, "C"], [2, "A"]];

    ok nws(formatTable
     ([map {[split m//, $_]} qw(AA CB CD BC DC DD CD AD AA DC CD CC BB BB BD)],
      [qw(Col-1 Col-2)],
       summarize=>1)) eq nws(<<'END');

  Summary_of_column                - Count of unique values found in each column                     Use the Geany flick capability by placing your cursor on the first word
  Comma_Separated_Values_of_column - Comma separated list of the unique values found in each column  of these lines and pressing control + down arrow to see each sub report.

      Col-1  Col-2
   1  A      A
   2  C      B
   3  C      D
   4  B      C
   5  D      C
   6  D      D
   7  C      D
   8  A      D
   9  A      A
  10  D      C
  11  C      D
  12  C      C
  13  B      B
  14  B      B
  15  B      D

  Summary_of_column_Col-1
     Count  Col-1
  1      5  C
  2      4  B
  3      3  A
  4      3  D

  Comma_Separated_Values_of_column_Col-1: "A","B","C","D"

  Summary_of_column_Col-2
     Count  Col-2
  1      6  D
  2      4  C
  3      3  B
  4      2  A

  Comma_Separated_Values_of_column_Col-2: "A","B","C","D"
  END


=head2 keyCount($maxDepth, $ref)

Count keys down to the specified level.

     Parameter  Description
  1  $maxDepth  Maximum depth to count to
  2  $ref       Reference to an array or a hash

B<Example:>


    my $a = [[1..3],       {map{$_=>1} 1..3}];

    my $h = {a=>[1..3], b=>{map{$_=>1} 1..3}};


    ok keyCount(2, $a) == 6;                                                        # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲

lib/Data/Table/Text.pm  view on Meta::CPAN

if (!onWindows and !onMac) {

if (1)                                                                          # Unicode
 {use utf8;
  my $z = "𝝰 𝝱 𝝲";
  my $T = temporaryFolder;
  my $t = filePath($T, $z);
  my $f = filePathExt($t, $z, qq(data));
  unlink $f if -e $f;
  ok !-e $f;
  writeFile($f, $z);
  ok  -e $f;
  my $s = readFile($f);
  ok $s eq $z;
  ok length($s) == length($z);

  my @f = findFiles($T);
  ok $f[0] eq $f;

  unlink $f;
  ok !-e $f;
  rmdir $t;
  ok !-d $t;
  rmdir $T;
  ok !-d $T;
 }

if (1)                                                                          # Binary
 {my $z = "𝝰 𝝱 𝝲";
  my $Z = join '', map {chr($_)} 0..11;
  my $T = temporaryFolder;
  my $t = filePath($T, $z);
  my $f = filePathExt($t, $z, qq(data));
  unlink $f if -e $f;
  ok !-e $f;
  writeBinaryFile($f, $Z);
  ok  -e $f;
  my $s = readBinaryFile($f);
  ok $s eq $Z;
  ok length($s) == 12;
  unlink $f;
  ok !-e $f;
  rmdir $t;
  ok !-d $t;
  rmdir $T;
  ok !-d $T;
 }
}
else {ok 1 for 1..15}


if (1) {                                                                        # Check files
  my $d = filePath   (my @d = qw(a b c d));                                     #TcheckFile #TmatchPath
  my $f = filePathExt(qw(a b c d e x));                                         #TcheckFile
  my $F = filePathExt(qw(a b c e d));                                           #TcheckFile
  createEmptyFile($f);                                                          #TcheckFile
  ok matchPath($d) eq $d;                                                       #TmatchPath
  ok  eval{checkFile($d)};                                                      #TcheckFile
  ok  eval{checkFile($f)};                                                      #TcheckFile
  ok !eval {checkFile($F)};
  my @m = split m/\n/, $@;
  ok $m[1] eq prefferedFileName "a/b/c/";
  unlink $f;
  ok !-e $f;
  while(@d)                                                                     # Remove path
   {my $d = filePathDir(@d);
    rmdir $d;
    ok onWindows ? 1 : !-d $d;
    pop @d;
   }
 }

if (1)                                                                          # Clear folder
 {my $d = 'a';
  my @d = qw(a b c d);
  my @D = @d;
  while(@D)
   {my $f = filePathExt(@D, qw(test data));
    overWriteFile($f, '1');
    pop @D;
   }
  ok findFiles($d) == 4;
  eval q{clearFolder($d, 3)};
  ok $@ =~ m(\ALimit is 3, but 4 files under folder:)s;
  clearFolder($d, 4);
  ok onWindows ? 1 : !-d $d;
 }

ok formatTable                                                                  #TformatTable
 ([[qw(A    B    C    D   )],                                                   #TformatTable
   [qw(AA   BB   CC   DD  )],                                                   #TformatTable
   [qw(AAA  BBB  CCC  DDD )],                                                   #TformatTable
   [qw(AAAA BBBB CCCC DDDD)],                                                   #TformatTable
   [qw(1    22   333  4444)]], [qw(aa bb cc)]) eq <<END;   #TformatTable
   aa    bb    cc
1  A     B     C     D
2  AA    BB    CC    DD
3  AAA   BBB   CCC   DDD
4  AAAA  BBBB  CCCC  DDDD
5     1    22   333  4444
END

ok formatTable                                                                  #TformatTable
 ([[qw(1     B   C)],                                                           #TformatTable
   [qw(22    BB  CC)],                                                          #TformatTable
   [qw(333   BBB CCC)],                                                         #TformatTable
   [qw(4444  22  333)]], [qw(aa bb cc)]) eq <<END;         #TformatTable
   aa    bb   cc
1     1  B    C
2    22  BB   CC
3   333  BBB  CCC
4  4444   22  333
END

ok formatTable                                                                  #TformatTable
 ([{aa=>'A',   bb=>'B',   cc=>'C'},                                             #TformatTable
   {aa=>'AA',  bb=>'BB',  cc=>'CC'},                                            #TformatTable
   {aa=>'AAA', bb=>'BBB', cc=>'CCC'},                                           #TformatTable
   {aa=>'1',   bb=>'22',  cc=>'333'}                                            #TformatTable
   ]) eq <<END;                                                                 #TformatTable
   aa   bb   cc

lib/Data/Table/Text.pm  view on Meta::CPAN

  unlink $f, $F;
 }

is_deeply [parseFileName(q(/home/phil/r/aci2/out/.ditamap))],
          ["/home/phil/r/aci2/out/", "", "ditamap"];

ok relFromAbsAgainstAbs
 ("/home/phil/r/aci2/out/audit_events.xml",
  "/home/phil/r/aci2/out/.ditamap") eq "audit_events.xml";


if (1) {                                                                        #TmergeHashesBySummingValues
  is_deeply +{a=>1, b=>2, c=>3},
    mergeHashesBySummingValues
      +{a=>1,b=>1, c=>1}, +{b=>1,c=>1}, +{c=>1};
 }

if (1) {                                                                        #TsquareArray #TdeSquareArray #TrectangularArray #TrectangularArray2
  is_deeply [squareArray @{[1..4]} ], [[1, 2], [3, 4]];
  is_deeply [squareArray @{[1..22]}],
   [[1 .. 5], [6 .. 10], [11 .. 15], [16 .. 20], [21, 22]];

  is_deeply [1..$_], [deSquareArray squareArray @{[1..$_]}] for 1..22;
  ok $_ == countSquareArray         squareArray @{[1..$_]}  for 222;

  is_deeply [rectangularArray(3, 1..11)],
            [[1, 4, 7, 10],
             [2, 5, 8, 11],
             [3, 6, 9]];

  is_deeply [rectangularArray(3, 1..12)],
            [[1, 4, 7, 10],
             [2, 5, 8, 11],
             [3, 6, 9, 12]];

  is_deeply [rectangularArray(3, 1..13)],
            [[1, 4, 7, 10, 13],
             [2, 5, 8, 11],
             [3, 6, 9, 12]];

  is_deeply [rectangularArray2(3, 1..5)],
            [[1, 2, 3],
             [4, 5]];

  is_deeply [rectangularArray2(3, 1..6)],
            [[1, 2, 3],
             [4, 5, 6]];

  is_deeply [rectangularArray2(3, 1..7)],
            [[1, 2, 3],
             [4, 5, 6],
             [7]];
 }

if (1) {                                                                        #TsummarizeColumn
  is_deeply
   [summarizeColumn([map {[$_]} qw(A B D B C D C D A C D C B B D)], 0)],
   [[5, "D"], [4, "B"], [4, "C"], [2, "A"]];

  ok nws(formatTable
   ([map {[split m//, $_]} qw(AA CB CD BC DC DD CD AD AA DC CD CC BB BB BD)],
    [qw(Col-1 Col-2)],
     summarize=>1)) eq nws(<<'END');

Summary_of_column                - Count of unique values found in each column                     Use the Geany flick capability by placing your cursor on the first word
Comma_Separated_Values_of_column - Comma separated list of the unique values found in each column  of these lines and pressing control + down arrow to see each sub report.

    Col-1  Col-2
 1  A      A
 2  C      B
 3  C      D
 4  B      C
 5  D      C
 6  D      D
 7  C      D
 8  A      D
 9  A      A
10  D      C
11  C      D
12  C      C
13  B      B
14  B      B
15  B      D

Summary_of_column_Col-1
   Count  Col-1
1      5  C
2      4  B
3      3  A
4      3  D

Comma_Separated_Values_of_column_Col-1: "A","B","C","D"

Summary_of_column_Col-2
   Count  Col-2
1      6  D
2      4  C
3      3  B
4      2  A

Comma_Separated_Values_of_column_Col-2: "A","B","C","D"
END
 }


if (0) {                                                                        #TisFileUtf8
  my $f = writeFile(undef, "aaa");
  ok isFileUtf8 $f;
 }

if (0) {                                                                        # Needs direct testing on multiple systems - takes too long for normal testing
if (1) {                                                                        #TnewUdsrServer #TnewUdsrClient #TUdsr::read #TUdsr::write #TUdsr::kill
  my $N = 20;
  my $s = newUdsrServer(serverAction=>sub
   {my ($u) = @_;
    my $r = $u->read;
    $u->write(qq(Hello from server $r));
   });

  my $p = newProcessStarter(min(100, $N));                                      # Run some clients
  for my $i(1..$N)



( run in 1.593 second using v1.01-cache-2.11-cpan-71847e10f99 )