Data-Table-Text

 view release on metacpan or  search on metacpan

README.md  view on Meta::CPAN


**Example:**

      is_deeply

       [summarizeColumn([map {[$_]} qw(A B D B C D C D A C D C B B D)], 0)],  # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲

       [[5, "D"], [4, "B"], [4, "C"], [2, "A"]];

      ok nws(formatTable
       ([map {[split m//, $_]} qw(AA CB CD BC DC DD CD AD AA DC CD CC BB BB BD)],
        [qw(Col-1 Col-2)],
         summarize=>1)) eq nws(<<'END');

    Summary_of_column                - Count of unique values found in each column                     Use the Geany flick capability by placing your cursor on the first word
    Comma_Separated_Values_of_column - Comma separated list of the unique values found in each column  of these lines and pressing control + down arrow to see each sub report.

        Col-1  Col-2
     1  A      A
     2  C      B
     3  C      D

lib/Data/Table/Text.pm  view on Meta::CPAN

sub fullyQualifyFile($)                                                         # Return the fully qualified name of a file.
 {my ($file) = @_;                                                              # File name
  return $file if fullyQualifiedFile($file);                                    # File is already fully qualified
  absFromAbsPlusRel(&currentDirectory, $file);                                  # Fully qualify file name
 } # fullyQualifyFile

sub removeDuplicatePrefixes($)                                                  # Remove duplicated leading directory names from a file name.
 {my ($file) = @_;                                                              # File name
  return $file unless $file =~ m(/)s;                                           # No path to deduplicate
  return $file if $file =~ m(\A[/.]);                                           # Later
  my ($p, @p) = split m(/), $file;
  shift @p while @p && $p[0] eq $p;
  join "/", $p, @p;
 } # removeDuplicatePrefixes

sub containingFolderName($)                                                     # The name of a folder containing a file.
 {my ($file) = @_;                                                              # File name
  my @p = split m(/), $file;
  return $p[-2] if @p > 1;
  confess "No folder name provided";
 } # containingFolderName
#D2 Position                                                                    # Position in the file system.

sub currentDirectory                                                            # Get the current working directory.
 {renormalizeFolderName(getcwd)
 } # currentDirectory

sub currentDirectoryAbove                                                       # Get the path to the folder above the current working folder.
 {my $path = currentDirectory;
  my @path = split m(/)s, $path;
  shift @path if @path and $path[0] =~ m/\A\s*\Z/;
  @path or confess "No directory above\n:".currentDirectory, "\n";
  pop @path;
  my $r = shift @path;
  filePathDir("/$r", @path);
 } # currentDirectoryAbove

sub parseFileName($)                                                            # Parse a file name into (path, name, extension) considering .. to be always part of the path and using B<undef> to mark missing components.  This differs from (fp, fn, f...
 {my ($file) = @_;                                                              # File name to parse
  defined($file) or confess "File required";

lib/Data/Table/Text.pm  view on Meta::CPAN

  for my $d(findAllFilesAndFolders($folder, 0))                                 # All files and folders beneath the start folder
   {push @f, $d if -d $d;                                                       # Do not include file names
   }
  @f                                                                            # Return folder names
 } # searchDirectoryTreeForSubFolders

sub hashifyFolderStructure(@)                                                   # Hashify a list of file names to get the corresponding folder structure.
 {my (@files) = @_;                                                             # File names
  my %h;
  for my $f(@files)                                                             # Map each file
   {my @f = split m(/), $f;
    my $s = join '', map {q({).dump($_).q(})} @f;                               # Hashify directory structure
    my $c = "\$h$s = ".dump($f);                                                # Load targets
    eval $c;
    confess $@ if $@;
   }
  \%h
 } # hashifyFolderStructure

sub countFileExtensions(@)                                                      # Return a hash which counts the file extensions in and below the folders in the specified list.
 {my (@folders) = @_;                                                           # Folders to search

lib/Data/Table/Text.pm  view on Meta::CPAN


sub overWritePerlCgiFile($$)                                                    # Write a L<Perl> file to /usr/lib/cgi-bin and make it executable after checking it for syntax errors.
 {my ($file, $data) = @_;                                                       # Target file relative to /var/www/html, data to write
  my $s = writeTempFile($data);
  my $r = qx(perl -c $s 2>&1);
  if ($r =~ m(syntax OK)si)
   {my $t = fpf(q(/usr/lib/cgi-bin/), $file);
    say STDERR qx(sudo mv $s $t; chmod o+rx $t);
   }
  else
   {my @data = map {[$_]} split m/\n/, $data;
    say STDERR formatTable([@data]);
    confess "Perl error:\n$r\n";
   }
  unlink $s;
 }

#D2 Copy                                                                        # Copy files and folders. The B<\Acopy.*Md5Normalized.*\Z> methods can be used to ensure that files have collision proof names that collapse duplicate content even when c...

sub copyFile($$)                                                                # Copy the B<$source> file encoded in utf8 to the specified B<$target> file in and return $target.
 {my ($source, $target) = @_;                                                   # Source file, target file

lib/Data/Table/Text.pm  view on Meta::CPAN

  my $sourceFile = fn $source;                                                  # File name component
  return fne($source) if $sourceFile =~ m([0-9a-z]{32}\Z)is;                    # Name already normalized
  my $sourceFileLimited = nameFromString($sourceFile);                          # File name with limited character set
  my $md5 = fileMd5Sum($source);                                                # Normalizing Md5 sum
  fpe($sourceFileLimited.q(_).$md5, fe $source);                                # Normalized name
 }

sub nameFromFolder($)                                                           # Create a name from the last folder in the path of a file name.  Return undef if the file does not have a path.
 {my ($file) = @_;                                                              # File name
  my $p = fp $file;
  my @p = onWindows ? split m(\\), $p : split m(/), $p;
  return $p[-1] if @p;
  undef
 }

sub copyBinaryFile($$)                                                          # Copy the binary file B<$source> to a file named <%target> and return the target file name,.
 {my ($source, $target) = @_;                                                   # Source file, target file
  overWriteBinaryFile($target, readBinaryFile($source));
  $target
 }

lib/Data/Table/Text.pm  view on Meta::CPAN

 }

sub partitionStringsOnPrefixBySize                                              # Partition a hash of strings and associated sizes into partitions with either a maximum size B<$maxSize> or only one element; the hash B<%Sizes> consisting of a mapping ...
 {my ($maxSize, %Sizes) = @_;                                                   # Maximum size of a partition, {string=>size}... hash to be partitioned

  my %paths;                                                                    # Path to each character in each string
  my %sizes;                                                                    # Size associate with each path
  for my $string(sort keys %Sizes)                                              # Create a path of hashes with single character keys
   {my $size = $Sizes{$string};                                                 # Size associated with the string
    my $paths = '';
    my @s = split m(), $string;                                                 # String as single characters
    while(@s)                                                                   # Shorten path
     {my $k = join '', map {qq({'$_'})} @s;                                     # Path of hashes with single character keys
      $paths .= qq(\$paths$k //= {};\n);                                        # Auto vivify
      my $d =  join '', @s;                                                     # Path name
      $sizes{$d} += $size;                                                      # Aggregate size
      pop @s;                                                                   # Move up one level
     }
    $sizes{q()} += $size;                                                       # Total size
    eval $paths;                                                                # Create paths - this level of aggregation seems to give the fastest overall response
    confess "$paths\n$@\n" if $@;                                               # Unable to create path

lib/Data/Table/Text.pm  view on Meta::CPAN

   }->();

  checkKeys(\%options, formatTableCheckKeys);                                   # Check report options

  my ($titleString, $title) = sub                                               # Title string, column headers
   {return (undef, undef) unless defined $columnTitles;                         # No titles
    if (my $r = reftype $columnTitles)                                          # Array of column titles
     {return (undef, $columnTitles) if $r =~ m(\Aarray\Z)si;
     }
    return (q(), q()) unless $columnTitles;                                     # Column titles are not required for hash of hashes
    my @c = map {[split m(\s+), $_, 2]} split "\n", $columnTitles;              # Column definitions
    my $s = &formatTable(\@c, [qw(Column Description)]);                        # Column definitions descriptions table
   ($s, [map {$$_[0]} @c])
   }->();

  my ($a, $h, $o) = (0, 0, 0);                                                  # Check structure of input data tttt
  my $checkStructure = sub
   {for(@_)
     {my $r = reftype($_);                                                      # Process arrays and hashes or objects built on them
      if ($r)
       {if ($r =~ /array/i)   {++$a}

lib/Data/Table/Text.pm  view on Meta::CPAN


  my @html;                                                                     # Generated html
  my $cl = q();                                                                 # Table column names
  my $ct = q();                                                                 # Columns description table if present

  if (my $columns = $options{columns})                                          # Column headers
   {ref($columns) and confess <<END;                                            # Describe column option
Expected one line per column wiith the forst weor dbeing teh column name and
the remainder being a comment describing the comment.
END
    my @c = map {[split m(\s+), $_, 2]} split "\n", $columns;                   # Parse column headers
    $cl = join '', q(<tr><th>), join q(<th>),
      map {my ($c, $d) = @$_; qq(<span title="$d">$c</span>)} @c;               # Column line with tool tips
    $ct = join "\n", q(<p><pre>), formatTableBasic([@c]), qq(</pre></p>\n);     # Column format
   }

  if (my $title = $options{title})                                              # Title
   {push @html, <<END;
<h1>$title</h1>
END
   }

lib/Data/Table/Text.pm  view on Meta::CPAN

sub stringSha256($)                                                             # Get the Sha256 of a string
 {my ($string) = @_;                                                            # String
  if (!defined($string))
    {confess "Undefined string";
    }
  sha256_hex $string;                                                           # Sha256 of string assuming it is ascii only
 }

sub indentString($$)                                                            # Indent lines contained in a string or formatted table by the specified string.
 {my ($string, $indent) = @_;                                                   # The string of lines to indent, the indenting string
  join "\n", map {$indent.$_} split m(\n+), (ref($string) ? $$string  : $string)
 }

sub replaceStringWithString($$$)                                                # Replace all instances in B<$string> of B<$source> with B<$target>.
 {my ($string, $source, $target) = @_;                                          # String in which to replace substrings, the string to be replaced, the replacement string
  for(1..(1+length($string) / (length($source)+1)))                             # Avoid too much recursive expansion
   {my $i = index($string, $source);
    if ($i >= 0)
     {substr($string, $i, length($source)) = $target;
      next;
     }

lib/Data/Table/Text.pm  view on Meta::CPAN

  $string =~ s(\\m) (\n\n)gs;                                                   # Expand \m introduced by update documentation

  for(1..9)
   {if ($string =~ m((B<([^>]*)>))s)
     {$string = replaceStringWithString(my $s = $string, $1, boldString($2));
      last if $s eq $string;
     }
   }

  my @f;
  my @w = split m/\s+/, $string;                                                # Parse string into words
  for my $w(@w)                                                                 # Bold B<string>
   {if (!$f[-1]) {push @f, $w}
    else
     {my $l = $f[-1].qq( $w);
      if (length($l) > $width)
       {push @f, $w;
       }
      else
       {$f[-1] = $l;
       }

lib/Data/Table/Text.pm  view on Meta::CPAN

  return () unless $string;

  my $s = 0;                                                                    # 0 - look for word or quote, 1 in word, 2 in ' string, 3 - in " string
  my @r;
  my $r;

  my $accept = sub                                                              # Accept a word or string
   {push @r, $r; $s = 0;
   };

  for my $c(split m//, $string)                                                 # Each character in the string
   {next if $s == 0 and $c =~ m(\s);                                            # Skip spaces while looking for a word or string

    if ($s == 0)                                                                # String
     {if    ($c =~ m('))                                                        # Single quoted ' string
       {$r = ''; $s = 2;
       }
      elsif ($c =~ m("))                                                        # Double quoted " string
       {$r = ''; $s = 3;
       }
      else                                                                      # Word

lib/Data/Table/Text.pm  view on Meta::CPAN

  my $p = $options{delete};                                                     # Delete option
  $p ? qq( --delete) : q()                                                      # Return delete keyword if profile specified
 }

sub s3ListFilesAndSizes($%)                                                     # Return {file=>size} for all the files in a specified B<$folderOrFile> on S3 using the specified B<%options> if any.
 {my ($folderOrFile, %options) = @_;                                            # Source on S3 - which will be truncated to a folder name, options
  my ($bucket, $folder) = parseS3BucketAndFolderName($folderOrFile);            # Parse an L<s3> bucket/folder name into a bucket and a folder name removing any initial s3://.
  my $profile = s3Profile(%options);                                            # Add profile if specified
  my $getCmd  = qq(aws s3 ls s3://$bucket/$folder $profile --recursive);        # Command to get the sizes of the files to download
  my $files   = qx($getCmd);                                                    # Get the sizes of the files to download
  my @files   = map {my @a = split m/\s+/, $_, 4; [@a[-1, -2, 0, 1]]}           # Files and sizes
                split m/\n/, $files;
  {map {q(s3://).fpf($bucket, $$_[0]) => $_} @files}                            # Hash {file=>[name, size, modified date, modified time]}
 }

sub s3FileExists($%)                                                            # Return (name, size, date, time) for a B<$file> that exists on S3 else () using the specified B<%options> if any.
 {my ($file, %options) = @_;                                                    # File on S3 - which will be truncated to a folder name, options
  my %files = s3ListFilesAndSizes($file, %options);                             # Details of files with that prefix
  return () unless keys %files == 1;                                            # Only one file expected
  my ($f)   = keys %files;                                                      # File name
  my $d     = $files{$f};                                                       # Details of the one file
  return () unless $$d[3];                                                      # All details present

lib/Data/Table/Text.pm  view on Meta::CPAN

sub postProcessImagesForDocumentation(%)                                        # Post process svg images into png and reload into repo for use by documentation. Useful for detailed svg images which can take a long time to load into a browser - it tr...
 {my (%options) = @_;                                                           # Options
  my $log   = $options{log}  // 1;                                              # Show actions if true
  my $size  = $options{size} // 4096;                                           # Longest size of png images to produce from svg
  my $home  = currentDirectory;                                                 # Home folder
  my $dir   = $options{target}  // fpd qw(lib Silicon Chip);                    # Target folder for images
  my $imgs  = fpd $home, $dir;                                                  # Images source folder
     $imgs  = $home if $ENV{GITHUB_TOKEN};                                      # Change folders for github
  my $svg   = fpd $imgs, qw(svg);                                               # Svg folder
  my $png   = fpd $imgs, qw(png);                                               # Png folder
  my ($user, $repo) =  split m(/), $ENV{GITHUB_REPOSITORY}//'';                 # Userid and repo from github

  makePath($png);                                                               # Make png folder

  my @f = searchDirectoryTreesForMatchingFiles $svg, qw(.svg);                  # Svg files from which we make png files

  my @r;                                                                        # Results
  for my $s(@f)                                                                 # Svg files
   {my $t = setFileExtension $s, q(png);
       $t = swapFilePrefix $t, $svg, $png;                                      # Matching png
    my $x = readFile $s;

lib/Data/Table/Text.pm  view on Meta::CPAN

sub syncFromS3InParallel($$$;$$)                                                # Download from L<S3> by using "aws s3 sync --exclude '*' --include '...'" in parallel to sync collections of two or more files no greater then B<$maxSize> or single file...
 {my ($maxSize, $source, $target, $Profile, $options) = @_;                     # The maximum collection size, the source folder on S3, the target folder locally, aws cli profile, aws cli options
                                                                                # See: /home/phil/r/z/partitionStrings.pl for standalone tests
  my ($bucket, $folder) = parseS3BucketAndFolderName($source);                  # Parse an L<s3> bucket/folder name into a bucket and a folder name removing any initial s3://.

  my $profile = $Profile ? qq( --profile $Profile) : q();                       # Add profile if specified
  $options  //= q();                                                            # Default options

  my $getCmd  = qq(aws s3 ls s3://$bucket/$folder $profile --recursive);        # Command to get the sizes of the files to download
  my $files   = qx($getCmd);                                                    # Get the sizes of the files to download
  my @files   = map {my @a = split m/\s+/, $_, 4; [@a[-1, -2]]}                 # Files and sizes
                split m/\n/, $files;
  return unless @files;                                                         # No files to download

  call sub                                                                      # Partition likely to cause a lot of memory fragmentation
   {my %partition = partitionStringsOnPrefixBySize($maxSize, map {@$_} @files); # Partition the download into collections no larger than the specified size

    processSizesInParallel(                                                     # Download folders packing by size
      sub
       {my ($P) = @_;                                                           # Path to folder to download
        return unless keys %partition > 1;                                      # Process in parallel only if there is more than one partition
        my $p = swapFilePrefix($P, $folder);                                    # Remove the folder because it will be added back by the sync command, see:

lib/Data/Table/Text.pm  view on Meta::CPAN

   }

  if ($rest !~ m(/)s)                                                           # File#id
   {return ($file, q(), $rest)
   }

  if ($rest =~ m(\A\./)s)                                                       # File#./id
   {return ($file, $TopicId || q(), $rest =~ s(\A\./) ()r)
   }

  my ($topicId, $id) = split m(/), $rest, 2;
  $topicId = $topicId || $TopicId || q();
  $topicId = $TopicId if $TopicId and $topicId =~ m(\A(\s*|\.)\Z);
  $id    ||= q();

  ($file, $topicId, $id)
 }

sub parseXmlDocType($)                                                          # Parse an L<xml> DOCTYPE and return a hash indicating its components.
 {my ($string) = @_;                                                            # String containing a DOCTYPE

lib/Data/Table/Text.pm  view on Meta::CPAN

  -e $file or confess "No such file: $file";                                    # Check file exists
  fe($file) eq "htm" or confess "htm file expected not $file\n";                # Check extension
  my $o = setFileExtension $file, q(html);                                      # Change extension to html
  owf $o, expandWellKnownWordsAsUrlsInHtmlFormat htmlToc file=>$file;           # Expand urls and add toc to create new file
 }

sub expandWellKnownWordsAsUrlsInMdFormat($)                                     # Expand words found in a string using the md url to supply a definition of that word.
 {my ($string)  = @_;                                                           # String containing url names to expand
  my $wellKnown = wellKnownUrls;                                                # Well known urls to expand

  my @s = split m/```/, $string;                                                # Separate code from text
  my $i = 0;

  for my $s(@s)
   {next  unless ++$i % 2;                                                      # Code is in odd sections
    for my $w(sort keys %$wellKnown)                                            # Expand well known words (lowercased) as html links
     {my ($t, $u) = @{$$wellKnown{$w}};
      $s =~ s(L\[$w\])            ([$t]($u))gis;                                # Explicit link
      $s =~ s(\s$w([.,;:'"]*)\s) ( [$t]($u)$1 )gs;                              # Word that matches
     }
   }

lib/Data/Table/Text.pm  view on Meta::CPAN

     {my (@e) = @_;                                                             # Error strings
      push $errors{$source}->@*, join ' ', @e;
     };

    while(@text)                                                                # Parse text of module
     {my $text = shift @text;

      if ($text =~ m(\A\s*def\s+(.*?)\((.*?)\)\s*:.*?#(\w*)\s+(.*))i)           # Def  function(parameter1 =1, parameter2 = 2) :  # first, second
       {my ($def, $parameters, $attributes, $parameterDefinitions) = @{^CAPTURE};

        my @p = split m/\s*,\s*/, $parameters;
        my @d = split m/\s*,\s*/, $parameterDefinitions;
        my $p = @p; my $d = @d;
        if ($p != $d)
         {my $l = currentLine;
          error qq(Number of parameters specified: $d does not equal),
                qq(number of parameters documented: $d on line: $l)
         }
        else
         {for my $p(@p)
           {my $c = ucfirst shift @d;
               $c =~ s(\.?\s*\Z) ()s;
            push $parameters{$class}{$def}->@*, [$p, $c];
           }
         }

        $comments{$class}{$def} = getDocString
       }
      elsif ($text =~ m(\A\s*def\s+(.*?)\((.*?)\)\s*:)i)                        # Def  function(parameter1 =1, parameter2 = 2) :
       {my ($def, $parameters) = @{^CAPTURE};
        my $doc = $comments{$class}{$def} = getDocString;

        my @p = split m/\s*,\s*/, $parameters;                                  # Parameters defined by a Python subroutine
        my %p;

        for my $line(split m/\n/, $doc)                                         # Check for parameter definitions
         {if ($line =~ m(\A\s*:\s*param\s*(.*?)\s*:\s*(.*?)\s*\Z))
           {my ($parm, $comment) = @{^CAPTURE};
            push $parameters{$class}{$def}->@*, [$parm, $comment];
            $parm =~ s(\A\s*(bool|str)\s*) ()s;                                 # Remove parameter type when present to get parameter name
            $p{$parm} = $comment;
           }
         }

        if (keys %p)                                                            # Use parameter definitions if present
         {if (@p != keys %p)

lib/Data/Table/Text.pm  view on Meta::CPAN

B<Example:>


    is_deeply

     [summarizeColumn([map {[$_]} qw(A B D B C D C D A C D C B B D)], 0)],  # 𝗘𝘅𝗮𝗺𝗽𝗹𝗲

     [[5, "D"], [4, "B"], [4, "C"], [2, "A"]];

    ok nws(formatTable
     ([map {[split m//, $_]} qw(AA CB CD BC DC DD CD AD AA DC CD CC BB BB BD)],
      [qw(Col-1 Col-2)],
       summarize=>1)) eq nws(<<'END');

  Summary_of_column                - Count of unique values found in each column                     Use the Geany flick capability by placing your cursor on the first word
  Comma_Separated_Values_of_column - Comma separated list of the unique values found in each column  of these lines and pressing control + down arrow to see each sub report.

      Col-1  Col-2
   1  A      A
   2  C      B
   3  C      D

lib/Data/Table/Text.pm  view on Meta::CPAN


if (1) {                                                                        # Check files
  my $d = filePath   (my @d = qw(a b c d));                                     #TcheckFile #TmatchPath
  my $f = filePathExt(qw(a b c d e x));                                         #TcheckFile
  my $F = filePathExt(qw(a b c e d));                                           #TcheckFile
  createEmptyFile($f);                                                          #TcheckFile
  ok matchPath($d) eq $d;                                                       #TmatchPath
  ok  eval{checkFile($d)};                                                      #TcheckFile
  ok  eval{checkFile($f)};                                                      #TcheckFile
  ok !eval {checkFile($F)};
  my @m = split m/\n/, $@;
  ok $m[1] eq prefferedFileName "a/b/c/";
  unlink $f;
  ok !-e $f;
  while(@d)                                                                     # Remove path
   {my $d = filePathDir(@d);
    rmdir $d;
    ok onWindows ? 1 : !-d $d;
    pop @d;
   }
 }

lib/Data/Table/Text.pm  view on Meta::CPAN

             [4, 5, 6],
             [7]];
 }

if (1) {                                                                        #TsummarizeColumn
  is_deeply
   [summarizeColumn([map {[$_]} qw(A B D B C D C D A C D C B B D)], 0)],
   [[5, "D"], [4, "B"], [4, "C"], [2, "A"]];

  ok nws(formatTable
   ([map {[split m//, $_]} qw(AA CB CD BC DC DD CD AD AA DC CD CC BB BB BD)],
    [qw(Col-1 Col-2)],
     summarize=>1)) eq nws(<<'END');

Summary_of_column                - Count of unique values found in each column                     Use the Geany flick capability by placing your cursor on the first word
Comma_Separated_Values_of_column - Comma separated list of the unique values found in each column  of these lines and pressing control + down arrow to see each sub report.

    Col-1  Col-2
 1  A      A
 2  C      B
 3  C      D



( run in 1.165 second using v1.01-cache-2.11-cpan-71847e10f99 )