Serge
view release on metacpan or search on metacpan
lib/Serge/Util.pm view on Meta::CPAN
sub wrap {
my ($s, $length) = @_;
die "length should be a positive integer" unless $length > 0;
# Wrap by '\n' explicitly
if ($s =~ m{^(.*?(?:\\n|\n))(.+)$}s) {
my $a = $1; # if $1 and $2 are used directly, this won't work
my $b = $2;
return wrap($a, $length), wrap($b, $length);
}
# The following regexp was taken from the Translate Toolkit, file textwrap.py
my @a = split(/(\s+|[^\s\w]*\w+[a-zA-Z]-(?=\w+[a-zA-Z])|(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))/, $s);
my @lines;
my $line = '';
while (scalar(@a) > 0) {
# Take next chunk
my $chunk = shift @a;
# Treat whitespace chunks as zero-width to avoid starting the line with whitespace
my $chunk_length = ($chunk =~ m/^\s*$/) ? 0 : length($chunk);
if (length($line) + $chunk_length > $length) {
push @lines, $line;
# We do not handle the situation when chunk by itself is bigger than $length.
# We can optionally hard-break such chunks into sub-chunks of exact $length
# (this might be an option later)
$line = $chunk;
} else {
$line .= $chunk;
}
}
push @lines, $line if $line ne '';
return @lines;
}
sub read_and_normalize_file {
my ($fname) = @_;
# Reading the entire file
open(SRC, $fname) || die "Can't read [$fname]: $!";
binmode(SRC);
my $data = join('', <SRC>);
close(SRC);
my $decoder = Encode::Guess->guess($data);
if (ref($decoder)) {
my $enc = uc($decoder->name);
# remove BOM
# (not sure why this was done, as BOM is apparently needed for at least UTF-16 decoding;
# so I disabled BOM removal for UTF-16 for now)
$data =~ s/^\xFF\xFE//s if ($enc eq 'UTF-16LE');
#$data =~ s/^\xFE\xFF//s if (($enc eq 'UTF-16BE') || ($enc eq 'UTF-16'));
$data =~ s/^\xFF\xFE\x00\x00//s if ($enc eq 'UTF-32LE');
$data =~ s/^\x00\x00\xFE\xFF//s if (($enc eq 'UTF-32BE') || ($enc eq 'UTF-32'));
$data =~ s/^\xEF\xBB\xBF//s if (($enc eq 'UTF-8') || ($enc eq 'UTF8'));
$data = $decoder->decode($data);
} else {
if ($data =~ m/^<\?xml\s+(.+?)\?>/i) {
my $attrs = $1;
if ($attrs =~ m/encoding=['"](.+?)['"]/i) {
my $enc = uc($1);
#print "\t\tEncoding (from XML header): $enc\n";
# remove BOM
$data =~ s/^\xEF\xBB\xBF//s if (($enc eq 'UTF-8') || ($enc eq 'UTF8'));
$data = decode($enc, $data);
}
} else {
#print "\t\tEncoding (default): ASCII\n"; # $decoder holds the error string
}
}
$data =~ s/\r\n/\n/sg; # normalize line-feeds
return $data;
}
sub file_mtime {
my ($fname) = @_;
my ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size,
$atime, $mtime, $ctime, $blksize, $blocks) = stat($fname);
return $mtime;
}
1;
( run in 0.727 second using v1.01-cache-2.11-cpan-d7f47b0818f )