binmode results from the CPAN

binmode

App-Cheats

view release on metacpan or search on metacpan

https://dev.to/drhyde/a-brief-guide-to-perl-character-encoding-if7

# Perl mojibake examples. (wrong length)
perl -E '$s = "é"; say $s . " contains " . length($s) . " chars"'
é contains 2 chars

# Perl mojibake examples. (utf8 is not enough)
perl -Mutf8 -E '$s = "Ã©"; say $s . " contains " . length($s) . " chars"'
ï¿½ contains 1 chars

# Perl mojibake examples. (-C or binmode to get correct encoding and therefore length)
perl -Mutf8 -E 'binmode(STDOUT, ":encoding(UTF-8)"); $s = "é"; say $s . " contains " . length($s) . " chars"'
perl -Mutf8 -C -E '$s = "é"; say $s . " contains " . length($s) . " chars"'
perl -Mutf8 -C -E 'binmode(STDOUT, ":encoding(UTF-8)"); $s = "é"; say $s . " contains " . length($s) . " chars"'
é contains 1 chars

# Perl mojibake examples. (Simulate malformed UTF-8 character warnings)
echo '"key": "Ã©"' > my.out
iconv -f utf-8 -t latin1 my.out > my2.out
file my*.out
cat my*
    "key": "ï¿½"
    "key": "Ã©"
cat my2.out | perl -Mutf8 -C -lne '/\d/'
cat my2.out | perl -C -lne '/\d/'
    Malformed UTF-8 character: \xe9\x22 (too short; 2 bytes available, need 3) in pattern match (m//) at -e line 1, <> line 1.
    Malformed UTF-8 character: \xe9\x22 (unexpected non-continuation byte 0x22, immediately after start byte 0xe9; need 3 bytes, got 1) in pattern match (m//) at -e line 1, <> line 1.
cat my2.out | perl -Mutf8 -C -ne '/\d/'
cat my2.out | perl -C -ne '/\d/'
perl -C -ne '/\d/' < my2.out
perl -CI -ne '/\d/' < my2.out
perl -ne 'INIT{binmode STDIN, ":utf8"} /\d/; print' < my2.out
    Malformed UTF-8 character: \xe9\x22\x0a (unexpected non-continuation byte 0x22, immediately after start byte 0xe9; need 3 bytes, got 1) in pattern match (m//) at -e line 1, <> line 1.
perl -ne 'INIT{binmode STDIN, ":encoding(UTF-8)"} /\d/; print' < my2.out
    "key": "\xE9"
perl -C -lne 'print utf8::valid($_) ? "valid" : "invalid"' < my.out
    valid
perl -C -lne 'print utf8::valid($_) ? "valid" : "invalid"' < my2.out
    invalid
#
# Summary:
    - A file/string may be declared as utf8, but it really is not.
    - "-CI" is the same as 'binmode STDIN, ":utf8"'
    - ":encoding(UTF-8)" should be preferred over ":utf8"
    - Use "utf8::valid" to check for malformed strings.

# iconv using perl (piconv)
# Saves a file using wrong encoding (mojibake)
perl -CA -le 'open OUT, ">:encoding(latin1)", "my3.out" or die $!; print OUT shift' '"key": "é",'

# Find non ascii characters.
perl -C -lne 'print $1 if /([^[:ascii:]])/' my.yml
uni_convert --string "$(perl -C -lne 'print $1 if /([^[:ascii:]])/' my.csv)"

cheats.txt view on Meta::CPAN

#############################################################
## Perl Modules - bignum
#############################################################

# Convert big numbers into full form
# from scientific notation to expanded form
echo "$b" | perl -Mbignum -lpe '$_ += 0'


#############################################################
## Perl Modules - binmode
#############################################################

# Using unicode in perl STDOUT
perl -CO   script
perl -C    script # Which is same as
perl -CDSL script # S includes I/O
perl -e 'binmode STDOUT, "encoding(UTF-8)"'
perl -e 'binmode STDOUT, ":utf8"'
perl -E 'use open qw/:std :utf8/; say "\N{SNOWFLAKE}"'

# Mixed up encoding.
perl -E '$s = "Ã©"; say length($s) . " $s"'
2 Ã©
perl -C -E '$s = "Ã©"; say length($s) . " $s"'
2 ÃƒÂ©
perl -Mutf8 -E '$s = "Ã©"; say length($s) . " $s"'
1 ï¿½
perl -C -Mutf8 -E '$s = "Ã©"; say length($s) . " $s"'

( run in 0.548 second using v1.01-cache-2.11-cpan-f79bc02f770 )