App-Test-Generator
view release on metacpan or search on metacpan
lib/App/Test/Generator/Template.pm view on Meta::CPAN
# TODO: add more, and remove magic numbers
use constant {
PROB_LOWERCASE => $config{prob_lowercase} // 0.72,
PROB_EDGE_CASE => $config{prob_edge_case} // 0.4,
};
# Seed for reproducible fuzzing (if provided)
[% seed_code %]
my %input = (
[% input_code %]
);
my %output = (
[% output_code %]
);
my %transforms = (
[% transforms_code %]
);
my @relationships = (
[% relationships_code %]
);
# Candidates for regex comparisons
my @candidate_good = ('123', 'abc', 'A1B2', '0');
my @candidate_bad = (
"ð", # emoji
"ï¼ï¼ï¼", # full-width digits
"١٢٣", # Arabic digits
'..', # regex metachars
"a\nb", # newline in middle
"é", # E acute
'x' x 5000, # huge string
*STDOUT,
' ', # space
"\t", # tab
"\r", # carriage return
# Added later if the configuration says so
# '', # empty
# undef, # undefined
# "\0", # null byte
);
my $positions = populate_positions(\%input);
# --- Fuzzer helpers ---
sub _pick_from {
my $arrayref = $_[0];
return undef unless $arrayref && ref $arrayref eq 'ARRAY' && @{$arrayref};
return $arrayref->[ int(rand(scalar @$arrayref)) ];
}
sub rand_ascii_str {
my $len = shift // int(rand(10)) + 1;
# join '', map { chr(97 + int(rand(26))) } 1..$len;
return Data::Random::String->create_random_string(length => $len, contains => 'alphanumeric');
}
my @unicode_codepoints = (
0x00A9, # ©
0x00AE, # ®
0x03A9, # Ω
0x20AC, # â¬
0x2013, # â (en-dash)
0x0301, # combining acute accent
0x0308, # combining diaeresis
0x1F600, # ð (emoji)
0x1F62E, # ð®
0x1F4A9, # ð© (yes)
);
# Tests for matches or nomatch
my @regex_tests = (
'match123',
'nope',
'/fullpath',
'/',
'/etc/passwd',
'../../etc/passwd',
"/etc/passwd\0",
"D:\\dos_path",
"I:\\",
'/(?{ exit 1 })/',
);
# unified generator to randomly produces codepoint strings,
# grapheme clusters, ZWJ emoji sequences, or aggressive Unicode fuzz strings
sub rand_str
{
my $len = $_[0];
if(!defined($len)) {
$len = int(rand(10)) + 1; # length random number between 1 and 10
}
return '' if($len == 0);
if(!($config{'test_non_ascii'} // 0)) {
return rand_ascii_str($len);
}
# my $rc = _rand_str_basic($len);
# $rc = _rand_unicode_fuzzer($len);
# my $l = Unicode::GCString->new($rc)->length();
# if($len > $l) {
# $rc .= 'a' x ($len - $l); # Why is this needed?
# }
# return $rc;
# TODO: length issues at the moment
my $mode = int(rand(5)); # 0..4
my $rc = _rand_str_basic($len);
$rc = _rand_str_basic($len) if $mode == 0;
$rc = _rand_codepoint_exact($len) if $mode == 1;
$rc = _rand_grapheme_exact($len) if $mode == 2;
$rc = _rand_unicode_fuzzer($len) if $mode == 3;
$rc = rand_ascii_str($len) if($mode == 4);
my $rc_len = Unicode::GCString->new($rc)->length();
if($rc_len > $len) {
my $gcstr = Unicode::GCString->new($rc);
$rc = $gcstr->substr(0, $len)->as_string();
$rc_len = Unicode::GCString->new($rc)->length();
}
if($len > $rc_len) {
$rc .= 'a' x ($len - $rc_len);
$rc_len = Unicode::GCString->new($rc)->length();
}
fail("BUG $rc_len != $len (mode == $mode)") if($rc_len != $len);
return $rc;
}
#####################################################
# 1. EXACT-LENGTH CODEPOINT MODE
# Generate a random string: mostly ASCII, sometimes unicode, sometimes nul bytes or combining marks
#####################################################
sub _rand_str_basic
{
my $len = $_[0];
my @chars;
for (1..$len) {
my $r = rand();
if ($r < PROB_LOWERCASE) {
push @chars, chr(97 + int(rand(26))); # a-z
} elsif ($r < 0.88) {
push @chars, chr(65 + int(rand(26))); # A-Z
} elsif ($r < 0.95) {
push @chars, chr(48 + int(rand(10))); # 0-9
} elsif($r < 0.975) {
push @chars, _rand_unicode_char(); # occasional emoji/marks
} elsif($config{'test_nuls'}) {
push @chars, chr(0); # nul byte injection
} else {
push @chars, chr(97 + int(rand(26))); # a-z
}
}
if (rand() < 0.08) {
# 8% chance to prepend combining acute accent (0301)
$chars[-1] = chr(0x0301);
} elsif (rand() < 0.08) {
# 8% chance to append combining diaeresis (0308)
$chars[-1] .= chr(0x0308);
}
return join('', @chars);
}
#####################################################
# 2. EXACT-LENGTH CODEPOINT MODE
# combining marks decorate characters
#####################################################
sub _rand_codepoint_exact {
my $len = $_[0];
my @chars;
for (1..$len) {
my $c = _rand_base_char();
if(rand() < 0.08) {
# prepend combining acute
$c = chr(0x0301)
} elsif(rand() < 0.08) {
# append combining dieresis
$c = chr(0x0308);
}
push @chars, $c;
}
return join('', @chars);
}
# helper for codepoint mode
sub _rand_base_char {
my $r = rand();
if ($r < 0.70) { return chr(97 + int(rand(26))); }
if ($r < 0.88) { return chr(65 + int(rand(26))); }
if ($r < 0.95) { return chr(48 + int(rand(10))); }
return _rand_unicode_char();
}
#####################################################
# 3. EXACT-LENGTH GRAPHEME-CLUSTER MODE
# each "character" is a whole grapheme cluster:
# - emoji with ZWJ sequences
# - flags
# - skin-tone variants
# - accented characters
#####################################################
sub _rand_grapheme_exact {
my $len = $_[0];
my @clusters;
my @emoji_base = qw(
ð ð ð 𤣠ð
ð ð ð ð¡ ð¥³
ð ð ð ð ð ð© ð§ â¤ï¸ ð«
);
my @skin_tones = (
"\x{1F3FB}", "\x{1F3FC}", "\x{1F3FD}", "\x{1F3FE}", "\x{1F3FF}"
);
my @zwj_parts = (
"\x{200D}\x{1F33A}", # ZWJ + Flower
"\x{200D}\x{1F4BB}", # ZWJ + Laptop
"\x{200D}\x{1F9D1}", # ZWJ + person
);
my @flags = (
"\x{1F1FA}\x{1F1F8}", # US
"\x{1F1EC}\x{1F1E7}", # UK
"\x{1F1E8}\x{1F1E6}", # CA
"\x{1F1E6}\x{1F1FA}", # AU
);
for (1..$len) {
my $type = rand();
if ($type < 0.4) {
# base emoji
my $e = $emoji_base[ rand @emoji_base ];
# TODO
# maybe add skin tone
# $e .= $skin_tones[rand @skin_tones] if rand() < 0.3;
# TODO
# maybe add zwj sequence
# $e .= $zwj_parts[rand @zwj_parts] if rand() < 0.15;
push @clusters, $e;
} elsif ($type < 0.55) {
# flag (always 1 grapheme cluster)
push @clusters, $flags[rand @flags];
} elsif ($type < 0.75) {
# accented letter (composed or decomposed)
my $base = chr(97 + int(rand(26))); # a-z
my $accented = $base . chr(0x0301);
$accented = Unicode::Normalize::NFC($accented) if rand() < 0.5;
push @clusters, $accented;
} else {
# fallback ASCII
push @clusters, chr(97 + int(rand(26)));
}
}
return join('', @clusters);
}
####################################################
# 4. UNICODE FUZZER MODE
# Extremely aggressive: invalid sequences, NULs, bidirectional markers, Zalgo
# Exclude unpaired surrogates for now, since TAP::Harness complains about that
####################################################
sub _rand_unicode_fuzzer {
my $len = $_[0];
my @out;
my @zalgo_up = map { chr($_) } (0x030D..0x036F);
my @bidi = ("\x{202A}", "\x{202B}", "\x{202D}", "\x{202E}", "\x{2066}", "\x{2067}");
my @weird = $config{'test_nuls'} ? ("\x{0000}", "\x{FFFD}", "\x{FEFF}") : ("\x{FFFD}", "\x{FEFF}");
for (1..$len) {
my $r = rand();
if ($r < 0.25) {
# push @out, chr( int(rand(0x10FFFF)) ); # random codepoint will include surragates
# Generate random codepoint, excluding surrogate range
my $cp;
do {
$cp = int(rand(0x10FFFF));
} while ($cp >= 0xD800 && $cp <= 0xDFFF);
push @out, chr($cp);
} elsif ($r < 0.40) {
push @out, chr(65 + int(rand(26))) . $zalgo_up[rand @zalgo_up]; # Zalgo
} elsif ($r < 0.55) {
push @out, $bidi[rand @bidi];
} elsif ($r < 0.70) {
push @out, $weird[rand @weird];
} else {
push @out, _rand_unicode_char();
}
}
return join('', @out);
}
###################################################
# Random Unicode character helper
###################################################
sub _rand_unicode_char
{
if(rand() < 0.5) {
my $cp = $unicode_codepoints[ int(rand(@unicode_codepoints)) ];
return chr($cp);
}
my @pool = (
0x00A9, 0x00AE, 0x2600, 0x2601,
0x1F600 + int(rand(200)), # emoji block
0x0300 + int(rand(80)), # combining marks
);
return chr( $pool[int rand @pool] );
}
# Random character either upper or lower case
# sub rand_char
# {
# return rand_chars(set => 'all', min => 1, max => 1);
# my $char = '';
# my $upper_chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
# my $lower_chars = 'abcdefghijklmnopqrstuvwxyz';
# my $combined_chars = $upper_chars . $lower_chars;
# # Generate a random index between 0 and the length of the string minus 1
# my $rand_index = int(rand(length($combined_chars)));
# # Get the character at that index
# return substr($combined_chars, $rand_index, 1);
# }
# Integer generator: mix typical small ints with large limits
sub rand_int {
my $r = rand();
if ($r < 0.75) {
return int(rand(200)) - 100; # -100 .. 100 (usual)
} elsif ($r < 0.9) {
return int(rand(2**31)) - 2**30; # 32-bit-ish
} elsif ($r < 0.98) {
return (int(rand(2**63)) - 2**62); # 64-bit-ish
} else {
# very large/suspicious values
return 2**63 - 1;
}
}
sub rand_bool { rand() > 0.5 ? 1 : 0 }
# Number generator (floating), includes tiny/huge floats
sub rand_num {
my $r = rand();
if ($r < 0.7) {
return (rand() * 200 - 100); # -100 .. 100
} elsif ($r < 0.9) {
return (rand() * 1e12) - 5e11; # large-ish
} elsif ($r < 0.95) {
return -0.0; # Negative 0
} elsif ($r < 0.96) {
return (rand() * 1e308) - 5e307; # very large floats
} elsif($r < 0.97) {
return 9**9**9; # Infinity
} else {
return 1e-308 * (rand() * 1000); # tiny float, subnormal-like
}
( run in 1.802 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )