App-Test-Generator

 view release on metacpan or  search on metacpan

lib/App/Test/Generator/CoverageGuidedFuzzer.pm  view on Meta::CPAN

package App::Test::Generator::CoverageGuidedFuzzer;

use strict;
use warnings;
use Carp    qw(croak);
use feature 'state';
use Readonly;

# --------------------------------------------------
# Fuzzing loop parameters
# --------------------------------------------------
Readonly my $DEFAULT_ITERATIONS   => 100;
Readonly my $CORPUS_MUTATE_RATIO  => 0.70;  # 70% mutate, 30% explore
Readonly my $RANDOM_KEEP_RATIO    => 0.20;  # keep 20% random when no coverage
Readonly my $EDGE_CASE_RATIO      => 0.40;  # 40% chance to use declared edge case
Readonly my $INT_BOUNDARY_RATIO   => 0.30;  # 30% chance to use boundary int
Readonly my $STR_BOUNDARY_RATIO   => 0.30;  # 30% chance to use boundary length
Readonly my $SEED_CORPUS_SIZE     => 5;     # initial random inputs to seed corpus
Readonly my $DEFAULT_MAX_STR_LEN  => 64;
Readonly my $DEFAULT_MAX_ARRAY    => 4;     # max elements in random array (0..N)
Readonly my $INT32_MAX            => 2**31 - 1;
Readonly my $INT32_MIN            => -(2**31);

# --------------------------------------------------
# Type name constants — used in schema dispatch
# --------------------------------------------------
Readonly my $TYPE_INTEGER => 'integer';
Readonly my $TYPE_NUMBER  => 'number';
Readonly my $TYPE_BOOLEAN => 'boolean';
Readonly my $TYPE_ARRAY   => 'arrayref';
Readonly my $TYPE_HASH    => 'hashref';
Readonly my $TYPE_STRING  => 'string';

# --------------------------------------------------
# JSON module preference order
# --------------------------------------------------
Readonly my @JSON_MODULES => qw(JSON::MaybeXS JSON);

our $VERSION = '0.39';

=head1 NAME

App::Test::Generator::CoverageGuidedFuzzer - AFL-style coverage-guided fuzzing for App::Test::Generator

=head1 VERSION

Version 0.39

=head1 SYNOPSIS

    use App::Test::Generator::CoverageGuidedFuzzer;

    my $fuzzer = App::Test::Generator::CoverageGuidedFuzzer->new(
        schema     => $yaml_schema,
        target_sub => \&My::Module::validate,
        iterations => 200,
        seed       => 42,
    );

    my $report = $fuzzer->run();
    $fuzzer->save_corpus('t/corpus/validate.json');

=head1 DESCRIPTION

Implements coverage-guided fuzzing on top of App::Test::Generator's
existing schema-driven input generation. Instead of purely random
generation it:

=over 4

=item 1. Generates or mutates a structured input

=item 2. Runs the target sub under Devel::Cover to capture branch hits

=item 3. Keeps inputs that discover new branches in a corpus

=item 4. Preferentially mutates corpus entries in future iterations

=back

This is the Perl equivalent of what AFL/libFuzzer do at the byte level,
but operating on typed, schema-validated Perl data structures.

=head1 METHODS

=head2 new

Construct a new coverage-guided fuzzer.

    my $fuzzer = App::Test::Generator::CoverageGuidedFuzzer->new(
        schema     => $yaml_schema,
        target_sub => \&My::Module::validate,
        iterations => 200,
        seed       => 42,
        instance   => $obj,   # optional pre-built object for method calls
    );

=head3 Arguments

=over 4

=item * C<schema>

A hashref representing the parsed YAML schema for the target function.
Required.

=item * C<target_sub>

A CODE reference to the function under test. Required.

=item * C<iterations>

Number of fuzzing iterations to run. Optional - defaults to 100.

=item * C<seed>

Random seed for reproducible runs. Optional - defaults to C<time()>.

=item * C<instance>

An optional pre-built object to use as the invocant when calling the
target sub as a method.

=back

=head3 Returns

A blessed hashref. Croaks if C<schema> or C<target_sub> is missing.

=head3 API specification

=head4 input

    {
        schema     => { type => HASHREF },
        target_sub => { type => CODEREF },
        iterations => { type => SCALAR,  optional => 1 },
        seed       => { type => SCALAR,  optional => 1 },
        instance   => { type => OBJECT,  optional => 1 },
    }

=head4 output

    {
        type => OBJECT,
        isa  => 'App::Test::Generator::CoverageGuidedFuzzer',
    }

=cut

sub new {
	my ($class, %args) = @_;

	croak 'schema required'     unless $args{schema};
	croak 'target_sub required' unless $args{target_sub};

	my $self = bless {
		schema     => $args{schema},
		target_sub => $args{target_sub},
		instance   => $args{instance},
		iterations => $args{iterations} // $DEFAULT_ITERATIONS,
		seed       => $args{seed}       // time(),
		corpus     => [],   # [{input => ..., coverage => {...}}]
		covered    => {},   # "file:line:branch" => 1
		bugs       => [],   # [{input => ..., error => ...}]
		stats      => {
			total       => 0,
			interesting => 0,
			bugs        => 0,
			coverage    => 0,
		},
		_cover_available => undef,
	}, $class;

	srand($self->{seed});

	# Probe for Devel::Cover availability once at construction time
	$self->{_cover_available} = eval { require Devel::Cover; 1 } ? 1 : 0;

	# Warn once per process if coverage guidance is unavailable
	state $cover_warned = 0;
	if(!$self->{_cover_available} && !$cover_warned++) {
		warn 'Devel::Cover not available; fuzzing without coverage guidance.';
	}

	return $self;
}

=head2 run

Run the coverage-guided fuzzing loop and return a summary report.

    my $report = $fuzzer->run();
    printf "Branches covered: %d\n", $report->{branches_covered};
    printf "Bugs found:       %d\n", $report->{bugs_found};

=head3 Arguments

None beyond C<$self>.

=head3 Returns

A hashref with keys C<total_iterations>, C<interesting_inputs>,
C<corpus_size>, C<branches_covered>, C<bugs_found>, and C<bugs>.

=head3 API specification

=head4 input

    {
        self => { type => OBJECT, isa => 'App::Test::Generator::CoverageGuidedFuzzer' },
    }

=head4 output

    {
        type => HASHREF,
        keys => {
            total_iterations   => { type => SCALAR  },
            interesting_inputs => { type => SCALAR  },
            corpus_size        => { type => SCALAR  },
            branches_covered   => { type => SCALAR  },
            bugs_found         => { type => SCALAR  },
            bugs               => { type => ARRAYREF },
        },
    }

=cut

sub run {
	my ($self) = @_;

	# Phase 1: seed the corpus with a small set of random inputs
	$self->_seed_corpus();

	# Phase 2: main fuzzing loop — alternate between mutation and exploration
	for my $i (1 .. $self->{iterations}) {
		my $input;

		if(@{ $self->{corpus} } && rand() < $CORPUS_MUTATE_RATIO) {
			# Mutate a randomly chosen corpus entry
			my $parent = $self->{corpus}[ int(rand(@{ $self->{corpus} })) ];
			$input = $self->_mutate($parent->{input});
		} else {
			# Fresh random generation for exploration
			$input = $self->_generate_random();
		}

		$self->_run_one($input);
		$self->{stats}{total}++;
	}

	$self->{stats}{coverage} = scalar keys %{ $self->{covered} };
	return $self->_build_report();
}

=head2 corpus

Return the accumulated corpus as an arrayref of hashrefs with keys
C<input> and C<coverage>.

    my $corpus = $fuzzer->corpus();

=head3 API specification

=head4 input

    { self => { type => OBJECT, isa => 'App::Test::Generator::CoverageGuidedFuzzer' } }

=head4 output

    { type => ARRAYREF }

=cut

sub corpus { $_[0]->{corpus} }

=head2 bugs

Return bugs found as an arrayref of hashrefs with keys C<input> and
C<error>.

    my $bugs = $fuzzer->bugs();

=head3 API specification

=head4 input

    { self => { type => OBJECT, isa => 'App::Test::Generator::CoverageGuidedFuzzer' } }

=head4 output

    { type => ARRAYREF }

=cut

sub bugs { $_[0]->{bugs} }

lib/App/Test/Generator/CoverageGuidedFuzzer.pm  view on Meta::CPAN

# _mutate_hash
#
# Purpose:    Apply a random mutation to one value
#             in a hashref copy.
#
# Entry:      $h - the hashref to mutate.
# Exit:       Returns a mutated hashref copy.
# Side effects: None.
# --------------------------------------------------
sub _mutate_hash {
	my ($self, $h) = @_;

	my %copy = %{$h};
	my @keys = keys %copy;

	# Return unchanged if hash is empty
	return \%copy unless @keys;

	my $k = $keys[ int(rand(@keys)) ];
	$copy{$k} = $self->_mutate($copy{$k});

	return \%copy;
}

# --------------------------------------------------
# _seed_corpus
#
# Purpose:    Pre-populate the corpus with a small
#             set of randomly generated inputs to
#             give the fuzzing loop a starting point.
#
# Entry:      None beyond $self.
# Exit:       Returns nothing. Appends to $self->{corpus}.
# Side effects: Modifies $self->{corpus}.
# --------------------------------------------------
sub _seed_corpus {
	my $self = $_[0];

	for (1 .. $SEED_CORPUS_SIZE) {
		push @{ $self->{corpus} }, {
			input    => $self->_generate_random(),
			coverage => {},
		};
	}
}

# --------------------------------------------------
# _build_report
#
# Purpose:    Construct the summary report hashref
#             returned by run().
#
# Entry:      None beyond $self.
# Exit:       Returns a report hashref.
# Side effects: None.
# --------------------------------------------------
sub _build_report {
	my $self = $_[0];

	return {
		total_iterations   => $self->{stats}{total},
		interesting_inputs => $self->{stats}{interesting},
		corpus_size        => scalar @{ $self->{corpus} },
		branches_covered   => $self->{stats}{coverage},
		bugs_found         => $self->{stats}{bugs},
		bugs               => $self->{bugs},
	};
}

=head1 AUTHOR

Nigel Horne, C<< <njh at nigelhorne.com> >>

Portions of this module's initial design and documentation were created
with the assistance of AI.

=head1 LICENCE AND COPYRIGHT

Copyright 2026 Nigel Horne.

Usage is subject to GPL2 licence terms.
If you use it,
please let me know.

=cut

1;



( run in 0.813 second using v1.01-cache-2.11-cpan-96521ef73a4 )