App-Test-Generator

 view release on metacpan or  search on metacpan

bin/extract-schemas  view on Meta::CPAN

    'fuzz|f'          => \$fuzz,
    'fuzz-all'        => \$fuzz_all,
    'fuzz-iters=i'    => \$fuzz_iters,
    'corpus-dir|c=s'  => \$corpus_dir,
    'help|h'          => \$cli_opts{help},
    'man|m'           => \$cli_opts{man},
) or pod2usage(2);

pod2usage(-exitval => 0, -verbose => 1) if $cli_opts{help};
pod2usage(-exitval => 0, -verbose => 2) if $cli_opts{man};

if ($extractor_opts{strict_pod} !~ /^(off|warn|fatal)$/) {
	die "Invalid --strict-pod value '$extractor_opts{strict_pod}'. Expected off, warn, or fatal";
}

my $input_file = shift @ARGV or pod2usage('Error: No input file specified');
die "Error: File not found: $input_file" unless -f $input_file;

# Default corpus dir sits under the output dir
$corpus_dir //= File::Spec->catdir($extractor_opts{output_dir}, 'corpus');

# ---------------------------------------------------------------------------
# Schema extraction
# ---------------------------------------------------------------------------

print "Extracting schemas from: $input_file\n";
print "Output directory: $extractor_opts{output_dir}\n\n";

make_path($extractor_opts{output_dir}) unless -d $extractor_opts{output_dir};

my $extractor = App::Test::Generator::SchemaExtractor->new(
	input_file => $input_file,
	%extractor_opts,
);

my $schemas = $extractor->extract_all();

# ---------------------------------------------------------------------------
# Optional: coverage-guided fuzzing
# ---------------------------------------------------------------------------

my %fuzz_results;   # method_name => report hashref

if ($fuzz) {
    require App::Test::Generator::CoverageGuidedFuzzer;
    make_path($corpus_dir) unless -d $corpus_dir;

    # Load the target module once so all methods are callable
    my $package = _load_target_module($input_file, $schemas);

    # Try to build a default instance for object method calls.
    # Most OO modules need a $self as the first argument.
    # We try new() with no args, then new({}), then give up and fuzz as functions.
    my $instance = _try_construct($package);
    if ($instance) {
        print "Constructed $package instance for method calls.\n";
    } else {
        print "Could not construct $package instance; fuzzing as functions.\n";
    }

    print "Fuzzing with $fuzz_iters iterations per method",
          ($fuzz_all ? ' (all methods)' : ' (methods with known inputs)'),
          "...\n\n";

    foreach my $method (sort keys %$schemas) {
        my $schema = $schemas->{$method};
        my $iconf  = $schema->{_confidence}{input}{level} // 'low';

        unless ($fuzz_all) {
            # Skip methods with no input schema at all — there is nothing to fuzz
            next if $iconf eq 'none' && !%{ $schema->{input} // {} };
        }

        my $sub_ref = $package->can($method);
        unless ($sub_ref) {
            warn "  Skipping $method: not callable in $package\n";
            next;
        }

        # Skip constructors and AUTOLOAD — not suitable for direct fuzzing
        if ($method =~ /^(new|AUTOLOAD|DESTROY|import)$/) {
            print "  Skipping $method (constructor/special method)\n"
                if $extractor_opts{verbose};
            next;
        }

        my $corpus_file = File::Spec->catfile($corpus_dir, "$method.json");

        print "  Fuzzing $method ($iconf confidence)... ";

        my $fuzzer = App::Test::Generator::CoverageGuidedFuzzer->new(
            schema      => $schema,
            target_sub  => $sub_ref,
            instance    => $instance,
            iterations  => $fuzz_iters,
        );

        $fuzzer->load_corpus($corpus_file) if -f $corpus_file;

        my $report = $fuzzer->run();
        $fuzzer->save_corpus($corpus_file);

        $fuzz_results{$method} = $report;

        printf "%d bugs, %d branches covered\n",
            $report->{bugs_found},
            $report->{branches_covered};
    }

    print "\n";
}

# ---------------------------------------------------------------------------
# Summary report
# ---------------------------------------------------------------------------

print '=' x 70, "\n",
      "EXTRACTION SUMMARY\n",
      '=' x 70, "\n\n";

my %input_confidence_counts  = (high => 0, medium => 0, low => 0, none => 0);
my %output_confidence_counts = (high => 0, medium => 0, low => 0, none => 0);

foreach my $method (sort keys %$schemas) {
    my $schema = $schemas->{$method};
    my $iconf  = $schema->{_confidence}{input}{level}  // 'low';
    my $oconf  = $schema->{_confidence}{output}{level} // 'low';
    $input_confidence_counts{$iconf}++;
    $output_confidence_counts{$oconf}++;

    my $param_count = scalar grep { $_ !~ /^_/ } keys %{ $schema->{input} };

    my $fuzz_col = '';
    if (exists $fuzz_results{$method}) {
        my $r = $fuzz_results{$method};
        $fuzz_col = $r->{bugs_found}
            ? sprintf('  BUGS: %d', $r->{bugs_found})
            : '  fuzz: ok';
    }

    printf "%-30s %d params  [%s input confidence] [%s output confidence]%s\n",
        $method, $param_count, uc($iconf), uc($oconf), $fuzz_col;
}

print "\n";
print 'Total methods: ', (scalar keys %$schemas), "\n";
print "  Input:\n";
print "    High confidence:   $input_confidence_counts{high}\n";
print "    Medium confidence: $input_confidence_counts{medium}\n";
print "    Low confidence:    $input_confidence_counts{low}\n";
print "  Output:\n";
print "    High confidence:   $output_confidence_counts{high}\n";
print "    Medium confidence: $output_confidence_counts{medium}\n";
print "    Low confidence:    $output_confidence_counts{low}\n";
print "\n";

bin/extract-schemas  view on Meta::CPAN

# Returns the instance on success, undef if nothing works.
sub _try_construct {
    my ($package) = @_;

    for my $args ([], [{}], [undef]) {
        my $obj = eval { $package->new(@$args) };
        next if $@;
        next unless defined $obj && ref $obj;
        return $obj;
    }

    return undef;
}

__END__

=head1 SCHEMA FORMAT

The generated YAML files have the following structure:

    method: method_name
    confidence: high|medium|low
    notes:
      - Any warnings or suggestions
    input:
      param_name:
        type: string|integer|number|boolean|arrayref|hashref|object
        min: 5
        max: 100
        optional: 0
        matches: /pattern/

=head1 CONFIDENCE LEVELS

=over 4

=item B<high>

Strong evidence from POD and code analysis. Schema should be accurate.

=item B<medium>

Partial information available. Review recommended.

=item B<low>

Limited information. Manual review required.

=back

=head1 EXAMPLES

=head2 Basic Usage

    extract-schemas lib/MyModule.pm

=head2 Fuzz methods with known inputs

    extract-schemas --fuzz lib/MyModule.pm

=head2 Fuzz everything, 300 iterations, custom corpus dir

    extract-schemas --fuzz --fuzz-all --fuzz-iters 300 --corpus-dir t/corpus lib/MyModule.pm

=head2 Incremental fuzzing (corpus grows across runs)

    # First run: builds initial corpus
    extract-schemas --fuzz lib/MyModule.pm

    # Subsequent runs: loads corpus and extends it
    extract-schemas --fuzz lib/MyModule.pm

=head2 Verbose Mode

    extract-schemas --verbose lib/MyModule.pm

=head2 Pod Checking

  --strict-pod=LEVEL
    off    - do not validate POD
    warn   - warn on mismatches (default)
    fatal  - abort on mismatches

=head1 NEXT STEPS

After extracting schemas:

1. Review the generated YAML files, especially those marked low confidence
2. Edit the schemas to add missing information or correct errors
3. Use the schemas with App::Test::Generator:

    test-generator --schema schemas/my_method.yaml

=head1 SEE ALSO

L<App::Test::Generator>, L<App::Test::Generator::CoverageGuidedFuzzer>,
L<PPI>, L<Pod::Simple>

=head1 AUTHOR

Nigel Horne

=cut



( run in 1.085 second using v1.01-cache-2.11-cpan-96521ef73a4 )