App-Test-Generator
view release on metacpan or search on metacpan
lib/App/Test/Generator/SchemaExtractor.pm view on Meta::CPAN
$param => {
type => 'object',
optional => 1,
}
};
$schema->{_confidence}{input} = {
level => 'high',
factors => ['Input validated by Scalar::Util::blessed'],
};
} else {
# fallback ONLY if nothing known
$schema->{input} ||= {
value => { type => 'string', optional => 1 },
};
}
};
$schema->{accessor} = {
type => 'getset',
property => $property,
};
$self->_log(" Detected getter/setter accessor for property: $property");
if (my $pod = $method->{pod}) {
if ($pod =~ /\b(LWP::UserAgent(::\w+)*)\b/) {
my $class = $1;
$schema->{output} = {
type => 'object',
isa => $class,
};
$schema->{input}{$property} = {
type => 'object',
isa => $class,
optional => 1,
};
$schema->{_confidence}{output} = {
level => 'high',
factors => ['POD specifies UserAgent object'],
};
}
}
if(ref($schema->{input}) eq 'HASH') {
if(scalar keys(%{$schema->{input}}) > 1) {
croak(__PACKAGE__, ': A getset accessor function can have at most one argument');
}
}
$schema->{input}->{$property}->{position} = 0;
} elsif ($code =~ /return\s+\$self\s*->\s*\{\s*['"]?([^}'"]+)['"]?\s*\}\s*;/) {
# -------------------------------
# Getter
# -------------------------------
my $property = $1;
# Don't flag mutators like
# sub foo {
# my $self = shift;
# $self->{bar} = shift;
# return $self->{bar};
# }
# Only exclude if the property is being set FROM EXTERNAL INPUT
if($code !~ /\$self\s*->\s*\{\s*['"]?\Q$property\E['"]?\s*\}\s*=\s*(?:shift|\$\w+\s*=\s*shift|\@_|\$_\[\d+\])/) {
my @returns = $code =~ /return\b/g;
my @self_returns = $code =~ /return\s+\$self\s*->\s*\{\s*['"]?\Q$property\E['"]?\s*\}/g;
# it's a getter
if (scalar(@returns) == scalar(@self_returns)) {
# all returns are returning $self->{$property}, so it's a getter
$schema->{accessor} = {
type => 'getter',
property => $property,
};
$self->_log(" Detected getter accessor for property: $property");
$schema->{_confidence}{output} = {
level => 'high',
factors => ['Detected getter method'],
};
delete $schema->{input};
}
}
} elsif (
$code =~ /return\s+\$self\b/ &&
$code =~ /\$self\s*->\s*\{\s*['"]?([^}'"]+)['"]?\s*\}\s*=\s*\$(\w+)\s*;/
) {
# -------------------------------
# Setter
# -------------------------------
my ($property, $param) = ($1, $2);
$schema->{accessor} = {
type => 'setter',
property => $property,
param => $param,
};
$self->_log(" Detected setter accessor for property: $property");
$schema->{input} = {
$param => { type => 'string' }, # safe default
};
$schema->{input_style} = 'hash';
$schema->{_confidence}{input} = {
level => 'high',
factors => ['Detected setter/accessor method'],
};
if($schema->{output}{_returns_self}) {
if($schema->{output}{type} ne 'object') {
croak 'Setter can not return data other than $self';
}
if($schema->{output}{isa} ne $self->{_package_name}) {
croak 'Setter can not return data other than $self';
}
} elsif(scalar(keys %{$schema->{output}}) != 0) {
$self->_analysis_error(
method => $method->{name},
message => "Setter cannot return data",
);
}
}
lib/App/Test/Generator/SchemaExtractor.pm view on Meta::CPAN
#
# Entry: $pod - POD string for the method.
# $code - method body source string.
# $method_name - name of the method being
# analysed, used for
# boolean heuristics.
#
# Exit: Returns a hashref describing the
# output type and behaviour, or an empty
# hashref if nothing could be determined.
# Keys include: type, value, isa, and
# various _* metadata keys.
#
# Side effects: Logs progress to stdout when
# verbose is set.
# --------------------------------------------------
sub _analyze_output {
my ($self, $pod, $code, $method_name) = @_;
my %output;
$self->_analyze_output_from_pod(\%output, $pod);
$self->_analyze_output_from_code(\%output, $code, $method_name);
$self->_enhance_boolean_detection(\%output, $pod, $code, $method_name);
$self->_detect_list_context(\%output, $code);
$self->_detect_void_context(\%output, $code, $method_name);
$self->_detect_chaining_pattern(\%output, $code);
$self->_detect_error_conventions(\%output, $code);
$self->_validate_output(\%output) if keys %output;
# Don't return empty output
return (keys %output) ? \%output : {};
}
# --------------------------------------------------
# _analyze_output_from_pod
#
# Purpose: Parse the POD documentation for a
# method's return value and populate
# an output hashref with type, value,
# and behaviour information.
#
# Entry: $output - hashref to populate
# (modified in place).
# $pod - POD string for the method.
#
# Exit: Returns nothing. Modifies $output
# in place.
#
# Side effects: Logs detections to stdout when
# verbose is set.
#
# Notes: Two patterns are tried: (1) a
# 'Returns:' section of up to 3 lines,
# and (2) an inline 'returns X' phrase.
# The section pattern takes precedence.
# --------------------------------------------------
sub _analyze_output_from_pod {
my ($self, $output, $pod) = @_;
my %VALID_OUTPUT_TYPES = map { $_ => 1 }
qw(string integer number float boolean arrayref hashref object coderef void undef);
if ($pod) {
# Pattern 0: =head4 Output formal spec (highest priority â explicit over heuristic)
# The outer container shape determines the return type:
# (...) â list/array of items
# [...] â arrayref (bare [] = empty/void, skip)
# {...} â hashref spec; look for type => inside, or isa => for object
if($pod =~ /=head4\s+Output\b(.*?)(?==head|\z)/si) {
my $block = $1;
$block =~ s/^\s+//;
if($block =~ /^\(/) {
$output->{type} = 'array';
$self->_log(" OUTPUT: type 'array' from =head4 Output list notation");
} elsif($block =~ /^\[/) {
unless($block =~ /^\[\s*\]/) {
$output->{type} = 'arrayref';
$self->_log(" OUTPUT: type 'arrayref' from =head4 Output arrayref notation");
}
} elsif($block =~ /^\{/) {
if($block =~ /type\s*=>\s*['"]?(\w[\w:]*?)['"]?\s*[,}]/i) {
my $type = lc($1);
$type = 'hashref' if $type eq 'hash';
$type = 'arrayref' if $type eq 'array';
if($VALID_OUTPUT_TYPES{$type}) {
$output->{type} = $type;
$self->_log(" OUTPUT: type '$type' from =head4 Output formal spec");
} elsif($block =~ /\bisa\s*=>/) {
$output->{type} = 'object';
$self->_log(" OUTPUT: type 'object' from =head4 Output isa spec");
}
} elsif($block =~ /\bisa\s*=>/) {
$output->{type} = 'object';
$self->_log(" OUTPUT: type 'object' from =head4 Output isa spec");
}
}
}
# Pattern 1: Returns: section
# Up to 3 lines
if ($pod =~ /Returns?:\s+([^\n]+(?:\n[^\n]+){0,2})/si) {
my $returns_desc = $1;
$returns_desc =~ s/^\s+|\s+$//g;
$self->_log(" OUTPUT: Found Returns section: $returns_desc");
# Try to infer type from description (skip if Pattern 0 already set type)
if (!$output->{type} && $returns_desc =~ /\b(string|text)\b/i) {
$output->{type} = 'string';
} elsif (!$output->{type} && $returns_desc =~ /\b(integer|int|count)\b/i) {
$output->{type} = 'integer';
} elsif (!$output->{type} && $returns_desc =~ /\b(float|decimal|number)\b/i) {
$output->{type} = 'number';
} elsif (!$output->{type} && $returns_desc =~ /\b(boolean|true|false)\b/i) {
$output->{type} = 'boolean';
} elsif (!$output->{type} && $returns_desc =~ /\b(array|list)\b/i) {
$output->{type} = 'arrayref';
} elsif (!$output->{type} && $returns_desc =~ /\b(hash|hashref|dictionary)\b/i) {
$output->{type} = 'hashref';
} elsif (!$output->{type} && $returns_desc =~ /\b(object|instance)\b/i) {
$output->{type} = 'object';
} elsif (!$output->{type} && $returns_desc =~ /\bundef\b/i) {
$output->{type} = 'undef';
}
# Look for specific values
if ($returns_desc =~ /\b1\s+(?:on\s+success|if\s+successful)\b/i) {
$output->{value} = 1;
if(defined($output->{'type'}) && ($output->{type} eq 'scalar')) {
$output->{type} = 'boolean';
} else {
$output->{type} ||= 'boolean';
}
$self->_log(" OUTPUT: Returns 1 on success");
} elsif ($returns_desc =~ /\b0\s+(?:on\s+failure|if\s+fail)\b/i) {
$output->{alt_value} = 0;
} elsif ($returns_desc =~ /dies\s+on\s+(?:error|failure)/i) {
$output->{_STATUS} = 'LIVES';
$self->_log(' OUTPUT: Should not die on success');
}
if ($returns_desc =~ /\b(true|false)\b/i) {
$output->{type} ||= 'boolean';
}
if ($returns_desc =~ /\bundef\b/i) {
$output->{optional} = 1;
}
}
# Pattern 2: Inline "returns X"
if((!$output->{type}) && ($pod =~ /returns?\s+(?:an?\s+)?(\w+)/i)) {
my $type = lc($1);
$type = 'boolean' if $type =~ /^(true|false|bool)$/;
# Skip if it's just a number (like "returns 1")
$type = 'integer' if $type eq 'int';
$type = 'number' if $type =~ /^(num|float)$/;
$type = 'arrayref' if $type eq 'array';
$type = 'hashref' if $type eq 'hash';
if($type =~ /^\d+$/) {
if($type eq '1' || $type eq '0') {
# Try hard to guess if the result is a boolean
if($pod =~ /1 on success.+0 (on|if) /i) {
$type = 'boolean';
} elsif($pod =~ /return 0 .+ 1 on success/) {
$type = 'boolean';
} else {
$type = 'integer';
}
} else {
$type = 'integer';
}
}
$type = 'arrayref' if !$type && $pod =~ /returns?\s+.+\slist\b/i;
# $output->{type} = $type if $type && $type !~ /^\d+$/;
if ($VALID_OUTPUT_TYPES{$type}) {
$output->{type} = $type;
$self->_log(" OUTPUT: Inferred type from POD: $type");
} else {
$self->_log(" OUTPUT: POD return type '$type' is not a valid type, ignoring");
}
}
}
}
# --------------------------------------------------
# _extract_defaults_from_pod
#
# Purpose: Extract default values for parameters
# from POD documentation using multiple
# pattern strategies.
#
# Entry: $pod - POD string for the method.
# May be undef or empty.
#
# Exit: Returns a hashref of parameter name
# to cleaned default value. Returns an
# empty hashref if no POD is provided
# or no defaults are found.
#
# Side effects: None.
#
# Notes: Three strategies are tried: (1) lines
# containing 'Default:' or 'Defaults to:',
# (2) lines containing 'Optional, default',
# (3) inline $name - type, default value
# format. Parameter names are inferred
# by scanning backwards from the default
# phrase to the nearest $variable.
# --------------------------------------------------
sub _extract_defaults_from_pod {
my ($self, $pod) = @_;
return {} unless $pod;
my %defaults;
# Pattern 1: Default: 'value' or Defaults to: 'value'
while ($pod =~ /(?:Default(?:s? to)?|default(?:s? to)?)[:]\s*([^\n\r]+)/gi) {
my $default_text = $1;
my $match_pos = pos($pod);
$default_text =~ s/^\s+|\s+$//g;
# Look backwards in the POD to find the parameter name
my $context = substr($pod, 0, $match_pos);
my @param_matches = ($context =~ /\$(\w+)/g);
my $param = $param_matches[-1] if @param_matches; # Last parameter before default
if ($param) {
# Always clean the default value - let _clean_default_value handle everything
if ($default_text =~ /(\w+)\s*=\s*(.+)$/) {
# Has explicit param = value format in the default text
my ($p, $value) = ($1, $2);
$defaults{$p} = $self->_clean_default_value($value);
} else {
# Just a value, associate with the found param
$defaults{$param} = $self->_clean_default_value($default_text, 0); # NOT from code
}
}
}
lib/App/Test/Generator/SchemaExtractor.pm view on Meta::CPAN
$default_text =~ s/^\s+|\s+$//g;
# Look backwards for parameter name
my $context = substr($pod, 0, $match_pos);
my @param_matches = ($context =~ /\$(\w+)/g);
if (@param_matches) {
my $param = $param_matches[-1]; # Last parameter before the default
$defaults{$param} = $self->_clean_default_value($default_text, 0);
}
}
# Pattern 3: In parameter descriptions: $param - type, default 'value'
while ($pod =~ /\$(\w+)\s*-\s*\w+(?:\([^)]*\))?[,\s]+default\s+['"]?([^'",\n]+)['"]?/gi) {
my ($param, $value) = ($1, $2);
$defaults{$param} = $self->_clean_default_value($value, 0);
}
return \%defaults;
}
# --------------------------------------------------
# _analyze_output_from_code
#
# Purpose: Analyse return statements in a method
# body to infer the output type by
# counting and classifying each return
# expression.
#
# Entry: $output - hashref to populate
# (modified in place).
# $code - method body source string.
# $method_name - method name string.
#
# Exit: Returns nothing. Modifies $output
# in place.
#
# Side effects: Logs detections to stdout when
# verbose is set.
# --------------------------------------------------
sub _analyze_output_from_code
{
my ($self, $output, $code, $method_name) = @_;
if ($code) {
# Early boolean detection - check for consistent 1/0 returns
my @all_returns = $code =~ /return\s+([^;]+);/g;
if (@all_returns) {
my $boolean_count = 0;
my $total_count = scalar(@all_returns);
foreach my $ret (@all_returns) {
$ret =~ s/^\s+|\s+$//g;
# Match 0 or 1, even with conditions
$boolean_count++ if ($ret =~ /^(?:0|1)(?:\s|$)/);
}
# If most returns are 0 or 1, strongly suggest boolean
if ($boolean_count >= 2 && $boolean_count >= $total_count * 0.8) {
unless ($output->{type}) {
$output->{type} = 'boolean';
$self->_log(" OUTPUT: Early detection - $boolean_count/$total_count returns are 0/1, setting boolean");
}
}
}
my @return_statements;
if ($code =~ /return\s+bless\s*\{[^}]*\}\s*,\s*['"]?(\w+)['"]?/s) {
# Detect blessed refs
$output->{type} = 'object';
if($method_name eq 'new') {
# If we found the new() method, the object we're returning should be a sensible one
if($self->{_document} && (my $package_stmt = $self->{_document}->find_first('PPI::Statement::Package'))) {
$output->{isa} = $package_stmt->namespace();
$self->{_package_name} //= $output->{isa};
}
} else {
$output->{isa} = $1;
}
$self->_log(" OUTPUT: Bless found, inferring type from code is $output->{isa}");
} elsif ($code =~ /return\s+bless/s) {
$output->{type} = 'object';
if($method_name eq 'new') {
$output->{isa} = $self->_extract_package_name();
$self->_log(" OUTPUT: Bless found, inferring type from code is $output->{isa}");
} else {
$self->_log(' OUTPUT: Bless found, inferring type from code is object');
}
} elsif ($code =~ /return\s*\(\s*[^)]+\s*,\s*[^)]+\s*\)\s*;/) {
# Detect array context returns - must end with semicolon to be actual return
$output->{type} = 'array'; # Not arrayref - actual array
$self->_log(' OUTPUT: Found array contect return');
} elsif ($code =~ /return\s+bless[^,]+,\s*__PACKAGE__/) {
# Detect: bless {}, __PACKAGE__
$output->{type} = 'object';
# Get package name from the extractor's stored document
if ($self->{_document}) {
my $pkg = $self->{_document}->find_first('PPI::Statement::Package');
$output->{isa} = $pkg ? $pkg->namespace : 'UNKNOWN';
$self->_log(' OUTPUT: Object blessed into __PACKAGE__: ' . ($output->{isa} || 'UNKNOWN'));
$self->{_package_name} //= $output->{isa};
}
} elsif ($code =~ /return\s*\(([^)]+)\)/) {
my $content = $1;
if ($content =~ /,/) { # Has comma = multiple values
$output->{type} = 'array';
}
} elsif ($code =~ /return\s+\$self\s*;/ && $code =~ /\$self\s*->\s*\{[^}]+\}\s*=/) {
# Returns $self for chaining
$output->{type} = 'object';
if ($self->{_document}) {
my $pkg = $self->{_document}->find_first('PPI::Statement::Package');
$output->{isa} = $pkg ? $pkg->namespace : 'UNKNOWN';
$self->_log(' OUTPUT: Object chained into __PACKAGE__: ' . ($output->{isa} || 'UNKNOWN'));
$self->{_package_name} //= $output->{isa};
}
}
# Find all return statements
while ($code =~ /return\s+([^;]+);/g) {
my $return_expr = $1;
push @return_statements, $return_expr;
}
if (@return_statements) {
$self->_log(' OUTPUT: Found ' . scalar(@return_statements) . ' return statement(s)');
# Analyze return patterns
my %return_types;
if($output->{'type'}) {
$return_types{$output->{'type'}} += 3; # Add weighting to what's already been found
}
my $min;
foreach my $ret (@return_statements) {
$ret =~ s/^\s+|\s+$//g;
# Literal values
if ($ret eq '1' || $ret eq '0') {
$return_types{boolean}++;
} elsif ($ret =~ /^['"]/) {
$return_types{string}++;
} elsif ($ret =~ /^-?\d+$/) {
$return_types{integer}++;
} elsif ($ret =~ /^-?\d+\.\d+$/) {
$return_types{number}++;
} elsif ($ret eq 'undef') {
$return_types{undef}++;
} elsif ($ret =~ /^\[/) {
# Data structures
$return_types{arrayref}++;
} elsif ($ret =~ /^\{/) {
$return_types{hashref}++;
} elsif ($ret =~ m{
# Numeric expressions (heuristic, medium confidence)
# Don't match ->
(?:
\+ | -\b | \* | / | %
| \+\+ | --
)
}x) {
$return_types{number} += 2;
} elsif ($ret =~ /\|\|\s*\d+\b/) {
# Logical-or fallback with numeric literal (e.g. $x || 200)
$return_types{integer} += 2;
$self->_log(" OUTPUT: Numeric fallback expression detected");
} elsif($ret =~ /^length[\s\(]/) {
$return_types{integer}++;
$min = 0;
} elsif($ret =~ /^pos[\s\(]/) {
$return_types{integer}++;
$min = 0;
} elsif($ret =~ /^index[\s\(]/) {
$return_types{integer}++;
$min = -1;
} elsif($ret =~ /^rindex[\s\(]/) {
$return_types{integer}++;
$min = -1;
} elsif($ret =~ /^ord[\s\(]/) {
$return_types{integer}++;
} elsif ($ret =~ /=/ && $ret =~ /\$\w+/) {
# Assignment returning a value (e.g. $self->{status} = $status)
# If assignment involves a numeric literal or variable, assume numeric intent
if ($ret =~ /\b\d+\b/) {
$return_types{integer} += 2;
$self->_log(" OUTPUT: Assignment with numeric value detected");
} else {
$return_types{scalar}++;
}
}
# Variables/expressions
elsif ($ret =~ /\$\w+/) {
if ($ret =~ /\\\@/) {
$return_types{arrayref}++;
} elsif ($ret =~ /\\\%/) {
$return_types{hashref}++;
} elsif ($ret =~ /bless/) {
$return_types{object} += 2; # Heigher weight
} elsif ($ret =~ /^\{[^}]*\}$/) {
$return_types{hashref}++;
} elsif ($ret =~ /^\[[^\]]*\]$/) {
$return_types{arrayref}++;
} else {
$return_types{scalar}++;
}
}
}
# Determine most common return type
if (keys %return_types) {
my ($most_common) = sort { $return_types{$b} <=> $return_types{$a} } keys %return_types;
# Prefer integer over scalar if numeric returns dominate
if ($return_types{integer} && (!$return_types{string})) {
if (!$output->{type} || $output->{type} eq 'scalar') {
$output->{type} = 'integer';
$self->_log(" OUTPUT: Numeric returns dominate, forcing integer");
$output->{_type_confidence} ||= 'low';
if(defined($min)) {
$output->{min} = $min;
}
}
}
unless ($output->{type}) {
$output->{type} = $most_common;
# Assign confidence for inferred numeric expressions
if ($most_common eq 'number') {
$output->{_type_confidence} ||= 'medium';
if(defined($min)) {
$output->{min} = $min;
}
}
$self->_log(" OUTPUT: Inferred type from code: $most_common");
}
}
# Check for consistent single value returns
if (@return_statements == 1 && $return_statements[0] eq '1') {
$output->{value} = 1;
$output->{type} = 'boolean' if !$output->{type} || $output->{type} eq 'scalar';
$self->_log(" OUTPUT: Type already set to '$output->{type}', overriding with boolean") if($output->{'type'});
}
} else {
# No explicit return - might return nothing or implicit undef
$self->_log(" OUTPUT: No explicit return statement found");
}
}
}
# --------------------------------------------------
# _enhance_boolean_detection
#
# Purpose: Apply additional boolean-specific
# detection heuristics using a weighted
# scoring system, to override weak
# type assignments when there is strong
# evidence of a boolean return.
#
# Entry: $output - output hashref
# (modified in place).
# $pod - POD string.
# $code - method body source string.
# $method_name - method name string.
#
# Exit: Returns nothing. Modifies $output
# in place, setting type to 'boolean'
# if the score reaches
# $BOOLEAN_SCORE_THRESHOLD.
#
# Side effects: Logs scoring details to stdout when
# verbose is set.
#
# Notes: Only fires when output type is
# not yet set or is 'unknown'. Does not
# override explicitly set types.
# --------------------------------------------------
sub _enhance_boolean_detection {
my ($self, $output, $pod, $code, $method_name) = @_;
my $boolean_score = 0; # Track evidence for boolean return
return unless !$output->{type} || $output->{type} eq 'unknown';
# Look for stronger boolean indicators
if ($pod && !$output->{type}) {
# Common boolean return patterns in POD
if ($pod =~ /returns?\s+(true|false|true|false|1|0)\s+(?:on|for|upon)\s+(success|failure|error|valid|invalid)/i) {
$boolean_score += 30;
$self->_log(' OUTPUT: Strong boolean indicator in POD (+30)');
}
# Check for method names that suggest boolean returns
if ($pod =~ /(?:method|sub)\s+(\w+)/) {
my $inferred_method_name = $1;
if ($inferred_method_name =~ /^(is_|has_|can_|should_|contains_|exists_)/) {
$boolean_score += 20;
$self->_log(" OUTPUT: Inferred method name '$inferred_method_name' suggests boolean return (+20)");
}
}
}
# Analyze code for boolean patterns
if ($code) {
# Count boolean return idioms
my $true_returns = () = $code =~ /return\s+1\s*;/g;
my $false_returns = () = $code =~ /return\s+0\s*;/g;
if ($true_returns + $false_returns >= 2) {
$boolean_score += 40;
$self->_log(' OUTPUT: Multiple 1/0 returns suggest boolean (+40)');
} elsif ($true_returns + $false_returns == 1) {
$boolean_score += 10;
$self->_log(' OUTPUT: Single 1/0 return (+10)');
}
# Ternary operators that return booleans
if ($code =~ /return\s+(?:\w+\s*[!=]=\s*\w+|\w+\s*>\s*\w+|\w+\s*<\s*\w+)\s*\?\s*(?:1|0)\s*:\s*(?:1|0)/) {
$boolean_score += 25;
$self->_log(' OUTPUT: Ternary with 1/0 suggests boolean (+25)');
}
# Check for common boolean method patterns
if ($code =~ /return\s+[!\$\@\%]/) {
# Returns negation or existence check
$boolean_score += 15;
$self->_log(' OUTPUT: Returns negation/existence check (+15)');
}
}
# Check method name for boolean indicators
if ($method_name) {
if ($method_name =~ /^(is_|has_|can_|should_|contains_|exists_|check_|verify_|validate_)/) {
$boolean_score += 25;
$self->_log(" OUTPUT: Method name '$method_name' suggests boolean return (+25)");
}
if ($method_name =~ /_ok$/) {
$boolean_score += 30;
$self->_log(" OUTPUT: Method name '$method_name' ends with '_ok' (+30)");
}
}
# Apply boolean type if we have strong evidence
# Override weak type assignments (like 'array' from false positive)
if($boolean_score >= $BOOLEAN_SCORE_THRESHOLD) {
if (!$output->{type} || $output->{type} eq 'scalar' || $output->{type} eq 'array' || $output->{type} eq 'undef') {
my $old_type = $output->{type} || 'none';
$output->{type} = 'boolean';
$self->_log(" OUTPUT: Boolean score $boolean_score >= $BOOLEAN_SCORE_THRESHOLD, setting type to boolean (was: $old_type)");
}
}
}
# --------------------------------------------------
# _detect_list_context
#
# Purpose: Detect methods that return different
# values depending on calling context
# via wantarray, and methods that
# return explicit lists.
#
# Entry: $output - output hashref (modified
# in place).
# $code - method body source string.
#
# Exit: Returns nothing. Modifies $output
# in place, setting _context_aware,
# _list_context, _scalar_context,
# _list_return, and/or type keys.
#
# Side effects: Logs detections to stdout when
# verbose is set.
# --------------------------------------------------
sub _detect_list_context {
my ($self, $output, $code) = @_;
return unless $code;
# Check for wantarray usage
if ($code =~ /wantarray/) {
$output->{_context_aware} = 1;
$self->_log(' OUTPUT: Method uses wantarray - context sensitive');
# Debug: show what we're matching against
if ($code =~ /(wantarray[^;]+;)/s) {
$self->_log(" DEBUG wantarray line: $1");
}
if ($code =~ /wantarray\s*\?\s*\(([^)]+)\)\s*:\s*([^;]+)/s) {
# Pattern 1: wantarray ? (list, items) : scalar_value (with parens)
my ($list_return, $scalar_return) = ($1, $2);
$self->_log(" DEBUG list (with parens): [$list_return], scalar: [$scalar_return]");
$output->{_list_context} = $self->_infer_type_from_expression($list_return);
$output->{_scalar_context} = $self->_infer_type_from_expression($scalar_return);
$self->_log(' OUTPUT: Detected context-dependent returns (parenthesized)');
} elsif ($code =~ /wantarray\s*\?\s*([^:]+?)\s*:\s*([^;]+)/s) {
# Pattern 2: wantarray ? @array : scalar (no parens around list)
my ($list_return, $scalar_return) = ($1, $2);
# Clean up
$list_return =~ s/^\s+|\s+$//g;
$scalar_return =~ s/^\s+|\s+$//g;
$self->_log(" DEBUG list (no parens): [$list_return], scalar: [$scalar_return]");
$output->{_list_context} = $self->_infer_type_from_expression($list_return);
$output->{_scalar_context} = $self->_infer_type_from_expression($scalar_return);
$self->_log(' OUTPUT: Detected context-dependent returns (non-parenthesized)');
} elsif ($code =~ /return[^;]*unless\s+wantarray.*?return\s*\(([^)]+)\)/s) {
# Pattern 3: return unless wantarray; return (list);
$output->{_list_context} = { type => 'array' };
$self->_log(' OUTPUT: Detected list context return after wantarray check');
}
}
# Detect explicit list returns (multiple values in parentheses)
# Avoid false positives from function calls
if ($code =~ /return\s*\(\s*([^)]+)\s*\)\s*;/) {
my $content = $1;
# Count commas outside of nested structures
my $comma_count = 0;
my $depth = 0;
for my $char (split //, $content) {
$depth++ if $char eq '(' || $char eq '[' || $char eq '{';
$depth-- if $char eq ')' || $char eq ']' || $char eq '}';
$comma_count++ if $char eq ',' && $depth == 0;
}
if ($comma_count > 0 && $content !~ /\b(?:bless|new)\b/) {
# Multiple values returned
unless ($output->{type} && $output->{type} eq 'boolean') {
$output->{type} = 'array';
$output->{_list_return} = $comma_count + 1;
$self->_log(' OUTPUT: Returns list of ' . ($comma_count + 1) . ' values');
}
}
}
}
# --------------------------------------------------
# _detect_void_context
#
# Purpose: Detect methods that return nothing
# meaningful (void context), methods
# that always return 1 as a success
# indicator, and methods whose name
# suggests void context (setters,
# mutators, loggers).
#
# Entry: $output - output hashref
# (modified in place).
# $code - method body source string.
# $method_name - method name string.
#
# Exit: Returns nothing. Modifies $output
# in place, setting _void_context,
# _success_indicator, and/or type.
#
# Side effects: Logs detections to stdout when
# verbose is set.
# --------------------------------------------------
sub _detect_void_context {
my ($self, $output, $code, $method_name) = @_;
return unless $code;
$self->_log(" DEBUG _detect_void_context called for $method_name");
# Methods that typically don't return meaningful values
my $void_patterns = {
'setter' => qr/^set_\w+$/,
'mutator' => qr/^(?:add|remove|delete|clear|reset|update)_/,
'logger' => qr/^(?:log|debug|warn|error|info)$/,
'printer' => qr/^(?:print|say|dump)_/,
};
# Check if method name suggests void context
foreach my $type (keys %$void_patterns) {
if ($method_name =~ $void_patterns->{$type}) {
$output->{_void_context_hint} = $type;
$self->_log(" OUTPUT: Method name suggests $type (typically void context)");
last;
}
}
# Analyze return statements
my @returns = $code =~ /return\s*([^;]*);/g;
$self->_log(' DEBUG Found ' . scalar(@returns) . ' return statements');
# Count different return patterns
my $no_value_returns = 0;
my $true_returns = 0;
my $self_returns = 0;
foreach my $ret (@returns) {
$ret =~ s/^\s+|\s+$//g;
$self->_log(" DEBUG return value: [$ret]");
$no_value_returns++ if $ret eq '';
$no_value_returns++ if($ret =~ /^(if|unless)\s/);
$true_returns++ if $ret eq '1';
$self_returns++ if $ret eq '$self';
if ($ret =~ /\?\s*1\s*:\s*0\b/) {
# Strong boolean signal: ternary returning 1/0
$true_returns++;
# $self->_log(" OUTPUT: Ternary 1:0 return detected, treating as boolean (+40)");
$self->_log(' OUTPUT: Ternary 1:0 return detected, treating as boolean');
}
}
my $total_returns = scalar(@returns);
$self->_log(" DEBUG no_value=$no_value_returns, true=$true_returns, self=$self_returns, total=$total_returns");
# Void context indicators
if ($no_value_returns > 0 && $no_value_returns == $total_returns) {
$output->{_void_context} = 1;
$output->{type} = 'void'; # This should override any previous type
$self->_log(' OUTPUT: All returns are empty - void context method');
} elsif ($true_returns > 0 && $true_returns == $total_returns && $total_returns >= 1) {
# Methods that always return true (success indicator)
$output->{_success_indicator} = 1;
# Don't override type if already set to boolean
unless ($output->{type} && $output->{type} eq 'boolean') {
$output->{type} = 'boolean';
}
$self->_log(' OUTPUT: Always returns 1 - success indicator pattern');
}
}
# --------------------------------------------------
# _detect_chaining_pattern
#
# Purpose: Detect methods that return $self for
# fluent interface chaining, by counting
# the proportion of return statements
# that return $self.
#
# Entry: $output - output hashref (modified
# in place).
# $code - method body source string.
#
# Exit: Returns nothing. Modifies $output
# in place, setting type to 'object',
# _returns_self to 1, and isa to the
# current package name when the
# proportion of $self returns is >= 0.8.
#
# Side effects: Logs detection to stdout when
# verbose is set.
# --------------------------------------------------
sub _detect_chaining_pattern {
my ($self, $output, $code) = @_;
return unless $code;
# Count returns of $self
my $self_returns = 0;
my $total_returns = 0;
while ($code =~ /return\s+([^;]+);/g) {
my $ret = $1;
$ret =~ s/^\s+|\s+$//g;
$total_returns++;
$self_returns++ if $ret eq '$self';
}
# If most/all returns are $self, it's a chaining method
if ($self_returns > 0 && $total_returns > 0) {
my $ratio = $self_returns / $total_returns;
if ($ratio >= 0.8) {
$output->{type} = 'object';
$output->{_returns_self} = 1;
# Get the class name
if ($self->{_document}) {
my $pkg = $self->{_document}->find_first('PPI::Statement::Package');
$output->{isa} = $pkg ? $pkg->namespace : 'UNKNOWN';
$self->{_package_name} //= $output->{isa};
}
$self->_log(" OUTPUT: Chainable method - returns \$self ($self_returns/$total_returns returns)");
}
}
}
# --------------------------------------------------
# _detect_error_conventions
#
# Purpose: Analyse how a method signals errors
# by detecting patterns such as
# 'return undef if', implicit bare
# returns, empty list returns, 0/1
# boolean error patterns, and eval
# exception handling.
#
# Entry: $output - output hashref (modified
# in place).
# $code - method body source string.
#
# Exit: Returns nothing. Modifies $output
# in place, setting _error_handling,
# _error_return, and
# _success_failure_pattern keys.
#
# Side effects: Logs detections to stdout when
# verbose is set.
# --------------------------------------------------
sub _detect_error_conventions {
my ($self, $output, $code) = @_;
return unless $code;
$self->_log(' DEBUG _detect_error_conventions called');
my %error_patterns;
# Pattern 1: return undef if/unless condition
while ($code =~ /return\s+undef\s+(?:if|unless)\s+([^;]+);/g) {
push @{$error_patterns{undef_on_error}}, $1;
$self->_log(" DEBUG Found 'return undef' pattern");
}
# Pattern 2: return if/unless (implicit undef)
while ($code =~ /return\s+(?:if|unless)\s+([^;]+);/g) {
push @{$error_patterns{implicit_undef}}, $1;
$self->_log(" DEBUG Found implicit undef pattern");
}
# Pattern 3: return () - matches with or without conditions
if ($code =~ /return\s*\(\s*\)\s*(?:if|unless|;)/) {
$error_patterns{empty_list} = 1;
$self->_log(" DEBUG Found empty list return");
}
# Pattern 4: return 0/1 pattern (indicates boolean with error handling)
my $zero_returns = 0;
my $one_returns = 0;
# Match "return 0" or "return 1" followed by anything (condition or semicolon)
while ($code =~ /return\s+(0|1)\s*(?:;|if|unless)/g) {
if ($1 eq '0') {
$zero_returns++;
} else {
$one_returns++;
}
}
if ($zero_returns > 0 && $one_returns > 0) {
$error_patterns{zero_on_error} = 1;
$self->_log(" DEBUG Found 0/1 return pattern ($zero_returns zeros, $one_returns ones)");
}
# Pattern 5: Exception handling with eval
if ($code =~ /eval\s*\{/) {
# Check if there's error handling after eval
if ($code =~ /eval\s*\{.*?\}[^}]*(?:if\s*\(\s*\$\@|catch|return\s+undef)/s) {
$error_patterns{exception_handling} = 1;
$self->_log(' DEBUG Found exception handling with eval');
}
}
# Detect success/failure return pattern
my @all_returns = $code =~ /return\s+([^;]+);/g;
my $has_undef = grep { /^\s*undef\s*(?:if|unless|$)/ } @all_returns;
my $has_value = grep { !/^\s*undef\s*$/ && !/^\s*$/ } @all_returns;
if ($has_undef && $has_value && scalar(@all_returns) >= 2) {
$output->{_success_failure_pattern} = 1;
$self->_log(" OUTPUT: Uses success/failure return pattern");
}
# Store error conventions in output
if(scalar(keys %error_patterns)) {
$output->{_error_handling} = \%error_patterns;
# Determine primary error convention
if ($error_patterns{undef_on_error}) {
$output->{_error_return} = 'undef';
$self->_log(" OUTPUT: Returns undef on error");
} elsif ($error_patterns{implicit_undef}) {
$output->{_error_return} = 'undef';
$self->_log(" OUTPUT: Returns implicit undef on error");
} elsif ($error_patterns{empty_list}) {
$output->{_error_return} = 'empty_list';
$self->_log(" OUTPUT: Returns empty list on error");
} elsif ($error_patterns{zero_on_error}) {
$output->{_error_return} = 'false';
$self->_log(" OUTPUT: Returns 0/false on error");
}
if ($error_patterns{exception_handling}) {
$self->_log(" OUTPUT: Has exception handling");
}
} else {
delete $output->{_error_handling};
}
}
# --------------------------------------------------
# _infer_type_from_expression
#
# Purpose: Infer the data type of a return
# expression string by matching it
# against common Perl literal and
# variable patterns.
#
# Entry: $expr - return expression string,
# trimmed of leading and
# trailing whitespace.
# May be undef.
#
# Exit: Returns a type hashref of the form
# { type => '...' } and optionally
# { min => N }. Defaults to
# { type => 'scalar' } when no
# pattern matches.
#
# Side effects: None.
# --------------------------------------------------
sub _infer_type_from_expression {
my ($self, $expr) = @_;
return { type => 'scalar' } unless defined $expr;
$expr =~ s/^\s+|\s+$//g;
# Check for multiple comma-separated values (indicates array/list)
if ($expr =~ /,/) {
my $comma_count = 0;
my $depth = 0;
for my $char (split //, $expr) {
$depth++ if $char =~ /[\(\[\{]/;
$depth-- if $char =~ /[\)\]\}]/;
$comma_count++ if $char eq ',' && $depth == 0;
}
if ($comma_count > 0) {
return { type => 'array' };
}
}
# Check for @ prefix (array)
if ($expr =~ /^\@\w+/ || $expr =~ /^qw\(/ || $expr =~ /^\@\{/) {
return { type => 'array' };
}
# Check for scalar() function - returns count
if ($expr =~ /scalar\s*\(/) {
return { type => 'integer', min => 0 };
}
# Check for array reference
lib/App/Test/Generator/SchemaExtractor.pm view on Meta::CPAN
return { type => 'string' };
}
# Check for booleans first â must come before the integer check
# since /^-?\d+$/ would otherwise match 0 and 1 as integers
if($expr =~ /^[01]$/) {
return { type => 'boolean' };
}
# Check for integers
if($expr =~ /^-?\d+$/) {
return { type => 'integer' };
}
if ($expr =~ /^-?\d+\.\d+$/) {
return { type => 'number' };
}
# Check for objects
if ($expr =~ /bless/) {
return { type => 'object' };
}
if($expr =~ /\blength\s*\(/) {
return { type => 'integer', min => 0 };
}
# Default to scalar
return { type => 'scalar' };
}
# --------------------------------------------------
# _detect_chaining_from_pod
#
# Purpose: Check POD documentation for explicit
# indications that a method is chainable
# or part of a fluent interface.
#
# Entry: $output - output hashref (modified
# in place).
# $pod - POD string for the method.
#
# Exit: Returns nothing. Sets _returns_self
# in $output if chaining keywords are
# found.
#
# Side effects: Logs detection to stdout when
# verbose is set.
# --------------------------------------------------
sub _detect_chaining_from_pod {
my ($self, $output, $pod) = @_;
return unless $pod;
# Look for explicit chaining documentation
if ($pod =~ /returns?\s+(?:\$)?self\b/i ||
$pod =~ /chainable/i ||
$pod =~ /fluent\s+interface/i ||
$pod =~ /method\s+chaining/i) {
$output->{_returns_self} = 1;
$self->_log(" OUTPUT: POD indicates chainable/fluent interface");
}
}
# --------------------------------------------------
# _validate_output
#
# Purpose: Apply basic sanity checks to the
# assembled output hashref and warn
# about suspicious type combinations,
# normalising clearly invalid types to
# 'string'.
#
# Entry: $output - output hashref (modified
# in place).
#
# Exit: Returns nothing. May modify type key
# in $output. Logs warnings to stdout
# when verbose is set.
#
# Side effects: None.
# --------------------------------------------------
sub _validate_output {
my ($self, $output) = @_;
# Warn about suspicious combinations
if (defined $output->{type} && $output->{type} eq 'boolean' && !defined($output->{value})) {
$self->_log(' WARNING Boolean type without value - may want to set value: 1');
}
if ($output->{value} && defined $output->{type} && $output->{type} ne 'boolean') {
$self->_log(" WARNING Value set but type is not boolean: $output->{type}");
}
my %valid_types = map { $_ => 1 } qw(string integer number boolean array arrayref hashref object void);
if(exists $output->{type}) {
if(!$valid_types{$output->{type}}) {
$self->_log(" WARNING Output value type is unknown: '$output->{type}', setting to string");
$output->{type} = 'string';
}
}
}
# --------------------------------------------------
# _parse_constraints
#
# Purpose: Parse a constraint string extracted
# from POD documentation and populate
# min, max, or other constraint fields
# in a parameter hashref.
#
# Entry: $param - hashref for the parameter
# being annotated (modified
# in place).
# $constraint - the constraint string,
# e.g. '3-50', 'positive',
# '>= 0', 'min 3'.
#
# Exit: Returns nothing. Modifies $param in
# place by setting min and/or max keys.
#
# Side effects: Logs min/max values to stdout when
# verbose is set.
( run in 0.494 second using v1.01-cache-2.11-cpan-13bb782fe5a )