App-Test-Generator

 view release on metacpan or  search on metacpan

lib/App/Test/Generator/SchemaExtractor.pm  view on Meta::CPAN

					$param => {
						type => 'object',
						optional => 1,
					}
				};

				$schema->{_confidence}{input} = {
					level   => 'high',
					factors => ['Input validated by Scalar::Util::blessed'],
				};
			} else {
				# fallback ONLY if nothing known
				$schema->{input} ||= {
					value => { type => 'string', optional => 1 },
				};
			}
		};
		$schema->{accessor} = {
			type => 'getset',
			property => $property,
		};

		$self->_log("  Detected getter/setter accessor for property: $property");
		if (my $pod = $method->{pod}) {
			if ($pod =~ /\b(LWP::UserAgent(::\w+)*)\b/) {
				my $class = $1;
				$schema->{output} = {
					type => 'object',
					isa => $class,
				};
				$schema->{input}{$property} = {
					type => 'object',
					isa => $class,
					optional => 1,
				};

				$schema->{_confidence}{output} = {
					level => 'high',
					factors => ['POD specifies UserAgent object'],
				};
			}
		}
		if(ref($schema->{input}) eq 'HASH') {
			if(scalar keys(%{$schema->{input}}) > 1) {
				croak(__PACKAGE__, ': A getset accessor function can have at most one argument');
			}
		}
		$schema->{input}->{$property}->{position} = 0;
	} elsif ($code =~ /return\s+\$self\s*->\s*\{\s*['"]?([^}'"]+)['"]?\s*\}\s*;/) {
		# -------------------------------
		# Getter
		# -------------------------------
		my $property = $1;

		# Don't flag mutators like
		# sub foo {
		    # my $self = shift;
		    # $self->{bar} = shift;
		    # return $self->{bar};
		# }
		# Only exclude if the property is being set FROM EXTERNAL INPUT
		if($code !~ /\$self\s*->\s*\{\s*['"]?\Q$property\E['"]?\s*\}\s*=\s*(?:shift|\$\w+\s*=\s*shift|\@_|\$_\[\d+\])/) {
			my @returns = $code =~ /return\b/g;
			my @self_returns = $code =~ /return\s+\$self\s*->\s*\{\s*['"]?\Q$property\E['"]?\s*\}/g;
			# it's a getter
			if (scalar(@returns) == scalar(@self_returns)) {
				# all returns are returning $self->{$property}, so it's a getter
				$schema->{accessor} = {
					type => 'getter',
					property => $property,
				};

				$self->_log("  Detected getter accessor for property: $property");

				$schema->{_confidence}{output} = {
					level => 'high',
					factors => ['Detected getter method'],
				};
				delete $schema->{input};
			}
		}
	} elsif (
		$code =~ /return\s+\$self\b/ &&
		$code =~ /\$self\s*->\s*\{\s*['"]?([^}'"]+)['"]?\s*\}\s*=\s*\$(\w+)\s*;/
	) {
		# -------------------------------
		# Setter
		# -------------------------------
		my ($property, $param) = ($1, $2);

		$schema->{accessor} = {
			type => 'setter',
			property => $property,
			param => $param,
		};

		$self->_log("  Detected setter accessor for property: $property");

		$schema->{input} = {
			$param => { type => 'string' }, # safe default
		};
		$schema->{input_style} = 'hash';

		$schema->{_confidence}{input} = {
			level => 'high',
			factors => ['Detected setter/accessor method'],
		};
		if($schema->{output}{_returns_self}) {
			if($schema->{output}{type} ne 'object') {
				croak 'Setter can not return data other than $self';
			}
			if($schema->{output}{isa} ne $self->{_package_name}) {
				croak 'Setter can not return data other than $self';
			}
		} elsif(scalar(keys %{$schema->{output}}) != 0) {
			$self->_analysis_error(
				method  => $method->{name},
				message => "Setter cannot return data",
			);
		}
	}

lib/App/Test/Generator/SchemaExtractor.pm  view on Meta::CPAN

#
# Entry:      $pod         - POD string for the method.
#             $code        - method body source string.
#             $method_name - name of the method being
#                            analysed, used for
#                            boolean heuristics.
#
# Exit:       Returns a hashref describing the
#             output type and behaviour, or an empty
#             hashref if nothing could be determined.
#             Keys include: type, value, isa, and
#             various _* metadata keys.
#
# Side effects: Logs progress to stdout when
#               verbose is set.
# --------------------------------------------------
sub _analyze_output {
	my ($self, $pod, $code, $method_name) = @_;

	my %output;

	$self->_analyze_output_from_pod(\%output, $pod);
	$self->_analyze_output_from_code(\%output, $code, $method_name);
	$self->_enhance_boolean_detection(\%output, $pod, $code, $method_name);
	$self->_detect_list_context(\%output, $code);
	$self->_detect_void_context(\%output, $code, $method_name);
	$self->_detect_chaining_pattern(\%output, $code);
	$self->_detect_error_conventions(\%output, $code);

	$self->_validate_output(\%output) if keys %output;

	# Don't return empty output
	return (keys %output) ? \%output : {};
}

# --------------------------------------------------
# _analyze_output_from_pod
#
# Purpose:    Parse the POD documentation for a
#             method's return value and populate
#             an output hashref with type, value,
#             and behaviour information.
#
# Entry:      $output - hashref to populate
#                       (modified in place).
#             $pod    - POD string for the method.
#
# Exit:       Returns nothing. Modifies $output
#             in place.
#
# Side effects: Logs detections to stdout when
#               verbose is set.
#
# Notes:      Two patterns are tried: (1) a
#             'Returns:' section of up to 3 lines,
#             and (2) an inline 'returns X' phrase.
#             The section pattern takes precedence.
# --------------------------------------------------
sub _analyze_output_from_pod {
	my ($self, $output, $pod) = @_;
	my %VALID_OUTPUT_TYPES = map { $_ => 1 }
		qw(string integer number float boolean arrayref hashref object coderef void undef);

	if ($pod) {
		# Pattern 0: =head4 Output formal spec (highest priority — explicit over heuristic)
		# The outer container shape determines the return type:
		#   (...)  — list/array of items
		#   [...]  — arrayref  (bare [] = empty/void, skip)
		#   {...}  — hashref spec; look for type => inside, or isa => for object
		if($pod =~ /=head4\s+Output\b(.*?)(?==head|\z)/si) {
			my $block = $1;
			$block =~ s/^\s+//;
			if($block =~ /^\(/) {
				$output->{type} = 'array';
				$self->_log("  OUTPUT: type 'array' from =head4 Output list notation");
			} elsif($block =~ /^\[/) {
				unless($block =~ /^\[\s*\]/) {
					$output->{type} = 'arrayref';
					$self->_log("  OUTPUT: type 'arrayref' from =head4 Output arrayref notation");
				}
			} elsif($block =~ /^\{/) {
				if($block =~ /type\s*=>\s*['"]?(\w[\w:]*?)['"]?\s*[,}]/i) {
					my $type = lc($1);
					$type = 'hashref'  if $type eq 'hash';
					$type = 'arrayref' if $type eq 'array';
					if($VALID_OUTPUT_TYPES{$type}) {
						$output->{type} = $type;
						$self->_log("  OUTPUT: type '$type' from =head4 Output formal spec");
					} elsif($block =~ /\bisa\s*=>/) {
						$output->{type} = 'object';
						$self->_log("  OUTPUT: type 'object' from =head4 Output isa spec");
					}
				} elsif($block =~ /\bisa\s*=>/) {
					$output->{type} = 'object';
					$self->_log("  OUTPUT: type 'object' from =head4 Output isa spec");
				}
			}
		}

		# Pattern 1: Returns: section
		# Up to 3 lines
		if ($pod =~ /Returns?:\s+([^\n]+(?:\n[^\n]+){0,2})/si) {
			my $returns_desc = $1;
			$returns_desc =~ s/^\s+|\s+$//g;

			$self->_log("  OUTPUT: Found Returns section: $returns_desc");

			# Try to infer type from description (skip if Pattern 0 already set type)
			if (!$output->{type} && $returns_desc =~ /\b(string|text)\b/i) {
				$output->{type} = 'string';
			} elsif (!$output->{type} && $returns_desc =~ /\b(integer|int|count)\b/i) {
				$output->{type} = 'integer';
			} elsif (!$output->{type} && $returns_desc =~ /\b(float|decimal|number)\b/i) {
				$output->{type} = 'number';
			} elsif (!$output->{type} && $returns_desc =~ /\b(boolean|true|false)\b/i) {
				$output->{type} = 'boolean';
			} elsif (!$output->{type} && $returns_desc =~ /\b(array|list)\b/i) {
				$output->{type} = 'arrayref';
			} elsif (!$output->{type} && $returns_desc =~ /\b(hash|hashref|dictionary)\b/i) {
				$output->{type} = 'hashref';
			} elsif (!$output->{type} && $returns_desc =~ /\b(object|instance)\b/i) {
				$output->{type} = 'object';
			} elsif (!$output->{type} && $returns_desc =~ /\bundef\b/i) {
				$output->{type} = 'undef';
			}

			# Look for specific values
			if ($returns_desc =~ /\b1\s+(?:on\s+success|if\s+successful)\b/i) {
				$output->{value} = 1;
				if(defined($output->{'type'}) && ($output->{type} eq 'scalar')) {
					$output->{type} = 'boolean';
				} else {
					$output->{type} ||= 'boolean';
				}
				$self->_log("  OUTPUT: Returns 1 on success");
			} elsif ($returns_desc =~ /\b0\s+(?:on\s+failure|if\s+fail)\b/i) {
				$output->{alt_value} = 0;
			} elsif ($returns_desc =~ /dies\s+on\s+(?:error|failure)/i) {
				$output->{_STATUS} = 'LIVES';
				$self->_log('  OUTPUT: Should not die on success');
			}
			if ($returns_desc =~ /\b(true|false)\b/i) {
				$output->{type} ||= 'boolean';
			}
			if ($returns_desc =~ /\bundef\b/i) {
				$output->{optional} = 1;
			}
		}

		# Pattern 2: Inline "returns X"
		if((!$output->{type}) && ($pod =~ /returns?\s+(?:an?\s+)?(\w+)/i)) {
			my $type = lc($1);

			$type = 'boolean' if $type =~ /^(true|false|bool)$/;
			# Skip if it's just a number (like "returns 1")
			$type = 'integer' if $type eq 'int';
			$type = 'number' if $type =~ /^(num|float)$/;
			$type = 'arrayref' if $type eq 'array';
			$type = 'hashref' if $type eq 'hash';

			if($type =~ /^\d+$/) {
				if($type eq '1' || $type eq '0') {
					# Try hard to guess if the result is a boolean
					if($pod =~ /1 on success.+0 (on|if) /i) {
						$type = 'boolean';
					} elsif($pod =~ /return 0 .+ 1 on success/) {
						$type = 'boolean';
					} else {
						$type = 'integer';
					}
				} else {
					$type = 'integer';
				}
			}

			$type = 'arrayref' if !$type && $pod =~ /returns?\s+.+\slist\b/i;
			# $output->{type} = $type if $type && $type !~ /^\d+$/;
			if ($VALID_OUTPUT_TYPES{$type}) {
				$output->{type} = $type;
				$self->_log("  OUTPUT: Inferred type from POD: $type");
			} else {
				$self->_log("  OUTPUT: POD return type '$type' is not a valid type, ignoring");
			}
		}
	}
}

# --------------------------------------------------
# _extract_defaults_from_pod
#
# Purpose:    Extract default values for parameters
#             from POD documentation using multiple
#             pattern strategies.
#
# Entry:      $pod - POD string for the method.
#                    May be undef or empty.
#
# Exit:       Returns a hashref of parameter name
#             to cleaned default value. Returns an
#             empty hashref if no POD is provided
#             or no defaults are found.
#
# Side effects: None.
#
# Notes:      Three strategies are tried: (1) lines
#             containing 'Default:' or 'Defaults to:',
#             (2) lines containing 'Optional, default',
#             (3) inline $name - type, default value
#             format. Parameter names are inferred
#             by scanning backwards from the default
#             phrase to the nearest $variable.
# --------------------------------------------------
sub _extract_defaults_from_pod {
	my ($self, $pod) = @_;

	return {} unless $pod;

	my %defaults;

	# Pattern 1: Default: 'value' or Defaults to: 'value'
	while ($pod =~ /(?:Default(?:s? to)?|default(?:s? to)?)[:]\s*([^\n\r]+)/gi) {
		my $default_text = $1;
		my $match_pos = pos($pod);
		$default_text =~ s/^\s+|\s+$//g;

		# Look backwards in the POD to find the parameter name
		my $context = substr($pod, 0, $match_pos);
		my @param_matches = ($context =~ /\$(\w+)/g);
		my $param = $param_matches[-1] if @param_matches;  # Last parameter before default

		if ($param) {
			# Always clean the default value - let _clean_default_value handle everything
			if ($default_text =~ /(\w+)\s*=\s*(.+)$/) {
				# Has explicit param = value format in the default text
				my ($p, $value) = ($1, $2);
				$defaults{$p} = $self->_clean_default_value($value);
			} else {
				# Just a value, associate with the found param
				$defaults{$param} = $self->_clean_default_value($default_text, 0);  # NOT from code
			}
		}
	}

lib/App/Test/Generator/SchemaExtractor.pm  view on Meta::CPAN

		$default_text =~ s/^\s+|\s+$//g;

		# Look backwards for parameter name
		my $context = substr($pod, 0, $match_pos);
		my @param_matches = ($context =~ /\$(\w+)/g);
		if (@param_matches) {
			my $param = $param_matches[-1];  # Last parameter before the default
			$defaults{$param} = $self->_clean_default_value($default_text, 0);
		}
	}

	# Pattern 3: In parameter descriptions: $param - type, default 'value'
	while ($pod =~ /\$(\w+)\s*-\s*\w+(?:\([^)]*\))?[,\s]+default\s+['"]?([^'",\n]+)['"]?/gi) {
		my ($param, $value) = ($1, $2);
		$defaults{$param} = $self->_clean_default_value($value, 0);
	}

	return \%defaults;
}

# --------------------------------------------------
# _analyze_output_from_code
#
# Purpose:    Analyse return statements in a method
#             body to infer the output type by
#             counting and classifying each return
#             expression.
#
# Entry:      $output      - hashref to populate
#                            (modified in place).
#             $code        - method body source string.
#             $method_name - method name string.
#
# Exit:       Returns nothing. Modifies $output
#             in place.
#
# Side effects: Logs detections to stdout when
#               verbose is set.
# --------------------------------------------------
sub _analyze_output_from_code
{
	my ($self, $output, $code, $method_name) = @_;

	if ($code) {
		# Early boolean detection - check for consistent 1/0 returns
		my @all_returns = $code =~ /return\s+([^;]+);/g;
		if (@all_returns) {
			my $boolean_count = 0;
			my $total_count = scalar(@all_returns);

			foreach my $ret (@all_returns) {
				$ret =~ s/^\s+|\s+$//g;
				# Match 0 or 1, even with conditions
				$boolean_count++ if ($ret =~ /^(?:0|1)(?:\s|$)/);
			}

			# If most returns are 0 or 1, strongly suggest boolean
			if ($boolean_count >= 2 && $boolean_count >= $total_count * 0.8) {
				unless ($output->{type}) {
					$output->{type} = 'boolean';
					$self->_log("  OUTPUT: Early detection - $boolean_count/$total_count returns are 0/1, setting boolean");
				}
			}
		}

		my @return_statements;

		if ($code =~ /return\s+bless\s*\{[^}]*\}\s*,\s*['"]?(\w+)['"]?/s) {
			# Detect blessed refs
			$output->{type} = 'object';
			if($method_name eq 'new') {
				# If we found the new() method, the object we're returning should be a sensible one
				if($self->{_document} && (my $package_stmt = $self->{_document}->find_first('PPI::Statement::Package'))) {
					$output->{isa} = $package_stmt->namespace();
					$self->{_package_name} //= $output->{isa};
				}
			} else {
				$output->{isa} = $1;
			}
			$self->_log("  OUTPUT: Bless found, inferring type from code is $output->{isa}");
		} elsif ($code =~ /return\s+bless/s) {
			$output->{type} = 'object';
			if($method_name eq 'new') {
				$output->{isa} = $self->_extract_package_name();
				$self->_log("  OUTPUT: Bless found, inferring type from code is $output->{isa}");
			} else {
				$self->_log('  OUTPUT: Bless found, inferring type from code is object');
			}
		} elsif ($code =~ /return\s*\(\s*[^)]+\s*,\s*[^)]+\s*\)\s*;/) {
			# Detect array context returns - must end with semicolon to be actual return
			$output->{type} = 'array';	# Not arrayref - actual array
			$self->_log('  OUTPUT: Found array contect return');
		} elsif ($code =~ /return\s+bless[^,]+,\s*__PACKAGE__/) {
			# Detect: bless {}, __PACKAGE__
			$output->{type} = 'object';
			# Get package name from the extractor's stored document
			if ($self->{_document}) {
				my $pkg = $self->{_document}->find_first('PPI::Statement::Package');
				$output->{isa} = $pkg ? $pkg->namespace : 'UNKNOWN';
				$self->_log('  OUTPUT: Object blessed into __PACKAGE__: ' . ($output->{isa} || 'UNKNOWN'));
				$self->{_package_name} //= $output->{isa};
			}
		} elsif ($code =~ /return\s*\(([^)]+)\)/) {
			my $content = $1;
			if ($content =~ /,/) {	# Has comma = multiple values
				$output->{type} = 'array';
			}
		} elsif ($code =~ /return\s+\$self\s*;/ && $code =~ /\$self\s*->\s*\{[^}]+\}\s*=/) {
			# Returns $self for chaining
			$output->{type} = 'object';
			if ($self->{_document}) {
				my $pkg = $self->{_document}->find_first('PPI::Statement::Package');
				$output->{isa} = $pkg ? $pkg->namespace : 'UNKNOWN';
				$self->_log('  OUTPUT: Object chained into __PACKAGE__: ' . ($output->{isa} || 'UNKNOWN'));
				$self->{_package_name} //= $output->{isa};
			}
		}

		# Find all return statements
		while ($code =~ /return\s+([^;]+);/g) {
			my $return_expr = $1;
			push @return_statements, $return_expr;
		}

		if (@return_statements) {
			$self->_log('  OUTPUT: Found ' . scalar(@return_statements) . ' return statement(s)');

			# Analyze return patterns
			my %return_types;

			if($output->{'type'}) {
				$return_types{$output->{'type'}} += 3;	# Add weighting to what's already been found
			}
			my $min;
			foreach my $ret (@return_statements) {
				$ret =~ s/^\s+|\s+$//g;

				# Literal values
				if ($ret eq '1' || $ret eq '0') {
					$return_types{boolean}++;
				} elsif ($ret =~ /^['"]/) {
					$return_types{string}++;
				} elsif ($ret =~ /^-?\d+$/) {
					$return_types{integer}++;
				} elsif ($ret =~ /^-?\d+\.\d+$/) {
					$return_types{number}++;
				} elsif ($ret eq 'undef') {
					$return_types{undef}++;
				} elsif ($ret =~ /^\[/) {
				# Data structures
					$return_types{arrayref}++;
				} elsif ($ret =~ /^\{/) {
					$return_types{hashref}++;
				} elsif ($ret =~ m{
					# Numeric expressions (heuristic, medium confidence)
					# Don't match ->
				    (?:
					\+ | -\b | \* | / | %
				      | \+\+ | --
				    )
				}x) {
					$return_types{number} += 2;
				} elsif ($ret =~ /\|\|\s*\d+\b/) {
					# Logical-or fallback with numeric literal (e.g. $x || 200)
					$return_types{integer} += 2;
					$self->_log("  OUTPUT: Numeric fallback expression detected");
				} elsif($ret =~ /^length[\s\(]/) {
					$return_types{integer}++;
					$min = 0;
				} elsif($ret =~ /^pos[\s\(]/) {
					$return_types{integer}++;
					$min = 0;
				} elsif($ret =~ /^index[\s\(]/) {
					$return_types{integer}++;
					$min = -1;
				} elsif($ret =~ /^rindex[\s\(]/) {
					$return_types{integer}++;
					$min = -1;
				} elsif($ret =~ /^ord[\s\(]/) {
					$return_types{integer}++;
				} elsif ($ret =~ /=/ && $ret =~ /\$\w+/) {
					# Assignment returning a value (e.g. $self->{status} = $status)
					# If assignment involves a numeric literal or variable, assume numeric intent
					if ($ret =~ /\b\d+\b/) {
						$return_types{integer} += 2;
						$self->_log("  OUTPUT: Assignment with numeric value detected");
					} else {
						$return_types{scalar}++;
					}
				}
				# Variables/expressions
				elsif ($ret =~ /\$\w+/) {
					if ($ret =~ /\\\@/) {
						$return_types{arrayref}++;
					} elsif ($ret =~ /\\\%/) {
						$return_types{hashref}++;
					} elsif ($ret =~ /bless/) {
						$return_types{object} += 2;	# Heigher weight
					} elsif ($ret =~ /^\{[^}]*\}$/) {
						$return_types{hashref}++;
					} elsif ($ret =~ /^\[[^\]]*\]$/) {
						$return_types{arrayref}++;
					} else {
						$return_types{scalar}++;
					}
				}
			}

			# Determine most common return type
			if (keys %return_types) {
				my ($most_common) = sort { $return_types{$b} <=> $return_types{$a} } keys %return_types;
				# Prefer integer over scalar if numeric returns dominate
				if ($return_types{integer} && (!$return_types{string})) {
					if (!$output->{type} || $output->{type} eq 'scalar') {
						$output->{type} = 'integer';
						$self->_log("  OUTPUT: Numeric returns dominate, forcing integer");
						$output->{_type_confidence} ||= 'low';
						if(defined($min)) {
							$output->{min} = $min;
						}
					}
				}
				unless ($output->{type}) {
					$output->{type} = $most_common;

					# Assign confidence for inferred numeric expressions
					if ($most_common eq 'number') {
						$output->{_type_confidence} ||= 'medium';
						if(defined($min)) {
							$output->{min} = $min;
						}
					}

					$self->_log("  OUTPUT: Inferred type from code: $most_common");
				}
			}

			# Check for consistent single value returns
			if (@return_statements == 1 && $return_statements[0] eq '1') {
				$output->{value} = 1;
				$output->{type} = 'boolean' if !$output->{type} || $output->{type} eq 'scalar';
				$self->_log("  OUTPUT: Type already set to '$output->{type}', overriding with boolean") if($output->{'type'});
			}
		} else {
			# No explicit return - might return nothing or implicit undef
			$self->_log("  OUTPUT: No explicit return statement found");
		}
	}
}

# --------------------------------------------------
# _enhance_boolean_detection
#
# Purpose:    Apply additional boolean-specific
#             detection heuristics using a weighted
#             scoring system, to override weak
#             type assignments when there is strong
#             evidence of a boolean return.
#
# Entry:      $output      - output hashref
#                            (modified in place).
#             $pod         - POD string.
#             $code        - method body source string.
#             $method_name - method name string.
#
# Exit:       Returns nothing. Modifies $output
#             in place, setting type to 'boolean'
#             if the score reaches
#             $BOOLEAN_SCORE_THRESHOLD.
#
# Side effects: Logs scoring details to stdout when
#               verbose is set.
#
# Notes:      Only fires when output type is
#             not yet set or is 'unknown'. Does not
#             override explicitly set types.
# --------------------------------------------------
sub _enhance_boolean_detection {
	my ($self, $output, $pod, $code, $method_name) = @_;

	my $boolean_score = 0;	# Track evidence for boolean return

	return unless !$output->{type} || $output->{type} eq 'unknown';

	# Look for stronger boolean indicators
	if ($pod && !$output->{type}) {
		# Common boolean return patterns in POD
		if ($pod =~ /returns?\s+(true|false|true|false|1|0)\s+(?:on|for|upon)\s+(success|failure|error|valid|invalid)/i) {
			$boolean_score += 30;
			$self->_log('  OUTPUT: Strong boolean indicator in POD (+30)');
		}

		# Check for method names that suggest boolean returns
		if ($pod =~ /(?:method|sub)\s+(\w+)/) {
			my $inferred_method_name = $1;
			if ($inferred_method_name =~ /^(is_|has_|can_|should_|contains_|exists_)/) {
				$boolean_score += 20;
				$self->_log("  OUTPUT: Inferred method name '$inferred_method_name' suggests boolean return (+20)");
			}
		}
	}

	# Analyze code for boolean patterns
	if ($code) {
		# Count boolean return idioms
		my $true_returns = () = $code =~ /return\s+1\s*;/g;
		my $false_returns = () = $code =~ /return\s+0\s*;/g;

		if ($true_returns + $false_returns >= 2) {
			$boolean_score += 40;
			$self->_log('  OUTPUT: Multiple 1/0 returns suggest boolean (+40)');
		} elsif ($true_returns + $false_returns == 1) {
			$boolean_score += 10;
			$self->_log('  OUTPUT: Single 1/0 return (+10)');
		}

		# Ternary operators that return booleans
		if ($code =~ /return\s+(?:\w+\s*[!=]=\s*\w+|\w+\s*>\s*\w+|\w+\s*<\s*\w+)\s*\?\s*(?:1|0)\s*:\s*(?:1|0)/) {
			$boolean_score += 25;
			$self->_log('  OUTPUT: Ternary with 1/0 suggests boolean (+25)');
		}

		# Check for common boolean method patterns
		if ($code =~ /return\s+[!\$\@\%]/) {
			# Returns negation or existence check
			$boolean_score += 15;
			$self->_log('  OUTPUT: Returns negation/existence check (+15)');
		}
	}

	# Check method name for boolean indicators
	if ($method_name) {
		if ($method_name =~ /^(is_|has_|can_|should_|contains_|exists_|check_|verify_|validate_)/) {
			$boolean_score += 25;
			$self->_log("  OUTPUT: Method name '$method_name' suggests boolean return (+25)");
		}
		if ($method_name =~ /_ok$/) {
			$boolean_score += 30;
			$self->_log("  OUTPUT: Method name '$method_name' ends with '_ok' (+30)");
		}
	}

	# Apply boolean type if we have strong evidence
	# Override weak type assignments (like 'array' from false positive)
	if($boolean_score >= $BOOLEAN_SCORE_THRESHOLD) {
		if (!$output->{type} || $output->{type} eq 'scalar' || $output->{type} eq 'array' || $output->{type} eq 'undef') {
			my $old_type = $output->{type} || 'none';
			$output->{type} = 'boolean';
			$self->_log("  OUTPUT: Boolean score $boolean_score >= $BOOLEAN_SCORE_THRESHOLD, setting type to boolean (was: $old_type)");
		}
	}
}

# --------------------------------------------------
# _detect_list_context
#
# Purpose:    Detect methods that return different
#             values depending on calling context
#             via wantarray, and methods that
#             return explicit lists.
#
# Entry:      $output - output hashref (modified
#                       in place).
#             $code   - method body source string.
#
# Exit:       Returns nothing. Modifies $output
#             in place, setting _context_aware,
#             _list_context, _scalar_context,
#             _list_return, and/or type keys.
#
# Side effects: Logs detections to stdout when
#               verbose is set.
# --------------------------------------------------
sub _detect_list_context {
	my ($self, $output, $code) = @_;
	return unless $code;

	# Check for wantarray usage
	if ($code =~ /wantarray/) {
		$output->{_context_aware} = 1;
		$self->_log('  OUTPUT: Method uses wantarray - context sensitive');

		# Debug: show what we're matching against
		if ($code =~ /(wantarray[^;]+;)/s) {
			$self->_log("  DEBUG wantarray line: $1");
		}

		if ($code =~ /wantarray\s*\?\s*\(([^)]+)\)\s*:\s*([^;]+)/s) {
			# Pattern 1: wantarray ? (list, items) : scalar_value (with parens)
			my ($list_return, $scalar_return) = ($1, $2);
			$self->_log("  DEBUG list (with parens): [$list_return], scalar: [$scalar_return]");

			$output->{_list_context} = $self->_infer_type_from_expression($list_return);
			$output->{_scalar_context} = $self->_infer_type_from_expression($scalar_return);
			$self->_log('  OUTPUT: Detected context-dependent returns (parenthesized)');
		} elsif ($code =~ /wantarray\s*\?\s*([^:]+?)\s*:\s*([^;]+)/s) {
			# Pattern 2: wantarray ? @array : scalar (no parens around list)
			my ($list_return, $scalar_return) = ($1, $2);
			# Clean up
			$list_return =~ s/^\s+|\s+$//g;
			$scalar_return =~ s/^\s+|\s+$//g;

			$self->_log("  DEBUG list (no parens): [$list_return], scalar: [$scalar_return]");

			$output->{_list_context} = $self->_infer_type_from_expression($list_return);
			$output->{_scalar_context} = $self->_infer_type_from_expression($scalar_return);
			$self->_log('  OUTPUT: Detected context-dependent returns (non-parenthesized)');
		} elsif ($code =~ /return[^;]*unless\s+wantarray.*?return\s*\(([^)]+)\)/s) {
			# Pattern 3: return unless wantarray; return (list);
			$output->{_list_context} = { type => 'array' };
			$self->_log('  OUTPUT: Detected list context return after wantarray check');
		}
	}

	# Detect explicit list returns (multiple values in parentheses)
	# Avoid false positives from function calls
	if ($code =~ /return\s*\(\s*([^)]+)\s*\)\s*;/) {
		my $content = $1;
		# Count commas outside of nested structures
		my $comma_count = 0;
		my $depth = 0;
		for my $char (split //, $content) {
			$depth++ if $char eq '(' || $char eq '[' || $char eq '{';
			$depth-- if $char eq ')' || $char eq ']' || $char eq '}';
			$comma_count++ if $char eq ',' && $depth == 0;
		}

		if ($comma_count > 0 && $content !~ /\b(?:bless|new)\b/) {
			# Multiple values returned
			unless ($output->{type} && $output->{type} eq 'boolean') {
				$output->{type} = 'array';
				$output->{_list_return} = $comma_count + 1;
				$self->_log('  OUTPUT: Returns list of ' . ($comma_count + 1) . ' values');
			}
		}
	}
}

# --------------------------------------------------
# _detect_void_context
#
# Purpose:    Detect methods that return nothing
#             meaningful (void context), methods
#             that always return 1 as a success
#             indicator, and methods whose name
#             suggests void context (setters,
#             mutators, loggers).
#
# Entry:      $output      - output hashref
#                            (modified in place).
#             $code        - method body source string.
#             $method_name - method name string.
#
# Exit:       Returns nothing. Modifies $output
#             in place, setting _void_context,
#             _success_indicator, and/or type.
#
# Side effects: Logs detections to stdout when
#               verbose is set.
# --------------------------------------------------
sub _detect_void_context {
	my ($self, $output, $code, $method_name) = @_;
	return unless $code;

	$self->_log("  DEBUG _detect_void_context called for $method_name");

	# Methods that typically don't return meaningful values
	my $void_patterns = {
		'setter' => qr/^set_\w+$/,
		'mutator' => qr/^(?:add|remove|delete|clear|reset|update)_/,
		'logger' => qr/^(?:log|debug|warn|error|info)$/,
		'printer' => qr/^(?:print|say|dump)_/,
	};

	# Check if method name suggests void context
	foreach my $type (keys %$void_patterns) {
		if ($method_name =~ $void_patterns->{$type}) {
			$output->{_void_context_hint} = $type;
			$self->_log("  OUTPUT: Method name suggests $type (typically void context)");
			last;
		}
	}

	# Analyze return statements
	my @returns = $code =~ /return\s*([^;]*);/g;

	$self->_log('  DEBUG Found ' . scalar(@returns) . ' return statements');

	# Count different return patterns
	my $no_value_returns = 0;
	my $true_returns = 0;
	my $self_returns = 0;

	foreach my $ret (@returns) {
		$ret =~ s/^\s+|\s+$//g;
		$self->_log("  DEBUG return value: [$ret]");
		$no_value_returns++ if $ret eq '';
		$no_value_returns++ if($ret =~ /^(if|unless)\s/);
		$true_returns++ if $ret eq '1';
		$self_returns++ if $ret eq '$self';
		if ($ret =~ /\?\s*1\s*:\s*0\b/) {
			# Strong boolean signal: ternary returning 1/0
			$true_returns++;
			# $self->_log("  OUTPUT: Ternary 1:0 return detected, treating as boolean (+40)");
			$self->_log('  OUTPUT: Ternary 1:0 return detected, treating as boolean');
		}
	}

	my $total_returns = scalar(@returns);

	$self->_log("  DEBUG no_value=$no_value_returns, true=$true_returns, self=$self_returns, total=$total_returns");

	# Void context indicators
	if ($no_value_returns > 0 && $no_value_returns == $total_returns) {
		$output->{_void_context} = 1;
		$output->{type} = 'void';  # This should override any previous type
		$self->_log('  OUTPUT: All returns are empty - void context method');
	} elsif ($true_returns > 0 && $true_returns == $total_returns && $total_returns >= 1) {
		# Methods that always return true (success indicator)
		$output->{_success_indicator} = 1;
		# Don't override type if already set to boolean
		unless ($output->{type} && $output->{type} eq 'boolean') {
			$output->{type} = 'boolean';
		}
		$self->_log('  OUTPUT: Always returns 1 - success indicator pattern');
	}
}

# --------------------------------------------------
# _detect_chaining_pattern
#
# Purpose:    Detect methods that return $self for
#             fluent interface chaining, by counting
#             the proportion of return statements
#             that return $self.
#
# Entry:      $output - output hashref (modified
#                       in place).
#             $code   - method body source string.
#
# Exit:       Returns nothing. Modifies $output
#             in place, setting type to 'object',
#             _returns_self to 1, and isa to the
#             current package name when the
#             proportion of $self returns is >= 0.8.
#
# Side effects: Logs detection to stdout when
#               verbose is set.
# --------------------------------------------------
sub _detect_chaining_pattern {
	my ($self, $output, $code) = @_;
	return unless $code;

	# Count returns of $self
	my $self_returns = 0;
	my $total_returns = 0;

	while ($code =~ /return\s+([^;]+);/g) {
		my $ret = $1;
		$ret =~ s/^\s+|\s+$//g;
		$total_returns++;
		$self_returns++ if $ret eq '$self';
	}

	# If most/all returns are $self, it's a chaining method
	if ($self_returns > 0 && $total_returns > 0) {
		my $ratio = $self_returns / $total_returns;

		if ($ratio >= 0.8) {
			$output->{type} = 'object';
			$output->{_returns_self} = 1;

			# Get the class name
			if ($self->{_document}) {
				my $pkg = $self->{_document}->find_first('PPI::Statement::Package');
				$output->{isa} = $pkg ? $pkg->namespace : 'UNKNOWN';
				$self->{_package_name} //= $output->{isa};
			}

			$self->_log("  OUTPUT: Chainable method - returns \$self ($self_returns/$total_returns returns)");
		}
	}
}

# --------------------------------------------------
# _detect_error_conventions
#
# Purpose:    Analyse how a method signals errors
#             by detecting patterns such as
#             'return undef if', implicit bare
#             returns, empty list returns, 0/1
#             boolean error patterns, and eval
#             exception handling.
#
# Entry:      $output - output hashref (modified
#                       in place).
#             $code   - method body source string.
#
# Exit:       Returns nothing. Modifies $output
#             in place, setting _error_handling,
#             _error_return, and
#             _success_failure_pattern keys.
#
# Side effects: Logs detections to stdout when
#               verbose is set.
# --------------------------------------------------
sub _detect_error_conventions {
	my ($self, $output, $code) = @_;

	return unless $code;

	$self->_log('  DEBUG _detect_error_conventions called');

	my %error_patterns;

	# Pattern 1: return undef if/unless condition
	while ($code =~ /return\s+undef\s+(?:if|unless)\s+([^;]+);/g) {
		push @{$error_patterns{undef_on_error}}, $1;
		$self->_log("  DEBUG Found 'return undef' pattern");
	}

	# Pattern 2: return if/unless (implicit undef)
	while ($code =~ /return\s+(?:if|unless)\s+([^;]+);/g) {
		push @{$error_patterns{implicit_undef}}, $1;
		$self->_log("  DEBUG Found implicit undef pattern");
	}

	# Pattern 3: return () - matches with or without conditions
	if ($code =~ /return\s*\(\s*\)\s*(?:if|unless|;)/) {
		$error_patterns{empty_list} = 1;
		$self->_log("  DEBUG Found empty list return");
	}

	# Pattern 4: return 0/1 pattern (indicates boolean with error handling)
	my $zero_returns = 0;
	my $one_returns = 0;
	# Match "return 0" or "return 1" followed by anything (condition or semicolon)
	while ($code =~ /return\s+(0|1)\s*(?:;|if|unless)/g) {
		if ($1 eq '0') {
			$zero_returns++;
		} else {
			$one_returns++;
		}
	}

	if ($zero_returns > 0 && $one_returns > 0) {
		$error_patterns{zero_on_error} = 1;
		$self->_log("  DEBUG Found 0/1 return pattern ($zero_returns zeros, $one_returns ones)");
	}

	# Pattern 5: Exception handling with eval
	if ($code =~ /eval\s*\{/) {
		# Check if there's error handling after eval
		if ($code =~ /eval\s*\{.*?\}[^}]*(?:if\s*\(\s*\$\@|catch|return\s+undef)/s) {
			$error_patterns{exception_handling} = 1;
			$self->_log('  DEBUG Found exception handling with eval');
		}
	}

	# Detect success/failure return pattern
	my @all_returns = $code =~ /return\s+([^;]+);/g;
	my $has_undef = grep { /^\s*undef\s*(?:if|unless|$)/ } @all_returns;
	my $has_value = grep { !/^\s*undef\s*$/ && !/^\s*$/ } @all_returns;

	if ($has_undef && $has_value && scalar(@all_returns) >= 2) {
		$output->{_success_failure_pattern} = 1;
		$self->_log("  OUTPUT: Uses success/failure return pattern");
	}

	# Store error conventions in output
	if(scalar(keys %error_patterns)) {
		$output->{_error_handling} = \%error_patterns;

		# Determine primary error convention
		if ($error_patterns{undef_on_error}) {
			$output->{_error_return} = 'undef';
			$self->_log("  OUTPUT: Returns undef on error");
		} elsif ($error_patterns{implicit_undef}) {
			$output->{_error_return} = 'undef';
			$self->_log("  OUTPUT: Returns implicit undef on error");
		} elsif ($error_patterns{empty_list}) {
			$output->{_error_return} = 'empty_list';
			$self->_log("  OUTPUT: Returns empty list on error");
		} elsif ($error_patterns{zero_on_error}) {
			$output->{_error_return} = 'false';
			$self->_log("  OUTPUT: Returns 0/false on error");
		}

		if ($error_patterns{exception_handling}) {
			$self->_log("  OUTPUT: Has exception handling");
		}
	} else {
		delete $output->{_error_handling};
	}
}

# --------------------------------------------------
# _infer_type_from_expression
#
# Purpose:    Infer the data type of a return
#             expression string by matching it
#             against common Perl literal and
#             variable patterns.
#
# Entry:      $expr - return expression string,
#                     trimmed of leading and
#                     trailing whitespace.
#                     May be undef.
#
# Exit:       Returns a type hashref of the form
#             { type => '...' } and optionally
#             { min => N }. Defaults to
#             { type => 'scalar' } when no
#             pattern matches.
#
# Side effects: None.
# --------------------------------------------------
sub _infer_type_from_expression {
	my ($self, $expr) = @_;

	return { type => 'scalar' } unless defined $expr;

	$expr =~ s/^\s+|\s+$//g;

	# Check for multiple comma-separated values (indicates array/list)
	if ($expr =~ /,/) {
		my $comma_count = 0;
		my $depth = 0;
		for my $char (split //, $expr) {
			$depth++ if $char =~ /[\(\[\{]/;
			$depth-- if $char =~ /[\)\]\}]/;
			$comma_count++ if $char eq ',' && $depth == 0;
		}

		if ($comma_count > 0) {
			return { type => 'array' };
		}
	}

	# Check for @ prefix (array)
	if ($expr =~ /^\@\w+/ || $expr =~ /^qw\(/ || $expr =~ /^\@\{/) {
		return { type => 'array' };
	}

	# Check for scalar() function - returns count
	if ($expr =~ /scalar\s*\(/) {
		return { type => 'integer', min => 0 };
	}

	# Check for array reference

lib/App/Test/Generator/SchemaExtractor.pm  view on Meta::CPAN

		return { type => 'string' };
	}

	# Check for booleans first — must come before the integer check
	# since /^-?\d+$/ would otherwise match 0 and 1 as integers
	if($expr =~ /^[01]$/) {
		return { type => 'boolean' };
	}

	# Check for integers
	if($expr =~ /^-?\d+$/) {
		return { type => 'integer' };
	}

	if ($expr =~ /^-?\d+\.\d+$/) {
		return { type => 'number' };
	}

	# Check for objects
	if ($expr =~ /bless/) {
		return { type => 'object' };
	}

	if($expr =~ /\blength\s*\(/) {
		return { type => 'integer', min => 0 };
	}

	# Default to scalar
	return { type => 'scalar' };
}

# --------------------------------------------------
# _detect_chaining_from_pod
#
# Purpose:    Check POD documentation for explicit
#             indications that a method is chainable
#             or part of a fluent interface.
#
# Entry:      $output - output hashref (modified
#                       in place).
#             $pod    - POD string for the method.
#
# Exit:       Returns nothing. Sets _returns_self
#             in $output if chaining keywords are
#             found.
#
# Side effects: Logs detection to stdout when
#               verbose is set.
# --------------------------------------------------
sub _detect_chaining_from_pod {
	my ($self, $output, $pod) = @_;
	return unless $pod;

	# Look for explicit chaining documentation
	if ($pod =~ /returns?\s+(?:\$)?self\b/i ||
		$pod =~ /chainable/i ||
		$pod =~ /fluent\s+interface/i ||
		$pod =~ /method\s+chaining/i) {

		$output->{_returns_self} = 1;
		$self->_log("  OUTPUT: POD indicates chainable/fluent interface");
	}
}

# --------------------------------------------------
# _validate_output
#
# Purpose:    Apply basic sanity checks to the
#             assembled output hashref and warn
#             about suspicious type combinations,
#             normalising clearly invalid types to
#             'string'.
#
# Entry:      $output - output hashref (modified
#                       in place).
#
# Exit:       Returns nothing. May modify type key
#             in $output. Logs warnings to stdout
#             when verbose is set.
#
# Side effects: None.
# --------------------------------------------------
sub _validate_output {
	my ($self, $output) = @_;

	# Warn about suspicious combinations
	if (defined $output->{type} && $output->{type} eq 'boolean' && !defined($output->{value})) {
		$self->_log('  WARNING Boolean type without value - may want to set value: 1');
	}
	if ($output->{value} && defined $output->{type} && $output->{type} ne 'boolean') {
		$self->_log("  WARNING Value set but type is not boolean: $output->{type}");
	}
	my %valid_types = map { $_ => 1 } qw(string integer number boolean array arrayref hashref object void);
	if(exists $output->{type}) {
		if(!$valid_types{$output->{type}}) {
			$self->_log("  WARNING Output value type is unknown: '$output->{type}', setting to string");
			$output->{type} = 'string';
		}
	}
}

# --------------------------------------------------
# _parse_constraints
#
# Purpose:    Parse a constraint string extracted
#             from POD documentation and populate
#             min, max, or other constraint fields
#             in a parameter hashref.
#
# Entry:      $param      - hashref for the parameter
#                           being annotated (modified
#                           in place).
#             $constraint - the constraint string,
#                           e.g. '3-50', 'positive',
#                           '>= 0', 'min 3'.
#
# Exit:       Returns nothing. Modifies $param in
#             place by setting min and/or max keys.
#
# Side effects: Logs min/max values to stdout when
#               verbose is set.



( run in 0.494 second using v1.01-cache-2.11-cpan-13bb782fe5a )