App-Test-Generator

 view release on metacpan or  search on metacpan

lib/App/Test/Generator/SchemaExtractor.pm  view on Meta::CPAN

			$params{$name} //= { _source => 'code', position => $pos++ };
			$self->_log("  CODE: Found Params::Get parameter '$name'");
		}
	}

	$self->_extract_defaults_from_code(\%params, $code, $method);

	# Infer types from defaults
	foreach my $param (keys %params) {
		if ($params{$param}{_default} && !$params{$param}{type}) {
			my $default = $params{$param}{_default};
			if (ref($default) eq 'HASH') {
				$params{$param}{type} = 'hashref';
				$self->_log("  CODE: $param type inferred as hashref from default");
			} elsif (ref($default) eq 'ARRAY') {
				$params{$param}{type} = 'arrayref';
				$self->_log("  CODE: $param type inferred as arrayref from default");
			}
		}
	}

	if($code =~ /(croak|die)\(.*\)\s+if\s*\(\s*scalar\(\@_\)\s*<\s*(\d+)\s*\)/s) {
		my $required_count = $2;
		my @param_names = sort { $params{$a}{position} <=> $params{$b}{position} } keys %params;
		for my $i (0 .. $required_count-1) {
			$params{$param_names[$i]}{optional} = 0;
			$self->_log("  CODE: $param_names[$i] marked required due to croak scalar check");
		}
	} elsif ($code =~ /(croak|die)\(.*\)\s+if\s*\(\s*scalar\(\@_\)\s*==\s*(0)\s*\)/s) {
		foreach my $param (keys %params) {
			$params{$param}{optional} = 0;
			$self->_log("  CODE: $param: all parameters are required due to 'scalar(@_) == 0' check");
		}
	}

	# Analyze each parameter (with safety limit)
	foreach my $param (keys %params) {
		if ($param_count++ > $self->{max_parameters}) {
			$self->_log("  WARNING: Max parameters ($self->{max_parameters}) exceeded, skipping remaining");
			last;
		}

		my $p = \$params{$param};

		$self->_analyze_parameter_type($p, $param, $code);
		$self->_analyze_parameter_constraints($p, $param, $code);
		$self->_analyze_parameter_validation($p, $param, $code);
		$self->_analyze_advanced_types($p, $param, $code);

		# Defined checks
		if ($code =~ /defined\s*\(\s*\$$param\s*\)/) {
			$$p->{optional} = 0;
			$self->_log("  CODE: $param is required (defined check)");
		}

		# Determine optional/required and numeric type from code
		if ($code =~ /\s*\$$param\s*(?:\/\/|\|\|)=/) {
			# e.g. $var //= 5; or $var ||= 5;
			$$p->{optional} = 1;
			$self->_log("  CODE: $param is optional (default value assigned in code)");
		} elsif ($code =~ /\s*\$$param\s*(?:[\+\-\*\%]|\/(?!\/)|(?:\+\+)|(?:--)|(?:[\+\-\*\%]=|\/(?!\/)=)|\+\$|\$[+-])/ ) {
			# Covers arithmetic usage:
			# $x + $param, $param++, $param--, $x += $param, $x -= $param, etc.
			$$p->{optional} = 0;
			$$p->{type} //= 'number';
			$self->_log("  CODE: $param is required (used in arithmetic context)");
		} elsif ($code =~ /\$\b$param\b\s*(?:\+0|\*1)/) {
			# Forces numeric context, e.g., "$param + 0" or "$param * 1"
			$$p->{optional} = 0;
			$$p->{type} //= 'number';
			$self->_log("  CODE: $param is required (numeric context)");
		}

		# Required parameter checks (undef causes error)

		# Style 1: block form
		if ($code =~ /if\s*\(\s*!\s*defined\s*\(\s*\$$param\s*\)\s*\)\s*\{([^}]+)\}/s) {
			my $block = $1;
			if ($block =~ /\b(croak|die|confess)\b/) {
				$$p->{optional} = 0;
				$self->_log("  CODE: $param is required (undef causes error)");
			}
		}

		# Style 2: postfix unless
		if ($code =~ /\b(croak|die|confess)\b[^;]*\bunless\s+defined\s*\(\s*\$$param\s*\)/) {
			$$p->{optional} = 0;
			$self->_log("  CODE: $param is required (postfix undef check)");
		}

		# Exists checks for hash keys
		if ($code =~ /exists\s*\(\s*\$$param\s*\)/) {
			$$p->{type} = 'hashkey';
			$self->_log("  CODE: $param is a hash key");
		}

		# Scalar context for arrays
		if ($code =~ /scalar\s*\(\s*\@?\$$param\s*\)/) {
			$$p->{type} = 'array';
			$self->_log("  CODE: $param used in scalar context (array)");
		}

		$self->_extract_error_constraints($p, $param, $code);
	}

	return \%params;
}

# --------------------------------------------------
# _analyze_parameter_type
#
# Purpose:    Infer the type of a single parameter
#             from ref() checks, isa() calls,
#             bless patterns, array/hash operations,
#             and numeric operator usage in the
#             method body.
#
# Entry:      $p_ref - reference to the parameter
#                      hashref (modified in place
#                      via the referenced hash).
#             $param - parameter name string.

lib/App/Test/Generator/SchemaExtractor.pm  view on Meta::CPAN

#             $param - parameter name string.
#             $code  - method body source string.
#
# Exit:       Returns nothing. Modifies the
#             referenced parameter hashref.
#
# Side effects: Logs detections to stdout when
#               verbose is set.
#
# Notes:      Numeric comparisons that appear
#             inside die/croak guard conditions
#             are excluded to avoid inferring
#             invalid-input ranges as valid
#             constraints.
# --------------------------------------------------
sub _analyze_parameter_constraints {
	my ($self, $p_ref, $param, $code) = @_;
	my $p = $$p_ref;

	# Do not treat comparisons inside die/croak/confess as valid constraints
	my $guarded = 0;
		if ($code =~ /(die|croak|confess)\b[^{;]*\bif\b[^{;]*\$$param\b/s) {
		$guarded = 1;
	}

	# Length checks for strings
	if ($code =~ /length\s*\(\s*\$$param\s*\)\s*([<>]=?)\s*(\d+)/) {
		my ($op, $val) = ($1, $2);
		$p->{type} ||= 'string';
		if ($op eq '<') {
			$p->{max} = $val - 1;
		} elsif ($op eq '<=') {
			$p->{max} = $val;
		} elsif ($op eq '>') {
			$p->{min} = $val + 1;
		} elsif ($op eq '>=') {
			$p->{min} = $val;
		}
		$self->_log("  CODE: $param length constraint $op $val");
	}

	# Numeric range checks (only if NOT part of error guard)
	if (
		!$guarded
		&& $code =~ /\$$param\s*([<>]=?)\s*([+-]?(?:\d+\.?\d*|\.\d+))/
	) {
		my ($op, $val) = ($1, $2);
		$p->{type} ||= looks_like_number($val) ? 'number' : 'integer';

		if ($op eq '<' || $op eq '<=') {
			# Only set max if it tightens the range
			my $max = ($op eq '<') ? $val - 1 : $val;
			$p->{max} = $max if !defined($p->{max}) || $max < $p->{max};
		} elsif ($op eq '>' || $op eq '>=') {
			my $min = ($op eq '>') ? $val + 1 : $val;
			$p->{min} = $min if !defined($p->{min}) || $min > $p->{min};
		}
	}

	# Regex pattern matching with better capture
	if ($code =~ /\$$param\s*=~\s*((?:qr?\/[^\/]+\/|\$[\w:]+|\$\{\w+\}))/) {
		my $pattern = $1;
		$p->{type} ||= 'string';

		# Clean up the pattern if it's a straightforward regex
		if ($pattern =~ /^qr?\/([^\/]+)\/$/) {
			$p->{matches} = "/$1/";
		} else {
			$p->{matches} = $pattern;
		}
		$self->_log("  CODE: $param matches pattern: $p->{matches}");
	}
}

# --------------------------------------------------
# _analyze_parameter_validation
#
# Purpose:    Determine optionality and extract
#             default values for a single parameter
#             by analysing explicit required checks
#             (die/croak unless defined) and default
#             assignment patterns in the method body.
#
# Entry:      $p_ref - reference to the parameter
#                      hashref (modified in place).
#             $param - parameter name string.
#             $code  - method body source string.
#
# Exit:       Returns nothing. Modifies the
#             referenced parameter hashref.
#
# Side effects: Logs detections to stdout when
#               verbose is set.
#
# Notes:      Explicit required checks take highest
#             priority and override any default
#             value detected earlier.
# --------------------------------------------------
sub _analyze_parameter_validation {
	my ($self, $p_ref, $param, $code) = @_;
	my $p = $$p_ref;

	# Required/optional checks
	my $is_required = 0;

	# Die/croak if not defined
	if ($code =~ /(?:die|croak|confess)\s+[^;]*unless\s+(?:defined\s+)?\$$param/s) {
		$is_required = 1;
	}

	# Extract default values with the new method
	my $default_value = $self->_extract_default_value($param, $code);
	if (defined $default_value && !exists $p->{_default}) {
		$p->{optional} = 1;
		$p->{_default} = $default_value;

		# Try to infer type from default value if not already set
		unless ($p->{type}) {
			if (looks_like_number($default_value)) {
				$p->{type} = $default_value =~ /\./ ? 'number' : 'integer';
			} elsif (ref($default_value) eq 'ARRAY') {

lib/App/Test/Generator/SchemaExtractor.pm  view on Meta::CPAN

	my $package_name = $package_stmt->namespace;
	$self->{_package_name} //= $package_name;

	# Check if the current package has a 'new' method
	my $has_new = $doc->find(sub {
		$_[1]->isa('PPI::Statement::Sub') && $_[1]->name eq 'new'
	});

	if ($has_new) {
		return $package_name;
	}

	# Otherwise, try to get the parent class
	my $parent = $self->_get_parent_class();
	return $parent if $parent;

	# Fallback to current package
	return $package_name;
}

# --------------------------------------------------
# _extract_default_value
#
# Purpose:    Extract a default value for a named
#             parameter from a method body by
#             matching multiple common Perl default
#             assignment idioms.
#
# Entry:      $param - parameter name string.
#             $code  - method body source string.
#
# Exit:       Returns the cleaned default value
#             scalar on success, or undef if no
#             default assignment pattern is found.
#
# Side effects: None.
#
# Notes:      Eight patterns are tried in order:
#             ||, //=, defined ternary, unless
#             defined, ||=, //, multi-line if
#             !defined, unless defined block.
#             Comment lines are stripped from the
#             code before matching to avoid false
#             positives. Delegates to
#             _clean_default_value for value
#             normalisation.
# --------------------------------------------------
sub _extract_default_value {
	my ($self, $param, $code) = @_;

	return undef unless $param && $code;

	# Clean up the code for easier pattern matching
	# Remove comments to avoid false positives
	my $clean_code = $code;
	$clean_code =~ s/#.*$//gm;
	$clean_code =~ s/^\s+|\s+$//g;

	# Pattern 1: $param = $param || 'default_value'
	# Also handles: $param = $arg || 'default'
	if ($clean_code =~ /\$$param\s*=\s*(?:\$$param|\$[a-zA-Z_]\w*)\s*\|\|\s*([^;]+)/) {
		my $default = $1;
		$default =~ s/\s*;\s*$//;
		$default = $self->_clean_default_value($default);
		return $default if defined $default;
	}

	# Pattern 2: $param //= 'default_value'
	if ($clean_code =~ /\$$param\s*\/\/=\s*([^;]+)/) {
		my $default = $1;
		$default =~ s/\s*;\s*$//;
		$default = $self->_clean_default_value($default);
		return $default if defined $default;
	}

	# Pattern 3: $param = defined $param ? $param : 'default'
	# Also handles: $param = defined $arg ? $arg : 'default'
	if ($clean_code =~ /\$$param\s*=\s*defined\s+(?:\$$param|\$[a-zA-Z_]\w*)\s*\?\s*(?:\$$param|\$[a-zA-Z_]\w*)\s*:\s*([^;]+)/) {
		my $default = $1;
		$default =~ s/\s*;\s*$//;
		$default = $self->_clean_default_value($default);
		return $default if defined $default;
	}

	# Pattern 4: $param = 'default' unless defined $param;
	if ($clean_code =~ /\$$param\s*=\s*([^;]+?)\s+unless\s+defined\s+(?:\$$param|\$[a-zA-Z_]\w*)/) {
		my $default = $1;
		$default = $self->_clean_default_value($default);
		return $default if defined $default;
	}

	# Pattern 5: $param ||= 'default'
	if ($clean_code =~ /\$$param\s*\|\|=\s*([^;]+)/) {
		my $default = $1;
		$default =~ s/\s*;\s*$//;
		$default = $self->_clean_default_value($default);
		return $default if defined $default;
	}

	# Pattern 6: $param = $arg // 'default'
	if ($clean_code =~ /\$$param\s*=\s*(?:\$$param|\$[a-zA-Z_]\w*)\s*\/\/\s*([^;]+)/) {
		my $default = $1;
		$default =~ s/\s*;\s*$//;
		$default = $self->_clean_default_value($default);
		return $default if defined $default;
	}

	# Pattern 7: Multi-line: if (!defined $param) { $param = 'default'; }
	if ($clean_code =~ /if\s*\(\s*!defined\s+\$$param\s*\)\s*\{[^}]*\$$param\s*=\s*([^;]+)/s) {
		my $default = $1;
		$default =~ s/\s*;\s*$//;
		$default = $self->_clean_default_value($default);
		return $default if defined $default;
	}

	# Pattern 8: unless (defined $param) { $param = 'default'; }
	if ($clean_code =~ /unless\s*\(\s*defined\s+\$$param\s*\)\s*\{[^}]*\$$param\s*=\s*([^;]+)/s) {
		my $default = $1;
		$default =~ s/\s*;\s*$//;
		$default = $self->_clean_default_value($default);
		return $default if defined $default;
	}

	return undef;
}

# --------------------------------------------------
# _extract_test_hints
#
# Purpose:    Extract structured test hints from
#             a method's code and schema, including
#             boundary values, invalid inputs, and
#             valid input examples from POD.
#
# Entry:      $method - method hashref.
#             $schema - schema hashref as built so
#                       far by _analyze_method.
#
# Exit:       Returns a hints hashref with keys:
#             boundary_values, invalid_inputs,
#             equivalence_classes, valid_inputs.
#             Keys with empty arrays are deleted
#             before returning.
#
# Side effects: None.
# --------------------------------------------------
sub _extract_test_hints {
	my ($self, $method, $schema) = @_;

	my %hints = (
		boundary_values => [],
		invalid_inputs => [],
		equivalence_classes => [],
		valid_inputs => [],
	);

	my $code = $method->{body};
	return {} unless $code;

	$self->_extract_invalid_input_hints($code, \%hints);
	$self->_extract_boundary_value_hints($code, \%hints);

lib/App/Test/Generator/SchemaExtractor.pm  view on Meta::CPAN


		# If NOT from code (i.e., from POD), interpret escape sequences
		unless ($from_code) {
			$value =~ s/\\n/\n/g;
			$value =~ s/\\r/\r/g;
			$value =~ s/\\t/\t/g;
			$value =~ s/\\\\/\\/g;
		}
	}

	# Sometimes trailing ) is left on
	if($value !~ /^\(/) {
		$value =~ s/\)$//;
	}

	# Handle Perl empty hash (must be before numeric/boolean checks)
	if ($value =~ /^\{\s*\}$/) {
		return {};
	}

	# Handle Perl empty list/array
	if ($value =~ /^\[\s*\]$/) {
		return [];
	}

	# Handle numeric values
	if ($value =~ /^-?\d+(?:\.\d+)?$/) {
		if ($value =~ /\./) {
			return $value + 0;
		} else {
			return int($value);
		}
	}

	# Handle boolean keywords
	if ($value =~ /^(true|false)$/i) {
		return lc($1) eq 'true' ? 1 : 0;
	}

	# Handle Perl boolean constants
	if ($value eq '1') {
		return 1;
	} elsif ($value eq '0') {
		return 0;
	}

	# Handle undef
	if ($value eq 'undef') {
		return undef;
	}

	# Handle __PACKAGE__ and similar constants
	if ($value eq '__PACKAGE__') {
		return '__PACKAGE__';
	}

	# Remove surrounding parentheses
	$value =~ s/^\((.+)\)$/$1/;

	# Handle expressions we can't evaluate
	if ($value =~ /^\$[a-zA-Z_]/ || $value =~ /\(.*\)/) {
		return if($value =~ /^\$|\@|\%/);	# The default is a value, so who knows its type?
		# return $value;
	}

	return $value;
}

# --------------------------------------------------
# _validate_pod_code_agreement
#
# Purpose:    Compare POD parameter documentation
#             against code-inferred parameters and
#             return a list of disagreements when
#             strict_pod mode is enabled.
#
# Entry:      $pod_params  - hashref of parameters
#                            from POD analysis.
#             $code_params - hashref of parameters
#                            from code analysis.
#             $method_name - method name string,
#                            used for context in
#                            error messages.
#
# Exit:       Returns a list of disagreement
#             strings. Returns an empty list if
#             all parameters agree.
#
# Side effects: None.
#
# Notes:      Type mismatches are classified as
#             either 'compatible' (e.g. integer vs
#             number) or 'incompatible' via
#             _types_are_compatible. $self and
#             $class are excluded from undocumented
#             parameter warnings in appropriate
#             context.
# --------------------------------------------------
sub _validate_pod_code_agreement {
	my ($self, $pod_params, $code_params, $method_name) = @_;

	my @errors;

	# Get all parameter names from both sources
	my %all_params = map { $_ => 1 } (keys %$pod_params, keys %$code_params);

	foreach my $param (sort keys %all_params) {
		my $pod = $pod_params->{$param} || {};
		my $code = $code_params->{$param} || {};

		# Params from a =head3|4 Input formal spec are the authoritative API
		# definition — they are exempt from POD/code disagreement checks since
		# the spec takes precedence over heuristic code analysis.
		next if $pod->{_from_input_spec};

		# Check if parameter exists in both
		if (exists $pod_params->{$param} && !exists $code_params->{$param}) {
			push @errors, "Parameter '\$$param' documented in POD but not found in code signature";
			next;
		}



( run in 1.242 second using v1.01-cache-2.11-cpan-5735350b133 )