CGI-Info

 view release on metacpan or  search on metacpan

scripts/generate_index.pl  view on Meta::CPAN

#!/usr/bin/env perl

use strict;
use warnings;
use autodie qw(:all);

use Cwd qw(abs_path);
use Data::Dumper;
use File::Basename qw(dirname basename);
use File::Glob ':glob';
use File::Path qw(make_path);
use File::Slurp;
use File::Spec;
use File::stat;
use Getopt::Long qw(GetOptions);
use IPC::Run3;
use JSON::MaybeXS;
use List::Util;
use POSIX qw(strftime);
use HTML::Entities;
use HTTP::Tiny;
use Readonly;
use Time::HiRes qw(sleep);
use URI::Escape qw(uri_escape);
use version;
use WWW::RT::CPAN;
use YAML::XS qw(Dump LoadFile);

=head1 NAME

generate_index.pl - Test coverage dashboard generator

=head1 DESCRIPTION

C<generate_index.pl> generates an HTML test coverage dashboard for
publication on GitHub Pages, combining four sources of test quality
data into a single report:

=over 4

=item * B<Statement and branch coverage> from L<Devel::Cover>, showing
which lines and branches were exercised by the test suite.

=item * B<LCSAJ path coverage> (TER3) from the LCSAJ runtime debugger,
showing which control-flow paths were executed. Displayed as blue
(covered) or red (uncovered) dots on per-file mutation pages.

=item * B<Mutation testing results> from C<bin/app-test-generator-mutate>,
showing which injected faults the test suite detected (killed) and
which it missed (survived).

=item * B<CPAN Testers failure analysis>, showing which Perl versions
and operating systems are failing, with automatic root cause detection
including Perl version cliffs, locale sensitivity, and dependency
version cliffs.

=back

In addition to the dashboard, the script drives the mutation-guided
test generation pipeline described in
L<App::Test::Generator/MUTATION-GUIDED TEST GENERATION>. Surviving
mutants are used to automatically generate new test stubs and fuzz
schemas that target the exact boundary conditions the test suite
missed:

=over 4

=item * C<--generate_mutant_tests=DIR> produces TODO stub files in
C<DIR/> for all surviving mutants, grouped by source file and
deduplicated by line.

=item * C<--generate_test=mutant> additionally attempts to produce
runnable YAML schemas in C<DIR/conf/> for NUM_BOUNDARY survivors
using L<App::Test::Generator::SchemaExtractor>.

=item * C<--generate_fuzz> augments existing schemas in C<DIR/conf/>
with boundary values from surviving mutants, writing timestamped
copies that are picked up automatically by C<t/fuzz.t>.

=back

The script is designed to be shared across projects. Copy it into the
C<scripts/> directory of each project that uses it:

    cp ../App-Test-Generator/scripts/generate_index.pl scripts/

It is invoked automatically by C<scripts/generate_test_dashboard> on
each CI push via C<.github/workflows/dashboard.yml>.

=head1 SYNOPSIS

  https://$github_user.github.io/$github_repo/coverage/

=head1 INPUTS

  cover_html/cover.json     - Devel::Cover JSON report (statement/branch/condition)
  mutation.json             - Mutation testing results from app-test-generator-mutate
  cover_html/lcsaj_hits.json - LCSAJ path hit data from the LCSAJ runtime debugger
  cover_html/mutation_html/lib/ - Per-file LCSAJ path definitions (.lcsaj.json)
  coverage_history/*.json   - Historical coverage snapshots for the trend chart

=head1 OUTPUTS

  cover_html/index.html     - Main dashboard (coverage table, trend chart,
                              CPAN Testers failures, mutation report)
  cover_html/mutation_html/ - Per-file mutation heatmap pages

=head1 OPTIONS

  --generate_mutant_tests=DIR
      Generate a timestamped test stub file in DIR (typically 't/') for
      surviving mutants. The file is named mutant_YYYYMMDD_HHMMSS.t and
      contains:
        - TODO test stubs for High/Medium difficulty survivors, with
          boundary value suggestions, environment variable hints, and
          the enclosing subroutine name for navigation context
        - Comment-only hints for Low difficulty survivors
      Multiple mutations on the same source line are deduplicated into
      one stub - one good test kills all variants on that line.
      File is skipped entirely if there are no survivors to report.
      If not given, no test stubs are generated.

  --generate_test=CLASS
      When combined with --generate_mutant_tests=DIR, attempts to produce
      runnable test artefacts for surviving mutants rather than TODO stubs.

      Currently supported classes:

        mutant  For NUM_BOUNDARY survivors, calls
                App::Test::Generator::SchemaExtractor to extract the schema
                for the enclosing subroutine and augments it with the
                boundary value from the mutant (plus one value either side).
                The resulting YAML schema is written to DIR/conf/ and is
                picked up automatically by t/fuzz.t on the next test run.
                Falls back to a TODO stub if SchemaExtractor fails, the
                enclosing sub cannot be determined, or the extracted schema
                confidence is too low (very_low or none).

      This option is designed to accept additional classes in future, for
      example corpus-driven or property-based test generation.
      If not given, only TODO stubs are produced.

   --generate_fuzz
      Scans t/conf/ for existing YAML schema files and augments copies
      of them with boundary values extracted from surviving NUM_BOUNDARY
      mutants whose enclosing subroutine matches the schema's function
      field. The original schema is never modified. Augmented copies are
      written to t/conf/mutant_fuzz_YYYYMMDD_HHMMSS_FUNCTION.yml and
      are picked up automatically by t/fuzz.t on the next test run.

      Schemas whose filename already starts with mutant_fuzz_ are skipped
      to avoid augmenting previously augmented schemas. Schemas with no
      matching survivors are skipped (with a note if --verbose is active).
      New boundary values are merged into whichever edge key already
      exists in the schema (edge_case_array or edge_cases), with
      deduplication against existing values.

      This flag is independent of --generate_test and can be used alone.

=head1 DASHBOARD SECTIONS

  Coverage Table    - Per-file statement/branch/condition/subroutine
                      percentages with delta vs previous snapshot,
                      sortable columns, and sparkline trend per file
  Coverage Trend    - Chart of total coverage over recent commits with
                      linear regression line, zoom and pan support
  RT Issues         - Count of open RT tickets for the distribution
  CPAN Testers      - Failure table for the current release, with
                      Perl version cliff detection, locale analysis,
                      dependency version cliff detection, and root
                      cause confidence scoring
  Mutation Report   - Per-file mutation score (killed/survived/total),
                      cyclomatic complexity, and TER3 (LCSAJ path
                      coverage) with raw fraction
  Per-file Pages    - Line-by-line mutation heatmap with TER1/TER2/TER3
                      metrics, LCSAJ path markers, and expandable
                      mutant details with suggested tests

=head1 DEPENDENCIES

  Cwd, Data::Dumper, File::Basename, File::Glob, File::Path,
  File::Slurp, File::Spec, File::stat, Getopt::Long, HTML::Entities,
  HTTP::Tiny, IPC::Run3, JSON::MaybeXS, List::Util, POSIX,
  Readonly, Time::HiRes, URI::Escape, WWW::RT::CPAN, version

=head1 AUTHOR

  Nigel Horne <njh@nigelhorne.com>

=cut

my ($github_user, $github_repo);

if (my $repo = $ENV{GITHUB_REPOSITORY}) {
	($github_user, $github_repo) = split m{/}, $repo, 2;
} else {
	die 'What repo are you?';
}

my $package_name = $github_repo;
$package_name =~ s/\-/::/g;

Readonly my %config => (
	github_user => 'nigelhorne',
	github_repo => $github_repo,
	package_name => $package_name,
	low_threshold => 70,
	med_threshold => 90,
	max_points => 10,	# Only display the last 10 commits in the coverage trend graph
	cover_db            => 'cover_html/cover.json',      # Devel::Cover JSON output
	mutation_db         => 'mutation.json',
	mutation_dir        => 'coverage/mutation_html',     # hrefs in published pages
	mutation_output_dir => 'cover_html/mutation_html',   # where files are written
	lcsaj_root => 'cover_html/mutation_html/lib',
	lcsaj_hits_file     => 'cover_html/lcsaj_hits.json', # Runtime.pm writes here
	output => 'cover_html/index.html',	# published to gh-pages
	max_retry => 5,
	min_locale_samples => 3,
	verbose => 1,
);

# --------------------------------------------------
# Parse command-line options.
# --generate_mutant_tests=dir enables test stub
# generation into the named directory.
# --generate_test=CLASS enables schema generation
# for surviving mutants.
# --generate_fuzz enables fuzz schema augmentation
# from surviving mutants.
# --------------------------------------------------
my $mutant_test_dir;
my $generate_test;
my $generate_fuzz;
GetOptions(
	'generate_mutant_tests=s' => \$mutant_test_dir,
	'generate_test=s'         => \$generate_test,
	'generate_fuzz'           => \$generate_fuzz,
) or die "Usage: $0 [--generate_mutant_tests=DIR] [--generate_test=mutant] [--generate_fuzz]";

# -------------------------------
# Dependency correlation analysis
# -------------------------------
my $MAX_REPORTS_PER_GRADE = 20;	# safety rail
my $ENABLE_DEP_ANALYSIS = 1;

# Read and decode data
my $cover_db = eval { decode_json(read_file($config{cover_db})) };
my $mutation_db = eval { decode_json(read_file($config{mutation_db})) };

# --------------------------------------------------
# Compute coverage percentage from only our own files,
# excluding absolute paths (installed CPAN modules)
# which inflate Devel::Cover's pre-aggregated Total.
# --------------------------------------------------
my ($coverage_pct, $badge_color) = (0, 'red');

if($cover_db->{summary}) {
	my ($sum, $count) = (0, 0);
	for my $f (keys %{ $cover_db->{summary} }) {
		next if $f eq 'Total';
		next if $f =~ /^\//;    # skip absolute paths
		$sum += $cover_db->{summary}{$f}{total}{percentage} // 0;
		$count++;
	}
	if($count) {
		my $pct = _own_file_coverage_pct($cover_db->{summary});

		$coverage_pct = defined $pct ? int($pct) : 0;
		$badge_color  = $coverage_pct > $config{med_threshold} ? 'brightgreen'
			: $coverage_pct > $config{low_threshold} ? 'yellow'
			: 'red';
	}
}

Readonly my $coverage_badge_url => "https://img.shields.io/badge/coverage-${coverage_pct}%25-${badge_color}";

# Start HTML
my @html;	# build in array, join later

scripts/generate_index.pl  view on Meta::CPAN

		td.neutral { color: gray; }
		/* Show cursor points on the headers to show that they are clickable */
		th { background-color: #f2f2f2; cursor: pointer; }
		th.sortable {
			cursor: pointer;
			user-select: none;
			white-space: nowrap;
		}
		th .arrow {
			color: #aaa;	/* dimmed for inactive */
			font-weight: normal;
		}
		th .arrow.active {
			color: #000;	/* dark for active */
			font-weight: bold;
		}
		.sparkline {
			display: inline-block;
			vertical-align: middle;
		}
		tr.cpan-fail td {
			background-color: #fdd;
		}
		tr.cpan-unknown td {
			background-color: #eee;
			color: #666;
		}
		tr.cpan-na td {
			background-color: #ffffde;
			color: #666;
		}
		.new-failure {
			background: #c00;
			color: #fff;
			font-weight: bold;
			padding: 2px 6px;
			border-radius: 4px;
			font-size: 0.85em;
		}
		.notice {
			padding: 8px 12px;
			margin: 10px 0;
			border-radius: 4px;
			font-size: 0.95em;
		}
		.notice strong {
			font-weight: bold;
		}
		.notice.perl-version-cliff {
			background-color: #fff3cd; /* soft amber */
			border: 1px solid #ffeeba;
			color: #856404;
		}
		.notice.perl-version-cliff a {
			color: #533f03;
			text-decoration: underline;
		}
		.notice.perl-version-cliff a:hover {
			text-decoration: none;
		}
		.notice.locale-cliff {
			border-left: 4px solid #d97706;
			background: #fffbeb;
			padding: 0.5em 1em;
		}
		.notice.rt-issues {
			background: #fff6e5;
			border-left: 4px solid #d9822b;
		}
		table.root-causes {
			border-collapse: collapse;
			width: 100%;
			margin-bottom: 1.5em;
		}
		table.root-causes th,
		table.root-causes td {
			border: 1px solid #ccc;
			padding: 8px;
			vertical-align: top;
		}
		table.root-causes tr.high {
			background-color: #dfd;
		}
		table.root-causes tr.med {
			background-color: #ffd;
		}
		table.root-causes tr.low {
			background-color: #fdd;
		}
	</style>
</head>
<body>
<div class="badges">
	<a href="https://github.com/$config{github_user}/$config{github_repo}">
		<img src="https://img.shields.io/github/stars/$config{github_user}/$config{github_repo}?style=social" alt="GitHub stars">
	</a>
	<img src="$coverage_badge_url" alt="Coverage badge">
</div>
<h1>$config{package_name}</h1><h2>Coverage Report</h2>
<table data-sort-col="0" data-sort-order="asc">
<!-- Make the column headers clickable -->
<thead>
<tr>
	<th class="sortable" onclick="sortTable(this, 0)"><span class="label">File</span> <span class="arrow active">&#x25B2;</span></th>
	<th class="sortable" onclick="sortTable(this, 1)"><span class="label">Stmt</span> <span class="arrow">&#x25B2;</span></th>
	<th class="sortable" onclick="sortTable(this, 2)"><span class="label">Branch</span> <span class="arrow">&#x25B2;</span></th>
	<th class="sortable" onclick="sortTable(this, 3)"><span class="label">Cond</span> <span class="arrow">&#x25B2;</span></th>
	<th class="sortable" onclick="sortTable(this, 4)"><span class="label">Sub</span> <span class="arrow">&#x25B2;</span></th>
	<th class="sortable" onclick="sortTable(this, 5)"><span class="label">Total</span> <span class="arrow">&#x25B2;</span></th>
	<th class="sortable" onclick="sortTable(this, 6)"><span class="label">&Delta;</span> <span class="arrow">&#x25B2;</span></th>
</tr>
</thead>

<tbody>
HTML

my @history_files = bsd_glob("coverage_history/*.json");

# Cache historical data instead of reading for each file
my %historical_cache;
for my $hist_file (@history_files) {

scripts/generate_index.pl  view on Meta::CPAN

					);
				}

				push @html, '</ul>';
			}

			my $perl_cliff = detect_perl_version_cliff(
				\@fail_reports,
				\@pass_reports,
			);

			if ($perl_cliff) {
				my $perl_cutoff = parse_version($perl_cliff->{fails_up_to});

				my $fail_support = 0;
				my $pass_contra = 0;

				for my $r (@fail_reports) {
					next unless $r->{perl};
					$fail_support++ if parse_version($r->{perl}) < $perl_cutoff;
				}

				for my $r (@pass_reports) {
					next unless $r->{perl};
					$pass_contra++ if parse_version($r->{perl}) < $perl_cutoff;
				}
				my ($score, $label) = confidence_score(
					fail => $fail_support,
					pass => $pass_contra,
				);

				my $confidence_html = confidence_badge_html(
					$score, $label,
					$fail_support, $pass_contra,
				);

				my $delta = perldelta_url($perl_cliff->{passes_from});

				push @html,
					'<p class="notice perl-version-cliff">',
					sprintf(
						'Fails on Perl &leq; %s; passes on Perl &geq; %s. ',
						$perl_cliff->{fails_up_to},
						$perl_cliff->{passes_from},
					),
					sprintf(
						'<a href="%s" target="_blank">See perldelta for this release</a>',
						$delta,
					),
					" $confidence_html</p>";
			}
			push @html, '<h3>Failure Summary (all reports)</h3>';
			push @html, '<ul>';

			my %clusters = (
				perl_series => {},
				os => {},
				perl_os => {},
			);

			my %locale_stats;

			for my $r (@fail_reports) {
				my $perl = perl_series($r->{perl});
				my $os = $r->{osname} // 'unknown';

				$clusters{perl_series}{$perl}++ if $perl;
				$clusters{os}{$os}++;
				$clusters{perl_os}{"$perl / $os"}++ if $perl;

				if(lc($r->{grade} // '') eq 'fail') {
					# Don't include NA or Unknown in this list
					my $locale = extract_locale($r) // 'unknown';
					$locale_stats{$locale}{fail}++;
				}
			}

			my @top_perl_series = sort { $clusters{perl_series}{$b} <=> $clusters{perl_series}{$a} }
				keys %{ $clusters{perl_series} };

			my @top_os = sort { $clusters{os}{$b} <=> $clusters{os}{$a} }
				keys %{ $clusters{os} };

			my @top_perl_os = sort { $clusters{perl_os}{$b} <=> $clusters{perl_os}{$a} }
				keys %{ $clusters{perl_os} };

			if (@top_perl_series) {
				my $k = $top_perl_series[0];
				push @html, sprintf(
					'<li><b>Perl %s.x</b>: %d failures</li>',
					$k,
					$clusters{perl_series}{$k},
				);
				my $total = scalar @fail_reports;
				my $ratio_pct = ($clusters{perl_series}{$k} / $total) * 100;

				if ($ratio_pct >= $config{low_threshold}) {
					push @html, sprintf(
						'<p><em>%d%% of failures occur on Perl %s.x</em></p>',
						int($ratio_pct),
						$k,
					);
				}
			}

			if (@top_os) {
				my $k = $top_os[0];
				push @html, sprintf(
					'<li><b>%s</b>: %d failures</li>',
					$k,
					$clusters{os}{$k},
				);
			}

			if (@top_perl_os) {
				my $k = $top_perl_os[0];
				push @html, sprintf(
					'<li><b>%s</b>: %d failures</li>',
					$k,
					$clusters{perl_os}{$k},
				);
			}

			push @html, '</ul>';

			my @locale_clusters;

			for my $r (@pass_reports) {
				my $locale = extract_locale($r) // 'unknown';
				$locale_stats{$locale}{pass}++;
			}

			for my $loc (keys %locale_stats) {
				next if $loc eq 'unknown';

				my $fail = $locale_stats{$loc}{fail} // 0;
				my $pass = $locale_stats{$loc}{pass} // 0;
				my $total = $fail + $pass;

				next if $total < $config{min_locale_samples};

				my $ratio = $fail / $total * 100;

				if ($ratio >= $config{low_threshold} && is_non_english_locale($loc)) {
					push @locale_clusters, {
						locale => $loc,
						fail => $fail,
						pass => $pass,
						ratio => $ratio,
					};
				}
			}
			if(scalar(@locale_clusters)) {
				push @html,
					'<h3>Locale-sensitive failures detected</h3>',
					'<div class="notice locale-cliff">',
					'<ul>';
				foreach my $locale(@locale_clusters) {
					push @html, "<li><code>$locale->{locale}</code> - $locale->{fail} FAIL / $locale->{pass} PASS ($locale->{ratio}%)</li>";
				}
				push @html, '</ul>', '</div>';
			}

			my @fail_perl_versions = extract_perl_versions(\@fail_reports);
			my @pass_perl_versions = extract_perl_versions(\@pass_reports);

			my @root_causes = detect_root_causes(
				fail_reports => \@fail_reports,
				pass_reports => \@pass_reports,
				fail_perl_versions => \@fail_perl_versions,
				pass_perl_versions => \@pass_perl_versions,
			);

			# warn 'Root causes found: ', scalar(@root_causes) . "\n";
			# warn 'Pass reports: ', scalar(@pass_reports) . "\n";

			if (@root_causes) {
				push @html, <<'HTML';
<h3>Likely Root Causes</h3>
<table class="root-causes">
<thead>
<tr>
	<th>Cause</th>
	<th>Confidence</th>
	<th>Evidence</th>
</tr>
</thead>
<tbody>
HTML

				for my $rc (@root_causes) {
					my $confidence_pct = int($rc->{confidence} * 100);

					my $confidence_class =
						$confidence_pct >= $config{med_threshold} ? 'high'
						: $confidence_pct >= $config{low_threshold} ? 'med'
						: 'low';

					my $confidence_label =
						$confidence_class eq 'high' ? 'Strong'
						: $confidence_class eq 'med' ? 'Moderate'
						: 'Weak';

					my $evidence_html = join(
						'',
						map { "<li>$_</li>" } @{ $rc->{evidence} || [] }
					);

					my $label = $rc->{label};

					# Optional perldelta link
					if ($rc->{type} eq 'perl' && $rc->{perldelta}) {
						$label .= sprintf(
							q{ (<a href="%s" target="_blank">perldelta</a>)},
							$rc->{perldelta}
						);
					}

					push @html, sprintf(<<'ROW',

scripts/generate_index.pl  view on Meta::CPAN


sub perldelta_url {
	my ($v) = @_;
	my ($maj, $min) = $v =~ /^v?(\d+)\.(\d+)/;
	return "https://perldoc.perl.org/perl${maj}${min}0delta";
}

sub confidence_score {
	my (%args) = @_;

	my $fail = $args{fail} // 0;
	my $pass = $args{pass} // 0;

	return (0, 'none') if ($fail + $pass) == 0;

	my $score = $fail / ($fail + $pass);

	# Convert config thresholds from percent → fraction
	my $med = ($config{med_threshold} // 90) / 100;
	my $low = ($config{low_threshold} // 70) / 100;

	my $label =
		$score >= $med ? 'strong' :
		$score >= $low ? 'moderate' :
		'weak';

	return ($score, $label);
}

sub confidence_badge_html {
	my ($score, $label, $fail, $pass) = @_;

	my %class_for = (
		strong => 'badge-good',
		moderate => 'badge-warn',
		weak => 'badge-bad',
		none => 'badge-bad',
	);

	my $pct = sprintf('%.0f%%', $score * 100);

	return sprintf(
		q{<span class="coverage-badge %s" title="%d fails, %d passes">%s confidence</span>},
		$class_for{$label} // 'badge-bad',
		$fail, $pass,
		ucfirst($label)
	);
}

sub perl_series {
	my $perl = $_[0];
	return unless defined $perl;

	# map "5.16.3" to "5.16"
	if ($perl =~ /^(\d+\.\d+)/) {
		return $1;
	}
	return;
}

sub extract_locale {
	my $r = $_[0];

	# Preferred: explicit environment
	for my $k (qw(LANG LC_ALL LC_CTYPE)) {
		if (my $v = $r->{env}{$k}) {
			return $v;
		}
	}

	# Fallback: scan report body
	if (my $body = $r->{raw} || $r->{body}) {
		if ($body =~ /\b([a-z]{2}_[A-Z]{2})\b/) {
			return $1;
		}
	}

	my $url = "https://api.cpantesters.org/v3/report/$r->{guid}";

	my $res = $http->get($url);
	return unless $res->{success};

	my $report = eval { decode_json($res->{content}) };

	if($report->{result}->{output}->{uncategorized} =~ /\b([a-z]{2}_[A-Z]{2})\b/) {
		return $1;
	}
}

sub is_non_english_locale {
	my $locale = $_[0];

	return 0 unless $locale;

	# Treat C / POSIX / en_* as English
	return 0 if $locale =~ /^(C|POSIX|en(_|$))/i;

	return 1;
}

sub parse_version {
	my $v = $_[0];
	return eval { version->parse($v) };
}

sub fetch_open_rt_ticket_count {
	my $dist = $_[0];

	my @rc = @{WWW::RT::CPAN::list_dist_active_tickets(dist => $dist)};

	# Defensive checks
	return undef unless @rc >= 3;
	return undef unless $rc[0] == 200 && $rc[1] eq 'OK';

	my $tickets = $rc[2] && ref $rc[2] eq 'ARRAY' ? $rc[2] : [];

	return scalar @$tickets;
}

sub detect_os_root_cause {
	my ($reports, $config) = @_;

	my %count;
	$count{ $_->{osname} // 'unknown' }++ for @$reports;

	my $total = @$reports;
	return unless $total >= 3;

	for my $os (keys %count) {
		my $ratio = $count{$os} / $total;
		next unless $ratio >= ($config->{med_threshold} / 100);

		return {
			type => 'os',
			label => "OS-specific behavior ($os)",
			confidence => sprintf("%.2f", $ratio),
			evidence => [
				sprintf('%d/%d failures on %s', $count{$os}, $total, $os),
					"Passes observed on other operating systems",
				],
		};
	}

	return;
}

sub detect_perl_version_root_cause {
	my ($fail_versions, $pass_versions) = @_;

	return unless @$fail_versions && @$pass_versions;

	my $max_fail = List::Util::max(@$fail_versions);
	my $min_pass = List::Util::min(@$pass_versions);

	return unless $max_fail < $min_pass;

	return {
		type => 'perl',
		label => "Perl version regression (Perl &lt; $min_pass)",
		confidence => 1.00,
		evidence => [
			"All failures on Perl &leq; $max_fail",
			"All passes on Perl &geq; $min_pass",
		],
		perldelta => perldelta_url($min_pass),
	};
}

sub detect_locale_root_cause {
	my ($reports, $config) = @_;

	my %count;
	my $total = 0;

	for my $r (@$reports) {
		# Can get FPs if we take NA or Unknown into account
		if(lc($r->{grade} // '') eq 'fail') {
			my $loc = extract_locale($r) or next;
			next if $loc =~ /^en_/i;
			$count{$loc}++;
			$total++;
		}
	}

	return unless $total >= 2;

	for my $loc (keys %count) {
		my $ratio = $count{$loc} / $total;
		next unless $ratio >= ($config->{low_threshold} / 100);

		return {
			type => 'locale',
			label => "Locale-sensitive behavior ($loc)",
			confidence => sprintf("%.2f", $ratio),
			evidence => [
				"$count{$loc}/$total failures with LANG=$loc",
				'English locales show fewer or no failures',
			],
		};
	}

	return;
}

sub detect_root_causes {
	my (%args) = @_;
	my @hints;

	# Check for universal failure pattern first — if present,
	# it is almost certainly the most important root cause
	push @hints, detect_universal_failure(
		$args{fail_reports} || [],
		$args{pass_reports} || [],
	);

	push @hints, detect_os_root_cause($args{fail_reports}, \%config) if $args{fail_reports};
	push @hints, detect_locale_root_cause($args{fail_reports}, \%config);

	if ($args{fail_perl_versions} && $args{pass_perl_versions}) {
		push @hints,
			detect_perl_version_root_cause(
				$args{fail_perl_versions},
				$args{pass_perl_versions},
			);
	}

	push @hints, detect_scattered_failures(
		$args{fail_reports} || [],
		$args{pass_reports} || [],
	);

	@hints = grep { defined } @hints;
	@hints = sort { $b->{confidence} <=> $a->{confidence} } @hints;

	return @hints;
}

# --------------------------------------------------
# detect_scattered_failures
#
# Purpose:    Detect when failures and passes coexist
#             across the same Perl versions and OS
#             types with no clear cliff pattern,
#             suggesting flaky tests or optional
#             dependency differences rather than a
#             compatibility issue.
#
# Entry:      $fail_reports - arrayref of fail hashrefs
#             $pass_reports - arrayref of pass hashrefs
#
# Exit:       Returns a root cause hashref, or undef
#             if the pattern is not present.
#
# Side effects: None.
#
# Notes:      Triggered when: there are failures on 3+
#             Perl versions, passes exist on some of
#             the same versions, and no version cliff
#             is detectable. Confidence is intentionally
#             low since this is a weak signal.
# --------------------------------------------------
sub detect_scattered_failures {
	my ($fail_reports, $pass_reports) = @_;

	return unless @{$fail_reports} >= 3 && @{$pass_reports} >= 3;

	# Build sets of Perl versions seen in each grade
	my %fail_perls = map { perl_series($_->{perl}) => 1 }
		grep { $_->{perl} } @{$fail_reports};
	my %pass_perls = map { perl_series($_->{perl}) => 1 }
		grep { $_->{perl} } @{$pass_reports};

	# Count how many Perl series appear in both fail and pass
	my $overlap = grep { exists $pass_perls{$_} } keys %fail_perls;

	# Need significant overlap — failures and passes on same versions
	return unless $overlap >= 2;



( run in 2.099 seconds using v1.01-cache-2.11-cpan-ceb78f64989 )