LaTeXML
view release on metacpan or search on metacpan
t/170_grammar_coverage.t view on Meta::CPAN
# Collect the rule pairs declared by MathGrammar; the goal of this test is
# to see every one of them exercised at least once.
my %grammar_dependencies = obtain_dependencies();

# A single converter, prepared once, is reused for all of the test inputs.
my $opts = LaTeXML::Common::Config->new(
    input_limit => 100,
    verbosity   => -2,
);
my $converter = LaTeXML->get_converter($opts);
$converter->prepare_session($opts);

# Inputs to convert, and the rule pairs actually observed while converting.
my @core_tests          = parser_test_filenames();
my %tested_dependencies = ();
# Convert every test input while capturing STDERR, then scan the captured log
# for "Matched subrule/production" and "(consumed" lines and record each
# (parent rule => child rule) pair in %tested_dependencies.
for my $test (@core_tests) {
    note("grammar coverage $test...");
    my $regularized_log = '';
    my $response;
    open(my $log_handle, ">>", \$regularized_log)
        or croak("Can't redirect STDERR to log: $!");
    {
        # Send everything written to STDERR during the conversion into the
        # in-memory log; `local` restores the real STDERR when the scope ends.
        local *STDERR = *$log_handle;
        binmode(STDERR, ':encoding(UTF-8)');
        $response = $converter->convert($test);
        # my $regularized_log = $response->{log};
    }
    # Close the handle before reading the scalar: the :encoding layer may still
    # hold buffered output which only reaches $regularized_log on flush/close.
    close($log_handle) or warn "Can't close the captured log: $!";

    # Preprocess split lines back to single lines, e.g.
    #  2|AnythingAn|>>Matched subrule:                        |
    #   |          |[modifierFormulae]<< (return value:       |
    # -- TO:
    #  2|AnythingAn|>>Matched subrule: [modifierFormulae]<< (return value: |
    # Also:
    # 10|  bigop   |(consumed: [ SUMOP:sum:1])                |
    #  9|preScripte|>>Matched subrule: [$arg[0]]<< (return|
    #   |          |value: [<XMTok                            |
    $regularized_log =~ s/\:\s+\|\n\s*\|\s+\|\[/\: \[/g;
    note($response->{status});

    my $prev_line = '';
    for my $line (split("\n", $regularized_log)) {
        if ($line =~ /(\w+)\s*\|(?:(?:\>\>(?:\.*)Matched(?:\(keep\))? (?:subrule|production))|(?:\(consumed))\:\s*\[\s*(\w+|\$arg\[\d+\])/) {
            my ($parent, $child) = ($1, $2);
            if ($child =~ /^\$arg/) {
                # A "$arg[n]" child is a placeholder; the rule it stands for is
                # the one named in the rule column of the preceding log line.
                # When that line gives no rule name the pair cannot be
                # resolved, so it is skipped rather than recorded under the
                # literal placeholder text.
                $child = ($prev_line =~ /^\s*\d+\|\s*(\w+)\s*\|/) ? $1 : undef;
            }
            if (defined $child && $parent ne $child) {
                $tested_dependencies{$parent}{$child} = 1;
            }
        }
        $prev_line = $line;
    }
}
# Tallies, plus per-pair detail, for the comparison of declared vs. observed
# grammar dependencies.
my ($ok_count, $missing_count, $extra_count) = (0, 0, 0);
my (%missing, %extra);

# Whole rule which is exempt from the check.
delete $grammar_dependencies{'Start'};    # never reported in terse log

# Individual (rule => subrule) pairs which are exempt from the check.
for my $exempt (
    # Single lexeme top-level rules never parse, BECAUSE the grammar is
    # never run on 1-lexeme formulae
    ['AnythingAn', "FLOATSUPERSCRIPT"],
    ['AnythingAn', "MODIFIER"],

    # Reachable conceptually by an ambiguous grammar, but not in the
    # RecDescent algorithm.
    # AnyOp variants are not reached as Formula variants take precedence
    # (such as Factor's preScripted variants)
    ['AnyOp', "OPERATOR"],
    ['AnyOp', "addScripts"],
    ['AnyOp', "preScripted"],
    ['argPunct',   'VERTBAR'],
    ['Expression', 'punctExpr'],    # Unreachable, due to Formula -> punctExpr
    ['aSuperscri', 'AnyOp'],
    ['aSuperscri', 'Expression'],

    # These are odd to have been recorded, since at least the intention is
    # that they are preceded by aSubscript/aSuperscript.
    # We have explicit tests for the relevant cases.
    ['Superscrip', 'endPunct'],
    ['Subscript',  'endPunct'],

    # forbid rules should never match, don't check them here.
    # TODO: We need tests for the always-failing productions!
    ['doubtArgs', 'forbidArgs'],
) {
    my ($rule, $subrule) = @$exempt;
    delete $grammar_dependencies{$rule}{$subrule};
}
# Also part of the forbid family: dropped as a whole rule.
delete $grammar_dependencies{'requireArg'};

# Needs regex enhancement
# preScripted -> bigop
#   \sum ^2
# preScripted -> ATOM_OR_ID
#   \frac12 _1$
# Walk every observed (rule => subrule) pair, ignoring rules whose name starts
# with an underscore. A pair that the grammar declares is ticked off (removed
# from %grammar_dependencies and counted as ok); any other pair is recorded
# as an extra.
for my $parent (grep { $_ !~ /^_/ } keys %tested_dependencies) {
    for my $child (keys %{ $tested_dependencies{$parent} }) {
        next if $parent eq $child;
        if ($grammar_dependencies{$parent}{$child}) {
            delete $grammar_dependencies{$parent}{$child};
            $ok_count++;
        }
        else {
            $extra_count++;
            $extra{$parent}{$child} = 1;
        }
    }
}
# Every (rule => subrule) pair still present in %grammar_dependencies is
# counted in $missing_count and remembered in %missing for the report.
for my $rule (keys %grammar_dependencies) {
    # Fall back to an empty hash ref when the rule has no subrule table, so
    # the dereference below is always well defined. (An empty list `()` as
    # the fallback would just evaluate to undef in this scalar assignment.)
    my $subrules = $grammar_dependencies{$rule} || {};
    for my $subrule (keys %$subrules) {
        if ($rule ne $subrule) {
            $missing_count += 1;
            $missing{$rule}{$subrule} = 1;
        }
    }
}
# Sanity check: a substantial number of declared pairs must have been seen.
ok($ok_count > 100, "Tested a big subset of MathGrammar");
# print STDERR "Extra: \n", Dumper(\%extra);

# Every declared pair has to be covered; the description carries a dump of
# the pairs which were not.
my $missing_report
    = "MathGrammar dependencies (currently tested in $ok_count cases), were not matched in the following cases: \n"
    . Dumper(\%missing);
is($missing_count, 0, $missing_report);

# Allow these for now, until we figure out how to check for the (s) variant rules
# for example: (endPunct Formula { [$item[1],$item[2]]; })(s)
# is($extra_count, 0, "Tests had rules which were matched, but not recorded in grammar metadata: \n".Dumper(\%extra));
( run in 2.311 seconds using v1.01-cache-2.11-cpan-75ffa21a3d4 )