LaTeXML

 view release on metacpan or  search on metacpan

t/170_grammar_coverage.t  view on Meta::CPAN

# Obtain the rule pairs from MathGrammar, which we want to exhaustively test:
my %grammar_dependencies = obtain_dependencies();

my $opts = LaTeXML::Common::Config->new(input_limit => 100, verbosity=>-2);

my $converter = LaTeXML->get_converter($opts);
$converter->prepare_session($opts);

my %tested_dependencies = ();

my @core_tests = parser_test_filenames();
for my $test (@core_tests) {
  note("grammar coverage $test...");
  my $regularized_log = '';
  my $response;
  my $log_handle;
  open($log_handle, ">>", \$regularized_log) or croak("Can't redirect STDERR to log! Dying...");
  {
    local *STDERR       = *$log_handle;
    binmode(STDERR, ':encoding(UTF-8)');
    $response = $converter->convert($test);
#  my $regularized_log = $response->{log};
    }
  # Preprocess split lines back to single lines, e.g.
  # 2|AnythingAn|>>Matched subrule:                    |
  #  |          |[modifierFormulae]<< (return value:   |
  # -- TO:
  # 2|AnythingAn|>>Matched subrule: [modifierFormulae]<< (return value:   |
  # Also:
  # 10|  bigop   |(consumed: [ SUMOP:sum:1])            |
  # 9|preScripte|>>Matched subrule: [$arg[0]]<< (return|
  #   |          |value: [<XMTok                        |

  $regularized_log =~ s/\:\s+\|\n\s*\|\s+\|\[/\: \[/g;
  note($response->{status});
  my @log_lines = split("\n", $regularized_log);
  my $prev_line = '';
  for my $line (@log_lines) {
    if ($line =~ /(\w+)\s*\|(?:(?:\>\>(?:\.*)Matched(?:\(keep\))? (?:subrule|production))|(?:\(consumed))\:\s*\[\s*(\w+|\$arg\[\d+\])/) {
      my $parent = $1;
      my $child = $2;
      if ($child =~ /^\$arg/) {
        if ($prev_line =~ /^\s*\d+\|\s*(\w+)\s*\|/) {
          $child = $1;
        }
      }
      if ($parent ne $child) {
        $tested_dependencies{$parent}{$child} = 1;
      }
    }
    $prev_line = $line;
  }
}

my $ok_count = 0;
my $missing_count = 0;
my $extra_count = 0;
my %missing = ();
my %extra = ();
delete $grammar_dependencies{'Start'}; # never reported in terse log
# Single lexeme top-level rules never parse, BECAUSE the grammar is never run on 1-lexeme formulae
delete $grammar_dependencies{'AnythingAn'}{"FLOATSUPERSCRIPT"};
delete $grammar_dependencies{'AnythingAn'}{"MODIFIER"};
# Reachable conceptually by an ambiguous grammar, but not in the RecDescent algorithm
# AnyOp variants are not reached as Formula variants take precedents (such as Factor's preScripted variants)
delete $grammar_dependencies{'AnyOp'}{"OPERATOR"};
delete $grammar_dependencies{'AnyOp'}{"addScripts"};
delete $grammar_dependencies{'AnyOp'}{"preScripted"};
delete $grammar_dependencies{'argPunct'}{'VERTBAR'};
delete $grammar_dependencies{'Expression'}{'punctExpr'}; # Unreachable, due to Formula -> punctExpr
delete $grammar_dependencies{'aSuperscri'}{'AnyOp'};
delete $grammar_dependencies{'aSuperscri'}{'Expression'};
# These are odd to have been recorded, since at least the intention is that they are preceded by
# aSubscript/aSuperscript. We have explicit tests for the relevant cases.
delete $grammar_dependencies{'Superscrip'}{'endPunct'};
delete $grammar_dependencies{'Subscript'}{'endPunct'};
# forbid rules should never match, don't check them here.
# TODO: We need tests for the always-failing productions!
delete $grammar_dependencies{'doubtArgs'}{'forbidArgs'};
delete $grammar_dependencies{'requireArg'};

# Needs regex enhancement

# preScripted -> bigop
# \sum ^2
# preScripted -> ATOM_OR_ID
# \frac12 _1$

for my $rule(grep {!/^_/} keys %tested_dependencies) {
  my $subrules = $tested_dependencies{$rule};
  for my $subrule(keys %$subrules) {
    if ($rule ne $subrule) {
      if ($grammar_dependencies{$rule}{$subrule}) {
        delete $grammar_dependencies{$rule}{$subrule};
        $ok_count += 1;
      } else {
        $extra_count += 1;
        $extra{$rule}{$subrule} = 1;
      }
    }
  }
}

for my $rule(keys %grammar_dependencies) {
  my $subrules = $grammar_dependencies{$rule} || ();
  for my $subrule (keys %$subrules) {
    if ($rule ne $subrule) {
      $missing_count += 1;
      $missing{$rule}{$subrule} = 1;
    }
  }
}

ok($ok_count > 100, "Tested a big subset of MathGrammar");
# print STDERR "Extra: \n", Dumper(\%extra);
is($missing_count, 0, "MathGrammar dependencies (currently tested in $ok_count cases), were not matched in the following cases: \n".Dumper(\%missing));

# Allow these for now, until we figure out how to check for the (s) variant rules
# for example: (endPunct Formula { [$item[1],$item[2]]; })(s)
# is($extra_count, 0, "Tests had rules which were matched, but not recorded in grammar metadata: \n".Dumper(\%extra));



( run in 2.311 seconds using v1.01-cache-2.11-cpan-75ffa21a3d4 )