App-MARC-Validator

 view release on metacpan or  search on metacpan

Validator.pm  view on Meta::CPAN

package App::MARC::Validator;

use strict;
use warnings;

use App::MARC::Validator::Utils qw(obj_to_json);
use Class::Utils qw(set_params);
use Data::MARC::Validator::Report;
use DateTime;
use English;
use Getopt::Std;
use IO::Barf qw(barf);
use IO::Uncompress::AnyUncompress qw($AnyUncompressError);
use List::Util 1.33 qw(none);
use MARC::Batch;
use MARC::File::XML (BinaryEncoding => 'utf8', RecordFormat => 'MARC21');
use MARC::Validator 0.14;
use MARC::Validator::Filter;
use Unicode::UTF8 qw(encode_utf8);

our $VERSION = 0.09;

# Constructor.
sub new {
	my ($class, @params) = @_;

	# Create object.
	my $self = bless {}, $class;

	# Process parameters.
	set_params($self, @params);

	# Object.
	return $self;
}

# Run.
sub run {
	my $self = shift;

	# Process arguments.
	$self->{'_opts'} = {
		'd' => 0,
		'f' => 0,
		'h' => 0,
		'i' => '001',
		'l' => 0,
		'o' => undef,
		'p' => 0,
		'r' => 0,
		'u' => undef,
		'v' => 0,
	};
	if (! getopts('dfhi:lo:pru:v', $self->{'_opts'})
		|| $self->{'_opts'}->{'h'}) {

		$self->_usage;
		return 1;
	}
	if (! $self->{'_opts'}->{'f'}
		&& ! $self->{'_opts'}->{'l'}) {

		if (@ARGV < 1) {
			$self->_usage;
			return 1;
		}
		$self->{'_marc_xml_files'} = [@ARGV];
	}

Validator.pm  view on Meta::CPAN

	$self->_init_plugins;
	foreach my $marc_xml_file (@{$self->{'_marc_xml_files'}}) {
		my ($fh, $errno);
		if ($self->_open_marc_input($marc_xml_file, \$fh, \$errno)) {
			print STDERR "Cannot open file '$marc_xml_file'.";
			if (defined $errno) {
				print STDERR "\tErrno: $errno\n";
			}
			return 1;
		}
		my $marc_batch = eval {
			MARC::Batch->new('XML', $fh);
		};
		if ($EVAL_ERROR) {
			print STDERR "Cannot open MARC XML stream.\n";
			print STDERR "\tError: $EVAL_ERROR\n";
			return 1;
		}

		# XXX strict mode returns undef in case of warning.
		$marc_batch->strict_off;

		# Don't print warnings on stdout.
		$marc_batch->warnings_off;

		my $num = 0;
		my $previous_record;
		while (1) {
			$num++;
			my $record = eval {
				$marc_batch->next;
			};
			if ($EVAL_ERROR) {
				print STDERR "Cannot process file '$marc_xml_file', record '$num'.".
					(
						defined $previous_record
						? "Previous record is ".encode_utf8($previous_record->title)."\n"
						: ''
					);
				print STDERR "Error: $EVAL_ERROR\n";
				next;
			}
			if (! defined $record) {
				last;
			}
			$previous_record = $record;

			# Collect statistics.
			foreach my $plugin_obj (@{$self->{'_plugins'}}) {
				$plugin_obj->process($record);
			}
		}
	}
	$self->_postprocess_plugins;

	my @plugin_reports;
	foreach my $plugin_obj (@{$self->{'_plugins'}}) {
		push @plugin_reports, $plugin_obj->report;
	}
	my $report = Data::MARC::Validator::Report->new(
		'datetime' => DateTime->now,
		'plugins' => \@plugin_reports,
	);

	my $json = obj_to_json($self, $report);

	# Save to file.
	if (defined $self->{'_opts'}->{'o'}) {
		barf($self->{'_opts'}->{'o'}, encode_utf8($json));

	# Print to STDOUT.
	} else {
		print encode_utf8($json);
	}

	return 0;
}

sub _postprocess_plugins {
	my $self = shift;

	foreach my $plugin_obj (@{$self->{'_plugins'}}) {
		$plugin_obj->postprocess;
	}

	return;
}

sub _usage {
	my $self = shift;

	print STDERR "Usage: $0 [-d] [-f] [-h] [-i id] [-l] [-o output_file] [-p] [-r] [-u use_string] [-v] [--version] marc_xml_file..\n";
	print STDERR "\t-d\t\tDebug mode.\n";
	print STDERR "\t-f\t\tList of filter plugins.\n";
	print STDERR "\t-h\t\tPrint help.\n";
	print STDERR "\t-i id\t\tRecord identifier (default value is 001).\n";
	print STDERR "\t-l\t\tList of plugins.\n";
	print STDERR "\t-o output_file\tOutput file (default is STDOUT).\n";
	print STDERR "\t-p\t\tPretty print JSON output.\n";
	print STDERR "\t-r\t\tRecommendations.\n";
	print STDERR "\t-u use_string\tUse string to prefer plugin or filter (default situation is use all).\n";
	print STDERR "\t\t\te.g. plugin:MARC::Validator::Plugin::Field008\n";
	print STDERR "\t-v\t\tVerbose mode.\n";
	print STDERR "\t--version\tPrint version.\n";
	print STDERR "\tmarc_xml_file..\tMARC XML file(s).\n";

	return;
}

sub _use_plugins {
	my $self = shift;

	if (! defined $self->{'_opts'}->{'u'}) {
		return ();
	}
	my @use_options = split m/,/, $self->{'_opts'}->{'u'};
	my @use_plugins;
	foreach my $use_option (@use_options) {
		my ($type, $name) = split m/:/ms, $use_option, 2;
		if ($type eq 'plugin') {
			push @use_plugins, $name;
		}
	}

	return @use_plugins;
}

1;

__END__

=pod

=encoding utf8

=head1 NAME

App::MARC::Validator - Base class for marc-validator script.

=head1 SYNOPSIS

 use App::MARC::Validator;

 my $app = App::MARC::Validator->new;
 my $exit_code = $app->run;

=head1 METHODS

=head2 C<new>

 my $app = App::MARC::Validator->new;

Constructor.

Returns instance of object.

=head2 C<run>

 my $exit_code = $app->run;

Run MARC validation command line application.

Returns 1 for error, 0 for success.

=head1 ERRORS

 new():
         From Class::Utils::set_params():
                 Unknown parameter '%s'.

=head1 DEPENDENCIES

L<App::MARC::Validator::Utils>,
L<Class::Utils>,
L<Data::MARC::Validator::Report>,
L<DateTime>,
L<English>,
L<Getopt::Std>,
L<IO::Barf>,
L<IO::Uncompress::AnyUncompress>,
L<List::Util>,
L<MARC::Batch>,
L<MARC::File::XML>,
L<MARC::Validator>,
L<MARC::Validator::Filter>,
L<Unicode::UTF8>.

=head1 REPOSITORY

L<https://github.com/michal-josef-spacek/App-MARC-Validator>

=head1 AUTHOR

Michal Josef Špaček L<mailto:skim@cpan.org>

L<http://skim.cz>

=head1 LICENSE AND COPYRIGHT

© 2025-2026 Michal Josef Špaček

BSD 2-Clause License

=head1 ACKNOWLEDGEMENTS

Development of this software has been made possible by institutional support
for the long-term strategic development of the National Library of the Czech
Republic as a research organization provided by the Ministry of Culture of
the Czech Republic (DKRVO 2024–2028), Area 11: Linked Open Data.

=head1 VERSION

0.09

=cut



( run in 1.539 second using v1.01-cache-2.11-cpan-140bd7fdf52 )