Catmandu-MARC

 view release on metacpan or  search on metacpan

lib/Catmandu/Importer/MARC/Line.pm  view on Meta::CPAN


    $fixer->fix($importer)->each(sub {
        my $item = shift;
        printf "title: %s\n" , $item->{title};
    });

=head1 CONFIGURATION

=over

=item file

Read input from a local file given by its path. Alternatively a scalar
reference can be passed to read from a string.

=item fh

Read input from an L<IO::Handle>. If not specified, L<Catmandu::Util::io> is used to
create the input stream from the C<file> argument or by using STDIN.

=item encoding

Binmode of the input stream C<fh>. Set to C<:utf8> by default.

=item fix

An ARRAY of one or more fixes or file scripts to be applied to imported items.

=back

=head1 METHODS

Every Catmandu::Importer is a Catmandu::Iterable all its methods are inherited.

=head1 AUTHOR

Johann Rolschewski,  E<lt>jorol at cpanE<gt>

=head1 SEE ALSO

L<Catmandu::Importer>,
L<Catmandu::Iterable>

=cut

package Catmandu::Importer::MARC::Line;
use Catmandu::Sane;
use Moo;

our $VERSION = '1.281';

with 'Catmandu::Importer';

sub generator {
    my ($self) = @_;
    sub {
        state $fh    = $self->fh;
        state $count = 0;

        # set input record separator to paragraph mode
        local $/ = '';

        # get next record
        while (defined(my $data = $fh->getline)) {
            $count++;
            my @record;
            my $id;
            chomp $data;

            # split record into fields
            my @fields = split /\n/, $data;

            # first field should be the MARC leader
            my $leader = shift @fields;
            if (length $leader == 24 && $leader =~ m/^\d{5}.*4500/) {
                push @record, ['LDR', ' ', ' ', '_', $leader];
            } else {
                warn "not a valid MARC leader: $leader";
            }
            for my $field (@fields) {

                # process control fields
                if ($field =~ m/^00.\s/) {
                    my ($tag, $value) = $field =~ m/^(\d{3})\s(.*)/;
                    push @record, [$tag, ' ', ' ', '_', $value];

                    # get record id
                    if ($tag eq '001') {
                        $id = $value;
                    }
                }

                # process variable data fields
                else {
                    my ($tag, $ind1, $ind2, $sf)
                        = $field =~ m/^(\d{3})\s([a-z0-9\s])([a-z0-9\s])\s(.*)/;

                    # check if field has content
                    if ($sf) {
                        # get subfield codes by pattern
                        # some special characters are allowed as subfiled codes in local defined field
                        # see https://www.loc.gov/marc/96principl.html#eight 8.4.2.3.
                        my @sf_codes = $sf =~ m/\s?\$([a-z0-9!"#\$%&'\(\)\*\+'-\.\/:;<=>])\s/g;

                        # split string by subfield code pattern
                        my @sf_values
                            = grep {length $_}
                                split
                                /\s?\$[a-z0-9!"#\$%&'\(\)\*\+'-\.\/:;<=>]\s/,
                                $sf;
                           
                        if (scalar @sf_codes != scalar @sf_values) {
                            warn
                                'different number of subfield codes and values';
                            next;
                        }

                        push @record,
                            [
                            $tag,  $ind1,
                            $ind2, map {$_, shift @sf_values} @sf_codes

 view all matches for this distribution
 view release on metacpan -  search on metacpan

( run in 1.459 second using v1.00-cache-2.02-grep-82fe00e-cpan-d29e8ade9f55 )