Plack-App-RDF-Files

 view release on metacpan or  search on metacpan

lib/Plack/App/RDF/Files.pm  view on Meta::CPAN

    }

    if ($req->method eq 'HEAD') {
        return [200, $headers->headers, []];
    }

    # parse RDF
    my $model = RDF::Trine::Model->new;
    my $triples = 0;
    my $add_statement = $self->normalize 
            ? sub { 
                my $s = $_[0];
                if ($s->object->is_literal) {
                    my $value = Unicode::Normalize::normalize($self->normalize, $s->object->literal_value);
                    $s->object->literal_value($value);
                }
                $model->add_statement($s); 
              } 
            : sub { 
                $model->add_statement($_[0]); 
            };

    while (my ($name, $file) = each %$files) {
        my $fullname = catdir($file->{location},$name);
        my $parser = RDF::Trine::Parser->guess_parser_by_filename( $fullname );
        $parser = $parser->new unless ref $parser;
        eval { # parse file into model
            $model->begin_bulk_ops;
            $parser->parse_file( $uri, $fullname, $add_statement );
            $model->end_bulk_ops();
        };
        if ($@) {
            $file->{error} = $@;
        } else {
            $file->{triples} = $model->size - $triples;
            $triples = $model->size;
        }
    }
    $env->{'rdf.files'} = $files;

    my $iterator = $model->as_stream;

    # add listing on base URI
    if ( $self->index_property and "$uri" eq ($self->base_uri // $req->base) ) {
        my $stms = $self->index_statements($req);
        if (@$stms) {
            $iterator = $iterator->concat( RDF::Trine::Iterator::Graph->new( $stms ) );
        }
    }

    # add axiomatic triple to empty graphs
    if ($iterator->finished) {
        $iterator = RDF::Trine::Iterator::Graph->new( [ statement(
            iri($uri),
            iri('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
            iri('http://www.w3.org/2000/01/rdf-schema#Resource')
        ) ] );
    }

    # construct PSGI response
    if ( $env->{'psgi.streaming'} ) {
        $env->{'rdf.iterator'} = $iterator;
        return sub {
            my $responder = shift;
            my $body = $self->_serialize_body( $serializer, $iterator );       
            $responder->( [ 200, $headers->headers, $body ] );
        };
    } else {
        my $body = $self->_serialize_body( $serializer, $iterator );
        return [ 200, $headers->headers, $body ];
    }
}

# Build a lazy PSGI response body for the statements of an iterator.
#
# Arguments:
#   $serializer - object providing serialize_iterator_to_file($fh, $iterator)
#   $iterator   - iterator with the statements to serialize
#
# Returns an object created with Plack::Util::inline_object that provides
# the methods getline and close. The first call of getline serializes the
# whole iterator into one UTF-8 encoded string; every later call (and any
# call on an already finished iterator) returns nothing.
#
# getline dies if the in-memory file handle cannot be opened or closed.
sub _serialize_body {
    my ($self, $serializer, $iterator) = @_;

    # serialize as late as possible
    return Plack::Util::inline_object(
        getline => sub {
            return if !$iterator or $iterator->finished;

            # write through an encoding layer into a string buffer, so the
            # body consists of UTF-8 encoded bytes
            my $string = '';
            open( my $fh, '>:encoding(UTF-8)', \$string )
                or die "Cannot open in-memory file handle: $!";
            $serializer->serialize_iterator_to_file($fh, $iterator);

            # buffered output is flushed on close, so a failure here means
            # the serialization is incomplete
            close $fh
                or die "Cannot close in-memory file handle: $!";

            # drop the iterator, the next call of getline signals the end
            $iterator = 0;

            return $string;
        },
        close => sub { $iterator = 0 },
    );
}

# Create the response headers ETag and Last-Modified for a set of files.
# $files is a hash reference that maps each file name to a hash reference
# of file properties, one of them being mtime.
sub headers {
    my ($self, $files) = @_;

    # the weak ETag is a digest of all property values of all files; files
    # and properties are visited in sorted order to get a stable result
    my $digest = Digest::MD5->new;
    for my $name (sort keys %$files) {
        my $info = $files->{$name};
        $digest->add( @{$info}{ sort keys %$info } );
    }

    # the most recent modification time among all files
    my @mtimes  = map { $_->{mtime} } values %$files;
    my $lastmod = max(@mtimes);

    return Plack::Util::headers([
        'ETag'          => 'W/"' . $digest->hexdigest . '"',
        'Last-Modified' => HTTP::Date::time2str($lastmod),
    ]);
}

# Functional shortcut: app is only exported on request.
use parent 'Exporter';
our @EXPORT_OK = ('app');

# Create a new application object, all arguments are passed to the constructor.
sub app {
    return Plack::App::RDF::Files->new(@_);
}
 
1;
__END__

=head1 NAME
 
Plack::App::RDF::Files - serve RDF data from files

lib/Plack/App/RDF/Files.pm  view on Meta::CPAN

=head1 CONFIGURATION

=over 4

=item base_dir

Mandatory base directory that all resource directories are located in.

=item base_uri

The base URI of all resources. If no base URI has been specified, the
base URI is taken from the PSGI request.

=item file_types

An array of RDF file types, given as extensions to look for. Set to
C<['rdfxml','nt','ttl']> by default.

=item index_property

By default an HTTP 404 error is returned if one tries to access the base
directory. Enable this option by setting it to 1 or to a URI, to also serve
RDF data from the base directory.  By default
C<http://www.w3.org/2000/01/rdf-schema#seeAlso> is used as index property, if
enabled.

=item path_map

Optional code reference that maps a local part of a URI to a relative
directory. Set to the identity mapping by default.

=item namespaces

Optional namespaces for serialization, passed to L<RDF::Trine::Serializer>.

=item normalize

Optional Unicode Normalization form (NFD, NFKD, NFC, NFKC). Requires
L<Unicode::Normalize>.

=back

=head1 METHODS

=head2 call( $env )

Core method of the PSGI application.

The following PSGI environment variables are read and/or set by the
application.

=over 4

=item rdf.uri

The requested URI as string or L<URI> object.

=item rdf.iterator

The L<RDF::Trine::Iterator> that will be used for serializing, if
C<psgi.streaming> is set. One can use this variable to catch the RDF
data in another post-processing middleware.

=item rdf.files

A hash of source filenames, each with the number of triples (on success)
as property C<triples>, an error message as C<error> if parsing failed, and
the timestamp of last modification as C<mtime>. C<triples> and C<error> may
not be given before parsing, if C<rdf.iterator> is set.

=item negotiate.format

RDF serialization format (See L<Plack::Middleware::Negotiate>). Supported
values are C<ttl>, C<nt>, C<n3>, C<json>, and C<rdfxml>.

=back

If an existing resource does not contain triples, the axiomatic triple
C<< $uri rdf:type rdfs:Resource >> is returned.

=head2 files( $env )

Get a list of RDF files (as hash reference) that will be read for a given
request, given as L<PSGI> environment.

The requested URI is saved in field C<rdf.uri> of the request environment.  On
success returns the base directory and a list of files, each mapped to its last
modification time.  Undef is returned if the request contained invalid
characters (anything other than C<a-zA-Z0-9:.@/->), the forbidden sequence
C<../>, or a sequence starting with C</>, or if called with the base URI while
C<index_property> is not enabled.

=head2 headers( $files ) 

Get a response headers object (as provided by L<Plack::Util>::headers) with
ETag and Last-Modified from a list of RDF files given as returned by the files
method.

=head1 FUNCTIONS

=head2 app( %options )

This shortcut for C<< Plack::App::RDF::Files->new >> can be exported on request
to simplify one-liners.

=head1 SEE ALSO

Use L<Plack::Middleware::Negotiate> to add content negotiation based on
a URL parameter and/or suffix.

See L<RDF::LinkedData> for a different module to serve RDF as linked data.
See also L<RDF::Flow> and L<RDF::Lazy> for processing RDF data.

See L<http://foafpress.org/> for a similar approach in PHP.

=head1 COPYRIGHT AND LICENSE

Copyright Jakob Voss, 2014-

This library is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.



( run in 0.741 second using v1.01-cache-2.11-cpan-140bd7fdf52 )