Amazon-S3

 view release on metacpan or  search on metacpan

lib/Amazon/S3.pm  view on Meta::CPAN

package Amazon::S3;

use strict;
use warnings;

use Amazon::S3::Bucket;
use Amazon::S3::BucketV2;
use Amazon::S3::Constants qw(:all);

use Amazon::S3::Util qw(
  set_md5_header
  urlencode
  get_parameters
  create_xml_request
  create_api_uri
  create_query_string
);

use Amazon::S3::Logger;
use Amazon::S3::Signature::V4;

use Carp;
use Data::Dumper;
use Digest::HMAC_SHA1;
use Digest::MD5 qw(md5_hex);
use English     qw(-no_match_vars);
use HTTP::Date;
use LWP::UserAgent::Determined;
use List::Util   qw( any pairs none );
use MIME::Base64 qw(encode_base64 decode_base64);
use Scalar::Util qw( reftype blessed );
use URI;
use XML::Simple;

use parent qw(Class::Accessor::Fast Exporter);

__PACKAGE__->mk_accessors(
  qw(
    aws_access_key_id
    aws_secret_access_key
    token
    buffer_size
    cache_signer
    credentials
    dns_bucket_names
    digest
    err
    errstr
    error
    express
    host
    last_request
    last_response
    logger
    log_level
    retry
    _region
    secure
    _signer
    timeout
    ua
  ),
);

our $VERSION = '2.0.2'; ## no critic (RequireInterpolation)

our @EXPORT_OK = qw(is_domain_bucket);

########################################################################
sub new {
########################################################################
  my ( $class, @args ) = @_;

  my %options = ref $args[0] ? %{ $args[0] } : @args;

  $options{timeout}          //= $DEFAULT_TIMEOUT;
  $options{secure}           //= $TRUE;
  $options{host}             //= $DEFAULT_HOST;
  $options{dns_bucket_names} //= $TRUE;
  $options{cache_signer}     //= $FALSE;
  $options{retry}            //= $FALSE;
  $options{express}          //= $FALSE;

  $options{_region} = delete $options{region};
  $options{_signer} = delete $options{signer};

  # convenience for level => 'debug' & for consistency with
  # Amazon::Credentials only do this if we are using internal logger,
  # call should NOT use debug flag but rather use their own logger's
  # level to turn on higher levels of logging...

  if ( !$options{logger} ) {
    if ( delete $options{debug} ) {
      $options{level} = 'debug';
    }

    $options{log_level} = delete $options{level};
    $options{log_level} //= $DEFAULT_LOG_LEVEL;

    $options{logger}
      = Amazon::S3::Logger->new( log_level => $options{log_level} );
  }

  my $self = $class->SUPER::new( \%options );

  # setup logger internal logging

  $self->get_logger->debug(
    sub {
      my %safe_options = %options;

      if ( $safe_options{aws_secret_access_key} ) {
        $safe_options{aws_secret_access_key} = '****';
        $safe_options{aws_access_key_id}     = '****';
      }

      return Dumper( [ options => \%safe_options ] );
    },
  );

  if ( !$self->credentials ) {

    croak 'No aws_access_key_id'
      if !$self->aws_access_key_id;

    croak 'No aws_secret_access_key'
      if !$self->aws_secret_access_key;

    # encrypt credentials
    $self->aws_access_key_id( _encrypt( $self->aws_access_key_id ) );
    $self->aws_secret_access_key( _encrypt( $self->aws_secret_access_key ) );
    $self->token( _encrypt( $self->token ) );
  }

  my $ua;

  if ( $self->retry ) {
    $ua = LWP::UserAgent::Determined->new(
      keep_alive            => $KEEP_ALIVE_CACHESIZE,
      requests_redirectable => [qw(GET HEAD DELETE)],
    );

    $ua->timing( join $COMMA, map { 2**$_ } 0 .. $MAX_RETRIES );
  }
  else {
    $ua = LWP::UserAgent->new(
      keep_alive            => $KEEP_ALIVE_CACHESIZE,
      requests_redirectable => [qw(GET HEAD DELETE)],
    );
  }

  $ua->timeout( $self->timeout );
  $ua->env_proxy;
  $self->ua($ua);

  $self->region( $self->_region // $DEFAULT_REGION );

  if ( !$self->_signer && $self->cache_signer ) {
    $self->_signer( $self->signer );
  }

  if ( $self->express ) {
    $self->use_express_one_zone();
  }

  $self->turn_on_special_retry();

  return $self;
}

########################################################################
sub use_express_one_zone {
########################################################################
  my ($self) = @_;

  my $express = $self->express;

  $self->express($TRUE);

  $self->host( sprintf 's3express-control.%s.amazonaws.com', $self->region );
  $self->dns_bucket_names($FALSE);

  return $express;
}

########################################################################
{
  my $encryption_key;

lib/Amazon/S3.pm  view on Meta::CPAN

  my ( $self, $conf ) = @_;

  my $bucket = delete $conf->{bucket};

  die 'no bucket'
    if !$bucket;

  my $headers = delete $conf->{headers};

  croak 'must specify bucket'
    if !$bucket;

  $conf ||= {};

  my ( $marker, $next_marker, $query_next )
    = @{ $LIST_OBJECT_MARKERS{'3'} };

  if ( $conf->{'key-marker'} ) {
    $conf->{$query_next} = delete $conf->{'key-marker'};
  }

  if ( %{$conf} ) {

    # remove undefined elements
    foreach ( keys %{$conf} ) {
      next if defined $conf->{$_};

      delete $conf->{$_};
    }
  }

  my $path
    = create_api_uri( path => "$bucket/", api => 'versions', %{$conf} );

  my $r = $self->_send_request(
    { method  => 'GET',
      path    => $path,
      headers => $headers // {},
      region  => $self->region,
    },
  );

  return
    if !$r || $self->errstr;

  $self->get_logger->debug(
    sub {
      return Dumper(
        [ marker      => $marker,
          next_marker => $next_marker,
          response    => $r,
        ],
      );
    },
  );

  return $r;
}

########################################################################
sub get_credentials {
########################################################################
  my ($self) = @_;

  my $aws_access_key_id;
  my $aws_secret_access_key;
  my $token;

  if ( $self->credentials ) {
    $aws_access_key_id     = $self->credentials->get_aws_access_key_id;
    $aws_secret_access_key = $self->credentials->get_aws_secret_access_key;
    $token                 = $self->credentials->get_token;
  }
  else {
    $aws_access_key_id     = $self->aws_access_key_id;
    $aws_secret_access_key = $self->aws_secret_access_key;
    $token                 = $self->token;
  }

  return ( $aws_access_key_id, $aws_secret_access_key, $token );
}

# Log::Log4perl compatibility routines
########################################################################
sub get_logger {
########################################################################
  my ($self) = @_;

  return $self->logger;
}

########################################################################
sub level {
########################################################################
  my ( $self, @args ) = @_;

  if (@args) {
    $self->log_level( $args[0] );

    $self->get_logger->level( uc $args[0] );
  }

  return $self->get_logger->level;
}

########################################################################
sub signer {
########################################################################
  my ($self) = @_;

  return $self->_signer
    if $self->_signer;

  my $creds   = $self->credentials ? $self->credentials : $self;
  my $express = $self->express;

  my $signer = Amazon::S3::Signature::V4->new(
    { access_key_id  => $creds->get_aws_access_key_id,
      secret         => $creds->get_aws_secret_access_key,
      region         => $self->region || $self->get_default_region,
      service        => $express ? 's3express' : 's3',
      security_token => $creds->get_token,
    },
  );

  if ( $self->cache_signer ) {
    $self->_signer($signer);
  }

  return $signer;
}

########################################################################
sub _validate_acl_short {
########################################################################
  my ( $self, $policy_name ) = @_;

  croak sprintf '%s is not a supported canned access policy', $policy_name
    if none { $policy_name eq $_ }
    qw(private public-read public-read-write authenticated-read);

  return;
}

########################################################################
# Determine if a bucket can used as subdomain for the host
# Specifying the bucket in the URL path is being deprecated
# So, if the bucket name is suitable, we need to use it
# as a subdomain in the host name instead.
#
# Currently buckets with periods in their names cannot be handled in
# that manner due to SSL certificate issues, they will have to remain
# in the url path instead.
#
########################################################################
sub is_domain_bucket { goto &_can_bucket_be_subdomain; }
########################################################################

########################################################################
sub _can_bucket_be_subdomain {
########################################################################
  my ($bucketname) = @_;

  return $FALSE
    if length $bucketname > $MAX_BUCKET_NAME_LENGTH - 1;

  return $FALSE
    if length $bucketname < $MIN_BUCKET_NAME_LENGTH;

  return $FALSE
    if $bucketname !~ m{\A[[:lower:]][[:lower:]\d-]*\z}xsm;

  return $FALSE
    if $bucketname !~ m{[[:lower:]\d]\z}xsm;

lib/Amazon/S3.pm  view on Meta::CPAN

      $src,
      SuppressEmpty => $EMPTY,
      ForceArray    => ['Contents'],
      KeepRoot      => $keep_root,
      NoAttr        => $TRUE,
    );
  };

  if ( !$xml_hr && $EVAL_ERROR ) {
    confess "Error parsing $src:  $EVAL_ERROR";
  }

  return $xml_hr;
}

# returns 1 if errors were found
########################################################################
sub _remember_errors {
########################################################################
  my ( $self, $src, $keep_root ) = @_;

  return
    if !$src;

  if ( !ref $src && $src !~ /^[[:space:]]*</xsm ) { # if not xml
    ( my $code = $src ) =~ s/^[[:space:]]*[(][\d]*[)].*$/$1/xsm;

    $self->err($code);
    $self->errstr($src);

    return $TRUE;
  }

  my $r = ref $src ? $src : $self->_xpc_of_content( $src, $keep_root );

  $self->error($r);

  # apparently buckets() does not keep_root
  if ( $r->{Error} ) {
    $r = $r->{Error};
  }

  my ( $code, $message ) = @{$r}{qw(Code Message)};

  return $FALSE
    if !$code;

  $self->err($code);
  $self->errstr($message);

  return $TRUE;
}

# Deprecated - this adds a header for the old V2 auth signatures
########################################################################
sub _add_auth_header { ## no critic (ProhibitUnusedPrivateSubroutines)
########################################################################
  my ( $self, $headers, $method, $path ) = @_;

  my ( $aws_access_key_id, $aws_secret_access_key, $token )
    = $self->get_credentials;

  if ( not $headers->header('Date') ) {
    $headers->header( Date => time2str(time) );
  }

  if ($token) {
    $headers->header( $AMAZON_HEADER_PREFIX . 'security-token' => $token );
  }

  my $canonical_string = $self->_canonical_string( $method, $path, $headers );

  $self->get_logger->trace(
    sub {
      return Dumper(
        [ headers          => $headers,
          canonincal_sring => $canonical_string,
        ]
      );
    }
  );

  my $encoded_canonical
    = $self->_encode( $aws_secret_access_key, $canonical_string );

  $headers->header(
    Authorization => sprintf 'AWS %s:%s',
    $aws_access_key_id, $encoded_canonical
  );

  return;
}

# generates an HTTP::Headers objects given one hash that represents http
# headers to set and another hash that represents an object's metadata.
########################################################################
sub _merge_meta {
########################################################################
  my ( $self, $headers, $metadata ) = @_;

  $headers  //= {};
  $metadata //= {};

  my $http_header = HTTP::Headers->new;

  foreach my $p ( pairs %{$headers} ) {
    my ( $k, $v ) = @{$p};
    $http_header->header( $k => $v );
  }

  foreach my $p ( pairs %{$metadata} ) {
    my ( $k, $v ) = @{$p};
    $http_header->header( "$METADATA_PREFIX$k" => $v );
  }

  return $http_header;
}

# generate a canonical string for the given parameters.  expires is optional and is
# only used by query string authentication.
########################################################################

lib/Amazon/S3.pm  view on Meta::CPAN

This module is rather dated, however with some help from a few
contributors it has had some recent updates. Recent changes include
implementations of:

=over 5

=item ListObjectsV2

=item CopyObject

=item DeleteObjects

=item ListObjectVersions

=back

Additionally, this module now implements Signature Version 4 signing,
unit tests have been updated and more documentation has been added or
corrected. Credentials are encrypted if you have encryption modules installed.

I<NEW!>

The C<Amazon::S3> modules have been heavily refactored over the last
few releases to increase maintainability and to add new features. New
features include:

=over 5

=item L<Amazon::S3::BucketV2>

This new module implements a mechanism to invoke I<almost> all of the
S3 APIs using a standard calling method.

The module will format your Perl objects as XML payloads and enable
you to provide all of the parameters required to make an API
call. Headers and URI parameters can also be passed to the
methods. L<Amazon::S3::BucketV2> is a subclass of
L<Amazon::S3::Bucket>, meaning you can still invoke all of the same
methods found there.

See L<Amazon::S3::BucketV2> for more details.

=item Limited Support for Directory Buckets

This version include limited support for directory buckets.

You can create and list directory buckets.

I<Directory buckets use the S3 Express One Zone storage class, which
is recommended if your application is performance sensitive and
benefits from single-digit millisecond PUT and GET latencies.> -
L<https://docs.aws.amazon.com/AmazonS3/latest/userguide/directory-buckets-overview.html>

=over 10

=item list_directory_buckets

List the directory buckets. Note this only returns a list of you
directory buckets, not their contents. In order to list the contents
of a directory bucket you must first create a session that establishes
temporary credentials used to acces the Zonal endpoints. You then use
those credentials for signing requests using the ListObjectV2 API.

This process is currently B<not supported> by this class.

L<https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateSession.html>

<Lhttps://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html>

=item add_bucket

You can add a regin and availability zone to this call in order to
create a directory bucket.

 $bucket->add_bucket({ bucket => $bucket_name, availability_zone => 'use1-az5' });

Note that your bucket name must conform to the naming conventions for
directory buckets. -
L<https://docs.aws.amazon.com/AmazonS3/latest/userguide/directory-buckets-overview.html#directory-buckets-name>
 
=back

=item Addition of version parameter for C<delete_key>

You can now delete a version of a key by including its verion ID.

 $bucket->delete_key($key, $version_id);

=item Methods that accept a hash reference can now accept a
C<headers> object that may contain any additional headers you might want
to send with a request. Some of the methods that now allow you to pass
a header object include:

=over 10

=item add_bucket

=item add_key

=item get_key

Can now be called with a hashref which may include both a C<headers>
and C<uri_params> object.

=item delete_bucket

=item list_bucket

=item list_object_versions

=item upload_multipart_object

=back

=back

=head2 Comparison to Other Perl S3 Modules

Other implementations for accessing Amazon's S3 service include
C<Net::Amazon::S3> and the C<Paws> project. C<Amazon::S3> ostensibly
was intended to be a drop-in replacement for C<Net:Amazon::S3> that
"traded some performance in return for portability". That statement is

lib/Amazon/S3.pm  view on Meta::CPAN

Version V2.

B<New regions after January 30, 2014 will only support Signature Version 4.>

See L</Signature Version V4> below for important details.

=over 10

=item Signature Version 4

L<https://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-query-string-auth.html>

I<IMPORTANT NOTE:>

Unlike Signature Version 2, Version 4 requires a regional
parameter. This implies that you need to supply the bucket's region
when signing requests for any API call that involves a specific
bucket. Starting with version 0.55 of this module,
C<Amazon::S3::Bucket> provides a new method (C<region()>) and accepts
in the constructor a C<region> parameter.  If a region is not
supplied, the region for the bucket will be set to the region set in
the C<account> object (C<Amazon::S3>) that you passed to the bucket's
new constructor.  Alternatively, you can request that the bucket's new
constructor determine the bucket's region for you by calling the
C<get_location_constraint()> method.

When signing API calls, the region for the specific bucket will be
used. For calls that are not regional (C<buckets()>, e.g.) the default
region ('us-east-1') will be used.

=item Signature Version 2

L<https://docs.aws.amazon.com/AmazonS3/latest/userguide/RESTAuthentication.html>

=back

=item Multipart Upload Support

There are some recently added unit tests for multipart uploads that
seem to indicate this feature is working as expected.  Please report
any deviation from expected results if you are using those methods.

For more information regarding multipart uploads visit the link below.

L<https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateMultipartUpload.html>

=back

=head1 METHODS AND SUBROUTINES

Unless otherwise noted methods will return an C<undef> if an error
occurs.  You can get more information about the error by calling
C<err()> and C<errstr()>.

=head2 new 

Create a new S3 client object. Takes some arguments:

=over

=item credentials (optional)

Reference to a class (like C<Amazon::Credentials>) that can provide
credentials via the methods:

 get_aws_access_key_id()
 get_aws_secret_access_key()
 get_token()

If you do not provide a credential class you must provide the keys
when you instantiate the object. See below.

I<You are strongly encourage to use a class that provides getters. If
you choose to provide your credentials to this class then they will be
stored in this object. If you dump the class you will likely expose
those credentials.>

=item aws_access_key_id

Use your Access Key ID as the value of the AWSAccessKeyId parameter
in requests you send to Amazon Web Services (when required). Your
Access Key ID identifies you as the party responsible for the
request.

=item aws_secret_access_key 

Since your Access Key ID is not encrypted in requests to AWS, it
could be discovered and used by anyone. Services that are not free
require you to provide additional information, a request signature,
to verify that a request containing your unique Access Key ID could
only have come from you.

B<DO NOT INCLUDE THIS IN SCRIPTS OR APPLICATIONS YOU
DISTRIBUTE. YOU'LL BE SORRY.>

I<Consider using a credential class as described above to provide
credentials, otherwise this class will store your credentials for
signing the requests. If you dump this object to logs your credentials
could be discovered.>

=item token

An optional temporary token that will be inserted in the request along
with your access and secret key.  A token is used in conjunction with
temporary credentials when your EC2 instance has
assumed a role and you've scraped the temporary credentials from
I<http://169.254.169.254/latest/meta-data/iam/security-credentials>

=item secure

Set this to a true value if you want to use SSL-encrypted connections
when connecting to S3. Starting in version 0.49, the default is true.

default: true

=item timeout

Defines the time, in seconds, your script should wait or a
response before bailing.

default: 30s

=item retry

Enables or disables the library to retry upon errors. This
uses exponential backoff with retries after 1, 2, 4, 8, 16,
32 seconds, as recommended by Amazon.

default: off

=item host

Defines the S3 host endpoint to use.

default: s3.amazonaws.com

Note that requests are made to domain buckets when possible.  You can
prevent that behavior if either the bucket name does not conform to
DNS bucket naming conventions or you preface the bucket name with '/'
or explicitly turn off domain buckets by setting C<dns_bucket_names>
to false.

If you set a region then the host name will be modified accordingly if
it is an Amazon endpoint.

=item region

The AWS region you where your bucket is located.

default: us-east-1

=item buffer_size

The default buffer size when reading or writing files.

default: 4096

=back

=head2 signer

Sets or retrieves the signer object. API calls must be signed using
your AWS credentials. By default, starting with version 0.54 the
module will use L<Net::Amazon::Signature::V4> as the signer and
instantiate a signer object in the constructor. Note however, that
signers need your credentials and they I<will> get stored by that
class, making them susceptible to inadvertant exfiltration. You have a
few options here:

=over 5

=item 1. Use your own signer.

You may have noticed that you can also provide your own credentials
object forcing this module to use your object for retrieving
credentials. Likewise, you can use your own signer so that this
module's signer never sees or stores those credentials.

=item 2. Pass the credentials object and set C<cache_signer> to a
false value.

If you pass a credentials object and set C<cache_signer> to a false
value, the module will use the credentials object to retrieve
credentials and create a new signer each time an API call is made that
requires signing. This prevents your credentials from being stored
inside of the signer class.

I<Note that using your own credentials object that stores your
credentials in plaintext is also going to expose your credentials when
someone dumps the class.>

=item 3. Pass credentials, set C<cache_signer> to a false value.

Unfortunately, while this will prevent L<Net::Amazon::Signature::V4>
from hanging on to your credentials, you credentials will be stored in
the C<Amazon::S3> object.

Starting with version 0.55 of this module, if you have installed
L<Crypt::CBC> and L<Crypt::Blowfish>, your credentials will be
encrypted using a random key created when the class is
instantiated. While this is more secure than leaving them in
plaintext, if the key is discovered (the key however is not stored in
the object's hash) and the object is dumped, your I<encrypted>
credentials can be exposed.

=item 4. Use very granular credentials for bucket access only.

Use credentials that only allow access to a bucket or portions of a
bucket required for your application. This will at least limit the
I<blast radius> of any potential security breach.

=item 5. Do nothing...send the credentials, use the default signer.

In this case, both the C<Amazon::S3> class and the
L<Net::Amazon::Signature::V4> have your credentials. Caveat Emptor.

See also L<Amazon::Credentials> for more information about safely
storing your credentials and preventing exfiltration.

=back

=head2 region

Sets the region for the API calls. This will also be the
default when instantiating the bucket object unless you pass the
region parameter in the C<bucket> method or use the C<verify_region>
flag that will I<always> verify the region of the bucket using the
C<get_location_constraint> method.

default: us-east-1

=head2 buckets

 buckets([verify-region])

=over

=item verify-region (optional)

C<verify-region> is a boolean value that indicates if the
bucket's region should be verified when the bucket object is
instantiated.

If set to true, this method will call the C<bucket> method with
C<verify_region> set to true causing the constructor to call the
C<get_location_constraint> for each bucket to set the bucket's
region. This will cause a significant decrease in the peformance of
the C<buckets()> method. Setting the region for each bucket is
necessary since API operations on buckets require the region of the
bucket when signing API requests. If all of your buckets are in the
same region and you have passed a region parameter to your S3 object,
then that region will be used when calling the constructor of your
bucket objects.

default: false

=back

Returns a reference to a hash containing the metadata for all of the
buckets owned by the accout or (see below) or C<undef> on error.

=over

=item owner_id

The owner ID of the bucket's owner.

=item owner_display_name

The name of the owner account. 

=item buckets

An array of L<Amazon::S3::Bucket> objects for the account. Returns
C<undef> if there are not buckets or an error occurs.

=back



( run in 0.916 second using v1.01-cache-2.11-cpan-39bf76dae61 )