Courriel

 view release on metacpan or  search on metacpan

lib/Courriel.pm  view on Meta::CPAN

        my ($text) = $validator->(@_);

        my $part = Courriel::Part::Single->new(
            headers         => $self->headers,
            encoded_content => $text,
        );

        $self->_replace_top_level_part($part);

        return;
    }
}

{
    my $validator = validation_for(
        params => [
            text         => { type => StringRef },
            is_character => { type => Bool, default => 0 },
        ],
        named_to_list => 1,
    );

    sub parse {
        my $class = shift;
        my ( $text, $is_character ) = $validator->(@_);

        if ($is_character) {
            ${$text} = encode( 'UTF-8', ${$text} );
        }

        return $class->new( part => $class->_parse($text) );
    }
}

sub _parse {
    my $class = shift;
    my $text  = shift;

    my ( $sep_idx, $headers ) = $class->_parse_headers($text);

    substr( ${$text}, 0, $sep_idx, q{} );

    return $class->_parse_parts( $text, $headers );
}

sub _parse_headers {
    my $class = shift;
    my $text  = shift;

    my $header_text;
    my $sep_idx;

    # We want to ignore mbox message separators - this is a pretty lax parser,
    # but we may find broken lines. The key is that it starts with From
    # followed by space, not a colon.
    ${$text} =~ s/^From\s+.+$Courriel::Helpers::LINE_SEP_RE//;

    # Some broken emails may split the From line in an arbitrary spot
    ${$text} =~ s/^[^:]+$Courriel::Helpers::LINE_SEP_RE//g;

    if ( ${$text} =~ /^(.+?)($Courriel::Helpers::LINE_SEP_RE)\g{2}/s ) {
        $header_text = $1 . $2;
        $sep_idx     = ( length $header_text ) + ( length $2 );
    }
    else {
        return ( 0, Courriel::Headers::->new );
    }

    my $headers = Courriel::Headers::->parse(
        text => \$header_text,
    );

    return ( $sep_idx, $headers );
}

{
    my $fake_ct = Courriel::Header::ContentType->new_from_value(
        name  => 'Content-Type',
        value => 'text/plain'
    );

    sub _parse_parts {
        my $class   = shift;
        my $text    = shift;
        my $headers = shift;

        my @ct = $headers->get('Content-Type');
        if ( @ct > 1 ) {
            die 'This email defines more than one Content-Type header.';
        }

        my $ct = $ct[0] // $fake_ct;

        if ( $ct->mime_type !~ /^multipart/i ) {
            return Courriel::Part::Single->new(
                headers         => $headers,
                encoded_content => $text,
            );
        }

        return $class->_parse_multipart( $text, $headers, $ct );
    }
}

sub _parse_multipart {
    my $class   = shift;
    my $text    = shift;
    my $headers = shift;
    my $ct      = shift;

    my $boundary = $ct->attribute_value('boundary');

    die q{The message's mime type claims this is a multipart message (}
        . $ct->mime_type
        . q{) but it does not specify a boundary.}
        unless defined $boundary && length $boundary;

    my ( $preamble, $all_parts, $epilogue ) = ${$text} =~ /
                (.*?)                   # preamble
                ^--\Q$boundary\E\s*
                (.+)                    # all parts



( run in 1.835 second using v1.01-cache-2.11-cpan-0bb4e1dffa6 )