Mail-SpamAssassin

 view release on metacpan or  search on metacpan

lib/Mail/SpamAssassin/Message/Metadata/Received.pm  view on Meta::CPAN

  # be helpful; save some cumbersome typing
  $self->{num_relays_trusted} = scalar (@{$self->{relays_trusted}});
  $self->{num_relays_untrusted} = scalar (@{$self->{relays_untrusted}});
  $self->{num_relays_internal} = scalar (@{$self->{relays_internal}});
  $self->{num_relays_external} = scalar (@{$self->{relays_external}});

  dbg("metadata: X-Spam-Relays-Trusted: ".$self->{relays_trusted_str});
  dbg("metadata: X-Spam-Relays-Untrusted: ".$self->{relays_untrusted_str});
  dbg("metadata: X-Spam-Relays-Internal: ".$self->{relays_internal_str});
  dbg("metadata: X-Spam-Relays-External: ".$self->{relays_external_str});
}

# ---------------------------------------------------------------------------

# returns undef if the header just couldn't be parsed
# returns 0 if the header was specifically skipped
# returns a hash of information if the header is parsed, including:
#    ip => $ip,
#    by => $by,
#    helo => $helo,
#    id => $id,
#    ident => $ident,
#    envfrom => $envfrom,
#    lc_by => (lc $by),
#    lc_helo => (lc $helo),
#    auth => $auth
#
sub parse_received_line {
  my ($self) = shift;
  local ($_) = shift;
  local ($1,$2,$3,$4,$5,$6);

  s/\s+/ /g;
  s/^ //;
  s/ $//;

  # get rid of invalid semicolon at the end of the header
  1 while s/\s?;$//;

  my $ip = '';
  my $helo = '';
  my $rdns = '';
  my $by = '';
  my $id = '';
  my $ident = '';
  my $envfrom = undef;
  my $mta_looked_up_dns = 0;
  my $auth = '';

# ---------------------------------------------------------------------------

  # We care about lines starting with "from"; all of the others are ignorable:
  # Bug 4943: give /^(from/ a chance to be parsed
  #
  # (qmail 27981 invoked by uid 225); 14 Mar 2003 07:24:34 -0000
  # (qmail 84907 invoked from network); 13 Feb 2003 20:59:28 -0000
  # (ofmipd 208.31.42.38); 17 Mar 2003 04:09:01 -0000
  # by faerber.muc.de (OpenXP/32 v3.9.4 (Win32) alpha @ 2003-03-07-1751d); 07 Mar 2003 22:10:29 +0000
  # by x.x.org (bulk_mailer v1.13); Wed, 26 Mar 2003 20:44:41 -0600
  # by SPIDERMAN with Internet Mail Service (5.5.2653.19) id <19AF8VY2>; Tue, 25 Mar 2003 11:58:27 -0500
  # by oak.ein.cz (Postfix, from userid 1002) id DABBD1BED3; Thu, 13 Feb 2003 14:02:21 +0100 (CET)
  # OTM-MIX(otm-mix00) id k5N1aDtp040896; Fri, 23 Jun 2006 10:36:14 +0900 (JST)
  # at Infodrom Oldenburg (/\##/\ Smail-3.2.0.102 1998-Aug-2 #2) from infodrom.org by finlandia.Infodrom.North.DE via smail from stdin id <m1FglM8-000okjC@finlandia.Infodrom.North.DE> for debian-security-announce@lists.debian.org; Thu, 18 May 2006 18...
  # with ECARTIS (v1.0.0; list bind-announce); Fri, 18 Aug 2006 07:19:58 +0000 (UTC)
  # Received: Message by Barricade wilhelm.eyp.ee with ESMTP id h1I7hGU06122 for <spamassassin-talk@lists.sourceforge.net>; Tue, 18 Feb 2003 09:43:16 +0200
  return 0 if (!/^\(?from /i);

  # from www-data by wwwmail.documenta.de (Exim 4.50) with local for <example@vandinter.org> id 1GFbZc-0006QV-L8; Tue, 22 Aug 2006 21:06:04 +0200
  # from server.yourhostingaccount.com with local  for example@vandinter.org  id 1GDtdl-0002GU-QE (8710); Thu, 17 Aug 2006 21:59:17 -0400
  return 0 if /\bwith local for\b/;

  # Received: from virtual-access.org by bolero.conactive.com ; Thu, 20 Feb 2003 23:32:58 +0100
  # Received: FROM ca-ex-bridge1.nai.com BY scwsout1.nai.com ; Fri Feb 07 10:18:12 2003 -0800
  # but not: Received: from [86.122.158.69] by mta2.iomartmail.com; Thu, 2 Aug 2007 21:50:04 -0200
  if (/^from (\S+) by [^\s;]+ ?;/i && $1 !~ /^\[[\d.]+\]$/) { return 0; }

# ---------------------------------------------------------------------------

  # Let's get rid of the date at the end
  # ; Tue, 23 May 2006 13:06:35 -0400
  s/[\s;]+(?:(?:Mon|T(?:ue|hu)|Wed|Fri|S(?:at|un)), )?\d+ (?:J(?:an|u[nl])|Feb|Ma[ry]|A(?:pr|ug)|Sep|Oct|Nov|Dec) \d+ \d+:\d+(?::\d+)? \S+$//;

  # from av0001.technodiva.com (localhost [127.0.0.1])by  localhost.technodiva.com (Postfix) with ESMTP id 846CF2117for  <proftp-user@lists.sourceforge.net>; Mon,  7 Aug 2006 17:48:07 +0200 (MEST)
  s/\)by /) by /;

# ---------------------------------------------------------------------------

  # OK -- given knowledge of most Received header formats,
  # break them down.  We have to do something like this, because
  # some MTAs will swap position of rdns and helo -- so we can't
  # simply use simplistic regexps.

  # try to catch unique message identifier
  if (/ id <?([^\s<>;]{3,})/) {
    $id = $1;
  }

  if (/\bhelo=([-A-Za-z0-9\.\^+_&:=?!@%*\$\\\/]+)(?:[^-A-Za-z0-9\.\^+_&:=?!@%*\$\\\/]|$)/) {
      $helo = $1;
  }
  elsif (/\b(?:HELO|EHLO) ([-A-Za-z0-9\.\^+_&:=?!@%*\$\\\/]+)(?:[^-A-Za-z0-9\.\^+_&:=?!@%*\$\\\/]|$)/) {
      $helo = $1;
  }
  if (/ by (\S+)(?:[^-A-Za-z0-9\;\.]|$)/) { $by = $1; }

# ---------------------------------------------------------------------------

  # try to catch authenticated message identifier
  #
  # with ESMTPA, ESMTPSA, LMTPA, LMTPSA should cover RFC 3848 compliant MTAs,
  # UTF8SMTPA and UTF8LMTPA are covered by RFC 4954 and RFC 6531,
  # with ASMTP (Authenticated SMTP) is used by Earthlink, Exim 4.34, and others
  # with HTTP should only be authenticated webmail sessions
  # with HTTPU is used by Communigate Pro with Pronto! webmail interface
  # with HTTPS is used by Horde, which adjusts the Received header to say
  # "HTTPS" when a connection is made over HTTPS
  # IANA registry: https://www.iana.org/assignments/mail-parameters/mail-parameters.xhtml
  if (/ by / && / with ((?:ES|L|UTF8S|UTF8L)MTPS?A|ASMTP|HTTP[SU]?)(?: |;|$)/i) {
    $auth = $1;
  }
  # GMail should use ESMTPSA to indicate that it is in fact authenticated,



( run in 1.407 second using v1.01-cache-2.11-cpan-e1769b4cff6 )