BOM results from the CPAN

Mail-SpamAssassin

sub detect_utf16 {
	my $data = $_[0];  # could not avoid copying large strings
	my $utf16le_clues = 0;
	my $utf16be_clues = 0;
	my $sum_h_e = 0;
	my $sum_h_o = 0;
	my $sum_l_e = 0;
	my $sum_l_o = 0;
	my $decoder = undef;

	# avoid scan if BOM present
	if( $data =~ /^(?:\xff\xfe|\xfe\xff)/ ) {
		dbg( "message: detect_utf16: found BOM" );
		return;	# let perl figure it out from the BOM
	}
	
	my @msg_h = unpack 'H' x length( $data ), $data;
	my @msg_l = unpack 'h' x length( $data ), $data;

	for( my $i = 0; $i < length( $data ); $i+=2 ) {
		my $check_char = sprintf( "%01X%01X %01X%01X", hex $msg_h[$i], hex $msg_l[$i], hex $msg_h[$i+1], hex $msg_l[$i+1] );
		$sum_h_e += hex $msg_h[$i];
		$sum_h_o += hex $msg_h[$i+1];
		$sum_l_e += hex $msg_l[$i];

lib/Mail/SpamAssassin/Message/Node.pm view on Meta::CPAN

      $tried_utf8 = 1;
    }
  }

  if ($charset_declared =~ /^(?:US-)?ASCII\z/i
           && !$insist_on_declared_charset) {
    # Declared as US-ASCII but contains 8-bit characters; it makes no sense
    # to attempt decoding as strict US-ASCII first, since we know it would fail.

  } elsif ($charset_declared =~ /^UTF[ -]?16/i) {
    # Handle cases where spammers use UTF-16 encoding without including a BOM
    # or declaring endianness as reported at:
    # https://bz.apache.org/SpamAssassin/show_bug.cgi?id=7252

    my $decoder = detect_utf16( $_[0] );
    if (defined $decoder) {
      if (eval { $rv = $decoder->decode($_[0], Encode::FB_CROAK | Encode::LEAVE_SRC); defined $rv }) {
        dbg("message: decoded as charset %s, declared %s",
          $decoder->name, $charset_declared);
        utf8::encode($rv) if !$return_decoded;
        $rv .= $data_taint;  # carry taintedness over, avoid Encode bug

lib/Mail/SpamAssassin/Plugin/PDFInfo.pm view on Meta::CPAN

  _set_tag($pms, 'PDFCOUNT',  $pms->{pdfinfo}->{count_pdf});
  _set_tag($pms, 'PDFIMGCOUNT', $pms->{pdfinfo}->{count_pdf_images});
  _set_tag($pms, 'PDFURICOUNT', $pms->{pdfinfo}->{count_pdf_uris});
}

sub _get_pdf_details {
  my ($pms, $part) = @_;

  my $data = $part->decode();

  # Remove UTF-8 BOM
  $data =~ s/^\xef\xbb\xbf//;

  # Search magic in first 1024 bytes
  if ($data !~ /^.{0,1024}\%PDF\-(\d\.\d)/s) {
    dbg("pdfinfo: PDF magic header not found, invalid file?");
    return;
  }
  my $version = $1;
  _set_tag($pms, 'PDFVERSION', $version);
  # dbg("pdfinfo: pdf version = $version");

rules/active.list view on Meta::CPAN

# tflags publish
BIGNUM_EMAILS_FREEM

# tflags publish
BIGNUM_EMAILS_MANY

# tflags publish
BILL_1618

# tflags publish
BITCOIN_BOMB

# tflags publish
BITCOIN_DEADLINE

# tflags publish
BITCOIN_EXTORT_01

# tflags publish
BITCOIN_EXTORT_02

rules/active.list view on Meta::CPAN

# tflags net
BODY_URI_ONLY

# tflags publish
BOGUS_MIME_VERSION

# tflags publish
BOGUS_MSM_HDRS

# tflags publish
BOMB_FREEM

# tflags publish
BOMB_MONEY

# tflags net
BTC_ORG

# tflags publish
BULK_RE_SUSP_NTLD

# tflags publish
CANT_SEE_AD

( run in 0.354 second using v1.01-cache-2.11-cpan-e9daa2b36ef )