Data-Censor

 view release on metacpan or  search on metacpan

lib/Data/Censor.pm  view on Meta::CPAN

package Data::Censor;

use 5.006;
use strict;
use warnings FATAL => 'all';
use Carp;

use Ref::Util qw/ is_hashref /;

=head1 NAME

Data::Censor - censor sensitive stuff in a data structure

=head1 VERSION

Version 0.04

=cut

our $VERSION = '0.04';


=head1 SYNOPSIS

    # OO way, letting you specify your own list of sensitive-looking fields, and
    # what they should be replaced by (all options here are optional)
    my $censor = Data::Censor->new(
        # Specify which fields to censor:
        sensitive_fields => [ qw(card_number password) ],

        # Specify text to replace their values with:
        replacement => '(Sensitive data hidden)',

        # Or specify callbacks for each field name which return the "censored"
        # value - in this case, masking a card number (PAN) to show only the
        # last four digits:
        replacement_callbacks => {
            card_number => sub {
                my $pan = shift;
                return "x" x (length($pan) - 4) . substr($pan, -4, 4);
            },
        },
    );
    
    # Censor the data in-place (changes the data structure, returns the number
    # of keys censored)
    my $censor_count = $censor->censor(\%data);

    # Alternate non-OO interface, using default settings and returning a cloned
    # version of the data after censoring:
    my $censored_data = Data::Censor->clone_and_censor(\%data);


=head1 new (CONSTRUCTOR)

Accepts the following arguments:

=over

=item sensitive_fields

Either an arrayref of sensitive fields, checked for equality, or a regex to test
against each key to see if it's considered sensitive.

=item replacement

The string to replace each value with.  Any censoring callback provided in
C<replacement_callbacks> which matches this key will take precedence over this
straightforward value.

=item replacement_callbacks

A hashref of key => sub {...}, where each key is a column name to match, and the
coderef takes the uncensored value and returns the censored value, letting you
for instance mask a card number but leave the last 4 digits visible.

If you provide both C<replacement> and C<replacement_callbacks>, any callback
defined which matches the key being considered takes precedence.

=back

=cut

sub new {
    my $class = shift;
    my %args  = @_;

    my $self = bless {} => $class;

    if ( ref $args{sensitive_fields} eq 'Regexp' ) {
        $self->{censor_regex} = $args{sensitive_fields};
    } elsif ( ref $args{sensitive_fields} eq 'ARRAY' ) {
        $self->{is_sensitive_field} = {
            map { $_ => 1 } @{ $args{sensitive_fields} }
        };
    } else {
        $self->{is_sensitive_field} = {
            map { $_ => 1 } qw(
              pass         password     old_password   secret
              private_key  cardnum      card_number    pan
              cvv          cvv2         ccv
            )
        };
    }

    if ( is_hashref $args{replacement_callbacks} ) {
        $self->{replacement_callbacks} = $args{replacement_callbacks};
    }
    if ( exists $args{replacement} ) {
        $self->{replacement} = $args{replacement};
    } else {
        $self->{replacement} = 'Hidden (looks potentially sensitive)';
    }

    $self->{recurse_limit} = $args{recurse_limit} || 100;

    return $self;
}

=head1 METHODS

=head2 censor

Given a data structure (hashref), clones it and returns the cloned version after
censoring potentially sensitive data within.

=cut

sub censor {
    my ( $self, $data, $recurse_count, $visited ) = @_;
    $recurse_count ||= 0;
    $visited       ||= {};

    no warnings 'recursion';    # we're checking ourselves.

    if ( $recurse_count++ > $self->{recurse_limit} ) {
        warn "Data exceeding $self->{recurse_limit} levels";
        return;
    }

    croak('censor expects a hashref') unless is_hashref $data;

    my $censored = 0;
    for my $key ( keys %$data ) {

        if ( is_hashref $data->{$key} ) {
            $censored
              += $self->censor( $data->{$key}, $recurse_count, $visited )
              unless $visited->{ $data->{$key} }++;
            next;
        }

        next unless
          (    $self->{is_sensitive_field}
            && $self->{is_sensitive_field}{ lc $key } )
          or ( $self->{censor_regex} && $key =~ $self->{censor_regex} );

        # OK, censor this
        if ( $self->{replacement_callbacks}{ lc $key } ) {
            $data->{$key} = $self->{replacement_callbacks}{ lc $key }->(
                $data->{$key}
            );
            $censored++;
        } else {
            $data->{$key} = $self->{replacement};
            $censored++;
        }
    }

    return $censored;
}

=head2 clone_and_censor

Clones the provided hashref (using L<Clone> - will die if not installed), then
censors the cloned data and returns it.

Can be used both as a class or object method - the former for a quick way to use
it without having to instantiate an object, the latter if you want to apply
custom settings to the object before using it.

  # As a class method
  my $censored_data = Data::Censor->clone_and_censor($data);

  # or as an object method
  my $censor = Data::Censor->new( replacement => "SECRET!" );
  my $censored_data = $censor->clone_and_censor($data);

=cut

sub clone_and_censor {
    my $class = shift;
    my $data  = shift;

    eval { require Clone; 1 }
      or die "Can't clone data without Clone installed";

    my $cloned_data = Clone::clone($data);

   # if $class is a Data::Censor object, then we were called as an object method
   # rather than a class method - that's fine - otherwise, create a new
   # instance and use it:
    my $self = ref $class && $class->isa('Data::Censor')
      ? $class
      : $class->new;

    $self->censor($cloned_data);
    return $cloned_data;
}


=head1 AUTHOR

David Precious (BIGPRESH), C<< <davidp at preshweb.co.uk> >>

This code was originally written for the L<Dancer> project by myself; I've
pulled it out into a seperate distribution as I was using it for code at work.





( run in 0.525 second using v1.01-cache-2.11-cpan-cdf2f3d4e48 )