Crypt-IDA
view release on metacpan or search on metacpan
lib/Crypt/IDA.pm view on Meta::CPAN
$self=$classname;
$class=$classname;
}
my $filename = shift;
my $align = shift || 0;
my $offset = shift || 0;
my $fh;
return undef unless (sysopen $fh, $filename, O_RDONLY);
return fill_from_fh($fh,$align,$offset);
}
sub empty_to_fh {
my ($self, $class);
if ($_[0] eq $classname or ref($_[0]) eq $classname) {
$self = shift;
$class = ref($self) || $self;
} else {
$self=$classname;
$class=$classname;
}
my $fh = shift;
my $offset = shift || 0;
#warn "got fh=$fh; offset=$offset\n";
sysseek $fh, $offset, SEEK_SET if $offset;
return {
SUB => sub {
my $str=shift;
return syswrite $fh, $str;
}
};
}
sub empty_to_file {
my ($self, $class);
if ($_[0] eq $classname or ref($_[0]) eq $classname) {
$self = shift;
$class = ref($self) || $self;
} else {
$self=$classname;
$class=$classname;
}
my $filename = shift;
my $perm = shift || 0644;
my $offset = shift || 0;
my $fh;
return undef unless
sysopen $fh, $filename, O_CREAT | O_WRONLY, $perm;
return $self->empty_to_fh($fh, $offset);
}
# ida_process_streams is the heart of the module. It's called from
# both ida_split and ida_combine, so it's capable of operating with
# just one or several fillers/emptiers. Its main purpose is to manage
# the input/output buffers, and it calls the external matrix multiply
# code to actually transform blocks of data in large chunks. For
# efficiency, it has some requirements on the organisation of
# input/output buffer matrices. It also delegates the task of
# reading/writing data to relatively simple user-supplied callbacks.
# This module isn't intended to be called by users directly, though,
# so it's not even documented in the POD section. Even though
# technically this could be useful if the user wanted to specify their
# own input/output matrices, there's so little error-checking of
# parameters here that it's probably best not to mention its
# existence/availability.
# the routine uses various tracking variables for each filler/emptier
# handle
use constant {
CALLBACK => 0,
IW => 1,
ENDING => 2,
BF => 3,
PART => 4,
OR => 6,
SKIP => 7,
};
sub ida_process_streams {
my ($self, $class);
if ($_[0] eq $classname or ref($_[0]) eq $classname) {
$self = shift;
$class = ref($self) || $self;
} else {
$self=$classname;
$class=$classname;
}
my ($xform, $in, $fillers, $out, $emptiers, $bytes_to_read,
$inorder, $outorder)=@_;
# default values are no byte-swapping, read bytes until eof
$inorder=0 unless defined($inorder);
$outorder=0 unless defined($outorder);
$bytes_to_read = 0 unless defined($bytes_to_read);
# XS calls are expensive, so do them only once
my ($IWIDTH, $IROWS, $ICOLS, $IORGNUM) =
map { $in->$_ } qw{WIDTH ROWS COLS ORGNUM};
my ($OWIDTH, $OROWS, $OCOLS, $OORGNUM) =
map { $out->$_ } qw{WIDTH ROWS COLS ORGNUM};
my ($XWIDTH, $XROWS, $XCOLS, $XORGNUM) =
map { $xform->$_ } qw{WIDTH ROWS COLS ORGNUM};
my $bytes_read=0;
my ($IR, $OW); # input read, output write pointers
my ($ILEN, $OLEN);
my ($IFmin, $OFmax); # input and output buffer fill levels
my ($want_in_size, $want_out_size);
my ($eof, $rc, $str, $max_fill, $max_empty);
my $width=$IWIDTH;
my $bits=$width << 3;
my $rows=$IROWS;
my $nfillers = scalar(@$fillers);
my $nemptiers = scalar(@$emptiers);
my ($idown,$odown);
my ($iright,$oright);
my ($rr,$cc);
my ($i, $k);
my ($start_in_col,$start_out_col);
#warn "-------------------------------------\n";
#warn "Asked to process $bytes_to_read bytes\n";
#warn "Input cols is " .$in->COLS. ", Output cols is " . $out->COLS . "\n";
#warn "Inorder is $inorder, Outorder is $outorder\n";
#warn "There are $nfillers fillers, $nemptiers emptiers\n";
if ($bytes_to_read % ($width * $XCOLS)) {
carp "process_streams: bytes to read not a multiple of COLS * WIDTH";
return undef;
}
unless ($nfillers == 1 or $nfillers==$IROWS) {
carp "Fillers must be 1 or number of input rows";
return undef;
}
unless ($nemptiers == 1 or $nemptiers == $OROWS) {
carp "Emptiers must be 1 or number of output rows";
return undef;
}
# Previously, I stashed vars inside the supplied fill/empty
# callbacks, assuming that we might need them again if we were to be
# called a second time. However, since the only way that this
# routine can end is with an error or eof, neither is a condition
# that we can continue on from.
#
# As a result, I'm changing to:
# * only storing these vars locally
# * storing them in an list of lists (rather than list of hashes)
# (see constant definitions above)
#
my (@fillvars,@emptyvars);
($IFmin, $OFmax, $IR, $OW) = (0,0,0,0);
if ($nfillers == 1) {
if ($IORGNUM != COLWISE and $IROWS != 1) {
carp "Need a 'colwise' input matrix with a single filler";
return undef;
}
$ILEN=$rows * $ICOLS * $width;
$idown=$width;
$iright=$IROWS * $width;
$want_in_size = $width * $rows;
} else {
if ($IORGNUM != ROWWISE and $IROWS != 1) {
carp "Need a 'rowwise' input matrix with multiple fillers";
return undef;
}
$ILEN=$ICOLS * $width;
$idown=$ILEN;
$iright=$width;
$want_in_size = $width;
}
for my $i (0 .. $nfillers - 1) {
$fillers->[$i]->{"IW" } = $i * $idown;
$fillers->[$i]->{"END"} = $i * $idown + $ILEN - 1;
$fillers->[$i]->{"BF"} = 0;
$fillers->[$i]->{"PART"} = ""; # partial word
# Set up per-filler variables
my @varlist = ();
my $cb = $fillers->[$i]->{SUB};
@varlist[CALLBACK,IW,ENDING,BF,PART] = (
$cb, $i * $idown, $i * $idown + $ILEN - 1, 0, "" );
push @fillvars, \@varlist;
}
if ($nemptiers == 1) {
if ($OORGNUM != COLWISE and $OROWS != 1) {
carp "Need a 'colwise' output matrix with a single emptier";
return undef;
}
$OLEN=$OROWS * $OCOLS * $width;
$odown=$width;
$oright=$OROWS * $width;
$want_out_size = $width * $OROWS;
} else {
if ($OORGNUM != ROWWISE and $OROWS != 1) {
carp "Need a 'rowwise' output matrix with multiple emptiers";
return undef;
}
$OLEN = $OCOLS * $width;
$odown = $OLEN;
$oright = $width;
lib/Crypt/IDA.pm view on Meta::CPAN
use Crypt::IDA ":default";
$source=fill_from_string ($string,$align);
$source=fill_from_fh ($fh,$align,$offset);
$source=fill_from_file ($filename,$align,$offset);
$sink=empty_to_string (\$string);
$sink=empty_to_fh ($fh,$offset);
$sink=empty_to_file ($filename,$mode,$offset);
($key,$mat,$bytes) = ida_split ( ... );
$bytes = ida_combine ( ... );
=head1 DESCRIPTION
This module splits a secret into one or more "shares" which have the
property that if a certain number of shares (the "quorum" or
"threshold") are presented in the combine step, the secret can be
recovered. The algorithm should be cryptographically secure in the
sense that if fewer shares than the quorum are presented, no
information about the secret is revealed.
=head2 EXPORT
No methods are exported by default. All methods may be called by
prefixing the method names with the module name, eg:
$source=Crypt::IDA::fill_from_string($string,$align)
Alternatively, routines can be exported by adding ":default" to the
"use" line, in which case the routine names do not need to be prefixed
with the module name, ie:
use Crypt::IDA ":default";
$source=fill_from_string ($string,$align);
# ...
Some extra ancillary routines can also be exported with the ":extras"
(just the extras) or ":all" (":extras" plus ":default") parameters to
the use line. See the section L<ANCILLARY ROUTINES> for details.
=head1 GENERAL OPERATION
=head2 Naming conventions
The following variable names will be used throughout this
documentation:
$n number of shares to create
$k number of shares needed to combine (ie, the "quorum")
$w width (treat input/output as being 1, 2 or 4-byte words)
$align null-pad input up a multiple of $align bytes
$key key parameter used to create transform matrix
$mat transform matrix/inverse transform matrix
=head2 Stream-processing design pattern
Rather than implement separate C<ida_split_string> or
C<ida_split_file> methods, a more flexible design which decouples the
details of the input and output streams from the actual processing is
used. In terms of design patterns, the C<ida_split> and C<ida_combine>
routines operate as stream processors, and use user-supplied callbacks
to read some bytes of input or write some bytes of output. Therefore,
the split and combine tasks break down into three steps:
=over
=item 1. Set up fill handler(s)
=item 2. Set up empty handler(s)
=item 3. Call processing routine (ida_split/ida_combine)
=back
For the C<ida_split> routine, a single "fill" handler is required,
while one or more "empty" handlers are required (one for each share to
be output). For the C<ida_combine> routine, the opposite is true: one
or more "fill" handlers are required (corresponding to the shares to
be combined), while a single "empty" handler (for the recombined
secret) is required.
The C<fill_from_*> and C<empty_to_*> routines create callbacks which
can be used by C<ida_split>/C<ida_combine>. Routines are provided for
creating callbacks given a string, filename or open file
handle. Custom callbacks (such as for reading/writing on a network
socket connection or an IPC message queue) can also be written quite
easily. See the section L<Writing Custom Callbacks> for more details.
=head2 Keys and transform matrices
Both the split and combine algorithms operate by performing matrix
multiplication over the input. In the case of the split operation, the
transform matrix has n rows (n=number of shares) and k columns
(k=quorum), while in the case of combine operation, the transform
matrix has k rows and k columns. Either operation is described simply
as the matrix multiplication:
transform x input = output
The input matrix always has k rows, while the output matrix will have
n rows for the split operation, or k rows for the combine
operation. Both input and output matrices will have a number of
columns related to the length of the secret being
split/combined. (Actually, in this implementation, the number of
columns in the input/output matrix is unimportant, since the matrices
are treated as circular buffers with columns being reused when
necessary, but the general idea still stands).
The transform matrix must have the property that any subset of k rows
represent linearly independent basis vectors. If this is not the case
then the transform cannot be reversed. This need not be a concern for
most users of this module, since if the ida_split routine is not
provided a transform matrix parameter, it will generate one of the
appropriate form, which guarantees (in the mathematical sense) that
the process is reversible. However, understanding the requirement of
linear independence is important in case a user-supplied matrix is
provided, and also for understanding the "key" parameter to the
split/combine routines. A "key" is defined as a list of field elements
(ie, 8-bit, 16-bit or 32-bit values):
x , x , ... , x , y , y , ... , y
1 2 n 1 2 k
whose values must all be distinct. If a key is supplied to the split
routine, these values are used to create a Cauchy-form transform
matrix:
k columns
| 1 1 1 |
| ------- ------- ... ------- |
| x1 + y1 x1 + y2 x1 + yk |
| |
| 1 1 1 |
| ------- ------- ... ------- |
| x2 + y1 x2 + y2 x2 + yk | n rows
| |
| : : : : |
| |
| 1 1 1 |
| ------- ------- ... ------- |
| xn + y1 xn + y2 xn + yk |
This matrix has the desired property that any subset of k rows are
linearly independent, which leads to the property that the transform
can be reversed (via being able to create an inverse matrix from those
k rows). The actual requirements for a Cauchy-form matrix are slightly
more complicated than saying that all xi, yi values be distinct, but
they reduce to exactly that for Galois Fields.
If the same key is supplied to the combine routine (along with a list
of rows to be created), the appropriate submatrix is generated and
inverted. This inverted matrix is then used as the transform matrix
for the combine algorithm.
For more information, see the sections for L<KEY MANAGEMENT>,
L<SPLIT OPERATION>, L<COMBINE OPERATION> and the L<SEE ALSO> section.
=head1 FILL/EMPTY CALLBACKS
=head2 "Fill" Callbacks
Fill callbacks are created by a call to one of:
$source=fill_from_string ($string,$align);
$source=fill_from_fh ($fh,$align,$offset);
$source=fill_from_file ($filename,$align,$offset);
The C<$string>, C<$fh> and C<$filename> parameters should be
self-explanatory. The $offset parameter, where supplied, specifies an
offset to seek to in the file/file handle I<before> any reading takes
place. The C<$offset> parameter may be omitted, in which case it
defaults to 0, i.e., the start of the file.
The $align parameter specifies that the input is to be null-padded up
to a multiple of $align bytes, should it not already be so-aligned.
The C<ida_split> routine requires that the input be a multiple of C<$k
* $w> bytes in length, so this is the usual value to pass for the
C<$align> parameter. If C<$align> is not specified, it defaults to 1,
meaning the input is byte-aligned, and no padding bytes will be placed
at the end of the file/string. Also note that if an C<$align>
parameter is used in conjunction with the C<$offset> parameter, that
the input will be aligned to be a multiple of C<$align> bytes
starting from I<$offset>, and not from the start of the file.
Also, be aware that if the input secret needs to be padded before
calling C<ida_split> that you will need to have some way of removing
those padding bytes after recovering the secret with C<ida_combine>.
This can be accomplished by removing null bytes from the end of the
secret (provided trailing nulls in the original secret are
prohibited), or (as is preferable for splitting/combining binary
files), by recording the original (unpadded) size of the secret and
truncating the reconstituted secret down to that size after calling
C<ida_combine>.
=head2 "Empty" Callbacks
The following routines are available for creating "empty" callbacks:
$sink=empty_to_string (\$string);
$sink=empty_to_fh ($fh,$offset);
$sink=empty_to_file ($filename,$perm,$offset);
All parameters with the same name are the same as in the "fill"
callbacks. Additionally, note that:
=over
=item * empty_to_string requires a I<reference> to a string variable
(to enable the callback to modify the string);
=item * empty handlers do not pad the output stream, so they don't
need an C<$align> parameter; and
=item * empty_to_file takes a C<$perm> (file permission) parameter,
which defaults to 0644 if not specified.
=back
As with the fill handler routines, these routines return a hash
reference (which contains the actual callback) if successful, or undef
if there was a problem (in which case file-related problems will be
reported in $!).
The empty_to_file routine will create the output file if it does not
already exist.
=head1 SPLIT OPERATION
The template for a call to C<ida_split>, showing all default values,
is as follows:
($key,$mat,$bytes) = ida_split (
quorum => undef,
shares => undef,
width => undef, # operate on words of 1, 2 or 4 bytes
# supply a key, a matrix, or neither (in
# which case a key will be randomly generated)
key => undef,
matrix => undef,
# optionally specify which shares to produce
sharelist => undef, # [ $row1, $row2, ... ]
# source, sinks
filler => undef,
emptiers => undef, # [ $empty1, $empty2, ... ]
# misc. options
rand => "/dev/urandom",
bufsize => 4096,
bytes => 0,
# byte order flags
inorder => 0,
outorder => 0,
);
Many of the parameters above have already been described earlier. The
new parameters introduced here are:
=over
=item * If the key and matrix parameters are unset, then a random
key/transform matrix will be generated and used to create the
shares. For a discussion of cases where you might want to override
this behaviour and supply your own values for these parameters, see
the L<Keys and transform matrices> and L<KEY MANAGEMENT> sections.
=item * sharelist is a reference to a list of rows to be operated on;
if specified, only shares corresponding to those rows in the transform
matrix will be created.
=item * emptiers is a reference to a list of empty callbacks. The list
should contain one empty callback for each share to be produced.
=item * rand can be set to "rand", in which case Perl's default
C<rand()> function will be used for generating keys. Otherwise, the
parameter is taken to be a file containing the random number to be
used (C</dev/urandom> and C</dev/random> are special devices on Linux
systems which generate different random numbers each time they are
read; see their man pages for details).
=item * bufsize specifies the size of the input and output buffer
matrices in terms of the number of columns. Any integer value greater
than 0 is allowed. Larger buffer size should improve the performance
of the algorithm, as fewer I/O calls and matrix multiply calls (on
larger chunks of data) will be required.
=item * bytes specifies how many bytes of input to read. This may be
set to zero to indicate that all bytes up to EOF should be read. This
value must be a multiple of quorum x width
=item * inorder and outorder can be used to specify the byte order of
the input and output streams, respectively. The values can be set to 0
(stream uses native byte order), 1 (stream uses little-endian byte
order) or 2 (stream uses big-endian byte order). If these values are
set to 1 or 2 and that byte order is different from the system's byte
order then bytes within words will be swapped to the correct order
before being written to the input buffer or written out from the
output buffer. These options have no effect when the width is set to 1
byte.
=back
The function returns three return values, or undef if there was an
error. The return values are:
=over
=item * C<$key>, which will be undef if the user supplied a matrix
parameter, or the value of the key used to create the transform matrix
otherwise
=item * C<$mat>, which is the matrix used to create the shares. If the
user specified a sharelist option, then this returned matrix will
include only the rows of the transform matrix specified in the
sharelist (in the order specified).
=item * C<$bytes>, which is the number of input bytes actually read
from the input.
=back
Since the "key" parameter may not be returned in all cases, the
preferred method for detecting failure of the routine is to check
whether the C<$mat> parameter returns undef, as in the following:
($key,$mat,$bytes) = ida_split ( ... );
unless (defined ($mat)) {
# handle ida_split failure
# ...
}
lib/Crypt/IDA.pm view on Meta::CPAN
All operations are carried out by treating input bytes/words as being
polynomials in GF(2^8), GF(2^16) or GF(2^32). The implementation of
arithmetic in these fields is handled by the Math::FastGF2 and
Math::FastGF2::Matrix modules. The irreducible field polynomials used
by those modules are:
8 4 3
GF(2^8) x + x + x + x + 1 (0x11b)
16 5 3
GF(2^16) x + x + x + x + 1 (0x1002b)
32 7 3 2
GF(2^32) x + x + x + x + 1 (0x10000008d)
Anyone wishing to implement compatible encoders/decoders should ensure
that the polynomials used in their implementation match these.
=head2 Reed-Solomon Encoding
Although this module was not written specifically with Reed-Solomon
Erasure Codes in mind, the underlying arithmetic and matrix
multiplication is the same as Rabin's IDA. The module can, with a
little work, be made to work as a Reed-Solomon encoder/decoder. The
basic idea (which may be implemented in a future release) is to first
create either a Cauchy-form matrix as described above, or a
Vandermonde matrix of the following form:
| 0 1 2 k-1 |
| 0 0 0 ... 0 |
| |
| 0 1 2 k-1 |
| 1 1 1 ... 1 |
| |
| : : : : : |
| |
| 0 1 2 k-1 |
| n-1 n-1 n-1 ... n-1 |
All arithmetic operations should, of course, be performed on Galois
Field elements, rather than integers. Whichever matrix form is chosen,
the next step is to use elementary column operations on the matrix
until the top k rows form an identity matrix (again, all arithmetic
must treat the numbers as Galois Field polynomials). The matrix can
then be passed along to C<ida_split>, which will generate n shares as
usual, with the first k of these being unencrypted slices of the
original file (ie, containing every k'th character/word, starting at
offset 0, 1, ... , k-1) with the remaining n - k shares being the
Erasure Codes. As with Rabin's scheme, any k of the shares may be
presented to C<ida_combine> (along with the appropriate inverse matrix
calculated from k rows of the split transform matrix) to reconstruct
the original file.
Note that version 0.05 of Math::FastGF2 added a new C<new_vandermonde>
constructor methods to help with performing Reed-Solomon encoding.
=head2 Writing Custom Callbacks
The following code can be used as a template for writing routines
which create fill/empty callbacks which can be passed to C<ida_split>
and C<ida_combine>:
sub create_my_ida_callback {
my ($arg1, $arg2, ... ) = @_;
# do some initialisation based on args, such as opening files,
# connecting to a database, saving details of an IPC message queue,
# etc.
# for "fill" callbacks:
return {
SUB => sub {
my $len=shift;
my $string;
# some code to get/produce up to $len bytes of input and
# place it in $string. Since this is a closure, the
# callback has access to the initial $arg1, $arg2 and any
# other variables created in the enclosing routine.
if ($some_error_occurred) {
return undef;
} elsif ($no_more_input) {
return "";
} else {
return $string;
}
},
};
# for "empty" callbacks:
return {
SUB => sub {
my $string=shift;
# some code to save/consume new data bytes passed in
# $string. The routine should return the number of bytes
# actually processed, or undef if there was a (write)
# error.
if ($some_error_occurred) {
# optionally set $! with error message
return undef;
} else {
return $number_of_bytes_actually_saved;
}
},
};
}
Note that callbacks are written using Perl's closure mechanism rather
than by passing simple code (subroutine) references. This is done to
allow the callback to be "customised" to use a particular file handle,
string, etc., as input or output. See the L<What's a closure?> section
in the Perl FAQ for more details on this construct. Also, a hashref is
used rather than a simple coderef since the processing routines use
that hash to store bookkeeping information such as buffer fill levels,
read/write pointers and so on.
The C<ida_split> and C<ida_combine> routines are written in such a way
that they handle most of the logic involved with buffering of input
and output and the details of reading/writing values in the
input/output matrices. This means that callbacks can usually be kept
very simple and, for the most part, stateless. Specifically, if an
empty handler cannot process all bytes of input presented to it in one
call, it simply returns the number of bytes it actually
processed. There is no need for it to save any unprocessed part of the
input, and doing so will probably result in an error. Since the
calling routines know how many unprocessed bytes remain in the output
buffer, it will arrange so that the next time that particular empty
handler is called, it will receive those unprocessed bytes at the
start of its input string.
The one exception to this "stateless" operation is in the case where a
fill handler must pad the input stream to be a multiple of $align
bytes. This can be accomplished by either pre-padding the input stream
in the callback initialisation phase (when the stream length is known
in advance, such as with a string input), or by maintaining "bytes
read" and "EOF" state variables, updating them after every call to the
callback, and using them to return extra padding bytes after the
stream's natural end-of-file, where appropriate.
Please consult the source code for the existing C<fill_from_*> and
C<empty_to_*> callback creation code for working examples.
=head1 KNOWN BUGS
There may be a weakness in the current implementation of the random
key generation routine when a 1-byte word size is used, in that
regardless of the random-number generator parameter passed to
C<ida_split>, the routine will always use Perl's internal C<rand()>
function. The code currently includes a workaround which should at
least prevent a sequence-prediction attack on the RNG. While I can see
no way to effectively attack the current implementation (due to the
security afforded by the simple act of dispersing shares), it is
clearly desirable to use the highest-quality RNG available in all
cases, and this should be implemented in a future release. For the
moment, if the possibility of a weakness in this implementation is
unacceptable, it can be avoided simply by using a width parameter of 2
or 4, in which cases the high-quality RNG will always be used.
On a related note, defaulting to the use of C</dev/urandom> instead of
C</dev/random> may be considered a bug by some people.
=head1 SEE ALSO
I<"Efficient dispersal of information for security, load balancing,
and fault tolerance">, by Michael O. Rabin. JACM Volume 36, Issue 2
(1989).
Description of the Information Dispersal Algorithm, which this module
implements. This should be a faithful implementation of the original
idea, although the issue of padding is not covered sufficiently in the
paper, and this may be a point of divergence from the original
intention.
I<http://parchive.sourceforge.net/>
A similar project, in that it uses 16-bit Galois Field arithmetic to
perform the same kinds of arithmetic operations on input files. The
polynomial used in parchive (0x1100b) is different from the one used
here (0x1002b), however. Also, parchive uses the more traditional
Reed-Solomon mode of operation, with an emphasis on forward
( run in 0.660 second using v1.01-cache-2.11-cpan-39bf76dae61 )