Simd-Avx512
view release on metacpan or search on metacpan
lib/Simd/Avx512.pm view on Meta::CPAN
}
sub require32($) # Check that we have a string of 32 bits
{my ($xmm) = @_; # DWord
requireN(32, $xmm);
}
sub require64($) # Check that we have a string of 64 bits
{my ($xmm) = @_; # Bytes
requireN(64, $xmm);
}
sub require128($) # Check that we have a string of 128 bits
{my ($xmm) = @_; # Word
requireN(128, $xmm);
}
sub require256($) # Check that we have a string of 256 bits
{my ($xmm) = @_; # Dword
requireN(256, $xmm);
}
sub require512($) # Check that we have a string of 512 bits
{my ($xmm) = @_; # Qword
requireN(128, $xmm);
}
sub require128or256or512($;$) # Check that we have a string of 128|256|512 bits in the first operand and optionally the same in the second operand
{my ($xmm1, $xmm2) = @_; # Bytes, optional bytes
my $l = length $xmm1;
confess "128|256|512 bits required for first operand ($l)" unless $l == 128 or $l == 256 or $l == 512;
if (defined $xmm2)
{my $m = length $xmm2;
confess "128|256|512 bits required for second operand ($m)" unless $m == 128 or $m == 256 or $m == 512;
confess "Operands must have same length($l,$m)" unless $l == $m;
}
$l
}
sub require64or128or256or512($) # Check that we have a string of 64|128|256|512 bits
{my ($xmm) = @_; # Bytes
my $l = length $xmm;
confess "64|128|256|512 bits required for operand" unless $l == 64 or $l == 128 or $l == 256 or $l == 512;
confess "Only zeros and ones allowed in operand" unless $xmm =~ m(\A[01]+\Z);
}
sub requireSameLength($$) # Check that the two operands have the same length
{my ($xmm1, $xmm2) = @_; # Bytes, bytes
my ($l, $L) = (length($xmm1), length($xmm2));
confess "Operands have different lengths($l, $L)" unless $l == $L;
$l
}
sub requireNumber128or256or512($) # Check that we have a number with a value of 128|256|512
{my ($n) = @_; # Number
confess "128|256|512 required for operand" unless $n == 128 or $n == 256 or $n == 512;
}
sub flipBitsUnderMask($$) # Flip the bits in a string where the corresponding mask bit is 1 else leave the bit as is
{my ($string, $mask) = @_; # Bit string, mask
my $l = requireSameLength $string, $mask;
my $f = '';
for my $i(0..$l-1) # Each character in the string and mask
{my $s = substr($string, $i, 1);
$f .= substr($mask, $i, 1) eq '0' ? $s : $s eq '0' ? '1' : '0'
}
$f
}
sub compareTwosComplement($$) # Compare two numbers in two's complement formats and return -1 if the first number is less than the second, 0 if they are equal, else +1
{my ($a, $b) = @_; # First, second
my $n = requireSameLength $a, $b;
return -1 if substr($a, 0, 1) eq '1' and substr($b, 0, 1) eq '0'; # Leading sign bit
return +1 if substr($a, 0, 1) eq '0' and substr($b, 0, 1) eq '1';
for(1..$n) # Non sign bits
{return -1 if substr($a, $_, 1) eq '0' and substr($b, $_, 1) eq '1';
return +1 if substr($a, $_, 1) eq '1' and substr($b, $_, 1) eq '0';
}
0 # Equal
}
#D1 Instructions # Emulation of Avx512 instructions
sub PSLLDQ($$) # Packed Shift Left Logical DoubleQword
{my ($xmm1, $imm8) = @_; # Bytes, length of shift in bytes
require128 $xmm1; # Check that we have a string of 128 bits
substr($xmm1, $imm8 * 8).zBytes($imm8)
}
sub VPSLLDQ($$) # Packed Shift Left Logical DoubleQword
{my ($xmm1, $imm8) = @_; # Bytes, length of shift in bytes
require128or256or512 $xmm1; # Check that we have a string of 128 bits
confess "0 - 15 for shift amount required" unless $imm8 >= 0 and $imm8 < 16;
return PSLLDQ($xmm1, $imm8) if length($xmm1) == 128;
return PSLLDQ(substr($xmm1, 0, 128), $imm8).
PSLLDQ(substr($xmm1, 128, 128), $imm8) if length($xmm1) == 256;
return PSLLDQ(substr($xmm1, 0, 128), $imm8).
PSLLDQ(substr($xmm1, 128, 128), $imm8).
PSLLDQ(substr($xmm1, 256, 128), $imm8).
PSLLDQ(substr($xmm1, 384, 128), $imm8)
}
sub PSRLDQ($$) # Packed Shift Right Logical DoubleQword
{my ($xmm1, $imm8) = @_; # Bytes, length of shift
require128 $xmm1; # Check that we have a string of 128 bits
zBytes($imm8).substr($xmm1, 0, 128 - $imm8 * 8)
}
sub VPSRLDQ($$) # Packed Shift Right Logical DoubleQword
{my ($xmm1, $imm8) = @_; # Bytes, length of shift
require128or256or512 $xmm1; # Check that we have a string of 128 bits
confess "0 - 15 for shift amount required" unless $imm8 >= 0 and $imm8 < 16;
return PSRLDQ($xmm1, $imm8) if length($xmm1) == 128;
return PSRLDQ(substr($xmm1, 0, 128), $imm8).
PSRLDQ(substr($xmm1, 128, 128), $imm8) if length($xmm1) == 256;
return PSRLDQ(substr($xmm1, 0, 128), $imm8).
PSRLDQ(substr($xmm1, 128, 128), $imm8).
PSRLDQ(substr($xmm1, 256, 128), $imm8).
PSRLDQ(substr($xmm1, 384, 128), $imm8)
}
#D1 PCMP # Packed CoMPare
#D2 PCMPEQ # Packed CoMPare EQual
lib/Simd/Avx512.pm view on Meta::CPAN
}
#D2 VPCMPU # Packed CoMPare Unsigned
sub vpcmpu($$$$$) #P Packed CoMPare Unsigned
{my ($size, $k2, $xmm1, $xmm2, $op) = @_; # Size of element in bits, input mask, bytes, bytes, test code
require8or16or32or64 $size if $develop; # We supply this parameter so we ought to get it right
require64 $k2; # Mask
require128or256or512 $xmm1, $xmm2; # Check that we have a string of 128 bits in the first operand
confess "Invalid op code $op" unless $op =~ m(\A(0|1|2|4|5|6)\Z); # Test code
my $T = # String tests
[sub {return 1 if $_[0] eq $_[1]; 0}, # eq 0
sub {return 1 if $_[0] lt $_[1]; 0}, # lt 1
sub {return 1 if $_[0] le $_[1]; 0}, # le 2
undef,
sub {return 1 if $_[0] ne $_[1]; 0}, # ne 4
sub {return 1 if $_[0] ge $_[1]; 0}, # ge 5
sub {return 1 if $_[0] gt $_[1]; 0}, # gt 6
];
my $N = length($xmm1) / $size; # Number of elements
my $k1 = maskRegister;
$k2 = substr($k2, -$N); # Relevant portion of mask
for(0..$N-1)
{next unless substr($k2, $_, 1) eq '1'; # Mask
my $o = $_ * $size;
substr($k1, $_, 1) = &{$$T[$op]}(substr($xmm1, $o, $size), # Compare according to code
substr($xmm2, $o, $size)) ? '1' : '0';
}
substr(zBytes(8).substr($k1, 0, $N), -64)
}
sub VPCMPUB($$$$) # Packed CoMPare Unsigned Byte
{my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, bytes, bytes, test code
vpcmpu 8, $k2, $xmm1, $xmm2, $op
}
sub VPCMPUW($$$$) # Packed CoMPare Unsigned Word
{my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, words, words, test code
vpcmpu 16, $k2, $xmm1, $xmm2, $op
}
sub VPCMPUD($$$$) # Packed CoMPare Unsigned Dword
{my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, dwords, dwords, test code
vpcmpu 32, $k2, $xmm1, $xmm2, $op
}
sub VPCMPUQ($$$$) # Packed CoMPare Unsigned Qword
{my ($k2, $xmm1, $xmm2, $op) = @_; # Input mask, qwords, qwords, test code
vpcmpu 64, $k2, $xmm1, $xmm2, $op
}
#D1 VPTEST # Packed TEST
#D2 VPTESTM # Packed TEST MASK
sub andAndTest($$) #P And two bit strings of the same length and return 0 if the result is 0 else 1
{my ($a, $b) = @_; # Element, element
my $N = requireSameLength $a, $b; # Check that the two elements have the same length
for(0..$N-1) # Look for match
{return 1 if substr($a, $_, 1) eq '1' and substr($b, $_, 1) eq '1';
}
0
}
sub vptest($$$) #P Packed TEST
{my ($size, $xmm1, $xmm2) = @_; # Size of element in bits, element, element
require8or16or32or64 $size if $develop; # We supply this parameter so we ought to get it right
require128or256or512 $xmm1, $xmm2; # Check that we have a string of 128 bits in the first operand
my $N = length($xmm1) / $size; # Number of elements
my $k1 = maskRegister;
for(0..$N-1)
{my $o = $_ * $size;
substr($k1, $_, 1) = andAndTest(substr($xmm1, $o, $size), # Test two elements
substr($xmm2, $o, $size)) ? '1' : '0';
}
substr(zBytes(8).substr($k1, 0, $N), -64)
}
sub VPTESTMB($$) # Packed TEST Mask Byte
{my ($xmm1, $xmm2) = @_; # Bytes, bytes
vptest 8, $xmm1, $xmm2
}
sub VPTESTMW($$) # Packed TEST Mask Word
{my ($xmm1, $xmm2) = @_; # Words, words
vptest 16, $xmm1, $xmm2
}
sub VPTESTMD($$) # Packed TEST Mask Dword
{my ($xmm1, $xmm2) = @_; # Dwords, dwords
vptest 32, $xmm1, $xmm2
}
sub VPTESTMQ($$) # Packed TEST Mask Quad
{my ($xmm1, $xmm2) = @_; # Quads, quads
vptest 64, $xmm1, $xmm2
}
#D1 VPBROADCAST # VPBROADCASTB - Packed BROADCAST Byte
sub VPBROADCASTB($$) # Packed TEST Mask Byte
{my ($size, $b) = @_; # Size of target in bits, byte
requireNumber128or256or512 $size;
require8 $b;
repeat($b, $size / 8)
}
sub VPBROADCASTW($$) # Packed TEST Mask Word
{my ($size, $w) = @_; # Size of target in bits, word
requireNumber128or256or512 $size;
require16 $w;
repeat($w, $size / 16)
}
sub VPBROADCASTD($$) # Packed TEST Mask Dword
lib/Simd/Avx512.pm view on Meta::CPAN
sub VPINSRW($$$) # Packed INSeRt Word
{my ($target, $word, $pos) = @_; # Target element, word, position to insert byte expressed as number of words from lowest order word numbered 0
require128or256or512 $target;
require16 $word;
confess "Invalid position $pos" if $pos < 0 or $pos > length($target) / 2;
substr($target, -($pos+1)*16, 16) = $word;
$target
}
sub VPINSRD($$$) # Packed INSeRt Dword
{my ($target, $dword, $pos) = @_; # Target element, dword, position to insert byte expressed as number of dwords from lowest order dword numbered 0
require128or256or512 $target;
require32 $dword;
confess "Invalid position $pos" if $pos < 0 or $pos > length($target) / 4;
substr($target, -($pos+1)*32, 32) = $dword;
$target
}
sub VPINSRQ($$$) # Packed INSeRt Quad
{my ($target, $qword, $pos) = @_; # Target element, qword, position to insert byte expressed as number of dwords from lowest order qword numbered 0
require128or256or512 $target;
require64 $qword;
confess "Invalid position $pos" if $pos < 0 or $pos > length($target) / 8;
substr($target, -($pos+1)*64, 64) = $qword;
$target
}
#D1 VPLZCNT # Packed Leading Zero CouNT
sub VPLZCNTD($) # Packed Leading Zero CouNT Dword
{my ($target) = @_; # Target element
require128or256or512 $target;
my $r = '';
my $n = length($target) / 32;
for(0..$n-1)
{my $b = substr($target, $_*32, 32) =~ s(1.*\Z) ()sr;
$r .= sprintf("%032b", length $b);
}
$r
}
sub VPLZCNTQ($) # Packed Leading Zero CouNT Qword
{my ($target) = @_; # Target element
require128or256or512 $target;
my $r = '';
my $n = length($target) / 64;
for(0..$n-1)
{my $b = substr($target, $_*64, 64) =~ s(1.*\Z) ()sr;
$r .= sprintf("%064b", length $b);
}
$r
}
#D1 Compress and Expand # Compress or expand
#D2 VPCOMPRESS # Packed COMPRESS
sub vpcompress($$$$$) #P Packed COMPRESS
{my ($size, $xmm1, $k2, $z, $xmm2) = @_; # Size of each element in bits, Compression target, compression mask, clear upper elements, source to compress
require64 $k2;
my $n = require128or256or512 $xmm1, $xmm2;
my $N = $n / $size; # Number of elements
$xmm1 = '0' x length $xmm1 if $z; # Clear target if requested
my $p = 0; # Position in target
for(1..$N) # Compress selected elements
{if (substr($k2, -$_, 1) eq '1')
{substr($xmm1, --$p * $size, $size) = substr($xmm2, -$_ * $size, $size)
}
}
$xmm1
}
sub VPCOMPRESSD($$$$) # Packed COMPRESS Dword
{my ($xmm1, $k2, $z, $xmm2) = @_; # Compression target, compression mask, clear upper elements, source to compress
vpcompress 32, $xmm1, $k2, $z, $xmm2
}
sub VPCOMPRESSQ($$$$) # Packed COMPRESS Qword
{my ($xmm1, $k2, $z, $xmm2) = @_; # Compression target, compression mask, clear upper elements, source to compress
vpcompress 64, $xmm1, $k2, $z, $xmm2
}
#D2 VPEXPAND # Packed EXPAND
sub vpexpand($$$$$) #P Packed EXPAND
{my ($size, $xmm1, $k2, $z, $xmm2) = @_; # Size of each element in bits, Compression target, expansion mask, clear upper elements, source to expand
require64 $k2;
my $n = require128or256or512 $xmm1, $xmm2;
my $N = $n / $size; # Number of elements
$xmm1 = '0' x length $xmm1 if $z; # Clear target if requested
my $p = 0; # Position in target
for(1..$N) # Compress selected elements
{if (substr($k2, -$_, 1) eq '1')
{substr($xmm2, -$_ * $size, $size) = substr($xmm1, --$p * $size, $size)
}
}
$xmm1
}
sub VPEXPANDD($$$$) # Packed EXPAND Dword
{my ($xmm1, $k2, $z, $xmm2) = @_; # Compression target, expansion mask, clear upper elements, source to expand
vpexpand 32, $xmm1, $k2, $z, $xmm2
}
sub VPEXPANDQ($$$$) # Packed EXPAND Qword
{my ($xmm1, $k2, $z, $xmm2) = @_; # Compression target, expansion mask, clear upper elements, source to expand
vpexpand 64, $xmm1, $k2, $z, $xmm2
}
#D0
#-------------------------------------------------------------------------------
# Export
#-------------------------------------------------------------------------------
use Exporter qw(import);
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
@ISA = qw(Exporter);
@EXPORT_OK = qw(
);
%EXPORT_TAGS = (all=>[@EXPORT, @EXPORT_OK]);
# podDocumentation
=pod
=encoding utf-8
=head1 Name
Simd::Avx512 - Emulate SIMD Avx512 instructions
=head1 Synopsis
Help needed please!
The instructions being emulated are illustrated at: L<https://www.officedaytime.com/simd512e/>
The instructions being emulated are described at: L<https://hjlebbink.github.io/x86doc/>
=head2 Example
Find the number of leading zeros in each of 8 quad words.
if (1) {
my ($i, $od, $oq) = (
#Q0 1 2 3 ...
#D0 1 2 3 4 5 6 7 ...
( run in 1.569 second using v1.01-cache-2.11-cpan-39bf76dae61 )