App-MHFS
view release on metacpan or search on metacpan
share/public_html/static/music_worklet_inprogress/decoder/deps/dr_libs/dr_flac.h view on Meta::CPAN
} seektable;
struct
{
drflac_uint32 vendorLength;
const char* vendor;
drflac_uint32 commentCount;
const void* pComments;
} vorbis_comment;
struct
{
char catalog[128];
drflac_uint64 leadInSampleCount;
drflac_bool32 isCD;
drflac_uint8 trackCount;
const void* pTrackData;
} cuesheet;
struct
{
drflac_uint32 type;
drflac_uint32 mimeLength;
const char* mime;
drflac_uint32 descriptionLength;
const char* description;
drflac_uint32 width;
drflac_uint32 height;
drflac_uint32 colorDepth;
drflac_uint32 indexColorCount;
drflac_uint32 pictureDataSize;
const drflac_uint8* pPictureData;
} picture;
} data;
} drflac_metadata;
/*
Callback for when data needs to be read from the client.


Parameters
----------
pUserData (in)
    The user data that was passed to drflac_open() and family.

pBufferOut (out)
    The output buffer. The bytes that are read are written here.

bytesToRead (in)
    The number of bytes to read.


Return Value
------------
The number of bytes actually read.


Remarks
-------
A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until either the entire bytesToRead is filled or
you have reached the end of the stream.
*/
typedef size_t (* drflac_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead);
/*
Callback for when the read position within the data needs to be moved (a seek).


Parameters
----------
pUserData (in)
    The user data that was passed to drflac_open() and family.

offset (in)
    The number of bytes to move, relative to the origin. Will never be negative.

origin (in)
    The origin of the seek - the current position or the start of the stream. This will be either drflac_seek_origin_start or drflac_seek_origin_current.


Return Value
------------
Whether or not the seek was successful.


Remarks
-------
Because the offset is never negative, a seek always moves forward from the chosen origin.

When seeking to a PCM frame using drflac_seek_to_pcm_frame(), dr_flac may call this with an offset beyond the end of the FLAC stream. This needs to be detected
and handled by returning DRFLAC_FALSE.
*/
typedef drflac_bool32 (* drflac_seek_proc)(void* pUserData, int offset, drflac_seek_origin origin);
/*
Callback for when a metadata block is read.


Parameters
----------
pUserData (in)
    The user data that was passed to drflac_open() and family.

pMetadata (in)
    A pointer to a structure containing the data of the metadata block.


Remarks
-------
Use pMetadata->type to determine which metadata block is being handled and how to read the data. This will be set to one of the
DRFLAC_METADATA_BLOCK_TYPE_* tokens.
*/
typedef void (* drflac_meta_proc)(void* pUserData, drflac_metadata* pMetadata);
typedef struct
{
void* pUserData;
void* (* onMalloc)(size_t sz, void* pUserData);
share/public_html/static/music_worklet_inprogress/decoder/deps/dr_libs/dr_flac.h view on Meta::CPAN
riceParamPart1 = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
riceParamPart2 = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
riceParamPart3 = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 1);
pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 2);
pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 3);
pSamplesOut += 4;
}
}
i = (count & ~3);
while (i < count) {
/* Rice extraction. */
if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) {
return DRFLAC_FALSE;
}
/* Rice reconstruction. */
riceParamPart0 &= riceParamMask;
riceParamPart0 |= (zeroCountPart0 << riceParam);
riceParamPart0 = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
/*riceParamPart0 = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1);*/
/* Sample reconstruction. */
if (drflac__use_64_bit_prediction(bitsPerSample, lpcOrder, lpcPrecision)) {
pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
} else {
pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(lpcOrder, lpcShift, coefficients, pSamplesOut + 0);
}
i += 1;
pSamplesOut += 1;
}
return DRFLAC_TRUE;
}
#if defined(DRFLAC_SUPPORT_SSE2)
/*
Packs the four 32-bit lanes of a and the four 32-bit lanes of b into eight 16-bit lanes using _mm_packs_epi32(), then
reorders them so that the elements of a and b alternate.

Listing 16-bit lanes from lowest to highest, the result is: a0 b0 a1 b1 a2 b2 a3 b3.
*/
static DRFLAC_INLINE __m128i drflac__mm_packs_interleaved_epi32(__m128i a, __m128i b)
{
    /* After the pack (lowest lane first): a0 a1 a2 a3 b0 b1 b2 b3. */
    __m128i packed = _mm_packs_epi32(a, b);

    /* Exchange the two middle 32-bit lanes: a0 a1 b0 b1 a2 a3 b2 b3. */
    __m128i grouped = _mm_shuffle_epi32(packed, _MM_SHUFFLE(3, 1, 2, 0));

    /*
    Exchange the two middle 16-bit lanes inside each 64-bit half. The low and high shuffles touch disjoint halves, so
    the order in which they are applied does not matter.
    */
    __m128i interleaved = _mm_shufflelo_epi16(grouped, _MM_SHUFFLE(3, 1, 2, 0));
    interleaved = _mm_shufflehi_epi16(interleaved, _MM_SHUFFLE(3, 1, 2, 0));

    return interleaved;
}
#endif
#if defined(DRFLAC_SUPPORT_SSE41)
/* Returns the bitwise complement of all 128 bits of a. */
static DRFLAC_INLINE __m128i drflac__mm_not_si128(__m128i a)
{
    /* Comparing zero against zero for equality sets every bit of every lane, which gives us an all-ones mask. */
    __m128i zero    = _mm_setzero_si128();
    __m128i allOnes = _mm_cmpeq_epi32(zero, zero);

    /* XOR with all ones flips every bit. */
    return _mm_xor_si128(a, allOnes);
}
/*
Horizontally adds the four 32-bit lanes of x.

The total (x0 + x1 + x2 + x3) ends up in the two lowest 32-bit lanes of the result. The two highest lanes hold
intermediate values rather than the total and should be ignored.
*/
static DRFLAC_INLINE __m128i drflac__mm_hadd_epi32(__m128i x)
{
    /* Swap the 64-bit halves and add. Lanes become (lowest first): x0+x2, x1+x3, x0+x2, x1+x3. */
    __m128i halvesSwapped = _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2));
    __m128i pairSums      = _mm_add_epi32(x, halvesSwapped);

    /* Swap the two 32-bit lanes of the low half (done as a 16-bit shuffle) so the final add combines both partial sums. */
    __m128i lowSwapped = _mm_shufflelo_epi16(pairSums, _MM_SHUFFLE(1, 0, 3, 2));

    return _mm_add_epi32(pairSums, lowSwapped);
}
/* Horizontally adds the two 64-bit lanes of x. Both 64-bit lanes of the result hold the sum. */
static DRFLAC_INLINE __m128i drflac__mm_hadd_epi64(__m128i x)
{
    /* Exchanging the 32-bit lane pairs (0,1) and (2,3) swaps the two 64-bit halves. */
    __m128i halvesSwapped = _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2));

    return _mm_add_epi64(x, halvesSwapped);
}
/*
Arithmetic (sign-extending) right shift of each 64-bit lane of x by count bits.

This implementation assumes count < 32. That restriction lets each 64-bit lane be handled as a low side and a high
side: a 64-bit logical shift produces every bit correctly except the topmost count bits, which it fills with zeros,
and a 32-bit arithmetic shift of the high side supplies those bits as copies of the sign bit.
*/
static DRFLAC_INLINE __m128i drflac__mm_srai_epi64(__m128i x, int count)
{
    /* Keeps only the upper 32 bits of each 64-bit lane. */
    __m128i highSideMask = _mm_set_epi32(0xFFFFFFFF, 0, 0xFFFFFFFF, 0);

    /* Whole-lane shift with zero fill. */
    __m128i zeroFilled = _mm_srli_epi64(x, count);

    /* Per-32-bit shift with sign fill. Only the high side is wanted, so the low side is cleared. */
    __m128i signFilled = _mm_and_si128(_mm_srai_epi32(x, count), highSideMask);

    return _mm_or_si128(zeroFilled, signFilled);
}
static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
{
int i;
drflac_uint32 riceParamMask;
drflac_int32* pDecodedSamples = pSamplesOut;
drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
drflac_uint32 zeroCountParts0 = 0;
drflac_uint32 zeroCountParts1 = 0;
drflac_uint32 zeroCountParts2 = 0;
drflac_uint32 zeroCountParts3 = 0;
drflac_uint32 riceParamParts0 = 0;
drflac_uint32 riceParamParts1 = 0;
drflac_uint32 riceParamParts2 = 0;
drflac_uint32 riceParamParts3 = 0;
__m128i coefficients128_0;
__m128i coefficients128_4;
__m128i coefficients128_8;
__m128i samples128_0;
__m128i samples128_4;
__m128i samples128_8;
__m128i riceParamMask128;
const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
riceParamMask = (drflac_uint32)~((~0UL) << riceParam);
riceParamMask128 = _mm_set1_epi32(riceParamMask);
/* Pre-load. */
coefficients128_0 = _mm_setzero_si128();
coefficients128_4 = _mm_setzero_si128();
share/public_html/static/music_worklet_inprogress/decoder/deps/dr_libs/dr_flac.h view on Meta::CPAN
}
runningOrder = 0;
}
/* 8 - 11 */
if (runningOrder == 4) {
coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8));
samples128_8 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 12));
runningOrder -= 4;
} else {
switch (runningOrder) {
case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break;
case 2: coefficients128_8 = _mm_set_epi32(0, 0, coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0, 0); break;
case 1: coefficients128_8 = _mm_set_epi32(0, 0, 0, coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0, 0, 0); break;
}
runningOrder = 0;
}
/* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3));
coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3));
coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3));
}
#else
/* This causes strict-aliasing warnings with GCC. */
switch (order)
{
case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12];
case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11];
case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10];
case 9: ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9];
case 8: ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8];
case 7: ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7];
case 6: ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6];
case 5: ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5];
case 4: ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4];
case 3: ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3];
case 2: ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2];
case 1: ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1];
}
#endif
/* For this version we are doing one sample at a time. */
while (pDecodedSamples < pDecodedSamplesEnd) {
__m128i prediction128;
__m128i zeroCountPart128;
__m128i riceParamPart128;
if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) ||
!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) ||
!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) ||
!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) {
return DRFLAC_FALSE;
}
zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);
riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));
riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01))), _mm_set1_epi32(0x01))); /* <-- SSE2 compatible */
/*riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_mullo_epi32(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01)), _mm_set1_epi32(0xFFFFFFFF)));*/ /* <-- Only supported from SSE4.1 and is slower in my testing... ...
if (order <= 4) {
for (i = 0; i < 4; i += 1) {
prediction128 = _mm_mullo_epi32(coefficients128_0, samples128_0);
/* Horizontal add and shift. */
prediction128 = drflac__mm_hadd_epi32(prediction128);
prediction128 = _mm_srai_epi32(prediction128, shift);
prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
}
} else if (order <= 8) {
for (i = 0; i < 4; i += 1) {
prediction128 = _mm_mullo_epi32(coefficients128_4, samples128_4);
prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0));
/* Horizontal add and shift. */
prediction128 = drflac__mm_hadd_epi32(prediction128);
prediction128 = _mm_srai_epi32(prediction128, shift);
prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4);
samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
}
} else {
for (i = 0; i < 4; i += 1) {
prediction128 = _mm_mullo_epi32(coefficients128_8, samples128_8);
prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_4, samples128_4));
prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0));
/* Horizontal add and shift. */
prediction128 = drflac__mm_hadd_epi32(prediction128);
prediction128 = _mm_srai_epi32(prediction128, shift);
prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
samples128_8 = _mm_alignr_epi8(samples128_4, samples128_8, 4);
samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4);
samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
}
}
/* We store samples in groups of 4. */
_mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0);
pDecodedSamples += 4;
}
/* Make sure we process the last few samples. */
i = (count & ~3);
while (i < (int)count) {
/* Rice extraction. */
if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) {
return DRFLAC_FALSE;
}
/* Rice reconstruction. */
share/public_html/static/music_worklet_inprogress/decoder/deps/dr_libs/dr_flac.h view on Meta::CPAN
case 2: coefficients128_4 = _mm_set_epi32(0, 0, coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0, 0); break;
case 1: coefficients128_4 = _mm_set_epi32(0, 0, 0, coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0, 0, 0); break;
}
runningOrder = 0;
}
/* 8 - 11 */
if (runningOrder == 4) {
coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8));
samples128_8 = _mm_loadu_si128((const __m128i*)(pSamplesOut - 12));
runningOrder -= 4;
} else {
switch (runningOrder) {
case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break;
case 2: coefficients128_8 = _mm_set_epi32(0, 0, coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0, 0); break;
case 1: coefficients128_8 = _mm_set_epi32(0, 0, 0, coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0, 0, 0); break;
}
runningOrder = 0;
}
/* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3));
coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3));
coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3));
}
#else
switch (order)
{
case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12];
case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11];
case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10];
case 9: ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9];
case 8: ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8];
case 7: ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7];
case 6: ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6];
case 5: ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5];
case 4: ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4];
case 3: ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3];
case 2: ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2];
case 1: ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1];
}
#endif
/* For this version we are doing one sample at a time. */
while (pDecodedSamples < pDecodedSamplesEnd) {
__m128i zeroCountPart128;
__m128i riceParamPart128;
if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) ||
!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) ||
!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) ||
!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) {
return DRFLAC_FALSE;
}
zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);
riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));
riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(1))), _mm_set1_epi32(1)));
for (i = 0; i < 4; i += 1) {
prediction128 = _mm_xor_si128(prediction128, prediction128); /* Reset to 0. */
switch (order)
{
case 12:
case 11: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(1, 1, 0, 0))));
case 10:
case 9: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(3, 3, 2, 2))));
case 8:
case 7: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(1, 1, 0, 0))));
case 6:
case 5: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(3, 3, 2, 2))));
case 4:
case 3: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(1, 1, 0, 0))));
case 2:
case 1: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(3, 3, 2, 2))));
}
/* Horizontal add and shift. */
prediction128 = drflac__mm_hadd_epi64(prediction128);
prediction128 = drflac__mm_srai_epi64(prediction128, shift);
prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
/* Our value should be sitting in prediction128[0]. We need to combine this with our SSE samples. */
samples128_8 = _mm_alignr_epi8(samples128_4, samples128_8, 4);
samples128_4 = _mm_alignr_epi8(samples128_0, samples128_4, 4);
samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
/* Slide our rice parameter down so that the value in position 0 contains the next one to process. */
riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
}
/* We store samples in groups of 4. */
_mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0);
pDecodedSamples += 4;
}
/* Make sure we process the last few samples. */
i = (count & ~3);
while (i < (int)count) {
/* Rice extraction. */
if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) {
return DRFLAC_FALSE;
}
/* Rice reconstruction. */
riceParamParts0 &= riceParamMask;
riceParamParts0 |= (zeroCountParts0 << riceParam);
riceParamParts0 = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01];
/* Sample reconstruction. */
pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples);
i += 1;
pDecodedSamples += 1;
}
return DRFLAC_TRUE;
( run in 2.572 seconds using v1.01-cache-2.11-cpan-39bf76dae61 )