Commit a7de0aa5 authored by Frank Bösing

Fast clip + abs

parent 8150e9ee
...@@ -6,6 +6,22 @@ ...@@ -6,6 +6,22 @@
* Updated on: 27.11.2021 * Updated on: 27.11.2021
*/ */
#include "mp3_decoder.h" #include "mp3_decoder.h"
/*
 * CLIP_2N(y, n) - saturate the int lvalue y, in place, to [-2^n, 2^n - 1].
 *
 *   y  modifiable int lvalue; it is read more than once, so it must not
 *      have side effects (no CLIP_2N(*p++, n)).
 *   n  clip exponent; must keep (1 << n) representable in int
 *      (0..30 for a 32-bit int). May be any expression, e.g. 31 - es.
 *
 * Statement-like macro: wrapped in do { } while (0) so that
 * "if (c) CLIP_2N(v, n); else ..." parses correctly, and the scratch
 * locals carry a clip2n_ prefix so they cannot capture an argument that
 * happens to be called x or sign.
 */
#if 0 //Fast on ARM:
/* Branch-light form: y is in range exactly when every bit from n upward
 * equals the sign bit. Relies on 32-bit int and arithmetic right shift. */
#define CLIP_2N(y, n) do { \
    const int clip2n_sign_ = (y) >> 31; \
    if (clip2n_sign_ != (y) >> (n)) { \
        (y) = clip2n_sign_ ^ ((1 << (n)) - 1); \
    } \
} while (0)
#else //on xtensa this is faster, due to asm min/max instructions:
/* Plain lower-bound / upper-bound clamp. */
#define CLIP_2N(y, n) do { \
    int clip2n_lim_ = 1 << (n); \
    if ((y) < -clip2n_lim_) { (y) = -clip2n_lim_; } \
    clip2n_lim_--; \
    if ((y) > clip2n_lim_) { (y) = clip2n_lim_; } \
} while (0)
#endif
const uint8_t m_SYNCWORDH =0xff; const uint8_t m_SYNCWORDH =0xff;
const uint8_t m_SYNCWORDL =0xf0; const uint8_t m_SYNCWORDL =0xf0;
...@@ -2849,100 +2865,38 @@ void WinPrevious(int *xPrev, int *xPrevWin, int btPrev){ ...@@ -2849,100 +2865,38 @@ void WinPrevious(int *xPrev, int *xPrevWin, int btPrev){
* Return: updated mOut (from new outputs y) * Return: updated mOut (from new outputs y)
**********************************************************************************************************************/ **********************************************************************************************************************/
int FreqInvertRescale(int *y, int *xPrev, int blockIdx, int es){ int FreqInvertRescale(int *y, int *xPrev, int blockIdx, int es) {
int i, d, mOut;
int y0, y1, y2, y3, y4, y5, y6, y7, y8; if (es == 0) {
/* fast case - frequency invert only (no rescaling) */
if (es == 0) { if (blockIdx & 0x01) {
/* fast case - frequency invert only (no rescaling) - can fuse into overlap-add for speed, if desired */ y += m_NBANDS;
if (blockIdx & 0x01) { for (int i = 0; i < 9; i++) {
y += m_NBANDS; *y = - *y; y += 2 * m_NBANDS;
y0 = *y; }
y += 2 * m_NBANDS; }
y1 = *y; return 0;
y += 2 * m_NBANDS; }
y2 = *y;
y += 2 * m_NBANDS; int d, mOut;
y3 = *y; /* undo pre-IMDCT scaling, clipping if necessary */
y += 2 * m_NBANDS; mOut = 0;
y4 = *y; if (blockIdx & 0x01) {
y += 2 * m_NBANDS; /* frequency invert */
y5 = *y; for (int i = 0; i < 9; i++) {
y += 2 * m_NBANDS; d = *y; CLIP_2N(d, 31 - es); *y = d << es; mOut |= FASTABS(*y); y += m_NBANDS;
y6 = *y; d = -*y; CLIP_2N(d, 31 - es); *y = d << es; mOut |= FASTABS(*y); y += m_NBANDS;
y += 2 * m_NBANDS; d = *xPrev; CLIP_2N(d, 31 - es); *xPrev++ = d << es;
y7 = *y;
y += 2 * m_NBANDS;
y8 = *y;
y += 2 * m_NBANDS;
y -= 18 * m_NBANDS;
*y = -y0;
y += 2 * m_NBANDS;
*y = -y1;
y += 2 * m_NBANDS;
*y = -y2;
y += 2 * m_NBANDS;
*y = -y3;
y += 2 * m_NBANDS;
*y = -y4;
y += 2 * m_NBANDS;
*y = -y5;
y += 2 * m_NBANDS;
*y = -y6;
y += 2 * m_NBANDS;
*y = -y7;
y += 2 * m_NBANDS;
*y = -y8;
y += 2 * m_NBANDS;
} }
return 0;
} else { } else {
/* undo pre-IMDCT scaling, clipping if necessary */ for (int i = 0; i < 9; i++) {
mOut = 0; d = *y; CLIP_2N(d, 31 - es); *y = d << es; mOut |= FASTABS(*y); y += m_NBANDS;
int sign=0; d = *y; CLIP_2N(d, 31 - es); *y = d << es; mOut |= FASTABS(*y); y += m_NBANDS;
if (blockIdx & 0x01) { d = *xPrev; CLIP_2N(d, 31 - es); *xPrev++ = d << es;
/* frequency invert */
for (i = 0; i < 18; i += 2) {
d = *y;
sign = (d) >> 31;
if (sign != (d) >> (31 - es)){(d) = sign ^ ((1 << (31 - es)) - 1);}
*y = d << es;
mOut |= FASTABS(*y);
y += m_NBANDS;
d = -*y;
sign = (d) >> 31;
if (sign != (d) >> (31 - es)){(d) = sign ^ ((1 << (31 - es)) - 1);}
*y = d << es;
mOut |= FASTABS(*y);
y += m_NBANDS;
d = *xPrev;
sign = (d) >> 31;
if (sign != (d) >> (31 - es)){(d) = sign ^ ((1 << (31 - es)) - 1);}
*xPrev++ = d << es;
}
} else {
for (i = 0; i < 18; i += 2) {
d = *y;
sign = (d) >> 31;
if (sign != (d) >> (31 - es)){(d) = sign ^ ((1 << (31 - es)) - 1);}
*y = d << es;
mOut |= FASTABS(*y);
y += m_NBANDS;
d = *y;
sign = (d) >> 31;
if (sign != (d) >> (31 - es)){(d) = sign ^ ((1 << (31 - es)) - 1);}
*y = d << es;
mOut |= FASTABS(*y);
y += m_NBANDS;
d = *xPrev;
sign = (d) >> 31;
if (sign != (d) >> (31 - es)){(d) = sign ^ ((1 << (31 - es)) - 1);}
*xPrev++ = d << es;
}
} }
return mOut;
} }
return mOut;
} }
......
// based om helix mp3 decoder // based om helix mp3 decoder
#pragma once #pragma once
#pragma GCC optimize ("O3")
#include "Arduino.h" #include "Arduino.h"
#include "assert.h" #include "assert.h"
...@@ -172,7 +171,7 @@ typedef struct ScaleFactorJS { /* used in MPEG 2, 2.5 intensity (joint) stereo o ...@@ -172,7 +171,7 @@ typedef struct ScaleFactorJS { /* used in MPEG 2, 2.5 intensity (joint) stereo o
/* NOTE - could get by with smaller vbuf if memory is more important than speed /* NOTE - could get by with smaller vbuf if memory is more important than speed
* (in Subband, instead of replicating each block in FDCT32 you would do a memmove on the * (in Subband, instead of replicating each block in FDCT32 you would do a memmove on the
* last 15 blocks to shift them down one, a hardware style FIFO) * last 15 blocks to shift them down one, a hardware style FIFO)
*/ */
typedef struct SubbandInfo { typedef struct SubbandInfo {
int vbuf[m_MAX_NCHAN * m_VBUF_LENGTH]; /* vbuf for fast DCT-based synthesis PQMF - double size for speed (no modulo indexing) */ int vbuf[m_MAX_NCHAN * m_VBUF_LENGTH]; /* vbuf for fast DCT-based synthesis PQMF - double size for speed (no modulo indexing) */
int vindex; /* internal index for tracking position in vbuf */ int vindex; /* internal index for tracking position in vbuf */
...@@ -509,9 +508,6 @@ int HybridTransform(int *xCurr, int *xPrev, int y[m_BLOCK_SIZE][m_NBANDS], SideI ...@@ -509,9 +508,6 @@ int HybridTransform(int *xCurr, int *xPrev, int y[m_BLOCK_SIZE][m_NBANDS], SideI
inline uint64_t SAR64(uint64_t x, int n) {return x >> n;} inline uint64_t SAR64(uint64_t x, int n) {return x >> n;}
inline int MULSHIFT32(int x, int y) { int z; z = (uint64_t) x * (uint64_t) y >> 32; return z;} inline int MULSHIFT32(int x, int y) { int z; z = (uint64_t) x * (uint64_t) y >> 32; return z;}
inline uint64_t MADD64(uint64_t sum64, int x, int y) {sum64 += (uint64_t) x * (uint64_t) y; return sum64;}/* returns 64-bit value in [edx:eax] */ inline uint64_t MADD64(uint64_t sum64, int x, int y) {sum64 += (uint64_t) x * (uint64_t) y; return sum64;}/* returns 64-bit value in [edx:eax] */
//inline int CLZ(int x){int numZeros; if (!x) return(sizeof(int) * 8); numZeros = 0; while (!(x & 0x80000000)){numZeros++; x <<= 1;} return numZeros;}
#define CLZ(x) __builtin_clz(x)
inline uint64_t xSAR64(uint64_t x, int n){return x >> n;} inline uint64_t xSAR64(uint64_t x, int n){return x >> n;}
inline int FASTABS(int x){ int sign; sign=x>>(sizeof(int)*8-1); x^=sign; x-=sign; return x;} inline int FASTABS(int x){ return __builtin_abs(x);} //xtensa has a fast abs instruction
#define CLZ(x) __builtin_clz(x)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment