mirror of
https://github.com/Swiftgram/Telegram-iOS.git
synced 2025-06-16 05:55:20 +00:00
400 lines
15 KiB
C
400 lines
15 KiB
C
#import "DCTCommon.h"
|
|
|
|
#if !defined(__aarch64__)
|
|
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
|
|
typedef long JLONG;
|
|
|
|
#define CONST_BITS 8
|
|
#define PASS1_BITS 2
|
|
|
|
#define DCTSIZE 8 /* The basic DCT block is 8x8 samples */
|
|
#define DCTSIZE2 64 /* DCTSIZE squared; # of elements in a block */
|
|
|
|
#define FIX_0_382683433 ((JLONG)98) /* FIX(0.382683433) */
|
|
#define FIX_0_541196100 ((JLONG)139) /* FIX(0.541196100) */
|
|
#define FIX_0_707106781 ((JLONG)181) /* FIX(0.707106781) */
|
|
#define FIX_1_306562965 ((JLONG)334) /* FIX(1.306562965) */
|
|
|
|
#define FIX_1_082392200 ((JLONG)277) /* FIX(1.082392200) */
|
|
#define FIX_1_414213562 ((JLONG)362) /* FIX(1.414213562) */
|
|
#define FIX_1_847759065 ((JLONG)473) /* FIX(1.847759065) */
|
|
#define FIX_2_613125930 ((JLONG)669) /* FIX(2.613125930) */
|
|
|
|
#define RIGHT_SHIFT(x, shft) ((x) >> (shft))
|
|
#define IRIGHT_SHIFT(x, shft) ((x) >> (shft))
|
|
#define DESCALE(x, n) RIGHT_SHIFT(x, n)
|
|
#define IDESCALE(x, n) ((int)IRIGHT_SHIFT(x, n))
|
|
|
|
#define MULTIPLY(var, const) ((DCTELEM)DESCALE((var) * (const), CONST_BITS))
|
|
|
|
#define MULTIPLIER short /* prefer 16-bit with SIMD for parellelism */
|
|
typedef MULTIPLIER IFAST_MULT_TYPE; /* 16 bits is OK, use short if faster */
|
|
|
|
#define DEQUANTIZE(coef, quantval) (((IFAST_MULT_TYPE)(coef)) * (quantval))
|
|
|
|
#define RANGE_MASK (MAXJSAMPLE * 4 + 3) /* 2 bits wider than legal samples */
|
|
|
|
#define MAXJSAMPLE 255
|
|
#define CENTERJSAMPLE 128
|
|
|
|
typedef JSAMPROW *JSAMPARRAY; /* ptr to some rows (a 2-D sample array) */
|
|
typedef JSAMPARRAY *JSAMPIMAGE; /* a 3-D sample array: top index is color */
|
|
|
|
#define IDCT_range_limit(cinfo) ((cinfo)->sample_range_limit + CENTERJSAMPLE)
|
|
|
|
void dct_jpeg_fdct_ifast(DCTELEM *data)
|
|
{
|
|
DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
|
DCTELEM tmp10, tmp11, tmp12, tmp13;
|
|
DCTELEM z1, z2, z3, z4, z5, z11, z13;
|
|
DCTELEM *dataptr;
|
|
int ctr;
|
|
|
|
/* Pass 1: process rows. */
|
|
|
|
dataptr = data;
|
|
for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) {
|
|
tmp0 = dataptr[0] + dataptr[7];
|
|
tmp7 = dataptr[0] - dataptr[7];
|
|
tmp1 = dataptr[1] + dataptr[6];
|
|
tmp6 = dataptr[1] - dataptr[6];
|
|
tmp2 = dataptr[2] + dataptr[5];
|
|
tmp5 = dataptr[2] - dataptr[5];
|
|
tmp3 = dataptr[3] + dataptr[4];
|
|
tmp4 = dataptr[3] - dataptr[4];
|
|
|
|
/* Even part */
|
|
|
|
tmp10 = tmp0 + tmp3; /* phase 2 */
|
|
tmp13 = tmp0 - tmp3;
|
|
tmp11 = tmp1 + tmp2;
|
|
tmp12 = tmp1 - tmp2;
|
|
|
|
dataptr[0] = tmp10 + tmp11; /* phase 3 */
|
|
dataptr[4] = tmp10 - tmp11;
|
|
|
|
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
|
|
dataptr[2] = tmp13 + z1; /* phase 5 */
|
|
dataptr[6] = tmp13 - z1;
|
|
|
|
/* Odd part */
|
|
|
|
tmp10 = tmp4 + tmp5; /* phase 2 */
|
|
tmp11 = tmp5 + tmp6;
|
|
tmp12 = tmp6 + tmp7;
|
|
|
|
/* The rotator is modified from fig 4-8 to avoid extra negations. */
|
|
z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
|
|
z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
|
|
z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
|
|
z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
|
|
|
|
z11 = tmp7 + z3; /* phase 5 */
|
|
z13 = tmp7 - z3;
|
|
|
|
dataptr[5] = z13 + z2; /* phase 6 */
|
|
dataptr[3] = z13 - z2;
|
|
dataptr[1] = z11 + z4;
|
|
dataptr[7] = z11 - z4;
|
|
|
|
dataptr += DCTSIZE; /* advance pointer to next row */
|
|
}
|
|
|
|
/* Pass 2: process columns. */
|
|
|
|
dataptr = data;
|
|
for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) {
|
|
tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
|
|
tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
|
|
tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
|
|
tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
|
|
tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
|
|
tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
|
|
tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
|
|
tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
|
|
|
|
/* Even part */
|
|
|
|
tmp10 = tmp0 + tmp3; /* phase 2 */
|
|
tmp13 = tmp0 - tmp3;
|
|
tmp11 = tmp1 + tmp2;
|
|
tmp12 = tmp1 - tmp2;
|
|
|
|
dataptr[DCTSIZE * 0] = tmp10 + tmp11; /* phase 3 */
|
|
dataptr[DCTSIZE * 4] = tmp10 - tmp11;
|
|
|
|
z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
|
|
dataptr[DCTSIZE * 2] = tmp13 + z1; /* phase 5 */
|
|
dataptr[DCTSIZE * 6] = tmp13 - z1;
|
|
|
|
/* Odd part */
|
|
|
|
tmp10 = tmp4 + tmp5; /* phase 2 */
|
|
tmp11 = tmp5 + tmp6;
|
|
tmp12 = tmp6 + tmp7;
|
|
|
|
/* The rotator is modified from fig 4-8 to avoid extra negations. */
|
|
z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
|
|
z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
|
|
z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
|
|
z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
|
|
|
|
z11 = tmp7 + z3; /* phase 5 */
|
|
z13 = tmp7 - z3;
|
|
|
|
dataptr[DCTSIZE * 5] = z13 + z2; /* phase 6 */
|
|
dataptr[DCTSIZE * 3] = z13 - z2;
|
|
dataptr[DCTSIZE * 1] = z11 + z4;
|
|
dataptr[DCTSIZE * 7] = z11 - z4;
|
|
|
|
dataptr++; /* advance pointer to next column */
|
|
}
|
|
}
|
|
|
|
struct DctAuxiliaryData {
|
|
JSAMPLE *allocated_sample_range_limit;
|
|
JSAMPLE *sample_range_limit;
|
|
};
|
|
|
|
static void prepare_range_limit_table(struct DctAuxiliaryData *data)
|
|
/* Allocate and fill in the sample_range_limit table */
|
|
{
|
|
JSAMPLE *table;
|
|
int i;
|
|
|
|
table = (JSAMPLE *)malloc((5 * (MAXJSAMPLE + 1) + CENTERJSAMPLE) * sizeof(JSAMPLE));
|
|
data->allocated_sample_range_limit = table;
|
|
table += (MAXJSAMPLE + 1); /* allow negative subscripts of simple table */
|
|
data->sample_range_limit = table;
|
|
/* First segment of "simple" table: limit[x] = 0 for x < 0 */
|
|
memset(table - (MAXJSAMPLE + 1), 0, (MAXJSAMPLE + 1) * sizeof(JSAMPLE));
|
|
/* Main part of "simple" table: limit[x] = x */
|
|
for (i = 0; i <= MAXJSAMPLE; i++)
|
|
table[i] = (JSAMPLE)i;
|
|
table += CENTERJSAMPLE; /* Point to where post-IDCT table starts */
|
|
/* End of simple table, rest of first half of post-IDCT table */
|
|
for (i = CENTERJSAMPLE; i < 2 * (MAXJSAMPLE + 1); i++)
|
|
table[i] = MAXJSAMPLE;
|
|
/* Second half of post-IDCT table */
|
|
memset(table + (2 * (MAXJSAMPLE + 1)), 0,
|
|
(2 * (MAXJSAMPLE + 1) - CENTERJSAMPLE) * sizeof(JSAMPLE));
|
|
memcpy(table + (4 * (MAXJSAMPLE + 1) - CENTERJSAMPLE),
|
|
data->sample_range_limit, CENTERJSAMPLE * sizeof(JSAMPLE));
|
|
}
|
|
|
|
struct DctAuxiliaryData *createDctAuxiliaryData() {
|
|
struct DctAuxiliaryData *result = malloc(sizeof(struct DctAuxiliaryData));
|
|
memset(result, 0, sizeof(struct DctAuxiliaryData));
|
|
|
|
prepare_range_limit_table(result);
|
|
|
|
return result;
|
|
}
|
|
|
|
void freeDctAuxiliaryData(struct DctAuxiliaryData *data) {
|
|
if (data) {
|
|
free(data->allocated_sample_range_limit);
|
|
free(data);
|
|
}
|
|
}
|
|
|
|
void dct_jpeg_idct_ifast(struct DctAuxiliaryData *auxiliaryData, void *dct_table, JCOEFPTR coef_block, JSAMPROW output_buf) {
|
|
DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
|
|
DCTELEM tmp10, tmp11, tmp12, tmp13;
|
|
DCTELEM z5, z10, z11, z12, z13;
|
|
JCOEFPTR inptr;
|
|
IFAST_MULT_TYPE *quantptr;
|
|
int *wsptr;
|
|
JSAMPROW outptr;
|
|
JSAMPLE *range_limit = IDCT_range_limit(auxiliaryData);
|
|
int ctr;
|
|
int workspace[DCTSIZE2]; /* buffers data between passes */
|
|
|
|
/* Pass 1: process columns from input, store into work array. */
|
|
|
|
inptr = coef_block;
|
|
quantptr = dct_table;
|
|
wsptr = workspace;
|
|
for (ctr = DCTSIZE; ctr > 0; ctr--) {
|
|
/* Due to quantization, we will usually find that many of the input
|
|
* coefficients are zero, especially the AC terms. We can exploit this
|
|
* by short-circuiting the IDCT calculation for any column in which all
|
|
* the AC terms are zero. In that case each output is equal to the
|
|
* DC coefficient (with scale factor as needed).
|
|
* With typical images and quantization tables, half or more of the
|
|
* column DCT calculations can be simplified this way.
|
|
*/
|
|
|
|
if (inptr[DCTSIZE * 1] == 0 && inptr[DCTSIZE * 2] == 0 &&
|
|
inptr[DCTSIZE * 3] == 0 && inptr[DCTSIZE * 4] == 0 &&
|
|
inptr[DCTSIZE * 5] == 0 && inptr[DCTSIZE * 6] == 0 &&
|
|
inptr[DCTSIZE * 7] == 0) {
|
|
/* AC terms all zero */
|
|
int dcval = (int)DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
|
|
|
|
wsptr[DCTSIZE * 0] = dcval;
|
|
wsptr[DCTSIZE * 1] = dcval;
|
|
wsptr[DCTSIZE * 2] = dcval;
|
|
wsptr[DCTSIZE * 3] = dcval;
|
|
wsptr[DCTSIZE * 4] = dcval;
|
|
wsptr[DCTSIZE * 5] = dcval;
|
|
wsptr[DCTSIZE * 6] = dcval;
|
|
wsptr[DCTSIZE * 7] = dcval;
|
|
|
|
inptr++; /* advance pointers to next column */
|
|
quantptr++;
|
|
wsptr++;
|
|
continue;
|
|
}
|
|
|
|
/* Even part */
|
|
|
|
tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
|
|
tmp1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
|
|
tmp2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
|
|
tmp3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
|
|
|
|
tmp10 = tmp0 + tmp2; /* phase 3 */
|
|
tmp11 = tmp0 - tmp2;
|
|
|
|
tmp13 = tmp1 + tmp3; /* phases 5-3 */
|
|
tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
|
|
|
|
tmp0 = tmp10 + tmp13; /* phase 2 */
|
|
tmp3 = tmp10 - tmp13;
|
|
tmp1 = tmp11 + tmp12;
|
|
tmp2 = tmp11 - tmp12;
|
|
|
|
/* Odd part */
|
|
|
|
tmp4 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
|
|
tmp5 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
|
|
tmp6 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
|
|
tmp7 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
|
|
|
|
z13 = tmp6 + tmp5; /* phase 6 */
|
|
z10 = tmp6 - tmp5;
|
|
z11 = tmp4 + tmp7;
|
|
z12 = tmp4 - tmp7;
|
|
|
|
tmp7 = z11 + z13; /* phase 5 */
|
|
tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
|
|
|
|
z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
|
|
tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
|
|
tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5; /* -2*(c2+c6) */
|
|
|
|
tmp6 = tmp12 - tmp7; /* phase 2 */
|
|
tmp5 = tmp11 - tmp6;
|
|
tmp4 = tmp10 + tmp5;
|
|
|
|
wsptr[DCTSIZE * 0] = (int)(tmp0 + tmp7);
|
|
wsptr[DCTSIZE * 7] = (int)(tmp0 - tmp7);
|
|
wsptr[DCTSIZE * 1] = (int)(tmp1 + tmp6);
|
|
wsptr[DCTSIZE * 6] = (int)(tmp1 - tmp6);
|
|
wsptr[DCTSIZE * 2] = (int)(tmp2 + tmp5);
|
|
wsptr[DCTSIZE * 5] = (int)(tmp2 - tmp5);
|
|
wsptr[DCTSIZE * 4] = (int)(tmp3 + tmp4);
|
|
wsptr[DCTSIZE * 3] = (int)(tmp3 - tmp4);
|
|
|
|
inptr++; /* advance pointers to next column */
|
|
quantptr++;
|
|
wsptr++;
|
|
}
|
|
|
|
/* Pass 2: process rows from work array, store into output array. */
|
|
/* Note that we must descale the results by a factor of 8 == 2**3, */
|
|
/* and also undo the PASS1_BITS scaling. */
|
|
|
|
wsptr = workspace;
|
|
for (ctr = 0; ctr < DCTSIZE; ctr++) {
|
|
outptr = output_buf + ctr * DCTSIZE;
|
|
/* Rows of zeroes can be exploited in the same way as we did with columns.
|
|
* However, the column calculation has created many nonzero AC terms, so
|
|
* the simplification applies less often (typically 5% to 10% of the time).
|
|
* On machines with very fast multiplication, it's possible that the
|
|
* test takes more time than it's worth. In that case this section
|
|
* may be commented out.
|
|
*/
|
|
|
|
#ifndef NO_ZERO_ROW_TEST
|
|
if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
|
|
wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
|
|
/* AC terms all zero */
|
|
JSAMPLE dcval =
|
|
range_limit[IDESCALE(wsptr[0], PASS1_BITS + 3) & RANGE_MASK];
|
|
|
|
outptr[0] = dcval;
|
|
outptr[1] = dcval;
|
|
outptr[2] = dcval;
|
|
outptr[3] = dcval;
|
|
outptr[4] = dcval;
|
|
outptr[5] = dcval;
|
|
outptr[6] = dcval;
|
|
outptr[7] = dcval;
|
|
|
|
wsptr += DCTSIZE; /* advance pointer to next row */
|
|
continue;
|
|
}
|
|
#endif
|
|
|
|
/* Even part */
|
|
|
|
tmp10 = ((DCTELEM)wsptr[0] + (DCTELEM)wsptr[4]);
|
|
tmp11 = ((DCTELEM)wsptr[0] - (DCTELEM)wsptr[4]);
|
|
|
|
tmp13 = ((DCTELEM)wsptr[2] + (DCTELEM)wsptr[6]);
|
|
tmp12 =
|
|
MULTIPLY((DCTELEM)wsptr[2] - (DCTELEM)wsptr[6], FIX_1_414213562) - tmp13;
|
|
|
|
tmp0 = tmp10 + tmp13;
|
|
tmp3 = tmp10 - tmp13;
|
|
tmp1 = tmp11 + tmp12;
|
|
tmp2 = tmp11 - tmp12;
|
|
|
|
/* Odd part */
|
|
|
|
z13 = (DCTELEM)wsptr[5] + (DCTELEM)wsptr[3];
|
|
z10 = (DCTELEM)wsptr[5] - (DCTELEM)wsptr[3];
|
|
z11 = (DCTELEM)wsptr[1] + (DCTELEM)wsptr[7];
|
|
z12 = (DCTELEM)wsptr[1] - (DCTELEM)wsptr[7];
|
|
|
|
tmp7 = z11 + z13; /* phase 5 */
|
|
tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
|
|
|
|
z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
|
|
tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
|
|
tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5; /* -2*(c2+c6) */
|
|
|
|
tmp6 = tmp12 - tmp7; /* phase 2 */
|
|
tmp5 = tmp11 - tmp6;
|
|
tmp4 = tmp10 + tmp5;
|
|
|
|
/* Final output stage: scale down by a factor of 8 and range-limit */
|
|
|
|
outptr[0] =
|
|
range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS + 3) & RANGE_MASK];
|
|
outptr[7] =
|
|
range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS + 3) & RANGE_MASK];
|
|
outptr[1] =
|
|
range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS + 3) & RANGE_MASK];
|
|
outptr[6] =
|
|
range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS + 3) & RANGE_MASK];
|
|
outptr[2] =
|
|
range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS + 3) & RANGE_MASK];
|
|
outptr[5] =
|
|
range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS + 3) & RANGE_MASK];
|
|
outptr[4] =
|
|
range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS + 3) & RANGE_MASK];
|
|
outptr[3] =
|
|
range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS + 3) & RANGE_MASK];
|
|
|
|
wsptr += DCTSIZE; /* advance pointer to next row */
|
|
}
|
|
}
|
|
|
|
#endif
|