Emoji 1.1 WIP

This commit is contained in:
Ali
2022-08-01 02:22:35 +04:00
parent 47239681a7
commit 095b9d5058
17 changed files with 395 additions and 279 deletions

View File

@@ -27,7 +27,7 @@ typedef NS_ENUM(NSUInteger, ImageDCTTableType) {
- (void)forwardWithPixels:(uint8_t const * _Nonnull)pixels coefficients:(int16_t * _Nonnull)coefficients width:(NSInteger)width height:(NSInteger)height bytesPerRow:(NSInteger)bytesPerRow __attribute__((objc_direct));
- (void)inverseWithCoefficients:(int16_t const * _Nonnull)coefficients pixels:(uint8_t * _Nonnull)pixels width:(NSInteger)width height:(NSInteger)height coefficientsPerRow:(NSInteger)coefficientsPerRow bytesPerRow:(NSInteger)bytesPerRow __attribute__((objc_direct));
- (void)forward4x4:(int16_t const * _Nonnull)normalizedCoefficients coefficients:(int16_t * _Nonnull)coefficients width:(NSInteger)width height:(NSInteger)height __attribute__((objc_direct));
- (void)inverse4x4:(int16_t const * _Nonnull)coefficients normalizedCoefficients:(int16_t * _Nonnull)normalizedCoefficients width:(NSInteger)width height:(NSInteger)height __attribute__((objc_direct));
- (void)inverse4x4Add:(int16_t const * _Nonnull)coefficients normalizedCoefficients:(int16_t * _Nonnull)normalizedCoefficients width:(NSInteger)width height:(NSInteger)height __attribute__((objc_direct));
@end

View File

@@ -727,7 +727,7 @@ static inline void transpose_idct4x4_16_bd8(int16x8_t *const a) {
idct4x4_16_kernel_bd8(a);
}
inline void vpx_idct4x4_16_add_neon(const int16x8_t &top64, const int16x8_t &bottom64, int16_t *dest, int16_t multiplier) {
inline void vpx_idct4x4_16_add_neon(const int16x8_t &top64, const int16x8_t &bottom64, const int16x4_t &current0, const int16x4_t &current1, const int16x4_t &current2, const int16x4_t &current3, int16_t multiplier, int16_t *dest, int destRowIncrement) {
int16x8_t a[2];
assert(!((intptr_t)dest % sizeof(uint32_t)));
@@ -745,11 +745,19 @@ inline void vpx_idct4x4_16_add_neon(const int16x8_t &top64, const int16x8_t &bot
a[0] = vrshrq_n_s16(a[0], 4);
a[1] = vrshrq_n_s16(a[1], 4);
vst1q_s16(dest, a[0]);
dest += 2 * 4;
vst1_s16(dest, vget_high_s16(a[1]));
dest += 4;
vst1_s16(dest, vget_low_s16(a[1]));
a[0] = vaddq_s16(a[0], vcombine_s16(current0, current1));
a[1] = vaddq_s16(a[1], vcombine_s16(current3, current2));
vst1_s16(dest + destRowIncrement * 0, vget_low_s16(a[0]));
vst1_s16(dest + destRowIncrement * 1, vget_high_s16(a[0]));
vst1_s16(dest + destRowIncrement * 2, vget_high_s16(a[1]));
vst1_s16(dest + destRowIncrement * 3, vget_low_s16(a[1]));
//vst1q_s16(dest, a[0]);
//dest += 2 * 4;
//vst1_s16(dest, vget_high_s16(a[1]));
//dest += 4;
//vst1_s16(dest, vget_low_s16(a[1]));
}
static int dct4x4QuantDC = 58;
@@ -803,11 +811,14 @@ void performForward4x4Dct(int16_t const *normalizedCoefficients, int16_t *coeffi
}
}
void performInverse4x4Dct(int16_t const * coefficients, int16_t *normalizedCoefficients, int width, int height, DctAuxiliaryData *auxiliaryData, IFAST_MULT_TYPE *ifmtbl) {
DCTELEM resultBlock[4 * 4];
void performInverse4x4DctAdd(int16_t const *coefficients, int16_t *normalizedCoefficients, int width, int height, DctAuxiliaryData *auxiliaryData, IFAST_MULT_TYPE *ifmtbl) {
for (int y = 0; y < height; y += 4) {
for (int x = 0; x < width; x += 4) {
int16x4_t current0 = vld1_s16(&normalizedCoefficients[(y + 0) * width + x]);
int16x4_t current1 = vld1_s16(&normalizedCoefficients[(y + 1) * width + x]);
int16x4_t current2 = vld1_s16(&normalizedCoefficients[(y + 2) * width + x]);
int16x4_t current3 = vld1_s16(&normalizedCoefficients[(y + 3) * width + x]);
uint32x2_t sa = vld1_u32((uint32_t *)&coefficients[(y + 0) * width + x]);
uint32x2_t sb = vld1_u32((uint32_t *)&coefficients[(y + 1) * width + x]);
uint32x2_t sc = vld1_u32((uint32_t *)&coefficients[(y + 2) * width + x]);
@@ -829,34 +840,7 @@ void performInverse4x4Dct(int16_t const * coefficients, int16_t *normalizedCoeff
int16x8_t top64 = vreinterpretq_s16_u16(qtop16);
int16x8_t bottom64 = vreinterpretq_s16_u16(qbottom16);
/*DCTELEM coefficientBlock[4 * 4];
for (int blockY = 0; blockY < 4; blockY++) {
for (int blockX = 0; blockX < 4; blockX++) {
coefficientBlock[zigZag4x4Inv[blockY * 4 + blockX]] = coefficients[(y + blockY) * width + (x + blockX)];
}
}
top64 = vreinterpretq_s16_u64(vld1q_u64((uint64_t *)&coefficientBlock[0]));
bottom64 = vreinterpretq_s16_u64(vld1q_u64((uint64_t *)&coefficientBlock[8]));*/
vpx_idct4x4_16_add_neon(top64, bottom64, resultBlock, dct4x4QuantAC);
uint32x2_t a = vld1_u32((uint32_t *)&resultBlock[4 * 0]);
uint32x2_t b = vld1_u32((uint32_t *)&resultBlock[4 * 1]);
uint32x2_t c = vld1_u32((uint32_t *)&resultBlock[4 * 2]);
uint32x2_t d = vld1_u32((uint32_t *)&resultBlock[4 * 3]);
vst1_u32((uint32_t *)&normalizedCoefficients[(y + 0) * width + x], a);
vst1_u32((uint32_t *)&normalizedCoefficients[(y + 1) * width + x], b);
vst1_u32((uint32_t *)&normalizedCoefficients[(y + 2) * width + x], c);
vst1_u32((uint32_t *)&normalizedCoefficients[(y + 3) * width + x], d);
/*for (int blockY = 0; blockY < 4; blockY++) {
for (int blockX = 0; blockX < 4; blockX++) {
normalizedCoefficients[(y + blockY) * width + (x + blockX)] = resultBlock[blockY * 4 + blockX];
}
}*/
vpx_idct4x4_16_add_neon(top64, bottom64, current0, current1, current2, current3, dct4x4QuantAC, normalizedCoefficients + y * width + x, width);
}
}
}
@@ -932,8 +916,8 @@ void DCT::forward4x4(int16_t const *normalizedCoefficients, int16_t *coefficient
performForward4x4Dct(normalizedCoefficients, coefficients, width, height, (DCTELEM *)_internal->forwardDctData.data());
}
void DCT::inverse4x4(int16_t const *coefficients, int16_t *normalizedCoefficients, int width, int height) {
performInverse4x4Dct(coefficients, normalizedCoefficients, width, height, _internal->auxiliaryData, (IFAST_MULT_TYPE *)_internal->inverseDctData.data());
void DCT::inverse4x4Add(int16_t const *coefficients, int16_t *normalizedCoefficients, int width, int height) {
performInverse4x4DctAdd(coefficients, normalizedCoefficients, width, height, _internal->auxiliaryData, (IFAST_MULT_TYPE *)_internal->inverseDctData.data());
}
}

View File

@@ -31,7 +31,7 @@ public:
void forward(uint8_t const *pixels, int16_t *coefficients, int width, int height, int bytesPerRow);
void inverse(int16_t const *coefficients, uint8_t *pixels, int width, int height, int coefficientsPerRow, int bytesPerRow);
void forward4x4(int16_t const *normalizedCoefficients, int16_t *coefficients, int width, int height);
void inverse4x4(int16_t const *coefficients, int16_t *normalizedCoefficients, int width, int height);
void inverse4x4Add(int16_t const *coefficients, int16_t *normalizedCoefficients, int width, int height);
private:
DCTInternal *_internal;

View File

@@ -82,8 +82,8 @@
_dct->forward4x4(normalizedCoefficients, coefficients, (int)width, (int)height);
}
- (void)inverse4x4:(int16_t const * _Nonnull)coefficients normalizedCoefficients:(int16_t * _Nonnull)normalizedCoefficients width:(NSInteger)width height:(NSInteger)height {
_dct->inverse4x4(coefficients, normalizedCoefficients, (int)width, (int)height);
/// Bridges to the C++ `DCT::inverse4x4Add`, narrowing the NSInteger dimensions to `int`.
/// `normalizedCoefficients` is passed through as the output buffer of the C++ call.
- (void)inverse4x4Add:(int16_t const * _Nonnull)coefficients normalizedCoefficients:(int16_t * _Nonnull)normalizedCoefficients width:(NSInteger)width height:(NSInteger)height {
    int const intWidth = (int)width;
    int const intHeight = (int)height;
    _dct->inverse4x4Add(coefficients, normalizedCoefficients, intWidth, intHeight);
}
@end

View File

@@ -118,14 +118,17 @@ void scaleImagePlane(uint8_t *outPlane, int outWidth, int outHeight, int outByte
}
// Widens `length` unsigned 8-bit samples from `source` into signed 16-bit values in
// `dest` (zero-extended, so the results are always in 0...255).
//
// Whole groups of 32 elements go through an unrolled NEON path (four 8-lane widening
// moves per iteration); the remaining `length % 32` elements are converted one by one.
// The vector path is limited to whole groups so that no load or store touches memory
// beyond `length` elements of either buffer. A non-positive `length` is a no-op.
void convertUInt8toInt16(uint8_t const *source, int16_t *dest, int length) {
    const int blockSize = 8 * 4;
    // Number of leading elements that can be handled in full 32-element groups.
    const int vectorLength = length > 0 ? length - (length % blockSize) : 0;

    for (int i = 0; i < vectorLength; i += blockSize) {
        #pragma unroll
        for (int j = 0; j < 4; j++) {
            uint8x8_t lhs8 = vld1_u8(&source[i + j * 8]);
            // vmovl_u8 zero-extends to u16; the reinterpret only changes the lane type.
            int16x8_t lhs = vreinterpretq_s16_u16(vmovl_u8(lhs8));
            vst1q_s16(&dest[i + j * 8], lhs);
        }
    }

    // Scalar tail for the elements that do not fill a whole group.
    for (int i = vectorLength; i < length; i++) {
        dest[i] = (int16_t)source[i];
    }
}
@@ -167,14 +170,17 @@ void subtractArraysInt16(int16_t const *a, int16_t const *b, int16_t *dest, int
}
// Element-wise sum of two int16 arrays: dest[i] = a[i] + b[i] for i in [0, length).
// `dest` may alias `a` or `b` exactly (in-place accumulation); each element is read
// before it is written.
//
// Whole groups of 32 elements go through an unrolled NEON path (four 8-lane adds per
// iteration); the remaining `length % 32` elements are summed one by one. The vector
// path is limited to whole groups so that no load or store touches memory beyond
// `length` elements, and so that tail elements are added exactly once when `dest`
// aliases an input. A non-positive `length` is a no-op.
void addArraysInt16(int16_t const *a, int16_t const *b, int16_t *dest, int length) {
    const int blockSize = 8 * 4;
    // Number of leading elements that can be handled in full 32-element groups.
    const int vectorLength = length > 0 ? length - (length % blockSize) : 0;

    for (int i = 0; i < vectorLength; i += blockSize) {
        #pragma unroll
        for (int j = 0; j < 4; j++) {
            int16x8_t lhs = vld1q_s16(&a[i + j * 8]);
            int16x8_t rhs = vld1q_s16(&b[i + j * 8]);
            int16x8_t result = vaddq_s16(lhs, rhs);
            vst1q_s16(&dest[i + j * 8], result);
        }
    }

    // Scalar tail: the same wrapping 16-bit addition as vaddq_s16 in the vector path.
    for (int i = vectorLength; i < length; i++) {
        dest[i] = (int16_t)(a[i] + b[i]);
    }
}

View File

@@ -473,8 +473,8 @@ private final class AnimationCacheItemWriterImpl: AnimationCacheItemWriter {
differenceCoefficients.dct4x4(dctData: dctData, target: dctCoefficients)
//previous + delta = current
dctCoefficients.idct4x4(dctData: dctData, target: differenceCoefficients)
previousFrameCoefficients.add(other: differenceCoefficients)
dctCoefficients.idct4x4Add(dctData: dctData, target: previousFrameCoefficients)
//previousFrameCoefficients.add(other: differenceCoefficients)
} else {
isKeyframe = true
@@ -746,30 +746,23 @@ private final class AnimationCacheItemAccessor {
self.currentCoefficients = currentCoefficients
}
let deltaCoefficients: DctCoefficientsYUVA420
/*let deltaCoefficients: DctCoefficientsYUVA420
if let current = self.deltaCoefficients {
deltaCoefficients = current
} else {
deltaCoefficients = DctCoefficientsYUVA420(width: yuvaSurface.yPlane.width, height: yuvaSurface.yPlane.height)
self.deltaCoefficients = deltaCoefficients
}
}*/
switch frameType {
case 1:
dctCoefficients.idct8x8(dctData: self.currentDctData, target: yuvaSurface)
yuvaSurface.toCoefficients(target: currentCoefficients)
default:
dctCoefficients.idct4x4(dctData: self.currentDctData, target: deltaCoefficients)
currentCoefficients.add(other: deltaCoefficients)
dctCoefficients.idct4x4Add(dctData: self.currentDctData, target: currentCoefficients)
//currentCoefficients.add(other: deltaCoefficients)
if !"".isEmpty {
let deltaFloatCoefficients = FloatCoefficientsYUVA420(width: yuvaSurface.yPlane.width, height: yuvaSurface.yPlane.height)
deltaCoefficients.toFloatCoefficients(target: deltaFloatCoefficients)
deltaFloatCoefficients.add(constant: 128.0)
deltaFloatCoefficients.toYUVA420(target: yuvaSurface)
} else {
currentCoefficients.toYUVA420(target: yuvaSurface)
}
currentCoefficients.toYUVA420(target: yuvaSurface)
}
self.currentFrame = CurrentFrame(index: index, duration: self.durationMapping[index], yuva: yuvaSurface)

View File

@@ -663,7 +663,7 @@ extension DctCoefficientsYUVA420 {
}
}
func idct4x4(dctData: DctData, target: DctCoefficientsYUVA420) {
func idct4x4Add(dctData: DctData, target: DctCoefficientsYUVA420) {
precondition(self.yPlane.width == target.yPlane.width && self.yPlane.height == target.yPlane.height)
for i in 0 ..< 4 {
@@ -694,7 +694,7 @@ extension DctCoefficientsYUVA420 {
//memcpy(coefficients, sourceCoefficients, sourceBytes.count)
dctData.deltaDct.inverse4x4(sourceCoefficients, normalizedCoefficients: coefficients, width: sourcePlane.width, height: sourcePlane.height)
dctData.deltaDct.inverse4x4Add(sourceCoefficients, normalizedCoefficients: coefficients, width: sourcePlane.width, height: sourcePlane.height)
}
}
}