mirror of
https://github.com/Swiftgram/Telegram-iOS.git
synced 2025-12-24 07:05:35 +00:00
Emoji 1.1 WIP
This commit is contained in:
@@ -27,7 +27,7 @@ typedef NS_ENUM(NSUInteger, ImageDCTTableType) {
|
||||
- (void)forwardWithPixels:(uint8_t const * _Nonnull)pixels coefficients:(int16_t * _Nonnull)coefficients width:(NSInteger)width height:(NSInteger)height bytesPerRow:(NSInteger)bytesPerRow __attribute__((objc_direct));
|
||||
- (void)inverseWithCoefficients:(int16_t const * _Nonnull)coefficients pixels:(uint8_t * _Nonnull)pixels width:(NSInteger)width height:(NSInteger)height coefficientsPerRow:(NSInteger)coefficientsPerRow bytesPerRow:(NSInteger)bytesPerRow __attribute__((objc_direct));
|
||||
- (void)forward4x4:(int16_t const * _Nonnull)normalizedCoefficients coefficients:(int16_t * _Nonnull)coefficients width:(NSInteger)width height:(NSInteger)height __attribute__((objc_direct));
|
||||
- (void)inverse4x4:(int16_t const * _Nonnull)coefficients normalizedCoefficients:(int16_t * _Nonnull)normalizedCoefficients width:(NSInteger)width height:(NSInteger)height __attribute__((objc_direct));
|
||||
- (void)inverse4x4Add:(int16_t const * _Nonnull)coefficients normalizedCoefficients:(int16_t * _Nonnull)normalizedCoefficients width:(NSInteger)width height:(NSInteger)height __attribute__((objc_direct));
|
||||
|
||||
@end
|
||||
|
||||
|
||||
@@ -727,7 +727,7 @@ static inline void transpose_idct4x4_16_bd8(int16x8_t *const a) {
|
||||
idct4x4_16_kernel_bd8(a);
|
||||
}
|
||||
|
||||
inline void vpx_idct4x4_16_add_neon(const int16x8_t &top64, const int16x8_t &bottom64, int16_t *dest, int16_t multiplier) {
|
||||
inline void vpx_idct4x4_16_add_neon(const int16x8_t &top64, const int16x8_t &bottom64, const int16x4_t &current0, const int16x4_t &current1, const int16x4_t &current2, const int16x4_t &current3, int16_t multiplier, int16_t *dest, int destRowIncrement) {
|
||||
int16x8_t a[2];
|
||||
|
||||
assert(!((intptr_t)dest % sizeof(uint32_t)));
|
||||
@@ -745,11 +745,19 @@ inline void vpx_idct4x4_16_add_neon(const int16x8_t &top64, const int16x8_t &bot
|
||||
a[0] = vrshrq_n_s16(a[0], 4);
|
||||
a[1] = vrshrq_n_s16(a[1], 4);
|
||||
|
||||
vst1q_s16(dest, a[0]);
|
||||
dest += 2 * 4;
|
||||
vst1_s16(dest, vget_high_s16(a[1]));
|
||||
dest += 4;
|
||||
vst1_s16(dest, vget_low_s16(a[1]));
|
||||
a[0] = vaddq_s16(a[0], vcombine_s16(current0, current1));
|
||||
a[1] = vaddq_s16(a[1], vcombine_s16(current3, current2));
|
||||
|
||||
vst1_s16(dest + destRowIncrement * 0, vget_low_s16(a[0]));
|
||||
vst1_s16(dest + destRowIncrement * 1, vget_high_s16(a[0]));
|
||||
vst1_s16(dest + destRowIncrement * 2, vget_high_s16(a[1]));
|
||||
vst1_s16(dest + destRowIncrement * 3, vget_low_s16(a[1]));
|
||||
|
||||
//vst1q_s16(dest, a[0]);
|
||||
//dest += 2 * 4;
|
||||
//vst1_s16(dest, vget_high_s16(a[1]));
|
||||
//dest += 4;
|
||||
//vst1_s16(dest, vget_low_s16(a[1]));
|
||||
}
|
||||
|
||||
static int dct4x4QuantDC = 58;
|
||||
@@ -803,11 +811,14 @@ void performForward4x4Dct(int16_t const *normalizedCoefficients, int16_t *coeffi
|
||||
}
|
||||
}
|
||||
|
||||
void performInverse4x4Dct(int16_t const * coefficients, int16_t *normalizedCoefficients, int width, int height, DctAuxiliaryData *auxiliaryData, IFAST_MULT_TYPE *ifmtbl) {
|
||||
DCTELEM resultBlock[4 * 4];
|
||||
|
||||
void performInverse4x4DctAdd(int16_t const *coefficients, int16_t *normalizedCoefficients, int width, int height, DctAuxiliaryData *auxiliaryData, IFAST_MULT_TYPE *ifmtbl) {
|
||||
for (int y = 0; y < height; y += 4) {
|
||||
for (int x = 0; x < width; x += 4) {
|
||||
int16x4_t current0 = vld1_s16(&normalizedCoefficients[(y + 0) * width + x]);
|
||||
int16x4_t current1 = vld1_s16(&normalizedCoefficients[(y + 1) * width + x]);
|
||||
int16x4_t current2 = vld1_s16(&normalizedCoefficients[(y + 2) * width + x]);
|
||||
int16x4_t current3 = vld1_s16(&normalizedCoefficients[(y + 3) * width + x]);
|
||||
|
||||
uint32x2_t sa = vld1_u32((uint32_t *)&coefficients[(y + 0) * width + x]);
|
||||
uint32x2_t sb = vld1_u32((uint32_t *)&coefficients[(y + 1) * width + x]);
|
||||
uint32x2_t sc = vld1_u32((uint32_t *)&coefficients[(y + 2) * width + x]);
|
||||
@@ -829,34 +840,7 @@ void performInverse4x4Dct(int16_t const * coefficients, int16_t *normalizedCoeff
|
||||
int16x8_t top64 = vreinterpretq_s16_u16(qtop16);
|
||||
int16x8_t bottom64 = vreinterpretq_s16_u16(qbottom16);
|
||||
|
||||
/*DCTELEM coefficientBlock[4 * 4];
|
||||
|
||||
for (int blockY = 0; blockY < 4; blockY++) {
|
||||
for (int blockX = 0; blockX < 4; blockX++) {
|
||||
coefficientBlock[zigZag4x4Inv[blockY * 4 + blockX]] = coefficients[(y + blockY) * width + (x + blockX)];
|
||||
}
|
||||
}
|
||||
|
||||
top64 = vreinterpretq_s16_u64(vld1q_u64((uint64_t *)&coefficientBlock[0]));
|
||||
bottom64 = vreinterpretq_s16_u64(vld1q_u64((uint64_t *)&coefficientBlock[8]));*/
|
||||
|
||||
vpx_idct4x4_16_add_neon(top64, bottom64, resultBlock, dct4x4QuantAC);
|
||||
|
||||
uint32x2_t a = vld1_u32((uint32_t *)&resultBlock[4 * 0]);
|
||||
uint32x2_t b = vld1_u32((uint32_t *)&resultBlock[4 * 1]);
|
||||
uint32x2_t c = vld1_u32((uint32_t *)&resultBlock[4 * 2]);
|
||||
uint32x2_t d = vld1_u32((uint32_t *)&resultBlock[4 * 3]);
|
||||
|
||||
vst1_u32((uint32_t *)&normalizedCoefficients[(y + 0) * width + x], a);
|
||||
vst1_u32((uint32_t *)&normalizedCoefficients[(y + 1) * width + x], b);
|
||||
vst1_u32((uint32_t *)&normalizedCoefficients[(y + 2) * width + x], c);
|
||||
vst1_u32((uint32_t *)&normalizedCoefficients[(y + 3) * width + x], d);
|
||||
|
||||
/*for (int blockY = 0; blockY < 4; blockY++) {
|
||||
for (int blockX = 0; blockX < 4; blockX++) {
|
||||
normalizedCoefficients[(y + blockY) * width + (x + blockX)] = resultBlock[blockY * 4 + blockX];
|
||||
}
|
||||
}*/
|
||||
vpx_idct4x4_16_add_neon(top64, bottom64, current0, current1, current2, current3, dct4x4QuantAC, normalizedCoefficients + y * width + x, width);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -932,8 +916,8 @@ void DCT::forward4x4(int16_t const *normalizedCoefficients, int16_t *coefficient
|
||||
performForward4x4Dct(normalizedCoefficients, coefficients, width, height, (DCTELEM *)_internal->forwardDctData.data());
|
||||
}
|
||||
|
||||
void DCT::inverse4x4(int16_t const *coefficients, int16_t *normalizedCoefficients, int width, int height) {
|
||||
performInverse4x4Dct(coefficients, normalizedCoefficients, width, height, _internal->auxiliaryData, (IFAST_MULT_TYPE *)_internal->inverseDctData.data());
|
||||
void DCT::inverse4x4Add(int16_t const *coefficients, int16_t *normalizedCoefficients, int width, int height) {
|
||||
performInverse4x4DctAdd(coefficients, normalizedCoefficients, width, height, _internal->auxiliaryData, (IFAST_MULT_TYPE *)_internal->inverseDctData.data());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -31,7 +31,7 @@ public:
|
||||
void forward(uint8_t const *pixels, int16_t *coefficients, int width, int height, int bytesPerRow);
|
||||
void inverse(int16_t const *coefficients, uint8_t *pixels, int width, int height, int coefficientsPerRow, int bytesPerRow);
|
||||
void forward4x4(int16_t const *normalizedCoefficients, int16_t *coefficients, int width, int height);
|
||||
void inverse4x4(int16_t const *coefficients, int16_t *normalizedCoefficients, int width, int height);
|
||||
void inverse4x4Add(int16_t const *coefficients, int16_t *normalizedCoefficients, int width, int height);
|
||||
|
||||
private:
|
||||
DCTInternal *_internal;
|
||||
|
||||
@@ -82,8 +82,8 @@
|
||||
_dct->forward4x4(normalizedCoefficients, coefficients, (int)width, (int)height);
|
||||
}
|
||||
|
||||
- (void)inverse4x4:(int16_t const * _Nonnull)coefficients normalizedCoefficients:(int16_t * _Nonnull)normalizedCoefficients width:(NSInteger)width height:(NSInteger)height {
|
||||
_dct->inverse4x4(coefficients, normalizedCoefficients, (int)width, (int)height);
|
||||
- (void)inverse4x4Add:(int16_t const * _Nonnull)coefficients normalizedCoefficients:(int16_t * _Nonnull)normalizedCoefficients width:(NSInteger)width height:(NSInteger)height {
|
||||
_dct->inverse4x4Add(coefficients, normalizedCoefficients, (int)width, (int)height);
|
||||
}
|
||||
|
||||
@end
|
||||
|
||||
@@ -118,14 +118,17 @@ void scaleImagePlane(uint8_t *outPlane, int outWidth, int outHeight, int outByte
|
||||
}
|
||||
|
||||
void convertUInt8toInt16(uint8_t const *source, int16_t *dest, int length) {
|
||||
for (int i = 0; i < length; i += 8) {
|
||||
uint8x8_t lhs8 = vld1_u8(&source[i]);
|
||||
int16x8_t lhs = vreinterpretq_s16_u16(vmovl_u8(lhs8));
|
||||
|
||||
vst1q_s16(&dest[i], lhs);
|
||||
for (int i = 0; i < length; i += 8 * 4) {
|
||||
#pragma unroll
|
||||
for (int j = 0; j < 4; j++) {
|
||||
uint8x8_t lhs8 = vld1_u8(&source[i + j * 8]);
|
||||
int16x8_t lhs = vreinterpretq_s16_u16(vmovl_u8(lhs8));
|
||||
|
||||
vst1q_s16(&dest[i + j * 8], lhs);
|
||||
}
|
||||
}
|
||||
if (length % 8 != 0) {
|
||||
for (int i = length - (length % 8); i < length; i++) {
|
||||
if (length % (8 * 4) != 0) {
|
||||
for (int i = length - (length % (8 * 4)); i < length; i++) {
|
||||
dest[i] = (int16_t)source[i];
|
||||
}
|
||||
}
|
||||
@@ -167,14 +170,17 @@ void subtractArraysInt16(int16_t const *a, int16_t const *b, int16_t *dest, int
|
||||
}
|
||||
|
||||
void addArraysInt16(int16_t const *a, int16_t const *b, int16_t *dest, int length) {
|
||||
for (int i = 0; i < length; i += 8) {
|
||||
int16x8_t lhs = vld1q_s16((int16_t *)&a[i]);
|
||||
int16x8_t rhs = vld1q_s16((int16_t *)&b[i]);
|
||||
int16x8_t result = vaddq_s16(lhs, rhs);
|
||||
vst1q_s16((int16_t *)&dest[i], result);
|
||||
for (int i = 0; i < length; i += 8 * 4) {
|
||||
#pragma unroll
|
||||
for (int j = 0; j < 4; j++) {
|
||||
int16x8_t lhs = vld1q_s16((int16_t *)&a[i + j * 8]);
|
||||
int16x8_t rhs = vld1q_s16((int16_t *)&b[i + j * 8]);
|
||||
int16x8_t result = vaddq_s16(lhs, rhs);
|
||||
vst1q_s16((int16_t *)&dest[i + j * 8], result);
|
||||
}
|
||||
}
|
||||
if (length % 8 != 0) {
|
||||
for (int i = length - (length % 8); i < length; i++) {
|
||||
if (length % (8 * 4) != 0) {
|
||||
for (int i = length - (length % (8 * 4)); i < length; i++) {
|
||||
// Scalar tail of addArraysInt16: must add, matching the vaddq_s16 vector path (the subtraction here was a copy-paste slip from subtractArraysInt16).
dest[i] = a[i] + b[i];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -473,8 +473,8 @@ private final class AnimationCacheItemWriterImpl: AnimationCacheItemWriter {
|
||||
differenceCoefficients.dct4x4(dctData: dctData, target: dctCoefficients)
|
||||
|
||||
//previous + delta = current
|
||||
dctCoefficients.idct4x4(dctData: dctData, target: differenceCoefficients)
|
||||
previousFrameCoefficients.add(other: differenceCoefficients)
|
||||
dctCoefficients.idct4x4Add(dctData: dctData, target: previousFrameCoefficients)
|
||||
//previousFrameCoefficients.add(other: differenceCoefficients)
|
||||
} else {
|
||||
isKeyframe = true
|
||||
|
||||
@@ -746,30 +746,23 @@ private final class AnimationCacheItemAccessor {
|
||||
self.currentCoefficients = currentCoefficients
|
||||
}
|
||||
|
||||
let deltaCoefficients: DctCoefficientsYUVA420
|
||||
/*let deltaCoefficients: DctCoefficientsYUVA420
|
||||
if let current = self.deltaCoefficients {
|
||||
deltaCoefficients = current
|
||||
} else {
|
||||
deltaCoefficients = DctCoefficientsYUVA420(width: yuvaSurface.yPlane.width, height: yuvaSurface.yPlane.height)
|
||||
self.deltaCoefficients = deltaCoefficients
|
||||
}
|
||||
}*/
|
||||
|
||||
switch frameType {
|
||||
case 1:
|
||||
dctCoefficients.idct8x8(dctData: self.currentDctData, target: yuvaSurface)
|
||||
yuvaSurface.toCoefficients(target: currentCoefficients)
|
||||
default:
|
||||
dctCoefficients.idct4x4(dctData: self.currentDctData, target: deltaCoefficients)
|
||||
currentCoefficients.add(other: deltaCoefficients)
|
||||
dctCoefficients.idct4x4Add(dctData: self.currentDctData, target: currentCoefficients)
|
||||
//currentCoefficients.add(other: deltaCoefficients)
|
||||
|
||||
if !"".isEmpty {
|
||||
let deltaFloatCoefficients = FloatCoefficientsYUVA420(width: yuvaSurface.yPlane.width, height: yuvaSurface.yPlane.height)
|
||||
deltaCoefficients.toFloatCoefficients(target: deltaFloatCoefficients)
|
||||
deltaFloatCoefficients.add(constant: 128.0)
|
||||
deltaFloatCoefficients.toYUVA420(target: yuvaSurface)
|
||||
} else {
|
||||
currentCoefficients.toYUVA420(target: yuvaSurface)
|
||||
}
|
||||
currentCoefficients.toYUVA420(target: yuvaSurface)
|
||||
}
|
||||
|
||||
self.currentFrame = CurrentFrame(index: index, duration: self.durationMapping[index], yuva: yuvaSurface)
|
||||
|
||||
@@ -663,7 +663,7 @@ extension DctCoefficientsYUVA420 {
|
||||
}
|
||||
}
|
||||
|
||||
func idct4x4(dctData: DctData, target: DctCoefficientsYUVA420) {
|
||||
func idct4x4Add(dctData: DctData, target: DctCoefficientsYUVA420) {
|
||||
precondition(self.yPlane.width == target.yPlane.width && self.yPlane.height == target.yPlane.height)
|
||||
|
||||
for i in 0 ..< 4 {
|
||||
@@ -694,7 +694,7 @@ extension DctCoefficientsYUVA420 {
|
||||
|
||||
//memcpy(coefficients, sourceCoefficients, sourceBytes.count)
|
||||
|
||||
dctData.deltaDct.inverse4x4(sourceCoefficients, normalizedCoefficients: coefficients, width: sourcePlane.width, height: sourcePlane.height)
|
||||
dctData.deltaDct.inverse4x4Add(sourceCoefficients, normalizedCoefficients: coefficients, width: sourcePlane.width, height: sourcePlane.height)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user