mirror of
https://github.com/Swiftgram/Telegram-iOS.git
synced 2025-06-16 05:55:20 +00:00
564 lines
16 KiB
Objective-C
564 lines
16 KiB
Objective-C
#include <opus/opus_types.h>
|
|
#include <ogg/ogg.h>
|
|
|
|
#import "TGDataItem.h"
|
|
#import "TGOggOpusWriter.h"
|
|
|
|
/*
 * Localisation shims.
 * With ENABLE_NLS defined, _(X) forwards to gettext(); otherwise _(X) is the
 * identity and textdomain()/bindtextdomain() expand to nothing.
 */
#if defined(ENABLE_NLS)
#include <libintl.h>
#define _(X) gettext(X)
#else
#define _(X) (X)
#define textdomain(X)
#define bindtextdomain(X, Y)
#endif

/* N_(X) marks a string without translating it; identity when gettext_noop is absent. */
#if defined(gettext_noop)
#define N_(X) gettext_noop(X)
#else
#define N_(X) (X)
#endif
|
|
|
|
/*
 * Encoder input options.
 * Comments describe only how the code visible in this file uses each field;
 * uncommented fields are not referenced by the code visible here.
 */
typedef struct {
    void *readdata;
    opus_int64 total_samples_per_channel;
    int rawmode;         /* set to 0 by the writer */
    int channels;        /* set to 1 by the writer */
    long rate;           /* input sample rate in Hz */
    int gain;            /* copied into the Opus header gain field */
    int samplesize;      /* bits per sample; clamped to 8..24 for OPUS_SET_LSB_DEPTH */
    int endianness;      /* set to 0 by the writer */
    char *infilename;
    int ignorelength;    /* set to 0 by the writer */
    int skip;            /* accumulates the encoder lookahead (samples to skip) */
    int extraout;        /* pre-skip converted back to the input rate */
    char *comments;      /* heap buffer holding the OpusTags packet */
    int comments_length; /* byte length of `comments` */
    int copy_comments;   /* set to 0 by the writer */
} oe_enc_opt;
|
|
|
|
/*
 * Descriptor for an input file format.
 * NOTE(review): nothing in the code visible here instantiates this type.
 */
typedef struct {
    /* Returns true if the format can load the file identified by `buf`. */
    int (*id_func)(unsigned char *buf, int len);
    /* Amount of data needed to decide whether this format can load the file. */
    int id_data_len;
    int (*open_func)(FILE *in, oe_enc_opt *opt, unsigned char *buf, int buflen);
    void (*close_func)(void *);
    char *format;
    char *description;
} input_format;
|
|
|
|
#include <opus/opus.h>
|
|
|
|
#include <ogg/ogg.h>
|
|
#include "opus_header.h"
|
|
|
|
// Helpers that build the OpusTags comment packet; defined at the end of this file.
static bool comment_init(char **comments, int *length, const char *vendor_string);
static bool comment_add(char **comments, int *length, char *tag, char *val);
static bool comment_pad(char **comments, int *length, int amount);
|
|
|
|
// Appends one Ogg page (header followed by body) to `fileItem`.
// Returns the page size in bytes, never less than zero.
static inline int writeOggPage(ogg_page *page, TGDataItem *fileItem)
{
    NSMutableData *pageData = [[NSMutableData alloc] init];
    [pageData appendBytes:page->header length:page->header_len];
    [pageData appendBytes:page->body length:page->body_len];
    [fileItem appendData:pageData];

    int pageByteCount = (int)(page->header_len + page->body_len);
    return MAX(0, pageByteCount);
}
|
|
|
|
@interface TGOggOpusWriter ()
{
    // Destination that receives every finished Ogg page.
    TGDataItem *_dataItem;

    // Opus encoder and the scratch buffer it encodes each packet into.
    OpusEncoder *_encoder;
    uint8_t *_packet;

    // Input options; also owns the temporary OpusTags buffer while headers are written.
    oe_enc_opt inopt;

    // Ogg muxing state.
    ogg_stream_state os;
    ogg_page og;
    ogg_packet op;
    ogg_int64_t last_granulepos;   // granule position of the last page written
    ogg_int64_t enc_granulepos;    // granule position of everything encoded so far
    int last_segments;             // lacing segments queued but not yet written
    int eos;                       // OR-ed into every packet's end-of-stream flag
    OpusHeader header;

    ogg_int32_t _packetId;         // index of the current audio packet, starts at -1
    int size_segments;             // lacing segments needed by the latest packet

    // Running statistics.
    opus_int64 nb_encoded;
    opus_int64 bytes_written;
    opus_int64 pages_out;
    opus_int64 total_bytes;
    opus_int64 total_samples;
    opus_int32 nb_samples;
    opus_int32 peak_bytes;
    opus_int32 min_bytes;

    // Encoder configuration (defaults are assigned in -init).
    int max_frame_bytes;
    opus_int32 bitrate;
    opus_int32 rate;
    opus_int32 coding_rate;
    opus_int32 frame_size;
    int with_cvbr;
    int max_ogg_delay;
    int comment_padding;
    int serialno;
    opus_int32 lookahead;
}

@end
|
|
|
|
@implementation TGOggOpusWriter

#pragma mark - Lifecycle

// Sets the default encoder configuration: 30 * 1024 bit/s, 48 kHz, 960-sample frames.
- (instancetype)init
{
    self = [super init];
    if (self != nil)
    {
        bitrate = 30 * 1024;
        rate = 48000;
        coding_rate = 48000;
        frame_size = 960;
        with_cvbr = 1;
        max_ogg_delay = 48000;
        comment_padding = 512;

        _packetId = -1;
    }
    return self;
}

- (void)dealloc
{
    [self cleanup];
}

// Releases the encoder, the Ogg stream buffers and the packet scratch buffer.
// Safe to call repeatedly: every resource is reset to NULL/zero after release.
- (void)cleanup
{
    if (_encoder != NULL)
    {
        opus_encoder_destroy(_encoder);
        _encoder = NULL;
    }

    ogg_stream_clear(&os);

    if (_packet != NULL)
    {
        free(_packet);
        _packet = NULL;
    }
}

#pragma mark - Stream setup

// Prepares the encoder and writes the two Ogg Opus header packets (ID header and
// OpusTags) to `dataItem`.
// Returns false if the sample rate is unsupported, any allocation fails, or
// libopus/libogg reports an error.
- (bool)beginWithDataItem:(TGDataItem *)dataItem
{
    // Release resources left over from an earlier call so a repeated begin does not leak them.
    [self cleanup];

    _dataItem = dataItem;

    inopt.channels = 1;
    coding_rate = rate;
    inopt.rate = rate;
    inopt.gain = 0;
    inopt.samplesize = 16;
    inopt.endianness = 0;
    inopt.rawmode = 0;
    inopt.ignorelength = 0;
    inopt.copy_comments = 0;
    inopt.skip = 0;

    arc4random_buf(&serialno, sizeof(serialno));

    if (!comment_init(&inopt.comments, &inopt.comments_length, opus_get_version_string()))
    {
        NSLog(@"Error: failed allocating comment header");
        return false;
    }

    bool success = [self configureEncoder] && [self writeStreamHeaders];

    // The comment buffer is only needed while the headers are written. It is released
    // on every path (the error paths used to leak it) and the pointers are cleared so
    // nothing keeps referring to freed or out-of-scope memory.
    free(inopt.comments);
    inopt.comments = NULL;
    inopt.comments_length = 0;
    op.packet = NULL;
    op.bytes = 0;

    return success;
}

// Validates the sample rate, creates the Opus encoder and derives the pre-skip.
// VBR, complexity and packet-loss CTLs are not set here and stay at libopus defaults.
- (bool)configureEncoder
{
    if (rate > 24000)
        coding_rate = 48000;
    else if (rate > 16000)
        coding_rate = 24000;
    else if (rate > 12000)
        coding_rate = 16000;
    else if (rate > 8000)
        coding_rate = 12000;
    else
        coding_rate = 8000;

    // No resampler is used: the input rate must already be one of the coding rates above.
    if (rate != coding_rate)
    {
        NSLog(@"Invalid rate");
        return false;
    }

    header.channels = 1;
    header.channel_mapping = 0;
    header.input_sample_rate = rate;
    header.gain = inopt.gain;
    header.nb_streams = 1;

    int result = OPUS_OK;
    _encoder = opus_encoder_create(coding_rate, 1, OPUS_APPLICATION_AUDIO, &result);
    if (result != OPUS_OK || _encoder == NULL)
    {
        NSLog(@"Error cannot create encoder: %s", opus_strerror(result));
        return false;
    }

    min_bytes = max_frame_bytes = (1275 * 3 + 7) * header.nb_streams;
    _packet = malloc(max_frame_bytes);
    if (_packet == NULL)
    {
        NSLog(@"Error: failed allocating packet buffer");
        return false;
    }

    result = opus_encoder_ctl(_encoder, OPUS_SET_BITRATE(bitrate));
    if (result != OPUS_OK)
    {
        NSLog(@"Error OPUS_SET_BITRATE returned: %s", opus_strerror(result));
        return false;
    }

#ifdef OPUS_SET_LSB_DEPTH
    // A failure here is only logged; encoding continues.
    result = opus_encoder_ctl(_encoder, OPUS_SET_LSB_DEPTH(MAX(8, MIN(24, inopt.samplesize))));
    if (result != OPUS_OK)
    {
        NSLog(@"Warning OPUS_SET_LSB_DEPTH returned: %s", opus_strerror(result));
    }
#endif

    // The lookahead is queried last so that earlier CTLs can influence it.
    result = opus_encoder_ctl(_encoder, OPUS_GET_LOOKAHEAD(&lookahead));
    if (result != OPUS_OK)
    {
        NSLog(@"Error OPUS_GET_LOOKAHEAD returned: %s", opus_strerror(result));
        return false;
    }

    inopt.skip += lookahead;
    // Regardless of the coding rate, Ogg timestamps and pre-skip are expressed at 48000 Hz.
    header.preskip = (int)(inopt.skip * (48000.0 / coding_rate));
    // Extra samples that need to be read to compensate for the pre-skip.
    inopt.extraout = (int)(header.preskip * (rate / 48000.0));

    return true;
}

// Initialises the Ogg stream and writes the ID header page(s) followed by the
// padded OpusTags page(s).
- (bool)writeStreamHeaders
{
    if (ogg_stream_init(&os, serialno) == -1)
    {
        NSLog(@"Error: stream init failed");
        return false;
    }

    unsigned char header_data[100];
    // NOTE(review): a non-positive result is treated as failure -- confirm against opus_header.h.
    int packet_size = opus_header_to_packet(&header, header_data, 100);
    if (packet_size <= 0)
    {
        NSLog(@"Error: failed building Opus header packet");
        return false;
    }

    op.packet = header_data;
    op.bytes = packet_size;
    op.b_o_s = 1;
    op.e_o_s = 0;
    op.granulepos = 0;
    op.packetno = 0;
    if (ogg_stream_packetin(&os, &op) != 0)
    {
        NSLog(@"Error: failed submitting header packet to Ogg stream");
        return false;
    }

    if (![self flushHeaderPages])
        return false;

    if (!comment_pad(&inopt.comments, &inopt.comments_length, comment_padding))
    {
        NSLog(@"Error: failed padding comment header");
        return false;
    }

    op.packet = (unsigned char *)inopt.comments;
    op.bytes = inopt.comments_length;
    op.b_o_s = 0;
    op.e_o_s = 0;
    op.granulepos = 0;
    op.packetno = 1;
    if (ogg_stream_packetin(&os, &op) != 0)
    {
        NSLog(@"Error: failed submitting comment packet to Ogg stream");
        return false;
    }

    return [self flushHeaderPages];
}

// Forces out every page currently buffered in the stream and writes it as header data.
- (bool)flushHeaderPages
{
    while (ogg_stream_flush(&os, &og) != 0)
    {
        if (![self appendPendingPage:@"header"])
            return false;
    }
    return true;
}

#pragma mark - Page output

// Writes the page held in `og` to the data item and updates the byte/page counters.
// `contentName` only feeds the error message ("header" or "data").
- (bool)appendPendingPage:(NSString *)contentName
{
    int writtenPageBytes = writeOggPage(&og, _dataItem);
    if (writtenPageBytes != og.header_len + og.body_len)
    {
        NSLog(@"Error: failed writing %@ to output stream", contentName);
        return false;
    }

    bytes_written += writtenPageBytes;
    pages_out++;
    return true;
}

// Updates the granule/segment bookkeeping for the audio page held in `og`, then writes it.
- (bool)emitAudioPage
{
    if (ogg_page_packets(&og) != 0)
        last_granulepos = ogg_page_granulepos(&og);

    // Byte 26 of an Ogg page header holds the number of lacing segments in the page.
    last_segments -= og.header[26];

    return [self appendPendingPage:@"data"];
}

#pragma mark - Encoding

// Encodes one frame of 16-bit mono PCM and writes any Ogg pages that become ready.
//
// framePcmBytes  - PCM samples; may be NULL together with frameByteCount == 0, in which
//                  case no packet is submitted and pending pages are flushed.
// frameByteCount - number of bytes in framePcmBytes. A frame shorter than frame_size
//                  samples is zero-padded and marks the packet as end-of-stream.
//
// Returns false on encoder, allocation or output errors.
//
// NOTE(review): a frame longer than frame_size samples is encoded only up to frame_size
// samples while total_samples counts all of them -- callers are presumably expected to
// pass at most frame_size samples per call.
- (bool)writeFrame:(uint8_t *)framePcmBytes frameByteCount:(NSUInteger)frameByteCount
{
    opus_int32 frameSamples = (opus_int32)(frameByteCount / 2);
    if (frameSamples != 0 && (framePcmBytes == NULL || _encoder == NULL || _packet == NULL))
    {
        NSLog(@"Error: cannot encode frame without input data or an initialized encoder");
        return false;
    }

    int cur_frame_size = frame_size;
    _packetId++;

    nb_samples = frameSamples;
    total_samples += nb_samples;
    op.e_o_s = (nb_samples < frame_size) ? 1 : 0;
    op.e_o_s |= eos;

    int nbBytes = 0;

    if (nb_samples != 0)
    {
        const opus_int16 *pcm = (const opus_int16 *)framePcmBytes;
        opus_int16 *paddedFrame = NULL;

        if (nb_samples < cur_frame_size)
        {
            // Short final frame: copy the whole samples into a zero-filled full-size frame.
            paddedFrame = calloc((size_t)cur_frame_size, sizeof(opus_int16));
            if (paddedFrame == NULL)
            {
                NSLog(@"Error: failed allocating padded frame");
                return false;
            }
            memcpy(paddedFrame, framePcmBytes, (size_t)nb_samples * sizeof(opus_int16));
            pcm = paddedFrame;
        }

        // NOTE(review): the output limit is a tenth of the packet buffer size; presumably
        // an intentional cap on packet size -- confirm before changing.
        nbBytes = opus_encode(_encoder, pcm, cur_frame_size, _packet, max_frame_bytes / 10);
        free(paddedFrame);

        if (nbBytes < 0)
        {
            NSLog(@"Encoding failed: %s. Aborting.", opus_strerror(nbBytes));
            return false;
        }

        nb_encoded += cur_frame_size;
        enc_granulepos += cur_frame_size * 48000 / coding_rate;
        total_bytes += nbBytes;
        size_segments = (nbBytes + 255) / 255;
        peak_bytes = MAX(nbBytes, peak_bytes);
        min_bytes = MIN(nbBytes, min_bytes);
    }

    // Flush early if adding this packet would produce a continued page we would not
    // otherwise have, or if the buffered audio exceeds the maximum Ogg delay.
    while ((((size_segments <= 255) && (last_segments + size_segments > 255)) ||
            (enc_granulepos - last_granulepos > max_ogg_delay)) &&
           ogg_stream_flush_fill(&os, &og, 255 * 255))
    {
        if (![self emitAudioPage])
            return false;
    }

    if (framePcmBytes != NULL)
    {
        op.packet = (unsigned char *)_packet;
        op.bytes = nbBytes;
        op.b_o_s = 0;
        op.granulepos = enc_granulepos;

        if (op.e_o_s)
        {
            // The final granule position is ceil(len * 48k / input_rate). When a resampling
            // decoder does the matching floor(len * input / 48k) conversion, the decoded
            // length is exactly the input length.
            op.granulepos = ((total_samples * 48000 + rate - 1) / rate) + header.preskip;
        }
        op.packetno = 2 + _packetId;

        if (ogg_stream_packetin(&os, &op) != 0)
        {
            NSLog(@"Error: failed submitting packet to Ogg stream");
            return false;
        }
        last_segments += size_segments;
    }

    // If the stream is over, or the delayed flush is certain to fire on the next frame,
    // flush now to avoid adding delay; otherwise only emit pages that are already full.
    while ((op.e_o_s ||
            (enc_granulepos + (frame_size * 48000 / coding_rate) - last_granulepos > max_ogg_delay) ||
            (last_segments >= 255))
               ? ogg_stream_flush_fill(&os, &og, 255 * 255)
               : ogg_stream_pageout_fill(&os, &og, 255 * 255))
    {
        if (![self emitAudioPage])
            return false;
    }

    return true;
}

#pragma mark - Statistics

// Total number of bytes written to the data item so far (headers included).
- (NSUInteger)encodedBytes
{
    return (NSUInteger)bytes_written;
}

// Duration in seconds of the PCM submitted so far, measured at the coding rate.
- (NSTimeInterval)encodedDuration
{
    return total_samples / (NSTimeInterval)coding_rate;
}

@end
|
|
|
|
/*
|
|
Comments will be stored in the Vorbis style.
|
|
It is described in the "Structure" section of
|
|
http://www.xiph.org/ogg/vorbis/doc/v-comment.html
|
|
|
|
However, Opus and other non-vorbis formats omit the "framing_bit".
|
|
|
|
The comment header is decoded as follows:
|
|
1) [vendor_length] = read an unsigned integer of 32 bits
|
|
2) [vendor_string] = read a UTF-8 vector as [vendor_length] octets
|
|
3) [user_comment_list_length] = read an unsigned integer of 32 bits
|
|
4) iterate [user_comment_list_length] times {
|
|
5) [length] = read an unsigned integer of 32 bits
|
|
6) this iteration's user comment = read a UTF-8 vector as [length] octets
|
|
}
|
|
7) done.
|
|
*/
|
|
|
|
/*
 * Little-endian 32-bit accessors for the comment header buffer.
 * Every argument is parenthesised, and each byte is widened through
 * unsigned char before shifting so that a negative `char` is never shifted.
 * `buf` and `base` are evaluated more than once: pass side-effect-free expressions.
 */
#define readint(buf, base) \
    ((((unsigned int)(unsigned char)(buf)[(base) + 3]) << 24) | \
     (((unsigned int)(unsigned char)(buf)[(base) + 2]) << 16) | \
     (((unsigned int)(unsigned char)(buf)[(base) + 1]) << 8) | \
     ((unsigned int)(unsigned char)(buf)[(base)]))

#define writeint(buf, base, val) do { \
        (buf)[(base) + 3] = (((unsigned int)(val)) >> 24) & 0xff; \
        (buf)[(base) + 2] = (((unsigned int)(val)) >> 16) & 0xff; \
        (buf)[(base) + 1] = (((unsigned int)(val)) >> 8) & 0xff; \
        (buf)[(base)] = ((unsigned int)(val)) & 0xff; \
    } while (0)
|
|
|
|
// Allocates a fresh OpusTags packet: the "OpusTags" magic, the vendor string
// (length-prefixed, little-endian) and an empty user comment list.
//
// comments      - receives the malloc'ed buffer; the caller owns it and must free() it.
// length        - receives the buffer length in bytes.
// vendor_string - NUL-terminated name of the encoding library; must not be NULL.
//
// Returns false, with *comments set to NULL and *length to 0, if allocation fails.
static bool comment_init(char **comments, int *length, const char *vendor_string)
{
    // The 'vendor' field should be the actual encoding library used
    int vendor_length = (int)strlen(vendor_string);
    int user_comment_list_length = 0;
    int len = 8 + 4 + vendor_length + 4;

    char *p = (char *)malloc(len);
    if (p == NULL)
    {
        *comments = NULL;
        *length = 0;
        return false;
    }

    memcpy(p, "OpusTags", 8);
    writeint(p, 8, vendor_length);
    memcpy(p + 12, vendor_string, vendor_length);
    writeint(p, 12 + vendor_length, user_comment_list_length);

    *length = len;
    *comments = p;

    return true;
}
|
|
|
|
// Appends one user comment ("tag=val", or just "val" when tag is NULL) to an
// OpusTags packet created by comment_init and bumps the comment count.
//
// comments - in/out buffer; may be moved by realloc.
// length   - in/out buffer length in bytes.
// tag      - optional NUL-terminated tag name.
// val      - NUL-terminated value; must not be NULL.
//
// Returns false and leaves *comments / *length untouched if val is NULL or
// the buffer cannot be grown.
__unused bool comment_add(char **comments, int* length, char *tag, char *val)
{
    if (val == NULL)
        return false;

    char *p = *comments;
    int vendor_length = readint(p, 8);
    int user_comment_list_length = readint(p, 8 + 4 + vendor_length);
    // tag_len includes one extra byte for the '=' separator.
    int tag_len = (tag ? (int)strlen(tag) + 1 : 0);
    int val_len = (int)strlen(val);
    int len = (*length) + 4 + tag_len + val_len;

    char *grown = (char *)realloc(p, len);
    if (grown == NULL)
        return false; // the original buffer is still valid and still owned by the caller
    p = grown;

    writeint(p, *length, tag_len + val_len); /* length of comment */
    if (tag)
    {
        memcpy(p + *length + 4, tag, tag_len);    /* comment tag (copies the NUL, replaced below) */
        (p + *length + 4)[tag_len - 1] = '=';     /* separator */
    }
    memcpy(p + *length + 4 + tag_len, val, val_len); /* comment */
    writeint(p, 8 + 4 + vendor_length, user_comment_list_length + 1);

    *comments = p;
    *length = len;

    return true;
}
|
|
|
|
// Grows the comment packet with zero padding.
//
// comments - in/out buffer; may be moved by realloc.
// length   - in/out buffer length in bytes.
// amount   - minimum number of padding bytes; values <= 0 leave the buffer unchanged.
//
// Returns false and leaves *comments / *length untouched if the buffer cannot be grown.
static bool comment_pad(char **comments, int* length, int amount)
{
    if (amount <= 0)
        return true;

    // Make sure there is at least `amount` worth of padding free, and round up to the
    // maximum that fits in the current ogg segments
    int newlen = (*length + amount + 255) / 255 * 255 - 1;

    char *grown = realloc(*comments, newlen);
    if (grown == NULL)
        return false; // the original buffer is still valid and still owned by the caller

    memset(grown + *length, 0, (size_t)(newlen - *length));

    *comments = grown;
    *length = newlen;

    return true;
}
|
|
|
|
#undef readint
|
|
#undef writeint
|