import Foundation
import CoreMedia
import FFMpegBinding

final class FFMpegAudioFrameDecoder: MediaTrackFrameDecoder {
    private let codecContext: FFMpegAVCodecContext
    private let swrContext: FFMpegSWResample
    private let audioFrame: FFMpegAVFrame
    private var resetDecoderOnNextFrame = true

    private let channelCount: Int
    private let formatDescription: CMAudioFormatDescription

    // Decoded frames buffered until they can be emitted in presentation order.
    private var delayedFrames: [MediaTrackFrame] = []

    init(codecContext: FFMpegAVCodecContext, sampleRate: Int = 44100, channelCount: Int = 2) {
        self.codecContext = codecContext
        self.audioFrame = FFMpegAVFrame()
        self.channelCount = channelCount

        // Resample whatever the codec emits into packed, native-endian signed
        // 16-bit PCM at the requested sample rate and channel count.
        self.swrContext = FFMpegSWResample(sourceChannelCount: Int(codecContext.channels()), sourceSampleRate: Int(codecContext.sampleRate()), sourceSampleFormat: codecContext.sampleFormat(), destinationChannelCount: channelCount, destinationSampleRate: sampleRate, destinationSampleFormat: FFMPEG_AV_SAMPLE_FMT_S16)

        var outputDescription = AudioStreamBasicDescription(
            mSampleRate: Float64(sampleRate),
            mFormatID: kAudioFormatLinearPCM,
            mFormatFlags: kAudioFormatFlagIsSignedInteger | kAudioFormatFlagsNativeEndian | kAudioFormatFlagIsPacked,
            mBytesPerPacket: UInt32(2 * channelCount),
            mFramesPerPacket: 1,
            mBytesPerFrame: UInt32(2 * channelCount),
            mChannelsPerFrame: UInt32(channelCount),
            mBitsPerChannel: 16,
            mReserved: 0
        )

        var channelLayout = AudioChannelLayout()
        memset(&channelLayout, 0, MemoryLayout<AudioChannelLayout>.size)
        // The layout tag must agree with the channel count declared in the ASBD.
        channelLayout.mChannelLayoutTag = channelCount == 1 ? kAudioChannelLayoutTag_Mono : kAudioChannelLayoutTag_Stereo

        var formatDescription: CMAudioFormatDescription?
        CMAudioFormatDescriptionCreate(allocator: nil, asbd: &outputDescription, layoutSize: MemoryLayout<AudioChannelLayout>.size, layout: &channelLayout, magicCookieSize: 0, magicCookie: nil, extensions: nil, formatDescriptionOut: &formatDescription)

        self.formatDescription = formatDescription!
    }

    func decodeRaw(frame: MediaTrackDecodableFrame) -> Data? {
        let status = frame.packet.send(toDecoder: self.codecContext)
        if status == 0 {
            let result = self.codecContext.receive(into: self.audioFrame)
            if case .success = result {
                guard let data = self.swrContext.resample(self.audioFrame) else {
                    return nil
                }
                return data
            } else {
                return nil
            }
        } else {
            return nil
        }
    }

    func decode(frame: MediaTrackDecodableFrame) -> MediaTrackFrame? {
        let status = frame.packet.send(toDecoder: self.codecContext)
        if status == 0 {
            // One packet may produce several frames; drain the decoder fully.
            while true {
                let result = self.codecContext.receive(into: self.audioFrame)
                if case .success = result {
                    if let convertedFrame = self.convertAudioFrame(self.audioFrame, pts: frame.pts) {
                        self.delayedFrames.append(convertedFrame)
                    }
                } else {
                    break
                }
            }

            return self.takeEarliestDelayedFrame()
        }
        return nil
    }

    func takeQueuedFrame() -> MediaTrackFrame? {
        return self.takeEarliestDelayedFrame()
    }

    func takeRemainingFrame() -> MediaTrackFrame? {
        return self.takeEarliestDelayedFrame()
    }

    // Removes and returns the buffered frame with the smallest presentation
    // timestamp, so that frames leave the decoder in presentation order.
    private func takeEarliestDelayedFrame() -> MediaTrackFrame? {
        if self.delayedFrames.isEmpty {
            return nil
        }
        var minFrameIndex = 0
        var minPosition = self.delayedFrames[0].position
        for i in 1 ..< self.delayedFrames.count {
            if CMTimeCompare(self.delayedFrames[i].position, minPosition) < 0 {
                minFrameIndex = i
                minPosition = self.delayedFrames[i].position
            }
        }
        return self.delayedFrames.remove(at: minFrameIndex)
    }

    private func convertAudioFrame(_ frame: FFMpegAVFrame, pts: CMTime) -> MediaTrackFrame? {
        guard let data = self.swrContext.resample(frame) else {
            return nil
        }

        // Copy the resampled PCM into a malloc'd block whose ownership passes
        // to the CMBlockBuffer (freed with the default allocator on release).
        let bytes = malloc(data.count)!
        data.copyBytes(to: bytes.assumingMemoryBound(to: UInt8.self), count: data.count)
        var blockBuffer: CMBlockBuffer?
        let status = CMBlockBufferCreateWithMemoryBlock(allocator: nil, memoryBlock: bytes, blockLength: data.count, blockAllocator: nil, customBlockSource: nil, offsetToData: 0, dataLength: data.count, flags: 0, blockBufferOut: &blockBuffer)
        if status != noErr {
            return nil
        }

        // Packed 16-bit PCM: one sample frame is 2 bytes per channel.
        let sampleCount = data.count / (2 * self.channelCount)

        var sampleBuffer: CMSampleBuffer?
        guard CMAudioSampleBufferCreateReadyWithPacketDescriptions(allocator: nil, dataBuffer: blockBuffer!, formatDescription: self.formatDescription, sampleCount: sampleCount, presentationTimeStamp: pts, packetDescriptions: nil, sampleBufferOut: &sampleBuffer) == noErr else {
            return nil
        }

        let resetDecoder = self.resetDecoderOnNextFrame
        self.resetDecoderOnNextFrame = false

        return MediaTrackFrame(type: .audio, sampleBuffer: sampleBuffer!, resetDecoder: resetDecoder, decoded: true)
    }

    func reset() {
        self.codecContext.flushBuffers()
        self.resetDecoderOnNextFrame = true
    }
}
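
// Usage sketch (illustrative only): how a caller might drive this decoder.
// `makeAudioCodecContext()` and `nextDecodableAudioFrame()` are hypothetical
// stand-ins for the demuxing code that owns the FFmpeg stream; they are not
// part of this file or of FFMpegBinding.
/*
let decoder = FFMpegAudioFrameDecoder(codecContext: makeAudioCodecContext(), sampleRate: 44100, channelCount: 2)
while let decodableFrame = nextDecodableAudioFrame() {
    if let frame = decoder.decode(frame: decodableFrame) {
        // frame.sampleBuffer holds packed 16-bit PCM, ready for a
        // CMSampleBuffer-based renderer.
    }
}
// At end of stream, drain whatever is still buffered for reordering.
while let remaining = decoder.takeRemainingFrame() {
    // Consume remaining.sampleBuffer.
}
*/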