From d348e56436dc2dca562c59fe72dfe7fa0dd9451c Mon Sep 17 00:00:00 2001 From: Grishka Date: Thu, 7 Sep 2017 08:39:33 +0300 Subject: [PATCH] Added AGC on audio output because some echo cancellation implementations don't like loud audio in speakerphone mode; this should only be enabled when using the earpiece speaker, on devices that have one. Also, the AGC on the input is now configured with a much lower target level. --- EchoCanceller.cpp | 80 ++++++++++++++++++++++++++++++++-- EchoCanceller.h | 26 ++++++++++- OpusDecoder.cpp | 14 ++++++ OpusDecoder.h | 4 ++ VoIPController.cpp | 38 ++++++++++++---- VoIPController.h | 13 +++++- client/android/tg_voip_jni.cpp | 4 ++ logging.h | 7 ++- os/windows/CXWrapper.cpp | 4 ++ os/windows/CXWrapper.h | 1 + 10 files changed, 173 insertions(+), 18 deletions(-) diff --git a/EchoCanceller.cpp b/EchoCanceller.cpp index 2106532d2b..23ccc94245 100644 --- a/EchoCanceller.cpp +++ b/EchoCanceller.cpp @@ -97,12 +97,12 @@ EchoCanceller::EchoCanceller(bool enableAEC, bool enableNS, bool enableAGC){ if(enableAGC){ agc=WebRtcAgc_Create(); WebRtcAgcConfig agcConfig; - agcConfig.compressionGaindB = 9; + agcConfig.compressionGaindB = 20; agcConfig.limiterEnable = 1; - agcConfig.targetLevelDbfs = 3; - WebRtcAgc_Init(agc, 0, 255, kAgcModeAdaptiveAnalog, 48000); + agcConfig.targetLevelDbfs = 9; + WebRtcAgc_Init(agc, 0, 255, kAgcModeAdaptiveDigital, 48000); WebRtcAgc_set_config(agc, agcConfig); - agcMicLevel=128; + agcMicLevel=0; } #endif } @@ -354,3 +354,75 @@ void EchoCanceller::ProcessInput(unsigned char* data, unsigned char* out, size_t memcpy(samplesOut, bufIn->ibuf_const()->bands(0)[0], 960*2); } +AudioEffect::~AudioEffect(){ + +} + +void AudioEffect::SetPassThrough(bool passThrough){ + this->passThrough=passThrough; +} + +AutomaticGainControl::AutomaticGainControl(){ + splittingFilter=new webrtc::SplittingFilter(1, 3, 960); + splittingFilterIn=new webrtc::IFChannelBuffer(960, 1, 1); + splittingFilterOut=new webrtc::IFChannelBuffer(960, 1, 3); + 
+ agc=WebRtcAgc_Create(); + WebRtcAgcConfig agcConfig; + agcConfig.compressionGaindB = 9; + agcConfig.limiterEnable = 1; + agcConfig.targetLevelDbfs = 3; + WebRtcAgc_Init(agc, 0, 255, kAgcModeAdaptiveDigital, 48000); + WebRtcAgc_set_config(agc, agcConfig); + agcMicLevel=0; +} + +AutomaticGainControl::~AutomaticGainControl(){ + delete (webrtc::SplittingFilter*)splittingFilter; + delete (webrtc::IFChannelBuffer*)splittingFilterIn; + delete (webrtc::IFChannelBuffer*)splittingFilterOut; + WebRtcAgc_Free(agc); +} + +void AutomaticGainControl::Process(int16_t *inOut, size_t numSamples){ + if(passThrough) + return; + if(numSamples!=960){ + LOGW("AutomaticGainControl only works on 960-sample buffers (got %u samples)", numSamples); + return; + } + //LOGV("processing frame through AGC"); + + webrtc::IFChannelBuffer* bufIn=(webrtc::IFChannelBuffer*) splittingFilterIn; + webrtc::IFChannelBuffer* bufOut=(webrtc::IFChannelBuffer*) splittingFilterOut; + + memcpy(bufIn->ibuf()->bands(0)[0], inOut, 960*2); + + ((webrtc::SplittingFilter*)splittingFilter)->Analysis(bufIn, bufOut); + + int i; + int16_t _agcOut[3][320]; + int16_t* agcIn[3]; + int16_t* agcOut[3]; + for(i=0;i<3;i++){ + agcIn[i]=(int16_t*)bufOut->ibuf_const()->bands(0)[i]; + agcOut[i]=_agcOut[i]; + } + uint8_t saturation; + WebRtcAgc_AddMic(agc, agcIn, 3, 160); + WebRtcAgc_Process(agc, (const int16_t *const *) agcIn, 3, 160, agcOut, agcMicLevel, &agcMicLevel, 0, &saturation); + for(i=0;i<3;i++){ + agcOut[i]+=160; + agcIn[i]+=160; + } + WebRtcAgc_AddMic(agc, agcIn, 3, 160); + WebRtcAgc_Process(agc, (const int16_t *const *) agcIn, 3, 160, agcOut, agcMicLevel, &agcMicLevel, 0, &saturation); + memcpy(bufOut->ibuf()->bands(0)[0], _agcOut[0], 320*2); + memcpy(bufOut->ibuf()->bands(0)[1], _agcOut[1], 320*2); + memcpy(bufOut->ibuf()->bands(0)[2], _agcOut[2], 320*2); + + ((webrtc::SplittingFilter*)splittingFilter)->Synthesis(bufOut, bufIn); + + memcpy(inOut, bufIn->ibuf_const()->bands(0)[0], 960*2); +} + diff --git 
a/EchoCanceller.h b/EchoCanceller.h index 982bd4abba..a915ec1846 100644 --- a/EchoCanceller.h +++ b/EchoCanceller.h @@ -10,6 +10,7 @@ #include "threading.h" #include "BufferPool.h" #include "BlockingQueue.h" +#include "MediaStreamItf.h" namespace tgvoip{ class EchoCanceller{ @@ -48,6 +49,29 @@ private: int32_t agcMicLevel; #endif }; -} + + class AudioEffect{ + public: + virtual ~AudioEffect()=0; + virtual void Process(int16_t* inOut, size_t numSamples)=0; + virtual void SetPassThrough(bool passThrough); + protected: + bool passThrough; + }; + + class AutomaticGainControl : public AudioEffect{ + public: + AutomaticGainControl(); + virtual ~AutomaticGainControl(); + virtual void Process(int16_t* inOut, size_t numSamples); + + private: + void* agc; + void* splittingFilter; + void* splittingFilterIn; + void* splittingFilterOut; + int32_t agcMicLevel; + }; +}; #endif //LIBTGVOIP_ECHOCANCELLER_H diff --git a/OpusDecoder.cpp b/OpusDecoder.cpp index 6f6626aff8..77fcb8dacc 100644 --- a/OpusDecoder.cpp +++ b/OpusDecoder.cpp @@ -8,6 +8,7 @@ #include "audio/Resampler.h" #include "logging.h" #include +#include #define PACKET_SIZE (960*2) @@ -222,6 +223,9 @@ void tgvoip::OpusDecoder::RunThread(){ unsigned char *buf=bufferPool->Get(); if(buf){ if(size>0){ + for(std::vector::iterator effect=postProcEffects.begin();effect!=postProcEffects.end();++effect){ + (*effect)->Process(reinterpret_cast(processedBuffer+(PACKET_SIZE*i)), 960); + } memcpy(buf, processedBuffer+(PACKET_SIZE*i), PACKET_SIZE); }else{ LOGE("Error decoding, result=%d", size); @@ -255,3 +259,13 @@ void tgvoip::OpusDecoder::ResetQueue(){ void tgvoip::OpusDecoder::SetJitterBuffer(JitterBuffer* jitterBuffer){ this->jitterBuffer=jitterBuffer; } + +void tgvoip::OpusDecoder::AddAudioEffect(AudioEffect *effect){ + postProcEffects.push_back(effect); +} + +void tgvoip::OpusDecoder::RemoveAudioEffect(AudioEffect *effect){ + std::vector::iterator i=std::find(postProcEffects.begin(), postProcEffects.end(), effect); + 
if(i!=postProcEffects.end()) + postProcEffects.erase(i); +} diff --git a/OpusDecoder.h b/OpusDecoder.h index 51bbbacde2..1a88ce5d76 100644 --- a/OpusDecoder.h +++ b/OpusDecoder.h @@ -16,6 +16,7 @@ #include "EchoCanceller.h" #include "JitterBuffer.h" #include +#include namespace tgvoip{ class OpusDecoder { @@ -31,6 +32,8 @@ public: void SetFrameDuration(uint32_t duration); void ResetQueue(); void SetJitterBuffer(JitterBuffer* jitterBuffer); + void AddAudioEffect(AudioEffect* effect); + void RemoveAudioEffect(AudioEffect* effect); private: static size_t Callback(unsigned char* data, size_t len, void* param); @@ -50,6 +53,7 @@ private: uint32_t frameDuration; EchoCanceller* echoCanceller; JitterBuffer* jitterBuffer; + std::vector postProcEffects; }; } diff --git a/VoIPController.cpp b/VoIPController.cpp index fd1a16d822..d3105ba5f4 100644 --- a/VoIPController.cpp +++ b/VoIPController.cpp @@ -233,6 +233,9 @@ VoIPController::VoIPController() : activeNetItfName(""), realUdpSocket=udpSocket; udpConnectivityState=UDP_UNKNOWN; + outputAGC=NULL; + outputAGCEnabled=false; + maxAudioBitrate=(uint32_t) ServerConfig::GetSharedInstance()->GetInt("audio_max_bitrate", 20000); maxAudioBitrateGPRS=(uint32_t) ServerConfig::GetSharedInstance()->GetInt("audio_max_bitrate_gprs", 8000); maxAudioBitrateEDGE=(uint32_t) ServerConfig::GetSharedInstance()->GetInt("audio_max_bitrate_edge", 16000); @@ -263,6 +266,8 @@ VoIPController::VoIPController() : activeNetItfName(""), stm->enabled=1; stm->frameDuration=60; outgoingStreams.push_back(stm); + + memset(signalBarsHistory, 0, sizeof(signalBarsHistory)); } VoIPController::~VoIPController(){ @@ -359,6 +364,8 @@ VoIPController::~VoIPController(){ if(resolvedProxyAddress) delete resolvedProxyAddress; delete selectCanceller; + if(outputAGC) + delete outputAGC; LOGD("Left VoIPController::~VoIPController"); } @@ -1165,10 +1172,13 @@ simpleAudioBlock random_id:long random_bytes:string raw_data:string = DecryptedA UpdateAudioBitrate(); jitterBuffer=new 
JitterBuffer(NULL, incomingAudioStream->frameDuration); + outputAGC=new AutomaticGainControl(); + outputAGC->SetPassThrough(!outputAGCEnabled); decoder=new OpusDecoder(audioOutput); decoder->SetEchoCanceller(echoCanceller); decoder->SetJitterBuffer(jitterBuffer); decoder->SetFrameDuration(incomingAudioStream->frameDuration); + decoder->AddAudioEffect(outputAGC); decoder->Start(); if(incomingAudioStream->frameDuration>50) jitterBuffer->SetMinPacketCount((uint32_t) ServerConfig::GetSharedInstance()->GetInt("jitter_initial_delay_60", 3)); @@ -1226,7 +1236,7 @@ simpleAudioBlock random_id:long random_bytes:string raw_data:string = DecryptedA audioOutput->Start(); audioOutStarted=true; } - if(jitterBuffer) + if(jitterBuffer && in.Remaining()>=sdlen) jitterBuffer->HandleInput((unsigned char*) (buffer+in.GetOffset()), sdlen, pts); if(iGetAverageDelay(); double avgLateCount[3]; jitterBuffer->GetAverageLateCount(avgLateCount); - if(avgDelay>=5) + /*if(avgDelay>=5) signalBarCount=1; else if(avgDelay>=4) signalBarCount=MIN(signalBarCount, 2); else if(avgDelay>=3) - signalBarCount=MIN(signalBarCount, 3); + signalBarCount=MIN(signalBarCount, 3);*/ if(avgLateCount[2]>=0.2) signalBarCount=1; @@ -1694,10 +1704,12 @@ void VoIPController::RunTickThread(){ setEstablishedAt=0; } - if(signalBarCount!=prevSignalBarCount){ - LOGD("SIGNAL BAR COUNT CHANGED: %d", signalBarCount); + signalBarsHistory[tickCount%sizeof(signalBarsHistory)]=(unsigned char)signalBarCount; + int _signalBarCount=GetSignalBarsCount(); + if(_signalBarCount!=prevSignalBarCount){ + LOGD("SIGNAL BAR COUNT CHANGED: %d", _signalBarCount); if(signalBarCountCallback) - signalBarCountCallback(this, signalBarCount); + signalBarCountCallback(this, _signalBarCount); } @@ -2443,13 +2455,23 @@ void VoIPController::SendUdpPing(Endpoint *endpoint){ } int VoIPController::GetSignalBarsCount(){ - return signalBarCount; + unsigned char avg=0; + for(int i=0;i> 2; } void VoIPController::SetSignalBarsCountCallback(void (*f)(VoIPController 
*, int)){ signalBarCountCallback=f; } +void VoIPController::SetAudioOutputGainControlEnabled(bool enabled){ + LOGD("New output AGC state: %d", enabled); + outputAGCEnabled=enabled; + if(outputAGC) + outputAGC->SetPassThrough(!enabled); +} + Endpoint::Endpoint(int64_t id, uint16_t port, IPv4Address& _address, IPv6Address& _v6address, char type, unsigned char peerTag[16]) : address(_address), v6address(_v6address){ this->id=id; this->port=port; diff --git a/VoIPController.h b/VoIPController.h index 40feb58bac..b581204397 100644 --- a/VoIPController.h +++ b/VoIPController.h @@ -317,7 +317,7 @@ public: std::string GetCurrentAudioOutputID(); /** * Set the proxy server to route the data through. Call this before connecting. - * @param protocol PROXY_NONE, PROXY_SOCKS4, or PROXY_SOCKS5 + * @param protocol PROXY_NONE or PROXY_SOCKS5 * @param address IP address or domain name of the server * @param port Port of the server * @param username Username; empty string for anonymous @@ -334,6 +334,13 @@ public: * @param f */ void SetSignalBarsCountCallback(void (*f)(VoIPController*, int)); + /** + * Enable or disable AGC (automatic gain control) on audio output. Should only be enabled on phones when the earpiece speaker is being used. + * The audio output will be louder with this on. + * AGC with speakerphone or other kinds of loud speakers has detrimental effects on some echo cancellation implementations. 
+ * @param enabled true to run the audio output through AGC, false to bypass it (pass-through) + */ + void SetAudioOutputGainControlEnabled(bool enabled); private: struct PendingOutgoingPacket{ @@ -451,6 +458,7 @@ private: double setEstablishedAt; SocketSelectCanceller* selectCanceller; NetworkSocket* openingTcpSocket; + unsigned char signalBarsHistory[4]; BufferPool outgoingPacketsBufferPool; int udpConnectivityState; @@ -466,6 +474,9 @@ private: int signalBarCount; void (*signalBarCountCallback)(VoIPController*, int); + + AutomaticGainControl* outputAGC; + bool outputAGCEnabled; /*** server config values ***/ uint32_t maxAudioBitrate; diff --git a/client/android/tg_voip_jni.cpp b/client/android/tg_voip_jni.cpp index dcb18dff41..f88b338d7c 100644 --- a/client/android/tg_voip_jni.cpp +++ b/client/android/tg_voip_jni.cpp @@ -299,6 +299,10 @@ extern "C" JNIEXPORT jstring Java_org_telegram_messenger_voip_VoIPController_nat return env->NewStringUTF(log.c_str()); } +extern "C" JNIEXPORT void Java_org_telegram_messenger_voip_VoIPController_nativeSetAudioOutputGainControlEnabled(JNIEnv* env, jclass clasz, jlong inst, jboolean enabled){ + ((VoIPController*)(intptr_t)inst)->SetAudioOutputGainControlEnabled(enabled); +} + extern "C" JNIEXPORT jint Java_org_telegram_messenger_voip_Resampler_convert44to48(JNIEnv* env, jclass cls, jobject from, jobject to){ return tgvoip::audio::Resampler::Convert44To48((int16_t *) env->GetDirectBufferAddress(from), (int16_t *) env->GetDirectBufferAddress(to), (size_t) (env->GetDirectBufferCapacity(from)/2), (size_t) (env->GetDirectBufferCapacity(to)/2)); } diff --git a/logging.h b/logging.h index cbe811bb56..3168bc491d 100644 --- a/logging.h +++ b/logging.h @@ -44,10 +44,6 @@ void tgvoip_log_file_write_header(); #include #include -#if !defined(snprintf) && defined(_WIN32) && defined(__cplusplus_winrt) -#define snprintf _snprintf -#endif - #define _TGVOIP_W32_LOG_PRINT(verb, msg, ...){ char __log_buf[1024]; snprintf(__log_buf, 1024, "%c/tgvoip: " msg "\n", verb, 
##__VA_ARGS__); OutputDebugStringA(__log_buf); tgvoip_log_file_printf((char)verb, msg, __VA_ARGS__);} #define LOGV(msg, ...) _TGVOIP_W32_LOG_PRINT('V', msg, ##__VA_ARGS__) @@ -70,6 +66,9 @@ void tgvoip_log_file_write_header(); #endif +#if !defined(snprintf) && defined(_WIN32) && defined(__cplusplus_winrt) +#define snprintf _snprintf +#endif #ifdef TGVOIP_LOG_VERBOSITY #if TGVOIP_LOG_VERBOSITY<5 diff --git a/os/windows/CXWrapper.cpp b/os/windows/CXWrapper.cpp index 818ec5370b..bbc1279ef5 100755 --- a/os/windows/CXWrapper.cpp +++ b/os/windows/CXWrapper.cpp @@ -180,6 +180,10 @@ void VoIPControllerWrapper::SetProxy(ProxyProtocol protocol, Platform::String^ a controller->SetProxy((int)protocol, _address, port, _username, _password); } +void VoIPControllerWrapper::SetAudioOutputGainControlEnabled(bool enabled){ + controller->SetAudioOutputGainControlEnabled(enabled); +} + void VoIPControllerWrapper::UpdateServerConfig(Platform::String^ json){ JsonObject^ jconfig=JsonValue::Parse(json)->GetObject(); std::map config; diff --git a/os/windows/CXWrapper.h b/os/windows/CXWrapper.h index cc3e0033b0..38e0f0b4d6 100755 --- a/os/windows/CXWrapper.h +++ b/os/windows/CXWrapper.h @@ -81,6 +81,7 @@ namespace libtgvoip{ Error GetLastError(); static Platform::String^ GetVersion(); int64 GetPreferredRelayID(); + void SetAudioOutputGainControlEnabled(bool enabled); static void UpdateServerConfig(Platform::String^ json); static void SwitchSpeaker(bool external); //static Platform::String^ TestAesIge();