From d348e56436dc2dca562c59fe72dfe7fa0dd9451c Mon Sep 17 00:00:00 2001
From: Grishka <grishka93@gmail.com>
Date: Thu, 7 Sep 2017 08:39:33 +0300
Subject: [PATCH] Added AGC on audio output because some echo cancellation
 implementations don't like loud audio in speakerphone mode; this should only
 be enabled when using the earpiece speaker, on devices that have one. Also,
 the AGC on the input is now configured with a much lower target level.

---
 EchoCanceller.cpp              | 80 ++++++++++++++++++++++++++++++++--
 EchoCanceller.h                | 26 ++++++++++-
 OpusDecoder.cpp                | 14 ++++++
 OpusDecoder.h                  |  4 ++
 VoIPController.cpp             | 38 ++++++++++++----
 VoIPController.h               | 13 +++++-
 client/android/tg_voip_jni.cpp |  4 ++
 logging.h                      |  7 ++-
 os/windows/CXWrapper.cpp       |  4 ++
 os/windows/CXWrapper.h         |  1 +
 10 files changed, 173 insertions(+), 18 deletions(-)

diff --git a/EchoCanceller.cpp b/EchoCanceller.cpp
index 2106532d2b..23ccc94245 100644
--- a/EchoCanceller.cpp
+++ b/EchoCanceller.cpp
@@ -97,12 +97,12 @@ EchoCanceller::EchoCanceller(bool enableAEC, bool enableNS, bool enableAGC){
 	if(enableAGC){
 		agc=WebRtcAgc_Create();
 		WebRtcAgcConfig agcConfig;
-		agcConfig.compressionGaindB = 9;
+		agcConfig.compressionGaindB = 20;
 		agcConfig.limiterEnable = 1;
-		agcConfig.targetLevelDbfs = 3;
-		WebRtcAgc_Init(agc, 0, 255, kAgcModeAdaptiveAnalog, 48000);
+		agcConfig.targetLevelDbfs = 9;
+		WebRtcAgc_Init(agc, 0, 255, kAgcModeAdaptiveDigital, 48000);
 		WebRtcAgc_set_config(agc, agcConfig);
-		agcMicLevel=128;
+		agcMicLevel=0;
 	}
 #endif
 }
@@ -354,3 +354,75 @@ void EchoCanceller::ProcessInput(unsigned char* data, unsigned char* out, size_t
 	memcpy(samplesOut, bufIn->ibuf_const()->bands(0)[0], 960*2);
 }
 
+AudioEffect::~AudioEffect(){
+
+}
+
+void AudioEffect::SetPassThrough(bool passThrough){
+	this->passThrough=passThrough;
+}
+
+AutomaticGainControl::AutomaticGainControl(){
+	splittingFilter=new webrtc::SplittingFilter(1, 3, 960);
+	splittingFilterIn=new webrtc::IFChannelBuffer(960, 1, 1);
+	splittingFilterOut=new webrtc::IFChannelBuffer(960, 1, 3);
+	passThrough=false; // AudioEffect declares no constructor, so without this Process() could read an indeterminate flag
+	agc=WebRtcAgc_Create();
+	WebRtcAgcConfig agcConfig;
+	agcConfig.compressionGaindB = 9;
+	agcConfig.limiterEnable = 1;
+	agcConfig.targetLevelDbfs = 3;
+	WebRtcAgc_Init(agc, 0, 255, kAgcModeAdaptiveDigital, 48000);
+	WebRtcAgc_set_config(agc, agcConfig);
+	agcMicLevel=0;
+}
+
+AutomaticGainControl::~AutomaticGainControl(){
+	delete (webrtc::SplittingFilter*)splittingFilter;
+	delete (webrtc::IFChannelBuffer*)splittingFilterIn;
+	delete (webrtc::IFChannelBuffer*)splittingFilterOut;
+	WebRtcAgc_Free(agc);
+}
+
+void AutomaticGainControl::Process(int16_t *inOut, size_t numSamples){
+	if(passThrough)
+		return;
+	if(numSamples!=960){
+		LOGW("AutomaticGainControl only works on 960-sample buffers (got %u samples)", (unsigned int)numSamples);
+		return;
+	}
+	// Applies AGC in place: the frame is split into 3 bands (320 samples each, see _agcOut), gained, then merged back.
+	// numSamples is cast for the log call above because %u expects unsigned int, while size_t may be wider.
+	webrtc::IFChannelBuffer* bufIn=(webrtc::IFChannelBuffer*) splittingFilterIn;
+	webrtc::IFChannelBuffer* bufOut=(webrtc::IFChannelBuffer*) splittingFilterOut;
+	// Copy the caller's 960 16-bit samples (960*2 bytes) into the filter's input buffer.
+	memcpy(bufIn->ibuf()->bands(0)[0], inOut, 960*2);
+
+	((webrtc::SplittingFilter*)splittingFilter)->Analysis(bufIn, bufOut);
+
+	int i;
+	int16_t _agcOut[3][320];
+	int16_t* agcIn[3];
+	int16_t* agcOut[3];
+	for(i=0;i<3;i++){
+		agcIn[i]=(int16_t*)bufOut->ibuf_const()->bands(0)[i];
+		agcOut[i]=_agcOut[i];
+	}
+	uint8_t saturation;
+	WebRtcAgc_AddMic(agc, agcIn, 3, 160);
+	WebRtcAgc_Process(agc, (const int16_t *const *) agcIn, 3, 160, agcOut, agcMicLevel, &agcMicLevel, 0, &saturation);
+	for(i=0;i<3;i++){
+		agcOut[i]+=160;
+		agcIn[i]+=160;
+	}
+	WebRtcAgc_AddMic(agc, agcIn, 3, 160);
+	WebRtcAgc_Process(agc, (const int16_t *const *) agcIn, 3, 160, agcOut, agcMicLevel, &agcMicLevel, 0, &saturation);
+	memcpy(bufOut->ibuf()->bands(0)[0], _agcOut[0], 320*2);
+	memcpy(bufOut->ibuf()->bands(0)[1], _agcOut[1], 320*2);
+	memcpy(bufOut->ibuf()->bands(0)[2], _agcOut[2], 320*2);
+	// The AGC was run twice, once per 160-sample half of each band; _agcOut now holds all 320 samples per band.
+	((webrtc::SplittingFilter*)splittingFilter)->Synthesis(bufOut, bufIn);
+	// Write the recombined full-band frame back over the caller's buffer.
+	memcpy(inOut, bufIn->ibuf_const()->bands(0)[0], 960*2);
+}
+
diff --git a/EchoCanceller.h b/EchoCanceller.h
index 982bd4abba..a915ec1846 100644
--- a/EchoCanceller.h
+++ b/EchoCanceller.h
@@ -10,6 +10,7 @@
 #include "threading.h"
 #include "BufferPool.h"
 #include "BlockingQueue.h"
+#include "MediaStreamItf.h"
 
 namespace tgvoip{
 class EchoCanceller{
@@ -48,6 +49,29 @@ private:
 	int32_t agcMicLevel;
 #endif
 };
-}
+
+	class AudioEffect{
+	public:
+		virtual ~AudioEffect()=0;
+		virtual void Process(int16_t* inOut, size_t numSamples)=0;
+		virtual void SetPassThrough(bool passThrough);
+	protected:
+		bool passThrough;
+	};
+
+	class AutomaticGainControl : public AudioEffect{
+	public:
+		AutomaticGainControl();
+		virtual ~AutomaticGainControl();
+		virtual void Process(int16_t* inOut, size_t numSamples);
+
+	private:
+		void* agc;
+		void* splittingFilter;
+		void* splittingFilterIn;
+		void* splittingFilterOut;
+		int32_t agcMicLevel;
+	};
+};
 
 #endif //LIBTGVOIP_ECHOCANCELLER_H
diff --git a/OpusDecoder.cpp b/OpusDecoder.cpp
index 6f6626aff8..77fcb8dacc 100644
--- a/OpusDecoder.cpp
+++ b/OpusDecoder.cpp
@@ -8,6 +8,7 @@
 #include "audio/Resampler.h"
 #include "logging.h"
 #include <assert.h>
+#include <algorithm>
 
 #define PACKET_SIZE (960*2)
 
@@ -222,6 +223,9 @@ void tgvoip::OpusDecoder::RunThread(){
 			unsigned char *buf=bufferPool->Get();
 			if(buf){
 				if(size>0){
+					for(std::vector<AudioEffect*>::iterator effect=postProcEffects.begin();effect!=postProcEffects.end();++effect){
+						(*effect)->Process(reinterpret_cast<int16_t*>(processedBuffer+(PACKET_SIZE*i)), 960);
+					}
 					memcpy(buf, processedBuffer+(PACKET_SIZE*i), PACKET_SIZE);
 				}else{
 					LOGE("Error decoding, result=%d", size);
@@ -255,3 +259,13 @@ void tgvoip::OpusDecoder::ResetQueue(){
 void tgvoip::OpusDecoder::SetJitterBuffer(JitterBuffer* jitterBuffer){
 	this->jitterBuffer=jitterBuffer;
 }
+
+void tgvoip::OpusDecoder::AddAudioEffect(AudioEffect *effect){
+	postProcEffects.push_back(effect);
+}
+
+void tgvoip::OpusDecoder::RemoveAudioEffect(AudioEffect *effect){
+	std::vector<AudioEffect*>::iterator i=std::find(postProcEffects.begin(), postProcEffects.end(), effect);
+	if(i!=postProcEffects.end())
+		postProcEffects.erase(i);
+}
diff --git a/OpusDecoder.h b/OpusDecoder.h
index 51bbbacde2..1a88ce5d76 100644
--- a/OpusDecoder.h
+++ b/OpusDecoder.h
@@ -16,6 +16,7 @@
 #include "EchoCanceller.h"
 #include "JitterBuffer.h"
 #include <stdio.h>
+#include <vector>
 
 namespace tgvoip{
 class OpusDecoder {
@@ -31,6 +32,8 @@ public:
 	void SetFrameDuration(uint32_t duration);
 	void ResetQueue();
 	void SetJitterBuffer(JitterBuffer* jitterBuffer);
+	void AddAudioEffect(AudioEffect* effect);
+	void RemoveAudioEffect(AudioEffect* effect);
 
 private:
 	static size_t Callback(unsigned char* data, size_t len, void* param);
@@ -50,6 +53,7 @@ private:
 	uint32_t frameDuration;
 	EchoCanceller* echoCanceller;
 	JitterBuffer* jitterBuffer;
+	std::vector<AudioEffect*> postProcEffects;
 };
 }
 
diff --git a/VoIPController.cpp b/VoIPController.cpp
index fd1a16d822..d3105ba5f4 100644
--- a/VoIPController.cpp
+++ b/VoIPController.cpp
@@ -233,6 +233,9 @@ VoIPController::VoIPController() : activeNetItfName(""),
 	realUdpSocket=udpSocket;
 	udpConnectivityState=UDP_UNKNOWN;
 
+	outputAGC=NULL;
+	outputAGCEnabled=false;
+
 	maxAudioBitrate=(uint32_t) ServerConfig::GetSharedInstance()->GetInt("audio_max_bitrate", 20000);
 	maxAudioBitrateGPRS=(uint32_t) ServerConfig::GetSharedInstance()->GetInt("audio_max_bitrate_gprs", 8000);
 	maxAudioBitrateEDGE=(uint32_t) ServerConfig::GetSharedInstance()->GetInt("audio_max_bitrate_edge", 16000);
@@ -263,6 +266,8 @@ VoIPController::VoIPController() : activeNetItfName(""),
 	stm->enabled=1;
 	stm->frameDuration=60;
 	outgoingStreams.push_back(stm);
+
+	memset(signalBarsHistory, 0, sizeof(signalBarsHistory));
 }
 
 VoIPController::~VoIPController(){
@@ -359,6 +364,8 @@ VoIPController::~VoIPController(){
 	if(resolvedProxyAddress)
 		delete resolvedProxyAddress;
 	delete selectCanceller;
+	if(outputAGC)
+		delete outputAGC;
 	LOGD("Left VoIPController::~VoIPController");
 }
 
@@ -1165,10 +1172,13 @@ simpleAudioBlock random_id:long random_bytes:string raw_data:string = DecryptedA
 					UpdateAudioBitrate();
 
 					jitterBuffer=new JitterBuffer(NULL, incomingAudioStream->frameDuration);
+					outputAGC=new AutomaticGainControl();
+					outputAGC->SetPassThrough(!outputAGCEnabled);
 					decoder=new OpusDecoder(audioOutput);
 					decoder->SetEchoCanceller(echoCanceller);
 					decoder->SetJitterBuffer(jitterBuffer);
 					decoder->SetFrameDuration(incomingAudioStream->frameDuration);
+					decoder->AddAudioEffect(outputAGC);
 					decoder->Start();
 					if(incomingAudioStream->frameDuration>50)
 						jitterBuffer->SetMinPacketCount((uint32_t) ServerConfig::GetSharedInstance()->GetInt("jitter_initial_delay_60", 3));
@@ -1226,7 +1236,7 @@ simpleAudioBlock random_id:long random_bytes:string raw_data:string = DecryptedA
 					audioOutput->Start();
 					audioOutStarted=true;
 				}
-				if(jitterBuffer)
+				if(jitterBuffer && in.Remaining()>=sdlen)
 					jitterBuffer->HandleInput((unsigned char*) (buffer+in.GetOffset()), sdlen, pts);
 				if(i<count-1)
 					in.Seek(in.GetOffset()+sdlen);
@@ -1359,7 +1369,7 @@ void VoIPController::RunTickThread(){
 #else
 		Sleep(100);
 #endif
-		int prevSignalBarCount=signalBarCount;
+		int prevSignalBarCount=GetSignalBarsCount();
 		signalBarCount=4;
 		tickCount++;
 		if(connectionInitTime==0)
@@ -1542,12 +1552,12 @@ void VoIPController::RunTickThread(){
 			double avgDelay=jitterBuffer->GetAverageDelay();
 			double avgLateCount[3];
 			jitterBuffer->GetAverageLateCount(avgLateCount);
-			if(avgDelay>=5)
+			/*if(avgDelay>=5)
 				signalBarCount=1;
 			else if(avgDelay>=4)
 				signalBarCount=MIN(signalBarCount, 2);
 			else if(avgDelay>=3)
-				signalBarCount=MIN(signalBarCount, 3);
+				signalBarCount=MIN(signalBarCount, 3);*/
 
 			if(avgLateCount[2]>=0.2)
 				signalBarCount=1;
@@ -1694,10 +1704,12 @@ void VoIPController::RunTickThread(){
 			setEstablishedAt=0;
 		}
 
-		if(signalBarCount!=prevSignalBarCount){
-			LOGD("SIGNAL BAR COUNT CHANGED: %d", signalBarCount);
+		signalBarsHistory[tickCount%sizeof(signalBarsHistory)]=(unsigned char)signalBarCount;
+		int _signalBarCount=GetSignalBarsCount();
+		if(_signalBarCount!=prevSignalBarCount){
+			LOGD("SIGNAL BAR COUNT CHANGED: %d", _signalBarCount);
 			if(signalBarCountCallback)
-				signalBarCountCallback(this, signalBarCount);
+				signalBarCountCallback(this, _signalBarCount);
 		}
 
 
@@ -2443,13 +2455,23 @@ void VoIPController::SendUdpPing(Endpoint *endpoint){
 }
 
 int VoIPController::GetSignalBarsCount(){
-	return signalBarCount;
+	unsigned int sum=0; // averages the recorded history; dividing by sizeof keeps this correct if the array is resized
+	for(size_t i=0;i<sizeof(signalBarsHistory);i++)
+		sum+=signalBarsHistory[i];
+	return (int)(sum/sizeof(signalBarsHistory));
 }
 
 void VoIPController::SetSignalBarsCountCallback(void (*f)(VoIPController *, int)){
 	signalBarCountCallback=f;
 }
 
+void VoIPController::SetAudioOutputGainControlEnabled(bool enabled){
+	LOGD("New output AGC state: %d", enabled);
+	outputAGCEnabled=enabled;
+	if(outputAGC)
+		outputAGC->SetPassThrough(!enabled);
+}
+
 Endpoint::Endpoint(int64_t id, uint16_t port, IPv4Address& _address, IPv6Address& _v6address, char type, unsigned char peerTag[16]) : address(_address), v6address(_v6address){
 	this->id=id;
 	this->port=port;
diff --git a/VoIPController.h b/VoIPController.h
index 40feb58bac..b581204397 100644
--- a/VoIPController.h
+++ b/VoIPController.h
@@ -317,7 +317,7 @@ public:
 	std::string GetCurrentAudioOutputID();
 	/**
 	 * Set the proxy server to route the data through. Call this before connecting.
-	 * @param protocol PROXY_NONE, PROXY_SOCKS4, or PROXY_SOCKS5
+	 * @param protocol PROXY_NONE or PROXY_SOCKS5
 	 * @param address IP address or domain name of the server
 	 * @param port Port of the server
 	 * @param username Username; empty string for anonymous
@@ -334,6 +334,13 @@ public:
 		 * @param f
 		 */
 		void SetSignalBarsCountCallback(void (*f)(VoIPController*, int));
+		/**
+		 * Enable or disable AGC (automatic gain control) on audio output. Should only be enabled on phones when the earpiece speaker is being used.
+		 * The audio output will be louder with this on.
+		 * AGC with speakerphone or other kinds of loud speakers has detrimental effects on some echo cancellation implementations.
+		 * @param enabled true to run output audio through the AGC, false to put the AGC into pass-through mode
+		 */
+		void SetAudioOutputGainControlEnabled(bool enabled);
 
 private:
 	struct PendingOutgoingPacket{
@@ -451,6 +458,7 @@ private:
 	double setEstablishedAt;
 	SocketSelectCanceller* selectCanceller;
 	NetworkSocket* openingTcpSocket;
+	unsigned char signalBarsHistory[4];
 
 	BufferPool outgoingPacketsBufferPool;
 	int udpConnectivityState;
@@ -466,6 +474,9 @@ private:
 
 		int signalBarCount;
 		void (*signalBarCountCallback)(VoIPController*, int);
+
+		AutomaticGainControl* outputAGC;
+		bool outputAGCEnabled;
 	
 	/*** server config values ***/
 	uint32_t maxAudioBitrate;
diff --git a/client/android/tg_voip_jni.cpp b/client/android/tg_voip_jni.cpp
index dcb18dff41..f88b338d7c 100644
--- a/client/android/tg_voip_jni.cpp
+++ b/client/android/tg_voip_jni.cpp
@@ -299,6 +299,10 @@ extern "C" JNIEXPORT jstring Java_org_telegram_messenger_voip_VoIPController_nat
 	return env->NewStringUTF(log.c_str());
 }
 
+extern "C" JNIEXPORT void Java_org_telegram_messenger_voip_VoIPController_nativeSetAudioOutputGainControlEnabled(JNIEnv* env, jclass clasz, jlong inst, jboolean enabled){
+	((VoIPController*)(intptr_t)inst)->SetAudioOutputGainControlEnabled(enabled);
+}
+
 extern "C" JNIEXPORT jint Java_org_telegram_messenger_voip_Resampler_convert44to48(JNIEnv* env, jclass cls, jobject from, jobject to){
 	return tgvoip::audio::Resampler::Convert44To48((int16_t *) env->GetDirectBufferAddress(from), (int16_t *) env->GetDirectBufferAddress(to), (size_t) (env->GetDirectBufferCapacity(from)/2), (size_t) (env->GetDirectBufferCapacity(to)/2));
 }
diff --git a/logging.h b/logging.h
index cbe811bb56..3168bc491d 100644
--- a/logging.h
+++ b/logging.h
@@ -44,10 +44,6 @@ void tgvoip_log_file_write_header();
 #include <windows.h>
 #include <stdio.h>
 
-#if !defined(snprintf) && defined(_WIN32) && defined(__cplusplus_winrt)
-#define snprintf _snprintf
-#endif
-
 #define _TGVOIP_W32_LOG_PRINT(verb, msg, ...){ char __log_buf[1024]; snprintf(__log_buf, 1024, "%c/tgvoip: " msg "\n", verb, ##__VA_ARGS__); OutputDebugStringA(__log_buf); tgvoip_log_file_printf((char)verb, msg, __VA_ARGS__);}
 
 #define LOGV(msg, ...) _TGVOIP_W32_LOG_PRINT('V', msg, ##__VA_ARGS__)
@@ -70,6 +66,9 @@ void tgvoip_log_file_write_header();
 
 #endif
 
+#if !defined(snprintf) && defined(_WIN32) && defined(__cplusplus_winrt)
+#define snprintf _snprintf
+#endif
 
 #ifdef TGVOIP_LOG_VERBOSITY
 #if TGVOIP_LOG_VERBOSITY<5
diff --git a/os/windows/CXWrapper.cpp b/os/windows/CXWrapper.cpp
index 818ec5370b..bbc1279ef5 100755
--- a/os/windows/CXWrapper.cpp
+++ b/os/windows/CXWrapper.cpp
@@ -180,6 +180,10 @@ void VoIPControllerWrapper::SetProxy(ProxyProtocol protocol, Platform::String^ a
 	controller->SetProxy((int)protocol, _address, port, _username, _password);
 }
 
+void VoIPControllerWrapper::SetAudioOutputGainControlEnabled(bool enabled){
+	controller->SetAudioOutputGainControlEnabled(enabled);
+}
+
 void VoIPControllerWrapper::UpdateServerConfig(Platform::String^ json){
 	JsonObject^ jconfig=JsonValue::Parse(json)->GetObject();
 	std::map<std::string, std::string> config;
diff --git a/os/windows/CXWrapper.h b/os/windows/CXWrapper.h
index cc3e0033b0..38e0f0b4d6 100755
--- a/os/windows/CXWrapper.h
+++ b/os/windows/CXWrapper.h
@@ -81,6 +81,7 @@ namespace libtgvoip{
 		Error GetLastError();
 		static Platform::String^ GetVersion();
 		int64 GetPreferredRelayID();
+		void SetAudioOutputGainControlEnabled(bool enabled);
 		static void UpdateServerConfig(Platform::String^ json);
 		static void SwitchSpeaker(bool external);
 		//static Platform::String^ TestAesIge();