import Foundation
import NaturalLanguage
import SwiftSignalKit
import TelegramCore
import AccountContext
import TelegramUIPreferences

/// Per-chat translation state stored in the application item cache.
///
/// - `baseLang`: interface base language code the state was computed for
///   (a trailing "-raw" is removed before it is stored by `chatTranslationState`).
/// - `fromLang`: language recognized as dominant among incoming messages;
///   an empty string when nothing could be recognized.
/// - `timestamp`: Unix time (seconds) at which recognition ran, if known.
/// - `toLang`: target language override, if any.
/// - `isEnabled`: whether translation is switched on for the chat.
public struct ChatTranslationState: Codable {
    enum CodingKeys: String, CodingKey {
        case baseLang
        case fromLang
        case timestamp
        case toLang
        case isEnabled
    }
    
    public let baseLang: String
    public let fromLang: String
    public let timestamp: Int32?
    public let toLang: String?
    public let isEnabled: Bool
    
    public init(
        baseLang: String,
        fromLang: String,
        timestamp: Int32?,
        toLang: String?,
        isEnabled: Bool
    ) {
        self.baseLang = baseLang
        self.fromLang = fromLang
        self.timestamp = timestamp
        self.toLang = toLang
        self.isEnabled = isEnabled
    }
    
    public init(from decoder: Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)
        
        self.baseLang = try container.decode(String.self, forKey: .baseLang)
        self.fromLang = try container.decode(String.self, forKey: .fromLang)
        self.timestamp = try container.decodeIfPresent(Int32.self, forKey: .timestamp)
        self.toLang = try container.decodeIfPresent(String.self, forKey: .toLang)
        self.isEnabled = try container.decode(Bool.self, forKey: .isEnabled)
    }
    
    public func encode(to encoder: Encoder) throws {
        var container = encoder.container(keyedBy: CodingKeys.self)
        
        try container.encode(self.baseLang, forKey: .baseLang)
        try container.encode(self.fromLang, forKey: .fromLang)
        try container.encodeIfPresent(self.timestamp, forKey: .timestamp)
        try container.encodeIfPresent(self.toLang, forKey: .toLang)
        try container.encode(self.isEnabled, forKey: .isEnabled)
    }
    
    /// Returns a copy of the state with `toLang` replaced.
    public func withToLang(_ toLang: String?) -> ChatTranslationState {
        return ChatTranslationState(
            baseLang: self.baseLang,
            fromLang: self.fromLang,
            timestamp: self.timestamp,
            toLang: toLang,
            isEnabled: self.isEnabled
        )
    }
    
    /// Returns a copy of the state with `isEnabled` replaced.
    public func withIsEnabled(_ isEnabled: Bool) -> ChatTranslationState {
        return ChatTranslationState(
            baseLang: self.baseLang,
            fromLang: self.fromLang,
            timestamp: self.timestamp,
            toLang: self.toLang,
            isEnabled: isEnabled
        )
    }
}

/// Subscribes to the cached translation state of `peerId`.
///
/// The cache key is the peer's 64-bit id written into an 8-byte buffer.
/// Emits `nil` when there is no entry or it cannot be read as `ChatTranslationState`.
private func cachedChatTranslationState(engine: TelegramEngine, peerId: EnginePeer.Id) -> Signal<ChatTranslationState?, NoError> {
    let key = EngineDataBuffer(length: 8)
    key.setInt64(0, value: peerId.id._internalGetInt64Value())
    
    return engine.data.subscribe(TelegramEngine.EngineData.Item.ItemCache.Item(collectionId: ApplicationSpecificItemCacheCollectionId.translationState, id: key))
    |> map { entry -> ChatTranslationState? in
        return entry?.get(ChatTranslationState.self)
    }
}

/// Stores `state` in the item cache for `peerId`, or removes the entry when `state` is `nil`.
private func updateChatTranslationState(engine: TelegramEngine, peerId: EnginePeer.Id, state: ChatTranslationState?) -> Signal<Never, NoError> {
    let key = EngineDataBuffer(length: 8)
    key.setInt64(0, value: peerId.id._internalGetInt64Value())
    
    if let state {
        return engine.itemCache.put(collectionId: ApplicationSpecificItemCacheCollectionId.translationState, id: key, item: state)
    } else {
        return engine.itemCache.remove(collectionId: ApplicationSpecificItemCacheCollectionId.translationState, id: key)
    }
}

/// Reads the cached state of `peerId` once, applies `f` to it and writes the result back
/// (a `nil` result removes the entry).
///
/// NOTE(review): when nothing is cached, `f` is not invoked and the returned signal is
/// `.never()`, i.e. it does not complete. Callers must not wait for completion in that case.
public func updateChatTranslationStateInteractively(engine: TelegramEngine, peerId: EnginePeer.Id, _ f: @escaping (ChatTranslationState?) -> ChatTranslationState?) -> Signal<Never, NoError> {
    let key = EngineDataBuffer(length: 8)
    key.setInt64(0, value: peerId.id._internalGetInt64Value())
    
    return engine.data.get(TelegramEngine.EngineData.Item.ItemCache.Item(collectionId: ApplicationSpecificItemCacheCollectionId.translationState, id: key))
    |> map { entry -> ChatTranslationState? in
        return entry?.get(ChatTranslationState.self)
    }
    |> mapToSignal { current -> Signal<Never, NoError> in
        if let current {
            return updateChatTranslationState(engine: engine, peerId: peerId, state: f(current))
        } else {
            return .never()
        }
    }
}

// Shared recognizer instance; it is reset after every processed string.
@available(iOS 12.0, *)
private let languageRecognizer = NLLanguageRecognizer()

/// Requests translation into `toLang` for the given messages.
///
/// For every message found locally:
/// - a replied-to message with non-empty text that is not yet translated to `toLang`
///   is added to the request;
/// - the message itself is added unless it was authored by the current account or is
///   already translated to `toLang`, and only if it has text or contains a poll.
/// Message ids that cannot be loaded from the transaction are requested as-is.
/// Ids are de-duplicated while preserving first-seen order. Translation errors are
/// swallowed: the resulting signal simply completes.
public func translateMessageIds(context: AccountContext, messageIds: [EngineMessage.Id], toLang: String) -> Signal<Never, NoError> {
    return context.account.postbox.transaction { transaction -> Signal<Never, NoError> in
        var messageIdsToTranslate: [EngineMessage.Id] = []
        var messageIdsSet = Set<EngineMessage.Id>()
        
        // Appends `id` only the first time it is seen.
        func enqueue(_ id: EngineMessage.Id) {
            if messageIdsSet.insert(id).inserted {
                messageIdsToTranslate.append(id)
            }
        }
        
        for messageId in messageIds {
            guard let message = transaction.getMessage(messageId) else {
                // Not available locally: still ask for a translation.
                enqueue(messageId)
                continue
            }
            
            if let replyAttribute = message.attributes.first(where: { $0 is ReplyMessageAttribute }) as? ReplyMessageAttribute, let replyMessage = message.associatedMessages[replyAttribute.messageId] {
                if !replyMessage.text.isEmpty {
                    let replyTranslation = replyMessage.attributes.first(where: { $0 is TranslationMessageAttribute }) as? TranslationMessageAttribute
                    if replyTranslation?.toLang != toLang {
                        enqueue(replyMessage.id)
                    }
                }
            }
            
            // Own messages are never translated (their replied-to message above still is).
            guard message.author?.id != context.account.peerId else {
                continue
            }
            if let translation = message.attributes.first(where: { $0 is TranslationMessageAttribute }) as? TranslationMessageAttribute, translation.toLang == toLang {
                continue
            }
            
            if !message.text.isEmpty || message.media.contains(where: { $0 is TelegramMediaPoll }) {
                enqueue(messageId)
            }
        }
        
        return context.engine.messages.translateMessages(messageIds: messageIdsToTranslate, toLang: toLang)
        |> `catch` { _ -> Signal<Never, NoError> in
            return .complete()
        }
    }
    |> switchToLatest
}

/// Emits the translation state for the chat with `peerId`, or `nil` when the chat
/// should not offer translation.
///
/// `nil` is emitted when: the peer id is 777000, iOS is older than 12, chat translation is
/// disabled in the translation settings, or the detected source language is in the
/// "do not translate" set (the user's ignored languages, or by default the base
/// language plus the system languages).
///
/// A cached state is reused if it was computed for the same base language less than an
/// hour ago. Otherwise `nil` is emitted first, then the language is recognized from the
/// latest messages of the chat, the result is written to the cache and emitted.
public func chatTranslationState(context: AccountContext, peerId: EnginePeer.Id) -> Signal<ChatTranslationState?, NoError> {
    // NOTE(review): 777000 is presumably the service-notifications peer — confirm.
    if peerId.id == EnginePeer.Id.Id._internalFromInt64Value(777000) {
        return .single(nil)
    }
    
    let loggingEnabled = context.sharedContext.immediateExperimentalUISettings.logLanguageRecognition
    
    // NLLanguageRecognizer requires iOS 12.
    guard #available(iOS 12.0, *) else {
        return .single(nil)
    }
    
    var baseLang = context.sharedContext.currentPresentationData.with { $0 }.strings.baseLanguageCode
    let rawSuffix = "-raw"
    if baseLang.hasSuffix(rawSuffix) {
        baseLang = String(baseLang.dropLast(rawSuffix.count))
    }
    
    return context.sharedContext.accountManager.sharedData(keys: [ApplicationSpecificSharedDataKeys.translationSettings])
    |> mapToSignal { sharedData in
        let settings = sharedData.entries[ApplicationSpecificSharedDataKeys.translationSettings]?.get(TranslationSettings.self) ?? TranslationSettings.defaultSettings
        if !settings.translateChats {
            return .single(nil)
        }
        
        var dontTranslateLanguages = Set<String>()
        if let ignoredLanguages = settings.ignoredLanguages {
            dontTranslateLanguages = Set(ignoredLanguages)
        } else {
            dontTranslateLanguages.insert(baseLang)
            for language in systemLanguageCodes() {
                dontTranslateLanguages.insert(language)
            }
        }
        
        return cachedChatTranslationState(engine: context.engine, peerId: peerId)
        |> mapToSignal { cached in
            // Current Unix time in seconds.
            let currentTime = Int32(CFAbsoluteTimeGetCurrent() + kCFAbsoluteTimeIntervalSince1970)
            
            // Reuse the cache for one hour as long as the base language is unchanged.
            if let cached, let timestamp = cached.timestamp, cached.baseLang == baseLang && currentTime - timestamp < 60 * 60 {
                if !dontTranslateLanguages.contains(cached.fromLang) {
                    return .single(cached)
                } else {
                    return .single(nil)
                }
            } else {
                return .single(nil)
                |> then(
                    context.account.viewTracker.aroundMessageHistoryViewForLocation(.peer(peerId: peerId, threadId: nil), index: .upperBound, anchorIndex: .upperBound, count: 32, fixedCombinedReadStates: nil)
                    |> filter { messageHistoryView -> Bool in
                        // Wait until the view holds more than one entry.
                        return messageHistoryView.0.entries.count > 1
                    }
                    |> take(1)
                    |> map { messageHistoryView, _, _ -> ChatTranslationState? in
                        let messages = messageHistoryView.entries.map(\.message)
                        
                        if loggingEnabled {
                            Logger.shared.log("ChatTranslation", "Start language recognizing for \(peerId)")
                        }
                        
                        // Language code -> total length of the messages recognized as that language.
                        var fromLangs: [String: Int] = [:]
                        // Number of messages that produced a supported language.
                        var recognizedCount = 0
                        
                        for message in messages {
                            if message.effectivelyIncoming(context.account.peerId), message.text.count >= 10 {
                                var text = String(message.text.prefix(256))
                                
                                // Drop spans that carry no natural-language signal.
                                if var entities = message.textEntitiesAttribute?.entities.filter({ entity in
                                    switch entity.type {
                                    case .Pre, .Code, .Url, .Email, .Mention, .Hashtag, .BotCommand:
                                        return true
                                    default:
                                        return false
                                    }
                                }) {
                                    // Back-to-front, so removing one span does not shift the ones before it.
                                    entities = entities.sorted(by: { $0.range.lowerBound > $1.range.lowerBound })
                                    
                                    // NOTE(review): entity offsets are applied as Character offsets here;
                                    // verify that this matches the unit entity ranges are expressed in.
                                    var ranges: [Range<String.Index>] = []
                                    for entity in entities {
                                        if entity.range.lowerBound > text.count || entity.range.upperBound > text.count {
                                            continue
                                        }
                                        ranges.append(text.index(text.startIndex, offsetBy: entity.range.lowerBound) ..< text.index(text.startIndex, offsetBy: entity.range.upperBound))
                                    }
                                    for range in ranges {
                                        // Spans reaching the current end of the text are left in place.
                                        if range.upperBound < text.endIndex {
                                            text.removeSubrange(range)
                                        }
                                    }
                                }
                                
                                // Skip messages that are too short once the entities are stripped.
                                // (Checks the stripped text: the unstripped length was already
                                // verified by the enclosing condition.)
                                if text.count < 10 {
                                    continue
                                }
                                
                                languageRecognizer.processString(text)
                                let hypotheses = languageRecognizer.languageHypotheses(withMaximum: 4)
                                languageRecognizer.reset()
                                
                                let filteredLanguages = hypotheses.filter { supportedTranslationLanguages.contains(normalizeTranslationLanguage($0.key.rawValue)) }.sorted(by: { $0.value > $1.value })
                                if let language = filteredLanguages.first {
                                    let fromLang = normalizeTranslationLanguage(language.key.rawValue)
                                    if loggingEnabled && !["en", "ru"].contains(fromLang) && !dontTranslateLanguages.contains(fromLang) {
                                        Logger.shared.log("ChatTranslation", "\(text)")
                                        Logger.shared.log("ChatTranslation", "Recognized as: \(fromLang), other hypotheses: \(hypotheses.map { $0.key.rawValue }.joined(separator: ",")) ")
                                    }
                                    // Weight each vote by the length of the original message.
                                    fromLangs[fromLang] = (fromLangs[fromLang] ?? 0) + message.text.count
                                    recognizedCount += 1
                                }
                            }
                            // 16 recognized messages are considered enough.
                            if recognizedCount >= 16 {
                                break
                            }
                        }
                        
                        // Language with the highest accumulated weight; "" when nothing was recognized.
                        let fromLang = fromLangs.max(by: { $0.value < $1.value })?.key ?? ""
                        if loggingEnabled {
                            Logger.shared.log("ChatTranslation", "Ended with: \(fromLang)")
                        }
                        
                        // Keep the user's previous target language and enabled flag.
                        let state = ChatTranslationState(
                            baseLang: baseLang,
                            fromLang: fromLang,
                            timestamp: currentTime,
                            toLang: cached?.toLang,
                            isEnabled: cached?.isEnabled ?? false
                        )
                        // Fire-and-forget cache write; the state is cached even if it is not emitted.
                        let _ = updateChatTranslationState(engine: context.engine, peerId: peerId, state: state).start()
                        
                        if !dontTranslateLanguages.contains(fromLang) {
                            return state
                        } else {
                            return nil
                        }
                    }
                )
            }
        }
    }
}