Swiftgram/submodules/TranslateUI/Sources/ChatTranslation.swift
2023-04-26 18:49:13 +04:00

289 lines
14 KiB
Swift

import Foundation
import NaturalLanguage
import SwiftSignalKit
import TelegramCore
import Postbox
import AccountContext
import TelegramUIPreferences
public struct ChatTranslationState: Codable {
enum CodingKeys: String, CodingKey {
case baseLang
case fromLang
case toLang
case isEnabled
}
public let baseLang: String
public let fromLang: String
public let toLang: String?
public let isEnabled: Bool
public init(
baseLang: String,
fromLang: String,
toLang: String?,
isEnabled: Bool
) {
self.baseLang = baseLang
self.fromLang = fromLang
self.toLang = toLang
self.isEnabled = isEnabled
}
public init(from decoder: Decoder) throws {
let container = try decoder.container(keyedBy: CodingKeys.self)
self.baseLang = try container.decode(String.self, forKey: .baseLang)
self.fromLang = try container.decode(String.self, forKey: .fromLang)
self.toLang = try container.decodeIfPresent(String.self, forKey: .toLang)
self.isEnabled = try container.decode(Bool.self, forKey: .isEnabled)
}
public func encode(to encoder: Encoder) throws {
var container = encoder.container(keyedBy: CodingKeys.self)
try container.encode(self.baseLang, forKey: .baseLang)
try container.encode(self.fromLang, forKey: .fromLang)
try container.encodeIfPresent(self.toLang, forKey: .toLang)
try container.encode(self.isEnabled, forKey: .isEnabled)
}
public func withToLang(_ toLang: String?) -> ChatTranslationState {
return ChatTranslationState(
baseLang: self.baseLang,
fromLang: self.fromLang,
toLang: toLang,
isEnabled: self.isEnabled
)
}
public func withIsEnabled(_ isEnabled: Bool) -> ChatTranslationState {
return ChatTranslationState(
baseLang: self.baseLang,
fromLang: self.fromLang,
toLang: self.toLang,
isEnabled: isEnabled
)
}
}
private func cachedChatTranslationState(engine: TelegramEngine, peerId: EnginePeer.Id) -> Signal<ChatTranslationState?, NoError> {
let key = ValueBoxKey(length: 8)
key.setInt64(0, value: peerId.id._internalGetInt64Value())
return engine.data.subscribe(TelegramEngine.EngineData.Item.ItemCache.Item(collectionId: ApplicationSpecificItemCacheCollectionId.translationState, id: key))
|> map { entry -> ChatTranslationState? in
return entry?.get(ChatTranslationState.self)
}
}
private func updateChatTranslationState(engine: TelegramEngine, peerId: EnginePeer.Id, state: ChatTranslationState?) -> Signal<Never, NoError> {
let key = ValueBoxKey(length: 8)
key.setInt64(0, value: peerId.id._internalGetInt64Value())
if let state {
return engine.itemCache.put(collectionId: ApplicationSpecificItemCacheCollectionId.translationState, id: key, item: state)
} else {
return engine.itemCache.remove(collectionId: ApplicationSpecificItemCacheCollectionId.translationState, id: key)
}
}
public func updateChatTranslationStateInteractively(engine: TelegramEngine, peerId: EnginePeer.Id, _ f: @escaping (ChatTranslationState?) -> ChatTranslationState?) -> Signal<Never, NoError> {
let key = ValueBoxKey(length: 8)
key.setInt64(0, value: peerId.id._internalGetInt64Value())
return engine.data.get(TelegramEngine.EngineData.Item.ItemCache.Item(collectionId: ApplicationSpecificItemCacheCollectionId.translationState, id: key))
|> map { entry -> ChatTranslationState? in
return entry?.get(ChatTranslationState.self)
}
|> mapToSignal { current -> Signal<Never, NoError> in
if let current {
return updateChatTranslationState(engine: engine, peerId: peerId, state: f(current))
} else {
return .never()
}
}
}
@available(iOS 12.0, *)
private let languageRecognizer = NLLanguageRecognizer()
public func translateMessageIds(context: AccountContext, messageIds: [EngineMessage.Id], toLang: String) -> Signal<Void, NoError> {
return context.account.postbox.transaction { transaction -> Signal<Void, NoError> in
var messageIdsToTranslate: [EngineMessage.Id] = []
var messageIdsSet = Set<EngineMessage.Id>()
for messageId in messageIds {
if let message = transaction.getMessage(messageId) {
if let replyAttribute = message.attributes.first(where: { $0 is ReplyMessageAttribute }) as? ReplyMessageAttribute, let replyMessage = message.associatedMessages[replyAttribute.messageId] {
if !replyMessage.text.isEmpty {
if let translation = replyMessage.attributes.first(where: { $0 is TranslationMessageAttribute }) as? TranslationMessageAttribute, translation.toLang == toLang {
} else {
if !messageIdsSet.contains(replyMessage.id) {
messageIdsToTranslate.append(replyMessage.id)
messageIdsSet.insert(replyMessage.id)
}
}
}
}
if !message.text.isEmpty && message.author?.id != context.account.peerId {
if let translation = message.attributes.first(where: { $0 is TranslationMessageAttribute }) as? TranslationMessageAttribute, translation.toLang == toLang {
} else {
if !messageIdsSet.contains(messageId) {
messageIdsToTranslate.append(messageId)
messageIdsSet.insert(messageId)
}
}
}
} else {
if !messageIdsSet.contains(messageId) {
messageIdsToTranslate.append(messageId)
messageIdsSet.insert(messageId)
}
}
}
return context.engine.messages.translateMessages(messageIds: messageIdsToTranslate, toLang: toLang)
|> `catch` { _ -> Signal<Void, NoError> in
return .complete()
}
} |> switchToLatest
}
public func chatTranslationState(context: AccountContext, peerId: EnginePeer.Id) -> Signal<ChatTranslationState?, NoError> {
if peerId.id == PeerId.Id._internalFromInt64Value(777000) {
return .single(nil)
}
let loggingEnabled = context.sharedContext.immediateExperimentalUISettings.logLanguageRecognition
if #available(iOS 12.0, *) {
var baseLang = context.sharedContext.currentPresentationData.with { $0 }.strings.baseLanguageCode
let rawSuffix = "-raw"
if baseLang.hasSuffix(rawSuffix) {
baseLang = String(baseLang.dropLast(rawSuffix.count))
}
return context.sharedContext.accountManager.sharedData(keys: [ApplicationSpecificSharedDataKeys.translationSettings])
|> mapToSignal { sharedData in
let settings = sharedData.entries[ApplicationSpecificSharedDataKeys.translationSettings]?.get(TranslationSettings.self) ?? TranslationSettings.defaultSettings
if !settings.translateChats {
return .single(nil)
}
var dontTranslateLanguages = Set<String>()
if let ignoredLanguages = settings.ignoredLanguages {
dontTranslateLanguages = Set(ignoredLanguages)
} else {
dontTranslateLanguages.insert(baseLang)
for language in systemLanguageCodes() {
dontTranslateLanguages.insert(language)
}
}
return cachedChatTranslationState(engine: context.engine, peerId: peerId)
|> mapToSignal { cached in
if let cached, cached.baseLang == baseLang {
if !dontTranslateLanguages.contains(cached.fromLang) {
return .single(cached)
} else {
return .single(nil)
}
} else {
return .single(nil)
|> then(
context.account.viewTracker.aroundMessageHistoryViewForLocation(.peer(peerId: peerId, threadId: nil), index: .upperBound, anchorIndex: .upperBound, count: 32, fixedCombinedReadStates: nil)
|> filter { messageHistoryView -> Bool in
return messageHistoryView.0.entries.count > 1
}
|> take(1)
|> map { messageHistoryView, _, _ -> ChatTranslationState? in
let messages = messageHistoryView.entries.map(\.message)
if loggingEnabled {
Logger.shared.log("ChatTranslation", "Start language recognizing for \(peerId)")
}
var fromLangs: [String: Int] = [:]
var count = 0
for message in messages {
if message.effectivelyIncoming(context.account.peerId), message.text.count >= 10 {
var text = String(message.text.prefix(256))
if var entities = message.textEntitiesAttribute?.entities.filter({ [.Pre, .Code, .Url, .Email, .Mention, .Hashtag, .BotCommand].contains($0.type) }) {
entities = entities.sorted(by: { $0.range.lowerBound > $1.range.lowerBound })
var ranges: [Range<String.Index>] = []
for entity in entities {
if entity.range.lowerBound > text.count || entity.range.upperBound > text.count {
continue
}
ranges.append(text.index(text.startIndex, offsetBy: entity.range.lowerBound) ..< text.index(text.startIndex, offsetBy: entity.range.upperBound))
}
for range in ranges {
text.removeSubrange(range)
}
}
if message.text.count < 10 {
continue
}
languageRecognizer.processString(text)
let hypotheses = languageRecognizer.languageHypotheses(withMaximum: 4)
languageRecognizer.reset()
func normalize(_ code: String) -> String {
if code.contains("-") {
return code.components(separatedBy: "-").first ?? code
} else if code == "nb" {
return "no"
} else {
return code
}
}
let filteredLanguages = hypotheses.filter { supportedTranslationLanguages.contains(normalize($0.key.rawValue)) }.sorted(by: { $0.value > $1.value })
if let language = filteredLanguages.first {
let fromLang = normalize(language.key.rawValue)
if loggingEnabled && !["en", "ru"].contains(fromLang) && !dontTranslateLanguages.contains(fromLang) {
Logger.shared.log("ChatTranslation", "\(text)")
Logger.shared.log("ChatTranslation", "Recognized as: \(fromLang), other hypotheses: \(hypotheses.map { $0.key.rawValue }.joined(separator: ",")) ")
}
fromLangs[fromLang] = (fromLangs[fromLang] ?? 0) + message.text.count
count += 1
}
}
if count >= 16 {
break
}
}
var mostFrequent: (String, Int)?
for (lang, count) in fromLangs {
if let current = mostFrequent {
if count > current.1 {
mostFrequent = (lang, count)
}
} else {
mostFrequent = (lang, count)
}
}
let fromLang = mostFrequent?.0 ?? ""
if loggingEnabled {
Logger.shared.log("ChatTranslation", "Ended with: \(fromLang)")
}
let state = ChatTranslationState(baseLang: baseLang, fromLang: fromLang, toLang: nil, isEnabled: false)
let _ = updateChatTranslationState(engine: context.engine, peerId: peerId, state: state).start()
if !dontTranslateLanguages.contains(fromLang) {
return state
} else {
return nil
}
}
)
}
}
}
} else {
return .single(nil)
}
}