Swiftgram/submodules/Postbox/Sources/StringIndexTokens.swift

76 lines
2.3 KiB
Swift

import Foundation
import StringTransliteration
/// Selects which transformation flag `stringIndexTokens(_:transliteration:)`
/// forwards to `postboxTransformedString` when the input contains non-ASCII
/// characters. Pure-ASCII input is never transformed, whatever the mode.
public enum StringIndexTokenTransliteration {
    /// Both flags are passed as `false`.
    case none

    /// The third argument of `postboxTransformedString` is passed as `true`.
    /// NOTE(review): presumably keeps the original text alongside its
    /// transliteration — confirm against `postboxTransformedString`.
    case combined

    /// The second argument of `postboxTransformedString` is passed as `true`.
    /// NOTE(review): presumably replaces the text with its transliteration —
    /// confirm against `postboxTransformedString`.
    case transliterated
}
/// Splits `string` into unique word tokens, returned in order of first appearance.
///
/// The input is lowercased first. If any UTF-16 code unit of the lowercased
/// text is 128 or above, the text is passed through `postboxTransformedString`
/// with flags derived from `transliteration`; pure-ASCII text is tokenized as is.
/// Word boundaries come from `CFStringTokenizer` using the current locale.
///
/// - Parameters:
///   - string: The text to tokenize.
///   - transliteration: Chooses the flags handed to `postboxTransformedString`
///     for non-ASCII input.
/// - Returns: One `ValueBoxKey` per distinct word, each holding the word's
///   UTF-16 code units (two bytes per unit).
public func stringIndexTokens(_ string: String, transliteration: StringIndexTokenTransliteration) -> [ValueBoxKey] {
    let lowered = string.lowercased() as NSString

    // Stops at the first code unit outside the 7-bit ASCII range.
    let hasNonASCIIUnit = (0 ..< lowered.length).contains { lowered.character(at: $0) >= 128 }

    let text: NSString
    if hasNonASCIIUnit {
        text = postboxTransformedString(lowered, transliteration == .transliterated, transliteration == .combined) as NSString
    } else {
        text = lowered
    }

    let tokenizer = CFStringTokenizerCreate(
        kCFAllocatorDefault,
        text,
        CFRangeMake(0, text.length),
        UInt(kCFStringTokenizerUnitWord),
        CFLocaleCopyCurrent()
    )

    var orderedTokens: [ValueBoxKey] = []
    var seenTokens = Set<ValueBoxKey>()

    // An empty token type means the tokenizer has run out of tokens.
    while !CFStringTokenizerAdvanceToNextToken(tokenizer).isEmpty {
        let wordRange = CFStringTokenizerGetCurrentTokenRange(tokenizer)
        guard wordRange.location >= 0, wordRange.length != 0 else {
            continue
        }

        // Two bytes per UTF-16 code unit; the units are copied straight into the key's storage.
        let key = ValueBoxKey(length: wordRange.length * 2)
        text.getCharacters(
            key.memory.assumingMemoryBound(to: unichar.self),
            range: NSRange(location: wordRange.location, length: wordRange.length)
        )

        // `inserted` is true only the first time a given token is seen.
        if seenTokens.insert(key).inserted {
            orderedTokens.append(key)
        }
    }

    return orderedTokens
}
/// Reports whether every token in `other` is a prefix of at least one token in `tokens`.
///
/// - Parameters:
///   - tokens: The indexed tokens to search within.
///   - other: The query tokens; each must satisfy `isPrefix(to:)` for some
///     element of `tokens`.
/// - Returns: `false` when `other` is empty or when any query token matches
///   nothing in `tokens`; `true` otherwise.
public func matchStringIndexTokens(_ tokens: [ValueBoxKey], with other: [ValueBoxKey]) -> Bool {
    // An empty query never matches.
    guard !other.isEmpty else {
        return false
    }

    // A single-element query needs no special case: "every query token has a
    // match" and "the one query token has a match" are the same condition.
    return other.allSatisfy { queryToken in
        tokens.contains(where: { candidate in
            queryToken.isPrefix(to: candidate)
        })
    }
}