import Foundation
import StringTransliteration

/// Controls how `stringIndexTokens(_:transliteration:)` pre-processes input that
/// contains characters outside the 7-bit ASCII range.
///
/// The cases are only forwarded as boolean flags to `postboxTransformedString`;
/// the descriptions below are inferred from the case names — NOTE(review): confirm
/// against the `StringTransliteration` implementation.
public enum StringIndexTokenTransliteration {
    /// Both flags are passed as `false`.
    case none
    /// The third argument of `postboxTransformedString` is passed as `true`
    /// (presumably: keep the original text and add its transliteration).
    case combined
    /// The second argument of `postboxTransformedString` is passed as `true`
    /// (presumably: replace the text with its transliteration).
    case transliterated
}

/// Splits `string` into unique, lowercased word tokens encoded as UTF-16 code units.
///
/// The string is lowercased first. If any UTF-16 code unit is `>= 128`, the string is
/// additionally passed through `postboxTransformedString` with flags derived from
/// `transliteration`. The result is then tokenized on word boundaries with
/// `CFStringTokenizer` using the current locale.
///
/// - Parameters:
///   - string: The text to tokenize.
///   - transliteration: Selects the flags handed to `postboxTransformedString` for
///     non-ASCII input.
/// - Returns: The distinct tokens in order of first appearance. Each key holds the
///   token's UTF-16 code units (two bytes per unit).
public func stringIndexTokens(_ string: String, transliteration: StringIndexTokenTransliteration) -> [ValueBoxKey] {
    var nsString = string.lowercased() as NSString

    // Pure-ASCII input skips the transformation step entirely.
    let containsNonASCII = (0 ..< nsString.length).contains { nsString.character(at: $0) >= 128 }
    if containsNonASCII {
        nsString = postboxTransformedString(nsString, transliteration == .transliterated, transliteration == .combined) as NSString
    }

    let flag = UInt(kCFStringTokenizerUnitWord)
    let tokenizer = CFStringTokenizerCreate(kCFAllocatorDefault, nsString, CFRangeMake(0, nsString.length), flag, CFLocaleCopyCurrent())

    var tokens: [ValueBoxKey] = []
    // Tracks tokens already emitted so `tokens` stays duplicate-free while keeping order.
    var addedTokens = Set<ValueBoxKey>()

    var tokenType = CFStringTokenizerAdvanceToNextToken(tokenizer)
    // An empty token type means the tokenizer has run out of tokens.
    while tokenType != [] {
        let currentTokenRange = CFStringTokenizerGetCurrentTokenRange(tokenizer)
        if currentTokenRange.location >= 0 && currentTokenRange.length != 0 {
            // `length` counts UTF-16 code units, each of which occupies two bytes.
            let token = ValueBoxKey(length: currentTokenRange.length * 2)
            nsString.getCharacters(token.memory.assumingMemoryBound(to: unichar.self), range: NSMakeRange(currentTokenRange.location, currentTokenRange.length))
            // `insert` reports whether the token was new, avoiding a separate lookup.
            if addedTokens.insert(token).inserted {
                tokens.append(token)
            }
        }
        tokenType = CFStringTokenizerAdvanceToNextToken(tokenizer)
    }

    return tokens
}

/// Reports whether every token in `other` is a prefix of at least one token in `tokens`.
///
/// - Parameters:
///   - tokens: The indexed tokens to search in.
///   - other: The query tokens; each one must prefix-match some entry of `tokens`.
/// - Returns: `false` when `other` is empty or when any of its tokens has no
///   prefix match in `tokens`; `true` otherwise.
public func matchStringIndexTokens(_ tokens: [ValueBoxKey], with other: [ValueBoxKey]) -> Bool {
    // An empty query never matches.
    guard !other.isEmpty else {
        return false
    }
    return other.allSatisfy { otherToken in
        tokens.contains { token in otherToken.isPrefix(to: token) }
    }
}