mirror of
https://github.com/Swiftgram/Telegram-iOS.git
synced 2025-06-16 05:55:20 +00:00
185 lines
5.0 KiB
C++
185 lines
5.0 KiB
C++
#include "LanguageTree.h"
|
|
|
|
#include "TokenList.h"
|
|
#include <boost/regex.hpp>
|
|
|
|
void LanguageTree::load(const std::string& content)
|
|
{
|
|
Buffer buffer{ content };
|
|
parsePatterns(buffer);
|
|
parseGrammars(buffer);
|
|
parseLanguages(buffer);
|
|
}
|
|
|
|
inline uint8_t freadUint8(Buffer &buffer)
|
|
{
|
|
uint8_t value = 0;
|
|
if (buffer.offset + sizeof(uint8_t) <= buffer.content.size()) {
|
|
memcpy(&value, &buffer.content[buffer.offset], sizeof(uint8_t));
|
|
buffer.offset += sizeof(uint8_t);
|
|
}
|
|
return value;
|
|
}
|
|
|
|
inline uint16_t freadUint16(Buffer &buffer)
|
|
{
|
|
uint16_t value = 0;
|
|
if (buffer.offset + sizeof(uint16_t) <= buffer.content.size()) {
|
|
memcpy(&value, &buffer.content[buffer.offset], sizeof(uint16_t));
|
|
buffer.offset += sizeof(uint16_t);
|
|
}
|
|
return value;
|
|
}
|
|
|
|
inline std::string freadString(Buffer &buffer)
|
|
{
|
|
size_t length = freadUint8(buffer);
|
|
if (length >= 254)
|
|
{
|
|
size_t a = freadUint8(buffer);
|
|
size_t b = freadUint8(buffer);
|
|
size_t c = freadUint8(buffer);
|
|
length = a | (b << 8) | (c << 16);
|
|
}
|
|
|
|
std::string str(length, '\0');
|
|
if (buffer.offset + length <= buffer.content.size())
|
|
{
|
|
memcpy(&str[0], &buffer.content[buffer.offset], length);
|
|
buffer.offset += length;
|
|
}
|
|
return str;
|
|
}
|
|
|
|
void LanguageTree::parseLanguages(Buffer &buffer)
|
|
{
|
|
uint16_t count = freadUint16(buffer);
|
|
|
|
for (int i = 0; i < count; ++i)
|
|
{
|
|
std::string name = freadString(buffer);
|
|
std::string displayName = freadString(buffer);
|
|
size_t index = freadUint16(buffer);
|
|
m_languages.emplace(name, std::pair<std::string, size_t>(displayName, index));
|
|
}
|
|
}
|
|
|
|
void LanguageTree::parseGrammars(Buffer &buffer)
|
|
{
|
|
uint16_t count = freadUint16(buffer);
|
|
|
|
for (int i = 0; i < count; ++i)
|
|
{
|
|
auto grammar = std::make_shared<Grammar>();
|
|
const auto keys = freadUint8(buffer);
|
|
|
|
for (int j = 0; j < keys; ++j)
|
|
{
|
|
std::vector<PatternPtr> indices;
|
|
|
|
const auto key = freadString(buffer);
|
|
uint8_t ids = freadUint8(buffer);
|
|
|
|
for (int k = 0; k < ids; ++k)
|
|
{
|
|
indices.push_back(PatternPtr(shared_from_this(), freadUint16(buffer)));
|
|
}
|
|
|
|
grammar->tokens.push_back(GrammarToken(key, indices));
|
|
}
|
|
|
|
m_grammars.push_back(grammar);
|
|
}
|
|
}
|
|
|
|
void LanguageTree::parsePatterns(Buffer &buffer)
|
|
{
|
|
uint16_t count = freadUint16(buffer);
|
|
|
|
for (int i = 0; i < count; ++i)
|
|
{
|
|
std::string item = freadString(buffer);
|
|
std::string_view value(item);
|
|
std::string alias;
|
|
|
|
size_t beg = value.find_first_of('/');
|
|
size_t end = value.find_last_of('/');
|
|
|
|
if (beg != std::string::npos && end != std::string::npos)
|
|
{
|
|
std::string_view pattern = value.substr(beg + 1, end - (beg + 1));
|
|
std::string_view options = value.substr(end + 1);
|
|
|
|
size_t aliasBeg = options.find_first_of(',');
|
|
size_t aliasEnd = options.find_last_of(',');
|
|
|
|
std::string alias{ options.substr(aliasBeg + 1, aliasEnd - (aliasBeg + 1)) };
|
|
size_t inside = 0;
|
|
|
|
if (aliasEnd + 1 < options.size())
|
|
{
|
|
for (int i = aliasEnd + 1; i < options.size(); i++)
|
|
{
|
|
char c = options[i];
|
|
if (c >= '0' && c <= '9')
|
|
{
|
|
inside = inside * 10 + (c - '0');
|
|
}
|
|
else
|
|
{
|
|
assert(false);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
inside = std::string::npos;
|
|
}
|
|
|
|
bool lookbehind = false;
|
|
bool greedy = false;
|
|
|
|
boost::regex_constants::syntax_option_type flags
|
|
= boost::regex_constants::ECMAScript | boost::regex_constants::no_mod_m;
|
|
|
|
for (char c : options.substr(0, aliasBeg))
|
|
{
|
|
switch (c)
|
|
{
|
|
case 'l':
|
|
lookbehind = true;
|
|
break;
|
|
case 'y':
|
|
greedy = true;
|
|
break;
|
|
case 'i':
|
|
flags |= boost::regex_constants::icase;
|
|
break;
|
|
case 'm':
|
|
flags &= ~boost::regex_constants::no_mod_m;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (inside != std::string::npos)
|
|
{
|
|
m_patterns.push_back(std::make_shared<Pattern>(pattern, flags, lookbehind, greedy, std::string{ alias }, std::make_shared<GrammarPtr>(shared_from_this(), inside)));
|
|
}
|
|
else
|
|
{
|
|
m_patterns.push_back(std::make_shared<Pattern>(pattern, flags, lookbehind, greedy, std::string{ alias }));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
const Pattern* LanguageTree::resolvePattern(size_t path)
|
|
{
|
|
return m_patterns[path].get();
|
|
}
|
|
|
|
const Grammar* LanguageTree::resolveGrammar(size_t path)
|
|
{
|
|
return m_grammars[path].get();
|
|
}
|