Files
tdesktop/Telegram/lib_spellcheck/spellcheck/third_party/hunspell_controller.cpp
allhaileris afb81b8278
Some checks failed
Docker. / Ubuntu (push) Has been cancelled
User-agent updater. / User-agent (push) Failing after 15s
Lock Threads / lock (push) Failing after 10s
Waiting for answer. / waiting-for-answer (push) Failing after 22s
Close stale issues and PRs / stale (push) Successful in 13s
Needs user action. / needs-user-action (push) Failing after 8s
Can't reproduce. / cant-reproduce (push) Failing after 8s
init
2026-02-16 15:50:16 +03:00

641 lines
16 KiB
C++

// This file is part of Desktop App Toolkit,
// a set of libraries for developing nice desktop applications.
//
// For license and copyright information please follow this link:
// https://github.com/desktop-app/legal/blob/master/LEGAL
//
#include "spellcheck/third_party/hunspell_controller.h"
#include "spellcheck/spellcheck_value.h"
#include <mutex>
#include <shared_mutex>
#include <QDir>
#include <QFileInfo>
#include <hunspell/hunspell.hxx>
#if __has_include(<glib/glib.hpp>)
#include <glib/glib.hpp>
using namespace gi::repository;
#elif QT_VERSION < QT_VERSION_CHECK(6, 0, 0) // __has_include(<glib/glib.hpp>)
#include <QTextCodec>
#endif // Qt < 6.0.0
namespace Platform::Spellchecker::ThirdParty {
namespace {
// Words grouped by the script they are written in; used for both the
// user-added and the ignored word lists of HunspellService.
using WordsMap = std::map<QChar::Script, std::vector<QString>>;
// Maximum number of words in the custom spellcheck dictionary.
constexpr auto kMaxSyncableDictionaryWords = 1300;
// Time budget for collecting suggestions: once it is exceeded,
// fillSuggestionList() stops querying further engines.
// NOTE(review): crl::time is presumably in milliseconds - confirm.
constexpr auto kTimeLimitSuggestion = crl::time(1000);
// Line separator used when writing the helper / custom dictionary files
// and when splitting the custom dictionary file on load.
#ifdef Q_OS_WIN
const auto kLineBreak = QByteArrayLiteral("\r\n");
#else // Q_OS_WIN
const auto kLineBreak = QByteArrayLiteral("\n");
#endif // Q_OS_WIN
// Pair of byte-string paths as produced by PreparePaths(), ready to be
// passed to the Hunspell constructor.
struct PathPair {
// Path to the affix (.aff) file.
QByteArray aff;
// Path to the dictionary (.dic) file.
QByteArray dic;
};
// Converts the given .aff / .dic paths to UTF-8 byte strings with native
// separators. On Windows each path additionally gets the "\\?\" prefix.
[[nodiscard]] PathPair PreparePaths(const QString &aff, const QString &dic) {
	const auto toNativeBytes = [](const QString &path) -> QByteArray {
		auto bytes = QDir::toNativeSeparators(path).toUtf8();
#ifdef Q_OS_WIN
		bytes.prepend("\\\\?\\");
#endif // Q_OS_WIN
		return bytes;
	};
	auto paths = PathPair();
	paths.aff = toNativeBytes(aff);
	paths.dic = toNativeBytes(dic);
	return paths;
}
// Returns the name of the locale that corresponds to the given language id.
auto LocaleNameFromLangId(int langId) {
	const auto locale = ::Spellchecker::LocaleFromLangId(langId);
	return locale.name();
}
// Path of the file with user-added words: "<working dir>/custom".
QString CustomDictionaryPath() {
	const auto directory = ::Spellchecker::WorkingDirPath();
	return QStringLiteral("%1/%2").arg(directory, "custom");
}
// Creates (when missing) a minimal UTF-8 helper dictionary in the working
// directory and returns a Hunspell instance loaded from it.
// File creation is best-effort: failures to open or write are ignored.
[[nodiscard]] Hunspell LoadUtfInitializer() {
	const QString directory = ::Spellchecker::WorkingDirPath();
	const QString affPath = directory + '/' + u"utf_helper.aff"_q;
	const QString dicPath = directory + '/' + u"utf_helper.dic"_q;

	const auto create = [](const QString &path, const QByteArray &content) {
		auto file = QFile(path);
		if (file.open(QIODevice::WriteOnly)) {
			file.write(content);
		}
	};

	if (!QFile::exists(affPath)) {
		// The working directory may not exist yet.
		QDir().mkpath(directory);
		create(affPath, "SET UTF-8" + kLineBreak);
	}
	if (!QFile::exists(dicPath)) {
		// One entry: the word count line followed by a single word.
		create(dicPath, "1" + kLineBreak + "Zzz" + kLineBreak);
	}

	const auto paths = PreparePaths(affPath, dicPath);
	return Hunspell(paths.aff.constData(), paths.dic.constData());
}
// Converts text between QString and the byte encoding named by `charset`.
// The backend is selected at compile time:
//  - GLib::convert, when <glib/glib.hpp> is available;
//  - QTextCodec, on Qt < 6 without that header;
//  - none otherwise: isValid() is always false and the conversion
//    methods return empty values.
class CharsetConverter final {
public:
CharsetConverter(const std::string &charset)
#if __has_include(<glib/glib.hpp>)
: _charset(charset)
#elif QT_VERSION < QT_VERSION_CHECK(6, 0, 0) // __has_include(<glib/glib.hpp>)
: _codec(QTextCodec::codecForName(charset.c_str()))
#endif // Qt < 6.0.0
{}
// Whether conversions to and from the charset can be performed.
[[nodiscard]] bool isValid() const {
#if __has_include(<glib/glib.hpp>)
// Probe both directions with a zero-length conversion.
const uchar empty[] = "";
return GLib::convert(empty, 0, _charset, "UTF-8")
&& GLib::convert(empty, 0, "UTF-8", _charset);
#elif QT_VERSION < QT_VERSION_CHECK(6, 0, 0) // __has_include(<glib/glib.hpp>)
// Valid when a codec was found for the charset name.
return _codec;
#else // Qt < 6.0.0
return false;
#endif // Qt >= 6.0.0 && !__has_include(<glib/glib.hpp>)
}
// Encodes `data` into the target charset.
// NOTE(review): the QTextCodec branch dereferences _codec without a null
// check, so this should only be called after isValid() returned true.
[[nodiscard]] std::string fromUnicode(const QString &data) {
#if __has_include(<glib/glib.hpp>)
const auto utf8 = data.toStdString();
return GLib::convert(
reinterpret_cast<const uchar*>(utf8.data()),
utf8.size(),
_charset,
"UTF-8",
nullptr,
nullptr) | ranges::to<std::string>;
#elif QT_VERSION < QT_VERSION_CHECK(6, 0, 0) // __has_include(<glib/glib.hpp>)
return _codec->fromUnicode(data).toStdString();
#else // Qt < 6.0.0
return {};
#endif // Qt >= 6.0.0 && !__has_include(<glib/glib.hpp>)
}
// Decodes `data` from the target charset into a QString.
// NOTE(review): same unchecked _codec dereference as in fromUnicode().
[[nodiscard]] QString toUnicode(const std::string &data) {
#if __has_include(<glib/glib.hpp>)
return QString::fromStdString(GLib::convert(
reinterpret_cast<const uchar*>(data.data()),
data.size(),
"UTF-8",
_charset,
nullptr,
nullptr) | ranges::to<std::string>);
#elif QT_VERSION < QT_VERSION_CHECK(6, 0, 0) // __has_include(<glib/glib.hpp>)
return _codec->toUnicode(data.data(), data.size());
#else // Qt < 6.0.0
return {};
#endif // Qt >= 6.0.0 && !__has_include(<glib/glib.hpp>)
}
private:
#if __has_include(<glib/glib.hpp>)
// Name of the charset passed to GLib::convert.
std::string _charset;
#elif QT_VERSION < QT_VERSION_CHECK(6, 0, 0) // __has_include(<glib/glib.hpp>)
// Codec looked up by name in the constructor; null when not found.
QTextCodec *_codec;
#endif // Qt < 6.0.0
};
// A single-language Hunspell dictionary together with the converter for
// the dictionary's encoding. Non-copyable.
class HunspellEngine {
public:
// Tries to load "<working dir>/<lang>/<lang>.aff|.dic"; on failure the
// engine is left invalid (see isValid()).
HunspellEngine(const QString &lang);
~HunspellEngine() = default;
// True when the dictionary was loaded and its encoding is convertible.
bool isValid() const;
// Both spell() and suggest() dereference the dictionary and converter
// without checks, so they must only be called on a valid engine.
bool spell(const QString &word) const;
// Appends suggestions that are not yet present to *optionalSuggestions,
// stopping once it holds kMaxSuggestions entries.
void suggest(
const QString &wrongWord,
std::vector<QString> *optionalSuggestions);
// Language name this engine was created for.
QString lang();
// Script derived from the language name in the constructor.
QChar::Script script();
HunspellEngine(const HunspellEngine &) = delete;
HunspellEngine &operator=(const HunspellEngine &) = delete;
private:
QString _lang;
QChar::Script _script;
// Null when the engine is invalid.
std::unique_ptr<Hunspell> _hunspell;
std::unique_ptr<CharsetConverter> _converter;
};
// Owns the set of active HunspellEngine instances plus the user's custom
// (added / ignored) words and answers spelling and suggestion queries.
class HunspellService {
public:
HunspellService();
~HunspellService();
// Asynchronously rebuilds the engine list to match `langs`.
void updateLanguages(std::vector<QString> langs);
// Languages of the engines as of the last completed update.
std::vector<QString> activeLanguages();
// True when the word is ignored, user-added, or accepted by an engine
// of the same script.
[[nodiscard]] bool checkSpelling(const QString &wordToCheck);
// Replaces *optionalSuggestions with suggestions for `wrongWord`.
void fillSuggestionList(
const QString &wrongWord,
std::vector<QString> *optionalSuggestions);
// addWord / removeWord persist the change to the custom dictionary file;
// ignoreWord only affects in-memory state.
void addWord(const QString &word);
void removeWord(const QString &word);
void ignoreWord(const QString &word);
// Whether the word is among the user-added words.
bool isWordInDictionary(const QString &word);
private:
// Save / load the user-added words (see CustomDictionaryPath()).
void writeToFile();
void readFile();
// User-added words for the script of `word` (created when missing).
std::vector<QString> &addedWords(const QString &word);
// Shared with the background job started by updateLanguages().
std::shared_ptr<std::vector<std::unique_ptr<HunspellEngine>>> _engines;
std::vector<QString> _activeLanguages;
// Use an empty Hunspell dictionary to fill it with our remembered words
// for getting suggests.
std::unique_ptr<Hunspell> _customDict;
WordsMap _ignoredWords;
WordsMap _addedWords;
// Bumped by every updateLanguages() call so that outdated background
// jobs can detect that they were superseded.
std::shared_ptr<std::atomic<int>> _epoch;
// Incremented while a fillSuggestionList() call is running.
std::atomic<int> _suggestionsEpoch = 0;
// Guards *_engines: shared for queries, unique for replacing the list.
std::shared_ptr<std::shared_mutex> _engineMutex;
};
// Loads "<working dir>/<lang>/<lang>.aff" and ".dic". The engine stays
// invalid when the working directory is unknown, either file is missing,
// or the dictionary encoding cannot be converted.
HunspellEngine::HunspellEngine(const QString &lang)
: _lang(lang)
, _script(::Spellchecker::LocaleToScriptCode(lang)) {
	const auto root = ::Spellchecker::WorkingDirPath();
	if (root.isEmpty()) {
		return;
	}

	const auto base = QString("%1/%2/%2").arg(root, lang);
	const auto affFile = base + ".aff";
	const auto dicFile = base + ".dic";
	const auto bothPresent = QFileInfo(affFile).isFile()
		&& QFileInfo(dicFile).isFile();
	if (!bothPresent) {
		return;
	}

	const auto paths = PreparePaths(affFile, dicFile);
	auto dictionary = std::make_unique<Hunspell>(
		paths.aff.constData(),
		paths.dic.constData());
	auto converter = std::make_unique<CharsetConverter>(
		dictionary->get_dic_encoding());

	// Keep the dictionary only if its encoding is usable.
	if (converter->isValid()) {
		_hunspell = std::move(dictionary);
	}
	_converter = std::move(converter);
}
// An engine is valid when its Hunspell dictionary is present.
bool HunspellEngine::isValid() const {
	return static_cast<bool>(_hunspell);
}
// Checks the word against the dictionary after converting it to the
// dictionary's encoding. Must only be called on a valid engine.
bool HunspellEngine::spell(const QString &word) const {
	const auto encoded = _converter->fromUnicode(word);
	return _hunspell->spell(encoded);
}
// Appends this engine's suggestions for `wrongWord` to
// *optionalSuggestions, skipping ones already present and stopping as
// soon as the list holds kMaxSuggestions entries.
void HunspellEngine::suggest(
		const QString &wrongWord,
		std::vector<QString> *optionalSuggestions) {
	const auto encoded = _converter->fromUnicode(wrongWord);
	const auto candidates = _hunspell->suggest(encoded);
	for (const auto &candidate : candidates) {
		if (optionalSuggestions->size() == kMaxSuggestions) {
			break;
		}
		auto decoded = _converter->toUnicode(candidate);
		if (!ranges::contains(*optionalSuggestions, decoded)) {
			optionalSuggestions->push_back(std::move(decoded));
		}
	}
}
// Returns the language name passed to the constructor.
QString HunspellEngine::lang() {
return _lang;
}
// Returns the script computed from the language name in the constructor.
QChar::Script HunspellEngine::script() {
return _script;
}
// Returns a copy of the active language list. The list is cleared at the
// start of updateLanguages() and refilled once that update completes.
std::vector<QString> HunspellService::activeLanguages() {
return _activeLanguages;
}
// Thread: Any.
// Creates the shared engine list, an empty custom dictionary, the update
// epoch counter and the engine mutex, then loads the user-added words
// from the custom dictionary file.
HunspellService::HunspellService()
: _engines(std::make_shared<std::vector<std::unique_ptr<HunspellEngine>>>())
, _customDict(std::make_unique<Hunspell>("", ""))
, _epoch(std::make_shared<std::atomic<int>>(0))
, _engineMutex(std::make_shared<std::shared_mutex>()) {
// This is not perfectly safe, but should be mostly fine.
// The helper UTF-8 dictionary is loaded once per process and kept alive
// for its lifetime; the object itself is not used afterwards.
static const auto UtfInitializer = LoadUtfInitializer();
readFile();
}
// Thread: Main.
// Acquires the engine mutex exclusively before destruction proceeds, so
// the destructor blocks until current holders of the mutex release it.
HunspellService::~HunspellService() {
	const auto barrier = std::unique_lock(*_engineMutex);
}
// Thread: Main.
// Returns the list of user-added words for the script of `word`,
// creating an empty list for that script when there is none yet.
std::vector<QString> &HunspellService::addedWords(const QString &word) {
	const auto script = ::Spellchecker::WordScript(word);
	return _addedWords[script];
}
// Thread: Main.
// Rebuilds the engine list so that it matches `langs`.
// The heavy work (loading dictionaries) runs in a crl::async job; every
// call bumps the shared epoch so that a job started by an earlier call
// notices it is outdated and stops. When the job finishes and is still
// current, the active language list and the supported scripts are updated
// on the main thread.
void HunspellService::updateLanguages(std::vector<QString> langs) {
// No suggestion request may be running when languages are changed.
Expects(_suggestionsEpoch.load() == 0);
*_epoch += 1;
_activeLanguages.clear();
const auto savedEpoch = _epoch.get()->load();
// NOTE(review): [=] also captures `this` (used for _activeLanguages
// below), so the service has to outlive the job - confirm.
crl::async([=,
epoch = _epoch,
engineMutex = _engineMutex,
engines = _engines] {
using UniqueEngine = std::unique_ptr<HunspellEngine>;
// Keeps only non-null engines whose language is still requested.
const auto engineLangFilter = [&](const UniqueEngine &engine) {
return engine ? ranges::contains(langs, engine->lang()) : false;
};
if (savedEpoch != epoch.get()->load()) {
return;
}
const auto engineLang = [](const UniqueEngine &engine) {
return engine ? engine->lang() : QString();
};
// Requested languages that have no engine yet.
const auto missedLangs = [&] {
std::shared_lock lock(*engineMutex);
return ranges::views::all(
langs
) | ranges::views::filter([&](auto &lang) {
return !ranges::contains(*engines, lang, engineLang);
}) | ranges::to_vector;
}();
// Added new enabled engines.
// Entries are null for languages that failed to load or when the job
// became outdated in the meantime.
auto localEngines = ranges::views::all(
missedLangs
) | ranges::views::transform([&](auto &lang) -> UniqueEngine {
if (savedEpoch != epoch.get()->load()) {
return nullptr;
}
auto engine = std::make_unique<HunspellEngine>(lang);
if (!engine->isValid()) {
return nullptr;
}
return engine;
}) | ranges::to_vector;
if (savedEpoch != epoch.get()->load()) {
return;
}
{
// Replace the list with (existing + new) engines that pass the filter.
std::unique_lock lock(*engineMutex);
*engines = ranges::views::concat(
*engines, localEngines
) | ranges::views::filter(
// All filtered objects will be automatically released.
engineLangFilter
) | ranges::views::transform([](auto &engine) {
return std::move(engine);
}) | ranges::to_vector;
}
crl::on_main([=] {
if (savedEpoch != epoch.get()->load()) {
return;
}
// The update is complete: reset the epoch and publish the result.
*epoch = 0;
_activeLanguages = ranges::views::all(
*engines
) | ranges::views::transform(&HunspellEngine::lang)
| ranges::to_vector;
::Spellchecker::UpdateSupportedScripts(_activeLanguages);
});
});
}
// Thread: Any.
bool HunspellService::checkSpelling(const QString &wordToCheck) {
const auto wordScript = ::Spellchecker::WordScript(wordToCheck);
if (ranges::contains(_ignoredWords[wordScript], wordToCheck)) {
return true;
}
if (ranges::contains(_addedWords[wordScript], wordToCheck)) {
return true;
}
std::shared_lock lock(*_engineMutex);
for (const auto &engine : *_engines) {
if (wordScript != engine->script()) {
continue;
}
if (engine->spell(wordToCheck)) {
return true;
}
}
return false;
}
// Thread: Any.
// Replaces *optionalSuggestions with up to kMaxSuggestions suggestions
// for `wrongWord`: first the ones from the custom dictionary, then the
// ones from every engine of the word's script. Collection stops early
// when the list is full or the time limit is exceeded; if a newer request
// is detected the list is cleared instead.
void HunspellService::fillSuggestionList(
		const QString &wrongWord,
		std::vector<QString> *optionalSuggestions) {
	const auto wordScript = ::Spellchecker::WordScript(wrongWord);

	const auto customGuesses = _customDict->suggest(wrongWord.toStdString());
	*optionalSuggestions = ranges::views::all(
		customGuesses
	) | ranges::views::take(
		kMaxSuggestions
	) | ranges::views::transform([](auto &guess) {
		return QString::fromStdString(guess);
	}) | ranges::to_vector;

	const auto startTime = crl::now();

	// Take the value produced by the atomic increment itself. A separate
	// load() could already observe the increment of a newer request, and
	// then this request would never notice that it was superseded.
	const auto savedEpoch = ++_suggestionsEpoch;

	{
		std::shared_lock lock(*_engineMutex);
		for (const auto &engine : *_engines) {
			if (_suggestionsEpoch.load() > savedEpoch) {
				// There is a newer request to fill suggestion list,
				// So we should drop the current one.
				optionalSuggestions->clear();
				break;
			}
			if (optionalSuggestions->size() == kMaxSuggestions
				|| ((crl::now() - startTime) > kTimeLimitSuggestion)) {
				break;
			}
			if (wordScript != engine->script()) {
				continue;
			}
			engine->suggest(wrongWord, optionalSuggestions);
		}
	}

	// This request is finished, whether completed or dropped.
	_suggestionsEpoch--;
}
// Thread: Main.
// Remembers the word as ignored (in memory only) and adds it to the
// custom dictionary that is used for suggestions.
void HunspellService::ignoreWord(const QString &word) {
	_customDict->add(word.toStdString());
	auto &ignored = _ignoredWords[::Spellchecker::WordScript(word)];
	ignored.push_back(word);
}
// Thread: Main.
// Tells whether the word is among the user-added words of its script.
bool HunspellService::isWordInDictionary(const QString &word) {
	const auto &words = addedWords(word);
	return ranges::contains(words, word);
}
// Thread: Main.
// Adds the word to the user's custom dictionary and saves the dictionary
// to disk. Does nothing once the dictionary already holds
// kMaxSyncableDictionaryWords words.
void HunspellService::addWord(const QString &word) {
	auto total = 0;
	for (const auto &entry : _addedWords) {
		total += int(entry.second.size());
	}
	// Use '>=' so the limit is never exceeded: readFile() loads at most
	// kMaxSyncableDictionaryWords words, so one word beyond the limit
	// would be written to the file and silently dropped on the next load.
	if (total >= kMaxSyncableDictionaryWords) {
		return;
	}
	_customDict->add(word.toStdString());
	addedWords(word).push_back(word);
	writeToFile();
}
// Thread: Main.
// Removes every occurrence of the word from the custom dictionary and
// from the user-added list of its script, then saves the list to disk.
void HunspellService::removeWord(const QString &word) {
	_customDict->remove(word.toStdString());

	auto &words = addedWords(word);
	const auto tail = ranges::remove(words, word);
	words.erase(tail, words.end());

	writeToFile();
}
// Thread: Main.
void HunspellService::writeToFile() {
auto f = QFile(CustomDictionaryPath());
if (!f.open(QIODevice::WriteOnly)) {
return;
}
auto &&temp = ranges::views::join(
ranges::views::values(_addedWords)
) | ranges::views::transform([&](auto &str) {
return str + kLineBreak;
});
const auto result = ranges::accumulate(std::move(temp), QString{});
f.write(result.toUtf8());
f.close();
}
// Thread: Main.
void HunspellService::readFile() {
using namespace ::Spellchecker;
auto f = QFile(CustomDictionaryPath());
if (const auto info = QFileInfo(f);
!info.isFile()
|| (info.size() > 100 * 1024)
|| !f.open(QIODevice::ReadOnly)) {
if (info.isDir()) {
QDir(info.path()).removeRecursively();
}
return;
}
const auto data = f.readAll();
f.close();
if (data.isEmpty()) {
return;
}
// {"a", "1", "β"};
auto splitedWords = QString::fromUtf8(data).split(kLineBreak)
| ranges::to_vector
| ranges::actions::sort
| ranges::actions::unique;
auto filteredWords = (
splitedWords
) | ranges::views::filter([](auto &word) {
// Ignore words with mixed scripts or non-words characters.
return !word.isEmpty() && !IsWordSkippable(word, false);
}) | ranges::views::take(
kMaxSyncableDictionaryWords
) | ranges::views::transform([](auto &word) {
return std::move(word);
}) | ranges::to_vector;
ranges::for_each(filteredWords, [&](auto &word) {
_customDict->add(word.toStdString());
});
// {{"a"}, {"β"}};
auto groupedWords = ranges::views::all(
filteredWords
) | ranges::views::chunk_by([](auto &a, auto &b) {
return WordScript(a) == WordScript(b);
}) | ranges::views::transform([](auto &&rng) {
return rng | ranges::to_vector;
}) | ranges::to_vector;
// {QChar::Script_Latin, QChar::Script_Greek};
auto scripts = ranges::views::all(
groupedWords
) | ranges::views::transform([](auto &vector) {
return WordScript(vector.front());
}) | ranges::to_vector;
// {QChar::Script_Latin : {"a"}, QChar::Script_Greek : {"β"}};
auto &&zip = ranges::views::zip(
scripts, groupedWords
);
_addedWords = zip | ranges::to<WordsMap>();
}
////// End of HunspellService class.
// Returns the process-wide service instance, created on first use.
HunspellService &SharedSpellChecker() {
	static HunspellService instance;
	return instance;
}
} // namespace
// Forwards to the shared service: true when the word is accepted.
bool CheckSpelling(const QString &wordToCheck) {
	auto &service = SharedSpellChecker();
	return service.checkSpelling(wordToCheck);
}
// Forwards to the shared service, which replaces *optionalSuggestions
// with suggestions for `wrongWord`.
void FillSuggestionList(
		const QString &wrongWord,
		std::vector<QString> *optionalSuggestions) {
	auto &service = SharedSpellChecker();
	service.fillSuggestionList(wrongWord, optionalSuggestions);
}
// Adds the word to the user's custom dictionary via the shared service.
void AddWord(const QString &word) {
	auto &service = SharedSpellChecker();
	service.addWord(word);
}
// Removes the word from the user's custom dictionary via the shared
// service.
void RemoveWord(const QString &word) {
	auto &service = SharedSpellChecker();
	service.removeWord(word);
}
// Marks the word as ignored via the shared service.
void IgnoreWord(const QString &word) {
	auto &service = SharedSpellChecker();
	service.ignoreWord(word);
}
// Tells whether the word is among the user-added words.
bool IsWordInDictionary(const QString &wordToCheck) {
	auto &service = SharedSpellChecker();
	return service.isWordInDictionary(wordToCheck);
}
// Converts the language ids to locale names, clears the supported
// scripts and asks the shared service to switch to those languages.
void UpdateLanguages(std::vector<int> languages) {
	auto codes = std::vector<QString>();
	codes.reserve(languages.size());
	for (const auto id : languages) {
		codes.push_back(LocaleNameFromLangId(id));
	}

	::Spellchecker::UpdateSupportedScripts(std::vector<QString>());
	SharedSpellChecker().updateLanguages(std::move(codes));
}
// Returns the languages currently active in the shared service.
std::vector<QString> ActiveLanguages() {
	auto &service = SharedSpellChecker();
	return service.activeLanguages();
}
// Fills *misspelledWords with the ranges computed by RangesFromText()
// for `text`. The per-word callback returns true for words that are not
// skippable and pass CheckSpelling().
// NOTE(review): how RangesFromText() interprets the callback result is
// not visible here - confirm against its definition.
void CheckSpellingText(
		const QString &text,
		MisspelledWords *misspelledWords) {
	const auto checkWord = [](const QString &word) {
		if (::Spellchecker::IsWordSkippable(word)) {
			return false;
		}
		return CheckSpelling(word);
	};
	*misspelledWords = ::Spellchecker::RangesFromText(text, checkWord);
}
} // namespace Platform::Spellchecker::ThirdParty