init
Some checks failed
Docker. / Ubuntu (push) Has been cancelled
User-agent updater. / User-agent (push) Failing after 15s
Lock Threads / lock (push) Failing after 10s
Waiting for answer. / waiting-for-answer (push) Failing after 22s
Needs user action. / needs-user-action (push) Failing after 8s
Can't reproduce. / cant-reproduce (push) Failing after 8s
Close stale issues and PRs / stale (push) Has been cancelled
Some checks failed
Docker. / Ubuntu (push) Has been cancelled
User-agent updater. / User-agent (push) Failing after 15s
Lock Threads / lock (push) Failing after 10s
Waiting for answer. / waiting-for-answer (push) Failing after 22s
Needs user action. / needs-user-action (push) Failing after 8s
Can't reproduce. / cant-reproduce (push) Failing after 8s
Close stale issues and PRs / stale (push) Has been cancelled
This commit is contained in:
1
Telegram/ThirdParty/cld3/gcld3/__init__.py
vendored
Normal file
1
Telegram/ThirdParty/cld3/gcld3/__init__.py
vendored
Normal file
@@ -0,0 +1 @@
|
||||
from .pybind_ext import *
|
||||
43
Telegram/ThirdParty/cld3/gcld3/pybind_ext.cc
vendored
Normal file
43
Telegram/ThirdParty/cld3/gcld3/pybind_ext.cc
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
#include <pybind11/pybind11.h>
|
||||
#include <pybind11/pytypes.h>
|
||||
#include <pybind11/stl.h>
|
||||
|
||||
#include "../src/nnet_language_identifier.h"
|
||||
|
||||
namespace pybind11 {
|
||||
|
||||
using chrome_lang_id::NNetLanguageIdentifier;
|
||||
|
||||
// This is conventional.
|
||||
namespace py = pybind11;
|
||||
|
||||
PYBIND11_MODULE(pybind_ext, py_module) {
|
||||
py::class_<NNetLanguageIdentifier>(py_module, "NNetLanguageIdentifier")
|
||||
.def(py::init<const int, const int>(), py::arg("min_num_bytes"),
|
||||
py::arg("max_num_bytes"))
|
||||
.def("FindLanguage", &NNetLanguageIdentifier::FindLanguage,
|
||||
py::arg("text"))
|
||||
.def("FindTopNMostFreqLangs",
|
||||
&NNetLanguageIdentifier::FindTopNMostFreqLangs, py::arg("text"),
|
||||
py::arg("num_langs"))
|
||||
.def_readonly_static("kUnknown", &NNetLanguageIdentifier::kUnknown)
|
||||
.def_readonly_static("kMinNumBytesToConsider",
|
||||
&NNetLanguageIdentifier::kMinNumBytesToConsider)
|
||||
.def_readonly_static("kMaxNumBytesToConsider",
|
||||
&NNetLanguageIdentifier::kMaxNumBytesToConsider)
|
||||
.def_readonly_static("kMaxNumInputBytesToConsider",
|
||||
&NNetLanguageIdentifier::kMaxNumInputBytesToConsider)
|
||||
.def_readonly_static("kReliabilityThreshold",
|
||||
&NNetLanguageIdentifier::kReliabilityThreshold)
|
||||
.def_readonly_static("kReliabilityHrBsThreshold",
|
||||
&NNetLanguageIdentifier::kReliabilityHrBsThreshold);
|
||||
|
||||
py::class_<NNetLanguageIdentifier::Result>(py_module, "Result")
|
||||
.def_readwrite("language", &NNetLanguageIdentifier::Result::language)
|
||||
.def_readwrite("probability",
|
||||
&NNetLanguageIdentifier::Result::probability)
|
||||
.def_readwrite("is_reliable",
|
||||
&NNetLanguageIdentifier::Result::is_reliable)
|
||||
.def_readwrite("proportion", &NNetLanguageIdentifier::Result::proportion);
|
||||
}
|
||||
} // namespace pybind11
|
||||
43
Telegram/ThirdParty/cld3/gcld3/tests/gcld3_test.py
vendored
Normal file
43
Telegram/ThirdParty/cld3/gcld3/tests/gcld3_test.py
vendored
Normal file
@@ -0,0 +1,43 @@
|
||||
"""Tests for gcld3."""
|
||||
|
||||
import gcld3
|
||||
import unittest
|
||||
|
||||
|
||||
class NnetLanguageIdentifierTest(unittest.TestCase):
    """Checks the results returned by gcld3.NNetLanguageIdentifier."""

    def testLangIdentification(self):
        """An English sentence is detected as reliable "en"."""
        identifier = gcld3.NNetLanguageIdentifier(
            min_num_bytes=0, max_num_bytes=1000)
        english_text = "This text is written in English."
        outcome = identifier.FindLanguage(text=english_text)
        self.assertEqual(outcome.language, "en")
        self.assertTrue(outcome.is_reliable)
        self.assertGreater(outcome.proportion, 0.99)
        self.assertGreater(outcome.probability, 0.90)

    def testEmptyString(self):
        """Empty input yields "und", unreliable, with zero scores."""
        identifier = gcld3.NNetLanguageIdentifier(
            min_num_bytes=10, max_num_bytes=1000)
        empty_text = ""
        outcome = identifier.FindLanguage(text=empty_text)
        self.assertEqual(outcome.language, "und")
        self.assertFalse(outcome.is_reliable)
        self.assertEqual(outcome.proportion, 0.0)
        self.assertEqual(outcome.probability, 0.0)

    def testLangsIdentification(self):
        """Mixed English/Bulgarian text returns "bg" first, then "en"."""
        identifier = gcld3.NNetLanguageIdentifier(
            min_num_bytes=0, max_num_bytes=1000)
        mixed_text = ("This piece of text is in English. Този текст е на "
                      "Български.")
        ranked = identifier.FindTopNMostFreqLangs(text=mixed_text, num_langs=2)
        # Both entries must satisfy the same bounds; only the expected
        # language differs by position.
        for position, expected_language in enumerate(("bg", "en")):
            entry = ranked[position]
            self.assertEqual(entry.language, expected_language)
            self.assertTrue(entry.is_reliable)
            self.assertLess(entry.proportion, 0.75)
            self.assertGreater(entry.probability, 0.90)
||||
|
||||
|
||||
# Run the test cases in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user