init
Some checks failed
Docker. / Ubuntu (push) Has been cancelled
User-agent updater. / User-agent (push) Failing after 15s
Lock Threads / lock (push) Failing after 10s
Waiting for answer. / waiting-for-answer (push) Failing after 22s
Needs user action. / needs-user-action (push) Failing after 8s
Can't reproduce. / cant-reproduce (push) Failing after 8s
Close stale issues and PRs / stale (push) Has been cancelled

This commit is contained in:
allhaileris
2026-02-16 15:50:16 +03:00
commit afb81b8278
13816 changed files with 3689732 additions and 0 deletions

View File

@@ -0,0 +1 @@
from .pybind_ext import *

View File

@@ -0,0 +1,43 @@
#include <pybind11/pybind11.h>
#include <pybind11/pytypes.h>
#include <pybind11/stl.h>
#include "../src/nnet_language_identifier.h"
namespace pybind11 {

using chrome_lang_id::NNetLanguageIdentifier;

// `py` is the alias conventionally used for the pybind11 namespace.
namespace py = pybind11;

// Defines the `pybind_ext` Python extension module. It exposes two classes:
//   * Result                 - the fields of NNetLanguageIdentifier::Result.
//   * NNetLanguageIdentifier - constructor, FindLanguage,
//                              FindTopNMostFreqLangs and the class constants.
PYBIND11_MODULE(pybind_ext, py_module) {
  // Result is registered before NNetLanguageIdentifier on purpose: pybind11
  // renders each method's signature docstring at the moment .def() runs, and
  // a type that has not been registered yet is spelled with its raw C++ name
  // in that docstring instead of its Python class name.
  // No constructor is bound for Result; only its fields are exposed.
  py::class_<NNetLanguageIdentifier::Result>(py_module, "Result")
      .def_readwrite("language", &NNetLanguageIdentifier::Result::language)
      .def_readwrite("probability",
                     &NNetLanguageIdentifier::Result::probability)
      .def_readwrite("is_reliable",
                     &NNetLanguageIdentifier::Result::is_reliable)
      .def_readwrite("proportion",
                     &NNetLanguageIdentifier::Result::proportion);

  py::class_<NNetLanguageIdentifier>(py_module, "NNetLanguageIdentifier")
      // Python: NNetLanguageIdentifier(min_num_bytes, max_num_bytes)
      .def(py::init<const int, const int>(), py::arg("min_num_bytes"),
           py::arg("max_num_bytes"))
      // Python: FindLanguage(text)
      .def("FindLanguage", &NNetLanguageIdentifier::FindLanguage,
           py::arg("text"))
      // Python: FindTopNMostFreqLangs(text, num_langs)
      .def("FindTopNMostFreqLangs",
           &NNetLanguageIdentifier::FindTopNMostFreqLangs, py::arg("text"),
           py::arg("num_langs"))
      // Static members of the C++ class, exposed as read-only class
      // attributes.
      .def_readonly_static("kUnknown", &NNetLanguageIdentifier::kUnknown)
      .def_readonly_static("kMinNumBytesToConsider",
                           &NNetLanguageIdentifier::kMinNumBytesToConsider)
      .def_readonly_static("kMaxNumBytesToConsider",
                           &NNetLanguageIdentifier::kMaxNumBytesToConsider)
      .def_readonly_static(
          "kMaxNumInputBytesToConsider",
          &NNetLanguageIdentifier::kMaxNumInputBytesToConsider)
      .def_readonly_static("kReliabilityThreshold",
                           &NNetLanguageIdentifier::kReliabilityThreshold)
      .def_readonly_static("kReliabilityHrBsThreshold",
                           &NNetLanguageIdentifier::kReliabilityHrBsThreshold);
}

}  // namespace pybind11

View File

@@ -0,0 +1,43 @@
"""Tests for gcld3."""
import gcld3
import unittest
class NnetLanguageIdentifierTest(unittest.TestCase):
    """Exercises the gcld3 Python bindings on short sample texts."""

    def testLangIdentification(self):
        """An English sentence is reported as reliable "en"."""
        identifier = gcld3.NNetLanguageIdentifier(
            min_num_bytes=0, max_num_bytes=1000)
        english_text = "This text is written in English."
        outcome = identifier.FindLanguage(text=english_text)

        self.assertEqual(outcome.language, "en")
        self.assertTrue(outcome.is_reliable)
        self.assertGreater(outcome.proportion, 0.99)
        self.assertGreater(outcome.probability, 0.90)

    def testEmptyString(self):
        """Empty input is reported as unreliable "und" with zero scores."""
        identifier = gcld3.NNetLanguageIdentifier(
            min_num_bytes=10, max_num_bytes=1000)
        empty_text = ""
        outcome = identifier.FindLanguage(text=empty_text)

        self.assertEqual(outcome.language, "und")
        self.assertFalse(outcome.is_reliable)
        self.assertEqual(outcome.proportion, 0.0)
        self.assertEqual(outcome.probability, 0.00)

    def testLangsIdentification(self):
        """Mixed English/Bulgarian text yields "bg" first, then "en"."""
        identifier = gcld3.NNetLanguageIdentifier(
            min_num_bytes=0, max_num_bytes=1000)
        mixed_text = (
            "This piece of text is in English. Този текст е на Български.")
        ranked = identifier.FindTopNMostFreqLangs(text=mixed_text, num_langs=2)

        # First entry: Bulgarian.
        bulgarian = ranked[0]
        self.assertEqual(bulgarian.language, "bg")
        self.assertTrue(bulgarian.is_reliable)
        self.assertLess(bulgarian.proportion, 0.75)
        self.assertGreater(bulgarian.probability, 0.90)

        # Second entry: English.
        english = ranked[1]
        self.assertEqual(english.language, "en")
        self.assertTrue(english.is_reliable)
        self.assertLess(english.proportion, 0.75)
        self.assertGreater(english.probability, 0.90)
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()