Files
allhaileris afb81b8278
Some checks failed
Docker. / Ubuntu (push) Has been cancelled
User-agent updater. / User-agent (push) Failing after 15s
Lock Threads / lock (push) Failing after 10s
Waiting for answer. / waiting-for-answer (push) Failing after 22s
Close stale issues and PRs / stale (push) Successful in 13s
Needs user action. / needs-user-action (push) Failing after 8s
Can't reproduce. / cant-reproduce (push) Failing after 8s
init
2026-02-16 15:50:16 +03:00

425 lines
11 KiB
JavaScript

const fs = require('fs')
const isEqual = require('lodash.isequal')
// Language ids that loadLanguage has already handled (id -> true), so each
// Prism component script is requested at most once.
const SCRIPTS = {}
/**
 * Evaluates a piece of JavaScript source in the global scope, the way a
 * classic <script> tag would.
 *
 * NOTE(review): this executes whatever code it is handed with full
 * privileges; callers must only pass source they trust.
 *
 * @param {string|Buffer} src - Source code, or anything whose toString()
 *   yields source code.
 */
const include = (src) => {
  const source = src.toString();
  // Calling eval through an alias makes it an *indirect* eval, so the code
  // runs in the global scope instead of this function's scope.
  // Source: https://stackoverflow.com/a/23699187/17140794
  const globalEval = eval;
  globalEval(source);
};
/**
 * Downloads a script and evaluates it in the global scope via include().
 *
 * @param {string} src - URL of the script to fetch.
 * @returns {Promise<void>} Resolves once the script has been evaluated.
 * @throws {Error} When the server answers with a non-2xx status. Without
 *   this check the body of an error page would be handed to eval.
 */
async function loadScript(src) {
  const response = await fetch(src);
  if (!response.ok) {
    throw new Error(`Failed to load ${src}: HTTP ${response.status} ${response.statusText}`);
  }
  include(await response.text());
}
/**
 * Loads one or several languages, strictly one after another.
 *
 * @param {string|string[]|undefined} lngs - A language id, a list of ids,
 *   or a falsy value (in which case nothing happens).
 * @returns {Promise<void>} Resolves after the last language has been loaded.
 */
async function loadLanguages(lngs) {
  if (!lngs) {
    return;
  }
  const queue = Array.isArray(lngs) ? lngs : [lngs];
  for (let index = 0; index < queue.length; index++) {
    // Sequential on purpose: each load finishes before the next one starts.
    await loadLanguage(queue[index]);
  }
}
// Number of component scripts requested so far; used for progress output.
let langNumber = 0;

/**
 * Loads a single Prism language together with everything it lists under
 * `optional`, `require` and `modify` (dependencies first).
 *
 * Entries without a `title`, and names that are not present in
 * `components.languages` at all, are skipped. The language is marked in
 * SCRIPTS *before* its dependencies are walked, so a dependency cycle
 * terminates instead of recursing until the stack overflows.
 *
 * @param {string} lng - Language id as used in `components.languages`.
 * @returns {Promise<void>} Resolves once the language script has been evaluated.
 */
async function loadLanguage(lng) {
  const entry = components.languages[lng];
  if (!entry?.title) {
    return;
  }
  if (SCRIPTS[lng]) {
    return;
  }
  SCRIPTS[lng] = true;

  await loadLanguages(entry.optional);
  await loadLanguages(entry.require);
  await loadLanguages(entry.modify);

  langNumber += 1;
  console.log(`${langNumber} | Loading ${lng}`);
  // TODO: version should probably not be hardcoded
  await loadScript(`https://cdn.jsdelivr.net/npm/prismjs@1.29.0/components/prism-${lng}.min.js`);
}
/**
 * Evaluates a grammar definition stored on disk and registers it in
 * `components.languages`.
 *
 * @param {string} path - Location of the grammar script on disk.
 * @param {string} code - Language id to register the grammar under.
 * @param {string} title - Display title stored for the language.
 * @param {string|string[]} alias - Alias value stored for the language.
 */
function loadLocalLanguage(path, code, title, alias) {
  const source = fs.readFileSync(path);
  include(source);
  components.languages[code] = { title, alias };
}
/**
 * Removes duplicates from an array in place.
 *
 * An element is dropped whenever an equal element exists further right, so
 * the LAST occurrence of every value survives and the survivors keep their
 * relative order.
 *
 * @param {Array} a - Array to deduplicate; it is mutated and returned.
 * @param {(x: *, y: *) => boolean} [fn] - Equality predicate. When omitted
 *   the array is returned untouched.
 * @returns {Array} The same array instance `a`.
 */
function unique(a, fn) {
  if (a.length < 2 || !fn) {
    return a;
  }
  let i = 0;
  while (i < a.length) {
    let hasLaterTwin = false;
    for (let j = i + 1; j < a.length; j++) {
      if (fn(a[i], a[j])) {
        hasLaterTwin = true;
        break;
      }
    }
    if (hasLaterTwin) {
      // Do not advance: after the removal a new element sits at index i and
      // still has to be checked against everything to its right.
      a.splice(i, 1);
    } else {
      i++;
    }
  }
  return a;
}
/**
 * Runs unique() repeatedly until a pass no longer shrinks the array.
 *
 * @param {Array} a - Array to deduplicate.
 * @param {(x: *, y: *) => boolean} fn - Equality predicate forwarded to unique().
 * @returns {Array} Whatever the last unique() call returned.
 */
function uniqlo(a, fn) {
  let current = a;
  for (;;) {
    const sizeBefore = current.length;
    current = unique(current, fn);
    if (current.length >= sizeBefore) {
      return current;
    }
  }
}
/**
 * Downloads Prism, flattens every supported grammar and serializes the
 * result into one binary blob.
 *
 * Blob layout, in order:
 *   uint16 patternCount, then per pattern one string "regex,alias,inside"
 *     (alias and inside may be empty; inside is a grammar index);
 *   uint16 grammarCount, then per grammar: uint8 tokenCount, and per token
 *     a string name, uint8 patternCount and that many uint16 pattern indices;
 *   uint16 languageCount, then per language/alias: string id, string
 *     display name, uint16 grammar index.
 *
 * Strings are written as a length prefix (one byte when shorter than 253,
 * otherwise the marker byte 254 followed by a 3-byte little-endian length)
 * and then one byte per UTF-16 code unit (only the low 8 bits are kept).
 * uint16 values go through Uint16Array, i.e. in the byte order of the
 * machine running the script.
 *
 * The regex part of a pattern is RegExp#toString() output with "l" appended
 * when the Prism pattern has `lookbehind` and "y" when it is `greedy`.
 *
 * @returns {Promise<Blob>} The serialized grammar table.
 * @throws {Error} When a token holds something that is not a pattern, or an
 *   internal lookup fails (either would otherwise corrupt the output).
 */
async function generate() {
  // Everything seen while flattening, duplicates included.
  const tempPatterns = [];
  const tempLanguages = {};
  const tempTokens = [];
  const tempGrammars = [];
  // Prism grammar object -> flattened grammar. Lets grammars that are shared
  // or that reference themselves be flattened exactly once.
  const flattenedByGrammar = new WeakMap();

  // Rewrites regex constructs the consumer of the file does not support.
  function sanitize(pattern) {
    // Unsupported:
    //   UTF-16 ranges
    //   [^] => [\s\S]  <- matches any character, including new line
    //   []  => ??      <- matches the _empty_ string
    // All the whitelisted languages have 0xFFFF as maximum range; this is
    // not true of every grammar Prism ships.
    // TODO: This just bruteforces the regex to work; the result may differ
    // from the original. A general \uXXXX -> \xFF rewrite was considered.
    // TODO: None of the whitelisted languages use [], but others do.
    // However, it is unclear how [] is supposed to work.
    return pattern
      .replaceAll("\\uFFFF", "\\xFF")
      .replaceAll("[^]", "[\\s\\S]")
      .replaceAll("|[])", ")")
      .replaceAll(":[]", ":");
  }

  // Converts one Prism grammar into { tokenName: [pattern, ...] } where a
  // pattern is either a plain string or { pattern, alias?, inside? }.
  function flatten(grammar) {
    const cached = flattenedByGrammar.get(grammar);
    if (cached !== undefined) {
      return cached;
    }
    const keys = {};
    // Registered before recursing so that cyclic grammars terminate.
    flattenedByGrammar.set(grammar, keys);

    let copy = grammar;
    const rest = copy.rest;
    if (rest) {
      // Merge the tokens of `rest` into a shallow copy, leaving the
      // original Prism grammar untouched.
      copy = {};
      Object.keys(grammar).forEach(name => {
        copy[name] = grammar[name];
      });
      for (const token in rest) {
        copy[token] = rest[token];
      }
      delete copy.rest;
    }

    for (const token in copy) {
      if (!copy.hasOwnProperty(token) || !copy[token]) {
        continue;
      }
      const patterns = Array.isArray(copy[token]) ? copy[token] : [copy[token]];
      const indexes = [];
      for (let j = 0; j < patterns.length; ++j) {
        const patternObj = patterns[j];
        const inside = patternObj.inside;
        const lookbehind = !!patternObj.lookbehind;
        const greedy = !!patternObj.greedy;
        // Only the first alias is kept.
        const alias = Array.isArray(patternObj.alias) ? patternObj.alias[0] : patternObj.alias;
        const pattern = patternObj.pattern || patternObj;

        let patternStr = sanitize(pattern.toString());
        if (lookbehind) {
          patternStr += "l";
        }
        if (greedy) {
          patternStr += "y";
        }

        let np;
        if (alias || inside) {
          np = { pattern: patternStr };
          if (alias) {
            np.alias = alias;
          }
          if (inside) {
            np.inside = flatten(inside);
          }
        } else if (pattern instanceof RegExp) {
          np = patternStr;
        } else {
          throw new Error(`Token "${token}" holds a value that is not a pattern: ${patternStr}`);
        }
        tempPatterns.push(np);
        indexes.push(np);
      }
      keys[token] = indexes;
      tempTokens.push(indexes);
    }
    tempGrammars.push(keys);
    return keys;
  }

  // Returns the entry of `pool` that is deep-equal to `value`.
  function canonical(pool, value, what) {
    const found = pool.find(x => isEqual(x, value));
    if (found === undefined) {
      throw new Error(`Internal error: no deduplicated ${what} matches`);
    }
    return found;
  }

  const unsupported = [
    "bsl",
    "coq",
    "gherkin",
    "jexl",
    "kumir",
    "pure",
    "purescript",
    "turtle",
    "sparql" // requires turtle
  ];

  await loadScript("https://cdn.jsdelivr.net/npm/prismjs@1.29.0/components/prism-core.min.js");
  await loadScript("https://prismjs.com/components.js");
  await loadLanguages(Object.keys(components.languages));
  console.log(`\nLoaded all ${langNumber} languages`);
  console.log("Processing...");

  // Manually add local definitions
  loadLocalLanguage('./components/prism-tl.js', 'typelanguage', 'TypeLanguage', 'tl');

  Object.keys(Prism.languages).forEach(lng => {
    if (unsupported.includes(lng) || !components.languages[lng]) {
      return;
    }
    tempLanguages[lng] = flatten(Prism.languages[lng]);
  });

  // Phase 1: deduplicate by deep equality.
  const allTokens = uniqlo(tempTokens, isEqual);
  const allGrammars = uniqlo(tempGrammars, isEqual);
  const allPatterns = uniqlo(tempPatterns, isEqual);

  // Phase 2: make every reference point at the surviving instance.
  Object.keys(tempLanguages).forEach(name => {
    tempLanguages[name] = canonical(allGrammars, tempLanguages[name], "grammar");
  });
  for (const pattern of allPatterns) {
    if (pattern.inside) {
      pattern.inside = canonical(allGrammars, pattern.inside, "grammar");
    }
  }
  for (const token of allTokens) {
    for (let j = 0; j < token.length; j++) {
      token[j] = canonical(allPatterns, token[j], "pattern");
    }
  }
  for (const grammar of allGrammars) {
    Object.keys(grammar).forEach(name => {
      grammar[name] = canonical(allTokens, grammar[name], "token");
    });
  }

  // Phase 3: replace object references by indices.
  for (const pattern of allPatterns) {
    if (pattern.inside) {
      pattern.inside = allGrammars.indexOf(pattern.inside);
    }
  }
  for (const token of allTokens) {
    for (let j = 0; j < token.length; j++) {
      token[j] = allPatterns.indexOf(token[j]);
    }
  }

  // Phase 4: turn each pattern into its "regex,alias,inside" string.
  for (let i = 0; i < allPatterns.length; i++) {
    const entry = allPatterns[i];
    if (entry.pattern) {
      let patternStr = entry.pattern + ",";
      if (entry.alias) {
        patternStr += entry.alias;
      }
      patternStr += ",";
      // Compare against undefined: grammar index 0 is a valid reference and
      // must not be dropped by a truthiness test.
      if (entry.inside !== undefined) {
        patternStr += entry.inside;
      }
      allPatterns[i] = patternStr;
    } else {
      allPatterns[i] += ",,";
    }
  }

  // Phase 5: language ids (and aliases) -> grammar index and display name.
  const allLanguages = {};
  const languageNames = {};
  Object.keys(tempLanguages).forEach(name => {
    const grammarIndex = allGrammars.indexOf(canonical(allGrammars, tempLanguages[name], "grammar"));
    const info = components.languages[name];
    allLanguages[name] = grammarIndex;
    languageNames[name] = info.title;
    if (info.alias) {
      const aliases = Array.isArray(info.alias) ? info.alias : [info.alias];
      for (const lng of aliases) {
        allLanguages[lng] = grammarIndex;
        // aliasTitles may cover only some aliases; the others share the
        // language title instead of ending up with an empty name.
        languageNames[lng] = info.aliasTitles?.[lng] ?? info.title;
      }
    }
  });

  // Phase 6: binary serialization.
  const chunks = [];
  const writeUint16 = i => chunks.push(new Uint16Array([i]));
  const writeUint8 = i => chunks.push(new Uint8Array([i]));
  const writeString = str => {
    if (str.length < 253) {
      writeUint8(str.length);
    } else {
      writeUint8(254 & 0xFF);
      writeUint8(str.length & 0xFF);
      writeUint8((str.length >> 8) & 0xFF);
      writeUint8((str.length >> 16) & 0xFF);
    }
    chunks.push(new Uint8Array(str.split('').map(char => char.charCodeAt(0))));
  };

  // Patterns
  writeUint16(allPatterns.length);
  allPatterns.forEach(pattern => {
    writeString(pattern);
  });

  // Grammars
  writeUint16(allGrammars.length);
  for (const grammar of allGrammars) {
    const names = Object.keys(grammar);
    writeUint8(names.length);
    names.forEach(name => {
      writeString(name);
      writeUint8(grammar[name].length);
      grammar[name].forEach(id => {
        writeUint16(id);
      });
    });
  }

  // Languages
  writeUint16(Object.keys(allLanguages).length);
  Object.keys(allLanguages).forEach(name => {
    writeString(name);
    if (languageNames[name]) {
      writeString(languageNames[name]);
    } else {
      writeString("");
    }
    writeUint16(allLanguages[name]);
  });

  const blob = new Blob(chunks, {type: 'application/octet-binary'});
  console.log(blob);
  return blob;
}
/**
 * Writes the contents of a Blob to a file, synchronously once the bytes
 * have been read out of the blob.
 *
 * @param {Blob} blob - Data to store.
 * @param {string} filename - Destination path.
 * @returns {Promise<void>} Resolves after the file has been written.
 */
async function saveBlob(blob, filename) {
  console.log(`Saving ${blob} to ${filename}`);
  const bytes = await blob.arrayBuffer();
  fs.writeFileSync(filename, Buffer.from(bytes));
}
// Entry point: build the grammar blob, store it at `filepath`, report success.
const filepath = "libprisma/grammars.dat";
generate()
  .then(blob => saveBlob(blob, filepath))
  .then(() => {
    console.log(`Done! Saved to ${filepath}`);
  });