Some checks failed
Docker. / Ubuntu (push) Has been cancelled
User-agent updater. / User-agent (push) Failing after 15s
Lock Threads / lock (push) Failing after 10s
Waiting for answer. / waiting-for-answer (push) Failing after 22s
Close stale issues and PRs / stale (push) Successful in 13s
Needs user action. / needs-user-action (push) Failing after 8s
Can't reproduce. / cant-reproduce (push) Failing after 8s
425 lines
11 KiB
JavaScript
425 lines
11 KiB
JavaScript
const fs = require('fs')
|
|
const isEqual = require('lodash.isequal')
|
|
|
|
// Registry of language ids whose Prism component script has already been
// requested: loadLanguage() sets SCRIPTS[lng] = true right before fetching the
// component, and skips the fetch when the entry is already set.
const SCRIPTS = {}
|
|
/**
 * Evaluates JavaScript source text in the global scope, so that top-level
 * declarations made by that source become globals.
 *
 * NOTE(review): this executes whatever text it is given without any vetting;
 * only pass sources you trust.
 *
 * @param {string|Buffer} src - Source code; converted with `toString()`.
 */
const include = function (src) {
    const sourceText = src.toString();

    // Calling eval through an alias makes it an *indirect* eval, which runs
    // the code in the global scope instead of this function's scope.
    // Source: https://stackoverflow.com/a/23699187/17140794
    const globalEval = eval;
    globalEval(sourceText);
};
|
|
|
|
/**
 * Downloads a script and evaluates it in the global scope via `include`.
 *
 * @param {string} src - URL of the script to fetch.
 * @returns {Promise<void>} Resolves once the script text has been evaluated.
 * @throws {Error} If the server answers with a non-2xx status. Without this
 *   check the body of an error page would be handed to `include` and
 *   evaluated as if it were the script.
 */
async function loadScript(src) {
    const response = await fetch(src);

    if (!response.ok) {
        throw new Error(`Failed to load ${src}: HTTP ${response.status} ${response.statusText}`);
    }

    include(await response.text());
}
|
|
|
|
/**
 * Loads one or several languages, strictly one after another.
 *
 * @param {string|string[]|undefined} lngs - A language id, a list of ids, or
 *   any falsy value (in which case nothing is loaded).
 * @returns {Promise<void>}
 */
async function loadLanguages(lngs) {
    if (!lngs) {
        return;
    }

    // A single id is treated as a one-element list.
    const ids = Array.isArray(lngs) ? lngs : [lngs];

    for (let index = 0; index < ids.length; index++) {
        await loadLanguage(ids[index]);
    }
}
|
|
// Number of language components fetched so far (used for progress output).
let langNumber = 0;

/**
 * Loads a single Prism language component together with everything it lists
 * under `optional`, `require` and `modify` in `components.languages`.
 *
 * Entries without a `title` are skipped. A component script is fetched at most
 * once; `SCRIPTS` records which ids were already requested.
 *
 * @param {string} lng - Language id, a key of `components.languages`.
 * @returns {Promise<void>}
 */
async function loadLanguage(lng) {
    const entry = components.languages[lng];

    if (!entry.title) {
        return;
    }

    // Related languages are loaded before the language itself.
    for (const relation of ['optional', 'require', 'modify']) {
        await loadLanguages(entry[relation]);
    }

    if (SCRIPTS[lng]) {
        return;
    }

    SCRIPTS[lng] = true;
    langNumber += 1;
    console.log(`${langNumber} | Loading ${lng}`);

    // TODO: version should probably not be hardcoded
    await loadScript(`https://cdn.jsdelivr.net/npm/prismjs@1.29.0/components/prism-${lng}.min.js`);
}
|
|
|
|
/**
 * Evaluates a grammar definition stored on disk and registers it in
 * `components.languages`.
 *
 * @param {string} path - File to read and evaluate in the global scope.
 * @param {string} code - Key under which the language is registered.
 * @param {string} title - Title stored in the registry entry.
 * @param {string|string[]} alias - Alias (or aliases) stored in the entry.
 */
function loadLocalLanguage(path, code, title, alias) {
    const source = fs.readFileSync(path);
    include(source);

    components.languages[code] = { title, alias };
}
|
|
|
|
/**
 * Removes duplicates from `a` IN PLACE and returns the same array.
 *
 * For every group of elements considered equal by `fn`, only the LAST
 * occurrence is kept; the survivors keep their relative order.
 *
 * The previous implementation spliced while its inner loop was still running,
 * which skipped comparisons and could leave duplicates behind after a single
 * call (e.g. [1, 1, 1] became [1, 1]). Repeating that implementation until
 * the length stopped shrinking converged to exactly the result produced here
 * in one pass, provided `fn` behaves like an equivalence relation.
 *
 * @param {Array} a - Array to de-duplicate (mutated).
 * @param {(earlier: *, later: *) => boolean} [fn] - Equality predicate. When
 *   omitted the array is returned untouched.
 * @returns {Array} The array `a` itself.
 */
function unique(a, fn) {
    if (a.length === 0 || a.length === 1) {
        return a;
    }
    if (!fn) {
        return a;
    }

    // Compact in place: `kept` never runs ahead of `i`, so the elements still
    // to be compared (index >= i) are never overwritten.
    let kept = 0;

    for (let i = 0; i < a.length; i++) {
        let hasLaterDuplicate = false;

        for (let j = i + 1; j < a.length; j++) {
            if (fn(a[i], a[j])) {
                hasLaterDuplicate = true;
                break;
            }
        }

        if (!hasLaterDuplicate) {
            a[kept] = a[i];
            kept += 1;
        }
    }

    a.length = kept;
    return a;
}
|
|
|
|
/**
 * Applies `unique` repeatedly until a pass no longer shrinks the array.
 *
 * @param {Array} a - Array to de-duplicate.
 * @param {(x: *, y: *) => boolean} fn - Equality predicate handed to `unique`.
 * @returns {Array} Whatever the final `unique` pass returned.
 */
function uniqlo(a, fn) {
    let result = a;

    while (true) {
        const lengthBefore = result.length;
        result = unique(result, fn);

        // Stop as soon as a pass removed nothing.
        if (result.length >= lengthBefore) {
            return result;
        }
    }
}
|
|
|
|
/**
 * Loads the Prism core, the Prism component registry and every language
 * component, then flattens all grammars into three de-duplicated tables
 * (patterns, tokens, grammars) and serializes them into a binary Blob.
 *
 * Layout of the produced data, in order:
 *   - uint16 pattern count, then one string per pattern of the form
 *     "<regex source + flags>,<alias>,<inside grammar index>" where the last
 *     two fields may be empty;
 *   - uint16 grammar count, then per grammar: uint8 token count and, per
 *     token, the token name (string), a uint8 pattern count and that many
 *     uint16 pattern indexes;
 *   - uint16 language count, then per language id or alias: the id (string),
 *     the display name (string, possibly empty) and a uint16 grammar index.
 * A string is written as a single length byte when shorter than 253
 * characters, otherwise as the marker byte 254 followed by the length in
 * three bytes (least significant first); then one byte per UTF-16 code unit
 * (only the low 8 bits are kept). uint16 values are written through
 * Uint16Array, i.e. in the byte order of the machine running the script.
 *
 * Fixes compared to the previous revision:
 *   - a pattern whose nested (`inside`) grammar has index 0 no longer loses
 *     that reference (the index used to be tested for truthiness);
 *   - an alias that is missing from a partial `aliasTitles` map now falls back
 *     to the language title instead of getting an empty display name;
 *   - inconsistencies that used to hit a bare `debugger` statement (and then
 *     silently produced `undefined` / -1 entries) now throw.
 *
 * @returns {Promise<Blob>} The serialized grammar tables.
 * @throws {Error} When a token holds something that is neither a RegExp nor a
 *   pattern object with alias/inside, or when a flattened item cannot be found
 *   in its de-duplicated table.
 */
async function generate() {
    // Everything collected while flattening, before de-duplication.
    const tempPatterns = [];
    const tempLanguages = {};
    const tempTokens = [];
    const tempGrammars = [];

    // Source grammar -> flattened grammar. The entry is registered before the
    // grammar is walked, so grammars that reference themselves terminate.
    const flattened = new WeakMap();

    // Rewrites regex constructs the consumer does not support.
    function sanitize(pattern) {
        // Unsupported:
        //   UTF-16 ranges
        //   [^] => [\s\S]  <- matches any character, including new line
        //   []  => ??      <- matches the _empty_ string
        //
        // All the whitelisted languages have 0xFFFF as maximum range.
        // This is not the case for all the grammars supported by Prism.
        let result = pattern.replaceAll("\\uFFFF", "\\xFF");
        result = result.replaceAll("[^]", "[\\s\\S]");

        // TODO: This just bruteforces the regex to work, but of course the
        // result may vary from the original one. Other \uXXXX escapes are not
        // rewritten yet.

        // TODO: Again, none of the whitelisted languages use [], but others
        // do. However, it is unclear how [] is supposed to work.
        result = result.replaceAll("|[])", ")");
        result = result.replaceAll(":[]", ":");

        return result;
    }

    // Converts one Prism grammar into { tokenName: [pattern, ...] } where a
    // pattern is either a plain string or { pattern, alias?, inside? }.
    function flatten(grammar) {
        const cached = flattened.get(grammar);
        if (cached !== undefined) {
            return cached;
        }

        const keys = {};
        flattened.set(grammar, keys);

        // Merge the `rest` grammar (if any) into a shallow copy.
        let copy = grammar;
        const rest = copy.rest;
        if (rest) {
            copy = {};

            for (const name of Object.keys(grammar)) {
                copy[name] = grammar[name];
            }
            for (const name in rest) {
                copy[name] = rest[name];
            }

            delete copy.rest;
        }

        for (const token of Object.keys(copy)) {
            if (!copy[token]) {
                continue;
            }

            const patterns = Array.isArray(copy[token]) ? copy[token] : [copy[token]];
            const indexes = [];

            for (const patternObj of patterns) {
                const inside = patternObj.inside;
                const lookbehind = !!patternObj.lookbehind;
                const greedy = !!patternObj.greedy;

                // Only the first alias is kept.
                const alias = Array.isArray(patternObj.alias) ? patternObj.alias[0] : patternObj.alias;

                const pattern = patternObj.pattern || patternObj;
                let patternStr = sanitize(pattern.toString());

                // Prism-specific options are appended after the regex flags.
                if (lookbehind) {
                    patternStr += "l";
                }
                if (greedy) {
                    patternStr += "y";
                }

                let np;
                if (alias || inside) {
                    np = { pattern: patternStr };

                    if (alias) {
                        np.alias = alias;
                    }
                    if (inside) {
                        np.inside = flatten(inside);
                    }
                } else if (pattern instanceof RegExp) {
                    np = patternStr;
                } else {
                    throw new Error(`Unsupported pattern for token "${token}": ${patternStr}`);
                }

                tempPatterns.push(np);
                indexes.push(np);
            }

            keys[token] = indexes;
            tempTokens.push(indexes);
        }

        tempGrammars.push(keys);
        return keys;
    }

    // Returns the entry of a de-duplicated table that is deep-equal to `value`.
    function findCanonical(table, value, what) {
        const found = table.find(x => isEqual(x, value));
        if (found === undefined) {
            throw new Error(`Could not find a de-duplicated ${what}`);
        }
        return found;
    }

    const unsupported = [
        "bsl",
        "coq",
        "gherkin",
        "jexl",
        "kumir",
        "pure",
        "purescript",
        "turtle",
        "sparql" // requires turtle
    ];

    await loadScript("https://cdn.jsdelivr.net/npm/prismjs@1.29.0/components/prism-core.min.js");
    await loadScript("https://prismjs.com/components.js");
    await loadLanguages(Object.keys(components.languages));
    console.log(`\nLoaded all ${langNumber} languages`);
    console.log("Processing...");

    // Manually add local definitions
    loadLocalLanguage('./components/prism-tl.js', 'typelanguage', 'TypeLanguage', 'tl');

    Object.keys(Prism.languages).forEach(lng => {
        if (unsupported.includes(lng) || !components.languages[lng]) {
            return;
        }

        tempLanguages[lng] = flatten(Prism.languages[lng]);
    });

    const allTokens = uniqlo(tempTokens, isEqual);
    const allGrammars = uniqlo(tempGrammars, isEqual);
    const allPatterns = uniqlo(tempPatterns, isEqual);

    // Step 1: make every reference point at the canonical (surviving) object
    // of its table, so that indexOf() works in step 2.
    Object.keys(tempLanguages).forEach(name => {
        tempLanguages[name] = findCanonical(allGrammars, tempLanguages[name], `grammar for language "${name}"`);
    });

    for (let i = 0; i < allPatterns.length; i++) {
        if (allPatterns[i].inside) {
            allPatterns[i].inside = findCanonical(allGrammars, allPatterns[i].inside, "inside grammar");
        }
    }

    for (let i = 0; i < allTokens.length; i++) {
        const token = allTokens[i];

        for (let j = 0; j < token.length; j++) {
            token[j] = findCanonical(allPatterns, token[j], "pattern");
        }
    }

    for (let i = 0; i < allGrammars.length; i++) {
        Object.keys(allGrammars[i]).forEach(name => {
            allGrammars[i][name] = findCanonical(allTokens, allGrammars[i][name], `token "${name}"`);
        });
    }

    // Step 2: replace object references by table indexes.
    for (let i = 0; i < allPatterns.length; i++) {
        if (allPatterns[i].inside) {
            allPatterns[i].inside = allGrammars.indexOf(allPatterns[i].inside);
        }
    }

    for (let i = 0; i < allTokens.length; i++) {
        const token = allTokens[i];

        for (let j = 0; j < token.length; j++) {
            token[j] = allPatterns.indexOf(token[j]);
        }
    }

    // Step 3: turn every pattern into its "pattern,alias,inside" string.
    for (let i = 0; i < allPatterns.length; i++) {
        const entry = allPatterns[i];

        if (entry.pattern) {
            const alias = entry.alias ? entry.alias : "";
            // Compare against undefined: grammar index 0 is a valid reference.
            const inside = entry.inside !== undefined ? entry.inside : "";

            allPatterns[i] = `${entry.pattern},${alias},${inside}`;
        } else {
            allPatterns[i] += ",,";
        }
    }

    const allLanguages = {};
    const languageNames = {};

    Object.keys(tempLanguages).forEach(name => {
        const grammar = findCanonical(allGrammars, tempLanguages[name], `grammar for language "${name}"`);
        const grammarIndex = allGrammars.indexOf(grammar);
        const info = components.languages[name];

        allLanguages[name] = grammarIndex;
        languageNames[name] = info.title;

        if (info.alias) {
            const aliases = Array.isArray(info.alias) ? info.alias : [info.alias];

            for (const lng of aliases) {
                allLanguages[lng] = grammarIndex;
                // aliasTitles may cover only some of the aliases.
                languageNames[lng] = info.aliasTitles?.[lng] ?? info.title;
            }
        }
    });

    const chunks = [];

    const writeUint16 = i => chunks.push(new Uint16Array([i]));
    const writeUint8 = i => chunks.push(new Uint8Array([i]));
    const writeString = str => {
        if (str.length < 253) {
            writeUint8(str.length);
        } else {
            writeUint8(254 & 0xFF);
            writeUint8(str.length & 0xFF);
            writeUint8((str.length >> 8) & 0xFF);
            writeUint8((str.length >> 16) & 0xFF);
        }

        // One byte per UTF-16 code unit; values above 0xFF wrap around.
        const bytes = new Uint8Array(str.length);
        for (let i = 0; i < str.length; i++) {
            bytes[i] = str.charCodeAt(i);
        }
        chunks.push(bytes);
    };

    // Patterns
    writeUint16(allPatterns.length);

    allPatterns.forEach(pattern => {
        writeString(pattern);
    });

    // Grammars
    writeUint16(allGrammars.length);

    for (let i = 0; i < allGrammars.length; i++) {
        const names = Object.keys(allGrammars[i]);
        writeUint8(names.length);

        names.forEach(name => {
            writeString(name);
            writeUint8(allGrammars[i][name].length);
            allGrammars[i][name].forEach(id => {
                writeUint16(id);
            });
        });
    }

    // Languages
    writeUint16(Object.keys(allLanguages).length);

    Object.keys(allLanguages).forEach(name => {
        writeString(name);
        writeString(languageNames[name] || "");
        writeUint16(allLanguages[name]);
    });

    const blob = new Blob(chunks, {type: 'application/octet-binary'});
    console.log(blob);
    return blob;
}
|
|
|
|
/**
 * Writes the contents of a Blob to a file, synchronously once the Blob's
 * bytes have been read.
 *
 * @param {Blob} blob - Data to store.
 * @param {string} filename - Destination path.
 * @returns {Promise<void>}
 */
async function saveBlob(blob, filename) {
    console.log(`Saving ${blob} to ${filename}`)

    const bytes = await blob.arrayBuffer();
    const contents = Buffer.from(bytes);

    fs.writeFileSync(filename, contents);
}
|
|
|
|
// Destination of the generated grammar table.
const filepath = "libprisma/grammars.dat";

// Entry point: build the grammar blob, store it, and report the outcome.
// The chain is flat so that a failure in either step reaches the single
// rejection handler, which logs the error and marks the run as failed.
generate()
    .then((blob) => saveBlob(blob, filepath))
    .then(() => {
        console.log("Done! Saved to " + filepath);
    })
    .catch((err) => {
        console.error(err);
        process.exitCode = 1;
    });
|