prism/tests/identifier-test.js

228 lines
5.8 KiB
JavaScript

'use strict';
const { assert } = require('chai');
const PrismLoader = require('./helper/prism-loader');
const { languages } = require('../components.json');
const TokenStreamTransformer = require('./helper/token-stream-transformer');
// This is where you can exclude a language from the identifier test.
//
// To exclude a language to the `testOptions` variable and add your language and the identifier types it should
// excluded from. All languages opt-in for all identifier types by default, so you have to explicitly disable each type
// you want to disable by setting it to `false`.
// Also add a small comment explaining why the language was excluded.
//
// The actual identifiers for all identifier types are defined in the `identifiers` variable.
/**
* @type {Partial<Record<keyof import("../components.json")["languages"], IdentifierTestOptions>>}
*
* @typedef IdentifierTestOptions
* @property {boolean} [word=true]
* @property {boolean} [number=true]
* @property {boolean} [template=true]
*/
const testOptions = {
// all of these have a special syntax for tokens of the form __something__
'asciidoc': {
template: false
},
'markdown': {
template: false
},
'textile': {
template: false
},
'false': {
word: false,
template: false
},
// LilyPond doesn't tokenize based on words
'lilypond': {
word: false,
number: false,
template: false,
},
// Nevod uses underscore symbol as operator and allows hyphen to be part of identifier
'nevod': {
word: false,
template: false,
},
};
/** @type {Record<keyof IdentifierTestOptions, string[]>} */
const identifiers = {
word: [
'abc',
'word',
'foo1',
'foo123',
'foo123bar',
'foo_123',
'foo_123_bar',
],
number: [
'0',
'1',
'9',
'123',
'123456789',
],
template: [
'__PHP0__',
'__LANG0__',
'__LANG123__',
'___PLACEHOLDER_0___',
'___PLACEHOLDER_123___',
],
};
// Below is the implementation of the test.
// If you only came here to exclude a language, you won't find anything below.
/** @type {Record<string, string>} */
const aliasMap = {};
for (const name in languages) {
const element = languages[name];
if (element.alias) {
if (Array.isArray(element.alias)) {
element.alias.forEach(a => {
aliasMap[a] = name;
});
} else {
aliasMap[element.alias] = name;
}
}
}
for (const lang in languages) {
if (lang === 'meta') {
continue;
}
describe(`Test '${lang}'`, function () {
const Prism = PrismLoader.createInstance(lang);
testLiterals(Prism, lang);
});
function toArray(value) {
if (Array.isArray(value)) {
return value;
} else if (value != null) {
return [value];
} else {
return [];
}
}
let optional = toArray(languages[lang].optional);
let modify = toArray(languages[lang].modify);
if (optional.length > 0 || modify.length > 0) {
let name = `Test '${lang}'`;
if (optional.length > 0) {
name += ` + optional dependencies '${optional.join("', '")}'`;
}
if (modify.length > 0) {
name += ` + modify dependencies '${modify.join("', '")}'`;
}
describe(name, function () {
const Prism = PrismLoader.createInstance([...optional, ...modify, lang]);
testLiterals(Prism, lang);
});
}
}
/**
* @param {string} lang
* @returns {IdentifierTestOptions}
*/
function getOptions(lang) {
return testOptions[aliasMap[lang] || lang] || {};
}
/**
* @param {string | Token | (string | Token)[]} token
* @returns {boolean}
*
* @typedef Token
* @property {string} type
* @property {string | Token | (string | Token)[]} content
*/
function isNotBroken(token) {
if (typeof token === 'string') {
return true;
} else if (Array.isArray(token)) {
return token.length === 1 && isNotBroken(token[0]);
} else {
return isNotBroken(token.content);
}
}
/**
* Tests all patterns in the given Prism instance.
*
* @param {any} Prism
* @param {string} lang
*/
function testLiterals(Prism, lang) {
/**
* @param {string[]} identifierElements
* @param {keyof IdentifierTestOptions} identifierType
*/
function matchNotBroken(identifierElements, identifierType) {
for (const name in Prism.languages) {
const grammar = Prism.languages[name];
if (typeof grammar !== 'object') {
continue;
}
const options = getOptions(name);
if (options[identifierType] === false) {
continue;
}
for (const ident of identifierElements) {
const tokens = Prism.tokenize(ident, grammar);
if (!isNotBroken(tokens)) {
assert.fail(
`${name}: Failed to tokenize the ${identifierType} '${ident}' as one or no token.\n` +
'Actual token stream:\n\n' +
TokenStreamTransformer.prettyprint(tokens) +
'\n\n' +
'How to fix this:\n' +
'If your language failed any of the identifier tests then some patterns in your language can break identifiers. ' +
'An identifier is broken if it is split into two different token (e.g. the identifier \'foo123\' (this could be a variable name) but \'123\' is tokenized as a number). ' +
'This is usually a bug and means that some patterns need more boundary checking.\n' +
'This test defines an identifier as /[A-Za-z_][A-Za-z_0-9]*/ so you can use \\b boundary assertions.\n\n' +
'If the syntactic concept of an identifier is not applicable to your language, you can exclude your language from this test (or parts of it). ' +
'Open \'' + __filename + '\' and follow the instructions to exclude a language. ' +
'(This is usually not what you should do. Only very few language do not have the concept of identifiers.)'
);
}
}
}
}
const options = getOptions(lang);
for (const key in identifiers) {
const identifierType = /** @type {keyof IdentifierTestOptions} */ (key);
const element = identifiers[identifierType];
if (options[identifierType] !== false) {
it(`- should not break ${identifierType} identifiers`, function () {
matchNotBroken(element, identifierType);
});
}
}
}