Added regex coverage (#3138)

This commit is contained in:
Michael Schmidt 2021-10-19 19:34:41 +02:00 committed by GitHub
parent 2e834c8c9d
commit 5333e28106
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 371 additions and 73 deletions

View File

@ -57,3 +57,16 @@ jobs:
node-version: 14.x
- run: npm ci
- run: npm run lint:ci
coverage:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Use Node.js 14.x
uses: actions/setup-node@v1
with:
node-version: 14.x
- run: npm ci
- run: npm run regex-coverage

View File

@ -14,6 +14,7 @@
"lint": "eslint . --cache",
"lint:fix": "npm run lint -- --fix",
"lint:ci": "eslint . --max-warnings 0",
"regex-coverage": "mocha tests/coverage.js",
"test:aliases": "mocha tests/aliases-test.js",
"test:core": "mocha tests/core/**/*.js",
"test:dependencies": "mocha tests/dependencies-test.js",

260
tests/coverage.js Normal file
View File

@ -0,0 +1,260 @@
'use strict';
const TestDiscovery = require('./helper/test-discovery');
const TestCase = require('./helper/test-case');
const PrismLoader = require('./helper/prism-loader');
const { BFS, BFSPathToPrismTokenPath } = require('./helper/util');
const { assert } = require('chai');
const components = require('../components.json');
// The ids of all entries under `components.languages`, skipping the `meta` key.
const ALL_LANGUAGES = Object.keys(components.languages).filter(id => id !== 'meta');
describe('Pattern test coverage', function () {
/**
* Registry of every pattern seen by `createInstance`, keyed by the string form of the (global) regex.
*
* @type {Map<string, PatternData>}
* @typedef PatternData
* @property {RegExp} pattern The global regex that was registered first under this key.
* @property {string} language The key of the language in which the pattern was first found.
* @property {Set<string>} from The token paths of all places where this pattern was found.
* @property {RegExpExecArray[]} matches Every successful match recorded by the instrumented `exec`.
*/
const patterns = new Map();
/**
 * Creates a Prism instance whose regexes record their matches.
 *
 * Every regex reachable from `Prism.languages` is swapped for a global version, registered in `patterns`, and given
 * its own `exec` that stores each successful match in the data of its pattern.
 *
 * @param {string | string[]} languages
 * @returns {import("./helper/prism-loader").Prism}
 */
function createInstance(languages) {
	const Prism = PrismLoader.createInstance(languages);

	const isRegExp = (value) => Object.prototype.toString.call(value) === '[object RegExp]';

	/**
	 * Returns the data of the given regex. Unknown regexes get a fresh entry; known ones only gain a token path.
	 *
	 * @param {RegExp} regex
	 * @param {{ key: string, value: any }[]} path
	 * @param {string} tokenPath
	 * @returns {PatternData}
	 */
	const register = (regex, path, tokenPath) => {
		const patternKey = String(regex);
		const known = patterns.get(patternKey);
		if (known) {
			known.from.add(tokenPath);
			return known;
		}

		const created = {
			pattern: regex,
			// path[0] is the search root, so path[1] is the language entry
			language: path[1].key,
			from: new Set([tokenPath]),
			matches: []
		};
		patterns.set(patternKey, created);
		return created;
	};

	BFS(Prism.languages, (path, object) => {
		const { key, value } = path[path.length - 1];
		const tokenPath = BFSPathToPrismTokenPath(path);
		if (!isRegExp(value)) {
			return;
		}

		// replace the original regex in the grammar with the instrumented global one
		const regex = makeGlobal(value);
		object[key] = regex;

		const data = register(regex, path, tokenPath);
		regex.exec = (string) => {
			const match = RegExp.prototype.exec.call(regex, string);
			if (match) {
				data.matches.push(match);
			}
			return match;
		};
	});

	return Prism;
}
// Registers the patterns of every language up front by creating one instrumented instance with all of them.
describe('Register all patterns', function () {
it('all', function () {
// loading every language takes a while; only flag this test as slow after 10 seconds
this.slow(10 * 1000);
// This will cause ALL regexes of Prism to be registered in the patterns map.
// (Languages that don't have any tests can't be caught otherwise.)
createInstance(ALL_LANGUAGES);
});
});
// Runs every discovered test file through the instrumented `createInstance` so that pattern matches get recorded.
describe('Run all language tests', function () {
// define tests for all tests in all languages in the test suite
for (const [languageIdentifier, files] of TestDiscovery.loadAllTests()) {
it(languageIdentifier, function () {
// all test files of one language run inside a single test; allow up to 10 seconds for them
this.timeout(10 * 1000);
for (const filePath of files) {
try {
TestCase.run({
languageIdentifier,
filePath,
updateMode: 'none',
createInstance
});
} catch (error) {
// we don't care about whether the test succeeds,
// we just want to gather usage data
}
}
});
}
});
// For each language, checks the recorded matches: every pattern must have matched at least once, and every
// keyword of a keyword list must have been matched.
describe('Coverage', function () {
	const LEARN_MORE = 'You can learn more about writing tests at https://prismjs.com/test-suite.html#writing-tests';

	/**
	 * Fails the running test with a report: the summary and the hint first, then one paragraph per problem.
	 *
	 * @param {string} summary
	 * @param {string[]} problems
	 */
	function failWith(summary, problems) {
		assert.fail([`${summary}\n${LEARN_MORE}`, ...problems].join('\n\n'));
	}

	for (const language of ALL_LANGUAGES) {
		describe(language, function () {
			it(`- should cover all patterns`, function () {
				const problems = getAllOf(language)
					.filter(data => data.matches.length === 0)
					.map(data => formatProblem(data, [
						'This pattern is completely untested. Add test files that match this pattern.'
					]));

				if (problems.length > 0) {
					failWith(`${problems.length} pattern(s) are untested:`, problems);
				}
			});

			it(`- should exhaustively cover all keywords in keyword lists`, function () {
				const problems = [];

				for (const data of getAllOf(language)) {
					// patterns without any match are reported by the test above; don't report them twice
					const keywords = data.matches.length > 0 ? getKeywordList(data.pattern) : null;
					if (!keywords) {
						continue;
					}

					const keywordCount = keywords.size;
					// whatever is left in the set after removing all matched texts is untested
					for (const [text] of data.matches) {
						keywords.delete(data.pattern.ignoreCase ? text.toUpperCase() : text);
					}
					if (keywords.size === 0) {
						continue;
					}

					problems.push(formatProblem(data, [
						`Add test files to test all keywords. The following keywords (${keywords.size}/${keywordCount}) are untested:`,
						...[...keywords].map(k => ` ${k}`)
					]));
				}

				if (problems.length > 0) {
					failWith(`${problems.length} keyword list(s) are not exhaustively tested:`, problems);
				}
			});
		});
	}
});
/**
 * Collects the data of all registered patterns that are attributed to the given language.
 *
 * @param {string} language
 * @returns {PatternData[]}
 */
function getAllOf(language) {
	const ofLanguage = [];
	for (const data of patterns.values()) {
		if (data.language === language) {
			ofLanguage.push(data);
		}
	}
	return ofLanguage;
}
/**
 * Limits the given string to `maxLength` characters. A longer string is cut and ends in an ellipsis instead.
 *
 * @param {string} string
 * @param {number} maxLength
 * @returns {string}
 */
function short(string, maxLength) {
	return string.length > maxLength
		? string.slice(0, maxLength - 1) + '…'
		: string;
}
/**
 * If the given pattern describes a keyword list, all of its keywords will be returned (in upper case for
 * case-insensitive patterns). Otherwise, `null` will be returned.
 *
 * @param {RegExp} pattern
 * @returns {Set<string> | null}
 */
function getKeywordList(pattern) {
	// Right now, only keyword lists of the form /\b(?:foo|bar)\b/ are supported.
	// In the future, we might want to convert these regexes to NFAs and iterate all words to cover more complex
	// keyword lists and even operator and punctuation lists.
	const GROUP_START = '(?:';
	const GROUP_END = ')';

	const withoutBoundaries = pattern.source.replace(/^\\b|\\b$/g, '');
	const isGrouped = withoutBoundaries.startsWith(GROUP_START) && withoutBoundaries.endsWith(GROUP_END);
	const alternatives = isGrouped
		? withoutBoundaries.slice(GROUP_START.length, withoutBoundaries.length - GROUP_END.length)
		: withoutBoundaries;

	// anything but plain words separated by `|` is not a keyword list
	if (!/^\w+(?:\|\w+)*$/.test(alternatives)) {
		return null;
	}

	const normalized = pattern.ignoreCase ? alternatives.toUpperCase() : alternatives;
	return new Set(normalized.split(/\|/g));
}
/**
 * Splits the given occurrences into the first one (the origin) and all remaining ones.
 *
 * @param {Iterable<string>} occurrences
 * @returns {{ origin: string; otherOccurrences: string[] }}
 */
function splitOccurrences(occurrences) {
	const [origin, ...otherOccurrences] = occurrences;
	return { origin, otherOccurrences };
}
/**
 * Builds the report entry of one pattern: the token path of its origin, the (shortened) pattern, the given message
 * lines, and a list of all other occurrences of the pattern if there are any.
 *
 * @param {PatternData} data
 * @param {string[]} messageLines
 * @returns {string}
 */
function formatProblem(data, messageLines) {
	const { origin, otherOccurrences } = splitOccurrences(data.from);

	const header = [`${origin}:`, short(String(data.pattern), 100), ''];
	const footer = otherOccurrences.length
		? ['', 'Other occurrences of this pattern:', ...otherOccurrences.map(o => `- ${o}`)]
		: [];

	return [...header, ...messageLines, ...footer].join('\n ');
}
});
/**
 * Returns a regex with the global flag set: the given regex itself if it is already global, and otherwise a new
 * regex with the same source and flags plus `g`.
 *
 * @param {RegExp} regex
 * @returns {RegExp}
 */
function makeGlobal(regex) {
	return regex.global ? regex : RegExp(regex.source, `${regex.flags}g`);
}

View File

@ -1,6 +1,7 @@
'use strict';
const fs = require('fs');
const path = require('path');
const { assert } = require('chai');
const Prettier = require('prettier');
const PrismLoader = require('./prism-loader');
@ -11,6 +12,12 @@ const TokenStreamTransformer = require('./token-stream-transformer');
* @typedef {import("../../components/prism-core.js")} Prism
*/
/**
 * The instance factory used when the caller does not supply one. It delegates to `PrismLoader.createInstance`.
 *
 * @param {string[]} languages
 * @returns {Prism}
 */
const defaultCreateInstance = function (languages) {
	return PrismLoader.createInstance(languages);
};
/**
* Handles parsing and printing of a test case file.
*
@ -297,6 +304,29 @@ class HighlightHTMLRunner {
module.exports = {
TestCaseFile,
/**
* Runs the given test file and asserts the result.
*
* This function will determine what kind of test files the given file is and call the appropriate method to run the
* test.
*
* @param {RunOptions} options
* @returns {void}
*
* @typedef RunOptions
* @property {string} languageIdentifier
* @property {string} filePath
* @property {"none" | "insert" | "update"} updateMode
* @property {(languages: string[]) => Prism} [createInstance]
*/
run(options) {
if (path.extname(options.filePath) === '.test') {
this.runTestCase(options.languageIdentifier, options.filePath, options.updateMode, options.createInstance);
} else {
this.runTestsWithHooks(options.languageIdentifier, require(options.filePath), options.createInstance);
}
},
/**
* Runs the given test case file and asserts the result
*
@ -312,13 +342,16 @@ module.exports = {
* @param {string} languageIdentifier
* @param {string} filePath
* @param {"none" | "insert" | "update"} updateMode
* @param {(languages: string[]) => Prism} [createInstance]
*/
runTestCase(languageIdentifier, filePath, updateMode) {
runTestCase(languageIdentifier, filePath, updateMode, createInstance = defaultCreateInstance) {
let runner;
if (/\.html\.test$/i.test(filePath)) {
this.runTestCaseWithRunner(languageIdentifier, filePath, updateMode, new HighlightHTMLRunner());
runner = new HighlightHTMLRunner();
} else {
this.runTestCaseWithRunner(languageIdentifier, filePath, updateMode, new TokenizeJSONRunner());
runner = new TokenizeJSONRunner();
}
this.runTestCaseWithRunner(languageIdentifier, filePath, updateMode, runner, createInstance);
},
/**
@ -326,13 +359,14 @@ module.exports = {
* @param {string} filePath
* @param {"none" | "insert" | "update"} updateMode
* @param {Runner<T>} runner
* @param {(languages: string[]) => Prism} createInstance
* @template T
*/
runTestCaseWithRunner(languageIdentifier, filePath, updateMode, runner) {
runTestCaseWithRunner(languageIdentifier, filePath, updateMode, runner, createInstance) {
const testCase = TestCaseFile.readFromFile(filePath);
const usedLanguages = this.parseLanguageNames(languageIdentifier);
const Prism = PrismLoader.createInstance(usedLanguages.languages);
const Prism = createInstance(usedLanguages.languages);
// the first language is the main language to highlight
const actualValue = runner.run(Prism, testCase.code, usedLanguages.mainLanguage);

View File

@ -3,41 +3,37 @@
const fs = require('fs');
const path = require('path');
const LANGUAGES_DIR = path.join(__dirname, '..', 'languages');
module.exports = {
/**
* Loads the list of all available tests
*
* @param {string} rootDir
* @returns {Object<string, string[]>}
* @param {string} [rootDir]
* @returns {Map<string, string[]>}
*/
loadAllTests(rootDir) {
/** @type {Object.<string, string[]>} */
const testSuite = {};
rootDir = rootDir || LANGUAGES_DIR;
for (const language of this.getAllDirectories(rootDir)) {
testSuite[language] = this.getAllFiles(path.join(rootDir, language));
}
return testSuite;
return new Map(this.getAllDirectories(rootDir).map(language => {
return [language, this.getAllFiles(path.join(rootDir, language))];
}));
},
/**
* Loads the list of available tests that match the given languages
*
* @param {string} rootDir
* @param {string|string[]} languages
* @returns {Object<string, string[]>}
* @param {string} [rootDir]
* @returns {Map<string, string[]>}
*/
loadSomeTests(rootDir, languages) {
/** @type {Object.<string, string[]>} */
const testSuite = {};
loadSomeTests(languages, rootDir) {
rootDir = rootDir || LANGUAGES_DIR;
for (const language of this.getSomeDirectories(rootDir, languages)) {
testSuite[language] = this.getAllFiles(path.join(rootDir, language));
}
return testSuite;
return new Map(this.getSomeDirectories(rootDir, languages).map(language => {
return [language, this.getAllFiles(path.join(rootDir, language))];
}));
},

View File

@ -19,7 +19,7 @@ module.exports = {
* Performs a breadth-first search on the given start element.
*
* @param {any} start
* @param {(path: { key: string, value: any }[]) => void} callback
* @param {(path: { key: string, value: any }[], obj: Record<string, any>) => void} callback
*/
BFS(start, callback) {
const visited = new Set();
@ -28,8 +28,6 @@ module.exports = {
[{ key: null, value: start }]
];
callback(toVisit[0]);
while (toVisit.length > 0) {
/** @type {{ key: string, value: any }[][]} */
const newToVisit = [];
@ -43,7 +41,7 @@ module.exports = {
const value = obj[key];
path.push({ key, value });
callback(path);
callback(path, obj);
if (Array.isArray(value) || Object.prototype.toString.call(value) == '[object Object]') {
newToVisit.push([...path]);
@ -58,6 +56,30 @@ module.exports = {
}
},
/**
* Given the `BFS` path given to `BFS` callbacks, this will return the Prism language token path of the current
* value (e.g. `Prism.languages.xml.tag.pattern`).
*
* @param {readonly{ key: string, value: any }[]} path
* @param {string} [root]
* @returns {string}
*/
BFSPathToPrismTokenPath(path, root = 'Prism.languages') {
let tokenPath = root;
for (const { key } of path) {
if (!key) {
// do nothing
} else if (/^\d+$/.test(key)) {
tokenPath += `[${key}]`;
} else if (/^[a-z]\w*$/i.test(key)) {
tokenPath += `.${key}`;
} else {
tokenPath += `[${JSON.stringify(key)}]`;
}
}
return tokenPath;
},
/**
* Returns the AST of a given pattern.
*

View File

@ -5,7 +5,7 @@ const { assert } = require('chai');
const PrismLoader = require('./helper/prism-loader');
const TestDiscovery = require('./helper/test-discovery');
const TestCase = require('./helper/test-case');
const { BFS, parseRegex } = require('./helper/util');
const { BFS, BFSPathToPrismTokenPath, parseRegex } = require('./helper/util');
const { languages } = require('../components.json');
const { visitRegExpAST } = require('regexpp');
const { transform, combineTransformers, getIntersectionWordSets, JS, Words, NFA, Transformers } = require('refa');
@ -19,8 +19,8 @@ const RAA = require('regexp-ast-analysis');
* @type {Map<string, string[]>}
*/
const testSnippets = new Map();
const testSuite = TestDiscovery.loadAllTests(__dirname + '/languages');
for (const languageIdentifier in testSuite) {
const testSuite = TestDiscovery.loadAllTests();
for (const [languageIdentifier, files] of testSuite) {
const lang = TestCase.parseLanguageNames(languageIdentifier).mainLanguage;
let snippets = testSnippets.get(lang);
if (snippets === undefined) {
@ -28,7 +28,7 @@ for (const languageIdentifier in testSuite) {
testSnippets.set(lang, snippets);
}
for (const file of testSuite[languageIdentifier]) {
for (const file of files) {
snippets.push(TestCase.TestCaseFile.readFromFile(file).code);
}
}
@ -90,27 +90,6 @@ function testPatterns(Prism, mainLanguage) {
.filter(lang => lang in Prism.languages);
}
/**
 * Converts a `BFS` path into a property access string that starts at the given root.
 *
 * @param {string} root
 * @param {Parameters<Parameters<typeof BFS>[1]>[0]} path
 * @returns {string}
 */
function BFSPathToString(root, path) {
	return path.reduce((pathStr, { key }) => {
		if (!key) {
			// entries without a key add nothing to the path
			return pathStr;
		}
		if (/^\d+$/.test(key)) {
			// array index
			return `${pathStr}[${key}]`;
		}
		if (/^[a-z]\w*$/i.test(key)) {
			// plain identifier
			return `${pathStr}.${key}`;
		}
		// any other key is written as a quoted property access
		return `${pathStr}[${JSON.stringify(key)}]`;
	}, root);
}
/**
* Invokes the given function on every pattern in `Prism.languages`.
*
@ -146,10 +125,9 @@ function testPatterns(Prism, mainLanguage) {
BFS(root, path => {
const { key, value } = path[path.length - 1];
const tokenPath = BFSPathToPrismTokenPath(path, rootStr);
visited.add(value);
const tokenPath = BFSPathToString(rootStr, path);
if (Object.prototype.toString.call(value) == '[object RegExp]') {
try {
let ast;

View File

@ -8,29 +8,23 @@ const { argv } = require('yargs');
const testSuite =
(argv.language)
? TestDiscovery.loadSomeTests(__dirname + '/languages', argv.language)
? TestDiscovery.loadSomeTests(argv.language)
// load complete test suite
: TestDiscovery.loadAllTests(__dirname + '/languages');
: TestDiscovery.loadAllTests();
const update = !!argv.update;
// define tests for all tests in all languages in the test suite
for (const language in testSuite) {
if (!testSuite.hasOwnProperty(language)) {
continue;
}
for (const [languageIdentifier, files] of testSuite) {
describe("Testing language '" + languageIdentifier + "'", function () {
this.timeout(10000);
(function (language, testFiles) {
describe("Testing language '" + language + "'", function () {
this.timeout(10000);
for (const filePath of files) {
const fileName = path.basename(filePath, path.extname(filePath));
for (const filePath of testFiles) {
const fileName = path.basename(filePath, path.extname(filePath));
it(" should pass test case '" + fileName + "'", function () {
TestCase.runTestCase(language, filePath, update ? 'update' : 'insert');
});
}
});
}(language, testSuite[language]));
it(" should pass test case '" + fileName + "'", function () {
TestCase.runTestCase(languageIdentifier, filePath, update ? 'update' : 'insert');
});
}
});
}