Regex: Fixed char-class/char-set confusion (#3124)

This commit is contained in:
Michael Schmidt 2021-10-05 21:30:45 +02:00 committed by GitHub
parent 09a0e2ba1b
commit 4dde2e20e6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 91 additions and 93 deletions

View File

@ -5,11 +5,11 @@
alias: 'escape'
};
var escape = /\\(?:x[\da-fA-F]{2}|u[\da-fA-F]{4}|u\{[\da-fA-F]+\}|0[0-7]{0,2}|[123][0-7]{2}|c[a-zA-Z]|.)/;
var charClass = {
var charSet = {
pattern: /\.|\\[wsd]|\\p\{[^{}]+\}/i,
alias: 'class-name'
};
var charClassWithoutDot = {
var charSetWithoutDot = {
pattern: /\\[wsd]|\\p\{[^{}]+\}/i,
alias: 'class-name'
};
@ -25,16 +25,16 @@
};
Prism.languages.regex = {
'charset': {
'char-class': {
pattern: /((?:^|[^\\])(?:\\\\)*)\[(?:[^\\\]]|\\[\s\S])*\]/,
lookbehind: true,
inside: {
'charset-negation': {
'char-class-negation': {
pattern: /(^\[)\^/,
lookbehind: true,
alias: 'operator'
},
'charset-punctuation': {
'char-class-punctuation': {
pattern: /^\[|\]$/,
alias: 'punctuation'
},
@ -49,12 +49,12 @@
}
},
'special-escape': specialEscape,
'charclass': charClassWithoutDot,
'char-set': charSetWithoutDot,
'escape': escape
}
},
'special-escape': specialEscape,
'charclass': charClass,
'char-set': charSet,
'backreference': [
{
// a backreference which is not an octal escape

View File

@ -1 +1 @@
!function(a){var e={pattern:/\\[\\(){}[\]^$+*?|.]/,alias:"escape"},n=/\\(?:x[\da-fA-F]{2}|u[\da-fA-F]{4}|u\{[\da-fA-F]+\}|0[0-7]{0,2}|[123][0-7]{2}|c[a-zA-Z]|.)/,t="(?:[^\\\\-]|"+n.source+")",s=RegExp(t+"-"+t),i={pattern:/(<|')[^<>']+(?=[>']$)/,lookbehind:!0,alias:"variable"};a.languages.regex={charset:{pattern:/((?:^|[^\\])(?:\\\\)*)\[(?:[^\\\]]|\\[\s\S])*\]/,lookbehind:!0,inside:{"charset-negation":{pattern:/(^\[)\^/,lookbehind:!0,alias:"operator"},"charset-punctuation":{pattern:/^\[|\]$/,alias:"punctuation"},range:{pattern:s,inside:{escape:n,"range-punctuation":{pattern:/-/,alias:"operator"}}},"special-escape":e,charclass:{pattern:/\\[wsd]|\\p\{[^{}]+\}/i,alias:"class-name"},escape:n}},"special-escape":e,charclass:{pattern:/\.|\\[wsd]|\\p\{[^{}]+\}/i,alias:"class-name"},backreference:[{pattern:/\\(?![123][0-7]{2})[1-9]/,alias:"keyword"},{pattern:/\\k<[^<>']+>/,alias:"keyword",inside:{"group-name":i}}],anchor:{pattern:/[$^]|\\[ABbGZz]/,alias:"function"},escape:n,group:[{pattern:/\((?:\?(?:<[^<>']+>|'[^<>']+'|[>:]|<?[=!]|[idmnsuxU]+(?:-[idmnsuxU]+)?:?))?/,alias:"punctuation",inside:{"group-name":i}},{pattern:/\)/,alias:"punctuation"}],quantifier:{pattern:/(?:[+*?]|\{\d+(?:,\d*)?\})[?+]?/,alias:"number"},alternation:{pattern:/\|/,alias:"keyword"}}}(Prism);
!function(a){var e={pattern:/\\[\\(){}[\]^$+*?|.]/,alias:"escape"},n=/\\(?:x[\da-fA-F]{2}|u[\da-fA-F]{4}|u\{[\da-fA-F]+\}|0[0-7]{0,2}|[123][0-7]{2}|c[a-zA-Z]|.)/,t="(?:[^\\\\-]|"+n.source+")",s=RegExp(t+"-"+t),i={pattern:/(<|')[^<>']+(?=[>']$)/,lookbehind:!0,alias:"variable"};a.languages.regex={"char-class":{pattern:/((?:^|[^\\])(?:\\\\)*)\[(?:[^\\\]]|\\[\s\S])*\]/,lookbehind:!0,inside:{"char-class-negation":{pattern:/(^\[)\^/,lookbehind:!0,alias:"operator"},"char-class-punctuation":{pattern:/^\[|\]$/,alias:"punctuation"},range:{pattern:s,inside:{escape:n,"range-punctuation":{pattern:/-/,alias:"operator"}}},"special-escape":e,"char-set":{pattern:/\\[wsd]|\\p\{[^{}]+\}/i,alias:"class-name"},escape:n}},"special-escape":e,"char-set":{pattern:/\.|\\[wsd]|\\p\{[^{}]+\}/i,alias:"class-name"},backreference:[{pattern:/\\(?![123][0-7]{2})[1-9]/,alias:"keyword"},{pattern:/\\k<[^<>']+>/,alias:"keyword",inside:{"group-name":i}}],anchor:{pattern:/[$^]|\\[ABbGZz]/,alias:"function"},escape:n,group:[{pattern:/\((?:\?(?:<[^<>']+>|'[^<>']+'|[>:]|<?[=!]|[idmnsuxU]+(?:-[idmnsuxU]+)?:?))?/,alias:"punctuation",inside:{"group-name":i}},{pattern:/\)/,alias:"punctuation"}],quantifier:{pattern:/(?:[+*?]|\{\d+(?:,\d*)?\})[?+]?/,alias:"number"},alternation:{pattern:/\|/,alias:"keyword"}}}(Prism);

View File

@ -9,17 +9,17 @@
"a",
["quantifier", "+"],
["group", ["(?:"]],
["charset", [
["charset-punctuation", "["],
["char-class", [
["char-class-punctuation", "["],
["range", [
"a",
["range-punctuation", "-"],
"z"
]],
["charset-punctuation", "]"]
["char-class-punctuation", "]"]
]],
["alternation", "|"],
["charclass", "\\d"],
["char-set", "\\d"],
["group", ")"],
["quantifier", "?"]
]],

View File

@ -0,0 +1,48 @@
[]
[^]
[foo]
[\]\b]
[.^$\1]
[\d\D\p{L}]
----------------------------------------------------
[
["char-class", [
["char-class-punctuation", "["],
["char-class-punctuation", "]"]
]],
["char-class", [
["char-class-punctuation", "["],
["char-class-negation", "^"],
["char-class-punctuation", "]"]
]],
["char-class", [
["char-class-punctuation", "["],
"foo",
["char-class-punctuation", "]"]
]],
["char-class", [
["char-class-punctuation", "["],
["special-escape", "\\]"],
["escape", "\\b"],
["char-class-punctuation", "]"]
]],
["char-class", [
["char-class-punctuation", "["],
".^$",
["escape", "\\1"],
["char-class-punctuation", "]"]
]],
["char-class", [
["char-class-punctuation", "["],
["char-set", "\\d"],
["char-set", "\\D"],
["char-set", "\\p{L}"],
["char-class-punctuation", "]"]
]]
]
----------------------------------------------------
Checks for character classes.

View File

@ -0,0 +1,21 @@
.
\w \W
\s \S
\d \D
\p{ASCII}
\P{ASCII}
----------------------------------------------------
[
["char-set", "."],
["char-set", "\\w"], ["char-set", "\\W"],
["char-set", "\\s"], ["char-set", "\\S"],
["char-set", "\\d"], ["char-set", "\\D"],
["char-set", "\\p{ASCII}"],
["char-set", "\\P{ASCII}"]
]
----------------------------------------------------
Checks for character sets.

View File

@ -1,25 +0,0 @@
.
\w \W
\s \S
\d \D
\p{ASCII}
\P{ASCII}
----------------------------------------------------
[
["charclass", "."],
["charclass", "\\w"],
["charclass", "\\W"],
["charclass", "\\s"],
["charclass", "\\S"],
["charclass", "\\d"],
["charclass", "\\D"],
["charclass", "\\p{ASCII}"],
["charclass", "\\P{ASCII}"]
]
----------------------------------------------------
Checks for character classes.

View File

@ -1,44 +0,0 @@
[]
[^]
[foo]
[\]\b]
[.^$\1]
----------------------------------------------------
[
["charset", [
["charset-punctuation", "["],
["charset-punctuation", "]"]
]],
["charset", [
["charset-punctuation", "["],
["charset-negation", "^"],
["charset-punctuation", "]"]
]],
["charset", [
["charset-punctuation", "["],
"foo",
["charset-punctuation", "]"]
]],
["charset", [
["charset-punctuation", "["],
["special-escape", "\\]"],
["escape", "\\b"],
["charset-punctuation", "]"]
]],
["charset", [
["charset-punctuation", "["],
".^$",
["escape", "\\1"],
["charset-punctuation", "]"]
]]
]
----------------------------------------------------
Checks for character sets.

View File

@ -5,8 +5,8 @@
----------------------------------------------------
[
["charset", [
["charset-punctuation", "["],
["char-class", [
["char-class-punctuation", "["],
["range", [
"a",
["range-punctuation", "-"],
@ -22,11 +22,10 @@
["range-punctuation", "-"],
"9"
]],
["charset-punctuation", "]"]
["char-class-punctuation", "]"]
]],
["charset", [
["charset-punctuation", "["],
["char-class", [
["char-class-punctuation", "["],
["range", [
["escape", "\\xa1"],
["range-punctuation", "-"],
@ -37,14 +36,13 @@
["range-punctuation", "-"],
["escape", "\\u{256}"]
]],
["charset-punctuation", "]"]
["char-class-punctuation", "]"]
]],
["charset", [
["charset-punctuation", "["],
["charset-negation", "^"],
["char-class", [
["char-class-punctuation", "["],
["char-class-negation", "^"],
"-aaa-",
["charset-punctuation", "]"]
["char-class-punctuation", "]"]
]]
]