Concurnas: Improved tokenization (#3189)

This commit is contained in:
Michael Schmidt 2021-11-22 13:12:41 +01:00 committed by GitHub
parent 6af8a6447d
commit 7b34e65d73
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 107 additions and 38 deletions

View File

@ -1,19 +1,20 @@
Prism.languages.concurnas = {
'comment': [
{
pattern: /(^|[^\\])\/\*[\s\S]*?(?:\*\/|$)/,
lookbehind: true
},
{
pattern: /(^|[^\\:])\/\/.*/,
lookbehind: true,
greedy: true
}
],
'comment': {
pattern: /(^|[^\\])(?:\/\*[\s\S]*?(?:\*\/|$)|\/\/.*)/,
lookbehind: true,
greedy: true
},
'langext': {
pattern: /\b\w+\s*\|\|[\s\S]+?\|\|/,
greedy: true,
alias: 'string'
inside: {
'class-name': /^\w+/,
'string': {
pattern: /(^\s*\|\|)[\s\S]+(?=\|\|$)/,
lookbehind: true
},
'punctuation': /\|\|/
}
},
'function': {
pattern: /((?:^|\s)def[ \t]+)[a-zA-Z_]\w*(?=\s*\()/,
@ -23,7 +24,7 @@ Prism.languages.concurnas = {
'boolean': /\b(?:false|true)\b/,
'number': /\b0b[01][01_]*L?\b|\b0x(?:[\da-f_]*\.)?[\da-f_p+-]+\b|(?:\b\d[\d_]*(?:\.[\d_]*)?|\B\.\d[\d_]*)(?:e[+-]?\d[\d_]*)?[dfls]?/i,
'punctuation': /[{}[\];(),.:]/,
'operator': /<==|>==|=>|->|<-|<>|\^|&==|&<>|!|\?:?|\.\?|\+\+|--|[-+*/=<>]=?|\b(?:and|as|band|bor|bxor|comp|is|isnot|mod|or)\b=?/,
'operator': /<==|>==|=>|->|<-|<>|&==|&<>|\?:?|\.\?|\+\+|--|[-+*/=<>]=?|[!^~]|\b(?:and|as|band|bor|bxor|comp|is|isnot|mod|or)\b=?/,
'annotation': {
pattern: /@(?:\w+:)?(?:\w+|\[[^\]]+\])?/,
alias: 'builtin'
@ -31,8 +32,20 @@ Prism.languages.concurnas = {
};
Prism.languages.insertBefore('concurnas', 'langext', {
'string': {
pattern: /[rs]?("|')(?:\\.|(?!\1)[^\\\r\n])*\1/,
'regex-literal': {
pattern: /\br("|')(?:\\.|(?!\1)[^\\\r\n])*\1/,
greedy: true,
inside: {
'interpolation': {
pattern: /((?:^|[^\\])(?:\\{2})*)\{(?:[^{}]|\{(?:[^{}]|\{[^}]*\})*\})+\}/,
lookbehind: true,
inside: Prism.languages.concurnas
},
'regex': /[\s\S]+/
}
},
'string-literal': {
pattern: /(?:\B|\bs)("|')(?:\\.|(?!\1)[^\\\r\n])*\1/,
greedy: true,
inside: {
'interpolation': {

View File

@ -1 +1 @@
// Minified Prism grammar for Concurnas. Assigns Prism.languages.concurnas with two
// 'comment' patterns (block and line), a 'langext' token aliased to "string", plus
// function/keyword/boolean/number/punctuation/operator/annotation tokens; then uses
// Prism.languages.insertBefore to place a 'string' token (optional r or s prefix,
// with nested 'interpolation' highlighted as Concurnas) ahead of 'langext', and
// finally registers 'conc' as an alias of the same grammar object.
Prism.languages.concurnas={comment:[{pattern:/(^|[^\\])\/\*[\s\S]*?(?:\*\/|$)/,lookbehind:!0},{pattern:/(^|[^\\:])\/\/.*/,lookbehind:!0,greedy:!0}],langext:{pattern:/\b\w+\s*\|\|[\s\S]+?\|\|/,greedy:!0,alias:"string"},function:{pattern:/((?:^|\s)def[ \t]+)[a-zA-Z_]\w*(?=\s*\()/,lookbehind:!0},keyword:/\b(?:abstract|actor|also|annotation|assert|async|await|bool|boolean|break|byte|case|catch|changed|char|class|closed|constant|continue|def|default|del|double|elif|else|enum|every|extends|false|finally|float|for|from|global|gpudef|gpukernel|if|import|in|init|inject|int|lambda|local|long|loop|match|new|nodefault|null|of|onchange|open|out|override|package|parfor|parforsync|post|pre|private|protected|provide|provider|public|return|shared|short|single|size_t|sizeof|super|sync|this|throw|trait|trans|transient|true|try|typedef|unchecked|using|val|var|void|while|with)\b/,boolean:/\b(?:false|true)\b/,number:/\b0b[01][01_]*L?\b|\b0x(?:[\da-f_]*\.)?[\da-f_p+-]+\b|(?:\b\d[\d_]*(?:\.[\d_]*)?|\B\.\d[\d_]*)(?:e[+-]?\d[\d_]*)?[dfls]?/i,punctuation:/[{}[\];(),.:]/,operator:/<==|>==|=>|->|<-|<>|\^|&==|&<>|!|\?:?|\.\?|\+\+|--|[-+*/=<>]=?|\b(?:and|as|band|bor|bxor|comp|is|isnot|mod|or)\b=?/,annotation:{pattern:/@(?:\w+:)?(?:\w+|\[[^\]]+\])?/,alias:"builtin"}},Prism.languages.insertBefore("concurnas","langext",{string:{pattern:/[rs]?("|')(?:\\.|(?!\1)[^\\\r\n])*\1/,greedy:!0,inside:{interpolation:{pattern:/((?:^|[^\\])(?:\\{2})*)\{(?:[^{}]|\{(?:[^{}]|\{[^}]*\})*\})+\}/,lookbehind:!0,inside:Prism.languages.concurnas},string:/[\s\S]+/}}}),Prism.languages.conc=Prism.languages.concurnas;
// Minified Prism grammar for Concurnas. Assigns Prism.languages.concurnas with a single
// greedy 'comment' pattern covering both block and line comments, a 'langext' token whose
// 'inside' splits it into class-name / string / punctuation ("||"), plus
// function/keyword/boolean/number/punctuation/operator/annotation tokens; then uses
// Prism.languages.insertBefore to place 'regex-literal' (r-prefixed quotes) and
// 'string-literal' (unprefixed or s-prefixed quotes) ahead of 'langext', each with nested
// 'interpolation' highlighted as Concurnas, and finally registers 'conc' as an alias of
// the same grammar object.
Prism.languages.concurnas={comment:{pattern:/(^|[^\\])(?:\/\*[\s\S]*?(?:\*\/|$)|\/\/.*)/,lookbehind:!0,greedy:!0},langext:{pattern:/\b\w+\s*\|\|[\s\S]+?\|\|/,greedy:!0,inside:{"class-name":/^\w+/,string:{pattern:/(^\s*\|\|)[\s\S]+(?=\|\|$)/,lookbehind:!0},punctuation:/\|\|/}},function:{pattern:/((?:^|\s)def[ \t]+)[a-zA-Z_]\w*(?=\s*\()/,lookbehind:!0},keyword:/\b(?:abstract|actor|also|annotation|assert|async|await|bool|boolean|break|byte|case|catch|changed|char|class|closed|constant|continue|def|default|del|double|elif|else|enum|every|extends|false|finally|float|for|from|global|gpudef|gpukernel|if|import|in|init|inject|int|lambda|local|long|loop|match|new|nodefault|null|of|onchange|open|out|override|package|parfor|parforsync|post|pre|private|protected|provide|provider|public|return|shared|short|single|size_t|sizeof|super|sync|this|throw|trait|trans|transient|true|try|typedef|unchecked|using|val|var|void|while|with)\b/,boolean:/\b(?:false|true)\b/,number:/\b0b[01][01_]*L?\b|\b0x(?:[\da-f_]*\.)?[\da-f_p+-]+\b|(?:\b\d[\d_]*(?:\.[\d_]*)?|\B\.\d[\d_]*)(?:e[+-]?\d[\d_]*)?[dfls]?/i,punctuation:/[{}[\];(),.:]/,operator:/<==|>==|=>|->|<-|<>|&==|&<>|\?:?|\.\?|\+\+|--|[-+*/=<>]=?|[!^~]|\b(?:and|as|band|bor|bxor|comp|is|isnot|mod|or)\b=?/,annotation:{pattern:/@(?:\w+:)?(?:\w+|\[[^\]]+\])?/,alias:"builtin"}},Prism.languages.insertBefore("concurnas","langext",{"regex-literal":{pattern:/\br("|')(?:\\.|(?!\1)[^\\\r\n])*\1/,greedy:!0,inside:{interpolation:{pattern:/((?:^|[^\\])(?:\\{2})*)\{(?:[^{}]|\{(?:[^{}]|\{[^}]*\})*\})+\}/,lookbehind:!0,inside:Prism.languages.concurnas},regex:/[\s\S]+/}},"string-literal":{pattern:/(?:\B|\bs)("|')(?:\\.|(?!\1)[^\\\r\n])*\1/,greedy:!0,inside:{interpolation:{pattern:/((?:^|[^\\])(?:\\{2})*)\{(?:[^{}]|\{(?:[^{}]|\{[^}]*\})*\})+\}/,lookbehind:!0,inside:Prism.languages.concurnas},string:/[\s\S]+/}}}),Prism.languages.conc=Prism.languages.concurnas;

View File

@ -0,0 +1,11 @@
// comment
/*
comment
*/
----------------------------------------------------
[
["comment", "// comment"],
["comment", "/*\r\ncomment\r\n*/"]
]

View File

@ -4,10 +4,18 @@ myfunc()
----------------------------------------------------
[
["keyword", "def"], ["function", "myfunc"], ["punctuation", "("], ["punctuation", ")"], ["operator", "=>"], ["number", "12"],
"\r\nmyfunc" , ["punctuation", "("], ["punctuation", ")"]
["keyword", "def"],
["function", "myfunc"],
["punctuation", "("],
["punctuation", ")"],
["operator", "=>"],
["number", "12"],
"\r\nmyfunc",
["punctuation", "("],
["punctuation", ")"]
]
----------------------------------------------------
Checks for functions.
Checks for functions.

View File

@ -0,0 +1,23 @@
myAPL || x[⍋x←6?40] ||
SimpleLisp||(+ 1 2 (* 3 3 ) )||
|| invalid ||
----------------------------------------------------
[
["langext", [
["class-name", "myAPL"],
["punctuation", "||"],
["string", " x[⍋x←6?40] "],
["punctuation", "||"]
]],
["langext", [
["class-name", "SimpleLisp"],
["punctuation", "||"],
["string", "(+ 1 2 (* 3 3 ) )"],
["punctuation", "||"]
]],
"\r\n\r\n || invalid ||"
]

View File

@ -10,7 +10,7 @@ mod mod=
< <== > >==
and or
band bor bxor
^
^ ~
----------------------------------------------------
@ -18,34 +18,47 @@ band bor bxor
["operator", "+"],
["operator", "++"],
["operator", "+="],
["operator", "-"],
["operator", "--"],
["operator", "-="],
["operator", "="],
["operator", "=="],
["operator", "<>"],
["operator", "&=="],
["operator", "&<>"],
["operator", "isnot"],
["operator", "is"],
["operator", "as"],
["operator", "comp"],
["operator", "/"],
["operator", "/="],
["operator", "*"],
["operator", "*="],
["operator", "mod"],
["operator", "mod="],
["operator", "<"],
["operator", "<=="],
["operator", ">"],
["operator", ">=="],
["operator", "and"],
["operator", "or"],
["operator", "band"],
["operator", "bor"],
["operator", "bxor"],
["operator", "^"]
["operator", "^"],
["operator", "~"]
]
----------------------------------------------------

View File

@ -0,0 +1,13 @@
r'say'
r"hello"
----------------------------------------------------
[
["regex-literal", [
["regex", "r'say'"]
]],
["regex-literal", [
["regex", "r\"hello\""]
]]
]

View File

@ -1,19 +1,15 @@
"hi"
"addition result: {1+2}"
'hi'
r'say'
r"hello"
'contains: "'
myAPL || x[⍋x←6?40] ||
|| invalid ||
----------------------------------------------------
[
["string", [
["string-literal", [
["string", "\"hi\""]
]],
["string", [
["string-literal", [
["string", "\"addition result: "],
["interpolation", [
["punctuation", "{"],
@ -24,20 +20,12 @@ myAPL || x[⍋x←6?40] ||
]],
["string", "\""]
]],
["string", [
["string-literal", [
["string", "'hi'"]
]],
["string", [
["string", "r'say'"]
]],
["string", [
["string", "r\"hello\""]
]],
["string", [
["string-literal", [
["string", "'contains: \"'"]
]],
["langext", "myAPL || x[⍋x←6?40] ||"],
"\r\n || invalid ||"
]]
]
----------------------------------------------------