OCaml: Improved tokenization (#3269)

This commit is contained in:
Michael Schmidt 2021-12-18 12:53:19 +01:00 committed by GitHub
parent ffd8343f33
commit 7bcc5da08f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 124 additions and 52 deletions

View File

@ -1,23 +1,39 @@
// https://ocaml.org/manual/lex.html
Prism.languages.ocaml = {
'comment': /\(\*[\s\S]*?\*\)/,
'comment': {
pattern: /\(\*[\s\S]*?\*\)/,
greedy: true
},
'char': {
pattern: /'(?:[^\\\r\n']|\\(?:.|[ox]?[0-9a-f]{1,3}))'/i,
greedy: true
},
'string': [
{
pattern: /"(?:\\.|[^\\\r\n"])*"/,
pattern: /"(?:\\(?:[\s\S]|\r\n)|[^\\\r\n"])*"/,
greedy: true
},
{
pattern: /(['`])(?:\\(?:\d+|x[\da-f]+|.)|(?!\1)[^\\\r\n])\1/i,
pattern: /\{([a-z_]*)\|[\s\S]*?\|\1\}/,
greedy: true
}
],
'number': /\b(?:0x[\da-f][\da-f_]+|(?:0[bo])?\d[\d_]*(?:\.[\d_]*)?(?:e[+-]?[\d_]+)?)/i,
'number': [
// binary and octal
/\b(?:0b[01][01_]*|0o[0-7][0-7_]*)\b/i,
// hexadecimal
/\b0x[a-f0-9][a-f0-9_]*(?:\.[a-f0-9_]*)?(?:p[+-]?\d[\d_]*)?(?!\w)/i,
// decimal
/\b\d[\d_]*(?:\.[\d_]*)?(?:e[+-]?\d[\d_]*)?(?!\w)/i,
],
'directive': {
pattern: /\B#\w+/,
alias: 'important'
alias: 'property'
},
'label': {
pattern: /\B~\w+/,
alias: 'function'
alias: 'property'
},
'type-variable': {
pattern: /\B'\w+/,
@ -25,17 +41,18 @@ Prism.languages.ocaml = {
},
'variant': {
pattern: /`\w+/,
alias: 'variable'
},
'module': {
pattern: /\b[A-Z]\w+/,
alias: 'variable'
alias: 'symbol'
},
// For the list of keywords and operators, see the "Keywords" and
// "Prefix and infix symbols" sections of https://ocaml.org/manual/lex.html
'keyword': /\b(?:as|assert|begin|class|constraint|do|done|downto|else|end|exception|external|for|fun|function|functor|if|in|include|inherit|initializer|lazy|let|match|method|module|mutable|new|nonrec|object|of|open|private|rec|sig|struct|then|to|try|type|val|value|virtual|when|where|while|with)\b/,
'boolean': /\b(?:false|true)\b/,
'operator-like-punctuation': {
pattern: /\[[<>|]|[>|]\]|\{<|>\}/,
alias: 'punctuation'
},
// Custom operators are allowed
'operator': /:=|[=<>@^|&+\-*\/$%!?~][!$%&*+\-.\/:<=>?@^|~]*|\b(?:and|asr|land|lor|lsl|lsr|lxor|mod|or)\b/,
'punctuation': /[(){}\[\].,:;]|\b_\b/
'operator': /\.[.~]|:[=>]|[=<>@^|&+\-*\/$%!?~][!$%&*+\-.\/:<=>?@^|~]*|\b(?:and|asr|land|lor|lsl|lsr|lxor|mod|or)\b/,
'punctuation': /;;|::|[(){}\[\].,:;#]|\b_\b/
};

View File

@ -1 +1 @@
Prism.languages.ocaml={comment:/\(\*[\s\S]*?\*\)/,string:[{pattern:/"(?:\\.|[^\\\r\n"])*"/,greedy:!0},{pattern:/(['`])(?:\\(?:\d+|x[\da-f]+|.)|(?!\1)[^\\\r\n])\1/i,greedy:!0}],number:/\b(?:0x[\da-f][\da-f_]+|(?:0[bo])?\d[\d_]*(?:\.[\d_]*)?(?:e[+-]?[\d_]+)?)/i,directive:{pattern:/\B#\w+/,alias:"important"},label:{pattern:/\B~\w+/,alias:"function"},"type-variable":{pattern:/\B'\w+/,alias:"function"},variant:{pattern:/`\w+/,alias:"variable"},module:{pattern:/\b[A-Z]\w+/,alias:"variable"},keyword:/\b(?:as|assert|begin|class|constraint|do|done|downto|else|end|exception|external|for|fun|function|functor|if|in|include|inherit|initializer|lazy|let|match|method|module|mutable|new|nonrec|object|of|open|private|rec|sig|struct|then|to|try|type|val|value|virtual|when|where|while|with)\b/,boolean:/\b(?:false|true)\b/,operator:/:=|[=<>@^|&+\-*\/$%!?~][!$%&*+\-.\/:<=>?@^|~]*|\b(?:and|asr|land|lor|lsl|lsr|lxor|mod|or)\b/,punctuation:/[(){}\[\].,:;]|\b_\b/};
Prism.languages.ocaml={comment:{pattern:/\(\*[\s\S]*?\*\)/,greedy:!0},char:{pattern:/'(?:[^\\\r\n']|\\(?:.|[ox]?[0-9a-f]{1,3}))'/i,greedy:!0},string:[{pattern:/"(?:\\(?:[\s\S]|\r\n)|[^\\\r\n"])*"/,greedy:!0},{pattern:/\{([a-z_]*)\|[\s\S]*?\|\1\}/,greedy:!0}],number:[/\b(?:0b[01][01_]*|0o[0-7][0-7_]*)\b/i,/\b0x[a-f0-9][a-f0-9_]*(?:\.[a-f0-9_]*)?(?:p[+-]?\d[\d_]*)?(?!\w)/i,/\b\d[\d_]*(?:\.[\d_]*)?(?:e[+-]?\d[\d_]*)?(?!\w)/i],directive:{pattern:/\B#\w+/,alias:"property"},label:{pattern:/\B~\w+/,alias:"property"},"type-variable":{pattern:/\B'\w+/,alias:"function"},variant:{pattern:/`\w+/,alias:"symbol"},keyword:/\b(?:as|assert|begin|class|constraint|do|done|downto|else|end|exception|external|for|fun|function|functor|if|in|include|inherit|initializer|lazy|let|match|method|module|mutable|new|nonrec|object|of|open|private|rec|sig|struct|then|to|try|type|val|value|virtual|when|where|while|with)\b/,boolean:/\b(?:false|true)\b/,"operator-like-punctuation":{pattern:/\[[<>|]|[>|]\]|\{<|>\}/,alias:"punctuation"},operator:/\.[.~]|:[=>]|[=<>@^|&+\-*\/$%!?~][!$%&*+\-.\/:<=>?@^|~]*|\b(?:and|asr|land|lor|lsl|lsr|lxor|mod|or)\b/,punctuation:/;;|::|[(){}\[\].,:;#]|\b_\b/};

View File

@ -0,0 +1,15 @@
'a'
'\n'
'\''
'\xA9'
'\169'
----------------------------------------------------
[
["char", "'a'"],
["char", "'\\n'"],
["char", "'\\''"],
["char", "'\\xA9'"],
["char", "'\\169'"]
]

View File

@ -1,15 +0,0 @@
Foo
Bar42
Baz_42
----------------------------------------------------
[
["module", "Foo"],
["module", "Bar42"],
["module", "Baz_42"]
]
----------------------------------------------------
Checks for modules.

View File

@ -5,9 +5,13 @@
0b1010_1111
42_000
3.14_15_9
3.141_592_653_589_793_12
1e-5
3.2e8
6.1E-7
2.22044604925031308e-16
0.4e+12_415
0x1p-52
----------------------------------------------------
@ -19,11 +23,15 @@
["number", "0b1010_1111"],
["number", "42_000"],
["number", "3.14_15_9"],
["number", "3.141_592_653_589_793_12"],
["number", "1e-5"],
["number", "3.2e8"],
["number", "6.1E-7"],
["number", "0.4e+12_415"]
["number", "2.22044604925031308e-16"],
["number", "0.4e+12_415"],
["number", "0x1p-52"]
]
----------------------------------------------------
Checks for numbers.
Checks for numbers.

View File

@ -2,11 +2,12 @@ and asr land
lor lsl lsr
lxor mod or
:=
:= :>
= < > @
^ | & ~
^ | & ~ .~
+ - * /
$ % ! ?
..
~=~
@ -18,14 +19,34 @@ $ % ! ?
["operator", "lxor"], ["operator", "mod"], ["operator", "or"],
["operator", ":="],
["operator", "="], ["operator", "<"], ["operator", ">"], ["operator", "@"],
["operator", "^"], ["operator", "|"], ["operator", "&"], ["operator", "~"],
["operator", "+"], ["operator", "-"], ["operator", "*"], ["operator", "/"],
["operator", "$"], ["operator", "%"], ["operator", "!"], ["operator", "?"],
["operator", ":>"],
["operator", "="],
["operator", "<"],
["operator", ">"],
["operator", "@"],
["operator", "^"],
["operator", "|"],
["operator", "&"],
["operator", "~"],
["operator", ".~"],
["operator", "+"],
["operator", "-"],
["operator", "*"],
["operator", "/"],
["operator", "$"],
["operator", "%"],
["operator", "!"],
["operator", "?"],
["operator", ".."],
["operator", "~=~"]
]
----------------------------------------------------
Checks for operators.
Checks for operators.

View File

@ -1,6 +1,12 @@
( ) { } [ ]
. , : ;
_
:: ;;
[< [> [| {<
>] >} |]
#
----------------------------------------------------
@ -17,5 +23,19 @@ _
["punctuation", ":"],
["punctuation", ";"],
["punctuation", "_"]
["punctuation", "_"],
["punctuation", "::"],
["punctuation", ";;"],
["operator-like-punctuation", "[<"],
["operator-like-punctuation", "[>"],
["operator-like-punctuation", "[|"],
["operator-like-punctuation", "{<"],
["operator-like-punctuation", ">]"],
["operator-like-punctuation", ">}"],
["operator-like-punctuation", "|]"],
["punctuation", "#"]
]

View File

@ -1,25 +1,31 @@
""
"Fo\"obar"
'\''
'\123'
'\xf4'
`\``
`\123`
`\xf4`
"Call me Ishmael. Some years ago — never mind how long \
precisely — having little or no money in my purse, and \
nothing particular to interest me on shore, I thought I\
\ would sail about a little and see the watery part of t\
he world."
{|This is a quoted string, here, neither \ nor " are special characters|}
{|"Hello, World!"|}
{|"\\"|}
{delimiter|the end of this|}quoted string is here|delimiter}
{ext|hello {|world|}|ext}
----------------------------------------------------
[
["string", "\"\""],
["string", "\"Fo\\\"obar\""],
["string", "'\\''"],
["string", "'\\123'"],
["string", "'\\xf4'"],
["string", "`\\``"],
["string", "`\\123`"],
["string", "`\\xf4`"]
["string", "\"Call me Ishmael. Some years ago — never mind how long \\\r\nprecisely — having little or no money in my purse, and \\\r\nnothing particular to interest me on shore, I thought I\\\r\n\\ would sail about a little and see the watery part of t\\\r\nhe world.\""],
["string", "{|This is a quoted string, here, neither \\ nor \" are special characters|}"],
["string", "{|\"Hello, World!\"|}"],
["string", "{|\"\\\\\"|}"],
["string", "{delimiter|the end of this|}quoted string is here|delimiter}"],
["string", "{ext|hello {|world|}|ext}"]
]
----------------------------------------------------
Checks for strings.
Checks for strings.