OCaml: Improved tokenization (#3269)
This commit is contained in:
parent ffd8343f33
commit 7bcc5da08f
|
@ -1,23 +1,39 @@
|
|||
// https://ocaml.org/manual/lex.html
|
||||
|
||||
Prism.languages.ocaml = {
|
||||
'comment': /\(\*[\s\S]*?\*\)/,
|
||||
'comment': {
|
||||
pattern: /\(\*[\s\S]*?\*\)/,
|
||||
greedy: true
|
||||
},
|
||||
'char': {
|
||||
pattern: /'(?:[^\\\r\n']|\\(?:.|[ox]?[0-9a-f]{1,3}))'/i,
|
||||
greedy: true
|
||||
},
|
||||
'string': [
|
||||
{
|
||||
pattern: /"(?:\\.|[^\\\r\n"])*"/,
|
||||
pattern: /"(?:\\(?:[\s\S]|\r\n)|[^\\\r\n"])*"/,
|
||||
greedy: true
|
||||
},
|
||||
{
|
||||
pattern: /(['`])(?:\\(?:\d+|x[\da-f]+|.)|(?!\1)[^\\\r\n])\1/i,
|
||||
pattern: /\{([a-z_]*)\|[\s\S]*?\|\1\}/,
|
||||
greedy: true
|
||||
}
|
||||
],
|
||||
'number': /\b(?:0x[\da-f][\da-f_]+|(?:0[bo])?\d[\d_]*(?:\.[\d_]*)?(?:e[+-]?[\d_]+)?)/i,
|
||||
'number': [
|
||||
// binary and octal
|
||||
/\b(?:0b[01][01_]*|0o[0-7][0-7_]*)\b/i,
|
||||
// hexadecimal
|
||||
/\b0x[a-f0-9][a-f0-9_]*(?:\.[a-f0-9_]*)?(?:p[+-]?\d[\d_]*)?(?!\w)/i,
|
||||
// decimal
|
||||
/\b\d[\d_]*(?:\.[\d_]*)?(?:e[+-]?\d[\d_]*)?(?!\w)/i,
|
||||
],
|
||||
'directive': {
|
||||
pattern: /\B#\w+/,
|
||||
alias: 'important'
|
||||
alias: 'property'
|
||||
},
|
||||
'label': {
|
||||
pattern: /\B~\w+/,
|
||||
alias: 'function'
|
||||
alias: 'property'
|
||||
},
|
||||
'type-variable': {
|
||||
pattern: /\B'\w+/,
|
||||
|
@ -25,17 +41,18 @@ Prism.languages.ocaml = {
|
|||
},
|
||||
'variant': {
|
||||
pattern: /`\w+/,
|
||||
alias: 'variable'
|
||||
},
|
||||
'module': {
|
||||
pattern: /\b[A-Z]\w+/,
|
||||
alias: 'variable'
|
||||
alias: 'symbol'
|
||||
},
|
||||
// For the list of keywords and operators,
|
||||
// see: http://caml.inria.fr/pub/docs/manual-ocaml/lex.html#sec84
|
||||
'keyword': /\b(?:as|assert|begin|class|constraint|do|done|downto|else|end|exception|external|for|fun|function|functor|if|in|include|inherit|initializer|lazy|let|match|method|module|mutable|new|nonrec|object|of|open|private|rec|sig|struct|then|to|try|type|val|value|virtual|when|where|while|with)\b/,
|
||||
'boolean': /\b(?:false|true)\b/,
|
||||
|
||||
'operator-like-punctuation': {
|
||||
pattern: /\[[<>|]|[>|]\]|\{<|>\}/,
|
||||
alias: 'punctuation'
|
||||
},
|
||||
// Custom operators are allowed
|
||||
'operator': /:=|[=<>@^|&+\-*\/$%!?~][!$%&*+\-.\/:<=>?@^|~]*|\b(?:and|asr|land|lor|lsl|lsr|lxor|mod|or)\b/,
|
||||
'punctuation': /[(){}\[\].,:;]|\b_\b/
|
||||
'operator': /\.[.~]|:[=>]|[=<>@^|&+\-*\/$%!?~][!$%&*+\-.\/:<=>?@^|~]*|\b(?:and|asr|land|lor|lsl|lsr|lxor|mod|or)\b/,
|
||||
'punctuation': /;;|::|[(){}\[\].,:;#]|\b_\b/
|
||||
};
|
||||
|
|
|
@ -1 +1 @@
|
|||
Prism.languages.ocaml={comment:/\(\*[\s\S]*?\*\)/,string:[{pattern:/"(?:\\.|[^\\\r\n"])*"/,greedy:!0},{pattern:/(['`])(?:\\(?:\d+|x[\da-f]+|.)|(?!\1)[^\\\r\n])\1/i,greedy:!0}],number:/\b(?:0x[\da-f][\da-f_]+|(?:0[bo])?\d[\d_]*(?:\.[\d_]*)?(?:e[+-]?[\d_]+)?)/i,directive:{pattern:/\B#\w+/,alias:"important"},label:{pattern:/\B~\w+/,alias:"function"},"type-variable":{pattern:/\B'\w+/,alias:"function"},variant:{pattern:/`\w+/,alias:"variable"},module:{pattern:/\b[A-Z]\w+/,alias:"variable"},keyword:/\b(?:as|assert|begin|class|constraint|do|done|downto|else|end|exception|external|for|fun|function|functor|if|in|include|inherit|initializer|lazy|let|match|method|module|mutable|new|nonrec|object|of|open|private|rec|sig|struct|then|to|try|type|val|value|virtual|when|where|while|with)\b/,boolean:/\b(?:false|true)\b/,operator:/:=|[=<>@^|&+\-*\/$%!?~][!$%&*+\-.\/:<=>?@^|~]*|\b(?:and|asr|land|lor|lsl|lsr|lxor|mod|or)\b/,punctuation:/[(){}\[\].,:;]|\b_\b/};
|
||||
Prism.languages.ocaml={comment:{pattern:/\(\*[\s\S]*?\*\)/,greedy:!0},char:{pattern:/'(?:[^\\\r\n']|\\(?:.|[ox]?[0-9a-f]{1,3}))'/i,greedy:!0},string:[{pattern:/"(?:\\(?:[\s\S]|\r\n)|[^\\\r\n"])*"/,greedy:!0},{pattern:/\{([a-z_]*)\|[\s\S]*?\|\1\}/,greedy:!0}],number:[/\b(?:0b[01][01_]*|0o[0-7][0-7_]*)\b/i,/\b0x[a-f0-9][a-f0-9_]*(?:\.[a-f0-9_]*)?(?:p[+-]?\d[\d_]*)?(?!\w)/i,/\b\d[\d_]*(?:\.[\d_]*)?(?:e[+-]?\d[\d_]*)?(?!\w)/i],directive:{pattern:/\B#\w+/,alias:"property"},label:{pattern:/\B~\w+/,alias:"property"},"type-variable":{pattern:/\B'\w+/,alias:"function"},variant:{pattern:/`\w+/,alias:"symbol"},keyword:/\b(?:as|assert|begin|class|constraint|do|done|downto|else|end|exception|external|for|fun|function|functor|if|in|include|inherit|initializer|lazy|let|match|method|module|mutable|new|nonrec|object|of|open|private|rec|sig|struct|then|to|try|type|val|value|virtual|when|where|while|with)\b/,boolean:/\b(?:false|true)\b/,"operator-like-punctuation":{pattern:/\[[<>|]|[>|]\]|\{<|>\}/,alias:"punctuation"},operator:/\.[.~]|:[=>]|[=<>@^|&+\-*\/$%!?~][!$%&*+\-.\/:<=>?@^|~]*|\b(?:and|asr|land|lor|lsl|lsr|lxor|mod|or)\b/,punctuation:/;;|::|[(){}\[\].,:;#]|\b_\b/};
|
|
@ -0,0 +1,15 @@
|
|||
'a'
|
||||
'\n'
|
||||
'\''
|
||||
'\xA9'
|
||||
'\169'
|
||||
|
||||
----------------------------------------------------
|
||||
|
||||
[
|
||||
["char", "'a'"],
|
||||
["char", "'\\n'"],
|
||||
["char", "'\\''"],
|
||||
["char", "'\\xA9'"],
|
||||
["char", "'\\169'"]
|
||||
]
|
|
@ -1,15 +0,0 @@
|
|||
Foo
|
||||
Bar42
|
||||
Baz_42
|
||||
|
||||
----------------------------------------------------
|
||||
|
||||
[
|
||||
["module", "Foo"],
|
||||
["module", "Bar42"],
|
||||
["module", "Baz_42"]
|
||||
]
|
||||
|
||||
----------------------------------------------------
|
||||
|
||||
Checks for modules.
|
|
@ -5,9 +5,13 @@
|
|||
0b1010_1111
|
||||
42_000
|
||||
3.14_15_9
|
||||
3.141_592_653_589_793_12
|
||||
1e-5
|
||||
3.2e8
|
||||
6.1E-7
|
||||
2.22044604925031308e-16
|
||||
0.4e+12_415
|
||||
0x1p-52
|
||||
|
||||
----------------------------------------------------
|
||||
|
||||
|
@ -19,11 +23,15 @@
|
|||
["number", "0b1010_1111"],
|
||||
["number", "42_000"],
|
||||
["number", "3.14_15_9"],
|
||||
["number", "3.141_592_653_589_793_12"],
|
||||
["number", "1e-5"],
|
||||
["number", "3.2e8"],
|
||||
["number", "6.1E-7"],
|
||||
["number", "0.4e+12_415"]
|
||||
["number", "2.22044604925031308e-16"],
|
||||
["number", "0.4e+12_415"],
|
||||
["number", "0x1p-52"]
|
||||
]
|
||||
|
||||
----------------------------------------------------
|
||||
|
||||
Checks for numbers.
|
||||
Checks for numbers.
|
||||
|
|
|
@ -2,11 +2,12 @@ and asr land
|
|||
lor lsl lsr
|
||||
lxor mod or
|
||||
|
||||
:=
|
||||
:= :>
|
||||
= < > @
|
||||
^ | & ~
|
||||
^ | & ~ .~
|
||||
+ - * /
|
||||
$ % ! ?
|
||||
..
|
||||
|
||||
~=~
|
||||
|
||||
|
@ -18,14 +19,34 @@ $ % ! ?
|
|||
["operator", "lxor"], ["operator", "mod"], ["operator", "or"],
|
||||
|
||||
["operator", ":="],
|
||||
["operator", "="], ["operator", "<"], ["operator", ">"], ["operator", "@"],
|
||||
["operator", "^"], ["operator", "|"], ["operator", "&"], ["operator", "~"],
|
||||
["operator", "+"], ["operator", "-"], ["operator", "*"], ["operator", "/"],
|
||||
["operator", "$"], ["operator", "%"], ["operator", "!"], ["operator", "?"],
|
||||
["operator", ":>"],
|
||||
|
||||
["operator", "="],
|
||||
["operator", "<"],
|
||||
["operator", ">"],
|
||||
["operator", "@"],
|
||||
|
||||
["operator", "^"],
|
||||
["operator", "|"],
|
||||
["operator", "&"],
|
||||
["operator", "~"],
|
||||
["operator", ".~"],
|
||||
|
||||
["operator", "+"],
|
||||
["operator", "-"],
|
||||
["operator", "*"],
|
||||
["operator", "/"],
|
||||
|
||||
["operator", "$"],
|
||||
["operator", "%"],
|
||||
["operator", "!"],
|
||||
["operator", "?"],
|
||||
|
||||
["operator", ".."],
|
||||
|
||||
["operator", "~=~"]
|
||||
]
|
||||
|
||||
----------------------------------------------------
|
||||
|
||||
Checks for operators.
|
||||
Checks for operators.
|
||||
|
|
|
@ -1,6 +1,12 @@
|
|||
( ) { } [ ]
|
||||
. , : ;
|
||||
_
|
||||
:: ;;
|
||||
|
||||
[< [> [| {<
|
||||
>] >} |]
|
||||
|
||||
#
|
||||
|
||||
----------------------------------------------------
|
||||
|
||||
|
@ -17,5 +23,19 @@ _
|
|||
["punctuation", ":"],
|
||||
["punctuation", ";"],
|
||||
|
||||
["punctuation", "_"]
|
||||
["punctuation", "_"],
|
||||
|
||||
["punctuation", "::"],
|
||||
["punctuation", ";;"],
|
||||
|
||||
["operator-like-punctuation", "[<"],
|
||||
["operator-like-punctuation", "[>"],
|
||||
["operator-like-punctuation", "[|"],
|
||||
["operator-like-punctuation", "{<"],
|
||||
|
||||
["operator-like-punctuation", ">]"],
|
||||
["operator-like-punctuation", ">}"],
|
||||
["operator-like-punctuation", "|]"],
|
||||
|
||||
["punctuation", "#"]
|
||||
]
|
||||
|
|
|
@ -1,25 +1,31 @@
|
|||
""
|
||||
"Fo\"obar"
|
||||
'\''
|
||||
'\123'
|
||||
'\xf4'
|
||||
`\``
|
||||
`\123`
|
||||
`\xf4`
|
||||
"Call me Ishmael. Some years ago — never mind how long \
|
||||
precisely — having little or no money in my purse, and \
|
||||
nothing particular to interest me on shore, I thought I\
|
||||
\ would sail about a little and see the watery part of t\
|
||||
he world."
|
||||
|
||||
{|This is a quoted string, here, neither \ nor " are special characters|}
|
||||
{|"Hello, World!"|}
|
||||
{|"\\"|}
|
||||
{delimiter|the end of this|}quoted string is here|delimiter}
|
||||
{ext|hello {|world|}|ext}
|
||||
|
||||
----------------------------------------------------
|
||||
|
||||
[
|
||||
["string", "\"\""],
|
||||
["string", "\"Fo\\\"obar\""],
|
||||
["string", "'\\''"],
|
||||
["string", "'\\123'"],
|
||||
["string", "'\\xf4'"],
|
||||
["string", "`\\``"],
|
||||
["string", "`\\123`"],
|
||||
["string", "`\\xf4`"]
|
||||
["string", "\"Call me Ishmael. Some years ago — never mind how long \\\r\nprecisely — having little or no money in my purse, and \\\r\nnothing particular to interest me on shore, I thought I\\\r\n\\ would sail about a little and see the watery part of t\\\r\nhe world.\""],
|
||||
|
||||
["string", "{|This is a quoted string, here, neither \\ nor \" are special characters|}"],
|
||||
["string", "{|\"Hello, World!\"|}"],
|
||||
["string", "{|\"\\\\\"|}"],
|
||||
["string", "{delimiter|the end of this|}quoted string is here|delimiter}"],
|
||||
["string", "{ext|hello {|world|}|ext}"]
|
||||
]
|
||||
|
||||
----------------------------------------------------
|
||||
|
||||
Checks for strings.
|
||||
Checks for strings.
|
||||
|
|
Loading…
Reference in New Issue