// GoScanner: an OMeta grammar that splits Go source text into [type, text]
// pairs. Types produced: numeric_literal, string_literal, keyword,
// identifier, operator, semi.
//
// NOTE(review): the copy of this file under review had lost every
// `<...>` (consumed-by) expression, so the rule applications inside
// `keyword` and `token` below are reconstructed from the surrounding rules
// and from Go's semicolon-insertion rule. Confirm against the original.
ometa GoScanner {
  // Go defines a unicode_char as any code point except newline.
  // NOTE(review): inherited rules such as `space` and `fromTo` presumably
  // dispatch to this override, which is what keeps them from crossing a
  // newline - confirm against the OMeta base grammar.
  char = ~'\n' ^char,
  letter = ^letter | '_',

  line_comment = fromTo('//', '\n'),
  inline_comment = fromTo('/*', '*/'),
  // `^char` is the inherited rule, so this form may span newlines.
  multiline_comment = ~inline_comment '/' '*' (~('*' '/') ^char)* '*' '/',

  // C style comments not containing a newline count as a single space
  space = ^space | inline_comment,

  // C++ style comments and comments containing a newline count as a newline
  newline = '\n' | line_comment | multiline_comment,
  blankline = spaces newline,

  // Nondecimal digits
  range :lo :hi = char:r ?(r >= lo && r <= hi),
  hexdigit = range('0', '9') | range('a', 'f') | range('A', 'F'),
  octaldigit = range('0', '7'),

  // Escape characters (the part after the backslash).
  // Go requires exactly 3 octal digits, 2 hex digits after \x,
  // 4 after \u and 8 after \U.
  backslash = '\\',
  char_esc = range('0', '3') octaldigit octaldigit
           | 'x' hexdigit hexdigit
           | 'u' hexdigit hexdigit hexdigit hexdigit
           | 'U' hexdigit hexdigit hexdigit hexdigit
                 hexdigit hexdigit hexdigit hexdigit
           | in('abfnrtv')
           | backslash,

  // Utility rule that matches one character inside a quoted literal whose
  // quote rule is named by q: a plain character, an escape sequence, or the
  // escaped quote itself.
  char_in_lit :q = ~apply(q) ~backslash char
                 | backslash char_esc
                 | backslash apply(q),

  // Quote characters
  cquote = '\'',
  rquote = '`',
  squote = '"',

  // Literals
  literal = numeric_lit -> `numeric_literal
          | string_lit  -> `string_literal
          ,

  // Numeric literals. Ordered choice: the longest forms must come first,
  // otherwise `1.5` would be scanned as the integer `1` followed by `.5`.
  numeric_lit = imaginary_lit
              | float_lit
              | int_lit
              | char_lit
              ,

  // at_least(n, rules): apply the named rules in order, each at most once,
  // succeeding only if at least n of them matched.
  at_least 0 [] = empty,
  at_least 0 [:x anything*:xs] = apply(x)? at_least(0, xs),
  at_least :n [:x anything*:xs] = apply(x) at_least(n-1, xs),
  at_least :n [:x anything*:xs] = at_least(n, xs),

  // Hex is tried first: once an alternative of the group has matched, a
  // failing `~letterOrDigit` does not re-enter the group, so `0x1F` must not
  // be claimed by the octal branch. `octaldigit*` lets a lone `0` match.
  int_lit = ( '0' ('x'|'X') hexdigit+
            | '0' octaldigit*
            | ~'0' digit+
            ) ~letterOrDigit,

  float_int = digit+,
  float_frac = '.' digit*,
  float_exp = ('e'|'E') ('+'|'-')? digit+,
  // NOTE(review): requiring two of the three parts means `.5` (fraction
  // only) is not accepted as a float; left as in the original.
  float_body = at_least(2, [`float_int, `float_frac, `float_exp]),
  float_lit = float_body ~letterOrDigit,

  // Uses float_body rather than float_lit: float_lit ends with
  // `~letterOrDigit`, which can never be followed by the letter 'i'.
  imaginary_lit = (float_body | digit+) 'i' ~letterOrDigit,

  // Character literals like 'a' or '\n'
  char_lit = cquote char_in_lit(`cquote) cquote,

  // String literals
  string_lit = raw_string_lit | interpreted_string_lit,

  // Raw string literals do not allow escapes, and can include newlines
  // (hence the inherited `^char` instead of the newline-free `char`).
  raw_string_lit = rquote (~rquote ^char)* rquote,

  // Normal string literals allow all escapes but no newlines - use \n
  interpreted_string_lit = squote char_in_lit(`squote)* squote,

  // Keywords
  contains :a :r = ?(a.indexOf(r) >= 0),
  // NOTE(review): `<letter+>` is reconstructed; the reviewed copy had a bare
  // `:r` here, which binds a single input item and can never equal a keyword.
  keyword = <letter+>:r ~letterOrDigit
            contains(['break', 'case', 'chan', 'const', 'continue',
                      'default', 'defer', 'else', 'fallthrough', 'for',
                      'func', 'go', 'goto', 'if', 'import', 'interface',
                      'map', 'package', 'range', 'return', 'select',
                      'struct', 'switch', 'type', 'var'], r) -> `keyword,

  // Identifiers
  ident = ~keyword firstAndRest(`letter, `letterOrDigit) ~letterOrDigit
          -> `identifier,

  // Operators
  in :s = char:r ?(s.indexOf(r)>=0),
  // Two-character operators come before the single-character "op with
  // optional '='" group; otherwise `++`, `--`, `&&` and `||` would each be
  // scanned as two one-character operators.
  operator = ( seq('...')
             | seq('++') | seq('--')
             | seq('&&') | seq('||')
             | seq('<-')
             | seq('<=') | seq('>=')
             | seq('!=') | seq('==')
             | seq(':=')
             | ( seq('<<') | seq('>>') | seq('&^') | in('+-*/%&|^') ) '='?
             | in('<>=!(){}[],.:')
             ) -> `operator
           ,

  // Line separator - semicolon. Parenthesised so the action applies to both
  // alternatives, not only to `newline`.
  semi = (';' | newline) -> `semi,

  // One token as [type, text]. Tokens that may end a statement (literals,
  // identifiers, the four keywords and five operators listed) leave any
  // following newline in the input so it is scanned as a `semi`; every other
  // token swallows the blank lines after it.
  // The `semi` alternative precedes the generic operator alternative so a
  // `//` or `/*` comment is not scanned as `/` operators.
  // NOTE(review): the `<rule:typ>` parts are reconstructed - see file header.
  token = spaces <literal:typ>:t                              -> [typ, t]
        | spaces <ident:typ>:t                                -> [typ, t]
        | spaces <keyword:typ>:t
            contains(['break', 'continue', 'fallthrough', 'return'], t)
                                                              -> [typ, t]
        | spaces <operator:typ>:t
            contains(['++', '--', ')', ']', '}'], t)          -> [typ, t]
        | spaces <semi:typ>:t blankline*                      -> [typ, t]
        | spaces <keyword:typ>:t blankline*                   -> [typ, t]
        | spaces <operator:typ>:t blankline*                  -> [typ, t]
        ,

  tokens = token*,
  document = blankline* tokens:xs end -> xs
}

// Sample 1: imports and a call with mixed argument kinds.
// Line breaks inside the samples are significant: the scanner turns them
// into `semi` tokens.
gocode = """
package main

import (
    "fmt"
    "math"
)

func main() {
    fmt.Println("Happy", math.Pi, "Day")
}
"""
GoScanner.matchAll(gocode, `document)

// Sample 2: var declaration with initialisers and a short variable declaration.
gocode = """
package main

import "fmt"

func main() {
    var x, y, z int = 1, 2, 3
    c, python, java := true, false, "no!"
    fmt.Println(x, y, z, c, python, java)
}
"""
GoScanner.matchAll(gocode, `document)