// GoScanner: lexical layer shared by the Go grammars below.
// It provides semicolon handling (explicit ';' and the "magic" semicolon that
// is implied when only non-newline whitespace separates the current position
// from a newline, ')' or '}'), the `token` dispatch rules, character classes
// and a few parameterised helpers (charRange, str, scan).
ometa GoScanner {
  nl                  = newline,
  real_semicolon      = spaces ';',
  // Pure lookahead: consumes nothing and yields `magic.
  magic_semicolon     = &((~nl space)* (nl | ')' | '}')) -> `magic,

  token ';'           = real_semicolon:x
                      | magic_semicolon:x -> [`semicolon, x],
  token `name         = spaces &('_'* upper) :x -> [`parsing_name, x],
  token `name         = spaces &('_'* lower) :x -> [`scanning_name, x],
  token `token        = spaces str('"', '"'):x -> [`token, x]
                      | spaces str('`', '`'):x -> [`token, x],
  // A token spelled ":rule" is matched by applying the named rule.
  token (':' :rule)   = spaces apply(rule),
  token :t            = spaces seq(t) -> [t, t],

  production_name     = token(`name),
  _token              = token(`token),

  newline             = ^exactly('\n'), /* the Unicode code point U+000A */
  unicode_char        = ~newline char,  /* an arbitrary Unicode code point except newline */
  unicode_letter      = ^letter,        /* a Unicode code point classified as "Letter" */
  unicode_digit       = ^digit,         /* a Unicode code point classified as "Decimal Digit" */

  decimal_digit       = charRange("0", "9"),
  octal_digit         = charRange("0", "7"),
  hex_digit           = charRange("0", "9") | charRange("A", "F") | charRange("a", "f"),
  letter              = unicode_letter | "_",
  identifier          = letter (letter | unicode_digit)*,

  // Matches one char x with from <= x <= to.
  charRange :from :to = char:x ?(from <= x && x <= to) -> x,
  // Matches from ... to and yields the text between the delimiters.
  str :from :to       = seq(from) <(~seq(to) char)*>:x seq(to) -> x,
  // Skips leading whitespace, then applies the named lexical rule.
  scan :name          = spaces apply(name)
}

// GoBaseParser: each rule is a direct transcription of the EBNF production
// quoted in the comment immediately above it. Lower-case rules are lexical
// and are reached through scan("..."); capitalised rules are syntactic.
// PrimaryExpr and Expression are left-recursive here exactly as in the EBNF;
// GoParser overrides both with iterative forms.
ometa GoBaseParser <: GoScanner {
  // decimal_digit = "0" … "9" .
  decimal_digit = charRange('0', '9'),

  // octal_digit = "0" … "7" .
  octal_digit = charRange('0', '7'),

  // hex_digit = "0" … "9" | "A" … "F" | "a" … "f" .
  hex_digit = charRange('0', '9') | charRange('A', 'F') | charRange('a', 'f'),

  // letter = unicode_letter | "_" .
  letter = unicode_letter | '_',

  // identifier = letter { letter | unicode_digit } .
  identifier = letter (letter | unicode_digit)*,

  // int_lit = decimal_lit | octal_lit | hex_lit .
  int_lit = decimal_lit | octal_lit | hex_lit,

  // decimal_lit = ( "1" … "9" ) { decimal_digit } .
  decimal_lit = (charRange('1', '9')) decimal_digit*,

  // octal_lit = "0" { octal_digit } .
  octal_lit = '0' octal_digit*,

  // hex_lit = "0" ( "x" | "X" ) hex_digit { hex_digit } .
  hex_lit = '0' ('x' | 'X') hex_digit hex_digit*,

  // float_lit = decimals "." [ decimals ] [ exponent ] |
  //             decimals exponent |
  //             "." decimals [ exponent ] .
  float_lit = decimals '.' decimals? exponent?
            | decimals exponent
            | '.' decimals exponent?,

  // decimals = decimal_digit { decimal_digit } .
  decimals = decimal_digit decimal_digit*,

  // exponent = ( "e" | "E" ) [ "+" | "-" ] decimals .
  exponent = ('e' | 'E') ('+' | '-')? decimals,

  // imaginary_lit = (decimals | float_lit) "i" .
  imaginary_lit = (decimals | float_lit) 'i',

  // char_lit = "'" ( unicode_value | byte_value ) "'" .
  char_lit = '\'' (unicode_value | byte_value) '\'',

  // unicode_value = unicode_char | little_u_value | big_u_value | escaped_char .
  unicode_value = unicode_char | little_u_value | big_u_value | escaped_char,

  // byte_value = octal_byte_value | hex_byte_value .
  byte_value = octal_byte_value | hex_byte_value,

  // octal_byte_value = `\` octal_digit octal_digit octal_digit .
  octal_byte_value = '\\' octal_digit octal_digit octal_digit,

  // hex_byte_value = `\` "x" hex_digit hex_digit .
  hex_byte_value = '\\' 'x' hex_digit hex_digit,

  // little_u_value = `\` "u" hex_digit hex_digit hex_digit hex_digit .
  little_u_value = '\\' 'u' hex_digit hex_digit hex_digit hex_digit,

  // big_u_value = `\` "U" hex_digit hex_digit hex_digit hex_digit
  //                       hex_digit hex_digit hex_digit hex_digit .
  big_u_value = '\\' 'U' hex_digit hex_digit hex_digit hex_digit
                         hex_digit hex_digit hex_digit hex_digit,

  // escaped_char = `\` ( "a" | "b" | "f" | "n" | "r" | "t" | "v" | `\` | "'" | `"` ) .
  escaped_char = '\\' ('a' | 'b' | 'f' | 'n' | 'r' | 't' | 'v' | '\\' | '\'' | '\"'),

  // string_lit = raw_string_lit | interpreted_string_lit .
  string_lit = raw_string_lit | interpreted_string_lit,

  // raw_string_lit = "`" { unicode_char | newline } "`" .
  raw_string_lit = '`' (~'`' (unicode_char | newline))* '`',

  // interpreted_string_lit = `"` { unicode_value | byte_value } `"` .
  interpreted_string_lit = '\"' (~'\"' (unicode_value | byte_value))* '\"',

  // Type = TypeName | TypeLit | "(" Type ")" .
  Type = TypeName | TypeLit | "(" Type ")",

  // TypeName = QualifiedIdent .
  TypeName = QualifiedIdent,

  // TypeLit = ArrayType | StructType | PointerType | FunctionType | InterfaceType |
  //           SliceType | MapType | ChannelType .
  TypeLit = ArrayType | StructType | PointerType | FunctionType | InterfaceType
          | SliceType | MapType | ChannelType,

  // ArrayType = "[" ArrayLength "]" ElementType .
  ArrayType = "[" ArrayLength "]" ElementType,

  // ArrayLength = Expression .
  ArrayLength = Expression,

  // ElementType = Type .
  ElementType = Type,

  // SliceType = "[" "]" ElementType .
  SliceType = "[" "]" ElementType,

  // StructType = "struct" "{" { FieldDecl ";" } "}" .
  StructType = "struct" "{" (FieldDecl ";")* "}",

  // FieldDecl = (IdentifierList Type | AnonymousField) [ Tag ] .
  FieldDecl = (IdentifierList Type | AnonymousField) Tag?,

  // AnonymousField = [ "*" ] TypeName .
  AnonymousField = "*"? TypeName,

  // Tag = string_lit .
  Tag = scan("string_lit"),

  // PointerType = "*" BaseType .
  PointerType = "*" BaseType,

  // BaseType = Type .
  BaseType = Type,

  // FunctionType = "func" Signature .
  FunctionType = "func" Signature,

  // Signature = Parameters [ Result ] .
  Signature = Parameters Result?,

  // Result = Parameters | Type .
  Result = Parameters | Type,

  // Parameters = "(" [ ParameterList [ "," ] ] ")" .
  Parameters = "(" (ParameterList ","?)? ")",

  // ParameterList = ParameterDecl { "," ParameterDecl } .
  ParameterList = ParameterDecl ("," ParameterDecl)*,

  // ParameterDecl = [ IdentifierList ] [ "..." ] Type .
  ParameterDecl = IdentifierList? "..."? Type,

  // InterfaceType = "interface" "{" { MethodSpec ";" } "}" .
  InterfaceType = "interface" "{" (MethodSpec ";")* "}",

  // MethodSpec = MethodName Signature | InterfaceTypeName .
  MethodSpec = MethodName Signature | InterfaceTypeName,

  // MethodName = identifier .
  MethodName = scan("identifier"),

  // InterfaceTypeName = TypeName .
  InterfaceTypeName = TypeName,

  // MapType = "map" "[" KeyType "]" ElementType .
  MapType = "map" "[" KeyType "]" ElementType,

  // KeyType = Type .
  KeyType = Type,

  // ChannelType = ( "chan" [ "<-" ] | "<-" "chan" ) ElementType .
  ChannelType = ("chan" "<-"? | "<-" "chan") ElementType,

  // Block = "{" { Statement ";" } "}" .
  Block = "{" (Statement ";")* "}",

  // Declaration = ConstDecl | TypeDecl | VarDecl .
  Declaration = ConstDecl | TypeDecl | VarDecl,

  // TopLevelDecl = Declaration | FunctionDecl | MethodDecl .
  TopLevelDecl = Declaration | FunctionDecl | MethodDecl,

  // ConstDecl = "const" ( ConstSpec | "(" { ConstSpec ";" } ")" ) .
  ConstDecl = "const" (ConstSpec | "(" (ConstSpec ";")* ")"),

  // ConstSpec = IdentifierList [ [ Type ] "=" ExpressionList ] .
  ConstSpec = IdentifierList (Type? "=" ExpressionList)?,

  // IdentifierList = identifier { "," identifier } .
  IdentifierList = scan("identifier") ("," scan("identifier"))*,

  // ExpressionList = Expression { "," Expression } .
  ExpressionList = Expression ("," Expression)*,

  // TypeDecl = "type" ( TypeSpec | "(" { TypeSpec ";" } ")" ) .
  TypeDecl = "type" (TypeSpec | "(" (TypeSpec ";")* ")"),

  // TypeSpec = identifier Type .
  TypeSpec = scan("identifier") Type,

  // VarDecl = "var" ( VarSpec | "(" { VarSpec ";" } ")" ) .
  VarDecl = "var" (VarSpec | "(" (VarSpec ";")* ")"),

  // VarSpec = IdentifierList ( Type [ "=" ExpressionList ] | "=" ExpressionList ) .
  VarSpec = IdentifierList (Type ("=" ExpressionList)? | "=" ExpressionList),

  // ShortVarDecl = IdentifierList ":=" ExpressionList .
  ShortVarDecl = IdentifierList ":=" ExpressionList,

  // FunctionDecl = "func" identifier Signature [ Body ] .
  FunctionDecl = "func" scan("identifier") Signature Body?,

  // Body = Block .
  Body = Block,

  // MethodDecl = "func" Receiver MethodName Signature [ Body ] .
  MethodDecl = "func" Receiver MethodName Signature Body?,

  // Receiver = "(" [ identifier ] [ "*" ] BaseTypeName ")" .
  Receiver = "(" scan("identifier")? "*"? BaseTypeName ")",

  // BaseTypeName = identifier .
  BaseTypeName = scan("identifier"),

  // Operand = Literal | QualifiedIdent | MethodExpr | "(" Expression ")" .
  Operand = Literal | QualifiedIdent | MethodExpr | "(" Expression ")",

  // Literal = BasicLit | CompositeLit | FunctionLit .
  Literal = BasicLit | CompositeLit | FunctionLit,

  // BasicLit = int_lit | float_lit | imaginary_lit | char_lit | string_lit .
  BasicLit = scan("int_lit") | scan("float_lit") | scan("imaginary_lit")
           | scan("char_lit") | scan("string_lit"),

  // QualifiedIdent = [ PackageName "." ] identifier .
  QualifiedIdent = (PackageName ".")? scan("identifier"),

  // CompositeLit = LiteralType LiteralValue .
  CompositeLit = LiteralType LiteralValue,

  // LiteralType = StructType | ArrayType | "[" "..." "]" ElementType |
  //               SliceType | MapType | TypeName .
  LiteralType = StructType | ArrayType | "[" "..." "]" ElementType
              | SliceType | MapType | TypeName,

  // LiteralValue = "{" [ ElementList [ "," ] ] "}" .
  LiteralValue = "{" (ElementList ","?)? "}",

  // ElementList = Element { "," Element } .
  ElementList = Element ("," Element)*,

  // Element = [ Key ":" ] Value .
  Element = (Key ":")? Value,

  // Key = FieldName | ElementIndex .
  Key = FieldName | ElementIndex,

  // FieldName = identifier .
  FieldName = scan("identifier"),

  // ElementIndex = Expression .
  ElementIndex = Expression,

  // Value = Expression | LiteralValue .
  Value = Expression | LiteralValue,

  // FunctionLit = FunctionType Body .
  FunctionLit = FunctionType Body,

  // PrimaryExpr =
  //     Operand |
  //     Conversion |
  //     BuiltinCall |
  //     PrimaryExpr Selector |
  //     PrimaryExpr Index |
  //     PrimaryExpr Slice |
  //     PrimaryExpr TypeAssertion |
  //     PrimaryExpr Call .
  PrimaryExpr = Operand
              | Conversion
              | BuiltinCall
              | PrimaryExpr Selector
              | PrimaryExpr Index
              | PrimaryExpr Slice
              | PrimaryExpr TypeAssertion
              | PrimaryExpr Call,

  // Selector = "." identifier .
  Selector = "." scan("identifier"),

  // Index = "[" Expression "]" .
  Index = "[" Expression "]",

  // Slice = "[" [ Expression ] ":" [ Expression ] "]" .
  Slice = "[" Expression? ":" Expression? "]",

  // TypeAssertion = "." "(" Type ")" .
  TypeAssertion = "." "(" Type ")",

  // Call = "(" [ ArgumentList [ "," ] ] ")" .
  Call = "(" (ArgumentList ","?)? ")",

  // ArgumentList = ExpressionList [ "..." ] .
  ArgumentList = ExpressionList "..."?,

  // Expression = UnaryExpr | Expression binary_op UnaryExpr .
  Expression = UnaryExpr | Expression scan("binary_op") UnaryExpr,

  // UnaryExpr = PrimaryExpr | unary_op UnaryExpr .
  UnaryExpr = PrimaryExpr | scan("unary_op") UnaryExpr,

  // binary_op = "||" | "&&" | rel_op | add_op | mul_op .
  binary_op = '||' | '&&' | rel_op | add_op | mul_op,

  // rel_op = "==" | "!=" | "<" | "<=" | ">" | ">=" .
  rel_op = '==' | '!=' | '<' | '<=' | '>' | '>=',

  // add_op = "+" | "-" | "|" | "^" .
  add_op = '+' | '-' | '|' | '^',

  // mul_op = "*" | "/" | "%" | "<<" | ">>" | "&" | "&^" .
  mul_op = '*' | '/' | '%' | '<<' | '>>' | '&' | '&^',

  // unary_op = "+" | "-" | "!" | "^" | "*" | "&" | "<-" .
  unary_op = '+' | '-' | '!' | '^' | '*' | '&' | '<-',

  // MethodExpr = ReceiverType "." MethodName .
  MethodExpr = ReceiverType "." MethodName,

  // ReceiverType = TypeName | "(" "*" TypeName ")" .
  ReceiverType = TypeName | "(" "*" TypeName ")",

  // Conversion = Type "(" Expression ")" .
  Conversion = Type "(" Expression ")",

  // Statement =
  //     Declaration | LabeledStmt | SimpleStmt |
  //     GoStmt | ReturnStmt | BreakStmt | ContinueStmt | GotoStmt |
  //     FallthroughStmt | Block | IfStmt | SwitchStmt | SelectStmt | ForStmt |
  //     DeferStmt .
  Statement = Declaration | LabeledStmt | SimpleStmt
            | GoStmt | ReturnStmt | BreakStmt | ContinueStmt | GotoStmt
            | FallthroughStmt | Block | IfStmt | SwitchStmt | SelectStmt | ForStmt
            | DeferStmt,

  // SimpleStmt = EmptyStmt | ExpressionStmt | SendStmt | IncDecStmt | Assignment | ShortVarDecl .
  SimpleStmt = EmptyStmt | ExpressionStmt | SendStmt | IncDecStmt | Assignment | ShortVarDecl,

  // EmptyStmt = .
  EmptyStmt = empty,

  // LabeledStmt = Label ":" Statement .
  LabeledStmt = Label ":" Statement,

  // Label = identifier .
  Label = scan("identifier"),

  // ExpressionStmt = Expression .
  ExpressionStmt = Expression,

  // SendStmt = Channel "<-" Expression .
  SendStmt = Channel "<-" Expression,

  // Channel = Expression .
  Channel = Expression,

  // IncDecStmt = Expression ( "++" | "--" ) .
  IncDecStmt = Expression ("++" | "--"),

  // Assignment = ExpressionList assign_op ExpressionList .
  Assignment = ExpressionList scan("assign_op") ExpressionList,

  // assign_op = [ add_op | mul_op ] "=" .
  assign_op = (add_op | mul_op)? '=',

  // IfStmt = "if" [ SimpleStmt ";" ] Expression Block [ "else" ( IfStmt | Block ) ] .
  IfStmt = "if" (SimpleStmt ";")? Expression Block ("else" (IfStmt | Block))?,

  // SwitchStmt = ExprSwitchStmt | TypeSwitchStmt .
  SwitchStmt = ExprSwitchStmt | TypeSwitchStmt,

  // ExprSwitchStmt = "switch" [ SimpleStmt ";" ] [ Expression ] "{" { ExprCaseClause } "}" .
  ExprSwitchStmt = "switch" (SimpleStmt ";")? Expression? "{" ExprCaseClause* "}",

  // ExprCaseClause = ExprSwitchCase ":" { Statement ";" } .
  ExprCaseClause = ExprSwitchCase ":" (Statement ";")*,

  // ExprSwitchCase = "case" ExpressionList | "default" .
  ExprSwitchCase = "case" ExpressionList | "default",

  // TypeSwitchStmt = "switch" [ SimpleStmt ";" ] TypeSwitchGuard "{" { TypeCaseClause } "}" .
  TypeSwitchStmt = "switch" (SimpleStmt ";")? TypeSwitchGuard "{" TypeCaseClause* "}",

  // TypeSwitchGuard = [ identifier ":=" ] PrimaryExpr "." "(" "type" ")" .
  TypeSwitchGuard = (scan("identifier") ":=")? PrimaryExpr "." "(" "type" ")",

  // TypeCaseClause = TypeSwitchCase ":" { Statement ";" } .
  TypeCaseClause = TypeSwitchCase ":" (Statement ";")*,

  // TypeSwitchCase = "case" TypeList | "default" .
  TypeSwitchCase = "case" TypeList | "default",

  // TypeList = Type { "," Type } .
  TypeList = Type ("," Type)*,

  // ForStmt = "for" [ Condition | ForClause | RangeClause ] Block .
  ForStmt = "for" (Condition | ForClause | RangeClause)? Block,

  // Condition = Expression .
  Condition = Expression,

  // ForClause = [ InitStmt ] ";" [ Condition ] ";" [ PostStmt ] .
  ForClause = InitStmt? ";" Condition? ";" PostStmt?,

  // InitStmt = SimpleStmt .
  InitStmt = SimpleStmt,

  // PostStmt = SimpleStmt .
  PostStmt = SimpleStmt,

  // RangeClause = Expression [ "," Expression ] ( "=" | ":=" ) "range" Expression .
  RangeClause = Expression ("," Expression)? ("=" | ":=") "range" Expression,

  // GoStmt = "go" Expression .
  GoStmt = "go" Expression,

  // SelectStmt = "select" "{" { CommClause } "}" .
  SelectStmt = "select" "{" CommClause* "}",

  // CommClause = CommCase ":" { Statement ";" } .
  CommClause = CommCase ":" (Statement ";")*,

  // CommCase = "case" ( SendStmt | RecvStmt ) | "default" .
  CommCase = "case" (SendStmt | RecvStmt) | "default",

  // RecvStmt = [ Expression [ "," Expression ] ( "=" | ":=" ) ] RecvExpr .
  RecvStmt = (Expression ("," Expression)? ("=" | ":="))? RecvExpr,

  // RecvExpr = Expression .
  RecvExpr = Expression,

  // ReturnStmt = "return" [ ExpressionList ] .
  ReturnStmt = "return" ExpressionList?,

  // BreakStmt = "break" [ Label ] .
  BreakStmt = "break" Label?,

  // ContinueStmt = "continue" [ Label ] .
  ContinueStmt = "continue" Label?,

  // GotoStmt = "goto" Label .
  GotoStmt = "goto" Label,

  // FallthroughStmt = "fallthrough" .
  FallthroughStmt = "fallthrough",

  // DeferStmt = "defer" Expression .
  DeferStmt = "defer" Expression,

  // BuiltinCall = identifier "(" [ BuiltinArgs [ "," ] ] ")" .
  BuiltinCall = scan("identifier") "(" (BuiltinArgs ","?)? ")",

  // BuiltinArgs = Type [ "," ExpressionList ] | ExpressionList .
  BuiltinArgs = Type ("," ExpressionList)? | ExpressionList,

  // SourceFile = PackageClause ";" { ImportDecl ";" } { TopLevelDecl ";" } .
  SourceFile = PackageClause ";" (ImportDecl ";")* (TopLevelDecl ";")*,

  // PackageClause = "package" PackageName .
  PackageClause = "package" PackageName,

  // PackageName = identifier .
  PackageName = scan("identifier"),

  // ImportDecl = "import" ( ImportSpec | "(" { ImportSpec ";" } ")" ) .
  ImportDecl = "import" (ImportSpec | "(" (ImportSpec ";")* ")"),

  // ImportSpec = [ "." | PackageName ] ImportPath .
  ImportSpec = ("." | PackageName)? ImportPath,

  // ImportPath = string_lit .
  ImportPath = scan("string_lit")
}

// GoParser: the working parser. It overrides rules of GoBaseParser to
//   * treat // and /* */ comments as whitespace,
//   * reject keywords as identifiers and recognise builtin calls,
//   * replace the left-recursive PrimaryExpr / Expression with iteration,
//   * return tagged arrays ([`Tag, ...]) for the rules that build a tree.
// this.keywords and this.builtins are filled in by GoParser.initialize.
ometa GoParser <: GoBaseParser {
  // Match a newline at the end of file in case there isn't one.
  nl = exactly('\n') | [ anything* exactly('\n') anything* ] | end,
  space = ^space | fromTo('//', '\n') | fromTo('/*', '*/'),

  // Avoid ExpressionStmt preventing a match of a larger statement
  ExpressionStmt = ^ExpressionStmt &";",
  // Require a real semicolon after any empty statement
  EmptyStmt = ^EmptyStmt &real_semicolon,

  PrimaryExpr = (BuiltinCall | Operand | Conversion):x
                (Selector | Index | Slice | TypeAssertion | Call)*:xs
                -> [`PrimaryExpr, x].concat(xs),
  // Only attempt the inherited BuiltinCall when a builtin name and "(" follow.
  BuiltinCall = &(scan("builtin") "(") ^BuiltinCall,

  // Try '&^' before the inherited alternatives, where '&' is listed first.
  mul_op = '&^' | ^mul_op,
  binary_op = mul_op | ^binary_op,

  identifier = ~keyword <^identifier>:id -> id,
  // Succeeds when the next identifier is a member of lst.
  // indexOf yields 0 for the first entry, so membership is ">= 0".
  ident_in :lst = <^identifier>:id ?(lst.indexOf(id) >= 0) -> id,
  keyword = ident_in(this.keywords),
  builtin = ident_in(this.builtins),

  // Literals yield the matched source text.
  string_lit = <^string_lit>:s -> s,
  int_lit = <^int_lit>:i -> i,
  float_lit = <^float_lit>:f -> f,
  imaginary_lit = <^imaginary_lit>:imag -> imag,
  char_lit = <^char_lit>:c -> c,
  BasicLit = ^BasicLit:lit -> [`BasicLit, lit],

  // NOTE(review): the rule name looks like a typo for QualifiedIdent. As
  // spelled, nothing in this file refers to it, so the inherited
  // QualifiedIdent is what actually runs. Confirm before renaming, since
  // that would change the tree produced for identifiers.
  QualifiedItent = <^QualifiedIdent>:id -> [`QualifiedIdent, id],
  Operand = Literal | QualifiedIdent | MethodExpr | "(" Expression:expr ")" -> expr,

  // Either a single `rule`, or a parenthesised, ';'-terminated group of them;
  // always yields a list.
  oneOrMany :rule = apply(rule):r -> [r]
                  | "(" (apply(rule):r ";" -> r)*:rs ")" -> rs,

  PackageClause = "package" PackageName:name -> [`PackageClause, name],
  PackageName = scan("identifier"),
  ImportDecl = "import" oneOrMany(`ImportSpec):s -> [`ImportDecl, s],
  ImportSpec = ("." | PackageName)?:rename ImportPath:path -> [`ImportSpec, rename, path],
  ImportPath = scan("string_lit"),
  FunctionDecl = "func" scan("identifier"):id Signature:sig Body?:body
                 -> [`FunctionDecl, id, sig, body],
  SourceFile = PackageClause:package ";"
               (ImportDecl:r ";" -> r)*:imports
               (TopLevelDecl:r ";" -> r)*:decls
               -> [`SourceFile, package, imports, decls],

  Identifier = scan("identifier"),
  IdentifierList = listOf(`Identifier, ","),
  ExpressionList = listOf(`Expression, ","),
  ShortVarDecl = IdentifierList:vars ":=" ExpressionList:vals -> [`ShortVarDecl, vars, vals],
  Expression = listDelimOf(`UnaryExpr, ":binary_op"):xs -> [`Expression, xs],

  // One or more `rule` separated by token(delim); yields the flat list
  // [r0, d1, r1, d2, r2, ...]. Only bound names are used for the
  // accumulation, so a nested invocation (e.g. a parenthesised
  // sub-expression) cannot share or overwrite an outer invocation's list.
  listDelimOf :rule :delim = apply(rule):first
                             (token(delim):d apply(rule):r -> [d, r])*:rest
                             -> Array.prototype.concat.apply([first], rest),

  // Statement = <^Statement>,
  Signature = <^Signature>,
  // Body = <^Body>,
  Block = "{" (Statement:stmt ";" -> stmt)*:stmts "}" -> [`Block].concat(stmts),
  Block1 = "{" (Statement:stmt ";" -> stmt):stmt1 "}" -> [`Block1, stmt1],
  Block3 = "{" (Statement:stmt ";" -> stmt):stmt1
               (Statement:stmt ";" -> stmt):stmt2
               (Statement:stmt ";" -> stmt):stmt3 "}"
           -> [`Block3, stmt1, stmt2, stmt3],
  VarDecl = <^VarDecl>,
  x=
}

// Installs the word lists consulted by the `keyword` and `builtin` rules.
GoParser.initialize = function() {
  this.keywords = [
    'break', 'continue', 'fallthrough', 'return',
    'case',  // was 'case:', which no identifier can ever equal
    'chan', 'const', 'default', 'defer', 'else', 'for', 'func', 'go', 'goto',
    'if', 'import', 'interface', 'map', 'package', 'range', 'select',
    'struct', 'switch', 'type', 'var'
  ];
  this.builtins = [
    'append', 'cap', 'close', 'complex', 'copy', 'imag', 'len', 'make',
    'new', 'panic', 'print', 'println', 'real', 'recover'
  ];
}

// Sample input. Line breaks matter: the magic semicolon is triggered by the
// newline that ends each statement.
gocode = """
package main

import "fmt"

func main() {
    var x, y, z int = 1, 2, 3
    c, python, java := true, false, "no!"

    fmt.Println(x, y, z, c, python, java)
}
"""

GoParser.matchAll(gocode, `SourceFile)
GoParser.matchAll('true', `PrimaryExpr)