// Copyright (C) 2009 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// OMeta/JS parser for Ecmascript 5
// @author Tom Van Cutsem

// dependencies of this file: the UnicodeCategories object defined in 'unicode.js'

// === A.1 Lexical Grammar (Scanner) ===

// Builds a literal token: an object of the form { type: String, value: any }.
// Literal tokens represent number, string, boolean, null and regular
// expression literals. The value is stored exactly as passed in; the grammar
// below passes a number for "number", a boolean for "boolean", null for
// "null", a string for "string" and a {body, flags} object for "regexp".
// @param t the token type name
// @param val the token value
// @returns a fresh { type, value } object
function literal(t, val) {
  return { type: t, value: val };
}

// Regular expression for faster whitespace parsing: matches one character of
// the regexp \s class. It is built without the 'g' flag, so test() keeps no
// state between calls. Used by the WhitespaceOrLineTerminator rule.
var ucSpacesRE = new RegExp("\\s");

// Turns an array into a set, represented as a map of elements -> true.
// Elements are used as property names, so they are coerced to strings and
// duplicates collapse into a single entry.
// The map is a plain object, so names inherited from Object.prototype
// (e.g. "constructor", "toString") are also truthy on a plain lookup.
// To test whether e is in the set s, use
//   Object.prototype.hasOwnProperty.call(s, e)
// @param array the elements to put in the set
// @returns a new object with one own property per distinct element
function makeSet(array) {
  var set = {};
  for (var idx = 0; idx < array.length; idx++) {
    set[array[idx]] = true;
  }
  return set;
}

// The ES5 keywords (same words as the Keyword rule of the grammar).
var keywords = makeSet([
  "break", "do", "instanceof", "typeof", "case", "else", "new", "var",
  "catch", "finally", "return", "void", "continue", "for", "switch", "while",
  "debugger", "function", "this", "with", "default", "if", "throw", "delete",
  "in", "try"
]);

// Future reserved words that are reserved even outside strict mode
// (same words as the FutureReservedWord(false) rule of the grammar).
var nonStrictFutureKws = makeSet([
  "class", "enum", "extends", "super", "const", "export", "import"
]);

// Future reserved words that are additionally reserved in strict mode
// (same extra words as the FutureReservedWord(true) rule of the grammar).
var strictFutureKws = makeSet([
  "implements", "let", "private", "public", "interface", "package",
  "protected", "static", "yield"
]);

// abstract syntax trees (ASTs) are stored in JSONML format.
For details see: // http://code.google.com/p/es-lab/wiki/JsonMLASTFormat // this function adds accessors to an AST object, // making it easier to manipulate the datastructure // note that these methods will simply be dropped when the AST // is stringified into a JSONML format function mixinASTMethods(ast) { ast.nodeType = function() { return this[0]; }; ast.attributes = function() { return this[1] || {}; }; // some leaf nodes may not have attrs ast.childAt = function(i) { return this[Number(i)+2]; }; ast.children = function() { return this.slice(2); }; return ast; } // Note: | denotes prioritized choice (PEG-style), first match wins // || denotes exclusive choice: all branches are tried, only one should match ometa ES5Parser { // A.1 Lexical Grammar // input characters are represented as ES3 characters, which can be any unicode character SourceCharacter = char, // 7: Goal production in contexts where a leading '/' or '/=' is permitted // Note: not used by the parser InputElementDiv = Whitespace || LineTerminator || Comment || Token || DivPunctuator, // 7: Goal production in contexts where a leading '/' or '/=' is not permitted // Note: not used by the parser InputElementRegExp = Whitespace || LineTerminator || Comment || Token || RegularExpressionLiteral, // Note: not currently used by the parser Token = IdentifierName || Punctuator || NumericLiteral || StringLiteral, Whitespace = uc('SP') || uc('TAB') || uc('VT') || uc('FF') || uc('NBSP') || uc('BOM') || uc('Zs'), LineTerminator = uc('LF') || uc('CR') || uc('LS') || uc('PS'), LineTerminatorSequence = uc('LF') || ~uc('LF') uc('CR') || uc('LS') || uc('PS') || uc('CR') uc('LF'), Comment = MultiLineComment || SingleLineComment, // <...> is OMeta's "consumed-by" operator: the value of <...> is the input it consumed MultiLineComment = seq("/*") <(MultiLineCommentChars | empty)>:cs seq("*/") -> cs, MultiLineCommentChars = MultiLineNotAsteriskChar (MultiLineCommentChars | empty) || '*' (PostAsteriskCommentChars | 
&seq("*/")), PostAsteriskCommentChars = MultiLineNotForwardSlashOrAsteriskChar (MultiLineCommentChars | empty) || '*' (PostAsteriskCommentChars | &seq("*/")), MultiLineNotAsteriskChar = ~('*') SourceCharacter, MultiLineNotForwardSlashOrAsteriskChar = ~('/' || '*') SourceCharacter, SingleLineComment = seq("//") <(SingleLineCommentChars | empty)>, SingleLineCommentChars = SingleLineCommentChar (SingleLineCommentChars | empty), SingleLineCommentChar = ~LineTerminator SourceCharacter, Identifier = IdentifierName:n ~checkReservedWord(n) -> (n), IdentifierName = | IdentifierStart, IdentifierStart = UnicodeLetter || '$' || '_' || '\\' UnicodeEscapeSequence, IdentifierPart = IdentifierStart | UnicodeCombiningMark | UnicodeDigit | UnicodeConnectorPunctuation | uc('ZWNJ') | uc('ZWJ'), UnicodeLetter = uc('L') || uc('Nl'), UnicodeCombiningMark = uc('Mn') || uc('Mc'), UnicodeDigit = uc('Nd'), UnicodeConnectorPunctuation = uc('Pc'), checkReservedWord :id = ?(this.isKeyword(id) || this.isFutureReservedWord(id) || id == "null" || id == "true" || id == "false") -> true, // ReservedWord = Keyword | FutureReservedWord(this.strictmode) | NullLiteral | BooleanLiteral, // Note: keywords that are the complete prefix of another keyword should // be prioritized (e.g. 
'in' should come before 'instanceof') Keyword = (``break'' | ``do'' | ``instanceof'' | ``typeof'' | ``case'' | ``else'' | ``new'' | ``var'' | ``catch'' | ``finally'' | ``return'' | ``void'' | ``continue'' | ``for'' | ``switch'' | ``while'' | ``debugger'' | ``function'' | ``this'' | ``with'' | ``default'' | ``if'' | ``throw'' | ``delete'' | ``in'' | ``try''):kwname ~IdentifierPart -> kwname, FutureReservedWord false:strict = (``class''| ``enum'' | ``extends'' | ``super'' | ``const''| ``export'' | ``import''):kwname ~IdentifierPart -> kwname, FutureReservedWord true:strict = (FutureReservedWord(false) | ``implements''| ``let'' | ``private'' | ``public'' | ``interface'' | ``package''| ``protected''| ``static'' | ``yield''):kwname ~IdentifierPart -> kwname, // Note: beware of the ordering of punctuators with a common prefix! // OMeta is a PEG, so '|' denotes prioritized choice. // E.g. if '+' would come before ``++'' then the string "++5" // would be parsed as "+(+(5))" rather than "++(5)" // Punctuators comprised of more characters are prioritized // Note: the Punctuator and DivPunctuator rules are not currently used by the parser Punctuator = (``>>>='' | ``>>='' | ``>>>'' | ``==='' | ``!==''| ``<<='' | ``+='' | ``-='' | ``*='' | ``%='' | ``>='' | ``=='' | ``!='' | ``++'' | ``--'' | ``<<'' | ``>>'' | ``<='' | ``&='' | ``|='' | ``^='' | ``&&'' | ``||'' | '{' | '}' | '(' | ')' | '[' | ']' | '.' | ';' | ',' | '<' | '>' | '!' | '~' | '=' | '&' | '|' | '^' | '?' 
| ':' | '*' | '%' | '+' | '-'), DivPunctuator = (``/='' | '/'), // Punctuators partitioned according to length (for optimization purposes only) // Not used because performance gain was marginal (~3% faster) /* Punctuator4 = ``>>>='', Punctuator3 = ``>>='' | (``>>>'':s ~'=' -> s) | ``==='' | ``!==''| ``<<='', Punctuator2 = ``+='' | ``-='' | ``*='' | ``%='' | ``>='' | (``=='':s ~'='->s) | (``!='':s ~'='->s) | ``++'' | ``--'' | (``<<'':s ~'='->s) | (``>>'':s ~'='->s) | ``<='' | ``&='' | ``|='' | ``^='' | ``&&'' | ``||'' | ``/='', Punctuator1 = '{' | '}' | '(' | ')' | '[' | ']' | '.' | ';' | ',' | ('<':s ~('<'|'=')->s) | ('>':s ~('>'|'=')->s) | ('!':s ~'='->s) | ('~':s ~'='->s) | ('=':s ~'='->s) | ('&':s ~'='->s) | ('|':s ~'='->s) | ('^':s ~'='->s) | '?' | ':' | ('*':s ~'='->s) | ('%':s ~'='->s) | ('+':s ~('+'|'=')->s) | ('-':s ~('-'|'=')->s) | ('/':s ~'='->s),*/ Literal = NullLiteral || BooleanLiteral || NumericLiteral || StringLiteral || RegularExpressionLiteral, // spec forgot Regexp literals in appendix? NullLiteral = ``null'' -> literal("null", null), BooleanLiteral = (``true'' -> true || ``false'' -> false):b -> literal("boolean",b), // For semantics on how decimal literals are constructed, see section 7.8.3 // Note that the ordering of HexIntegerLiteral and DecimalLiteral is reversed w.r.t. the spec // This is intentional: the order DecimalLiteral | HexIntegerLiteral will parse // '0x...' as a decimal literal '0' followed by 'x...' NumericLiteral = (HexIntegerLiteral | DecimalLiteral):l -> literal("number",l), // DecimalDigits produces objects of the form {val: aNumber, len: aNumber } // The 'len' attribute describes the number of characters in the parsed numeral // This is required for interpreting fractional literals // Note: no exclusive choice, e.g. the input '5.5' will satisfy // both rules #1 and #3 (even though #3 only eats '5') DecimalLiteral = DecimalIntegerLiteral:l '.' 
(DecimalDigits | empty -> {val:0,len:0}):d (ExponentPart | empty -> 0):e -> ( (l + (d.val * Math.pow(10,-d.len))) * Math.pow(10,e) ) | '.' DecimalDigits:d (ExponentPart | empty -> 0):e -> ( (d.val * Math.pow(10, -d.len)) * Math.pow(10,e) ) | DecimalIntegerLiteral:l (ExponentPart | empty -> 0):e -> ( l * Math.pow(10,e) ), DecimalIntegerLiteral = NonZeroDigit:z (DecimalDigits | empty -> {val:0,len:0}):d -> ( (z * Math.pow(10,d.len)) + d.val ) || '0'->0, DecimalDigits = DecimalDigits:ds DecimalDigit:d -> ({ val: (ds.val * 10 + d), len: ds.len+1 }) | DecimalDigit:d -> ({val: d, len: 1}), DecimalDigit = '0'->0 || '1'->1 || '2'->2 || '3'->3 || '4'->4 || '5'->5 || '6'->6 || '7'->7 || '8'->8 || '9'->9, NonZeroDigit = '1'->1 || '2'->2 || '3'->3 || '4'->4 || '5'->5 || '6'->6 || '7'->7 || '8'->8 || '9'->9, ExponentPart = ExponentIndicator SignedInteger:si -> si, ExponentIndicator = 'e' || 'E', SignedInteger = DecimalDigits:ds -> (ds.val) || '+' DecimalDigits:ds -> (ds.val) || '-' DecimalDigits:ds -> (-(ds.val)), HexIntegerLiteral = HexIntegerLiteral:l HexDigit:d -> ( (l * 16) + d ) | ``0x'' HexDigit:d -> d | ``0X'' HexDigit:d -> d, HexDigit = '0'->0 || '1'->1 || '2'->2 || '3'->3 || '4'->4 || '5'->5 || '6'->6 || '7'->7 || '8'->8 || '9'->9 || 'a'->10 || 'b'->11 || 'c'->12 || 'd'->13 || 'e'->14 || 'f'->15 || 'A'->10 || 'B'->11 || 'C'->12 || 'D'->13 || 'E'->14 || 'F'->15, // For semantics on how string literals are constructed, see section 7.8.4 StringLiteral = '"' (DoubleStringCharacters | empty -> ""):s '"' -> (literal("string",s)) || '\'' (SingleStringCharacters | empty -> ""):s '\'' -> (literal("string",s)), DoubleStringCharacters = DoubleStringCharacter:c (DoubleStringCharacters | empty -> ""):cs -> ( c.concat(cs) ), SingleStringCharacters = SingleStringCharacter:c (SingleStringCharacters | empty -> ""):cs -> ( c.concat(cs) ), DoubleStringCharacter = ~('"' || '\\' || LineTerminator) SourceCharacter:s -> (s) || '\\' EscapeSequence:s -> (s) || LineContinuation, 
SingleStringCharacter = ~('\'' || '\\' || LineTerminator) SourceCharacter:s -> (s) || '\\' EscapeSequence:s -> (s) || LineContinuation, LineContinuation = '\\' LineTerminatorSequence -> (""), EscapeSequence = CharacterEscapeSequence || ~DecimalDigit '0' -> ( String.fromCharCode(0000) ) /*\u0000*/ || HexEscapeSequence || UnicodeEscapeSequence, CharacterEscapeSequence = SingleEscapeCharacter || NonEscapeCharacter, SingleEscapeCharacter = '\'' -> ( String.fromCharCode(0039) ) /*\u0027*/ || '"' -> ( String.fromCharCode(0034) ) /*\u0022*/ || '\\' -> ( String.fromCharCode(0092) ) /*\u005C*/ || 'b' -> ( String.fromCharCode(0008) ) /*\u0008*/ || 'f' -> ( String.fromCharCode(0012) ) /*\u000C*/ || 'n' -> ( String.fromCharCode(0010) ) /*\u000A*/ || 'r' -> ( String.fromCharCode(0013) ) /*\u000D*/ || 't' -> ( String.fromCharCode(0009) ) /*\u0009*/ || 'v' -> ( String.fromCharCode(0011) ) /*\u000B*/, NonEscapeCharacter = ~(EscapeCharacter || LineTerminator) SourceCharacter:s -> (s), EscapeCharacter = SingleEscapeCharacter || DecimalDigit || 'x' || 'u', HexEscapeSequence = 'x' HexDigit:a HexDigit:b -> ( String.fromCharCode(a*16+b) ), UnicodeEscapeSequence = 'u' HexDigit:a HexDigit:b HexDigit:c HexDigit:d -> ( String.fromCharCode(a*4096 + b*256 + c*16 + d) ), // section 7.8.5 // body and flags are left uninterpreted while parsing (they are parsed as strings) RegularExpressionLiteral = '/' RegularExpressionBody:b '/' RegularExpressionFlags:f -> ( literal("regexp",{body:b,flags:f}) ), RegularExpressionBody = , RegularExpressionChars = | empty -> (""), RegularExpressionFirstChar = ~('*' ||'\\' || '/' || '[') RegularExpressionNonTerminator || RegularExpressionBackslashSequence || RegularExpressionClass, RegularExpressionChar = ~('\\' || '/' || '[') RegularExpressionNonTerminator || RegularExpressionBackslashSequence || RegularExpressionClass, RegularExpressionBackslashSequence = '\\' RegularExpressionNonTerminator, RegularExpressionNonTerminator = ~(LineTerminator) SourceCharacter, 
RegularExpressionClass = '[' RegularExpressionClassChars ']', RegularExpressionClassChars = RegularExpressionClassChars RegularExpressionClassChar | empty -> (""), RegularExpressionClassChar = ~(']' || '\\') RegularExpressionNonTerminator || RegularExpressionBackslashSequence, RegularExpressionFlags = | empty -> (""), // === Implementation-level rules (not part of the spec) === // uc(category) -> accepts only unicode characters x that fall within the given unicode category uc :id = char:x ?(UnicodeCategories[id].test(x)) -> (x), MultiLineCommentNoNL = seq("/*") (MultiLineCommentCharsNoNL | empty -> ""):cs seq("*/") -> (cs), MultiLineCommentCharsNoNL = || <'*' PostAsteriskCommentCharsNoNL>, PostAsteriskCommentCharsNoNL = MultiLineNotForwardSlashOrAsteriskCharNoNL (MultiLineCommentCharsNoNL | empty) || '*' PostAsteriskCommentCharsNoNL, MultiLineNotAsteriskCharNoNL = ~('*') ~LineTerminator SourceCharacter, MultiLineNotForwardSlashOrAsteriskCharNoNL = ~('/' || '*') ~LineTerminator SourceCharacter, // see section 14.1: Directive Prologues and the Use Strict Directive // Some directives (like the Use Strict Directive) may require access to the // raw string value, without interpretation of EscapeSequences or LineContinuations // @returns the raw string value (not a String Literal AST) RawStringLiteral = '"' (RawStringCharacters('"') | empty -> ""):s '"' -> (s) || '\'' (RawStringCharacters("'") | empty -> ""):s '\'' -> (s), RawStringCharacters :term = , RawStringCharacter :term = ~exactly(term) SourceCharacter, // used by parser to parse actual tokens // eat wspace, lineterminators and comments // a much more efficient rule than Whitespace || LineTerminator by the above definitions WhitespaceOrLineTerminator = char:x ?(ucSpacesRE.test(x)) -> (x), // efficiency shortcut skip = (/*WhiteSpace || LineTerminator*/ WhitespaceOrLineTerminator || Comment)*, // does not accept LineTerminators, not even implicit ones in a MultiLineComment (cf. 
section 7.4) skipNoLine = (Whitespace || SingleLineComment || MultiLineCommentNoNL)*, skipToEnd = skip end, // k(kw) scans an expected keyword // @returns a string // Note: this rule does not recognize FutureReservedWords // which is ok since the parser is expected to invoke it only // with ES5 keywords, not future keywords. k :expected = skip Keyword:lexed ?(expected === lexed) -> (expected), // token(punct) scans an expected punctuator // @returns a string // In OMeta, terms of the syntactic form "name" are translated into // an invocation of the 'token' rule, with the text between quotes passed as // an argument, i.e. token("name"). In this parser, the term "name" instructs // the grammar to parse a specific punctuator. token :expected = skip (Punctuator || DivPunctuator):lexed ?(expected === lexed) -> (expected), // more efficient: dispatch to specialized rule based on the number of characters // in the punctuator (not used because performance gain was marginal (~3%)) //token :expected = skip apply('Punctuator'+(expected.length)):lexed // ?(expected === lexed) -> (expected), // a semicolon is "automatically inserted" if a newline is reached, // the end of the input stream is reached, or the offending token is '}' // Note: don't use ";" as this is equivalent to token(";") // which would allow line terminators to be eaten as well sc = skipNoLine ';' || skipNoLine (scanLineTerminator || end || &'}'), // All rules starting with 'scan' skip whitespace, then scan the appropriate tokens scanLineTerminator = LineTerminator || ~MultiLineCommentNoNL MultiLineComment, // @returns a string // a variant of the 'token' rule that does not accept line terminators // before it reaches the token. 
Only used for '++' and '--' so it's safe // to simply match the token using 'seq' instead of invoking 'Punctuator' scanPunctNoLineTerminator :expected = skipNoLine // does not accept LineTerminators (Punctuator || DivPunctuator):lexed ?(expected === lexed) -> (expected), // @returns a string scanIdentifier = skip Identifier:id -> (id), // @returns a string scanIdentifierName = skip IdentifierName:id -> (id), // @returns a string scanIdentifierNoLineTerminator = skipNoLine Identifier:id -> (id), // @returns a literal token scanLiteral = skip Literal:l -> (l), // @returns an object {value: string, directive: string} // where 'value' contains the interpreted string value // and 'directive' contains the uninterpreted ('raw') string value scanDirective = skip &(RawStringLiteral):raw StringLiteral:l -> ({value:l.value, directive:raw}), // ========== end of the lexer ========== // === ECMAScript 5 Parser === // A.3 Expressions // @returns an AST PrimaryExpression = k("this") -> (this.ast(_fromIdx, "ThisExpr", {}, [])) || scanIdentifier:id -> (this.ast(_fromIdx, "IdExpr",{name:id}, [])) || scanLiteral:litToken -> ( litToken.type === "regexp" ? 
this.ast(_fromIdx, "RegExpExpr",{body: litToken.value.body, flags: litToken.value.flags}, []) : this.ast(_fromIdx, "LiteralExpr",{type: litToken.type, value: litToken.value}, []) ) || ArrayLiteral || ObjectLiteral || "(" Expression(true):e ")" -> (e), // @returns an ArrayExpr AST ArrayLiteral = "[" ElementList:elts "," (Elision | empty -> []):elis "]" -> ( this.ast(_fromIdx, "ArrayExpr",{}, elts.concat(elis)) ) || "[" ElementList:elts "]" -> ( this.ast(_fromIdx, "ArrayExpr",{}, elts) ) || "[" (Elision | empty -> []):elis "]" -> ( this.ast(_fromIdx, "ArrayExpr",{}, elis) ), // @returns an array of ASTs ElementList = ElementList:elts "," (Elision | empty -> []):elis AssignmentExpression(true):exp -> ( elts.concat(elis.concat([ exp ])) ) | (Elision | empty -> []):elis AssignmentExpression(true):exp -> ( elis.concat([exp]) ), // @returns an array of ["Empty"] leaf nodes Elision = Elision:es "," -> ( es.concat([ this.emptyAst() ]) ) | "," -> ( [ this.emptyAst() ] ), // @returns an ObjectExpr AST ObjectLiteral = "{" PropertyNameAndValueList:ps "," "}" -> ( this.ast(_fromIdx, "ObjectExpr",{},ps) ) || "{" PropertyNameAndValueList:ps "}" -> ( this.ast(_fromIdx, "ObjectExpr",{},ps) ) || "{" "}" -> ( this.ast(_fromIdx, "ObjectExpr",{},[]) ), // @returns an array of ASTs PropertyNameAndValueList = PropertyNameAndValueList:ps "," PropertyAssignment:p -> ( ps.concat([ p ]) ) | PropertyAssignment:p -> ([ p ]), // @returns a *Prop AST PropertyAssignment = scanIdentifier:id ?(id == "get") PropertyName:n "(" ")" "{" FunctionBody:body "}" -> ( this.ast(_fromIdx, "GetterProp",{name:n}, [ this.ast(_fromIdx, "FunctionExpr",{}, [this.emptyAst(), this.ast(_fromIdx, "ParamDecl",{},[])].concat(body)) ]) ) || scanIdentifier:id ?(id == "set") PropertyName:n "(" FormalParameter:formal ")" "{" FunctionBody:body "}" -> ( this.ast(_fromIdx, "SetterProp",{name:n}, [ this.ast(_fromIdx, "FunctionExpr",{}, [ this.emptyAst(), this.ast(_fromIdx, "ParamDecl",{}, [formal])].concat(body)) ]) ) || 
PropertyName:n ":" AssignmentExpression(true):exp -> ( this.ast(_fromIdx, "DataProp",{name:n},[exp]) ), // @returns a string PropertyName = scanIdentifierName:id -> (id) || scanLiteral:l ?(l.type == "string") -> (l.value) || scanLiteral:l ?(l.type == "number") -> ( parseFloat(l.value) ), // @returns an AST MemberExpression = k("new") MemberExpression:mexp Arguments:args -> ( this.ast(_fromIdx, "NewExpr",{},[mexp].concat(args)) ) | MemberExpression:mexp "[" Expression(true):iexp "]" -> ( this.ast(_fromIdx, "MemberExpr",{},[mexp,iexp]) ) | MemberExpression:mexp "." scanIdentifierName:id -> ( this.ast(_fromIdx, "MemberExpr",{},[mexp, this.ast(_fromIdx, "LiteralExpr",{type:"string",value:id},[])]) ) | FunctionExpression | PrimaryExpression, // @returns an AST NewExpression = MemberExpression | k("new") NewExpression:exp -> ( this.ast(_fromIdx, "NewExpr",{},[exp]) ), // @returns an AST CallExpression = CallExpression:cexp "[" Expression(true):iexp "]" -> ( this.ast(_fromIdx, "MemberExpr",{},[cexp,iexp]) ) | CallExpression:cexp "." scanIdentifierName:id -> ( this.ast(_fromIdx, "MemberExpr",{},[cexp, this.ast(_fromIdx, "LiteralExpr",{type:"string",value:id},[])]) ) | CallExpression:cexp Arguments:args -> ( this.ast(_fromIdx, "CallExpr",{},[cexp].concat(args)) ) | MemberExpression:mexp Arguments:args -> ( mexp.nodeType() === "MemberExpr" ? this.ast(_fromIdx, "InvokeExpr",{},mexp.children().concat(args)) : mexp.nodeType() === "IdExpr" && mexp.attributes().name === "eval" // identify possible 'direct call' to eval, cf. 15.1.2.1.1 ? 
this.ast(_fromIdx, "EvalExpr",{},args) : this.ast(_fromIdx, "CallExpr",{},[mexp].concat(args)) ), // @returns an array of ASTs Arguments = "(" ")" -> ([]) || "(" ArgumentList:args ")" -> (args), // @returns an array of ASTs ArgumentList = ArgumentList:args "," AssignmentExpression(true):exp -> ( args.concat([ exp ]) ) | AssignmentExpression(true):exp -> ([ exp ]), LeftHandSideExpression = CallExpression | NewExpression, PostfixExpression = LeftHandSideExpression:lexp scanPunctNoLineTerminator("++") -> ( this.ast(_fromIdx, "CountExpr",{isPrefix:false,op:"++"},[lexp]) ) | LeftHandSideExpression:lexp scanPunctNoLineTerminator("--") -> ( this.ast(_fromIdx, "CountExpr",{isPrefix:false,op:"--"},[lexp]) ) | LeftHandSideExpression, UnaryExpression = k("delete") UnaryExpression:uexp -> ( this.ast(_fromIdx, "DeleteExpr",{},[uexp]) ) || k("void") UnaryExpression:uexp -> ( this.ast(_fromIdx, "UnaryExpr",{op:"void"}, [uexp]) ) || k("typeof") UnaryExpression:uexp -> ( this.ast(_fromIdx, "TypeofExpr",{},[uexp]) ) || "++" UnaryExpression:uexp -> ( this.ast(_fromIdx, "CountExpr",{isPrefix:true,op:"++"},[uexp]) ) || "--" UnaryExpression:uexp -> ( this.ast(_fromIdx, "CountExpr",{isPrefix:true,op:"--"},[uexp]) ) || "+" UnaryExpression:uexp -> ( this.ast(_fromIdx, "UnaryExpr",{op:"+"},[uexp]) ) || "-" UnaryExpression:uexp -> ( this.ast(_fromIdx, "UnaryExpr",{op:"-"},[uexp]) ) || "~" UnaryExpression:uexp -> ( this.ast(_fromIdx, "UnaryExpr",{op:"~"},[uexp]) ) || "!" 
UnaryExpression:uexp -> ( this.ast(_fromIdx, "UnaryExpr",{op:"!"},[uexp]) ) || PostfixExpression, MultiplicativeExpression = MultiplicativeExpression:mexp "*" UnaryExpression:uexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:"*"},[mexp,uexp]) ) | MultiplicativeExpression:mexp "/" UnaryExpression:uexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:"/"},[mexp,uexp]) ) | MultiplicativeExpression:mexp "%" UnaryExpression:uexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:"%"},[mexp,uexp]) ) | UnaryExpression, AdditiveExpression = AdditiveExpression:aexp "+" MultiplicativeExpression:mexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:"+"},[aexp,mexp]) ) | AdditiveExpression:aexp "-" MultiplicativeExpression:mexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:"-"},[aexp,mexp]) ) | MultiplicativeExpression, ShiftExpression = ShiftExpression:sexp "<<" AdditiveExpression:aexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:"<<"},[sexp,aexp]) ) | ShiftExpression:sexp ">>>" AdditiveExpression:aexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:">>>"},[sexp,aexp]) ) | ShiftExpression:sexp ">>" AdditiveExpression:aexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:">>"},[sexp,aexp]) ) | AdditiveExpression, // Since this grammar assumes OMeta/JS memoizes left-recursive rules, // we parametrize RelationalExpression with a boolean "In" flag that // signifies whether or not an expression of the form (e1 in e2) is allowed RelationalExpression :In = RelationalExpression(In):rexp "<" ShiftExpression:sexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:"<"},[rexp,sexp]) ) | RelationalExpression(In):rexp ">" ShiftExpression:sexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:">"},[rexp,sexp]) ) | RelationalExpression(In):rexp "<=" ShiftExpression:sexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:"<="},[rexp,sexp]) ) | RelationalExpression(In):rexp ">=" ShiftExpression:sexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:">="},[rexp,sexp]) ) | RelationalExpression(In):rexp k("instanceof") ShiftExpression:sexp -> ( this.ast(_fromIdx, 
"BinaryExpr",{op:"instanceof"},[rexp,sexp]) ) | ?(In) RelationalExpression(true):rexp k("in") ShiftExpression:sexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:"in"},[rexp,sexp]) ) | ShiftExpression, EqualityExpression :In = EqualityExpression(In):eexp "==" RelationalExpression(In):rexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:"=="},[eexp,rexp]) ) | EqualityExpression(In):eexp "!=" RelationalExpression(In):rexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:"!="},[eexp,rexp]) ) | EqualityExpression(In):eexp "===" RelationalExpression(In):rexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:"==="},[eexp,rexp]) ) | EqualityExpression(In):eexp "!==" RelationalExpression(In):rexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:"!=="},[eexp,rexp]) ) | RelationalExpression(In), BitwiseANDExpression :In = BitwiseANDExpression(In):aexp "&" EqualityExpression(In):eexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:"&"},[aexp,eexp]) ) | EqualityExpression(In), BitwiseXORExpression :In = BitwiseXORExpression(In):xexp "^" BitwiseANDExpression(In):aexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:"^"},[xexp,aexp]) ) | BitwiseANDExpression(In), BitwiseORExpression :In = BitwiseORExpression(In):oexp "|" BitwiseXORExpression(In):xexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:"|"},[oexp,xexp]) ) | BitwiseXORExpression(In), LogicalANDExpression :In = LogicalANDExpression(In):aexp "&&" BitwiseORExpression(In):oexp -> ( this.ast(_fromIdx, "LogicalAndExpr",{},[aexp,oexp]) ) | BitwiseORExpression(In), LogicalORExpression :In = LogicalORExpression(In):oexp "||" LogicalANDExpression(In):aexp -> ( this.ast(_fromIdx, "LogicalOrExpr",{},[oexp,aexp]) ) | LogicalANDExpression(In), ConditionalExpression :In = LogicalORExpression(In):oexp "?" 
AssignmentExpression(In):texp ":" AssignmentExpression(In):fexp -> ( this.ast(_fromIdx, "ConditionalExpr",{},[oexp,texp,fexp]) ) | LogicalORExpression(In), AssignmentExpression :In = LeftHandSideExpression:lexp AssignmentOperator:op AssignmentExpression(In):aexp -> ( this.ast(_fromIdx, "AssignExpr",{op:op},[lexp,aexp]) ) | ConditionalExpression(In), AssignmentOperator = "=" || ">>>=" || "<<=" || ">>=" || "*=" || "/=" || "%=" || "+=" || "-=" || "&=" || "^=" || "|=", Expression :In = Expression(In):exp "," AssignmentExpression(In):aexp -> ( this.ast(_fromIdx, "BinaryExpr",{op:","},[exp,aexp]) ) | AssignmentExpression(In), // A.4 Statements Statement = Block || VariableStatement || EmptyStatement || ExpressionStatement || IfStatement || IterationStatement || ContinueStatement || BreakStatement || ReturnStatement || WithStatement || LabelledStatement || SwitchStatement || ThrowStatement || TryStatement || DebuggerStatement, // @returns a Block AST // Note: the ES5 spec prescribes that a block is a StatementList, not a SourceElements list // this precludes nested function declarations // This parser explicitly deviates from the spec to enable nested function declarations Block = "{" (SourceElements | empty -> []):stmts "}" -> ( this.ast(_fromIdx, "BlockStmt",{},stmts) ), // @returns an array of ASTs StatementList = StatementList:stmts Statement:stmt -> ( stmts.concat([ stmt ]) ) | Statement:stmt -> ( [stmt] ), // @returns a Var AST VariableStatement = k("var") VariableDeclarationList(true):vars sc -> ( this.ast(_fromIdx, "VarDecl",{},vars) ), // @returns an array of ASTs VariableDeclarationList :In = VariableDeclarationList(In):vs "," VariableDeclaration(In):v -> ( vs.concat([v]) ) | VariableDeclaration(In):v -> ( [v] ), // @returns an AST VariableDeclaration :In = InitPattern(In) | IdPattern, // @returns an AST InitPattern :In = IdPattern:patt Initialiser(In):init -> ( this.ast(_fromIdx, "InitPatt",{}, [patt,init]) ), // @returns an AST IdPattern = scanIdentifier:id -> 
( this.ast(_fromIdx, "IdPatt",{name:id},[]) ), // @returns an AST Initialiser :In = "=" AssignmentExpression(In):exp -> ( exp ), // @returns an AST EmptyStatement = ";" -> ( this.ast(_fromIdx, "EmptyStmt",{},[]) ), // note: this semicolon eats newlines // @returns an Expression AST ExpressionStatement = ~("{" | k("function")) Expression(true):exp sc -> ( exp ), // @returns an AST IfStatement = k("if") "(" Expression(true):bexp ")" Statement:tstmt k("else") Statement:fstmt -> ( this.ast(_fromIdx, "IfStmt",{},[bexp,tstmt,fstmt]) ) | k("if") "(" Expression(true):bexp ")" Statement:tstmt -> ( this.ast(_fromIdx, "IfStmt",{},[bexp, tstmt, this.ast(_fromIdx, "EmptyStmt",{},[])]) ), // @returns an AST IterationStatement = k("do") Statement:s k("while") "(" Expression(true):e ")" sc -> ( this.ast(_fromIdx, "DoWhileStmt",{},[s,e]) ) || k("while") "(" Expression(true):e ")" Statement:s -> ( this.ast(_fromIdx, "WhileStmt",{},[e,s]) ) || k("for") "(" (Expression(false) | empty -> this.emptyAst()):init ";" (Expression(true) | empty -> this.emptyAst()):cond ";" (Expression(true) | empty -> this.emptyAst()):update ")" Statement:s -> ( this.ast(_fromIdx, "ForStmt",{},[init,cond,update,s]) ) || k("for") "(" k("var") VariableDeclarationList(false):vars ";" (Expression(true) | empty -> this.emptyAst()):cond ";" (Expression(true) | empty -> this.emptyAst()):update ")" Statement:s -> ( this.ast(_fromIdx, "ForStmt",{},[this.ast(_fromIdx, "VarDecl",{},vars),cond,update,s]) ) || k("for") "(" LeftHandSideExpression:lhs k("in") Expression(true):e ")" Statement:s -> ( this.ast(_fromIdx, "ForInStmt",{},[lhs,e,s]) ) || k("for") "(" k("var") VariableDeclaration(false):v k("in") Expression(true):e ")" Statement:s -> ( this.ast(_fromIdx, "ForInStmt",{},[ this.ast(_fromIdx, "VarDecl",{},[v]),e,s]) ), ContinueStatement = k("continue") sc -> ( this.ast(_fromIdx, "ContinueStmt",{},[]) ) || k("continue") scanIdentifierNoLineTerminator:id sc -> ( this.ast(_fromIdx, "ContinueStmt",{label:id},[]) ), 
// --- Statement rules (continued) ---
// Every rule below builds its result with this.ast(_fromIdx, type, attributes, children);
// _fromIdx is handed to this.ast as the node's start position.
// k(...), sc, skipNoLine and the scan* rules are defined elsewhere in this grammar
// (presumably keyword matching, statement termination and token scanning -- TODO confirm).
// Expression(true): per the note at the end of this file, the boolean argument
// says whether an "in-expression" is allowed.

// `break` on its own, or `break` followed by a label; the label is stored in the 'label' attribute
BreakStatement = k("break") sc -> ( this.ast(_fromIdx, "BreakStmt",{},[]) )
               || k("break") scanIdentifierNoLineTerminator:id sc -> ( this.ast(_fromIdx, "BreakStmt",{label:id},[]) ),

// a bare `return` yields a ReturnStmt without children; otherwise the returned expression is the only child
ReturnStatement = k("return") sc -> ( this.ast(_fromIdx, "ReturnStmt",{},[]) )
                | k("return") skipNoLine Expression(true):exp sc -> ( this.ast(_fromIdx, "ReturnStmt",{},[exp]) ),

// children: [object expression, body statement]
WithStatement = k("with") "(" Expression(true):e ")" Statement:s -> ( this.ast(_fromIdx, "WithStmt",{},[e,s]) ),

// children: the discriminant expression followed by all clauses returned by CaseBlock
SwitchStatement = k("switch") "(" Expression(true):e ")" CaseBlock:cases -> ( this.ast(_fromIdx, "SwitchStmt",{},[e].concat(cases)) ),

// @return an array of ASTs
// first alternative: a block containing a default clause, with optional case clauses before and after it
// second alternative: a block with case clauses only (possibly none)
CaseBlock = "{" (CaseClauses | empty -> []):precases DefaultClause:dflt (CaseClauses | empty -> []):postcases "}" -> ( precases.concat([dflt]).concat(postcases) )
          || "{" (CaseClauses | empty -> []):cases "}" -> (cases),

// @return an array of ASTs
// left-recursive: one or more CaseClause nodes, accumulated in source order
CaseClauses = CaseClauses:clauses CaseClause:clause -> (clauses.concat([clause]))
            | CaseClause:clause -> ([ clause ]),

// children: the case expression followed by the (possibly empty) statement list
CaseClause = k("case") Expression(true):e ":" (StatementList | empty -> []):stmts -> ( this.ast(_fromIdx, "Case",{},[e].concat(stmts)) ),

// children: the (possibly empty) statement list
DefaultClause = k("default") ":" (StatementList | empty -> []):stmts -> ( this.ast(_fromIdx, "DefaultCase",{}, stmts) ),

// the label is stored in the 'label' attribute, the labelled statement is the only child
LabelledStatement = scanIdentifier:id ":" Statement:s -> ( this.ast(_fromIdx, "LabelledStmt",{ label: id },[s]) ),

// The first alternative matches `throw` followed by a line terminator and then ends in
// ~empty, which can never succeed, so this alternative never produces a result.
// NOTE(review): when that alternative fails, the choice moves on to the second
// alternative; confirm that the second alternative cannot itself accept a line
// terminator between `throw` and the expression.
ThrowStatement = k("throw") skipNoLine scanLineTerminator ~empty // fails explicitly
               || k("throw") Expression(true):e sc -> ( this.ast(_fromIdx, "ThrowStmt",{},[e]) ),

// children are [block, catch, finally]:
//  - try/catch/finally: all three present
//  - try/finally: an Empty AST fills the catch slot
//  - try/catch: only [block, catch], there is no third child
TryStatement = k("try") Block:b Catch:c Finally:f -> ( this.ast(_fromIdx, "TryStmt",{},[b,c,f]) )
             | k("try") Block:b Finally:f -> ( this.ast(_fromIdx, "TryStmt",{},[b,this.emptyAst(),f]) )
             | k("try") Block:b Catch:c -> ( this.ast(_fromIdx, "TryStmt",{},[b,c]) ),

// @returns a CatchClause AST with children [formal parameter, block]
Catch = k("catch") "(" FormalParameter:formal ")" Block:b -> ( this.ast(_fromIdx, "CatchClause",{}, [formal, b]) ),

// @returns a Block AST (the block itself, no wrapper node is created)
Finally = k("finally") Block:b -> ( b ),

DebuggerStatement = k("debugger") sc -> ( this.ast(_fromIdx, "DebuggerStmt",{},[]) ),

// A.5 Functions and Programs

// children: [name pattern, ParamDecl node holding the formals, ...body elements]
FunctionDeclaration = k("function") IdPattern:patt "(" (FormalParameterList | empty -> []):formals ")" "{" FunctionBody:body "}" -> ( this.ast(_fromIdx, "FunctionDecl",{}, [patt, this.ast(_fromIdx, "ParamDecl",{},formals)].concat(body)) ),

// same child layout as FunctionDecl; for an anonymous function expression
// an Empty AST takes the place of the name pattern
FunctionExpression = k("function") IdPattern:patt "(" (FormalParameterList | empty -> []):formals ")" "{" FunctionBody:body "}" -> ( this.ast(_fromIdx, "FunctionExpr",{}, [patt, this.ast(_fromIdx, "ParamDecl",{},formals)].concat(body)) )
                   || k("function") "(" (FormalParameterList | empty -> []):formals ")" "{" FunctionBody:body "}" -> ( this.ast(_fromIdx, "FunctionExpr",{}, [this.emptyAst(), this.ast(_fromIdx, "ParamDecl",{},formals)].concat(body)) ),

// @returns an array of ASTs
// left-recursive: one or more comma-separated formal parameters, in source order
FormalParameterList = FormalParameterList:formals "," FormalParameter:formal -> ( formals.concat([ formal ]) )
                    | FormalParameter:formal -> [formal],

// @returns an AST
FormalParameter = IdPattern,

// @returns an array of ASTs
// the optional directive prologue followed by the optional source elements
FunctionBody = (DirectivePrologue | empty -> []):prologue (SourceElements | empty -> []):src -> ( prologue.concat(src) ),

// like FunctionBody, but wrapped in a Program node; skipToEnd must also succeed after the last element
Program = (DirectivePrologue | empty -> []):prologue (SourceElements | empty -> []):src skipToEnd -> ( this.ast(_fromIdx, "Program",{},prologue.concat(src) ) ),

// @returns an array of ASTs
// left-recursive: one or more source elements, in source order
SourceElements = SourceElements:elts SourceElement:e -> ( elts.concat([e]) )
               | SourceElement:e -> ([e]),

SourceElement = Declaration || Statement,

// Broken out so es6 can override to include ConstDecl and LetDecl
Declaration = FunctionDeclaration,

// @returns an array of ASTs
// left-recursive: one or more directives, in source order
DirectivePrologue = DirectivePrologue:p Directive:d -> ( p.concat([d]) )
                  | Directive:d -> ([d]),

// the value produced by scanDirective is used directly as the node's attributes object
Directive = scanDirective:dir sc -> ( this.ast(_fromIdx, 'PrologueDecl',dir,[]) ),

// === Implementation-level rules (not part of the spec) ===

// useful for parsing only expressions and making sure the parser sees all input
// parses a single Expression (called with argument true), then requires skipToEnd to succeed;
// the result is the expression's AST
ExpressionOnly = Expression(true):e skipToEnd -> (e),

// --- disabled experiment, kept for reference only (the rules below are commented out) ---
// override apply to collect position information in AST nodes
// at the cost of slower parsing...
// the rule 'pos' always succeeds and returns the current position in the input stream
// Problem: infinite regress...
/*_apply :rule = pos:fromIdx ^_apply(rule):ans pos:toIdx
    { (function (){ console.log('in apply'); return true; }) &&
      (ans.nodeType && !(ans.attributes().position)) ? //ans is an AST without position info
        (ans.attributes().position = {start:fromIdx,end:toIdx}) : ans } -> ans,
  _applyWithArgs :rule :args = pos:fromIdx ^_applyWithArgs(rule,args):ans pos:toIdx
    { (function (){ console.log('in apply'); return true; }) &&
      (ans.nodeType && !(ans.attributes().position)) ? //ans is an AST without position info
        (ans.attributes().position = {start:fromIdx,end:toIdx}) : ans } -> ans,*/

// higher-order rule that parses a rule and ensures there is no lingering input
// (applies the rule passed as argument, then requires skipToEnd; returns the rule's result)
// This is the last live rule of the grammar, hence no trailing comma.
complete :rule = apply(rule):res skipToEnd -> (res)

// --- historical notes; every rule shown from here to the closing brace is commented out ---
// Note: below text refers to version of OMeta before jumptable optimization
// The token rule turns out to be one of the main bottlenecks
// of the parsing process, so we replace it by a set of more efficient rules:
// given that the parser can already indicate what punctuator it is interested in,
// the parser just instructs the lexer to skip whitespace, then tries to match
// exactly the characters it expects. This works for all punctuators that are not
// the prefix of another punctuator:
//   ``>>>='' |``>>='' | ``==='' | ``!==''| ``<<=''| ``/=''
//   ``+='' | ``-='' | ``*='' | ``%='' | ``>='' | ``++'' |
//   ``--'' | ``<='' | ``&='' | ``|='' | ``^='' | ``&&'' |
//   ``||'' | '{' | '}' | '(' | ')' | '[' | ']' |
//   '.' | ';' | ',' | '~' | '?' | ':'
// token :expected = skip seq(expected) -> (expected),

// for all other punctuators, an additional check is required to
// distinguish the punctuator from a longer punctuator with the same prefix

//``>>'' avoid: >>> >>>=
// shiftRightPunct = skip ``>>'' ~'>' -> (">>"),

//'=' avoid: == ===
// simpleAssignPunct = skip '=' ~'=' -> ("="),

//``=='' avoid: ===
//``!='' avoid: !==
//``>>>'' avoid: >>>=
//``<<'' avoid: <<=
// no Compare or Assignment Punctuator
// noEq :t = skip seq(t) ~'=' -> (t),

//'!' avoid: != !==
//'*' avoid: *=
//'%' avoid: %=
//'^' avoid: ^=
//'/' avoid: /=
// no Compare or Assignment Single-character Punctuator
// noEqChar :t = skip exactly(t) ~'=' -> (t),

// '&' avoid: &= &&
// '|' avoid: |= ||
// '<' avoid: <= << <<=
// '>' avoid: >= >> >>> >>>=
// '+' avoid: += ++
// '-' avoid: -= --
// no Assignment single-character punctuator (excludes doubles)
// noEqDbl :t = skip exactly(t) ~'=' ~exactly(t) -> (t),
}

// Should the parser attribute AST nodes with position info?
ES5Parser.generatePositionInfo = false;

// Per-parser setup. Resets strict-mode tracking and installs the two AST
// factory methods used by the grammar's semantic actions:
//   this.emptyAst()                                 -> [ "Empty" ]
//   this.ast(startPos, type, attributes, children)  -> [ type, attributes, ...children ]
// Both results are passed through mixinASTMethods.
// When generatePositionInfo is set, this.ast additionally records
// startLine/startColumn/endLine/endColumn on the attributes object it is given
// (the object is mutated in place).
ES5Parser.initialize = function() {
  this.strictmode = false;

  this.emptyAst = function() {
    return mixinASTMethods([ "Empty" ]);
  };

  if (this.generatePositionInfo) {
    // idxToPosition(idx) -> [line, column]
    var idxToPosition = LineScanner._genericMatch(this.input, 'scan');

    this.ast = function(startPos, type, attributes, children) {
      var start = idxToPosition(startPos);
      // The node ends at the last consumed character. A zero-width node at the
      // very start of the input would give index -1, which the converter turns
      // into line 0 / column NaN, so the index is clamped to 0.
      var end = idxToPosition(Math.max(0, this._currIdx() - 1));
      attributes.startLine = start[0];
      attributes.startColumn = start[1];
      attributes.endLine = end[0];
      attributes.endColumn = end[1];
      return mixinASTMethods([ type, attributes ].concat(children));
    };
  } else {
    // startPos is accepted for signature compatibility but not used here
    this.ast = function(startPos, type, attributes, children) {
      return mixinASTMethods([ type, attributes ].concat(children));
    };
  }
};

// @returns true iff k is one of the reserved words in the 'keywords' set.
// Uses an own-property test so that names inherited from Object.prototype
// (e.g. "constructor", "toString") are never reported as keywords.
ES5Parser.isKeyword = function(k) {
  return Object.prototype.hasOwnProperty.call(keywords, k);
};

// @returns true iff k is a future reserved word: always for the words in
// nonStrictFutureKws, and additionally for the words in strictFutureKws
// while this.strictmode is set.
ES5Parser.isFutureReservedWord = function(k) {
  var hasOwn = Object.prototype.hasOwnProperty;
  if (hasOwn.call(nonStrictFutureKws, k)) {
    return true;
  }
  return !!this.strictmode && hasOwn.call(strictFutureKws, k);
};

// The LineScanner grammar is used for generating accurate (line, column) position
// information for AST nodes. It simply scans the entire input for LineTerminators,
// and builds up an array of the indices of line terminators.
// The parser can then convert from indices into the input stream to line and column
// numbers by invoking:
//   var converter = LineScanner.matchAll(input, 'scan');
//   converter(idx) -> [line, column]
// Note: LineScanner does not actually 'consume' its input, it simply performs
// a look-ahead across the entire input.
//
// Thanks to Alessandro Warth for suggesting this idea.
ometa LineScanner {
  // copied from ES5Parser:
  LineTerminator = uc('LF') | uc('CR') | uc('LS') | uc('PS'),
  // matches one character that passes the UnicodeCategories[id] test
  uc :id         = char:x ?(UnicodeCategories[id].test(x)) -> (x),

  // One line break. The sequence <CR><LF> is matched first so that it counts as a
  // single break; ES5 section 7.3 states it should be considered a single character
  // for the purpose of reporting line numbers. Any other LineTerminator is one break.
  LineBreak      = uc('CR') uc('LF') | LineTerminator,

  // Looks ahead over the whole input (nothing is consumed) and returns a converter
  //   function(idx) -> [line, column]
  // with 1-based line and column numbers for idx >= 0.
  scan           = (&findLineStarts):lineStarts -> ( function(idx) {
                     var line = 0;
                     // advance past every line that starts at or before idx; the loop
                     // also stops at the end of the array (comparison with undefined is false)
                     while (idx >= lineStarts[line]) { line++; }
                     return [line, idx - lineStarts[line - 1] + 1];
                   } ),

  // @returns an array holding the input index at which each line starts;
  // the first line starts at index 0, every other entry is the index right after a line break
  findLineStarts = {[0]}:ls (LineBreak {ls.push(this._currIdx())} | char)* -> ls
}

// indicate to OMeta to memoize parameterized rules
// this is crucial for the ES5 Parser, as many of its *Expression
// rules are left-recursive and parameterized with a boolean parameter
// indicating whether an "in-expression" is allowed or not.
// see http://tinlizzie.org/ometa-js/#Memoizing_Parameterized_Rules
ES5Parser.memoizeParameterizedRules();

// indicate to OMeta to treat '||' exclusive OR just like '|' prioritized OR
// verifying that OR's are exclusive is useful while debugging the grammar, but
// they are extremely expensive in production use
// see http://tinlizzie.org/ometa-js/#xor_perf
ES5Parser.disableXORs();