//
// Combe/JS - A Parsing Language for JavaScript
//
// Copyright 2011 Lorenz Pretterhofer
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

// True when `s` is unchanged by upper-casing (so digits and punctuation
// count as upper case too).
isUpperCase = function (s) {
  return (s == s.toUpperCase());
}

// True when the first character of `s` is upper case.
isConstName = function (s) {
  return isUpperCase(s[0]);
}

// Elements of this array that are not contained in `other`.
// NOTE(review): relies on Array.prototype.include, which is not defined in
// this file -- presumably supplied by the host runtime; confirm.
Array.prototype.difference = function (other) {
  return this.filter(function (elem) {
    return !other.include(elem);
  });
}

// Copy of this array keeping only the first occurrence of every element.
Array.prototype.withoutDuplicates = function () {
  // Declared with `var`: the accumulator used to be an implicit global that
  // was shared with every other helper assigning to `array`.
  var array = [];
  this.forEach(function (elem) {
    if (!array.include(elem)) {
      array.push(elem);
    }
  });
  return array;
}

// Continuation lines are not yet dealt with... on the other hand, I don't yet
// have semicolon tokens either, which would also require correct handling
// of newlines (converting the ones that allow semicolon insertion accordingly).

// Bootstrap parser: turns Combe grammar text into an AST of nested arrays
// whose first element is the node type ('Grammar', 'Rule', 'Choice', ...).
// While a rule is being parsed, self.locals collects every bound name and
// self.argnames the rule's formal arguments; their difference becomes the
// list of local variables stored in the 'Rule' node.
ometa CombeBootstrapParser {

  grammar =
    "combe" tIdentifier:name "{" rule*:rules "}" tEof
      -> ['Grammar', name, rules]
  ,

  // Todo: Add argument lists
  rule =
    "rule" !{self.locals = []; self.argnames = []}
    tIdentifier:name ruleArguments:args "{" ruleBody:body "}"
      -> ['Rule', name, args, self.locals.difference(self.argnames).withoutDuplicates(), body]
  ,

  ruleArguments =
      "[" ruleArgumentExpr:e ("," ruleArgumentExpr)*:es "]" -> [e].concat(es)
    | "[" "]"                                               -> []
    | empty                                                 -> []
  ,

  ruleArgumentExpr =
    tIdentifier:name !{self.locals.push(name); self.argnames.push(name)} -> name
  ,

  ruleBody =
      gexpr
    | empty -> ['Apply', ['Property', 'nothing']]
  ,

  gexpr = choice
  ,

  choice =
    "|"? returnOperator:e
    ( ("|" returnOperator)+:es -> ['Choice', e].concat(es)
    | empty                    -> e
    )
  ,

  returnOperator =
      concat:e
      ( "->" returnOperatorArgument:a -> ['Concat', e, a]
      | empty                         -> e
      )
    | "->" returnOperatorArgument
  ,

  returnOperatorArgument =
      "{" jsCodeUntil('}'):jscode '}' -> ['Action', ['JSFunction', jscode]]
    | "(" jsCodeUntil(')'):jscode ')' -> ['Action', ['JSExpr', jscode]]
    | jsCodeUntilAnyDelim:jscode      -> ['Action', ['JSExpr', jscode]]
  ,

  concat =
    bind:e
    ( bind+:es -> ['Concat', e].concat(es)
    | empty    -> e
    )
  ,

  bind =
      lookahead:e
      ( ':' identifier:name !(self.locals.push(name)) -> ['Bind', name, e]
      | empty                                         -> e
      )
    | ":" identifier:name !(self.locals.push(name))
        -> ['Bind', name, ['Apply', ['Property', 'anything']]]
  ,

  lookahead =
      "~" repeat:e -> ['Not', e]
    | "&" repeat:e -> ['Lookahead', e]
    | repeat
  ,

  repeat =
    combeToken:e
    ( "*"   -> ['Repeat', e]
    | "+"   -> ['Repeat1', e]
    | "?"   -> ['Optional', e]
    | empty -> e
    )
  ,

  combeToken =
      "$" term:e -> ['Token', e]
    | term
  ,

  term =
      predicate
    | action
    | apply
    | destructure
  ,

  predicate =
      "?{" jsCodeUntil('}'):jscode '}' -> ['Predicate', ['JSFunction', jscode]]
    | "?(" jsCodeUntil(')'):jscode ')' -> ['Predicate', ['JSExpr', jscode]]
  ,

  action =
      "!{" jsCodeUntil('}'):jscode '}' -> ['Action', ['JSFunction', jscode]]
    | "!(" jsCodeUntil(')'):jscode ')' -> ['Action', ['JSExpr', jscode]]
  ,

  apply =
    range:e
    ( '[' applyArgumentList:args "]" -> ['Apply', e, args]
    | empty                          -> e
    )
  ,

  applyArgumentList =
      gexpr:e ("," gexpr)*:es -> [e].concat(es)
    | empty                   -> []
  ,

  destructure =
    "[" gexpr:e "]" -> ['Destructure', e]
  ,

  range =
      rangeArgument:l ".." rangeArgument:r -> ['Range', l, r]
    | simpleTerm
  ,

  rangeArgument =
      "*" -> ['Infinity']
    | simpleTerm
  ,

  simpleTerm =
      subexpr
    | immediateAction
    | literal
    | variable
  ,

  subexpr =
    "(" gexpr:e ")" -> e
  ,

  immediateAction =
      "%(" jsCodeUntil(')'):jscode ')' -> ['ImmediateAction', ['JSExpr', jscode]]
    | "%{" jsCodeUntil('}'):jscode '}' -> ['ImmediateAction', ['JSFunction', jscode]]
  ,

  literal =
      tString:s -> ['String', s]
    | tNumber:s -> ['Number', s]
  ,

  variable =
    tIdentifier:name -> {
      if (self.locals.include(name)) {
        return ['Variable', name]
      }
      else {
        // We assume all non-local vars are properties of the current grammar.
        return ['Property', name]
      }
    }
  ,

  jsCodeUntil :what = <(~seq(what) jsChar)*>
  ,

  jsCodeUntilAnyDelim = <(~jsAnyDelim jsChar)*>
  ,

  jsAnyDelim = '.' '.' | ',' | ']' | ')' | '}' | ';' | '|'
  ,

  jsChar =
      jsPair
    | string // Assuming JS strings are the same for now
    | char
  ,

  jsPair =
      '(' (~')' jsChar)* ')'
    | '[' (~']' jsChar)* ']'
    | '{' (~'}' jsChar)* '}'
  ,

  tString = whitespace* string
  ,

  string =
      <'\"' ('\\' '\\' | '\\' '\"' | ~'\"' char)* '\"'>
    | <'\'' ('\\' '\\' | '\\' '\'' | ~'\'' char)* '\''>
  ,

  tNumber = whitespace* number
  ,

  // NOTE(review): the bodies of `number`, `identifier` and the tail of
  // `token` were empty in the copy this was restored from (angle-bracket
  // text appears to have been stripped). They are reconstructed to match the
  // equivalent rules of the self-hosted grammar compiled by this bootstrap
  // (matchedInput[digit+], matchedInput[idLetter idLetterOrDigit*],
  // whitespace* what) -- confirm against the upstream file.
  number = <digit+>
  ,

  tIdentifier = whitespace* identifier
  ,

  identifier = <idLetter idLetterOrDigit*>
  ,

  idLetter = letter | '_'
  ,

  idLetterOrDigit = idLetter | digit
  ,

  tEof = whitespace* eof
  ,

  eof = ~anything
  ,

  whitespace = ' ' | '\t' | '\r' | '\n' | comment
  ,

  comment =
      '/' '/' (~newline char)* newline
    | '/' '*' (~('*' '/') char)* '*' '/'
  ,

  newline = '\r' '\n'? | '\n' | eof
  ,

  token :str = whitespace* <seq(str)>

}

// Parses a complete Combe grammar source text, starting at rule `grammar`.
CombeBootstrapParser.parseGrammar = function (text) {
  return this.matchAll(text, 'grammar');
}

//
// Combe/JS - A Parsing Language for JavaScript
//
// Copyright 2011 Lorenz Pretterhofer
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

// Flattens exactly one level: array elements are spliced in, everything else
// is appended as-is (the semantics of Array.prototype.concat).
Array.prototype.flatten1 = function () {
  // Declared with `var`: the accumulator used to be an implicit global.
  var array = [];
  this.forEach(function (elem) {
    array = array.concat(elem);
  });
  return array;
}

// Everything but the first element.
Array.prototype.rest = function () {
  return this.slice(1);
}

// AST -> AST pass. `recur` dispatches on the node type stored in the first
// element of each node by applying the rule of the same name; nested
// 'Choice' and 'Concat' nodes are merged into their parent, every other node
// is rebuilt unchanged.
ometa CombeBootstrapOptimizer {

  recur [:name apply(name):r] -> r,

  // NOTE(review): `rules` is passed through without recursion, so rule bodies
  // are currently NOT rewritten by this pass (the translator's Grammar rule
  // uses [recur*:rules]). Left as-is because enabling it changes the compiler
  // output -- confirm whether this is intentional.
  Grammar :name :rules -> ['Grammar', name, rules],

  Rule :name :args :vars recur:e -> ['Rule', name, args, vars, e],

  Choice (recur:e -> {
      if (e[0] === 'Choice') {
        return e.rest()
      }
      else {
        return [e]
      }
    })+:es -> ['Choice'].concat(es.flatten1()),

  Concat (recur:e -> {
      if (e[0] === 'Concat') {
        return e.rest()
      }
      else {
        return [e]
      }
    })+:es -> ['Concat'].concat(es.flatten1()),

  Action recur:jse -> ['Action', jse],

  Bind :name recur:e -> ['Bind', name, e],

  Not recur:e -> ['Not', e],

  Lookahead recur:e -> ['Lookahead', e],

  Repeat recur:e -> ['Repeat', e],

  Repeat1 recur:e -> ['Repeat1', e],

  Optional recur:e -> ['Optional', e],

  Token recur:e -> ['Token', e],

  Predicate recur:jse -> ['Predicate', jse],

  // The parser also emits 'Apply' nodes without an argument list (empty rule
  // bodies, bare `:name` binds); accept both shapes, as the translator does.
  Apply recur:e
    ( [recur*:args] -> ['Apply', e, args]
    | empty         -> ['Apply', e]
    ),

  ImmediateAction recur:jse -> ['ImmediateAction', jse],

  Destructure recur:e -> ['Destructure', e],

  Range recur:l recur:r -> ['Range', l, r],

  Infinity -> ['Infinity'],

  String :s -> ['String', s],

  Number :n -> ['Number', n],

  Variable :name -> ['Variable', name],

  // Emitted by the parser for every non-local identifier; without this rule
  // `recur` has nothing to apply for such nodes.
  Property :name -> ['Property', name],

  JSFunction :jscode -> ['JSFunction', jscode],

  JSExpr :jscode -> ['JSExpr', jscode]

}

// Runs the optimizer over an AST produced by the parser, starting at `recur`.
CombeBootstrapOptimizer.translateGrammar = function (grammar) {
  return this.match(grammar, 'recur');
}

//
// Combe/JS - A Parsing Language for JavaScript
//
// Copyright 2011 Lorenz Pretterhofer
//
// Licensed under the Apache License,
// Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

// Array test with a fallback for hosts that lack Array.isArray.
isArray = Array.isArray || function (o) {
  return Object.prototype.toString.call(o) === '[object Array]';
}

Array.prototype.isEmpty = function () {
  return this.length === 0;
}

// Renders an "iolist" (arbitrarily nested arrays of printable values) as one
// string: arrays are joined recursively, everything else via toString().
// Installed on Object.prototype so that strings and numbers nested inside
// the list respond to it as well.
Object.prototype.deepJoin = function () {
  if (isArray(this)) {
    return this.reduce(function (s, elem) {
      return s + elem.deepJoin();
    }, '');
  }
  else {
    return this.toString();
  }
}

// New array with `delim` inserted between neighbouring elements;
// [] stays [] and [x] stays [x].
Array.prototype.interpolate = function (delim) {
  var a = [];
  if (this.length >= 1) a.push(this[0]);
  for (var i = 1; i < this.length; i++) {
    a.push(delim);
    a.push(this[i]);
  }
  return a;
}

// Wraps the string in single quotes. No escaping is performed.
String.prototype.quoted = function () {
  return ('\'' + this + '\'');
}

String.ReservedWords =
  ["abstract boolean break byte case catch char class const continue",
   "debugger default delete do double else enum export extends false final",
   "finally float for function goto if implements import in instanceof int",
   "interface long native new null package private protected public return",
   "short static super switch synchronized this throw throws transient true",
   "try typeof var volatile void while with"].join(' ').split(' ');

// True when the string is one of String.ReservedWords.
// NOTE(review): relies on Array.prototype.include, which is not defined in
// this file -- presumably supplied by the host runtime; confirm.
String.prototype.isReservedWord = function () {
  // In non-strict code `this` is a String wrapper object; hand the primitive
  // to the membership test so it also works with strict comparison.
  return String.ReservedWords.include(this.toString());
}

// Prologue of every generated parser module; each {{name}} is replaced with
// the grammar name. The generated rule table follows and is closed by the
// Grammar rule of the translator.
header = """// Generated by Combe/JS Bootstrap Compiler

var extend = require('./util').extend;
var BaseParser = require('./base_parser');
var Range = require('./range');

var {{name}} = module.exports = function () {
  BaseParser.call(this);
};

extend(({{name}}.prototype = Object.create(BaseParser.prototype)), {

"""

// AST -> JavaScript source. Every rule returns an iolist (see deepJoin) and
// `recur` dispatches on the node type held in the first element of a node.
ometa CombeBootstrapTranslator {

  recur [:name apply(name):r] -> r,

  Grammar :name [recur*:rules] ->
    [header.replace(new RegExp("{{name}}", "g"), name),
     rules.interpolate(',\n\n'),
     '\n\n',
     '});\n'],

  Rule :name :args :vars recur:e ->
    [name.quoted(), ': function (', args.interpolate(', '), ') {\n',
     (vars.isEmpty() ? '' : ['var ', vars.interpolate(', '), ';\n']),
     'return this._apply(', e, ');\n',
     '}'],

  Choice recur+:es ->
    ['(function () {return this._choice(', es.interpolate(',\n'), ');})'],

  Concat recur+:es ->
    ['(function () {return this._concat(', es.interpolate(', '), ');})'],

  Action recur:jse -> jse,

  // NOTE(review): the generated function assigns but does not return the
  // bound value; whether the runtime expects that is not visible here.
  Bind :name recur:e ->
    ['(function () {', name, ' = this._apply(', e, ');})'],

  Not recur:e ->
    ['(function () {return this._not(', e, ');})'],

  Lookahead recur:e ->
    ['(function () {return this._lookahead(', e, ');})'],

  Repeat recur:e ->
    ['(function () {return this._repeat(', e, ');})'],

  Repeat1 recur:e ->
    ['(function () {return this._repeat1(', e, ');})'],

  Optional recur:e ->
    ['(function () {return this._optional(', e, ');})'],

  Token recur:e ->
    ['(function () {return this.token(', e, ');})'],

  // Fixed: the action referenced `e`, which this rule never binds.
  Predicate recur:jse ->
    ['(function () {return this._predicate(', jse, ');})'],

  // Callee and arguments are interpolated together so that an empty argument
  // list yields `_apply(callee)` rather than `_apply(callee, )`.
  Apply recur:e
    ( [recur*:args] ->
        ['(function () {return this._apply(', [e].concat(args).interpolate(', '), ');})']
    | empty ->
        ['(function () {return this._apply(', e, ');})']
    ),

  // Fixed: the action referenced `jse`, which this rule never binds.
  ImmediateAction ['JSFunction' :jscode] ->
    ['(function () {', jscode, '}).call(this)'],

  ImmediateAction ['JSExpr' :jscode] ->
    ['(', jscode, ')'],

  Destructure recur:e ->
    ['(function () {return this._destructure(', e, ');})'],

  Range recur:l recur:r ->
    ['(new Range(', l, ', ', r, '))'],

  Infinity -> 'Infinity',

  String :s -> s,

  Number :n -> n,

  Variable :name -> name,

  // Reserved words cannot be used with dot notation, so they are emitted as
  // bracketed property accesses.
  Property :name -> {
    if (name.isReservedWord()) {
      return 'this["' + name + '"]';
    }
    else {
      return 'this.' + name;
    }
  },

  JSFunction :jscode ->
    ['(function () {', jscode, '})'],

  JSExpr :jscode ->
    ['(function () {return (', jscode, ');})']

}

// Translates an AST into an iolist of JavaScript source, starting at `recur`.
CombeBootstrapTranslator.translateGrammar = function (ast) {
  return this.match(ast, 'recur');
}

// Full pipeline: grammar text -> AST -> optimized AST -> JavaScript source.
compileCombe = function (src) {
  var ast = CombeBootstrapParser.parseGrammar(src);
  var opt_ast = CombeBootstrapOptimizer.translateGrammar(ast);
  var iolist = CombeBootstrapTranslator.translateGrammar(opt_ast);
  return iolist.deepJoin();
}

// The Combe parser written in Combe itself; compiled below by the bootstrap
// pipeline. Fixed inside the grammar text: the number alternative of rule
// `literal` returned the unbound `s` instead of `n`.
src = """
//
// Combe/JS - A Parsing Language for JavaScript
//
// Copyright 2011 Lorenz Pretterhofer
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

combe CombeParser {

  rule grammar {
    $'combe' $identifier:name $'{' rule*:rules $'}' $eof
      -> ['Grammar', name, rules]
  }

  rule rule {
    $'rule' !{this.locals = []; this.argnames = [];}
    $identifier:name ruleArgumentList:args $'{' gexpr:e $'}'
      -> ['Rule', name, args, this.locals.difference(this.argnames).withoutDuplicates(), e]
  }

  rule ruleArgumentList {
    | $'[' delimited[ruleArgument, $',']:args $']' -> args
    | -> []
  }

  rule ruleArgument {
    $identifier:name -> {
      this.argnames.push(name);
      this.locals.push(name);
      return [name];
    }
  }

  rule gexpr {
    choice
  }

  rule choice {
    $'|'? returnOperator:e (
      ($'|' returnOperator)+:es -> ['Choice', e].concat(es)
    | -> e
    )
  }

  rule returnOperator {
    | concat:e (
        $'->' returnOperatorArgument:a -> ['Concat', e, a]
      | -> e
      )
    | $'->' returnOperatorArgument
  }

  rule returnOperatorArgument {
    (jsFunction[''] | jsExpr[''] | jsUntilAnyDelim):js -> ['Action', js]
  }

  rule concat {
    bind:e (
      bind+:es -> ['Concat', e].concat(es)
    | -> e
    )
  }

  rule bind {
    | lookahead:e (
        ':' identifier:name !(this.locals.push(name)) -> ['Bind', name, e]
      | -> e
      )
    | $':' identifier:name !(this.locals.push(name)) -> ['Bind', name, ['Apply', 'anything']]
  }

  rule lookahead {
    | $'~' repeat:e -> ['Not', e]
    | $'&' repeat:e -> ['Lookahead', e]
    | repeat
  }

  rule repeat {
    combeToken:e (
      $'*' -> ['Repeat', e]
    | $'+' -> ['Repeat1', e]
    | $'?' -> ['Optional', e]
    | -> e
    )
  }

  rule combeToken {
    | $'$' term:e -> ['Token', e]
    | term
  }

  rule term {
    | predicate
    | action
    | apply
    | destructure
    | range
  }

  rule predicate {
    | jsFunction['?']:js -> ['Predicate', js]
    | jsExpr['?']:js -> ['Predicate', js]
  }

  rule action {
    | jsFunction['!']:js -> ['Action', js]
    | jsExpr['!']:js -> ['Action', js]
  }

  rule apply {
    range:e '[' delimited[gexpr, $',']:args $']' -> ['Apply', e, args]
  }

  rule destructure {
    $'[' gexpr:e $']' -> ['Destructure', e]
  }

  rule range {
    | simpleTerm:l (
        $'..' rangeArgument:r -> ['Range', l, r]
      | -> l
      )
    | rangeArgument:l $'..' rangeArgument:r -> ['Range', l, r]
  }

  rule rangeArgument {
    | $'*' -> ['Infinity']
    | simpleTerm
  }

  rule simpleTerm {
    | subexpr
    | immediateAction
    | literal
    | variable
  }

  rule subexpr {
    $'(' gexpr:e $')' -> e
  }

  rule immediateAction {
    | jsFunction['%']:js -> ['ImmediateAction', js]
    | jsExpr['%']:js -> ['ImmediateAction', js]
  }

  rule literal {
    | $string:s -> ['String', s]
    | $number:n -> ['Number', n]
  }

  rule variable {
    $identifier:name -> {
      if (this.locals.include(name)) {
        return ['Variable', name];
      }
      else {
        // We assume all non-local vars are properties of the current grammar.
        return ['Property', name];
      }
    }
  }

  rule jsFunction[prefix] {
    $prefix '{' jsUntil['}']:code '}' -> ['JSFunction', code]
  }

  rule jsExpr[prefix] {
    $prefix '(' jsUntil[')']:code ')' -> ['JSExpr', code]
  }

  rule jsUntilAnyDelim {
    jsUntil['..' | ',' | ']' | ')' | '}' | ';' | '|']:code -> ['JSExpr', code]
  }

  rule jsUntil[what] {
    matchedInput[ (~what jsChar)* ]
  }

  rule jsChar {
    | jsPair
    | string // Assuming JS strings are the same for now
    | char
  }

  rule jsPair {
    | '(' (~')' jsChar)* ')'
    | '[' (~']' jsChar)* ']'
    | '{' (~'}' jsChar)* '}'
  }

  rule string {
    matchedInput[
      | '\\"' ('\\\\\\\\' | '\\\\\\"' | ~'\\"' char)* '\\"'
      | '\\'' ('\\\\\\\\' | '\\\\\\'' | ~'\\'' char)* '\\''
    ]
  }

  rule number {
    matchedInput[digit+]
  }

  rule identifier {
    matchedInput[idLetter idLetterOrDigit*]
  }

  rule idLetter {
    letter | '_'
  }

  rule idLetterOrDigit {
    idLetter | digit
  }

  rule letter {
    'a'..'z' | 'A'..'Z'
  }

  rule digit {
    '0'..'9'
  }

  rule whitespace {
    | char[' \\t\\r\\n']
    | comment
  }

  rule comment {
    | '//' (~newline char)* newline
    | '/*' (~'*/' char)* '*/'
  }

  rule newline {
    | '\\r' '\\n'?
    | '\\n'
    | eof
  }

  // Todo: some of these should probably be default definitions in a parent
  // grammar (or the base Combe behaviors).

  rule token[what] {
    whitespace* what
  }

}
"""

// ast = CombeBootstrapParser.parseGrammar(src)
// opt_ast = CombeBootstrapOptimizer.translateGrammar(ast)
// iolist = CombeBootstrapTranslator.translateGrammar(opt_ast)
// out = iolist.deepJoin()

out = compileCombe(src)