/* Copyright (c) 2007, 2008 Alessandro Warth Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* new syntax: #foo match the string object 'foo' (should also be accepted in JS) 'abc' match the string object 'abc' 'c' match the string object 'c' ``abc'' match the sequence of string objects 'a', 'b', 'c' "abc" token('abc') [1 2 3] match the array object [1, 2, 3] foo(bar) apply rule foo with argument bar -> ... do semantic actions in JS (no more ST). allow multiple statements, but no declarations probably don't even need {}s, because newlines can terminate it! */ /* * What are the semantics we want for rules? * We want to have proper closures */ // the OMeta "class" and basic functionality argsToArray = function(args) { var result = [] for(var x = 0; x < args.length; x++) { result.push(args[x]) } return result } RuleInitial = function(input) { this.input = input } RuleInitial.prototype.applyNext = function(rule, parser) { return rule.apply(parser, this.input) } RuleInitial.prototype.withValue = function(value) { return new RuleResult(value, this.input) } RuleResult = function(value, input) { this.value = value this.input = input } RuleResult.prototype.applyNext = function(rule, parser) { return rule.apply(parser, this.input) } RuleResult.prototype.withValue = function(value) { return new RuleResult(value, this.input) } Rule = function(apply) { this.apply = apply } Rule.prototype.memoize = function(name) { return new MemoizedRule(this, name) } Rule.prototype.unmemoize = function() { return this } Rule.prototype.curry = function(inputList) { if(inputList.length == 0) return this return new CurriedRule(this, inputList) } MemoizedRule = function(rule, name) { this.rule = rule this.name = name } MemoizedRule.prototype.apply = function(parser, input) { var memoRec = input.memo[this.name] if (memoRec == undefined) { var origInput = input, failer = new Failer() // Install a temporary memo'd value for left-recursion input.memo[this.name] = failer memoRec = this.rule.apply(parser, input) input.memo[this.name] = memoRec if(failer.used) { var sentinel = memoRec.input while (true) { try { var result = this.rule.apply(parser, origInput) if (result.input == sentinel) throw fail memoRec.value = result.value memoRec.input = result.input } catch (f) { if (f != fail) throw f break } } } } else if(memoRec instanceof Failer) { memoRec.used = true throw fail } return memoRec } MemoizedRule.prototype.memoize = function(name) { //FIXME: Should we error here, change the name, or create a new one with a different name? return new MemoizedRule(this.rule, name) } MemoizedRule.prototype.unmemoize = function() { return this.rule } MemoizedRule.prototype.curry = function(inputList) { if(inputList.length == 0) return this return new CurriedRule(this.unmemoize(), inputList) } CurriedRule = function(rule, inputList) { this.rule = rule this.inputList = inputList } CurriedRule.prototype.apply = function(parser, input) { var newInput = input for(var i = this.inputList.length - 1; i >= 0; i--) { newInput = new OMInputStream(this.inputList[i], newInput) } return this.rule.apply(parser, newInput) } CurriedRule.prototype.memoize = function(name) { throw "Curried rules can't be memoized" } CurriedRule.prototype.unmemoize = function() { return this } CurriedRule.prototype.curry = function(inputList) { if(inputList.length == 0) return this return new CurriedRule(this.rule, this.inputList.concat(inputList)) } RuleFunction = function(func) { this.func = func } RuleFunction.prototype.apply = function(parser, input) { throw "Can't apply a rule function" } RuleFunction.prototype.memoize = function(name) { return this } RuleFunction.prototype.unmemoize = function() { return this } RuleFunction.prototype.curry = function(listArgs) { return new Rule(this.func.apply(undefined, listArgs)) } //FIXME: Should there be a separate parsing state instance which has-a parser description? OMeta2 = { super: undefined, rules: {}, getRule: function(name) { rule = this.rules[name] if(rule !== undefined) return rule if(this.super === undefined) return undefined return this.super.getRule(name) }, applyRule: function(name, input) { return this.getRule(name).apply(this, input) }, applyWithArgs: function(name, input) { return this.getRule(name).curry(argsToArray(arguments).slice(2)).apply(this, input) }, addNamedRule: function(name, rule) { this.rules[name] = rule.memoize(name) }, // Import all functions in object as rules addNamedRules: function(obj) { for(var name in obj) { if(obj.hasOwnProperty(name)) this.addNamedRule(name, new Rule(obj[name])) } }, addNamedRuleFunction: function(name, rulefn) { this.rules[name] = rulefn }, addNamedRuleFunctions: function(name, obj) { for(var name in obj) { this.addNamedRuleFunction(name, obj[name]) } }, superApply: function(self, name) { return this.rules[name].unmemoize().apply() }, superApplyWithArgs: function(self, name) { return this.rules[name].apply(self, argsToArray(arguments).slice(2)).call(self) }, anything: function(input) { return new RuleResult(input.head(), input.tail()) }, pred: function(b) { if(b) return true throw fail }, not: function(origInput, x) { try { x.apply(this, origInput) } catch (f) { if (f != fail) throw f return new RuleResult(true, origInput) } throw fail }, lookahead: function(origInput, x) { var result = x.apply(this, origInput) result.input = origInput return result }, or: function(origInput) { var restArguments = argsToArray(arguments).slice(1) for (var idx = 0; idx < restArguments.length; idx++) { try { return restArguments[idx].apply(this, origInput) } catch (f) { if (f != fail) throw f } } throw fail }, many: function(input, x) { var ans = [] var currResult = new RuleInitial(input) while (true) { try { currResult = currResult.applyNext(x, this); ans.push(currResult.value) } catch (f) { if (f != fail) throw f break } } return new RuleResult(ans, currResult.input) }, many1: function(input, x) { var first = x.apply(this, input) var rest = this.many(first.input, x) return new RuleResult([first.value].concat(rest.value), rest.input) }, form: function(x) { var v = this._apply("anything") if (!v.isSequenceable) throw fail var origInput = this.input this.input = makeArrayOMInputStream(v, 0) var r = x() this._apply("end") this.input = origInput return v }, initialize: function() { }, // match and matchAll are a grammar's "public interface" _genericMatch: function(input, ruleName, args, matchFailed) { if (args == undefined) args = [] var theRule = this.getRule(ruleName) try { var result = theRule.curry(args).apply(this, input); return result.value } catch (f) { /* FIXME: Change the exception handling to deal with input if (f == fail && matchFailed != undefined) { var input = m.input if (input.idx != undefined) { while (input.tl != undefined && input.tl.idx != undefined) input = input.tl input.idx-- } return matchFailed(m, input.idx) } */ throw f } }, match: function(obj, rule, args, matchFailed) { return this._genericMatch([obj].toOMInputStream(), rule, args, matchFailed) }, matchAll: function(listyObj, rule, args, matchFailed) { return this._genericMatch(listyObj.toOMInputStream(), rule, args, matchFailed) } } OMeta2NamedRules = { anything: function(parser, input) { return parser.anything(input) }, end: function(parser, input) { return parser.not(input, parser.getRule('anything')) }, empty: function(parser, input) { return new RuleResult(true, input) }, "true": function(parser, input) { var r = parser.anything(input) parser.pred(r.value == true) return r }, "false": function(parser, input) { var r = parser.anything(input) parser.pred(r.value == false) return r }, "undefined": function(parser, input) { var r = parser.anything(input) parser.pred(r.value == undefined) return r }, number: function(parser, input) { var result = parser.anything(input) parser.pred(result.value.isNumber()) return result }, string: function(parser, input) { var r = parser.anything(input) parser.pred(r.value.isString()) return r }, char: function(parser, input) { var r = parser.anything(input) parser.pred(r.value.isCharacter()) return r }, space: function(parser, input) { var r = this.apply("char", input) parser.pred(r.value.charCodeAt(0) <= 32) return r }, spaces: function(parser, input) { var $elf = this return parser.many(input, parser.getRule('space')) }, digit: function(parser, input) { var r = parser.applyRule('char', input) parser.pred(r.value.isDigit()) return r }, lower: function(parser, input) { var r = parser.applyRule("char", input) parser.pred(r.value.isLower()) return r }, upper: function(parser, input) { var r = this.apply("char", input) parser.pred(r.value.isUpper()) return r }, letter: function(parser, input) { return this.or(input, parser.getRule("lower"), parser.getRule("upper")) }, letterOrDigit: function(parser, input) { return parser.or(input, parser.getRule('letter'), parser.getRule('digit')) }, apply: function(parser, input) { var ruleName = parser.anything(input) return ruleName.applyNext(parser.getRule(ruleName.value), input) }, foreign: function(parser, input) { var r = new RuleInitial(input) r = parser.anything(r.input) var grammarObj = r.value r = parser.anything(r.input) var ruleName = r.value var newInput = makeOMInputStreamProxy(r.input) return gi.apply(ruleName, newInput) }, exactly:function(parser, input) { var r = new RuleInitial(input) r = parser.anything(r.input) var wanted = r.value r = parser.anything(r.input) var result = r.value if (wanted === result) return r throw fail }, firstAndRest: function(parser, input) { //FIXME: What's the point of this rule? var r = new RuleInitial(input) r = parser.anything(r.input) var first = r.value r = parser.anything(r.input) var rest = r.value r = r.applyNext(first, parser) return parser.many(r.input, rest) }, seq: function(parser, input) { var r = new RuleInitial(input) r = parser.anything(r.input) var lst = r.value exactlyRule = parser.getRule("exactly") for (var idx = 0; idx < lst.length; idx++) r = r.applyNext(exactlyRule.curry([lst[idx]]), parser) return new RuleResult(lst, r.input) }, notLast: function(parser, input) { var r = new RuleInitial(input) var ruleName = r.value var theRule = parser.getRule(ruleName) r = r.applyNext(theRule, parser) var result = r.value r = parser.lookahead(theRule) return r } } OMeta2.addNamedRules(OMeta2NamedRules) Parser2 = OMeta2.delegated({ super: OMeta2, rules: OMeta2.rules.delegated({})}); Parser2.addNamedRules({ listOf: function(parser, input) { var $elf = this, rule = this._apply("anything"), delim = this._apply("anything") return this._or(function() { var r = $elf._apply(rule) return $elf._many(function() { $elf._applyWithArgs("token", delim) return $elf._apply(rule) }, r) }, function() { return [] }) }, token: function(parser, input) { var r = parser.anything(input) var rule = r.value return r.applyWith(parser.getRule('spaces'), parser).applyWith(rule, parser) } })