// ---------------------------------------------------------------------------
// Overview (review notes; the code below is unchanged)
//
// NOTE(review): this file appears to have been collapsed onto a few physical
// lines and generic parameter lists look stripped (e.g. `vec>>`, `set()`,
// `map, set>()`), presumably by an extraction step. In this layout the `//`
// comments are followed by code on the same physical line. Confirm against the
// pristine file before compiling.
//
// Symbol encoding
//   Grammar symbols are plain ints. add_new_nonterminal returns
//   -1*nonterminals.size and add_terminal returns terminals.size, so
//   nonterminals are negative and terminals are positive; is_terminal(x) is
//   x > 0. A nonterminal id n is turned into a vector index with (-1*n)-1, a
//   terminal id t with t-1.
//
// obj Grammer
//   Holds parallel vectors: `nonterminals` (the rules of each nonterminal),
//   `nonterminal_names`, `nonterminal_funs` (one make_pair(d,f) per rule),
//   `terminals` (built with regex(c)) and `terminal_funs` (one make_pair(d,f)
//   per terminal), plus `start_symbol` (initialised to 0 in construct).
//   - add_to_or_create_nonterminal: looks `name` up in nonterminal_names; when
//     the index is >= 0 the rule is appended to that nonterminal, otherwise a
//     new nonterminal is created. Returns the (negative) nonterminal id.
//   - match_terminal: calls long_match on the terminal's regex with the input's
//     backing memory, `start` and input.length(), and returns its result.
//   - to_string(it: BS): renders "<Name ::= rule, left, pivot, right>" with a
//     "*" inserted at position idx_into_rule (appended at the end when
//     idx_into_rule equals the rule size).
//   - eval_BSR(input, BSR): scans BSR.data for the first entry whose
//     nonterminal is start_symbol, whose idx_into_rule equals its rule's size,
//     and whose left == 0 and right == input.length(). If none is found it
//     prints the input and every BSR entry and calls error("Could not find
//     top"); otherwise it evaluates from that entry.
//   - eval_BSR(input, BSR, c): requires entry c to be at the end of its rule
//     (else error("Evaluating BSR from not the end!")). Walks the rule from the
//     last symbol to the first: terminals are evaluated with their terminal_funs
//     entry over (input, bs.pivot, bs.right); nonterminals are evaluated by
//     recursing into the first BSR entry that completes that nonterminal over
//     [bs.pivot, bs.right]. After each symbol except the first, `bs` is replaced
//     by the entry for the same rule with idx_into_rule == i covering
//     [bs.left, bs.pivot]. The collected values are reversed and passed to the
//     rule's nonterminal_funs entry.
//     NOTE(review): `right` and `new_bs_idx` stay -1 when no matching entry is
//     found and are then used (recursion argument / BSR.data index) without a
//     check - confirm whether that can happen for ambiguous or partial BSR sets.
//
// obj Pending / Descriptor / BS and the pending()/descriptor()/bs() factories
//   Plain int records with field-wise copy_construct and operator==:
//   Pending    = (nonterminal, rule_idx, idx_into_rule, left)
//   Descriptor = Pending fields + pivot
//   BS         = Descriptor fields + right
//
// fungll(grammar, start_symbol, input)
//   Worklist loop. R starts as descend(grammar, start_symbol, 0). While R is
//   non-empty: pop a descriptor d, call process, add d to U, set
//   R = R.union(Rp) - U, merge Gp into G and Pp into P key by key (adding to the
//   existing value when the key is present), and accumulate Yp into Y.
//   Returns Y.
//   NOTE(review): the commented-out str overload above it looks the start symbol
//   up in grammar.nonterminal_funs; presumably nonterminal_names was intended.
//
// descend(grammar, symbol, l)
//   One descriptor(symbol, rhs, 0, l, l) per rule index rhs of `symbol`.
//
// process / process_e / process_symbol
//   process dispatches to process_e when idx_into_rule equals the rule size,
//   otherwise to process_symbol.
//   process_e: reads the pendings stored in G for (X, l) (empty set by default),
//   runs ascend(l, K, k), adds bs(X, rule_idx, 0, l, l, l) when the rule is
//   empty, and returns an empty G-delta plus a P-delta mapping (X, l) to {k}.
//   NOTE(review): the locals `nonterminal`, `rule_idx`, `left`, `pivot`
//   declared at the top of process_e are never used.
//   process_symbol: s is the symbol at idx_into_rule. For a terminal it returns
//   matc(...) with empty deltas. For a nonterminal it always returns Gp (a
//   pending for the position after s, keyed by (s, k)); when P has no entry for
//   (s, k) it descends into s, otherwise it calls skip with the stored set.
//
// matc(grammar, input, descript)
//   Calls match_terminal at descript.pivot. When match_length > 0 it returns one
//   descriptor and one BS advanced past the match; otherwise two empty sets.
//   NOTE(review): a match length of 0 is treated as "no match" - confirm the
//   failure value of long_match and whether empty terminal matches are intended
//   to be rejected.
//
// skip / ascend / nmatch
//   skip and ascend wrap one argument in a singleton set and call nmatch.
//   nmatch builds, for every (pending, r) pair from K x R,
//   descriptor(pending..., r) and bs(pending..., k, r).
// ---------------------------------------------------------------------------
import vec:* import vec_literals:* import map:* import set:* import util:* import str:* import regex:* // nonterminals are negative, terminals are positive obj Grammer (Object) { var nonterminals: vec>> var nonterminal_names: vec var nonterminal_funs: vec): T>>> var terminals: vec var terminal_funs: vec> var start_symbol: int fun construct(): *Grammer { nonterminals.construct() nonterminal_names.construct() nonterminal_funs.construct() terminals.construct() terminal_funs.construct() start_symbol = 0 return this } fun copy_construct(old: *Grammer): void { nonterminals.copy_construct(&old->nonterminals) nonterminal_names.copy_construct(&old->nonterminal_names) nonterminal_funs.copy_construct(&old->nonterminal_funs) terminals.copy_construct(&old->terminals) terminal_funs.copy_construct(&old->terminal_funs) start_symbol = old->start_symbol } fun destruct(): void { nonterminals.destruct() nonterminal_names.destruct() nonterminal_funs.destruct() terminals.destruct() terminal_funs.destruct() } fun operator=(other:ref Grammer):void { destruct() copy_construct(&other) } fun add_new_nonterminal(name: *char, rule: ref vec, d: K, f: fun(ref K,ref vec): T): int { return add_new_nonterminal(str(name), rule, d, f) } fun add_new_nonterminal(name: ref str, rule: ref vec, d: K, f: fun(ref K,ref vec): T): int { nonterminals.add(vec(rule)) nonterminal_names.add(name) nonterminal_funs.add(vec(make_pair(d,f))) return -1*nonterminals.size } fun add_to_or_create_nonterminal(name: ref str, rule: ref vec, d: K, f: fun(ref K,ref vec): T): int { var idx = nonterminal_names.find(name) if idx >= 0 { add_to_nonterminal(-1*(idx+1), rule, d, f) return -1*(idx+1) } else { return add_new_nonterminal(name, rule, d, f) } } fun add_to_nonterminal(nonterminal: int, rule: ref vec, d: K, f: fun(ref K,ref vec): T) { nonterminals[(-1*nonterminal)-1].add(rule) nonterminal_funs[(-1*nonterminal)-1].add(make_pair(d,f)) } fun add_terminal(c: *char, d: K, f: fun(ref K,ref str,int,int): T): int { return 
add_terminal(str(c), d, f) } fun add_terminal(c: ref str, d: K, f: fun(ref K,ref str,int,int): T): int { terminals.add(regex(c)) terminal_funs.add(make_pair(d,f)) return terminals.size } fun get_nonterminal_rules(nonterminal: int): ref vec> { return nonterminals[(-1*nonterminal)-1] } fun match_terminal(terminal: int, input: ref str, start: int): int { return terminals[terminal-1].long_match(input.getBackingMemory(), start, input.length()) } fun is_terminal(x: int): bool { return x > 0 } fun set_start_symbol(x: int) { start_symbol = x } fun to_string(it: BS): str { var rule_str = str() for (var i = 0; i < nonterminals[(-1*it.nonterminal)-1][it.rule_idx].size; i++;) { if i == it.idx_into_rule { rule_str += "*" } var erminal = nonterminals[(-1*it.nonterminal)-1][it.rule_idx][i] rule_str += to_string(erminal) if i < nonterminals[(-1*it.nonterminal)-1][it.rule_idx].size-1 { rule_str += " " } } if it.idx_into_rule == nonterminals[(-1*it.nonterminal)-1][it.rule_idx].size { rule_str += "*" } return str("<") + nonterminal_names[(-1*it.nonterminal)-1] + " ::= " + rule_str + ", " + it.left + ", " + it.pivot + ", " + it.right + ">" } fun to_string(erminal: int): str { if erminal < 0 { return nonterminal_names[(-1*erminal)-1] } else { return terminals[erminal-1].regexString } } fun eval_BSR(input: ref str, BSR: ref set): T { var top = -1 for (var i = 0; i < BSR.data.size; i++;) { if BSR.data[i].nonterminal == start_symbol && BSR.data[i].idx_into_rule == nonterminals[(-1*BSR.data[i].nonterminal)-1][BSR.data[i].rule_idx].size && BSR.data[i].left == 0 && BSR.data[i].right == input.length() { top = i break } } if top == -1 { println("Could not find top for input:") println(input) println(str("of length ") + input.length()) for (var i = 0; i < BSR.data.size; i++;) { println(str() + i + ": " + to_string(BSR.data[i])) } error("Could not find top") } return eval_BSR(input, BSR, top) } fun eval_BSR(input: ref str, BSR: ref set, c: int): T { var bs = BSR.data[c] var nonterminal = 
(-1*bs.nonterminal)-1 if bs.idx_into_rule != nonterminals[nonterminal][bs.rule_idx].size { error("Evaluating BSR from not the end!") } var params = vec() for (var i = bs.idx_into_rule-1; i >= 0; i--;) { var erminal = nonterminals[nonterminal][bs.rule_idx][i] if is_terminal(erminal) { var right_value = terminal_funs[erminal-1].second(terminal_funs[erminal-1].first, input, bs.pivot, bs.right) params.add(right_value) } else { /*var right = find_comp(erminal, bs.pivot, bs.right)*/ var right = -1 var sub_nonterminal_idx = (-1*erminal)-1 for (var j = 0; j < BSR.data.size; j++;) { if BSR.data[j].nonterminal == erminal && BSR.data[j].idx_into_rule == nonterminals[sub_nonterminal_idx][BSR.data[j].rule_idx].size && BSR.data[j].left == bs.pivot && BSR.data[j].right == bs.right { right = j break } } var right_value = eval_BSR(input, BSR, right) params.add(right_value) } // get the new left bs if i != 0 { /*var new_bs_idx = find_mid(bs.nonterminal, bs.rule_idx, i, bs.left, bs.pivot)*/ var new_bs_idx = -1 for (var j = 0; j < BSR.data.size; j++;) { if BSR.data[j].nonterminal == bs.nonterminal && BSR.data[j].rule_idx == bs.rule_idx && BSR.data[j].idx_into_rule == i && BSR.data[j].left == bs.left && BSR.data[j].right == bs.pivot { new_bs_idx = j break } } bs = BSR.data[new_bs_idx] } } var to_ret = nonterminal_funs[nonterminal][bs.rule_idx].second(nonterminal_funs[nonterminal][bs.rule_idx].first, params.reverse()) return to_ret } } obj Pending (Object) { var nonterminal: int var rule_idx: int var idx_into_rule: int var left: int fun construct(): *Pending { return this->construct(0,0,0,0) } fun construct(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int): *Pending { this->nonterminal = nonterminal; this->rule_idx = rule_idx; this->idx_into_rule = idx_into_rule; this->left = left; return this } fun copy_construct(old: *Pending): void { this->nonterminal = old->nonterminal; this->rule_idx = old->rule_idx; this->idx_into_rule = old->idx_into_rule; this->left = old->left; } 
fun destruct(): void { } fun operator=(other:ref Pending):void { destruct() copy_construct(&other) } fun operator==(rhs: ref Pending): bool { return nonterminal == rhs.nonterminal && rule_idx == rhs.rule_idx && idx_into_rule == rhs.idx_into_rule && left == rhs.left } } fun pending(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int): Pending { var to_ret.construct(nonterminal, rule_idx, idx_into_rule, left): Pending return to_ret } obj Descriptor (Object) { var nonterminal: int var rule_idx: int var idx_into_rule: int var left: int var pivot: int fun construct(): *Descriptor { return this->construct(0,0,0,0,0) } fun construct(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int, pivot: int): *Descriptor { this->nonterminal = nonterminal; this->rule_idx = rule_idx; this->idx_into_rule = idx_into_rule; this->left = left; this->pivot = pivot; return this } fun copy_construct(old: *Descriptor): void { this->nonterminal = old->nonterminal; this->rule_idx = old->rule_idx; this->idx_into_rule = old->idx_into_rule; this->left = old->left; this->pivot = old->pivot; } fun destruct(): void { } fun operator=(other:ref Descriptor):void { destruct() copy_construct(&other) } fun operator==(rhs: ref Descriptor): bool { return nonterminal == rhs.nonterminal && rule_idx == rhs.rule_idx && idx_into_rule == rhs.idx_into_rule && left == rhs.left && pivot == rhs.pivot } } fun descriptor(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int, pivot: int): Descriptor { var to_ret.construct(nonterminal, rule_idx, idx_into_rule, left, pivot): Descriptor return to_ret } obj BS (Object) { var nonterminal: int var rule_idx: int var idx_into_rule: int var left: int var pivot: int var right: int fun construct(): *BS { return this->construct(0,0,0,0,0,0) } fun construct(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int, pivot: int, right: int): *BS { this->nonterminal = nonterminal; this->rule_idx = rule_idx; this->idx_into_rule = idx_into_rule; 
this->left = left; this->pivot = pivot; this->right = right; return this } fun copy_construct(old: *BS): void { this->nonterminal = old->nonterminal; this->rule_idx = old->rule_idx; this->idx_into_rule = old->idx_into_rule; this->left = old->left; this->pivot = old->pivot; this->right = old->right; } fun destruct(): void { } fun operator=(other:ref BS):void { destruct() copy_construct(&other) } fun to_string(): str { return str("nonterminal:") + nonterminal + " rule_idx:" + rule_idx + " idx_into_rule:" + idx_into_rule + " l:" + left + " p:" + pivot + " r:" + right } fun operator==(rhs: ref BS): bool { return nonterminal == rhs.nonterminal && rule_idx == rhs.rule_idx && idx_into_rule == rhs.idx_into_rule && left == rhs.left && pivot == rhs.pivot && right == rhs.right } } fun bs(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int, pivot: int, right: int): BS { var to_ret.construct(nonterminal, rule_idx, idx_into_rule, left, pivot, right): BS return to_ret } /*fun fungll(grammar: ref Grammer, start_symbol: *char, input: ref str): set {*/ /*return fungll(grammar, str(start_symbol), input)*/ /*}*/ /*fun fungll(grammar: ref Grammer, start_symbol: str, input: ref str): set {*/ /*return fungll(grammar, -1*(grammar.nonterminal_funs.find(start_symbol)+1), input)*/ /*}*/ fun fungll(grammar: ref Grammer, start_symbol: int, input: ref str): set { var R = descend(grammar, start_symbol, 0) var U = set() var G = map, set>() var P = map, set>() var Y = set() while R.size() != 0 { var d = R.pop() var it = process(grammar, input, d, G, P) var Rp = it.first.first var Yp = it.first.second var Gp = it.second var Pp = it.third U.add(d) var nextR = R.union(Rp) - U R = nextR for (var i = 0; i < Gp.keys.size; i++;) { if G.contains_key(Gp.keys[i]) { G[Gp.keys[i]].add(Gp.values[i]) } else { G[Gp.keys[i]] = Gp.values[i] } } for (var i = 0; i < Pp.keys.size; i++;) { if P.contains_key(Pp.keys[i]) { P[Pp.keys[i]].add(Pp.values[i]) } else { P[Pp.keys[i]] = Pp.values[i] } } Y += Yp } 
return Y } fun descend(grammar: ref Grammer, symbol: int, l: int): set { var to_ret = set() for (var rhs = 0; rhs < grammar.get_nonterminal_rules(symbol).size; rhs++;) to_ret.add(descriptor(symbol, rhs, 0, l, l)) return to_ret } fun process(grammar: ref Grammer, input: ref str, descript: Descriptor, G: ref map, set>, P: ref map, set>): triple, set>, map, set>, map, set>> { // if at end / end is emptystr if descript.idx_into_rule == grammar.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx].size { return process_e(grammar, descript, G, P) } else { return process_symbol(grammar, input, descript, G, P) } } fun process_e(grammar: ref Grammer, descript: Descriptor, G: ref map, set>, P: ref map, set>): triple, set>, map, set>, map, set>> { var nonterminal: int var rule_idx: int var left: int var pivot: int var X = descript.nonterminal var l = descript.left; var k = descript.pivot; var K = G.get_with_default(make_pair(X,l), set()) var it = ascend(l,K,k) var R = it.first var Y = it.second if grammar.get_nonterminal_rules(X)[descript.rule_idx].size == 0 { Y.add(bs(X,descript.rule_idx, 0, l, l, l)) } return make_triple(make_pair(R,Y), map, set>(), map(make_pair(X,l), set(k))) } fun process_symbol(grammar: ref Grammer, input: ref str, descript: Descriptor, G: ref map, set>, P: ref map, set>): triple, set>, map, set>, map, set>> { var s = grammar.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx][descript.idx_into_rule] var k = descript.pivot var R = P.get_with_default(make_pair(s,k), set()) var Gp = map(make_pair(s,k), set(pending(descript.nonterminal, descript.rule_idx, descript.idx_into_rule+1, descript.left))) if grammar.is_terminal(s) { return make_triple(matc(grammar,input,descript), map, set>(), map, set>()) } else if R.size() == 0 { // s in N return make_triple(make_pair(descend(grammar,s,k), set()), Gp, map, set>()) } else { // s in N and R != set() return make_triple(skip(k,pending(descript.nonterminal, descript.rule_idx, 
descript.idx_into_rule+1, descript.left), R), Gp, map, set>()) } } fun matc(grammar: ref Grammer, input: ref str, descript: Descriptor): pair, set> { /*println("trying to match " + grammar.to_string(grammar.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx][descript.idx_into_rule]))*/ var match_length = grammar.match_terminal(grammar.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx][descript.idx_into_rule], input, descript.pivot) if match_length > 0 { /*println("matched " + grammar.to_string(grammar.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx][descript.idx_into_rule]))*/ return make_pair(set(descriptor(descript.nonterminal, descript.rule_idx, descript.idx_into_rule+1, descript.left, descript.pivot+match_length)), set(bs(descript.nonterminal, descript.rule_idx, descript.idx_into_rule+1, descript.left, descript.pivot, descript.pivot+match_length))) } else { return make_pair(set(), set()) } } fun skip(k: int, c: Pending, R: ref set): pair, set> { return nmatch(k, set(c), R); } fun ascend(k:int, K: ref set, r: int): pair, set> { return nmatch(k, K, set(r)); } fun nmatch(k:int, K: ref set, R: ref set): pair, set> { var Rp = set() var Y = set() for (var i = 0; i < K.data.size; i++;) { var pending = K.data[i] for (var j = 0; j < R.data.size; j++;) { var r = R.data[j] Rp.add(descriptor(pending.nonterminal, pending.rule_idx, pending.idx_into_rule, pending.left, r)) Y.add(bs(pending.nonterminal, pending.rule_idx, pending.idx_into_rule, pending.left, k, r)) } } return make_pair(Rp,Y) } /*fun main(argc: int, argv: **char): int {*/ /*var grammar.construct(): Grammer*/ /*var Number = grammar.add_new_nonterminal("Number", vec(grammar.add_terminal("[0-9]+", fun(input: ref str, l: int, r: int): int { return string_to_num(input.slice(l,r)); })), fun(i: ref vec): int { return i[0]; })*/ /*var mult = grammar.add_terminal("\\*", fun(input: ref str, l: int, r: int): int { return 1; })*/ /*var Factor = grammar.add_new_nonterminal("Factor", 
vec(Number), fun(i: ref vec): int { return i[0]; })*/ /*grammar.add_to_nonterminal(Factor, vec(Factor, mult, Number), fun(i: ref vec): int { return i[0]*i[2]; })*/ /*var add = grammar.add_terminal("\\+", fun(input: ref str, l: int, r: int): int { return 1; })*/ /*var Term = grammar.add_new_nonterminal("Term", vec(Factor), fun(i: ref vec): int { return i[0]; })*/ /*grammar.add_to_nonterminal(Term, vec(Term, add, Factor), fun(i: ref vec): int { return i[0]+i[2]; })*/ /*grammar.set_start_symbol(Term)*/ /*var input = str("1+23*44")*/ /*var BSR = fungll(grammar, input)*/ /*println(str("length of BSR is: ") + BSR.size())*/ /*for (var i = 0; i < BSR.data.size; i++;) {*/ /*var BS = BSR.data[i]*/ /*println(str() + i + ": " + grammar.to_string(BSR.data[i]))*/ /*}*/ /*var res = grammar.eval_BSR(input, BSR)*/ /*println(str("result of grammar.eval_BSR(fungll(grammar, ") + input + ")) = " + res)*/ /*return 0*/ /*}*/