import vec:*
import vec_literals:*
import map:*
import set:*
import util:*
import str:*
import regex:*

// Grammar storage used by the `fungll` parser below.
//
// Symbols are plain ints:
//   nonterminals are negative, terminals are positive
// A nonterminal id n lives at index (-1*n)-1 of `nonterminals` / `nonterminal_names`;
// a terminal id t lives at index t-1 of `terminals`. 0 is never handed out as an id
// (it is only the initial value of `start_symbol`).
obj Grammer (Object) {
    // nonterminals[n][r] is alternative (rule) r of nonterminal n: a sequence of symbol ids.
    var nonterminals: vec<vec<vec<int>>>
    // Display name of each nonterminal, parallel to `nonterminals`.
    var nonterminal_names: vec<str>
    // One regex per terminal.
    var terminals: vec<regex>
    // Symbol id that `fungll` starts descending from.
    var start_symbol: int

    fun construct(): *Grammer {
        nonterminals.construct()
        nonterminal_names.construct()
        terminals.construct()
        start_symbol = 0
        return this
    }
    fun copy_construct(old: *Grammer): void {
        nonterminals.copy_construct(&old->nonterminals)
        nonterminal_names.copy_construct(&old->nonterminal_names)
        terminals.copy_construct(&old->terminals)
        start_symbol = old->start_symbol
    }
    fun destruct(): void {
        nonterminals.destruct()
        nonterminal_names.destruct()
        terminals.destruct()
    }
    fun operator=(other:ref Grammer):void {
        destruct()
        copy_construct(&other)
    }

    // Convenience overload taking a C string name.
    fun add_new_nonterminal(name: *char, rule: ref vec<int>): int {
        return add_new_nonterminal(str(name), rule)
    }
    // Registers a new nonterminal whose first alternative is `rule`
    // and returns its (negative) symbol id.
    fun add_new_nonterminal(name: ref str, rule: ref vec<int>): int {
        nonterminals.add(vec(rule))
        nonterminal_names.add(name)
        return -1*nonterminals.size
    }
    // Appends another alternative to an existing nonterminal.
    fun add_to_nonterminal(nonterminal: int, rule: ref vec<int>) {
        nonterminals[(-1*nonterminal)-1].add(rule)
    }
    // Registers a terminal from regex source `c` and returns its (positive) symbol id.
    fun add_terminal(c: *char): int {
        terminals.add(regex(c))
        return terminals.size
    }
    // All alternatives of the given nonterminal id.
    fun get_nonterminal_rules(nonterminal: int): ref vec<vec<int>> {
        return nonterminals[(-1*nonterminal)-1]
    }
    // Returns whatever the terminal's regex `long_match` reports for `input` beginning at
    // `start`. `matc` below treats a result > 0 as the number of characters consumed.
    fun match_terminal(terminal: int, input: ref str, start: int): int {
        return terminals[terminal-1].long_match(input.getBackingMemory(), start, input.length())
    }
    fun is_terminal(x: int): bool {
        return x > 0
    }
    fun set_start_symbol(x: int) {
        start_symbol = x
    }
    // Renders a BS as "<Name ::= symbols, left, pivot, right>", with a "*" inserted
    // at position `idx_into_rule` of the rule (or at the end when the slot is at the end).
    fun to_string(it: BS): str {
        var rule_str = str()
        for (var i = 0; i < nonterminals[(-1*it.nonterminal)-1][it.rule_idx].size; i++;) {
            if i == it.idx_into_rule {
                rule_str += "*"
            }
            var erminal = nonterminals[(-1*it.nonterminal)-1][it.rule_idx][i]
            rule_str += to_string(erminal)
        }
        if it.idx_into_rule == nonterminals[(-1*it.nonterminal)-1][it.rule_idx].size {
            rule_str += "*"
        }
        return str("<") + nonterminal_names[(-1*it.nonterminal)-1] + " ::= " + rule_str + ", " + it.left + ", " + it.pivot + ", " + it.right + ">"
    }
    // Name of a nonterminal, or the regex source string of a terminal.
    fun to_string(erminal: int): str {
        if erminal < 0 {
            return nonterminal_names[(-1*erminal)-1]
        } else {
            return terminals[erminal-1].regexString
        }
    }
}

// A position inside a rule (nonterminal, rule_idx, idx_into_rule) together with the
// `left` input index it started at. Stored in `G` by process_symbol as the place to
// resume at once the nonterminal being descended into has been matched.
obj Pending (Object) {
    var nonterminal: int
    var rule_idx: int
    var idx_into_rule: int
    var left: int
    fun construct(): *Pending {
        return this->construct(0,0,0,0)
    }
    fun construct(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int): *Pending {
        this->nonterminal = nonterminal;
        this->rule_idx = rule_idx;
        this->idx_into_rule = idx_into_rule;
        this->left = left;
        return this
    }
    fun copy_construct(old: *Pending): void {
        this->nonterminal = old->nonterminal;
        this->rule_idx = old->rule_idx;
        this->idx_into_rule = old->idx_into_rule;
        this->left = old->left;
    }
    fun destruct(): void { }
    fun operator=(other:ref Pending):void {
        destruct()
        copy_construct(&other)
    }
    fun operator==(rhs: ref Pending): bool {
        return nonterminal == rhs.nonterminal && rule_idx == rhs.rule_idx && idx_into_rule == rhs.idx_into_rule && left == rhs.left
    }
}
// Value-returning factory for Pending.
fun pending(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int): Pending {
    var to_ret.construct(nonterminal, rule_idx, idx_into_rule, left): Pending
    return to_ret
}

// A unit of work for the parser: a position inside a rule plus the `left` input index
// the rule started at and the `pivot` input index reached so far.
obj Descriptor (Object) {
    var nonterminal: int
    var rule_idx: int
    var idx_into_rule: int
    var left: int
    var pivot: int
    fun construct(): *Descriptor {
        return this->construct(0,0,0,0,0)
    }
    fun construct(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int, pivot: int): *Descriptor {
        this->nonterminal = nonterminal;
        this->rule_idx = rule_idx;
        this->idx_into_rule = idx_into_rule;
        this->left = left;
        this->pivot = pivot;
        return this
    }
    fun copy_construct(old: *Descriptor): void {
        this->nonterminal = old->nonterminal;
        this->rule_idx = old->rule_idx;
        this->idx_into_rule = old->idx_into_rule;
        this->left = old->left;
        this->pivot = old->pivot;
    }
    fun destruct(): void { }
    fun operator=(other:ref Descriptor):void {
        destruct()
        copy_construct(&other)
    }
    fun operator==(rhs: ref Descriptor): bool {
        return nonterminal == rhs.nonterminal && rule_idx == rhs.rule_idx && idx_into_rule == rhs.idx_into_rule && left == rhs.left && pivot == rhs.pivot
    }
}
// Value-returning factory for Descriptor.
fun descriptor(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int, pivot: int): Descriptor {
    var to_ret.construct(nonterminal, rule_idx, idx_into_rule, left, pivot): Descriptor
    return to_ret
}

// One element of the parser's result set: a position inside a rule plus three input
// indices (left, pivot, right).
// NOTE(review): presumably "binary subtree" in the BSR sense (the commented-out main
// calls the result `BSR`) - confirm.
obj BS (Object) {
    var nonterminal: int
    var rule_idx: int
    var idx_into_rule: int
    var left: int
    var pivot: int
    var right: int
    fun construct(): *BS {
        return this->construct(0,0,0,0,0,0)
    }
    fun construct(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int, pivot: int, right: int): *BS {
        this->nonterminal = nonterminal;
        this->rule_idx = rule_idx;
        this->idx_into_rule = idx_into_rule;
        this->left = left;
        this->pivot = pivot;
        this->right = right;
        return this
    }
    fun copy_construct(old: *BS): void {
        this->nonterminal = old->nonterminal;
        this->rule_idx = old->rule_idx;
        this->idx_into_rule = old->idx_into_rule;
        this->left = old->left;
        this->pivot = old->pivot;
        this->right = old->right;
    }
    fun destruct(): void { }
    fun operator=(other:ref BS):void {
        destruct()
        copy_construct(&other)
    }
    // Raw field dump; see Grammer.to_string(BS) for the rendering with symbol names.
    fun to_string(): str {
        return str("nonterminal:") + nonterminal + " rule_idx:" + rule_idx + " idx_into_rule:" + idx_into_rule + " l:" + left + " p:" + pivot + " r:" + right
    }
    fun operator==(rhs: ref BS): bool {
        return nonterminal == rhs.nonterminal && rule_idx == rhs.rule_idx && idx_into_rule == rhs.idx_into_rule && left == rhs.left && pivot == rhs.pivot && right == rhs.right
    }
}
// Value-returning factory for BS.
fun bs(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int, pivot: int, right: int): BS {
    var to_ret.construct(nonterminal, rule_idx, idx_into_rule, left, pivot, right): BS
    return to_ret
}

// Parses `input` against `grammer`, starting from grammer.start_symbol at index 0,
// and returns every BS produced along the way.
//
// Working state:
//   R - descriptors still to process (the loop runs until it is empty)
//   U - descriptors already processed (never re-queued)
//   G - (symbol, input index) -> Pending positions to resume when that symbol completes
//   P - (nonterminal, left index) -> right indices at which it has already completed
//   Y - accumulated result
fun fungll(grammer: ref Grammer, input: ref str): set<BS> {
    var R = descend(grammer, grammer.start_symbol, 0)
    var U = set<Descriptor>()
    var G = map<pair<int, int>, set<Pending>>()
    var P = map<pair<int,int>, set<int>>()
    var Y = set<BS>()
    while R.size() != 0 {
        var d = R.pop()
        var it = process(grammer, input, d, G, P)
        var Rp = it.first.first
        var Yp = it.first.second
        var Gp = it.second
        var Pp = it.third
        // Queue the new descriptors, minus anything already processed (including d itself).
        var U_with_d = U.union(set(d));
        var nextR = R.union(Rp) - U_with_d
        R = nextR
        U = U_with_d
        // Merge the per-step additions into G and P, key by key.
        for (var i = 0; i < Gp.keys.size; i++;) {
            if G.contains_key(Gp.keys[i]) {
                G[Gp.keys[i]].add(Gp.values[i])
            } else {
                G[Gp.keys[i]] = Gp.values[i]
            }
        }
        for (var i = 0; i < Pp.keys.size; i++;) {
            if P.contains_key(Pp.keys[i]) {
                P[Pp.keys[i]].add(Pp.values[i])
            } else {
                P[Pp.keys[i]] = Pp.values[i]
            }
        }
        Y += Yp
    }
    return Y
}

// One descriptor per alternative of `symbol`, each at the start of its rule with
// left == pivot == l.
fun descend(grammer: ref Grammer, symbol: int, l: int): set<Descriptor> {
    var to_ret = set<Descriptor>()
    for (var rhs = 0; rhs < grammer.get_nonterminal_rules(symbol).size; rhs++;)
        to_ret.add(descriptor(symbol, rhs, 0, l, l))
    return to_ret
}

// Processes a single descriptor. Returns ((new descriptors, new BS), additions to G,
// additions to P); `fungll` merges these into its state.
fun process(grammer: ref Grammer, input: ref str, descript: Descriptor, G: ref map<pair<int, int>, set<Pending>>, P: ref map<pair<int,int>, set<int>>): triple<pair<set<Descriptor>, set<BS>>, map<pair<int, int>, set<Pending>>, map<pair<int,int>, set<int>>> {
    // if at end / end is emptystr
    if descript.idx_into_rule == grammer.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx].size {
        return process_e(grammer, descript, G, P)
    } else {
        return process_symbol(grammer, input, descript, G, P)
    }
}

// Descriptor is at the end of its rule: nonterminal X has been matched from l to k.
// Resumes every Pending recorded in G under (X, l) with right index k, and reports
// (X, l) -> {k} as an addition to P. An empty rule additionally contributes
// bs(X, rule_idx, 0, l, l, l). `P` is accepted for signature symmetry and not read here.
fun process_e(grammer: ref Grammer, descript: Descriptor, G: ref map<pair<int, int>, set<Pending>>, P: ref map<pair<int,int>, set<int>>): triple<pair<set<Descriptor>, set<BS>>, map<pair<int, int>, set<Pending>>, map<pair<int,int>, set<int>>> {
    var X = descript.nonterminal
    var l = descript.left;
    var k = descript.pivot;
    var K = G.get_with_default(make_pair(X,l), set<Pending>())
    var it = ascend(l,K,k)
    var R = it.first
    var Y = it.second
    if grammer.get_nonterminal_rules(X)[descript.rule_idx].size == 0 {
        Y.add(bs(X,descript.rule_idx, 0, l, l, l))
    }
    return make_triple(make_pair(R,Y), map<pair<int, int>, set<Pending>>(), map(make_pair(X,l), set(k)))
}

// Descriptor is in front of symbol s at input index k.
//   s terminal                      -> try to match it (matc)
//   s nonterminal, nothing in P yet -> descend into s at k, remembering where to resume
//   s nonterminal, P has results    -> reuse the known right indices (skip), still remembering
//                                      where to resume for results found later
// `G` is accepted for signature symmetry and not read here.
fun process_symbol(grammer: ref Grammer, input: ref str, descript: Descriptor, G: ref map<pair<int, int>, set<Pending>>, P: ref map<pair<int,int>, set<int>>): triple<pair<set<Descriptor>, set<BS>>, map<pair<int, int>, set<Pending>>, map<pair<int,int>, set<int>>> {
    var s = grammer.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx][descript.idx_into_rule]
    var k = descript.pivot
    var R = P.get_with_default(make_pair(s,k), set<int>())
    // Position just past s in the current rule: where parsing continues once s is done.
    var cont = pending(descript.nonterminal, descript.rule_idx, descript.idx_into_rule+1, descript.left)
    var Gp = map(make_pair(s,k), set(cont))
    if grammer.is_terminal(s) {
        return make_triple(matc(grammer,input,descript), map<pair<int, int>, set<Pending>>(), map<pair<int,int>, set<int>>())
    } else if R.size() == 0 {
        // s in N
        return make_triple(make_pair(descend(grammer,s,k), set<BS>()), Gp, map<pair<int,int>, set<int>>())
    } else {
        // s in N and R != set()
        return make_triple(skip(k, cont, R), Gp, map<pair<int,int>, set<int>>())
    }
}

// Tries the terminal at the descriptor's position against `input` at descript.pivot.
// A match length > 0 yields one advanced descriptor (pivot moved past the match) and one
// BS spanning (left, pivot, pivot+match_length); anything else yields two empty sets.
fun matc(grammer: ref Grammer, input: ref str, descript: Descriptor): pair<set<Descriptor>, set<BS>> {
    /*println("trying to match " + grammer.to_string(grammer.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx][descript.idx_into_rule]))*/
    var match_length = grammer.match_terminal(grammer.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx][descript.idx_into_rule], input, descript.pivot)
    if match_length > 0 {
        /*println("matched " + grammer.to_string(grammer.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx][descript.idx_into_rule]))*/
        return make_pair(set(descriptor(descript.nonterminal, descript.rule_idx, descript.idx_into_rule+1, descript.left, descript.pivot+match_length)), set(bs(descript.nonterminal, descript.rule_idx, descript.idx_into_rule+1, descript.left, descript.pivot, descript.pivot+match_length)))
    } else {
        return make_pair(set<Descriptor>(), set<BS>())
    }
}

// One pending position combined with many right indices.
fun skip(k: int, c: Pending, R: ref set<int>): pair<set<Descriptor>, set<BS>> {
    return nmatch(k, set(c), R);
}
// Many pending positions combined with one right index.
fun ascend(k:int, K: ref set<Pending>, r: int): pair<set<Descriptor>, set<BS>> {
    return nmatch(k, K, set(r));
}
// Cross product of pending positions K and right indices R: for every pair, a descriptor
// resuming the pending position with pivot r, and a BS with (left, k, r).
fun nmatch(k:int, K: ref set<Pending>, R: ref set<int>): pair<set<Descriptor>, set<BS>> {
    var Rp = set<Descriptor>()
    var Y = set<BS>()
    for (var i = 0; i < K.data.size; i++;) {
        var pend = K.data[i]
        for (var j = 0; j < R.data.size; j++;) {
            var r = R.data[j]
            Rp.add(descriptor(pend.nonterminal, pend.rule_idx, pend.idx_into_rule, pend.left, r))
            Y.add(bs(pend.nonterminal, pend.rule_idx, pend.idx_into_rule, pend.left, k, r))
        }
    }
    return make_pair(Rp,Y)
}

/*fun main(argc: int, argv: **char): int {*/
    /*var grammer.construct(): Grammer*/
    /*var one = grammer.add_terminal("12")*/
    /*var E = grammer.add_new_nonterminal("E", vec<int>())*/
    /*grammer.add_to_nonterminal(E, vec(one))*/
    /*grammer.add_to_nonterminal(E, vec(E,E,E))*/
    /*var BSR = fungll(grammer, str("1212"))*/
    /*println(str("length of BSR is: ") + BSR.size())*/
    /*for (var i = 0; i < BSR.data.size; i++;) {*/
        /*var BS = BSR.data[i]*/
        /*println(str() + i + ": " + grammer.to_string(BSR.data[i]))*/
    /*}*/
    /*return 0*/
/*}*/