Files
kraken/fungll.krak

321 lines
13 KiB
Plaintext

import vec:*
import vec_literals:*
import map:*
import set:*
import util:*
import str:*
import regex:*
// nonterminals are negative, terminals are positive
// A grammar whose symbols are plain ints: nonterminal ids are negative and
// terminal ids are positive. Id n (or -n) lives in slot n-1 of its backing vec.
obj Grammer (Object) {
    // nonterminals[i] is the list of alternative rules for nonterminal id -(i+1);
    // each rule is a vec of symbol ids.
    var nonterminals: vec<vec<vec<int>>>
    // nonterminal_names[i] is the display name for nonterminal id -(i+1).
    var nonterminal_names: vec<str>
    // terminals[i] is the regex for terminal id i+1.
    var terminals: vec<regex>
    // Symbol id handed to descend() when a parse starts; 0 until set_start_symbol is called.
    var start_symbol: int
    // Initialise an empty grammar.
    fun construct(): *Grammer {
        start_symbol = 0
        terminals.construct()
        nonterminal_names.construct()
        nonterminals.construct()
        return this
    }
    // Initialise this grammar as a member-wise copy of old.
    fun copy_construct(old: *Grammer): void {
        start_symbol = old->start_symbol
        terminals.copy_construct(&old->terminals)
        nonterminal_names.copy_construct(&old->nonterminal_names)
        nonterminals.copy_construct(&old->nonterminals)
    }
    // Release the three owned containers.
    fun destruct(): void {
        terminals.destruct()
        nonterminal_names.destruct()
        nonterminals.destruct()
    }
    // Assignment: tear down the current contents, then copy from other.
    fun operator=(other:ref Grammer):void {
        destruct()
        copy_construct(&other)
    }
    // Convenience overload taking a C string name; forwards to the str overload.
    fun add_new_nonterminal(name: *char, rule: ref vec<int>): int {
        var owned_name = str(name)
        return add_new_nonterminal(owned_name, rule)
    }
    // Register a new nonterminal whose only alternative so far is rule.
    // Returns the new (negative) nonterminal id.
    fun add_new_nonterminal(name: ref str, rule: ref vec<int>): int {
        nonterminal_names.add(name)
        nonterminals.add(vec(rule))
        return -1*nonterminals.size
    }
    // Append another alternative to an existing nonterminal.
    fun add_to_nonterminal(nonterminal: int, rule: ref vec<int>) {
        var slot = (-1*nonterminal)-1
        nonterminals[slot].add(rule)
    }
    // Register a terminal described by the regex source c.
    // Returns the new (positive) terminal id.
    fun add_terminal(c: *char): int {
        var compiled = regex(c)
        terminals.add(compiled)
        return terminals.size
    }
    // All alternatives of the given nonterminal id.
    fun get_nonterminal_rules(nonterminal: int): ref vec<vec<int>> {
        var slot = (-1*nonterminal)-1
        return nonterminals[slot]
    }
    // Run the terminal's regex on input beginning at start and return whatever
    // long_match reports (callers in this file treat a value > 0 as a match length).
    fun match_terminal(terminal: int, input: ref str, start: int): int {
        var slot = terminal-1
        return terminals[slot].long_match(input.getBackingMemory(), start, input.length())
    }
    // Terminals are exactly the strictly positive ids.
    fun is_terminal(x: int): bool {
        return 0 < x
    }
    fun set_start_symbol(x: int) {
        start_symbol = x
    }
    // Render a BS entry as "<Name ::= symbols, left, pivot, right>", with a "*"
    // inserted at position idx_into_rule inside the rule's symbols.
    fun to_string(it: BS): str {
        var slot = (-1*it.nonterminal)-1
        var rule_len = nonterminals[slot][it.rule_idx].size
        var rule_str = str()
        for (var i = 0; i < rule_len; i++;) {
            if i == it.idx_into_rule {
                rule_str += "*"
            }
            var symbol = nonterminals[slot][it.rule_idx][i]
            rule_str += to_string(symbol)
        }
        // Marker sits after the last symbol when the whole rule has been consumed.
        if it.idx_into_rule == rule_len {
            rule_str += "*"
        }
        return str("<") + nonterminal_names[slot] + " ::= " + rule_str + ", " + it.left + ", " + it.pivot + ", " + it.right + ">"
    }
    // Printable form of a single symbol id: the nonterminal's name, or the
    // terminal's regex source.
    fun to_string(erminal: int): str {
        if erminal < 0 {
            return nonterminal_names[(-1*erminal)-1]
        }
        return terminals[erminal-1].regexString
    }
}
// Value object: a position inside a rule (nonterminal, rule_idx, idx_into_rule)
// together with a left index. process_symbol stores these in G and nmatch turns
// them back into descriptors.
obj Pending (Object) {
    var nonterminal: int
    var rule_idx: int
    var idx_into_rule: int
    var left: int
    // Default: every field zero.
    fun construct(): *Pending {
        nonterminal = 0
        rule_idx = 0
        idx_into_rule = 0
        left = 0
        return this
    }
    fun construct(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int): *Pending {
        this->left = left
        this->idx_into_rule = idx_into_rule
        this->rule_idx = rule_idx
        this->nonterminal = nonterminal
        return this
    }
    // Field-by-field copy of old.
    fun copy_construct(old: *Pending): void {
        this->construct(old->nonterminal, old->rule_idx, old->idx_into_rule, old->left)
    }
    // Nothing is owned, so there is nothing to release.
    fun destruct(): void { }
    fun operator=(other:ref Pending):void {
        destruct()
        copy_construct(&other)
    }
    // Two Pendings are equal when all four fields agree.
    fun operator==(rhs: ref Pending): bool {
        if nonterminal != rhs.nonterminal {
            return false
        }
        if rule_idx != rhs.rule_idx {
            return false
        }
        if idx_into_rule != rhs.idx_into_rule {
            return false
        }
        return left == rhs.left
    }
}
// Factory: build a Pending by value from its four fields.
fun pending(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int): Pending {
    var made.construct(nonterminal, rule_idx, idx_into_rule, left): Pending
    return made
}
// Value object: a position inside a rule (nonterminal, rule_idx, idx_into_rule)
// plus a left and a pivot index. These are the work items held in fungll's R and U sets.
obj Descriptor (Object) {
    var nonterminal: int
    var rule_idx: int
    var idx_into_rule: int
    var left: int
    var pivot: int
    // Default: every field zero.
    fun construct(): *Descriptor {
        nonterminal = 0
        rule_idx = 0
        idx_into_rule = 0
        left = 0
        pivot = 0
        return this
    }
    fun construct(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int, pivot: int): *Descriptor {
        this->pivot = pivot
        this->left = left
        this->idx_into_rule = idx_into_rule
        this->rule_idx = rule_idx
        this->nonterminal = nonterminal
        return this
    }
    // Field-by-field copy of old.
    fun copy_construct(old: *Descriptor): void {
        this->construct(old->nonterminal, old->rule_idx, old->idx_into_rule, old->left, old->pivot)
    }
    // Nothing is owned, so there is nothing to release.
    fun destruct(): void { }
    fun operator=(other:ref Descriptor):void {
        destruct()
        copy_construct(&other)
    }
    // Two Descriptors are equal when all five fields agree.
    fun operator==(rhs: ref Descriptor): bool {
        if nonterminal != rhs.nonterminal {
            return false
        }
        if rule_idx != rhs.rule_idx {
            return false
        }
        if idx_into_rule != rhs.idx_into_rule {
            return false
        }
        if left != rhs.left {
            return false
        }
        return pivot == rhs.pivot
    }
}
// Factory: build a Descriptor by value from its five fields.
fun descriptor(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int, pivot: int): Descriptor {
    var made.construct(nonterminal, rule_idx, idx_into_rule, left, pivot): Descriptor
    return made
}
// Value object: a position inside a rule (nonterminal, rule_idx, idx_into_rule)
// plus left, pivot and right indices. fungll returns a set of these.
obj BS (Object) {
    var nonterminal: int
    var rule_idx: int
    var idx_into_rule: int
    var left: int
    var pivot: int
    var right: int
    // Default: every field zero.
    fun construct(): *BS {
        nonterminal = 0
        rule_idx = 0
        idx_into_rule = 0
        left = 0
        pivot = 0
        right = 0
        return this
    }
    fun construct(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int, pivot: int, right: int): *BS {
        this->right = right
        this->pivot = pivot
        this->left = left
        this->idx_into_rule = idx_into_rule
        this->rule_idx = rule_idx
        this->nonterminal = nonterminal
        return this
    }
    // Field-by-field copy of old.
    fun copy_construct(old: *BS): void {
        this->construct(old->nonterminal, old->rule_idx, old->idx_into_rule, old->left, old->pivot, old->right)
    }
    // Nothing is owned, so there is nothing to release.
    fun destruct(): void { }
    fun operator=(other:ref BS):void {
        destruct()
        copy_construct(&other)
    }
    // Raw field dump; Grammer.to_string(BS) gives the rule-oriented rendering instead.
    fun to_string(): str {
        var position = str("nonterminal:") + nonterminal + " rule_idx:" + rule_idx + " idx_into_rule:" + idx_into_rule
        return position + " l:" + left + " p:" + pivot + " r:" + right
    }
    // Two BS values are equal when all six fields agree.
    fun operator==(rhs: ref BS): bool {
        if nonterminal != rhs.nonterminal {
            return false
        }
        if rule_idx != rhs.rule_idx {
            return false
        }
        if idx_into_rule != rhs.idx_into_rule {
            return false
        }
        if left != rhs.left {
            return false
        }
        if pivot != rhs.pivot {
            return false
        }
        return right == rhs.right
    }
}
// Factory: build a BS by value from its six fields.
fun bs(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int, pivot: int, right: int): BS {
    var made.construct(nonterminal, rule_idx, idx_into_rule, left, pivot, right): BS
    return made
}
// Parser driver: runs a worklist of Descriptors over `input` using `grammer`
// and returns the accumulated set of BS entries.
//   R - descriptors still to process (seeded from the start symbol at position 0)
//   U - descriptors already processed
//   G - (symbol, index) -> Pending entries recorded by process_symbol
//   P - (symbol, left) -> pivot values recorded by process_e
//   Y - BS entries collected so far (the return value)
// NOTE(review): the names appear to follow the FUN-GLL formulation - confirm against the paper.
fun fungll(grammer: ref Grammer, input: ref str): set<BS> {
var R = descend(grammer, grammer.start_symbol, 0)
var U = set<Descriptor>()
var G = map<pair<int, int>, set<Pending>>()
var P = map<pair<int,int>, set<int>>()
var Y = set<BS>()
while R.size() != 0 {
var d = R.pop()
// process returns ((new descriptors, new BS entries), additions to G, additions to P)
var it = process(grammer, input, d, G, P)
var Rp = it.first.first
var Yp = it.first.second
var Gp = it.second
var Pp = it.third
// Mark d as processed, then drop every already-processed descriptor from the worklist.
var U_with_d = U.union(set(d));
var nextR = R.union(Rp) - U_with_d
R = nextR
U = U_with_d
// Merge Gp into G: extend the existing set for a key, or insert the set for a new key.
for (var i = 0; i < Gp.keys.size; i++;) {
if G.contains_key(Gp.keys[i]) {
G[Gp.keys[i]].add(Gp.values[i])
} else {
G[Gp.keys[i]] = Gp.values[i]
}
}
// Merge Pp into P the same way.
for (var i = 0; i < Pp.keys.size; i++;) {
if P.contains_key(Pp.keys[i]) {
P[Pp.keys[i]].add(Pp.values[i])
} else {
P[Pp.keys[i]] = Pp.values[i]
}
}
Y += Yp
}
return Y
}
// Build one Descriptor per alternative of `symbol`, each positioned at the start
// of its rule (idx_into_rule 0) with both left and pivot set to l.
fun descend(grammer: ref Grammer, symbol: int, l: int): set<Descriptor> {
    var fresh = set<Descriptor>()
    var alternative_count = grammer.get_nonterminal_rules(symbol).size
    for (var alt = 0; alt < alternative_count; alt++;) {
        fresh.add(descriptor(symbol, alt, 0, l, l))
    }
    return fresh
}
// Dispatch one descriptor: if its position is past the last symbol of its rule
// (which includes the empty rule) hand it to process_e, otherwise to process_symbol.
// Returns ((new descriptors, new BS entries), additions to G, additions to P).
fun process(grammer: ref Grammer, input: ref str, descript: Descriptor, G: ref map<pair<int, int>, set<Pending>>, P: ref map<pair<int,int>, set<int>>): triple<pair<set<Descriptor>, set<BS>>, map<pair<int, int>, set<Pending>>, map<pair<int,int>, set<int>>> {
    var rule_len = grammer.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx].size
    if descript.idx_into_rule != rule_len {
        // A symbol still remains at the current position.
        return process_symbol(grammer, input, descript, G, P)
    }
    return process_e(grammer, descript, G, P)
}
// Handle a descriptor whose position is at the end of its rule.
// Looks up the Pending entries recorded in G under (nonterminal, left), combines
// them with the descriptor's pivot via ascend, and reports (nonterminal, left) -> pivot
// as an addition to P. For an empty rule an extra BS entry (X, rule_idx, 0, l, l, l)
// is added, since no symbol match could have produced one.
// Returns ((new descriptors, new BS entries), empty G additions, P additions).
// P is not read here; it is accepted so the signature lines up with process_symbol.
fun process_e(grammer: ref Grammer, descript: Descriptor, G: ref map<pair<int, int>, set<Pending>>, P: ref map<pair<int,int>, set<int>>): triple<pair<set<Descriptor>, set<BS>>, map<pair<int, int>, set<Pending>>, map<pair<int,int>, set<int>>> {
    var X = descript.nonterminal
    var l = descript.left
    var k = descript.pivot
    // Pending entries recorded for X starting at l (empty set if none yet).
    var K = G.get_with_default(make_pair(X,l), set<Pending>())
    var it = ascend(l,K,k)
    var R = it.first
    var Y = it.second
    if grammer.get_nonterminal_rules(X)[descript.rule_idx].size == 0 {
        Y.add(bs(X,descript.rule_idx, 0, l, l, l))
    }
    return make_triple(make_pair(R,Y), map<pair<int, int>, set<Pending>>(), map(make_pair(X,l), set(k)))
}
// Handle a descriptor that still has a symbol s at its current position.
//  - s is a terminal: try to match it against the input (matc); G and P get no additions.
//  - s is a nonterminal with no entry in P for (s, pivot): descend into s.
//  - s is a nonterminal with entries in P for (s, pivot): reuse them through skip.
// In both nonterminal cases the position just after s is recorded as a Pending
// under (s, pivot) in the returned G additions.
// Returns ((new descriptors, new BS entries), G additions, empty P additions).
fun process_symbol(grammer: ref Grammer, input: ref str, descript: Descriptor, G: ref map<pair<int, int>, set<Pending>>, P: ref map<pair<int,int>, set<int>>): triple<pair<set<Descriptor>, set<BS>>, map<pair<int, int>, set<Pending>>, map<pair<int,int>, set<int>>> {
    var s = grammer.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx][descript.idx_into_rule]
    var k = descript.pivot
    var R = P.get_with_default(make_pair(s,k), set<int>())
    // Where to resume in the current rule once s has been recognised.
    var resume_at = pending(descript.nonterminal, descript.rule_idx, descript.idx_into_rule+1, descript.left)
    var Gp = map(make_pair(s,k), set(resume_at))
    if grammer.is_terminal(s) {
        return make_triple(matc(grammer,input,descript), map<pair<int,int>, set<Pending>>(), map<pair<int,int>, set<int>>())
    }
    // s in N
    if R.size() == 0 {
        return make_triple(make_pair(descend(grammer,s,k), set<BS>()), Gp, map<pair<int,int>, set<int>>())
    }
    // s in N and R != set()
    return make_triple(skip(k,resume_at, R), Gp, map<pair<int,int>, set<int>>())
}
// Try to match the terminal at the descriptor's current position against the
// input, starting at the descriptor's pivot. On a match of length > 0, returns one
// descriptor advanced past the terminal (pivot moved by the match length) and one
// BS entry spanning left / old pivot / new pivot. Otherwise returns two empty sets.
fun matc(grammer: ref Grammer, input: ref str, descript: Descriptor): pair<set<Descriptor>, set<BS>> {
    var symbol = grammer.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx][descript.idx_into_rule]
    /*println("trying to match " + grammer.to_string(grammer.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx][descript.idx_into_rule]))*/
    var match_length = grammer.match_terminal(symbol, input, descript.pivot)
    if match_length <= 0 {
        return make_pair(set<Descriptor>(), set<BS>())
    }
    /*println("matched " + grammer.to_string(grammer.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx][descript.idx_into_rule]))*/
    var advanced = descript.idx_into_rule+1
    var right = descript.pivot+match_length
    var moved = descriptor(descript.nonterminal, descript.rule_idx, advanced, descript.left, right)
    var recorded = bs(descript.nonterminal, descript.rule_idx, advanced, descript.left, descript.pivot, right)
    return make_pair(set(moved), set(recorded))
}
// Combine a single Pending c with every value in R; see nmatch.
fun skip(k: int, c: Pending, R: ref set<int>): pair<set<Descriptor>, set<BS>> {
    var only_c = set(c)
    return nmatch(k, only_c, R)
}
// Combine every Pending in K with the single value r; see nmatch.
fun ascend(k:int, K: ref set<Pending>, r: int): pair<set<Descriptor>, set<BS>> {
    var only_r = set(r)
    return nmatch(k, K, only_r)
}
// For every (Pending, r) combination from K x R, produce
//   - a Descriptor at the Pending's rule position with the Pending's left and pivot r
//   - a BS entry at the same position with left / k / r.
// Returns the pair (descriptors, BS entries).
fun nmatch(k:int, K: ref set<Pending>, R: ref set<int>): pair<set<Descriptor>, set<BS>> {
    var descriptors = set<Descriptor>()
    var entries = set<BS>()
    var pending_count = K.data.size
    var right_count = R.data.size
    for (var pi = 0; pi < pending_count; pi++;) {
        var slot = K.data[pi]
        for (var ri = 0; ri < right_count; ri++;) {
            var right = R.data[ri]
            descriptors.add(descriptor(slot.nonterminal, slot.rule_idx, slot.idx_into_rule, slot.left, right))
            entries.add(bs(slot.nonterminal, slot.rule_idx, slot.idx_into_rule, slot.left, k, right))
        }
    }
    return make_pair(descriptors,entries)
}
/*fun main(argc: int, argv: **char): int {*/
/*var grammer.construct(): Grammer*/
/*var one = grammer.add_terminal("12")*/
/*var E = grammer.add_new_nonterminal("E", vec<int>())*/
/*grammer.add_to_nonterminal(E, vec(one))*/
/*grammer.add_to_nonterminal(E, vec(E,E,E))*/
/*var BSR = fungll(grammer, str("1212"))*/
/*println(str("length of BSR is: ") + BSR.size())*/
/*for (var i = 0; i < BSR.data.size; i++;) {*/
/*var BS = BSR.data[i]*/
/*println(str() + i + ": " + grammer.to_string(BSR.data[i]))*/
/*}*/
/*return 0*/
/*}*/