// Listing metadata: 441 lines, 19 KiB, plaintext.
import vec:*
|
|
import vec_literals:*
|
|
import map:*
|
|
import set:*
|
|
import hash_set
|
|
import util:*
|
|
import str:*
|
|
import regex:*
|
|
|
|
// Symbol ids: nonterminals are negative (-1, -2, ...), terminals are positive (1, 2, ...); 0 is never a valid symbol id.
|
|
// A grammar whose rules carry semantic actions.
//
// Symbols are encoded as ints: the nonterminal stored at index i has id
// -(i+1) and the terminal stored at index i has id i+1, so 0 is never a
// valid symbol id.
//
// T is the value type produced by the semantic actions, and K is the type of
// the extra data value stored next to each action and handed back to it as
// its first argument.
obj Grammer<T,K> (Object) {
    // nonterminals[i] is the list of alternative rules of nonterminal -(i+1);
    // each rule is a sequence of symbol ids.
    var nonterminals: vec<vec<vec<int>>>
    // nonterminal_names[i] is the name of nonterminal -(i+1).
    var nonterminal_names: vec<str>
    // nonterminal_funs[i][j] is the (data, action) pair of rule j of
    // nonterminal -(i+1).
    var nonterminal_funs: vec<vec<pair<K,fun(ref K, ref vec<T>): T>>>
    // terminals[i] is the regex of terminal i+1.
    var terminals: vec<regex>
    // terminal_funs[i] is the (data, action) pair of terminal i+1.
    var terminal_funs: vec<pair<K,fun(ref K,ref str,int,int): T>>
    // Symbol id that a full parse must derive; 0 until set_start_symbol is called.
    var start_symbol: int

    // Initialises an empty grammar with no start symbol (start_symbol == 0).
    fun construct(): *Grammer {
        nonterminals.construct()
        nonterminal_names.construct()
        nonterminal_funs.construct()
        terminals.construct()
        terminal_funs.construct()
        start_symbol = 0
        return this
    }

    // Initialises this grammar as a member-wise copy of old.
    fun copy_construct(old: *Grammer): void {
        nonterminals.copy_construct(&old->nonterminals)
        nonterminal_names.copy_construct(&old->nonterminal_names)
        nonterminal_funs.copy_construct(&old->nonterminal_funs)
        terminals.copy_construct(&old->terminals)
        terminal_funs.copy_construct(&old->terminal_funs)
        start_symbol = old->start_symbol
    }

    // Destructs every container member.
    fun destruct(): void {
        nonterminals.destruct()
        nonterminal_names.destruct()
        nonterminal_funs.destruct()
        terminals.destruct()
        terminal_funs.destruct()
    }

    // Assignment: destruct the current contents, then copy-construct from other.
    fun operator=(other:ref Grammer):void {
        destruct()
        copy_construct(&other)
    }

    // Convenience overload taking the name as a C string.
    fun add_new_nonterminal(name: *char, rule: ref vec<int>, d: K, f: fun(ref K,ref vec<T>): T): int {
        return add_new_nonterminal(str(name), rule, d, f)
    }

    // Appends a new nonterminal called name with a single rule and its
    // (d, f) action pair. Returns the new nonterminal's (negative) id.
    // No check is made for an existing nonterminal with the same name.
    fun add_new_nonterminal(name: ref str, rule: ref vec<int>, d: K, f: fun(ref K,ref vec<T>): T): int {
        nonterminals.add(vec(rule))
        nonterminal_names.add(name)
        nonterminal_funs.add(vec(make_pair(d,f)))
        return -1*nonterminals.size
    }

    // Adds rule as a further alternative of the nonterminal called name if
    // nonterminal_names.find reports an index >= 0 for it, otherwise creates
    // that nonterminal. Returns the nonterminal's id either way.
    fun add_to_or_create_nonterminal(name: ref str, rule: ref vec<int>, d: K, f: fun(ref K,ref vec<T>): T): int {
        var idx = nonterminal_names.find(name)
        if idx >= 0 {
            add_to_nonterminal(-1*(idx+1), rule, d, f)
            return -1*(idx+1)
        } else {
            return add_new_nonterminal(name, rule, d, f)
        }
    }

    // Appends rule and its (d, f) action pair as a new alternative of an
    // existing nonterminal, given by its (negative) id.
    fun add_to_nonterminal(nonterminal: int, rule: ref vec<int>, d: K, f: fun(ref K,ref vec<T>): T) {
        nonterminals[(-1*nonterminal)-1].add(rule)
        nonterminal_funs[(-1*nonterminal)-1].add(make_pair(d,f))
    }

    // Convenience overload taking the regex source as a C string.
    fun add_terminal(c: *char, d: K, f: fun(ref K,ref str,int,int): T): int {
        return add_terminal(str(c), d, f)
    }

    // Appends a terminal matched by the regex built from c, with its (d, f)
    // action pair. Returns the new terminal's (positive) id.
    fun add_terminal(c: ref str, d: K, f: fun(ref K,ref str,int,int): T): int {
        terminals.add(regex(c))
        terminal_funs.add(make_pair(d,f))
        return terminals.size
    }

    // Returns the alternatives of the nonterminal with the given (negative) id.
    fun get_nonterminal_rules(nonterminal: int): ref vec<vec<int>> {
        return nonterminals[(-1*nonterminal)-1]
    }

    // Runs the regex of the terminal with the given (positive) id against
    // input starting at offset start and returns long_match's result.
    // NOTE(review): presumably the length of the longest match; callers in
    // this file treat any value > 0 as a successful match - confirm in regex.
    fun match_terminal(terminal: int, input: ref str, start: int): int {
        return terminals[terminal-1].long_match(input.getBackingMemory(), start, input.length())
    }

    // True when x is a terminal id, i.e. strictly positive.
    fun is_terminal(x: int): bool {
        return x > 0
    }

    // Records x as the symbol eval_BSR looks for as the root of the parse.
    fun set_start_symbol(x: int) {
        start_symbol = x
    }

    // Renders a BSR element as "<Name ::= rule, left, pivot, right>", with a
    // "*" placed in the rule at position it.idx_into_rule.
    fun to_string(it: BS): str {
        var rule_str = str()
        for (var i = 0; i < nonterminals[(-1*it.nonterminal)-1][it.rule_idx].size; i++;) {
            if i == it.idx_into_rule {
                rule_str += "*"
            }
            var erminal = nonterminals[(-1*it.nonterminal)-1][it.rule_idx][i]
            rule_str += to_string(erminal)
            if i < nonterminals[(-1*it.nonterminal)-1][it.rule_idx].size-1 {
                rule_str += " "
            }
        }
        // The marker goes after the last symbol when the whole rule is consumed.
        if it.idx_into_rule == nonterminals[(-1*it.nonterminal)-1][it.rule_idx].size {
            rule_str += "*"
        }
        return str("<") + nonterminal_names[(-1*it.nonterminal)-1] + " ::= " + rule_str + ", " + it.left + ", " + it.pivot + ", " + it.right + ">"
    }

    // Renders a symbol id: the name for a nonterminal, the regex source for a
    // terminal, and a placeholder for the invalid id 0.
    fun to_string(erminal: int): str {
        if erminal < 0 {
            return nonterminal_names[(-1*erminal)-1]
        } else if erminal == 0 {
            // 0 is what start_symbol holds before set_start_symbol is called;
            // without this branch the lookup below would index terminals[-1].
            return str("<no symbol>")
        } else {
            return terminals[erminal-1].regexString
        }
    }

    // Renders the start symbol followed by one "Name ::= symbols" line per rule.
    fun to_string(): str {
        var to_ret = str()
        for (var i = 0; i < nonterminals.size; i++;) {
            for (var j = 0; j < nonterminals[i].size; j++;) {
                to_ret += nonterminal_names[i] + " ::="
                for (var k = 0; k < nonterminals[i][j].size; k++;) {
                    to_ret += " " + to_string(nonterminals[i][j][k])
                }
                to_ret += "\n"
            }
        }
        return "start_symbol: " + to_string(start_symbol) + "\n" + to_ret
    }

    // Evaluates the parse recorded in BSR. The root is the first element for
    // a completed rule of start_symbol that spans the whole input; if there is
    // none, the BSR set is printed and error is called.
    fun eval_BSR(input: ref str, BSR: ref set<BS>): T {
        var top = -1
        for (var i = 0; i < BSR.data.size; i++;) {
            if BSR.data[i].nonterminal == start_symbol && BSR.data[i].idx_into_rule == nonterminals[(-1*BSR.data[i].nonterminal)-1][BSR.data[i].rule_idx].size && BSR.data[i].left == 0 && BSR.data[i].right == input.length() {
                top = i
                break
            }
        }
        if top == -1 {
            println("Could not find top for input:")
            println(input)
            println(str("of length ") + input.length())
            for (var i = 0; i < BSR.data.size; i++;) {
                println(str() + i + ": " + to_string(BSR.data[i]))
            }
            error("Could not find top")
        }
        return eval_BSR(input, BSR, top)
    }

    // Evaluates the completed rule recorded at BSR.data[c].
    //
    // The rule is walked right to left. The last symbol of the current prefix
    // covers bs.pivot..bs.right: a terminal is evaluated by its action, a
    // nonterminal by recursing into the first completed element for it with
    // that span. The walk then steps to the element describing the remaining
    // prefix (same rule, left..pivot). The collected values are reversed and
    // passed to the rule's action.
    //
    // If a needed element is missing from BSR a diagnostic is printed and
    // error is called, instead of indexing BSR.data with -1.
    // NOTE(review): this relies on error not returning, as the two-argument
    // eval_BSR already does - confirm.
    fun eval_BSR(input: ref str, BSR: ref set<BS>, c: int): T {
        var bs = BSR.data[c]
        var nonterminal = (-1*bs.nonterminal)-1
        if bs.idx_into_rule != nonterminals[nonterminal][bs.rule_idx].size {
            error("Evaluating BSR from not the end!")
        }
        var params = vec<T>()
        for (var i = bs.idx_into_rule-1; i >= 0; i--;) {
            var erminal = nonterminals[nonterminal][bs.rule_idx][i]
            if is_terminal(erminal) {
                var right_value = terminal_funs[erminal-1].second(terminal_funs[erminal-1].first, input, bs.pivot, bs.right)
                params.add(right_value)
            } else {
                // Find the completed sub-parse of erminal covering pivot..right.
                var right = -1
                var sub_nonterminal_idx = (-1*erminal)-1
                for (var j = 0; j < BSR.data.size; j++;) {
                    if BSR.data[j].nonterminal == erminal && BSR.data[j].idx_into_rule == nonterminals[sub_nonterminal_idx][BSR.data[j].rule_idx].size && BSR.data[j].left == bs.pivot && BSR.data[j].right == bs.right {
                        right = j
                        break
                    }
                }
                if right == -1 {
                    println(str("Could not find completed sub-parse of ") + to_string(erminal) + " spanning " + bs.pivot + " to " + bs.right)
                    error("Could not find sub-parse while evaluating BSR")
                }
                var right_value = eval_BSR(input, BSR, right)
                params.add(right_value)
            }
            // get the new left bs: the element for the first i symbols of
            // this rule, covering left..pivot
            if i != 0 {
                var new_bs_idx = -1
                for (var j = 0; j < BSR.data.size; j++;) {
                    if BSR.data[j].nonterminal == bs.nonterminal && BSR.data[j].rule_idx == bs.rule_idx && BSR.data[j].idx_into_rule == i && BSR.data[j].left == bs.left && BSR.data[j].right == bs.pivot {
                        new_bs_idx = j
                        break
                    }
                }
                if new_bs_idx == -1 {
                    println(str("Could not find prefix of length ") + i + " for " + to_string(bs))
                    error("Could not find rule prefix while evaluating BSR")
                }
                bs = BSR.data[new_bs_idx]
            }
        }
        // params was filled right to left, so reverse it into rule order.
        var to_ret = nonterminal_funs[nonterminal][bs.rule_idx].second(nonterminal_funs[nonterminal][bs.rule_idx].first, params.reverse())
        return to_ret
    }
}
|
|
// A rule position (nonterminal, rule_idx, idx_into_rule) together with the
// input offset `left` at which that rule instance started.
obj Pending (Object) {
    var nonterminal: int
    var rule_idx: int
    var idx_into_rule: int
    var left: int

    // Default construction: every field is 0.
    fun construct(): *Pending {
        return this->construct(0, 0, 0, 0)
    }

    // Field-by-field construction.
    fun construct(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int): *Pending {
        this->left = left
        this->idx_into_rule = idx_into_rule
        this->rule_idx = rule_idx
        this->nonterminal = nonterminal
        return this
    }

    // Copies every field from old.
    fun copy_construct(old: *Pending): void {
        this->construct(old->nonterminal, old->rule_idx, old->idx_into_rule, old->left)
    }

    // Nothing to release: all fields are plain ints.
    fun destruct(): void { }

    // Assignment is destruct followed by copy construction.
    fun operator=(other:ref Pending):void {
        destruct()
        copy_construct(&other)
    }

    // Two values are equal when all four fields match.
    fun operator==(rhs: ref Pending): bool {
        if nonterminal != rhs.nonterminal {
            return false
        }
        if rule_idx != rhs.rule_idx {
            return false
        }
        if idx_into_rule != rhs.idx_into_rule {
            return false
        }
        return left == rhs.left
    }
}
|
|
// Builds a Pending by value from its four fields.
fun pending(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int): Pending {
    var fresh.construct(nonterminal, rule_idx, idx_into_rule, left): Pending
    return fresh
}
|
|
// A unit of parser work: a rule position (nonterminal, rule_idx,
// idx_into_rule) plus the input offsets `left` and `pivot`.
obj Descriptor (Object, Hashable) {
    var nonterminal: int
    var rule_idx: int
    var idx_into_rule: int
    var left: int
    var pivot: int

    // Default construction: every field is 0.
    fun construct(): *Descriptor {
        return this->construct(0, 0, 0, 0, 0)
    }

    // Field-by-field construction.
    fun construct(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int, pivot: int): *Descriptor {
        this->pivot = pivot
        this->left = left
        this->idx_into_rule = idx_into_rule
        this->rule_idx = rule_idx
        this->nonterminal = nonterminal
        return this
    }

    // Copies every field from old.
    fun copy_construct(old: *Descriptor): void {
        this->construct(old->nonterminal, old->rule_idx, old->idx_into_rule, old->left, old->pivot)
    }

    // Nothing to release: all fields are plain ints.
    fun destruct(): void { }

    // Assignment is destruct followed by copy construction.
    fun operator=(other:ref Descriptor):void {
        destruct()
        copy_construct(&other)
    }

    // Two values are equal when all five fields match.
    fun operator==(rhs: ref Descriptor): bool {
        if nonterminal != rhs.nonterminal {
            return false
        }
        if rule_idx != rhs.rule_idx {
            return false
        }
        if idx_into_rule != rhs.idx_into_rule {
            return false
        }
        if left != rhs.left {
            return false
        }
        return pivot == rhs.pivot
    }

    // Hash: a fixed linear combination of the five fields with the weights
    // 3, 5, 7, 11 and 13. Equal descriptors always hash equally.
    fun hash():ulong {
        return nonterminal*3 + rule_idx*5 + idx_into_rule*7 + left*11 + pivot*13
    }
}
|
|
// Builds a Descriptor by value from its five fields.
fun descriptor(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int, pivot: int): Descriptor {
    var fresh.construct(nonterminal, rule_idx, idx_into_rule, left, pivot): Descriptor
    return fresh
}
|
|
// One element of the parser's result set: a rule position (nonterminal,
// rule_idx, idx_into_rule) with the three input offsets left, pivot, right.
obj BS (Object) {
    var nonterminal: int
    var rule_idx: int
    var idx_into_rule: int
    var left: int
    var pivot: int
    var right: int

    // Default construction: every field is 0.
    fun construct(): *BS {
        return this->construct(0, 0, 0, 0, 0, 0)
    }

    // Field-by-field construction.
    fun construct(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int, pivot: int, right: int): *BS {
        this->right = right
        this->pivot = pivot
        this->left = left
        this->idx_into_rule = idx_into_rule
        this->rule_idx = rule_idx
        this->nonterminal = nonterminal
        return this
    }

    // Copies every field from old.
    fun copy_construct(old: *BS): void {
        this->construct(old->nonterminal, old->rule_idx, old->idx_into_rule, old->left, old->pivot, old->right)
    }

    // Nothing to release: all fields are plain ints.
    fun destruct(): void { }

    // Assignment is destruct followed by copy construction.
    fun operator=(other:ref BS):void {
        destruct()
        copy_construct(&other)
    }

    // Raw field dump; unlike the grammar's rendering, no names are resolved.
    fun to_string(): str {
        var position_part = str("nonterminal:") + nonterminal + " rule_idx:" + rule_idx + " idx_into_rule:" + idx_into_rule
        var span_part = str(" l:") + left + " p:" + pivot + " r:" + right
        return position_part + span_part
    }

    // Two values are equal when all six fields match.
    fun operator==(rhs: ref BS): bool {
        if nonterminal != rhs.nonterminal {
            return false
        }
        if rule_idx != rhs.rule_idx {
            return false
        }
        if idx_into_rule != rhs.idx_into_rule {
            return false
        }
        if left != rhs.left {
            return false
        }
        if pivot != rhs.pivot {
            return false
        }
        return right == rhs.right
    }
}
|
|
// Builds a BS by value from its six fields.
fun bs(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int, pivot: int, right: int): BS {
    var fresh.construct(nonterminal, rule_idx, idx_into_rule, left, pivot, right): BS
    return fresh
}
|
|
/*fun fungll<T,K>(grammar: ref Grammer<T,K>, start_symbol: *char, input: ref str): set<BS> {*/
|
|
/*return fungll(grammar, str(start_symbol), input)*/
|
|
/*}*/
|
|
/*fun fungll<T,K>(grammar: ref Grammer<T,K>, start_symbol: str, input: ref str): set<BS> {*/
|
|
/*return fungll(grammar, -1*(grammar.nonterminal_funs.find(start_symbol)+1), input)*/
|
|
/*}*/
|
|
// Parses input with grammar starting from start_symbol and returns the set of
// BS elements collected along the way.
//
// The work set starts as the descriptors produced by descend for the start
// symbol at offset 0. chaotic_closure is handed a callback that processes one
// descriptor, folds the BS elements and the two relation updates returned by
// process into the accumulators, and returns the descriptors to add.
// NOTE(review): presumably chaotic_closure keeps applying the callback until
// no new descriptors appear - confirm in set.
fun fungll<T,K>(grammar: ref Grammer<T,K>, start_symbol: int, input: ref str): set<BS> {
    var work = descend(grammar, start_symbol, 0)
    var continuations = map<pair<int, int>, set<Pending>>()
    var completed = map<pair<int,int>, set<int>>()
    var collected = set<BS>()

    work.chaotic_closure(fun(d: Descriptor): set<Descriptor> {
        var step = process(grammar, input, d, continuations, completed)
        collected += step.first.second
        var new_continuations = step.second
        var new_completed = step.third

        // Merge per key: union into an existing entry, otherwise insert.
        for (var g = 0; g < new_continuations.keys.size; g++;) {
            if !continuations.contains_key(new_continuations.keys[g]) {
                continuations[new_continuations.keys[g]] = new_continuations.values[g]
            } else {
                continuations[new_continuations.keys[g]].add(new_continuations.values[g])
            }
        }
        for (var p = 0; p < new_completed.keys.size; p++;) {
            if !completed.contains_key(new_completed.keys[p]) {
                completed[new_completed.keys[p]] = new_completed.values[p]
            } else {
                completed[new_completed.keys[p]].add(new_completed.values[p])
            }
        }

        // The descriptors discovered by this step.
        return step.first.first
    })
    return collected
}
|
|
// Returns one descriptor per alternative of symbol, each at the start of its
// rule (idx_into_rule 0) with both left and pivot set to l.
fun descend<T,K>(grammar: ref Grammer<T,K>, symbol: int, l: int): set<Descriptor> {
    var starts = set<Descriptor>()
    var alternative_count = grammar.get_nonterminal_rules(symbol).size
    for (var alternative = 0; alternative < alternative_count; alternative++;) {
        starts.add(descriptor(symbol, alternative, 0, l, l))
    }
    return starts
}
|
|
// Processes one descriptor. When its position is at the end of its rule
// (which includes every position in an empty rule) the work goes to
// process_e; otherwise it goes to process_symbol.
// Returns ((new descriptors, new BS elements), additions to G, additions to P).
fun process<T,K>(grammar: ref Grammer<T,K>, input: ref str, descript: Descriptor, G: ref map<pair<int, int>, set<Pending>>, P: ref map<pair<int,int>, set<int>>): triple<pair<set<Descriptor>, set<BS>>, map<pair<int, int>, set<Pending>>, map<pair<int,int>, set<int>>> {
    var rule_length = grammar.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx].size
    if descript.idx_into_rule != rule_length {
        return process_symbol(grammar, input, descript, G, P)
    }
    return process_e(grammar, descript, G, P)
}
|
|
// Handles a descriptor whose rule has been fully consumed.
//
// Every Pending recorded in G under (nonterminal, left) is resumed at the
// descriptor's pivot via ascend. If the rule itself is empty, a BS element
// for it spanning left..left is added as well. The third component of the
// result records pivot under the key (nonterminal, left) for merging into P;
// the second component (additions to G) is always empty. P is not read here.
fun process_e<T,K>(grammar: ref Grammer<T,K>, descript: Descriptor, G: ref map<pair<int, int>, set<Pending>>, P: ref map<pair<int,int>, set<int>>): triple<pair<set<Descriptor>, set<BS>>, map<pair<int, int>, set<Pending>>, map<pair<int,int>, set<int>>> {
    var X = descript.nonterminal
    var l = descript.left
    var k = descript.pivot
    // Named `waiting` rather than `K` so that it does not shadow the
    // template parameter K.
    var waiting = G.get_with_default(make_pair(X,l), set<Pending>())
    var it = ascend(l,waiting,k)
    var R = it.first
    var Y = it.second
    if grammar.get_nonterminal_rules(X)[descript.rule_idx].size == 0 {
        Y.add(bs(X,descript.rule_idx, 0, l, l, l))
    }
    return make_triple(make_pair(R,Y), map<pair<int, int>, set<Pending>>(), map(make_pair(X,l), set(k)))
}
|
|
// Handles a descriptor that still has a symbol to consume.
//
// - terminal: the result of matc, with no additions to G or P;
// - nonterminal with nothing recorded in P for (symbol, pivot): descend into
//   it and record the continuation in G;
// - nonterminal with entries in P for (symbol, pivot): skip over it using
//   those entries, and still record the continuation in G.
fun process_symbol<T,K>(grammar: ref Grammer<T,K>, input: ref str, descript: Descriptor, G: ref map<pair<int, int>, set<Pending>>, P: ref map<pair<int,int>, set<int>>): triple<pair<set<Descriptor>, set<BS>>, map<pair<int, int>, set<Pending>>, map<pair<int,int>, set<int>>> {
    var symbol = grammar.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx][descript.idx_into_rule]
    var pivot = descript.pivot
    var known_rights = P.get_with_default(make_pair(symbol,pivot), set<int>())
    // Where to resume in this rule once `symbol` has been parsed.
    var continuation = pending(descript.nonterminal, descript.rule_idx, descript.idx_into_rule+1, descript.left)
    var new_continuations = map(make_pair(symbol,pivot), set(continuation))

    if grammar.is_terminal(symbol) {
        return make_triple(matc(grammar,input,descript), map<pair<int,int>, set<Pending>>(), map<pair<int,int>, set<int>>())
    }
    if known_rights.size() == 0 {
        return make_triple(make_pair(descend(grammar,symbol,pivot), set<BS>()), new_continuations, map<pair<int,int>, set<int>>())
    }
    return make_triple(skip(pivot, continuation, known_rights), new_continuations, map<pair<int,int>, set<int>>())
}
|
|
// Tries to match the terminal at the descriptor's position against input at
// the descriptor's pivot. If match_terminal reports a value > 0, returns one
// descriptor advanced past the terminal (pivot moved forward by that value)
// and one BS element spanning left, old pivot, new pivot. Otherwise returns
// two empty sets.
fun matc<T,K>(grammar: ref Grammer<T,K>, input: ref str, descript: Descriptor): pair<set<Descriptor>, set<BS>> {
    var terminal = grammar.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx][descript.idx_into_rule]
    var consumed = grammar.match_terminal(terminal, input, descript.pivot)
    if !(consumed > 0) {
        return make_pair(set<Descriptor>(), set<BS>())
    }
    var next_idx = descript.idx_into_rule+1
    var new_pivot = descript.pivot+consumed
    var advanced = descriptor(descript.nonterminal, descript.rule_idx, next_idx, descript.left, new_pivot)
    var matched = bs(descript.nonterminal, descript.rule_idx, next_idx, descript.left, descript.pivot, new_pivot)
    return make_pair(set(advanced), set(matched))
}
|
|
// nmatch for a single continuation c against every right extent in R.
fun skip(k: int, c: Pending, R: ref set<int>): pair<set<Descriptor>, set<BS>> {
    var only_c = set(c)
    return nmatch(k, only_c, R)
}
|
|
// nmatch for every continuation in K against the single right extent r.
fun ascend(k:int, K: ref set<Pending>, r: int): pair<set<Descriptor>, set<BS>> {
    var only_r = set(r)
    return nmatch(k, K, only_r)
}
|
|
// Combines every continuation in K with every right extent in R. Each pair
// yields a descriptor at the continuation's position with pivot r, and a BS
// element at the same position spanning (continuation.left, k, r).
fun nmatch(k:int, K: ref set<Pending>, R: ref set<int>): pair<set<Descriptor>, set<BS>> {
    var resumed = set<Descriptor>()
    var recorded = set<BS>()
    for (var ci = 0; ci < K.data.size; ci++;) {
        var continuation = K.data[ci]
        for (var ri = 0; ri < R.data.size; ri++;) {
            var right = R.data[ri]
            resumed.add(descriptor(continuation.nonterminal, continuation.rule_idx, continuation.idx_into_rule, continuation.left, right))
            recorded.add(bs(continuation.nonterminal, continuation.rule_idx, continuation.idx_into_rule, continuation.left, k, right))
        }
    }
    return make_pair(resumed, recorded)
}
|
|
/*fun main(argc: int, argv: **char): int {*/
|
|
/*var grammar.construct(): Grammer<int>*/
|
|
/*var Number = grammar.add_new_nonterminal("Number", vec(grammar.add_terminal("[0-9]+", fun(input: ref str, l: int, r: int): int { return string_to_num<int>(input.slice(l,r)); })), fun(i: ref vec<int>): int { return i[0]; })*/
|
|
|
|
/*var mult = grammar.add_terminal("\\*", fun(input: ref str, l: int, r: int): int { return 1; })*/
|
|
/*var Factor = grammar.add_new_nonterminal("Factor", vec(Number), fun(i: ref vec<int>): int { return i[0]; })*/
|
|
/*grammar.add_to_nonterminal(Factor, vec(Factor, mult, Number), fun(i: ref vec<int>): int { return i[0]*i[2]; })*/
|
|
|
|
/*var add = grammar.add_terminal("\\+", fun(input: ref str, l: int, r: int): int { return 1; })*/
|
|
/*var Term = grammar.add_new_nonterminal("Term", vec(Factor), fun(i: ref vec<int>): int { return i[0]; })*/
|
|
/*grammar.add_to_nonterminal(Term, vec(Term, add, Factor), fun(i: ref vec<int>): int { return i[0]+i[2]; })*/
|
|
|
|
/*grammar.set_start_symbol(Term)*/
|
|
|
|
/*var input = str("1+23*44")*/
|
|
/*var BSR = fungll(grammar, input)*/
|
|
|
|
|
|
/*println(str("length of BSR is: ") + BSR.size())*/
|
|
/*for (var i = 0; i < BSR.data.size; i++;) {*/
|
|
/*var BS = BSR.data[i]*/
|
|
/*println(str() + i + ": " + grammar.to_string(BSR.data[i]))*/
|
|
/*}*/
|
|
|
|
/*var res = grammar.eval_BSR(input, BSR)*/
|
|
/*println(str("result of grammar.eval_BSR(fungll(grammar, ") + input + ")) = " + res)*/
|
|
|
|
/*return 0*/
|
|
/*}*/
|