Starting to make the k_prime Lisp grammar; testing to get all MAL test cases parsing

This commit is contained in:
Nathan Braswell
2020-03-28 00:23:01 -04:00
parent 1fa60a5496
commit a9bb011bd9
3 changed files with 1710 additions and 38 deletions

View File

@@ -4,22 +4,26 @@ import map:*
import set:* import set:*
import util:* import util:*
import str:* import str:*
import regex:*
// nonterminals are negative, terminals are positive // nonterminals are negative, terminals are positive
obj Grammer (Object) { obj Grammer (Object) {
var nonterminals: vec<vec<vec<int>>> var nonterminals: vec<vec<vec<int>>>
var nonterminal_names: vec<str> var nonterminal_names: vec<str>
var terminals: vec<char> var terminals: vec<regex>
var start_symbol: int
fun construct(): *Grammer { fun construct(): *Grammer {
nonterminals.construct() nonterminals.construct()
nonterminal_names.construct() nonterminal_names.construct()
terminals.construct() terminals.construct()
start_symbol = 0
return this return this
} }
fun copy_construct(old: *Grammer): void { fun copy_construct(old: *Grammer): void {
nonterminals.copy_construct(&old->nonterminals) nonterminals.copy_construct(&old->nonterminals)
nonterminal_names.copy_construct(&old->nonterminal_names) nonterminal_names.copy_construct(&old->nonterminal_names)
terminals.copy_construct(&old->terminals) terminals.copy_construct(&old->terminals)
start_symbol = old->start_symbol
} }
fun destruct(): void { fun destruct(): void {
nonterminals.destruct() nonterminals.destruct()
@@ -41,19 +45,22 @@ obj Grammer (Object) {
fun add_to_nonterminal(nonterminal: int, rule: ref vec<int>) { fun add_to_nonterminal(nonterminal: int, rule: ref vec<int>) {
nonterminals[(-1*nonterminal)-1].add(rule) nonterminals[(-1*nonterminal)-1].add(rule)
} }
fun add_terminal(c: char): int { fun add_terminal(c: *char): int {
terminals.add(c) terminals.add(regex(c))
return terminals.size return terminals.size
} }
fun get_nonterminal_rules(nonterminal: int): ref vec<vec<int>> { fun get_nonterminal_rules(nonterminal: int): ref vec<vec<int>> {
return nonterminals[(-1*nonterminal)-1] return nonterminals[(-1*nonterminal)-1]
} }
fun get_terminal(terminal: int): char { fun match_terminal(terminal: int, input: ref str, start: int): int {
return terminals[terminal-1] return terminals[terminal-1].long_match(input.getBackingMemory(), start, input.length())
} }
fun is_terminal(x: int): bool { fun is_terminal(x: int): bool {
return x > 0 return x > 0
} }
fun set_start_symbol(x: int) {
start_symbol = x
}
fun to_string(it: BS): str { fun to_string(it: BS): str {
var rule_str = str() var rule_str = str()
for (var i = 0; i < nonterminals[(-1*it.nonterminal)-1][it.rule_idx].size; i++;) { for (var i = 0; i < nonterminals[(-1*it.nonterminal)-1][it.rule_idx].size; i++;) {
@@ -61,17 +68,20 @@ obj Grammer (Object) {
rule_str += "*" rule_str += "*"
} }
var erminal = nonterminals[(-1*it.nonterminal)-1][it.rule_idx][i] var erminal = nonterminals[(-1*it.nonterminal)-1][it.rule_idx][i]
if erminal < 0 { rule_str += to_string(erminal)
rule_str += nonterminal_names[(-1*erminal)-1]
} else {
rule_str += terminals[erminal-1]
}
} }
if it.idx_into_rule == nonterminals[(-1*it.nonterminal)-1][it.rule_idx].size { if it.idx_into_rule == nonterminals[(-1*it.nonterminal)-1][it.rule_idx].size {
rule_str += "*" rule_str += "*"
} }
return str("<") + nonterminal_names[(-1*it.nonterminal)-1] + " ::= " + rule_str + ", " + it.left + ", " + it.pivot + ", " + it.right + ">" return str("<") + nonterminal_names[(-1*it.nonterminal)-1] + " ::= " + rule_str + ", " + it.left + ", " + it.pivot + ", " + it.right + ">"
} }
fun to_string(erminal: int): str {
if erminal < 0 {
return nonterminal_names[(-1*erminal)-1]
} else {
return terminals[erminal-1].regexString
}
}
} }
obj Pending (Object) { obj Pending (Object) {
var nonterminal: int var nonterminal: int
@@ -188,15 +198,11 @@ fun bs(nonterminal: int, rule_idx: int, idx_into_rule: int, left: int, pivot: in
return to_ret return to_ret
} }
fun fungll(grammer: ref Grammer, input: ref str): set<BS> { fun fungll(grammer: ref Grammer, input: ref str): set<BS> {
return loop(grammer, input, descend(grammer, -1 /*start_symbol*/, 0), set<Descriptor>(), map<pair<int, int>, set<Pending>>(), map<pair<int,int>, set<int>>(), set<BS>()) var R = descend(grammer, grammer.start_symbol, 0)
} var U = set<Descriptor>()
fun descend(grammer: ref Grammer, symbol: int, l: int): set<Descriptor> { var G = map<pair<int, int>, set<Pending>>()
var to_ret = set<Descriptor>() var P = map<pair<int,int>, set<int>>()
for (var rhs = 0; rhs < grammer.get_nonterminal_rules(symbol).size; rhs++;) var Y = set<BS>()
to_ret.add(descriptor(symbol, rhs, 0, l, l))
return to_ret
}
fun loop(grammer: ref Grammer, input: ref str, R: set<Descriptor>, U: set<Descriptor>, G: map<pair<int, int>, set<Pending>>, P: map<pair<int,int>, set<int>>, Y: set<BS>): set<BS> {
while R.size() != 0 { while R.size() != 0 {
var d = R.pop() var d = R.pop()
var it = process(grammer, input, d, G, P) var it = process(grammer, input, d, G, P)
@@ -227,7 +233,13 @@ fun loop(grammer: ref Grammer, input: ref str, R: set<Descriptor>, U: set<Descri
} }
return Y return Y
} }
fun process(grammer: ref Grammer, input: ref str, descript: Descriptor, G: map<pair<int, int>, set<Pending>>, P: map<pair<int,int>, set<int>>): triple<pair<set<Descriptor>, set<BS>>, map<pair<int, int>, set<Pending>>, map<pair<int,int>, set<int>>> { fun descend(grammer: ref Grammer, symbol: int, l: int): set<Descriptor> {
var to_ret = set<Descriptor>()
for (var rhs = 0; rhs < grammer.get_nonterminal_rules(symbol).size; rhs++;)
to_ret.add(descriptor(symbol, rhs, 0, l, l))
return to_ret
}
fun process(grammer: ref Grammer, input: ref str, descript: Descriptor, G: ref map<pair<int, int>, set<Pending>>, P: ref map<pair<int,int>, set<int>>): triple<pair<set<Descriptor>, set<BS>>, map<pair<int, int>, set<Pending>>, map<pair<int,int>, set<int>>> {
// if at end / end is emptystr // if at end / end is emptystr
if descript.idx_into_rule == grammer.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx].size { if descript.idx_into_rule == grammer.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx].size {
return process_e(grammer, descript, G, P) return process_e(grammer, descript, G, P)
@@ -243,7 +255,7 @@ fun process_e(grammer: ref Grammer, descript: Descriptor, G: ref map<pair<int, i
var X = descript.nonterminal var X = descript.nonterminal
var l = descript.left; var l = descript.left;
var k = descript.pivot; var k = descript.pivot;
var K = G.get(make_pair(X,l)) var K = G.get_with_default(make_pair(X,l), set<Pending>())
var it = ascend(l,K,k) var it = ascend(l,K,k)
var R = it.first var R = it.first
var Y = it.second var Y = it.second
@@ -266,8 +278,11 @@ fun process_symbol(grammer: ref Grammer, input: ref str, descript: Descriptor, G
} }
} }
fun matc(grammer: ref Grammer, input: ref str, descript: Descriptor): pair<set<Descriptor>, set<BS>> { fun matc(grammer: ref Grammer, input: ref str, descript: Descriptor): pair<set<Descriptor>, set<BS>> {
if descript.pivot < input.length() && input[descript.pivot] == grammer.get_terminal(grammer.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx][descript.idx_into_rule]) { /*println("trying to match " + grammer.to_string(grammer.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx][descript.idx_into_rule]))*/
return make_pair(set(descriptor(descript.nonterminal, descript.rule_idx, descript.idx_into_rule+1, descript.left, descript.pivot+1)), set(bs(descript.nonterminal, descript.rule_idx, descript.idx_into_rule+1, descript.left, descript.pivot, descript.pivot+1))) var match_length = grammer.match_terminal(grammer.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx][descript.idx_into_rule], input, descript.pivot)
if match_length > 0 {
/*println("matched " + grammer.to_string(grammer.get_nonterminal_rules(descript.nonterminal)[descript.rule_idx][descript.idx_into_rule]))*/
return make_pair(set(descriptor(descript.nonterminal, descript.rule_idx, descript.idx_into_rule+1, descript.left, descript.pivot+match_length)), set(bs(descript.nonterminal, descript.rule_idx, descript.idx_into_rule+1, descript.left, descript.pivot, descript.pivot+match_length)))
} else { } else {
return make_pair(set<Descriptor>(), set<BS>()) return make_pair(set<Descriptor>(), set<BS>())
} }
@@ -287,20 +302,19 @@ fun nmatch(k:int, K: ref set<Pending>, R: ref set<int>): pair<set<Descriptor>, s
} }
return make_pair(Rp,Y) return make_pair(Rp,Y)
} }
fun main(argc: int, argv: **char): int { /*fun main(argc: int, argv: **char): int {*/
var grammer.construct(): Grammer /*var grammer.construct(): Grammer*/
var one = grammer.add_terminal('1') /*var one = grammer.add_terminal("12")*/
var E = grammer.add_new_nonterminal("E", vec<int>()) /*var E = grammer.add_new_nonterminal("E", vec<int>())*/
grammer.add_to_nonterminal(E, vec(one)) /*grammer.add_to_nonterminal(E, vec(one))*/
grammer.add_to_nonterminal(E, vec(E,E,E)) /*grammer.add_to_nonterminal(E, vec(E,E,E))*/
var BSR = fungll(grammer, str("1")) /*var BSR = fungll(grammer, str("1212"))*/
println(str("length of BSR is: ") + BSR.size()) /*println(str("length of BSR is: ") + BSR.size())*/
for (var i = 0; i < BSR.data.size; i++;) { /*for (var i = 0; i < BSR.data.size; i++;) {*/
var BS = BSR.data[i] /*var BS = BSR.data[i]*/
/*println(str() + i + ": " + grammer.to_string(BSR.data[i]) + "\t\t" + BSR.data[i].to_string())*/ /*println(str() + i + ": " + grammer.to_string(BSR.data[i]))*/
println(str() + i + ": " + grammer.to_string(BSR.data[i])) /*}*/
}
return 0 /*return 0*/
} /*}*/

1642
k_prime.krak Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -83,6 +83,12 @@ obj set<T> (Object, Serializable) {
to_ret.add(items) to_ret.add(items)
return to_ret return to_ret
} }
fun operator-(items: ref set<T>): set<T> {
var to_ret.copy_construct(this): set<T>
for (var i = 0; i < items.data.size; i++;)
to_ret.remove(items.data[i])
return to_ret
}
fun add(item: ref T) { fun add(item: ref T) {
if (!contains(item)) if (!contains(item))
data.add(item) data.add(item)
@@ -142,5 +148,15 @@ obj set<T> (Object, Serializable) {
} }
} }
fun pop(): T {
var to_ret = data.last()
data.remove(data.size-1)
return to_ret
}
fun union(other: set<T>): set<T> {
for (var i = 0; i < data.size; i++;)
other.add(data[i])
return other
}
} }