more work

Nathan Braswell
2015-08-06 02:42:40 -04:00
parent dec9b7d0bd
commit 1f119af8ad
9 changed files with 284 additions and 21 deletions

.gitignore vendored

@@ -10,4 +10,5 @@ stats
*krakout*
kraklist.txt
.*.un~
RNGLR.pdf
papers
callgrind*


@@ -105,21 +105,21 @@ obj grammer (Object) {
var non_terminals: set::set<symbol::symbol>
var terminals: vector::vector<util::pair<symbol::symbol, regex::regex>>
var first_set_map: map::map<symbol::symbol, set::set<symbol::symbol>>
var state_automata: state
var parse_table: table
fun construct(): *grammer {
rules.construct()
non_terminals.construct()
terminals.construct()
first_set_map.construct()
state_automata.construct()
parse_table.construct()
}
fun copy_construct(old: *grammer) {
rules.copy_construct(&old->rules)
non_terminals.copy_construct(&old->non_terminals)
terminals.copy_construct(&old->terminals)
first_set_map.copy_construct(&old->first_set_map)
state_automata.copy_construct(&old->state_automata)
parse_table.copy_construct(&old->parse_table)
}
fun operator=(other: grammer) {
destruct()
@@ -130,7 +130,7 @@ obj grammer (Object) {
non_terminals.destruct()
terminals.destruct()
first_set_map.destruct()
state_automata.destruct()
parse_table.destruct()
}
fun calculate_first_set() {
@@ -154,7 +154,7 @@ obj grammer (Object) {
})
}
}
fun first_vector(rhs: vector::vector<symbol::symbol>): set::set<symbol::symbol> {
fun first_vector(rhs: ref vector::vector<symbol::symbol>): set::set<symbol::symbol> {
var toRet = set::set<symbol::symbol>()
if (rhs.size) {
for (var i = 0; i < rhs.size; i++;) {
@@ -176,11 +176,8 @@ obj grammer (Object) {
}
fun calculate_state_automaton() {
state_automata.items = vector::vector(rules[0].with_lookahead(set::set(symbol::eof_symbol())))
io::println("pre first closure")
state_automata = closure(state_automata)
io::println("post first closure")
var states = vector::vector(state_automata) // vector instead of set because we need to iterate by index
var first_state = closure(state(vector::vector(rules[0].with_lookahead(set::set(symbol::eof_symbol())))))
var states = vector::vector(first_state) // vector instead of set because we need to iterate by index
var newItems = stack::stack(0) // 0 is the index of the first and only item in states
var count = 0
while (newItems.size()) {
@@ -194,12 +191,25 @@ obj grammer (Object) {
states[I].items.for_each(fun(r: ref rule) {
if (!r.at_end())
possGoto.add(r.next())
// if r is at end or the rest reduces to null, add a reduce for each lookahead symbol
if ( r.at_end() || first_vector(r.after_next()).contains(symbol::null_symbol()) ) {
var rule_no = rules.find(r.plain())
r.lookahead.for_each(fun(sym: ref symbol::symbol) {
parse_table.add_reduce(I, sym, rule_no)
})
}
})
possGoto.for_each(fun(X: ref symbol::symbol) {
var goneState = goto(states[I], X)
if (goneState.items.size && !states.contains(goneState)) {
newItems.push(states.size)
states.add(goneState)
if (goneState.items.size) {
var already_state = states.find(goneState)
if (already_state == -1) {
parse_table.add_push(I, X, states.size)
newItems.push(states.size)
states.add(goneState)
} else {
parse_table.add_push(I, X, already_state)
}
}
})
}
@@ -212,6 +222,9 @@ obj grammer (Object) {
})
io::println(" there were : states")
io::println(states.size)
io::println(" there were : table")
/*io::println(parse_table.to_string())*/
parse_table.print_string()
}
fun closure(initial: ref state): state {
@@ -297,8 +310,6 @@ obj grammer (Object) {
non_terminals.for_each( fun(i : symbol::symbol) { result += string::string("\n\t") + i.to_string(); } )
result += "\nterminals:"
terminals.for_each( fun(i : util::pair<symbol::symbol, regex::regex>) { result += string::string("\n\t") + i.first.to_string() + ": " + i.second.regexString; } )
result += "\nstate:"
result += state_automata.to_string()
return result
}
}
@@ -355,6 +366,9 @@ obj rule (Object) {
fun at_end(): bool {
return position >= rhs.size
}
fun plain(): rule {
return rule(lhs, rhs)
}
fun with_lookahead(newLookahead: set::set<symbol::symbol>): rule {
var toRet = rule(lhs, rhs)
toRet.position = position
@@ -418,3 +432,98 @@ obj state (Object) {
}
}
// REALLY need those enums
var push = 0
var reduce = 1
// note that these two are not actually currently used
// accept is the reduce of the goal rule and reject is the
// absence of actions
var accept = 2
var reject = 3
fun action(act: int, state_or_rule: int): action {
var toRet: action
toRet.act = act
toRet.state_or_rule = state_or_rule
return toRet
}
obj action {
var act: int // really need those enums
var state_or_rule: int // sigh
fun operator==(other: action): bool {
return act == other.act && state_or_rule == other.state_or_rule
}
}
obj table (Object) {
// a 2-dimensional table: a vector indexed by state number, of maps from symbol to a vector of parse actions
var items: vector::vector<map::map<symbol::symbol, vector::vector<action>>>
fun construct(): *table {
items.construct()
}
fun copy_construct(other: *table) {
items.copy_construct(&other->items)
}
fun operator=(other: table) {
destruct()
copy_construct(&other)
}
fun destruct() {
items.destruct()
}
fun expand_to(include_state: int) {
while (include_state >= items.size)
items.addEnd(map::map<symbol::symbol, vector::vector<action>>())
}
fun add_push(from_state: int, on_symbol: ref symbol::symbol, to_state: int) {
expand_to(from_state)
if (items[from_state].contains_key(on_symbol))
items[from_state][on_symbol].addEnd(action(push, to_state))
else
items[from_state].set(on_symbol, vector::vector(action(push, to_state)))
}
fun add_reduce(from_state: int, on_symbol: ref symbol::symbol, by_rule_no: int) {
expand_to(from_state)
if (items[from_state].contains_key(on_symbol))
items[from_state][on_symbol].addEnd(action(reduce, by_rule_no))
else
items[from_state].set(on_symbol, vector::vector(action(reduce, by_rule_no)))
}
fun add_accept(from_state: int, on_symbol: ref symbol::symbol) {
expand_to(from_state)
if (items[from_state].contains_key(on_symbol))
items[from_state][on_symbol].addEnd(action(accept, 0))
else
items[from_state].set(on_symbol, vector::vector(action(accept, 0)))
}
fun get(state: int, sym: symbol::symbol): vector::vector<action> {
return items[state][sym]
}
fun print_string() {
/*return string::string("woo a table of size: ") + items.size*/
io::print("woo a table of size: ")
io::println(items.size)
for (var i = 0; i < items.size; i++;) {
io::print("for state: ")
io::println(i)
items[i].for_each(fun(sym: symbol::symbol, actions: vector::vector<action>) {
actions.for_each(fun(action: action) {
io::print("\ton symbol: ")
io::print(sym.to_string())
io::print(" do action: ")
if (action.act == push)
io::print("push ")
else if (action.act == reduce)
io::print("reduce ")
else if (action.act == accept)
io::print("accept ")
else if (action.act == reject)
io::print("reject ")
io::print(action.state_or_rule)
io::println()
})
})
}
}
}
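
A minimal usage sketch of the new table type, not taken from the commit: each (state, symbol) cell holds a vector of actions, so a shift/reduce conflict is simply two entries in the same cell rather than an overwrite. It assumes the qualified-import style used in parser.krak below (import grammer, import symbol, import io); the variable names and the state/rule numbers are made up.

// hypothetical: in state 1 on eof, both shift to state 4 and reduce by rule 2
var eof = symbol::eof_symbol()
var conflict_table.construct(): grammer::table
conflict_table.add_push(1, eof, 4)
conflict_table.add_reduce(1, eof, 2)
// both actions now sit in the same cell, so a GLR-style parser can try each one
if (conflict_table.get(1, eof).contains(grammer::action(grammer::push, 4)))
    io::println("cell (1, eof) still holds the push")
if (conflict_table.get(1, eof).contains(grammer::action(grammer::reduce, 2)))
    io::println("cell (1, eof) holds the reduce as well")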


@@ -58,6 +58,10 @@ obj map<T,U> (Object) {
keys.remove(idx)
values.remove(idx)
}
fun clear() {
keys.clear()
values.clear()
}
fun operator[](key: T): ref U {
return get(key)
}

stdlib/parser.krak Normal file

@@ -0,0 +1,107 @@
import grammer
import symbol
import tree
import vector
import stack
import map
import util
import string
import io
import mem
obj parser (Object) {
var input: vector::vector<symbol::symbol>
var gram: grammer::grammer
// gss (graph-structured stack) will go here; not implemented yet
var to_reduce: stack::stack<reduction>
var to_shift: stack::stack< util::pair<*tree::tree<int>, int> >
var SPPFStepNodes: vector::vector< util::pair<*tree::tree<symbol::symbol>, int> >
var packed_map: map::map<*tree::tree<symbol::symbol>, bool>
fun construct(grammerIn: grammer::grammer): *parser {
input.construct()
gram.copy_construct(&grammerIn)
to_reduce.construct()
to_shift.construct()
SPPFStepNodes.construct()
packed_map.construct()
return this
}
fun copy_construct(old: *parser) {
input.copy_construct(&old->input)
gram.copy_construct(&old->gram)
to_reduce.copy_construct(&old->to_reduce)
to_shift.copy_construct(&old->to_shift)
SPPFStepNodes.copy_construct(&old->SPPFStepNodes)
packed_map.copy_construct(&old->packed_map)
}
fun operator=(old: ref parser) {
destruct()
copy_construct(&old)
}
fun destruct() {
input.destruct()
gram.destruct()
to_reduce.destruct()
to_shift.destruct()
SPPFStepNodes.destruct()
packed_map.destruct()
}
fun parse_input(inputStr: string::string, name: string::string): *tree::tree<symbol::symbol> {
input.clear()
// gss.clear
to_reduce.clear()
to_shift.clear()
SPPFStepNodes.clear()
packed_map.clear()
// if the parse table has any reduction in state 0 on eof, it can only be a reduction
// by the goal rule, so empty input is accepted immediately
io::println("checking the bidness")
if (inputStr == "" && gram.parse_table.get(0, symbol::eof_symbol()).contains(grammer::action(grammer::reduce, 0))) {
io::println("Accept on no input for ")
io::println(name)
return mem::new<tree::tree<symbol::symbol>>()->construct(symbol::null_symbol())
}
io::println("failed for ")
io::println(name)
return mem::new<tree::tree<symbol::symbol>>()->construct(symbol::null_symbol())
}
}
fun reduction(f: *tree::tree<int>, s: symbol::symbol, l: int, n: *tree::tree<symbol::symbol>, label:*tree::tree<symbol::symbol>): reduction {
var toRet.construct(f,s,l,n,label): reduction
return toRet
}
obj reduction (Object) {
var from: *tree::tree<int>
var sym: symbol::symbol
var length: int
var nullable_parts: *tree::tree<symbol::symbol>
var label: *tree::tree<symbol::symbol>
fun construct(f: *tree::tree<int>, s: symbol::symbol, l: int, n: *tree::tree<symbol::symbol>, label:*tree::tree<symbol::symbol>): *reduction {
from = f
sym.copy_construct(&s)
length = l
nullable_parts = n
this->label = label // qualify with this-> so the field is assigned rather than the parameter
return this
}
fun copy_construct(old: *reduction) {
from = old->from
sym.copy_construct(&old->sym)
length = old->length
nullable_parts = old->nullable_parts
label = old->label
}
fun operator=(other: reduction):void {
destruct()
copy_construct(&other)
}
fun destruct() {
sym.destruct()
}
}
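
For orientation, a hedged sketch of what one of the new reduction records might look like when it gets queued; the names here are invented, and tree<int> is assumed to stand in for a GSS node labelled with a state number, matching the pairs stored in to_shift.

// a zero-length (nullable) reduction from a GSS node for state 0,
// built with the same mem::new pattern parse_input already uses
var gss_node = mem::new<tree::tree<int>>()->construct(0)
var null_node = mem::new<tree::tree<symbol::symbol>>()->construct(symbol::null_symbol())
var red = parser::reduction(gss_node, symbol::null_symbol(), 0, null_node, null_node)
// a parser instance would then push red onto its to_reduce stack for processing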


@@ -56,12 +56,12 @@ obj set<T> (Object) {
fun operator+=(items: set<T>) {
add(items)
}
fun add(item: T) {
fun add(item: ref T) {
if (!contains(item))
data.add(item)
}
fun add(items: set<T>) {
items.for_each( fun(item: T) add(item); )
fun add(items: ref set<T>) {
items.for_each( fun(item: ref T) add(item); )
}
fun remove(item: T) {
var idx = data.find(item)


@@ -34,6 +34,9 @@ obj stack<T> (Object) {
data.remove(data.size-1)
return toRet
}
fun clear() {
data.clear()
}
fun top(): T {
return data[data.size-1]
}

stdlib/tree.krak Normal file

@@ -0,0 +1,28 @@
import mem
import vector
obj tree<T> (Object) {
var data: T
var children: vector::vector<*tree<T>>
fun construct(dataIn: T): *tree<T> {
mem::maybe_copy_construct(&data, &dataIn)
children.construct()
return this
}
// Some of these don't really make much sense considering this tree is used through
// heap-allocated pointers. Best to have them for safety, though
fun copy_construct(old: *tree<T>) {
mem::maybe_copy_construct(&data, &old->data)
children.copy_construct(&old->children)
}
// ditto
fun operator=(other: tree<T>):void {
destruct()
copy_construct(&other)
}
fun destruct() {
mem::maybe_destruct(&data)
children.destruct()
}
}
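
A small sketch of using the new tree through heap pointers, in the same style as the mem::new calls in parser.krak; the node names are invented, and import tree, import mem, import io are assumed.

// allocate nodes on the heap and link them through the children vector;
// destruct() tears down the vector of pointers but does not free the children themselves
var root = mem::new<tree::tree<int>>()->construct(1)
var child = mem::new<tree::tree<int>>()->construct(2)
root->children.add(child)
io::println(root->children.size) // prints 1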


@@ -173,6 +173,12 @@ obj vector<T> (Object) {
size--
}
fun clear() {
for (var i = 0; i < size; i++;)
maybe_destruct(&data[i])
size = 0
}
fun for_each(func: fun(ref T):void):void {
for (var i = 0; i < size; i++;)
func(data[i])


@@ -1,15 +1,17 @@
import io:*
import grammer:*
import parser:*
import lexer:*
import string:*
import util:*
import symbol:*
import tree:*
fun main():int {
var a = load_grammer(read_file(string("../krakenGrammer.kgm")))
/*var a = load_grammer(read_file(string("../krakenGrammer.kgm")))*/
/*var a = load_grammer(read_file(string("grammer.kgm")))*/
//var a = load_grammer(read_file(string("grammer2.kgm")))
var a = load_grammer(read_file(string("grammer2.kgm")))
println(a.to_string())
var doFirstSet = fun() {
a.calculate_first_set()
@@ -48,6 +50,9 @@ fun main():int {
/*range(80).for_each(fun(i: int) { println(lex.next().to_string()); } )*/
println(a.to_string())
a.calculate_state_automaton()
var parse.construct(a): parser
var result = parse.parse_input(string(""), string("fun name"))
/*var parse.construct(): parser*/
return 0
}