diff --git a/.gitignore b/.gitignore
index cc19a32..880a271 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,4 +10,5 @@ stats
 *krakout*
 kraklist.txt
 .*.un~
-RNGLR.pdf
+papers
+callgrind*
diff --git a/stdlib/grammer.krak b/stdlib/grammer.krak
index 9df9887..65f3bf1 100644
--- a/stdlib/grammer.krak
+++ b/stdlib/grammer.krak
@@ -105,21 +105,21 @@ obj grammer (Object) {
     var non_terminals: set::set
     var terminals: vector::vector>
     var first_set_map: map::map>
-    var state_automata: state
+    var parse_table: table

     fun construct(): *grammer {
         rules.construct()
         non_terminals.construct()
         terminals.construct()
         first_set_map.construct()
-        state_automata.construct()
+        parse_table.construct()
     }
     fun copy_construct(old: *grammer) {
         rules.copy_construct(&old->rules)
         non_terminals.copy_construct(&old->non_terminals)
         terminals.copy_construct(&old->terminals)
         first_set_map.copy_construct(&old->first_set_map)
-        state_automata.copy_construct(&old->state_automata)
+        parse_table.copy_construct(&old->parse_table)
     }
     fun operator=(other: grammer) {
         destruct()
@@ -130,7 +130,7 @@ obj grammer (Object) {
         non_terminals.destruct()
         terminals.destruct()
         first_set_map.destruct()
-        state_automata.destruct()
+        parse_table.destruct()
     }

     fun calculate_first_set() {
@@ -154,7 +154,7 @@ obj grammer (Object) {
             })
         }
     }
-    fun first_vector(rhs: vector::vector): set::set {
+    fun first_vector(rhs: ref vector::vector): set::set {
         var toRet = set::set()
         if (rhs.size) {
             for (var i = 0; i < rhs.size; i++;) {
@@ -176,11 +176,8 @@ obj grammer (Object) {
     }

     fun calculate_state_automaton() {
-        state_automata.items = vector::vector(rules[0].with_lookahead(set::set(symbol::eof_symbol())))
-        io::println("pre first closure")
-        state_automata = closure(state_automata)
-        io::println("post first closure")
-        var states = vector::vector(state_automata) // vector instead of set because we need to iterate by index
+        var first_state = closure(state(vector::vector(rules[0].with_lookahead(set::set(symbol::eof_symbol())))))
+        var states = vector::vector(first_state) // vector instead of set because we need to iterate by index
         var newItems = stack::stack(0) // 0 is the index of the first and only item in states
         var count = 0
         while (newItems.size()) {
@@ -194,12 +191,25 @@ obj grammer (Object) {
             states[I].items.for_each(fun(r: ref rule) {
                 if (!r.at_end())
                     possGoto.add(r.next())
+                // if r is at end or the rest reduces to null, add a reduce for each lookahead symbol
+                if ( r.at_end() || first_vector(r.after_next()).contains(symbol::null_symbol()) ) {
+                    var rule_no = rules.find(r.plain())
+                    r.lookahead.for_each(fun(sym: ref symbol::symbol) {
+                        parse_table.add_reduce(I, sym, rule_no)
+                    })
+                }
             })
             possGoto.for_each(fun(X: ref symbol::symbol) {
                 var goneState = goto(states[I], X)
-                if (goneState.items.size && !states.contains(goneState)) {
-                    newItems.push(states.size)
-                    states.add(goneState)
+                if (goneState.items.size) {
+                    var already_state = states.find(goneState)
+                    if (already_state == -1) {
+                        parse_table.add_push(I, X, states.size)
+                        newItems.push(states.size)
+                        states.add(goneState)
+                    } else {
+                        parse_table.add_push(I, X, already_state)
+                    }
                 }
             })
         }
@@ -212,6 +222,9 @@ obj grammer (Object) {
         })
         io::println(" there were : states")
         io::println(states.size)
+        io::println(" there were : table")
+        /*io::println(parse_table.to_string())*/
+        parse_table.print_string()
     }

     fun closure(initial: ref state): state {
@@ -297,8 +310,6 @@ obj grammer (Object) {
         non_terminals.for_each( fun(i : symbol::symbol) { result += string::string("\n\t") + i.to_string(); } )
         result += "\nterminals:"
         terminals.for_each( fun(i : util::pair) { result += string::string("\n\t") + i.first.to_string() + ": " + i.second.regexString; } )
-        result += "\nstate:"
-        result += state_automata.to_string()
         return result
     }
 }
@@ -355,6 +366,9 @@ obj rule (Object) {
     fun at_end(): bool {
         return position >= rhs.size
     }
+    fun plain(): rule {
+        return rule(lhs, rhs)
+    }
     fun with_lookahead(newLookahead: set::set): rule {
         var toRet = rule(lhs, rhs)
         toRet.position = position
@@ -418,3 +432,98 @@ obj state (Object) {
     }
 }

+// REALLY need those enums
+var push = 0
+var reduce = 1
+// note that these two are not actually currently used
+// accept is the reduce of the goal rule and reject is the
+// absence of actions
+var accept = 2
+var reject = 3
+fun action(act: int, state_or_rule: int): action {
+    var toRet: action
+    toRet.act = act
+    toRet.state_or_rule = state_or_rule
+    return toRet
+}
+obj action {
+    var act: int // really need those enums
+    var state_or_rule: int // sigh
+    fun operator==(other: action): bool {
+        return act == other.act && state_or_rule == other.state_or_rule
+    }
+}
+
+obj table (Object) {
+    // a 2 dimensional table made of a vector and a map that maps from stateno & symbol to a vector of parse actions
+    var items: vector::vector>>
+
+    fun construct(): *table {
+        items.construct()
+    }
+    fun copy_construct(other: *table) {
+        items.copy_construct(&other->items)
+    }
+    fun operator=(other: table) {
+        destruct()
+        copy_construct(&other)
+    }
+    fun destruct() {
+        items.destruct()
+    }
+    fun expand_to(include_state: int) {
+        while (include_state >= items.size)
+            items.addEnd(map::map>())
+    }
+    fun add_push(from_state: int, on_symbol: ref symbol::symbol, to_state: int) {
+        expand_to(from_state)
+        if (items[from_state].contains_key(on_symbol))
+            items[from_state][on_symbol].addEnd(action(push, to_state))
+        else
+            items[from_state].set(on_symbol, vector::vector(action(push, to_state)))
+    }
+    fun add_reduce(from_state: int, on_symbol: ref symbol::symbol, by_rule_no: int) {
+        expand_to(from_state)
+        if (items[from_state].contains_key(on_symbol))
+            items[from_state][on_symbol].addEnd(action(reduce, by_rule_no))
+        else
+            items[from_state].set(on_symbol, vector::vector(action(reduce, by_rule_no)))
+    }
+    fun add_accept(from_state: int, on_symbol: ref symbol::symbol) {
+        expand_to(from_state)
+        if (items[from_state].contains_key(on_symbol))
+            items[from_state][on_symbol].addEnd(action(accept, 0))
+        else
+            items[from_state].set(on_symbol, vector::vector(action(accept, 0)))
+    }
+    fun get(state: int, sym: symbol::symbol): vector::vector {
+        return items[state][sym]
+    }
+    fun print_string(): string::string {
+        /*return string::string("woo a table of size: ") + items.size*/
+        io::print("woo a table of size: ")
+        io::println(items.size)
+        for (var i = 0; i < items.size; i++;) {
+            io::print("for state: ")
+            io::println(i)
+            items[i].for_each(fun(sym: symbol::symbol, actions: vector::vector) {
+                actions.for_each(fun(action: action) {
+                    io::print("\ton symbol: ")
+                    io::print(sym.to_string())
+                    io::print(" do action: ")
+                    if (action.act == push)
+                        io::print("push ")
+                    else if (action.act == reduce)
+                        io::print("reduce ")
+                    else if (action.act == accept)
+                        io::print("accept ")
+                    else if (action.act == reject)
+                        io::print("reject ")
+                    io::print(action.state_or_rule)
+                    io::println()
+                })
+            })
+        }
+    }
+}
+
diff --git a/stdlib/map.krak b/stdlib/map.krak
index 68e83a6..3823ddb 100644
--- a/stdlib/map.krak
+++ b/stdlib/map.krak
@@ -58,6 +58,10 @@ obj map (Object) {
         keys.remove(idx)
         values.remove(idx)
     }
+    fun clear() {
+        keys.clear()
+        values.clear()
+    }
     fun operator[](key: T): ref U {
         return get(key)
     }
diff --git a/stdlib/parser.krak b/stdlib/parser.krak
new file mode 100644
index 0000000..9b50a7a
--- /dev/null
+++ b/stdlib/parser.krak
@@ -0,0 +1,107 @@
+import grammer
+import symbol
+import tree
+import vector
+import stack
+import map
+import util
+import string
+import io
+import mem
+
+obj parser (Object) {
+    var input: vector::vector
+    var gram: grammer::grammer
+    // gss
+    var to_reduce: stack::stack
+    var to_shift: stack::stack< util::pair<*tree::tree, int> >
+    var SPPFStepNodes: vector::vector< util::pair<*tree::tree, int> >
+    var packed_map: map::map<*tree::tree, bool>
+
+    fun construct(grammerIn: grammer::grammer): *parser {
+        input.construct()
+        gram.copy_construct(&grammerIn)
+        to_reduce.construct()
+        to_shift.construct()
+        SPPFStepNodes.construct()
+        packed_map.construct()
+        return this
+    }
+    fun copy_construct(old: *parser) {
+        input.copy_construct(&old->input)
+        gram.copy_construct(&old->gram)
+        to_reduce.copy_construct(&old->to_reduce)
+        to_shift.copy_construct(&old->to_shift)
+        SPPFStepNodes.copy_construct(&old->SPPFStepNodes)
+        packed_map.copy_construct(&old->packed_map)
+    }
+    fun operator=(old: ref parser) {
+        destruct()
+        copy_construct(&old)
+    }
+    fun destruct() {
+        input.destruct()
+        gram.destruct()
+        to_reduce.destruct()
+        to_shift.destruct()
+        SPPFStepNodes.destruct()
+        packed_map.destruct()
+    }
+
+    fun parse_input(inputStr: string::string, name: string::string): *tree::tree {
+        input.clear()
+        // gss.clear
+        to_reduce.clear()
+        to_shift.clear()
+        SPPFStepNodes.clear()
+        packed_map.clear()
+
+        // if the zero state contains any reductions for state 0 and eof, then
+        // it must be reducing to the goal state
+        io::println("checking the bidness")
+        if (inputStr == "" && gram.parse_table.get(0, symbol::eof_symbol()).contains(grammer::action(grammer::reduce, 0))) {
+            io::println("Accept on no input for ")
+            io::println(name)
+            return mem::new>()->construct(symbol::null_symbol())
+        }
+        io::println("failed for ")
+        io::println(name)
+        return mem::new>()->construct(symbol::null_symbol())
+    }
+}
+
+fun reduction(f: *tree::tree, s: symbol::symbol, l: int, n: *tree::tree, label:*tree::tree): reduction {
+    var toRet.construct(f,s,l,n,label): reduction
+    return toRet
+}
+
+obj reduction (Object) {
+    var from: *tree::tree
+    var sym: symbol::symbol
+    var length: int
+    var nullable_parts: *tree::tree
+    var label: *tree::tree
+
+    fun construct(f: *tree::tree, s: symbol::symbol, l: int, n: *tree::tree, label:*tree::tree): *reduction {
+        from = f
+        sym.copy_construct(&s)
+        length = l
+        nullable_parts = n
+        this->label = label
+        return this
+    }
+    fun copy_construct(old: *reduction) {
+        from = old->from
+        sym.copy_construct(&old->sym)
+        length = old->length
+        nullable_parts = old->nullable_parts
+        label = old->label
+    }
+    fun operator=(other: reduction):void {
+        destruct()
+        copy_construct(&other)
+    }
+    fun destruct() {
+        sym.destruct()
+    }
+}
diff --git a/stdlib/set.krak b/stdlib/set.krak
index e8d7b71..4f07ee2 100644
--- a/stdlib/set.krak
+++ b/stdlib/set.krak
@@ -56,12 +56,12 @@ obj set (Object) {
     fun operator+=(items: set) {
         add(items)
     }
-    fun add(item: T) {
+    fun add(item: ref T) {
         if (!contains(item))
             data.add(item)
     }
-    fun add(items: set) {
-        items.for_each( fun(item: T) add(item); )
+    fun add(items: ref set) {
+        items.for_each( fun(item: ref T) add(item); )
     }
     fun remove(item: T) {
         var idx = data.find(item)
diff --git a/stdlib/stack.krak b/stdlib/stack.krak
index d3ebe83..e4660f6 100644
--- a/stdlib/stack.krak
+++ b/stdlib/stack.krak
@@ -34,6 +34,9 @@ obj stack (Object) {
         data.remove(data.size-1)
         return toRet
     }
+    fun clear() {
+        data.clear()
+    }
     fun top(): T {
         return data[data.size-1]
     }
diff --git a/stdlib/tree.krak b/stdlib/tree.krak
new file mode 100644
index 0000000..de0ac40
--- /dev/null
+++ b/stdlib/tree.krak
@@ -0,0 +1,28 @@
+import mem
+import vector
+
+obj tree (Object) {
+    var data: T
+    var children: vector::vector<*tree>
+    fun construct(dataIn: T): *tree {
+        mem::maybe_copy_construct(&data, &dataIn)
+        children.construct()
+        return this
+    }
+    // Some of these don't really make much sense considering this tree is all about
+    // heap allocated pointers. Best to have them for safety, though
+    fun copy_construct(old: *tree) {
+        mem::maybe_copy_construct(&data, &old->data)
+        children.copy_construct(&old->children)
+    }
+    // ditto
+    fun operator=(other: tree):void {
+        destruct()
+        copy_construct(&other)
+    }
+    fun destruct() {
+        mem::maybe_destruct(&data)
+        children.destruct()
+    }
+}
+
diff --git a/stdlib/vector.krak b/stdlib/vector.krak
index 1e67b2a..32fefae 100644
--- a/stdlib/vector.krak
+++ b/stdlib/vector.krak
@@ -173,6 +173,12 @@ obj vector (Object) {
         size--
     }

+    fun clear() {
+        for (var i = 0; i < size; i++;)
+            maybe_destruct(&data[i])
+        size = 0
+    }
+
     fun for_each(func: fun(ref T):void):void {
         for (var i = 0; i < size; i++;)
             func(data[i])
diff --git a/tests/test_grammer.krak b/tests/test_grammer.krak
index 3b528fd..3a42fbb 100644
--- a/tests/test_grammer.krak
+++ b/tests/test_grammer.krak
@@ -1,15 +1,17 @@
 import io:*
 import grammer:*
+import parser:*
 import lexer:*
 import string:*
 import util:*
 import symbol:*
+import tree:*

 fun main():int {
-    var a = load_grammer(read_file(string("../krakenGrammer.kgm")))
+    /*var a = load_grammer(read_file(string("../krakenGrammer.kgm")))*/
     /*var a = load_grammer(read_file(string("grammer.kgm")))*/
-    //var a = load_grammer(read_file(string("grammer2.kgm")))
+    var a = load_grammer(read_file(string("grammer2.kgm")))
     println(a.to_string())
     var doFirstSet = fun() {
         a.calculate_first_set()
@@ -48,6 +50,9 @@ fun main():int {
     /*range(80).for_each(fun(i: int) { println(lex.next().to_string()); } )*/
     println(a.to_string())
     a.calculate_state_automaton()
+    var parse.construct(a): parser
+    var result = parse.parse_input(string(""), string("fun name"))
+    /*var parse.construct(): parser*/
     return 0
 }
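
To make the new table machinery concrete, here is roughly what calculate_state_automaton() builds for a toy grammar whose goal rule is rules[0]: GOAL -> S, with one other rule, rules[1]: S -> a. This is hand-derived for illustration, not output from the code, and the numbering of states 1 and 2 depends on discovery order:

    state 0: { GOAL -> . S, {eof} }, { S -> . a, {eof} }
             push on S -> state 1, push on a -> state 2
    state 1: { GOAL -> S . , {eof} }
             reduce by rule 0 on eof (the goal-rule reduce that parse_input treats as accept)
    state 2: { S -> a . , {eof} }
             reduce by rule 1 on eof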
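
The new parser.krak only exercises the table for empty input so far. As a sketch of how the table is meant to be consulted once tokens flow through it, the following is a minimal deterministic LR driver written against the API this diff adds (parse_table.get, table.items, grammer::push / grammer::reduce, action.state_or_rule). It is not part of the change set: the function name run_lr is invented for the example, it assumes the caller appends symbol::eof_symbol() to the token stream, it assumes every consulted cell holds exactly one action (cells are vectors of actions so an RNGLR/GLR driver can fork on conflicts instead), and it ignores the nullable-tail reductions the construction also records.

import grammer
import symbol
import vector
import stack

// Illustrative sketch only: a deterministic LR loop over the table built above.
// Returns 1 on accept, 0 on reject.
fun run_lr(gram: ref grammer::grammer, input: ref vector::vector<symbol::symbol>): int {
    var states = stack::stack(0)                  // state stack, starting in state 0
    var pos = 0
    while (pos < input.size) {
        // assumes states.top() is within parse_table.items; a hardened driver would bounds-check
        if (!gram.parse_table.items[states.top()].contains_key(input[pos]))
            return 0                              // no action for this state/symbol: reject
        var acts = gram.parse_table.get(states.top(), input[pos])
        var act = acts[0]                         // deterministic assumption: exactly one action per cell
        if (act.act == grammer::push) {           // shift the terminal and enter the new state
            states.push(act.state_or_rule)
            pos++
        } else {                                  // grammer::reduce
            if (act.state_or_rule == 0)
                return 1                          // reducing by the goal rule (rules[0]) is the accept case
            var r = gram.rules[act.state_or_rule]
            for (var i = 0; i < r.rhs.size; i++;) // pop one state per symbol of the handle
                states.pop()
            // the goto on the rule's lhs is stored as a push action in the same table
            var gotoActs = gram.parse_table.get(states.top(), r.lhs)
            states.push(gotoActs[0].state_or_rule)
        }
    }
    return 0
}

Note that terminal shifts and nonterminal gotos are both stored as push actions in this encoding, which is why the reduce branch re-consults the table on the rule's lhs after popping the handle.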