diff --git a/stdlib/grammer.krak b/stdlib/grammer.krak
index 161ad55..f0e31a8 100644
--- a/stdlib/grammer.krak
+++ b/stdlib/grammer.krak
@@ -192,10 +192,10 @@ obj grammer (Object) {
             if (!r.at_end())
                 possGoto.add(r.next())
             // if r is at end or the rest reduces to null, add a reduce for each lookahead symbol
-            if ( r.at_end() || first_vector(r.after_next()).contains(symbol::null_symbol()) ) {
+            if ( r.at_end() || first_vector(r.after()).contains(symbol::null_symbol()) ) {
                 var rule_no = rules.find(r.plain())
                 r.lookahead.for_each(fun(sym: ref symbol::symbol) {
-                    parse_table.add_reduce(I, sym, rule_no)
+                    parse_table.add_reduce(I, sym, rule_no, r.position)
                 })
             }
         })
@@ -360,6 +360,9 @@ obj rule (Object) {
     fun next(): ref symbol::symbol {
         return rhs[position]
     }
+    fun after(): vector::vector {
+        return rhs.slice(position, -1)
+    }
     fun after_next(): vector::vector {
         return rhs.slice(position + 1, -1)
     }
@@ -444,13 +447,22 @@ fun action(act: int, state_or_rule: int): action {
     var toRet: action
     toRet.act = act
     toRet.state_or_rule = state_or_rule
+    toRet.rule_position = -1
+    return toRet
+}
+fun action(act: int, state_or_rule: int, rule_position: int): action {
+    var toRet: action
+    toRet.act = act
+    toRet.state_or_rule = state_or_rule
+    toRet.rule_position = rule_position
     return toRet
 }
 obj action {
     var act: int // really need those enums
     var state_or_rule: int // sigh
+    var rule_position: int // sigh
     fun operator==(other: action): bool {
-        return act == other.act && state_or_rule == other.state_or_rule
+        return act == other.act && state_or_rule == other.state_or_rule && rule_position == other.rule_position
     }
     fun print() {
         if (act == push)
@@ -462,6 +474,8 @@ obj action {
         else if (act == reject)
             io::print("reject ")
         io::print(state_or_rule)
+        io::print(" ")
+        io::print(rule_position)
         io::println()
     }
 }
@@ -500,13 +514,13 @@ obj table (Object) {
         else
             items[from_state].set(cleaned_symbol, vector::vector(action(push, to_state)))
     }
-    fun add_reduce(from_state: int, on_symbol: ref symbol::symbol, by_rule_no: int) {
+    fun add_reduce(from_state: int, on_symbol: ref symbol::symbol, by_rule_no: int, rule_position: int) {
         expand_to(from_state)
         var cleaned_symbol = clean_symbol(on_symbol)
         if (items[from_state].contains_key(cleaned_symbol))
             items[from_state][cleaned_symbol].addEnd(action(reduce, by_rule_no))
         else
-            items[from_state].set(cleaned_symbol, vector::vector(action(reduce, by_rule_no)))
+            items[from_state].set(cleaned_symbol, vector::vector(action(reduce, by_rule_no, rule_position)))
     }
     fun add_accept(from_state: int, on_symbol: ref symbol::symbol) {
         expand_to(from_state)
diff --git a/stdlib/parser.krak b/stdlib/parser.krak
index e506c8f..c6cdb71 100644
--- a/stdlib/parser.krak
+++ b/stdlib/parser.krak
@@ -138,10 +138,25 @@ obj parser (Object) {
         for_each(fun(path: ref vector<*tree>) {
             println("in get_reachable_paths for_each loop")
             var path_edges = range(path.size-1).map(fun(indx: int): *tree { return gss.get_edge(path[indx], path[indx+1]);}).reverse()
+            print("path ")
+            path.for_each(fun(part: *tree) {
+                print(part->data)
+                print(" ")
+            })
+            println()
             println("got path edges")
             if (curr_reduction.length != 0)
                 path_edges.addEnd(curr_reduction.label)
             var curr_reached = path.last()
+            print("checking shift for state ")
+            print(curr_reached->data)
+            print(" and ")
+            println(curr_reduction.sym.to_string())
+            // if this is the Goal = a type reduction, then skip the actual reduction part.
+            // the shift lookup will fail, and likely other things, and this is our accept
+            // criteria anyway
+            if (curr_reached->data == 0 && curr_reduction.sym == gram.rules[0].lhs)
+                return;
             var shift_to = gram.parse_table.get_shift(curr_reached->data, curr_reduction.sym).state_or_rule
             println("got shift to")
             var new_label = null>()
@@ -171,7 +186,7 @@ obj parser (Object) {
                 var reduce_rule = gram.rules[act.state_or_rule]
                 if (act.act == reduce && !fully_reduces_to_null(reduce_rule))
                     to_reduce.push(reduction(curr_reached, reduce_rule.lhs,
-                                             reduce_rule.position,
+                                             act.rule_position,
                                              new>()->construct(null_symbol()),
                                              new_label))
             })
@@ -190,8 +205,8 @@
                     to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0,
                                              new>()->construct(null_symbol()),
                                              null>() ))
-                } else if (curr_reduction.length == 0) {
-                    to_reduce.push(reduction(curr_reached, action_rule.lhs, action_rule.position,
+                } else if (curr_reduction.length != 0) {
+                    to_reduce.push(reduction(curr_reached, action_rule.lhs, act.rule_position,
                                              new>()->construct(null_symbol()),
                                              new_label ))
                 }
@@ -204,7 +219,7 @@
     }
     fun shifter(i: int) {
         println("shifting")
-        if (i == input.size)
+        if (i >= input.size-1)
             return; // darn ambiguity
         print("shifting on ")
         println(input[i].to_string())
@@ -219,10 +234,11 @@
             if (shift_to_node) {
                 print("already in frontier ")
                 println(i+1)
+                gss.add_edge(shift_to_node, shift.first, new_label)
                 gram.parse_table.get_reduces(shift.second, input[i+1]).for_each(fun(action: action) {
                     var reduce_rule = gram.rules[action.state_or_rule]
                     if (!fully_reduces_to_null(reduce_rule)) {
-                        to_reduce.push(reduction(shift.first, reduce_rule.lhs, reduce_rule.position,
+                        to_reduce.push(reduction(shift.first, reduce_rule.lhs, action.rule_position,
                                                  new>()->construct(null_symbol()),
                                                  new_label ))
                     }
@@ -245,7 +261,7 @@
                     var action_rule = gram.rules[action.state_or_rule]
                     if (!fully_reduces_to_null(action_rule)) {
                         println("does not reduce to null")
-                        to_reduce.push(reduction(shift.first, action_rule.lhs, action_rule.position,
+                        to_reduce.push(reduction(shift.first, action_rule.lhs, action.rule_position,
                                                  new>()->construct(null_symbol()),
                                                  new_label ))
                     } else {
@@ -261,8 +277,23 @@
             to_shift = next_shifts
         }
         fun add_children(parent: *tree, children: vector<*tree>, nullable_parts: *tree) {
+            if (nullable_parts)
+                children.add(nullable_parts)
+            if (!belongs_to_family(parent, children)) {
+                parent->children.add_all(children)
+            } else {
+                if (!are_packed(parent->children)) {
+                    // ambiguity inner
+                }
+                // ambiguity outer
+            }
+        }
+        fun belongs_to_family(node: *tree, nodes: vector<*tree>): bool {
+            return false
+        }
+        fun are_packed(nodes: vector<*tree>): bool {
+            return true
         }
-
         fun fully_reduces_to_null(r: ref rule): bool {
             return r.position == 0 && gram.first_vector(r.rhs).contains(null_symbol())
         }
@@ -380,3 +411,19 @@ obj reduction (Object) {
         sym.destruct()
     }
 }
+
+fun syntax_tree_to_dot(root: *tree): string {
+    var ret = string("digraph Kaken {\n")
+    var helper: fun(*tree):void = fun(node: *tree) {
+        ret += node->data.to_string() + ";;;;\n";
+        node->children.for_each(fun(child: *tree) {
+            if (!child)
+                return; // where on earth does the null come from
+            ret += node->data.to_string() + " -> " + child->data.to_string() + "\n";
+            helper(child)
+        })
+    }
+    if (root)
+        helper(root)
+    return ret + "}"
+}
diff --git a/tests/grammer3.kgm b/tests/grammer3.kgm
new file mode 100644
index 0000000..2c697f1
--- /dev/null
+++ b/tests/grammer3.kgm
@@ -0,0 +1,3 @@
+# comment
+Goal = a ;
+a = "hi" "d":dname ;
diff --git a/tests/test_grammer.krak b/tests/test_grammer.krak
index d11a886..7138b7b 100644
--- a/tests/test_grammer.krak
+++ b/tests/test_grammer.krak
@@ -12,6 +12,7 @@ fun main():int {
     /*var a = load_grammer(read_file(string("../krakenGrammer.kgm")))*/
     /*var a = load_grammer(read_file(string("grammer.kgm")))*/
     var a = load_grammer(read_file(string("grammer2.kgm")))
+    /*var a = load_grammer(read_file(string("grammer3.kgm")))*/
     println(a.to_string())
     var doFirstSet = fun() {
         a.calculate_first_set()
@@ -52,6 +53,9 @@ fun main():int {
     a.calculate_state_automaton()
     var parse.construct(a): parser
     var result = parse.parse_input(string("ad"), string("fun name"))
+    /*var result = parse.parse_input(string("hid"), string("fun name"))*/
+    println("the tree")
+    println(syntax_tree_to_dot(result))
     /*var parse.construct(): parser*/
     return 0
 }