Parser is mostly working as a recognizer now, though grammer2 still causes a vector out-of-bounds. Sigh. It also segfaults when printing the tree, even with no null leaves: somehow a symbol's internal data becomes null.
@@ -192,10 +192,10 @@ obj grammer (Object) {
             if (!r.at_end())
                 possGoto.add(r.next())
             // if r is at end or the rest reduces to null, add a reduce for each lookahead symbol
-            if ( r.at_end() || first_vector(r.after_next()).contains(symbol::null_symbol()) ) {
+            if ( r.at_end() || first_vector(r.after()).contains(symbol::null_symbol()) ) {
                 var rule_no = rules.find(r.plain())
                 r.lookahead.for_each(fun(sym: ref symbol::symbol) {
-                    parse_table.add_reduce(I, sym, rule_no)
+                    parse_table.add_reduce(I, sym, rule_no, r.position)
                 })
             }
         })
@@ -360,6 +360,9 @@ obj rule (Object) {
     fun next(): ref symbol::symbol {
         return rhs[position]
     }
+    fun after(): vector::vector<symbol::symbol> {
+        return rhs.slice(position, -1)
+    }
     fun after_next(): vector::vector<symbol::symbol> {
         return rhs.slice(position + 1, -1)
     }
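Illustration, not part of the commit: after() takes the RHS slice from the dot position onward, while after_next() skips the symbol under the dot, and the closure test in the first hunk now uses the former. A minimal C++ sketch of the two slices, with made-up names and a plain string standing in for symbol::symbol:

    #include <algorithm>
    #include <cstddef>
    #include <string>
    #include <vector>

    using Symbol = std::string;  // stand-in for symbol::symbol

    // Slice of the right-hand side from the dot position to the end
    // (rough analogue of rule.after()).
    std::vector<Symbol> after(const std::vector<Symbol>& rhs, std::size_t position) {
        return {rhs.begin() + position, rhs.end()};
    }

    // Slice starting just past the symbol under the dot (rough analogue of
    // rule.after_next()); clamped so a dot at the end yields an empty slice.
    std::vector<Symbol> after_next(const std::vector<Symbol>& rhs, std::size_t position) {
        std::size_t start = std::min(position + 1, rhs.size());
        return {rhs.begin() + start, rhs.end()};
    }

For a rule A = a b c with the dot after a (position 1), after() yields [b, c] while after_next() yields [c].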
@@ -444,13 +447,22 @@ fun action(act: int, state_or_rule: int): action {
     var toRet: action
     toRet.act = act
     toRet.state_or_rule = state_or_rule
+    toRet.rule_position = -1
+    return toRet
+}
+fun action(act: int, state_or_rule: int, rule_position: int): action {
+    var toRet: action
+    toRet.act = act
+    toRet.state_or_rule = state_or_rule
+    toRet.rule_position = rule_position
     return toRet
 }
 obj action {
     var act: int // really need those enums
     var state_or_rule: int // sigh
+    var rule_position: int // sigh
     fun operator==(other: action): bool {
-        return act == other.act && state_or_rule == other.state_or_rule
+        return act == other.act && state_or_rule == other.state_or_rule && rule_position == other.rule_position
     }
     fun print() {
         if (act == push)
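For orientation, a rough C++ analogue of the widened action record (hypothetical names, not the project's API): reduce entries now carry the dot position recorded when they were added to the table, so the reducer can read the pop length from the action's rule_position rather than from the rule object.

    // Analogue of the Kraken `obj action`: what to do, the target state or the
    // rule to reduce by, and (new in this commit) the dot position recorded
    // when the reduce action was added to the table.
    struct Action {
        int act = 0;            // push / reduce / accept / reject
        int state_or_rule = 0;
        int rule_position = -1; // -1 when no position was recorded

        bool operator==(const Action& other) const {
            return act == other.act
                && state_or_rule == other.state_or_rule
                && rule_position == other.rule_position;
        }
    };

    // Two-argument form keeps the -1 default, mirroring the original
    // constructor function; the three-argument form mirrors the new overload.
    inline Action make_action(int act, int state_or_rule) {
        return {act, state_or_rule, -1};
    }

    inline Action make_action(int act, int state_or_rule, int rule_position) {
        return {act, state_or_rule, rule_position};
    }

The two-argument form keeps the -1 default, matching the two-argument action(...) calls that remain unchanged in the table code below.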
@@ -462,6 +474,8 @@ obj action {
         else if (act == reject)
             io::print("reject ")
         io::print(state_or_rule)
+        io::print(" ")
+        io::print(rule_position)
         io::println()
     }
 }
@@ -500,13 +514,13 @@ obj table (Object) {
         else
             items[from_state].set(cleaned_symbol, vector::vector(action(push, to_state)))
     }
-    fun add_reduce(from_state: int, on_symbol: ref symbol::symbol, by_rule_no: int) {
+    fun add_reduce(from_state: int, on_symbol: ref symbol::symbol, by_rule_no: int, rule_position: int) {
         expand_to(from_state)
         var cleaned_symbol = clean_symbol(on_symbol)
         if (items[from_state].contains_key(cleaned_symbol))
             items[from_state][cleaned_symbol].addEnd(action(reduce, by_rule_no))
         else
-            items[from_state].set(cleaned_symbol, vector::vector(action(reduce, by_rule_no)))
+            items[from_state].set(cleaned_symbol, vector::vector(action(reduce, by_rule_no, rule_position)))
     }
     fun add_accept(from_state: int, on_symbol: ref symbol::symbol) {
         expand_to(from_state)
@@ -138,10 +138,25 @@ obj parser (Object) {
         for_each(fun(path: ref vector<*tree<int>>) {
             println("in get_reachable_paths for_each loop")
             var path_edges = range(path.size-1).map(fun(indx: int): *tree<symbol> { return gss.get_edge(path[indx], path[indx+1]);}).reverse()
+            print("path ")
+            path.for_each(fun(part: *tree<int>) {
+                print(part->data)
+                print(" ")
+            })
+            println()
             println("got path edges")
             if (curr_reduction.length != 0)
                 path_edges.addEnd(curr_reduction.label)
             var curr_reached = path.last()
+            print("checking shift for state ")
+            print(curr_reached->data)
+            print(" and ")
+            println(curr_reduction.sym.to_string())
+            // if this is the Goal = a type reduction, then skip the actual reduction part.
+            // the shift lookup will fail, and likely other things, and this is our accept
+            // criteria anyway
+            if (curr_reached->data == 0 && curr_reduction.sym == gram.rules[0].lhs)
+                return;
             var shift_to = gram.parse_table.get_shift(curr_reached->data, curr_reduction.sym).state_or_rule
             println("got shift to")
             var new_label = null<tree<symbol>>()
@@ -171,7 +186,7 @@ obj parser (Object) {
             var reduce_rule = gram.rules[act.state_or_rule]
             if (act.act == reduce && !fully_reduces_to_null(reduce_rule))
                 to_reduce.push(reduction(curr_reached, reduce_rule.lhs,
-                    reduce_rule.position,
+                    act.rule_position,
                     new<tree<symbol>>()->construct(null_symbol()),
                     new_label))
         })
@@ -190,8 +205,8 @@ obj parser (Object) {
                 to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0,
                     new<tree<symbol>>()->construct(null_symbol()),
                     null<tree<symbol>>() ))
-            } else if (curr_reduction.length == 0) {
-                to_reduce.push(reduction(curr_reached, action_rule.lhs, action_rule.position,
+            } else if (curr_reduction.length != 0) {
+                to_reduce.push(reduction(curr_reached, action_rule.lhs, act.rule_position,
                     new<tree<symbol>>()->construct(null_symbol()),
                     new_label ))
             }
@@ -204,7 +219,7 @@ obj parser (Object) {
     }
     fun shifter(i: int) {
         println("shifting")
-        if (i == input.size)
+        if (i >= input.size-1)
            return; // darn ambiguity
        print("shifting on ")
        println(input[i].to_string())
@@ -219,10 +234,11 @@ obj parser (Object) {
             if (shift_to_node) {
                 print("already in frontier ")
                 println(i+1)
+                gss.add_edge(shift_to_node, shift.first, new_label)
                 gram.parse_table.get_reduces(shift.second, input[i+1]).for_each(fun(action: action) {
                     var reduce_rule = gram.rules[action.state_or_rule]
                     if (!fully_reduces_to_null(reduce_rule)) {
-                        to_reduce.push(reduction(shift.first, reduce_rule.lhs, reduce_rule.position,
+                        to_reduce.push(reduction(shift.first, reduce_rule.lhs, action.rule_position,
                             new<tree<symbol>>()->construct(null_symbol()),
                             new_label ))
                     }
@@ -245,7 +261,7 @@ obj parser (Object) {
                     var action_rule = gram.rules[action.state_or_rule]
                     if (!fully_reduces_to_null(action_rule)) {
                         println("does not reduce to null")
-                        to_reduce.push(reduction(shift.first, action_rule.lhs, action_rule.position,
+                        to_reduce.push(reduction(shift.first, action_rule.lhs, action.rule_position,
                             new<tree<symbol>>()->construct(null_symbol()),
                             new_label ))
                     } else {
@@ -261,8 +277,23 @@ obj parser (Object) {
         to_shift = next_shifts
     }
     fun add_children(parent: *tree<symbol>, children: vector<*tree<symbol>>, nullable_parts: *tree<symbol>) {
+        if (nullable_parts)
+            children.add(nullable_parts)
+        if (!belongs_to_family(parent, children)) {
+            parent->children.add_all(children)
+        } else {
+            if (!are_packed(parent->children)) {
+                // ambiguity inner
+            }
+            // ambiguity outer
+        }
+    }
+    fun belongs_to_family(node: *tree<symbol>, nodes: vector<*tree<symbol>>): bool {
+        return false
+    }
+    fun are_packed(nodes: vector<*tree<symbol>>): bool {
+        return true
     }

     fun fully_reduces_to_null(r: ref rule): bool {
         return r.position == 0 && gram.first_vector(r.rhs).contains(null_symbol())
     }
@@ -380,3 +411,19 @@ obj reduction (Object) {
         sym.destruct()
     }
 }
+
+fun syntax_tree_to_dot(root: *tree<symbol>): string {
+    var ret = string("digraph Kaken {\n")
+    var helper: fun(*tree<symbol>):void = fun(node: *tree<symbol>) {
+        ret += node->data.to_string() + ";;;;\n";
+        node->children.for_each(fun(child: *tree<symbol>) {
+            if (!child)
+                return; // where on earth does the null come from
+            ret += node->data.to_string() + " -> " + child->data.to_string() + "\n";
+            helper(child)
+        })
+    }
+    if (root)
+        helper(root)
+    return ret + "}"
+}
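The new syntax_tree_to_dot is where the segfault from the commit message shows up: some child pointers arrive null, so the walker skips them rather than dereferencing. A compact C++ sketch of the same defensive walk (the Tree type and names are illustrative, not the project's):

    #include <string>
    #include <vector>

    struct Tree {
        std::string data;
        std::vector<Tree*> children;
    };

    // Emit a DOT edge list for the tree, skipping null children the same way
    // the Kraken version does instead of crashing on them.
    std::string syntax_tree_to_dot(const Tree* root) {
        std::string ret = "digraph G {\n";
        auto walk = [&](const Tree* node, auto&& self) -> void {
            for (const Tree* child : node->children) {
                if (!child)
                    continue;  // guard against unexpected null leaves
                ret += node->data + " -> " + child->data + "\n";
                self(child, self);
            }
        };
        if (root)
            walk(root, walk);
        return ret + "}";
    }

Real DOT output would also want the labels quoted and duplicate node names disambiguated; the null guard is the part relevant to the crash.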
tests/grammer3.kgm (new file, +3)
@@ -0,0 +1,3 @@
+# comment
+Goal = a ;
+a = "hi" "d":dname ;
@@ -12,6 +12,7 @@ fun main():int {
     /*var a = load_grammer(read_file(string("../krakenGrammer.kgm")))*/
     /*var a = load_grammer(read_file(string("grammer.kgm")))*/
     var a = load_grammer(read_file(string("grammer2.kgm")))
+    /*var a = load_grammer(read_file(string("grammer3.kgm")))*/
     println(a.to_string())
     var doFirstSet = fun() {
         a.calculate_first_set()
@@ -52,6 +53,9 @@ fun main():int {
     a.calculate_state_automaton()
     var parse.construct(a): parser
     var result = parse.parse_input(string("ad"), string("fun name"))
+    /*var result = parse.parse_input(string("hid"), string("fun name"))*/
+    println("the tree")
+    println(syntax_tree_to_dot(result))
     /*var parse.construct(): parser*/
     return 0
 }