import grammer:*
import symbol:*
import lexer:*
import tree:*
import vector:*
import stack:*
import map:*
import util:*
import string:*
import mem:*
import io:*

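// A Tomita-style GLR parser. Conflicting parse-table actions are explored in
// parallel over a graph-structured stack (gss), and ambiguous derivations are
// packed into the shared syntax tree by add_children. The overall shape
// (per-frontier reduce/shift worklists, nullable parts, per-step shared nodes)
// roughly follows the RNGLR approach.
//
// Sketch of intended use (hypothetical driver code, not part of this file):
//     var p.construct(my_grammer): parser
//     var result = p.parse_input(source_text, file_name)
//     if (result) println(syntax_tree_to_dot(result))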
obj parser (Object) {
    var input: vector<symbol>
    var gram: grammer
    var gss: gss
    var to_reduce: stack<reduction>
    var to_shift: stack< pair<*tree<int>, int> >
    var SPPFStepNodes: vector< pair<*tree<symbol>, int> >
    var packed_map: map<*tree<symbol>, bool>

    fun construct(grammerIn: grammer): *parser {
        input.construct()
        gram.copy_construct(&grammerIn)
        gss.construct()
        to_reduce.construct()
        to_shift.construct()
        SPPFStepNodes.construct()
        packed_map.construct()
        return this
    }
    fun copy_construct(old: *parser) {
        input.copy_construct(&old->input)
        gram.copy_construct(&old->gram)
        gss.copy_construct(&old->gss)
        to_reduce.copy_construct(&old->to_reduce)
        to_shift.copy_construct(&old->to_shift)
        SPPFStepNodes.copy_construct(&old->SPPFStepNodes)
        packed_map.copy_construct(&old->packed_map)
    }
    fun operator=(old: ref parser) {
        destruct()
        copy_construct(&old)
    }
    fun destruct() {
        input.destruct()
        gram.destruct()
        gss.destruct()
        to_reduce.destruct()
        to_shift.destruct()
        SPPFStepNodes.destruct()
        packed_map.destruct()
    }

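    // Lexes inputStr into a symbol vector (terminated by the eof/invalid symbol),
    // then runs the GLR loop one input position at a time: all pending reductions
    // for the current frontier are handled by reducer(), then the pending shifts
    // by shifter(). Returns the root of the shared syntax tree on acceptance, or a
    // null tree pointer if lexing or parsing fails. 'name' is used only for
    // diagnostics.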
    fun parse_input(inputStr: string, name: string): *tree<symbol> {
        input.clear()
        gss.clear()
        to_reduce.clear()
        to_shift.clear()
        SPPFStepNodes.clear()
        packed_map.clear()

        // if the zero state contains any reductions for state 0 and eof, then
        // it must be reducing to the goal state
        println("checking the bidness")
        if (inputStr == "" && gram.parse_table.get(0, eof_symbol()).contains(action(reduce, 0))) {
            println("Accept on no input for ")
            println(name)
            return new<tree<symbol>>()->construct(null_symbol())
        }

        var lex = lexer(gram.terminals)
        lex.set_input(inputStr)
        var current_symbol.construct(): symbol
        for (current_symbol = lex.next(); current_symbol != eof_symbol() && current_symbol != invalid_symbol(); current_symbol = lex.next();) {
            /*println("current_symbol is ")*/
            /*println(current_symbol.to_string())*/
            input.addEnd(current_symbol)
        }
        input.addEnd(current_symbol)
        if (current_symbol == invalid_symbol()) {
            println("lexing failed for ")
            println(name)
            return null<tree<symbol>>()
        }

        var v0 = gss.new_node(0)
        gss.add_to_frontier(0, v0)

        var null_symbol_tree = null<tree<symbol>>()

        gram.parse_table.get(0, input[0]).for_each(fun(act: action) {
            println("for each action")
            act.print()
            if (act.act == push)
                to_shift.push(make_pair(v0, act.state_or_rule))
            /*else if (act.act == reduce && fully_reduces_to_null(gram.rules[act.state_or_rule])) {*/
            else if (act.act == reduce && act.rule_position == 0) {
                print("act == reduce && == 0 Adding reduction from state: ")
                println(v0->data)
                to_reduce.push(reduction(v0, gram.rules[act.state_or_rule].lhs, 0, null_symbol_tree, null_symbol_tree))
            }
        })

        for (var i = 0; i < input.size; i++;) {
            if (gss.frontier_is_empty(i)) {
                print(i)
                print("th frontier is empty in file '")
                print(name)
                print("' with txt ")
                print(input[i].to_string())
                println()
                return null<tree<symbol>>()
            }
            SPPFStepNodes.clear()
            print("to_reduce size: ")
            println(to_reduce.size())
            print("to_shift size: ")
            println(to_shift.size())
            while (to_reduce.size())
                reducer(i)
            shifter(i)
        }
        var acc_state = gss.frontier_get_acc_state(input.size-1)
        if (acc_state) {
            println("ACCEPTED!")
            return gss.get_edge(acc_state, v0)
        }

        println("REJECTED")
        println("parsing (not lexing) failed for ")
        println(name)
        return null<tree<symbol>>()
    }
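    // Performs one pending reduction for input position i: pops a reduction, walks
    // every GSS path of the appropriate length back from its origin node, collects
    // the edge labels along the path as the children of the reduced nonterminal
    // (reusing a per-step shared node when one already exists for this symbol and
    // frontier), adds the resulting "goto" edge into the current frontier, and
    // queues any further reductions or shifts that the new edge or node enables.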
    fun reducer(i: int) {
        print("reducing from state: ")
        var curr_reduction = to_reduce.pop()
        println(curr_reduction.from->data)
        print("curr_reduction.length (not length-1) is: ")
        println(curr_reduction.length)
        gss.get_reachable_paths(curr_reduction.from, max(0, curr_reduction.length-1)).
        for_each(fun(path: ref vector<*tree<int>>) {
            println("in get_reachable_paths for_each loop")
            var path_edges = range(path.size-1).map(fun(indx: int): *tree<symbol> { return gss.get_edge(path[indx], path[indx+1]);}).reverse()
            print("path ")
            path.for_each(fun(part: *tree<int>) {
                print(part->data)
                print(" ")
            })
            println()
            println("got path edges")
            println("there are this many:")
            println(path_edges.size)
            if (curr_reduction.length != 0) {
                path_edges.addEnd(curr_reduction.label)
                println("also adding the one from the reduction")
                println(curr_reduction.label->data.to_string())
            }
            var curr_reached = path.last()
            print("checking shift for state ")
            print(curr_reached->data)
            print(" and ")
            println(curr_reduction.sym.to_string())
            // if this is the Goal = a type reduction, then skip the actual reduction part.
            // the shift lookup will fail, and likely other things, and this is our accept
            // criteria anyway
            /*if (curr_reached->data == 0 && curr_reduction.sym == gram.rules[0].lhs)*/
            if (curr_reduction.sym == gram.rules[0].lhs) {
                println("would accept here")
                return;
            }
            var shift_to = gram.parse_table.get_shift(curr_reached->data, curr_reduction.sym).state_or_rule
            println("got shift to")
            var new_label = null<tree<symbol>>()
            if (curr_reduction.length == 0) {
                new_label = curr_reduction.nullable_parts
            } else {
                var reached_frontier = gss.get_containing_frontier(curr_reached)
                for (var j = 0; j < SPPFStepNodes.size; j++;) {
                    if (SPPFStepNodes[j].second == reached_frontier
                        && SPPFStepNodes[j].first->data == curr_reduction.sym) {
                        new_label = SPPFStepNodes[j].first
                        break
                    }
                }
                if (!new_label) {
                    new_label = new<tree<symbol>>()->construct(curr_reduction.sym)
                    SPPFStepNodes.addEnd(make_pair(new_label, reached_frontier))
                }
            }
            var shift_to_node = gss.in_frontier(i, shift_to)
            if (shift_to_node) {
                if (!gss.has_edge(shift_to_node, curr_reached)) {
                    gss.add_edge(shift_to_node, curr_reached, new_label)
                    // do non-null reductions
                    if (curr_reduction.length) {
                        gram.parse_table.get(shift_to, input[i]).for_each(fun(act: action) {
                            var reduce_rule = gram.rules[act.state_or_rule]
                            /*if (act.act == reduce && !fully_reduces_to_null(reduce_rule)) {*/
                            if (act.act == reduce && act.rule_position != 0) {
                                to_reduce.push(reduction(curr_reached, reduce_rule.lhs,
                                                         act.rule_position,
                                                         get_nullable_parts(reduce_rule),
                                                         new_label))
                                print("(non null) Adding reduction from state: ")
                                println(curr_reached->data)
                            }
                        })
                    }
                }
            } else {
                shift_to_node = gss.new_node(shift_to)
                gss.add_to_frontier(i, shift_to_node)
                gss.add_edge(shift_to_node, curr_reached, new_label)
                gram.parse_table.get(shift_to, input[i]).for_each(fun(act: action) {
                    if (act.act == push) {
                        to_shift.push(make_pair(shift_to_node, act.state_or_rule))
                    } else {
                        var action_rule = gram.rules[act.state_or_rule]
                        // tricky tricky tricky. Fully reduces to null is not the same as act.rule_position being 0
                        /*if (fully_reduces_to_null(action_rule)) {*/
                        if (act.rule_position == 0) {
                            to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0,
                                                     get_nullable_parts(action_rule),
                                                     null<tree<symbol>>() ))
                            print("null reduces Adding reduction from state: ")
                            println(shift_to_node->data)
                        } else if (curr_reduction.length != 0) {
                            to_reduce.push(reduction(curr_reached, action_rule.lhs, act.rule_position,
                                                     get_nullable_parts(action_rule),
                                                     new_label ))
                            print("null does not reduce Adding reduction from state: ")
                            println(curr_reached->data)
                        }
                    }
                })
            }
            if (curr_reduction.length)
                add_children(new_label, path_edges, curr_reduction.nullable_parts)
        })
    }
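    // Consumes the shifts queued for input position i: for each (node, state) pair
    // it finds or creates the target state's node in frontier i+1, adds an edge
    // labelled with the shifted terminal, and queues the reductions (and, for newly
    // created nodes, the next round of shifts) indicated by the lookahead input[i+1].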
    fun shifter(i: int) {
        println("shifting")
        if (i >= input.size-1)
            return; // darn ambiguity
        print("shifting on ")
        println(input[i].to_string())
        var next_shifts = stack< pair<*tree<int>, int> >()
        var new_label = new<tree<symbol>>()->construct(input[i])
        while (!to_shift.empty()) {
            println("to_shift not empty")
            var shift = to_shift.pop()
            println("post pop")
            var shift_to_node = gss.in_frontier(i+1, shift.second)
            println("post in_frontier")
            if (shift_to_node) {
                print("already in frontier ")
                println(i+1)
                gss.add_edge(shift_to_node, shift.first, new_label)
                gram.parse_table.get_reduces(shift.second, input[i+1]).for_each(fun(action: action) {
                    var reduce_rule = gram.rules[action.state_or_rule]
                    /*if (!fully_reduces_to_null(reduce_rule)) {*/
                    if (action.rule_position != 0) {
                        to_reduce.push(reduction(shift.first, reduce_rule.lhs, action.rule_position,
                                                 get_nullable_parts(reduce_rule),
                                                 new_label ))
                        print("if shift to node Adding reduction from state: ")
                        println(shift.first->data)
                    }
                })
            } else {
                print("adding to frontier ")
                println(i+1)
                shift_to_node = gss.new_node(shift.second)
                gss.add_to_frontier(i+1, shift_to_node)
                println("post add to frontier")
                gss.add_edge(shift_to_node, shift.first, new_label)
                println("post add edger")
                gram.parse_table.get(shift.second, input[i+1]).for_each(fun(action: action) {
                    println("looking at an action")
                    if (action.act == push) {
                        println("is push")
                        next_shifts.push(make_pair(shift_to_node, action.state_or_rule))
                    } else {
                        println("is reduce")
                        var action_rule = gram.rules[action.state_or_rule]
                        /*if (!fully_reduces_to_null(action_rule)) {*/
                        if (action.rule_position != 0) {
                            println("does not reduce to null")
                            to_reduce.push(reduction(shift.first, action_rule.lhs, action.rule_position,
                                                     get_nullable_parts(action_rule),
                                                     new_label ))
                            print("not shift to, reduce, != 0 Adding reduction from state: ")
                            println(shift.first->data)
                            print("added reduction rule+position: ")
                            println(action.rule_position)
                        } else {
                            println("does reduce to null")
                            to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0,
                                                     get_nullable_parts(action_rule),
                                                     null<tree<symbol>>() ))
                            print("not shift to, reduce, == 0 Adding reduction from state: ")
                            println(shift_to_node->data)
                        }
                    }
                })
            }
        }
        to_shift = next_shifts
    }
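    // Attaches a reduction's children (plus any nullable tail) to its tree node.
    // If the node already has a different family of children, the existing and new
    // alternatives are wrapped in packed "AmbiguityInner"/"AmbiguityOuter" nodes so
    // that every distinct derivation is preserved in the shared tree.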
    fun add_children(parent: *tree<symbol>, children: vector<*tree<symbol>>, nullable_parts: *tree<symbol>) {
        if (nullable_parts)
            children.add(nullable_parts)
        if (!belongs_to_family(parent, children)) {
            if (parent->children.size == 0) {
                parent->children.add_all(children)
            } else {
                if (!are_packed(parent->children)) {
                    // ambiguity inner
                    var sub_parent = new<tree<symbol>>()->construct(symbol("AmbiguityInner", true))
                    set_packed(sub_parent, true)
                    sub_parent->children.add_all(parent->children)
                    parent->children.clear()
                    parent->children.add(sub_parent)
                }
                // ambiguity outer
                var next_sub_parent = new<tree<symbol>>()->construct(symbol("AmbiguityOuter", true))
                set_packed(next_sub_parent, true)
                parent->children.add(next_sub_parent)
                next_sub_parent->children.add_all(children)
            }
        }
    }
    // true if every node in 'nodes' already appears (by pointer or by value) among
    // node's children; used to avoid attaching a duplicate family of children
    fun belongs_to_family(node: *tree<symbol>, nodes: vector<*tree<symbol>>): bool {
        for (var i = 0; i < nodes.size; i++;) {
            var contains_one = false
            for (var j = 0; j < node->children.size; j++;) {
                var child = node->children[j]
                if (nodes[i] == child || (nodes[i] && child && *nodes[i] == *child)) {
                    contains_one = true
                    break
                }
            }
            if (!contains_one)
                return false
        }
        return true
    }
    fun are_packed(nodes: vector<*tree<symbol>>): bool {
        return nodes.any_true(fun(it: *tree<symbol>):bool { return packed_map.contains_key(it) && packed_map[it]; })
    }
    fun set_packed(node: *tree<symbol>, packed: bool) {
        packed_map.set(node, packed)
    }
    fun fully_reduces_to_null(r: ref rule): bool {
        return r.position == 0 && reduces_to_null(r)
    }
    fun reduces_to_null(r: ref rule): bool {
        return gram.first_vector(r.rhs).contains(null_symbol())
    }
    // a single null_symbol leaf if the rule can derive the empty string, otherwise null
    fun get_nullable_parts(r: ref rule): *tree<symbol> {
        if (reduces_to_null(r))
            return new<tree<symbol>>()->construct(null_symbol())
        return null<tree<symbol>>()
    }
}

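// The graph-structured stack. data[f] holds the GSS nodes created for frontier f
// (one node per parse state reached at that input position), and edges maps a
// (from, to) pair of GSS nodes to the syntax-tree label sitting on that edge.
// GSS nodes reuse tree<int>, with a node's children acting as its outgoing edges.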
obj gss (Object) {
    var data: vector<vector<*tree<int>>>
    var edges: map< pair<*tree<int>, *tree<int>>, *tree<symbol> >

    fun construct(): *gss {
        data.construct()
        edges.construct()
        return this
    }
    fun copy_construct(old: *gss) {
        data.copy_construct(&old->data)
        edges.copy_construct(&old->edges)
    }
    fun destruct() {
        data.destruct()
        edges.destruct()
    }
    fun clear() {
        data.clear()
        edges.clear()
    }
    fun new_node(state: int): *tree<int> {
        return new<tree<int>>()->construct(state)
    }
    fun add_to_frontier(frontier: int, node: *tree<int>) {
        while(data.size <= frontier)
            data.addEnd(vector<*tree<int>>())
        data[frontier].addEnd(node)
    }
    fun frontier_is_empty(frontier: int): bool {
        return frontier >= data.size || data[frontier].size == 0
    }
    fun frontier_get_acc_state(frontier: int): *tree<int> {
        // the accepting state is always state 1, for now
        return in_frontier(frontier, 1)
    }
    fun in_frontier(frontier: int, state: int): *tree<int> {
        if (frontier >= data.size)
            return null<tree<int>>()
        for (var i = 0; i < data[frontier].size; i++;)
            if (data[frontier][i]->data == state)
                return data[frontier][i]
        return null<tree<int>>()
    }
    fun get_edge(start: *tree<int>, end: *tree<int>): *tree<symbol> {
        return edges[make_pair(start, end)]
    }
    fun has_edge(start: *tree<int>, end: *tree<int>): bool {
        // could also look in map, but this is faster...
        return start->children.find(end) != -1
    }
    fun add_edge(start: *tree<int>, end: *tree<int>, edge: *tree<symbol>) {
        start->children.add(end)
        edges.set(make_pair(start,end), edge)
    }
    fun get_containing_frontier(node: *tree<int>): int {
        for (var i = 0; i < data.size; i++;)
            if (data[i].contains(node))
                return i
        return -1
    }
    // all paths of exactly 'length' edges starting at 'start', as node sequences
    fun get_reachable_paths(start: *tree<int>, length: int): vector<vector<*tree<int>>> {
        var paths = vector<vector<*tree<int>>>()
        var recursive_path_find: fun(*tree<int>, int, vector<*tree<int>>):void = fun(start: *tree<int>, length: int, current_path: vector<*tree<int>>) {
            current_path.addEnd(start)
            if (!length) {
                paths.addEnd(current_path)
                return
            }
            start->children.for_each(fun(child: *tree<int>) {
                recursive_path_find(child, length-1, current_path)
            })
        }
        recursive_path_find(start, length, vector<*tree<int>>())
        return paths
    }
}

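// A pending reduction: the GSS node it starts from ('from'), the nonterminal being
// reduced to ('sym'), how far into the rule the reduction applies ('length', taken
// from the action's rule_position; 0 means the rule derives null at that point),
// a tree for the nullable tail of the rule (if any), and the label of the edge
// that triggered the reduction. The free function below is just a value-returning
// convenience constructor.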
fun reduction(f: *tree<int>, s: symbol, l: int, n: *tree<symbol>, label:*tree<symbol>): reduction {
    var toRet.construct(f,s,l,n,label): reduction
    return toRet
}

obj reduction (Object) {
    var from: *tree<int>
    var sym: symbol
    var length: int
    var nullable_parts: *tree<symbol>
    var label: *tree<symbol>

    fun construct(): *reduction {
        from = null<tree<int>>()
        sym = invalid_symbol()
        length = -1
        nullable_parts = null<tree<symbol>>()
        label = null<tree<symbol>>()
        return this
    }

    fun construct(f: *tree<int>, s: symbol, l: int, n: *tree<symbol>, labelIn:*tree<symbol>): *reduction {
        from = f
        sym.copy_construct(&s)
        length = l
        nullable_parts = n
        label = labelIn
        return this
    }
    fun copy_construct(old: *reduction) {
        from = old->from
        sym.copy_construct(&old->sym)
        length = old->length
        nullable_parts = old->nullable_parts
        label = old->label
    }
    fun operator=(other: reduction):void {
        destruct()
        copy_construct(&other)
    }
    fun destruct() {
        sym.destruct()
    }
}

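// Renders a syntax tree as a Graphviz DOT digraph. Each node gets a unique name
// built from its symbol text (with embedded quotes escaped) plus a running counter,
// and an edge is emitted from every node to each of its non-null children.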
fun syntax_tree_to_dot(root: *tree<symbol>): string {
    var ret = string("digraph Kaken {\n")
    var counter = 0
    var node_name_map = map<*tree<symbol>, string>()
    var get_name = fun(node: *tree<symbol>): string {
        if (node_name_map.contains_key(node))
            return node_name_map[node]
        var escaped = string("")
        node->data.to_string().data.for_each(fun(c: char) {
            if (c != '"')
                escaped += c
            else
                escaped += "\\\""
        })
        escaped += to_string(counter++)
        node_name_map.set(node, escaped)
        return escaped
    }
    var helper: fun(*tree<symbol>):void = fun(node: *tree<symbol>) {
        node->children.for_each(fun(child: *tree<symbol>) {
            if (!child)
                return; // where on earth does the null come from
            ret += string("\"") + get_name(node) + "\" -> \"" + get_name(child) + "\"\n";
            helper(child)
        })
    }
    if (root)
        helper(root)
    return ret + "}"
}