import grammer:*
import symbol:*
import lexer:*
import tree:*
import vector:*
import stack:*
import map:*
import util:*
import string:*
import mem:*
import io:*

// NOTE(review): this file reached review with its line breaks collapsed and the
// nested template arguments stripped (e.g. "new>()", "vector>>"). The template
// arguments below were reconstructed from how each value is used: GSS nodes are
// built from an int state, labels are built from a symbol. Confirm against the
// upstream copy before merging.

// Table-driven parser. parse_input lexes a string with the grammar's terminals
// and then alternates reducer/shifter over a graph of state nodes (gss) while
// building a label tree of symbols. Looks like a GLR-family algorithm with
// nullable-part handling - TODO confirm which variant.
obj parser (Object) {
    // token stream produced by the lexer; the terminating symbol is appended too
    var input: vector<symbol>
    var gram: grammer
    var gss: gss
    // pending reductions, drained by reducer()
    var to_reduce: stack<reduction>
    // pending shifts: (node to shift from, state to shift to)
    var to_shift: stack< pair<*tree<int>, int> >
    // label nodes created during the current input step, paired with the
    // frontier index they were reached from; cleared at the start of each step
    var SPPFStepNodes: vector< pair<*tree<symbol>, int> >
    // records which label nodes were created as ambiguity wrappers
    var packed_map: map<*tree<symbol>, bool>

    // Initialises every member and stores a copy of grammerIn. Returns this.
    fun construct(grammerIn: grammer): *parser {
        input.construct()
        gram.copy_construct(&grammerIn)
        gss.construct()
        to_reduce.construct()
        to_shift.construct()
        SPPFStepNodes.construct()
        packed_map.construct()
        return this
    }
    // Member-wise copy construction from old.
    fun copy_construct(old: *parser) {
        input.copy_construct(&old->input)
        gram.copy_construct(&old->gram)
        gss.copy_construct(&old->gss)
        to_reduce.copy_construct(&old->to_reduce)
        to_shift.copy_construct(&old->to_shift)
        SPPFStepNodes.copy_construct(&old->SPPFStepNodes)
        packed_map.copy_construct(&old->packed_map)
    }
    // Assignment: tear down the current contents, then copy from old.
    fun operator=(old: ref parser) {
        destruct()
        copy_construct(&old)
    }
    // Member-wise destruction.
    fun destruct() {
        input.destruct()
        gram.destruct()
        gss.destruct()
        to_reduce.destruct()
        to_shift.destruct()
        SPPFStepNodes.destruct()
        packed_map.destruct()
    }

    // Lexes and parses inputStr. name is only used in diagnostic output.
    // Returns the label stored on the edge from the accepting node back to the
    // start node, a fresh null_symbol() tree when empty input is accepted, or
    // null when lexing or parsing fails.
    fun parse_input(inputStr: string, name: string): *tree<symbol> {
        // reset all per-parse state so the parser object can be reused
        input.clear()
        gss.clear()
        to_reduce.clear()
        to_shift.clear()
        SPPFStepNodes.clear()
        packed_map.clear()

        // if the zero state contains any reductions for state 0 and eof, then
        // it must be reducing to the goal state
        println("checking the bidness")
        if (inputStr == "" && gram.parse_table.get(0, eof_symbol()).contains(action(action_type::reduce(), 0))) {
            println("Accept on no input for ")
            println(name)
            return new<tree<symbol>>()->construct(null_symbol())
        }

        var lex = lexer(gram.terminals)
        lex.set_input(inputStr)
        var current_symbol.construct(): symbol
        for (current_symbol = lex.next(); current_symbol != eof_symbol() && current_symbol != invalid_symbol(); current_symbol = lex.next();) {
            /*println("current_symbol is ")*/
            /*println(current_symbol.to_string())*/
            input.addEnd(current_symbol)
        }
        // the symbol that ended the loop (eof or invalid) is stored as well
        input.addEnd(current_symbol)
        if (current_symbol == invalid_symbol()) {
            println("**PARSE ERROR**")
            println("lexing failed for ")
            println(name)
            return null<tree<symbol>>()
        }

        // start node: state 0 in frontier 0
        var v0 = gss.new_node(0)
        gss.add_to_frontier(0, v0)

        var null_symbol_tree = null<tree<symbol>>()

        // seed the work lists from the actions for (state 0, first token)
        gram.parse_table.get(0, input[0]).for_each(fun(act: action) {
            println("for each action")
            act.print()
            if (act.act == action_type::push())
                to_shift.push(make_pair(v0, act.state_or_rule))
            /*else if (act.act == reduce && fully_reduces_to_null(gram.rules[act.state_or_rule])) {*/
            else if (act.act == action_type::reduce() && act.rule_position == 0) {
                print("act == reduce() && == 0 Adding reduction from state: ")
                println(v0->data)
                to_reduce.push(reduction(v0, gram.rules[act.state_or_rule].lhs, 0, null_symbol_tree, null_symbol_tree))
            }
        })

        // one step per token: drain all reductions, then perform the shifts
        for (var i = 0; i < input.size; i++;) {
            if (gss.frontier_is_empty(i)) {
                println("**PARSE ERROR**")
                print(i)
                print("th frontier is empty in file '")
                print(name)
                print("' with txt ")
                print(input[i].to_string())
                print(" line number: ")
                print(find_line(i))
                println()
                return null<tree<symbol>>()
            }
            SPPFStepNodes.clear()
            print("to_reduce size: ")
            println(to_reduce.size())
            print("to_shift size: ")
            println(to_shift.size())
            while (to_reduce.size())
                reducer(i)
            shifter(i)
        }
        var acc_state = gss.frontier_get_acc_state(input.size-1)
        if (acc_state) {
            println("ACCEPTED!")
            return gss.get_edge(acc_state, v0)
        }

        println("**PARSE ERROR**")
        println("REJECTED")
        println("parsing (not lexing) failed for ")
        println(name)
        print(" line number: ")
        print(find_line(input.size))
        println("(minus 2?)")
        return null<tree<symbol>>()
    }

    // Pops one pending reduction and applies it along every path of the
    // required length that starts at the reduction's origin node. i is the
    // index of the current input token / frontier.
    fun reducer(i: int) {
        print("reducing from state: ")
        var curr_reduction = to_reduce.pop()
        println(curr_reduction.from->data)
        print("curr_reduction.length (not length-1) is: ")
        println(curr_reduction.length)
        // a reduction of length n walks n-1 edges (its label covers the first
        // one); a length-0 reduction stays on its origin node
        gss.get_reachable_paths(curr_reduction.from, max(0, curr_reduction.length-1)).
            for_each(fun(path: ref vector<*tree<int>>) {
            println("in get_reachable_paths for_each loop")
            // labels on the edges along the path, reversed from walk order
            var path_edges = range(path.size-1).map(fun(indx: int): *tree<symbol> { return gss.get_edge(path[indx], path[indx+1]);}).reverse()
            print("path ")
            path.for_each(fun(part: *tree<int>) {
                print(part->data)
                print(" ")
            })
            println()
            println("got path edges")
            println("there are this many:")
            println(path_edges.size)
            if (curr_reduction.length != 0) {
                path_edges.addEnd(curr_reduction.label)
                println("also adding the one from the reduction")
                println(curr_reduction.label->data.to_string())
            }
            var curr_reached = path.last()
            print("checking shift for state ")
            print(curr_reached->data)
            print(" and ")
            println(curr_reduction.sym.to_string())
            // if this is the Goal = a type reduction, then skip the actual reduction part.
            // the shift lookup will fail, and likely other things, and this is our accept
            // criteria anyway
            /*if (curr_reached->data == 0 && curr_reduction.sym == gram.rules[0].lhs)*/
            if (curr_reduction.sym == gram.rules[0].lhs) {
                println("would accept here")
                return;
            }
            var shift_to = gram.parse_table.get_shift(curr_reached->data, curr_reduction.sym).state_or_rule
            println("got shift to")
            // pick the label for the new edge: the nullable parts for an empty
            // reduction, otherwise a node shared per (symbol, reached frontier)
            // within this step
            var new_label = null<tree<symbol>>()
            if (curr_reduction.length == 0) {
                new_label = curr_reduction.nullable_parts
            } else {
                var reached_frontier = gss.get_containing_frontier(curr_reached)
                for (var j = 0; j < SPPFStepNodes.size; j++;) {
                    if (SPPFStepNodes[j].second == reached_frontier && SPPFStepNodes[j].first->data == curr_reduction.sym) {
                        new_label = SPPFStepNodes[j].first
                        break
                    }
                }
                if (!new_label) {
                    new_label = new<tree<symbol>>()->construct(curr_reduction.sym)
                    SPPFStepNodes.addEnd(make_pair(new_label, reached_frontier))
                }
            }
            var shift_to_node = gss.in_frontier(i, shift_to)
            if (shift_to_node) {
                if (!gss.has_edge(shift_to_node, curr_reached)) {
                    gss.add_edge(shift_to_node, curr_reached, new_label)
                    // do non-null reductions
                    if (curr_reduction.length) {
                        gram.parse_table.get(shift_to, input[i]).for_each(fun(act: action) {
                            /*if (act.act == reduce && !fully_reduces_to_null(reduce_rule)) {*/
                            if (act.act == action_type::reduce() && act.rule_position != 0) {
                                // only look the rule up once we know this is a
                                // reduce: for a push action state_or_rule is a
                                // state number, not an index into gram.rules
                                var reduce_rule = gram.rules[act.state_or_rule]
                                to_reduce.push(reduction(curr_reached, reduce_rule.lhs, act.rule_position, get_nullable_parts(reduce_rule), new_label))
                                print("(non null) Adding reduction from state: ")
                                println(curr_reached->data)
                            }
                        })
                    }
                }
            } else {
                shift_to_node = gss.new_node(shift_to)
                gss.add_to_frontier(i, shift_to_node)
                gss.add_edge(shift_to_node, curr_reached, new_label)
                gram.parse_table.get(shift_to, input[i]).for_each(fun(act: action) {
                    if (act.act == action_type::push()) {
                        to_shift.push(make_pair(shift_to_node, act.state_or_rule))
                    } else {
                        var action_rule = gram.rules[act.state_or_rule]
                        // tricky tricky tricky. Fully reduces to null is not the same as act.rule_position being 0
                        /*if (fully_reduces_to_null(action_rule)) {*/
                        if (act.rule_position == 0) {
                            to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0, get_nullable_parts(action_rule), null<tree<symbol>>() ))
                            print("null reduces Adding reduction from state: ")
                            println(shift_to_node->data)
                        } else if (curr_reduction.length != 0) {
                            to_reduce.push(reduction(curr_reached, action_rule.lhs, act.rule_position, get_nullable_parts(action_rule), new_label ))
                            print("null does not reduce Adding reduction from state: ")
                            println(curr_reached->data)
                        }
                    }
                })
            }
            if (curr_reduction.length)
                add_children(new_label, path_edges, curr_reduction.nullable_parts)
        })
    }

    // Performs every pending shift over input[i], creating or reusing nodes in
    // frontier i+1, and queues the actions that follow for input[i+1].
    // Does nothing on the last input position.
    fun shifter(i: int) {
        println("shifting")
        if (i >= input.size-1)
            return; // darn ambiguity
        print("shifting on ")
        println(input[i].to_string())
        var next_shifts = stack< pair<*tree<int>, int> >()
        // a single label node for this token is shared by all edges created here
        var new_label = new<tree<symbol>>()->construct(input[i])
        while (!to_shift.empty()) {
            println("to_shift not empty")
            var shift = to_shift.pop()
            println("post pop")
            var shift_to_node = gss.in_frontier(i+1, shift.second)
            println("post in_frontier")
            if (shift_to_node) {
                print("already in frontier ")
                println(i+1)
                gss.add_edge(shift_to_node, shift.first, new_label)
                gram.parse_table.get_reduces(shift.second, input[i+1]).for_each(fun(action: action) {
                    var reduce_rule = gram.rules[action.state_or_rule]
                    /*if (!fully_reduces_to_null(reduce_rule)) {*/
                    if (action.rule_position != 0) {
                        to_reduce.push(reduction(shift.first, reduce_rule.lhs, action.rule_position, get_nullable_parts(reduce_rule), new_label ))
                        print("if shift to node Adding reduction from state: ")
                        println(shift.first->data)
                    }
                })
            } else {
                print("adding to frontier ")
                println(i+1)
                shift_to_node = gss.new_node(shift.second)
                gss.add_to_frontier(i+1, shift_to_node)
                println("post add to frontier")
                gss.add_edge(shift_to_node, shift.first, new_label)
                println("post add edger")
                gram.parse_table.get(shift.second, input[i+1]).for_each(fun(action: action) {
                    println("looking at an action")
                    if (action.act == action_type::push()) {
                        println("is push")
                        next_shifts.push(make_pair(shift_to_node, action.state_or_rule))
                    } else {
                        println("is reduce")
                        var action_rule = gram.rules[action.state_or_rule]
                        /*if (!fully_reduces_to_null(action_rule)) {*/
                        if (action.rule_position != 0) {
                            println("does not reduce to null")
                            to_reduce.push(reduction(shift.first, action_rule.lhs, action.rule_position, get_nullable_parts(action_rule), new_label ))
                            print("not shift to, reduce, != 0 Adding reduction from state: ")
                            println(shift.first->data)
                            print("added ruduction rule+position: ")
                            println(action.rule_position)
                        } else {
                            println("does reduce to null")
                            to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0, get_nullable_parts(action_rule), null<tree<symbol>>() ))
                            print("not shift to, reduce, == 0 Adding reduction from state: ")
                            println(shift_to_node->data)
                        }
                    }
                })
            }
        }
        to_shift = next_shifts
    }

    // Attaches children (plus nullable_parts when non-null) under parent.
    // If parent already has a different set of children, the old and new sets
    // are wrapped in "AmbiguityInner"/"AmbiguityOuter" nodes that are recorded
    // in packed_map. children is taken by value, so the caller's vector is not
    // modified by the add below.
    fun add_children(parent: *tree<symbol>, children: vector<*tree<symbol>>, nullable_parts: *tree<symbol>) {
        if (nullable_parts)
            children.add(nullable_parts)
        if (!belongs_to_family(parent, children)) {
            if (parent->children.size == 0) {
                parent->children.add_all(children)
            } else {
                if (!are_packed(parent->children)) {
                    // ambiguity inner
                    var sub_parent = new<tree<symbol>>()->construct(symbol("AmbiguityInner", true))
                    set_packed(sub_parent, true)
                    sub_parent->children.add_all(parent->children)
                    parent->children.clear()
                    parent->children.add(sub_parent)
                }
                // ambiguity outer
                var next_sub_parent = new<tree<symbol>>()->construct(symbol("AmbiguityOuter", true))
                set_packed(next_sub_parent, true)
                parent->children.add(next_sub_parent)
                next_sub_parent->children.add_all(children)
            }
        }
    }
    // True when every entry of nodes matches some child of node, either by
    // pointer identity or (for two non-null pointers) by value equality.
    fun belongs_to_family(node: *tree<symbol>, nodes: vector<*tree<symbol>>): bool {
        for (var i = 0; i < nodes.size; i++;) {
            var contains_one = false
            for (var j = 0; j < node->children.size; j++;) {
                var child = node->children[j]
                if (nodes[i] == child || (nodes[i] && child && *nodes[i] == *child)) {
                    contains_one = true
                    break
                }
            }
            if (!contains_one)
                return false
        }
        return true
    }
    // True when at least one of nodes is marked as packed in packed_map.
    fun are_packed(nodes: vector<*tree<symbol>>): bool {
        return nodes.any_true(fun(it: *tree<symbol>):bool { return packed_map.contains_key(it) && packed_map[it]; })
    }
    // Records the packed flag for node.
    fun set_packed(node: *tree<symbol>, packed: bool) {
        packed_map.set(node, packed)
    }
    // True when r is at position 0 and its right-hand side can derive null.
    fun fully_reduces_to_null(r: ref rule): bool {
        return r.position == 0 && reduces_to_null(r)
    }
    // True when the first set of r's right-hand side contains null_symbol().
    fun reduces_to_null(r: ref rule): bool {
        return gram.first_vector(r.rhs).contains(null_symbol())
    }
    // Returns a fresh null_symbol() tree when r can derive null, else null.
    fun get_nullable_parts(r: ref rule): *tree<symbol> {
        if (reduces_to_null(r))
            return new<tree<symbol>>()->construct(null_symbol())
        return null<tree<symbol>>()
    }
    // 1-based line number reached after the first token_no tokens, found by
    // counting '\n' characters in their data.
    fun find_line(token_no: int): int {
        var line_no = 1
        for (var i = 0; i < token_no; i++;)
            for (var j = 0; j < input[i].data.length(); j++;)
                if (input[i].data[j] == '\n')
                    line_no++
        return line_no
    }
}

// Graph of state nodes grouped into frontiers (one per input position), with a
// label tree attached to each edge. Presumably the "graph structured stack" of
// a GLR parser - TODO confirm.
obj gss (Object) {
    // data[f] holds the nodes belonging to frontier f
    var data: vector<vector<*tree<int>>>
    // label for each (start, end) edge
    var edges: map< pair<*tree<int>, *tree<int>>, *tree<symbol> >

    // Initialises both members. Returns this, matching the declared return type
    // and the other construct functions in this file.
    fun construct(): *gss {
        data.construct()
        edges.construct()
        return this
    }
    fun copy_construct(old: *gss) {
        data.copy_construct(&old->data)
        edges.copy_construct(&old->edges)
    }
    fun destruct() {
        data.destruct()
        edges.destruct()
    }
    // Empties both containers; nodes are not freed here.
    fun clear() {
        data.clear()
        edges.clear()
    }
    // Allocates a new node carrying state; it is not added to any frontier.
    fun new_node(state: int): *tree<int> {
        return new<tree<int>>()->construct(state)
    }
    // Appends node to the given frontier, growing data with empty frontiers
    // as needed.
    fun add_to_frontier(frontier: int, node: *tree<int>) {
        while(data.size <= frontier)
            data.addEnd(vector<*tree<int>>())
        data[frontier].addEnd(node)
    }
    // True when the frontier does not exist or holds no nodes.
    fun frontier_is_empty(frontier: int): bool {
        return frontier >= data.size || data[frontier].size == 0
    }
    // Returns the node for the accepting state in frontier, or null.
    fun frontier_get_acc_state(frontier: int): *tree<int> {
        // the accepting state is always state 1, for now
        return in_frontier(frontier, 1)
    }
    // Returns the node with the given state in frontier, or null if the
    // frontier does not exist or has no such node.
    fun in_frontier(frontier: int, state: int): *tree<int> {
        if (frontier >= data.size)
            return null<tree<int>>()
        for (var i = 0; i < data[frontier].size; i++;)
            if (data[frontier][i]->data == state)
                return data[frontier][i]
        return null<tree<int>>()
    }
    // Returns the label stored for the edge start -> end.
    fun get_edge(start: *tree<int>, end: *tree<int>): *tree<symbol> {
        return edges[make_pair(start, end)]
    }
    fun has_edge(start: *tree<int>, end: *tree<int>): bool {
        // could also look in map, but this is faster...
        return start->children.find(end) != -1
    }
    // Adds end as a child of start and records edge as the label for the pair.
    fun add_edge(start: *tree<int>, end: *tree<int>, edge: *tree<symbol>) {
        start->children.add(end)
        edges.set(make_pair(start,end), edge)
    }
    // Index of the first frontier containing node, or -1 when none does.
    fun get_containing_frontier(node: *tree<int>): int {
        for (var i = 0; i < data.size; i++;)
            if (data[i].contains(node))
                return i
        return -1
    }
    // Collects every path of exactly length edges starting at start, following
    // child links. Each path lists its nodes beginning with start.
    fun get_reachable_paths(start: *tree<int>, length: int): vector<vector<*tree<int>>> {
        var paths = vector<vector<*tree<int>>>()
        // current_path is passed by value so sibling branches do not see each
        // other's additions
        var recursive_path_find: fun(*tree<int>, int, vector<*tree<int>>):void = fun(start: *tree<int>, length: int, current_path: vector<*tree<int>>) {
            current_path.addEnd(start)
            if (!length) {
                paths.addEnd(current_path)
                return
            }
            start->children.for_each(fun(child: *tree<int>) {
                recursive_path_find(child, length-1, current_path)
            })
        }
        recursive_path_find(start, length, vector<*tree<int>>())
        return paths
    }
}

// Convenience constructor returning a reduction by value.
fun reduction(f: *tree<int>, s: symbol, l: int, n: *tree<symbol>, label:*tree<symbol>): reduction {
    var toRet.construct(f,s,l,n,label): reduction
    return toRet
}

// A pending reduction: the node it starts from, the symbol it reduces to, its
// length, the tree standing in for nullable parts, and the label of the edge
// that triggered it. The pointers are copied as-is and never freed here.
obj reduction (Object) {
    var from: *tree<int>
    var sym: symbol
    var length: int
    var nullable_parts: *tree<symbol>
    var label: *tree<symbol>

    // Default state: null pointers, invalid symbol, length -1.
    fun construct(): *reduction {
        from = null<tree<int>>()
        sym = invalid_symbol()
        length = -1
        nullable_parts = null<tree<symbol>>()
        label = null<tree<symbol>>()
        return this
    }
    fun construct(f: *tree<int>, s: symbol, l: int, n: *tree<symbol>, labelIn:*tree<symbol>): *reduction {
        from = f
        sym.copy_construct(&s)
        length = l
        nullable_parts = n
        label = labelIn
        return this
    }
    fun copy_construct(old: *reduction) {
        from = old->from
        sym.copy_construct(&old->sym)
        length = old->length
        nullable_parts = old->nullable_parts
        label = old->label
    }
    fun operator=(other: reduction):void {
        destruct()
        copy_construct(&other)
    }
    // Only sym is destructed; the tree pointers are left untouched.
    fun destruct() {
        sym.destruct()
    }
}

// Renders the tree rooted at root as Graphviz dot text, one "parent" -> "child"
// line per edge. Node names are the symbol's string form with double quotes
// escaped and a per-node counter appended, so that distinct nodes with the same
// symbol get distinct names. A null root yields an empty graph.
fun syntax_tree_to_dot(root: *tree<symbol>): string {
    var ret = string("digraph Kaken {\n")
    var counter = 0
    var node_name_map = map<*tree<symbol>, string>()
    // returns the cached name for node, creating it on first use
    var get_name = fun(node: *tree<symbol>): string {
        if (node_name_map.contains_key(node))
            return node_name_map[node]
        var escaped = string("")
        node->data.to_string().data.for_each(fun(c: char) {
            if (c != '"')
                escaped += c
            else
                escaped += "\\\""
        })
        escaped += to_string(counter++)
        node_name_map.set(node, escaped)
        return escaped
    }
    // emits the edges below node, depth first
    var helper: fun(*tree<symbol>):void = fun(node: *tree<symbol>) {
        node->children.for_each(fun(child: *tree<symbol>) {
            if (!child)
                return; // where on earth does the null come from
            ret += string("\"") + get_name(node) + "\" -> \"" + get_name(child) + "\"\n";
            helper(child)
        })
    }
    if (root)
        helper(root)
    return ret + "}"
}