// kraken/stdlib/parser.krak
import grammer:*
import symbol:*
import lexer:*
import tree:*
import vec:*
import stack:*
import map:*
import hash_map:*
import util:*
import str:*
import mem:*
import io:*
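// Generalized LR parser: drives the parse table from grammer over a graph-structured
// stack (gss), packing ambiguous derivations into shared parse-forest nodes.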
obj parser (Object) {
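// input: token stream produced by the lexer
// gss: graph-structured stack; one frontier of state nodes per input position
// to_reduce / to_shift: work lists of pending reductions and shifts for the current step
// SPPFStepNodes: parse-forest nodes created this step, paired with their frontier index
// packed_map: marks parse-forest nodes that represent packed (ambiguous) alternatives
// reduces_to_null_map: caches whether a rule's rhs can derive the empty string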
var input: vec<symbol>
var gram: *grammer
var lex: *lexer
var gss: gss
var to_reduce: stack<reduction>
var to_shift: stack< pair<*tree<int>, int> >
var SPPFStepNodes: vec< pair<*tree<symbol>, int> >
var packed_map: map<*tree<symbol>, bool>
var reduces_to_null_map: map<vec<symbol>, bool>
fun construct(grammerIn: *grammer, lexIn: *lexer): *parser {
input.construct()
gram = grammerIn
lex = lexIn
gss.construct()
to_reduce.construct()
to_shift.construct()
SPPFStepNodes.construct()
packed_map.construct()
reduces_to_null_map.construct()
return this
}
// default constructor so containers can maybe_construct a parser
fun construct(): *parser {
return construct(null<grammer>(), null<lexer>())
}
fun copy_construct(old: *parser) {
input.copy_construct(&old->input)
gram = old->gram
lex = old->lex
gss.copy_construct(&old->gss)
to_reduce.copy_construct(&old->to_reduce)
to_shift.copy_construct(&old->to_shift)
SPPFStepNodes.copy_construct(&old->SPPFStepNodes)
packed_map.copy_construct(&old->packed_map)
reduces_to_null_map.copy_construct(&old->reduces_to_null_map)
}
fun operator=(old: ref parser) {
destruct()
copy_construct(&old)
}
fun destruct() {
input.destruct()
gss.destruct()
to_reduce.destruct()
to_shift.destruct()
SPPFStepNodes.destruct()
packed_map.destruct()
reduces_to_null_map.destruct()
}
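// Tokenize inputStr, then run the parse loop: seed frontier 0 from state 0's actions,
// and for each input position drain all pending reductions before shifting the next token.
// Returns the syntax tree on acceptance, or null (with a diagnostic) on a lex or parse error.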
fun parse_input(inputStr: str, name: str): *tree<symbol> {
input.clear()
gss.clear()
to_reduce.clear()
to_shift.clear()
SPPFStepNodes.clear()
packed_map.clear()
// if state 0 has a reduce action on eof, empty input reduces directly
// to the goal symbol, so accept immediately
if (inputStr == "" && gram->parse_table.get(0, eof_symbol()).contains(action(action_type::reduce(), 0))) {
println("Accept on no input for ")
println(name)
return new<tree<symbol>>()->construct(null_symbol())
}
lex->set_input(inputStr)
var current_symbol.construct(): symbol
for (current_symbol = lex->next(); current_symbol != eof_symbol() && current_symbol != invalid_symbol(); current_symbol = lex->next();) {
if (current_symbol != eof_symbol() && current_symbol != invalid_symbol())
current_symbol.source = name
input.addEnd(current_symbol)
}
input.addEnd(current_symbol)
if (current_symbol == invalid_symbol()) {
println("**PARSE ERROR**")
println("lexing failed for ")
println(name)
return null<tree<symbol>>()
}
var v0 = gss.new_node(0)
gss.add_to_frontier(0, v0)
var null_symbol_tree = null<tree<symbol>>()
gram->parse_table.get(0, input[0]).for_each(fun(act: action) {
if (act.act == action_type::push())
to_shift.push(make_pair(v0, act.state_or_rule))
else if (act.act == action_type::reduce() && act.rule_position == 0) {
to_reduce.push(reduction(v0, gram->rules[act.state_or_rule].lhs, 0, null_symbol_tree, null_symbol_tree))
}
})
for (var i = 0; i < input.size; i++;) {
if (gss.frontier_is_empty(i)) {
println("**PARSE ERROR**")
print(i)
print("th frontier is empty in file '")
print(name)
print("' with txt ")
print(input[i].to_string())
print(" line number: ")
print(find_line(i))
println()
return null<tree<symbol>>()
}
SPPFStepNodes.clear()
while (to_reduce.size())
reducer(i)
shifter(i)
}
var acc_state = gss.frontier_get_acc_state(input.size-1)
if (acc_state) {
return gss.get_edge(acc_state, v0)
}
println("**PARSE ERROR**")
println("REJECTED")
println("parsing (not lexing) failed AT THE END for ")
println(name)
print(" line number: ")
print(find_line(input.size))
println("(minus 2?)")
print("' with txt ")
println(input.last().to_string())
return null<tree<symbol>>()
}
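// Apply one pending reduction: walk every GSS path of the required length back from
// the reduction's start node, find or create the parse-forest node for the lhs symbol,
// add the goto edge into the current frontier, and queue any newly enabled actions.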
fun reducer(i: int) {
var curr_reduction = to_reduce.pop()
gss.get_reachable_paths(curr_reduction.from, max(0, curr_reduction.length-1)).
for_each(fun(path: ref vec<*tree<int>>) {
var path_edges = range(path.size-1).map(fun(indx: int): *tree<symbol> { return gss.get_edge(path[indx], path[indx+1]);}).reverse()
if (curr_reduction.length != 0) {
path_edges.addEnd(curr_reduction.label)
}
var curr_reached = path.last()
// if this is the Goal = a type reduction, then skip the actual reduction part.
// the shift lookup will fail, and likely other things, and this is our accept
// criteria anyway
/*if (curr_reached->data == 0 && curr_reduction.sym == gram.rules[0].lhs)*/
if (curr_reduction.sym == gram->rules[0].lhs) {
/*println("would accept here")*/
return;
}
var shift_to = gram->parse_table.get_shift(curr_reached->data, curr_reduction.sym).state_or_rule
var new_label = null<tree<symbol>>()
if (curr_reduction.length == 0) {
new_label = curr_reduction.nullable_parts
} else {
var reached_frontier = gss.get_containing_frontier(curr_reached)
for (var j = 0; j < SPPFStepNodes.size; j++;) {
if (SPPFStepNodes[j].second == reached_frontier
&& SPPFStepNodes[j].first->data == curr_reduction.sym) {
new_label = SPPFStepNodes[j].first
break
}
}
if (!new_label) {
new_label = new<tree<symbol>>()->construct(curr_reduction.sym)
SPPFStepNodes.addEnd(make_pair(new_label, reached_frontier))
}
}
var shift_to_node = gss.in_frontier(i, shift_to)
if (shift_to_node) {
if (!gss.has_edge(shift_to_node, curr_reached)) {
gss.add_edge(shift_to_node, curr_reached, new_label)
// do non-null reductions
if (curr_reduction.length) {
gram->parse_table.get(shift_to, input[i]).for_each(fun(act: action) {
if (act.act == action_type::reduce() && act.rule_position != 0) {
var reduce_rule = gram->rules[act.state_or_rule]
to_reduce.push(reduction(curr_reached, reduce_rule.lhs,
act.rule_position,
get_nullable_parts(reduce_rule),
new_label))
}
})
}
}
} else {
shift_to_node = gss.new_node(shift_to)
gss.add_to_frontier(i, shift_to_node)
gss.add_edge(shift_to_node, curr_reached, new_label)
gram->parse_table.get(shift_to, input[i]).for_each(fun(act: action) {
if (act.act == action_type::push()) {
to_shift.push(make_pair(shift_to_node, act.state_or_rule))
} else {
var action_rule = gram->rules[act.state_or_rule]
// tricky tricky tricky. Fully reduces to null is not the same as act.rule_position being 0
/*if (fully_reduces_to_null(action_rule)) {*/
if (act.rule_position == 0) {
to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0,
get_nullable_parts(action_rule),
null<tree<symbol>>() ))
} else if (curr_reduction.length != 0) {
to_reduce.push(reduction(curr_reached, action_rule.lhs, act.rule_position,
get_nullable_parts(action_rule),
new_label ))
}
}
})
}
if (curr_reduction.length)
add_children(new_label, path_edges, curr_reduction.nullable_parts)
})
}
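// Shift the token at position i: for each pending shift, reuse or create the target
// state node in frontier i+1, label the new edge with the token's parse-forest node,
// and queue the reductions that become applicable from the shifted-to state.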
fun shifter(i: int) {
if (i >= input.size-1)
return; // darn ambiguity
var next_shifts = stack< pair<*tree<int>, int> >()
var new_label = new<tree<symbol>>()->construct(input[i])
while (!to_shift.empty()) {
var shift = to_shift.pop()
var shift_to_node = gss.in_frontier(i+1, shift.second)
if (shift_to_node) {
gss.add_edge(shift_to_node, shift.first, new_label)
gram->parse_table.get_reduces(shift.second, input[i+1]).for_each(fun(action: action) {
var reduce_rule = gram->rules[action.state_or_rule]
if (action.rule_position != 0) {
to_reduce.push(reduction(shift.first, reduce_rule.lhs, action.rule_position,
get_nullable_parts(reduce_rule),
new_label ))
}
})
} else {
shift_to_node = gss.new_node(shift.second)
gss.add_to_frontier(i+1, shift_to_node)
gss.add_edge(shift_to_node, shift.first, new_label)
gram->parse_table.get(shift.second, input[i+1]).for_each(fun(action: action) {
if (action.act == action_type::push()) {
next_shifts.push(make_pair(shift_to_node, action.state_or_rule))
} else {
var action_rule = gram->rules[action.state_or_rule]
if (action.rule_position != 0) {
to_reduce.push(reduction(shift.first, action_rule.lhs, action.rule_position,
get_nullable_parts(action_rule),
new_label ))
} else {
to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0,
get_nullable_parts(action_rule),
null<tree<symbol>>() ))
}
}
})
}
}
to_shift = next_shifts
}
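// Attach a family of children to a parse-forest node; if the node already has a
// different family, wrap the alternatives in AmbiguityInner/AmbiguityOuter packing nodes.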
fun add_children(parent: *tree<symbol>, children: vec<*tree<symbol>>, nullable_parts: *tree<symbol>) {
if (nullable_parts)
children.add(nullable_parts)
if (!belongs_to_family(parent, children)) {
if (parent->children.size == 0) {
parent->children.add_all(children)
} else {
if (!are_packed(parent->children)) {
// ambiguity inner
var sub_parent = new<tree<symbol>>()->construct(symbol("AmbiguityInner", true))
set_packed(sub_parent, true)
sub_parent->children.add_all(parent->children)
parent->children.clear()
parent->children.add(sub_parent)
}
// ambiguity outer
var next_sub_parent = new<tree<symbol>>()->construct(symbol("AmbiguityOuter", true))
set_packed(next_sub_parent, true)
parent->children.add(next_sub_parent)
next_sub_parent->children.add_all(children)
}
}
}
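// True if every node in 'nodes' already appears among this node's children,
// compared by pointer or by value.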
fun belongs_to_family(node: *tree<symbol>, nodes: vec<*tree<symbol>>): bool {
for (var i = 0; i < nodes.size; i++;) {
var contains_one = false
for (var j = 0; j < node->children.size; j++;) {
var child = node->children[j]
if (nodes[i] == child || (nodes[i] && child && *nodes[i] == *child)) {
contains_one = true
break
}
}
if (!contains_one)
return false
}
return true
}
fun are_packed(nodes: vec<*tree<symbol>>): bool {
return nodes.any_true(fun(it: *tree<symbol>):bool { return packed_map.contains_key(it) && packed_map[it]; })
}
fun set_packed(node: *tree<symbol>, packed: bool) {
packed_map.set(node, packed)
}
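// A rule (fully) reduces to null when its rhs can derive the empty string;
// the answer is cached per rhs in reduces_to_null_map.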
fun fully_reduces_to_null(r: ref rule): bool {
return r.position == 0 && reduces_to_null(r)
}
fun reduces_to_null(r: ref rule): bool {
if (!reduces_to_null_map.contains_key(r.rhs))
reduces_to_null_map[r.rhs] = gram->first_vector(r.rhs).contains(null_symbol())
return reduces_to_null_map[r.rhs]
}
fun get_nullable_parts(r: ref rule): *tree<symbol> {
if (reduces_to_null(r))
return new<tree<symbol>>()->construct(null_symbol())
return null<tree<symbol>>()
}
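// Recover a 1-based line number by counting newlines in the token text before token_no.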
fun find_line(token_no: int): int {
var line_no = 1
for (var i = 0; i < token_no; i++;)
for (var j = 0; j < input[i].data.length(); j++;)
if (input[i].data[j] == '\n')
line_no++
return line_no
}
}
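// Graph-structured stack: data holds one vec of state nodes per frontier (input position);
// edges maps (from, to) node pairs to the parse-forest node labeling that edge.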
obj gss (Object) {
var data: vec<vec<*tree<int>>>
var edges: hash_map< pair<*tree<int>, *tree<int>>, *tree<symbol> >
fun construct(): *gss {
data.construct()
edges.construct()
return this
}
fun copy_construct(old: *gss) {
data.copy_construct(&old->data)
edges.copy_construct(&old->edges)
}
fun destruct() {
clear()
data.destruct()
edges.destruct()
}
fun clear() {
data.for_each(fun(frontier: ref vec<*tree<int>>) frontier.for_each(fun(node: *tree<int>) delete(node););)
data.clear()
edges.clear()
}
fun new_node(state: int): *tree<int> {
return new<tree<int>>()->construct(state)
}
fun add_to_frontier(frontier: int, node: *tree<int>) {
while(data.size <= frontier)
data.addEnd(vec<*tree<int>>())
data[frontier].addEnd(node)
}
fun frontier_is_empty(frontier: int): bool {
return frontier >= data.size || data[frontier].size == 0
}
fun frontier_get_acc_state(frontier: int): *tree<int> {
// the accepting state is always state 1, for now
return in_frontier(frontier, 1)
}
fun in_frontier(frontier: int, state: int): *tree<int> {
if (frontier >= data.size)
return null<tree<int>>()
for (var i = 0; i < data[frontier].size; i++;)
if (data[frontier][i]->data == state)
return data[frontier][i]
return null<tree<int>>()
}
fun get_edge(start: *tree<int>, end: *tree<int>): *tree<symbol> {
return edges[make_pair(start, end)]
}
fun has_edge(start: *tree<int>, end: *tree<int>): bool {
// could also look in map, but this is faster...
return start->children.find(end) != -1
}
fun add_edge(start: *tree<int>, end: *tree<int>, edge: *tree<symbol>) {
start->children.add(end)
edges.set(make_pair(start,end), edge)
}
fun get_containing_frontier(node: *tree<int>): int {
for (var i = data.size-1; i >= 0; i--;)
if (data[i].contains(node))
return i
return -1
}
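// Enumerate every path of exactly 'length' edges starting at 'start', depth-first.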
fun get_reachable_paths(start: *tree<int>, length: int): vec<vec<*tree<int>>> {
var paths = vec<vec<*tree<int>>>()
var recursive_path_find: fun(*tree<int>, int, vec<*tree<int>>):void = fun(start: *tree<int>, length: int, current_path: vec<*tree<int>>) {
current_path.addEnd(start)
if (!length) {
paths.addEnd(current_path)
return
}
start->children.for_each(fun(child: *tree<int>) {
recursive_path_find(child, length-1, current_path)
})
}
recursive_path_find(start, length, vec<*tree<int>>())
return paths
}
}
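// A pending reduction: trace back 'length' symbols from GSS node 'from' and reduce to
// 'sym'; 'nullable_parts' is the parse-forest node for a nullable tail (if any) and
// 'label' is the parse-forest node carried from the edge that triggered the reduction.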
fun reduction(f: *tree<int>, s: symbol, l: int, n: *tree<symbol>, label:*tree<symbol>): reduction {
var toRet.construct(f,s,l,n,label): reduction
return toRet
}
obj reduction (Object) {
var from: *tree<int>
var sym: symbol
var length: int
var nullable_parts: *tree<symbol>
var label: *tree<symbol>
fun construct(): *reduction {
from = null<tree<int>>()
sym = invalid_symbol()
length = -1
nullable_parts = null<tree<symbol>>()
label = null<tree<symbol>>()
return this
}
fun construct(f: *tree<int>, s: symbol, l: int, n: *tree<symbol>, labelIn:*tree<symbol>): *reduction {
from = f
sym.copy_construct(&s)
length = l
nullable_parts = n
label = labelIn
return this
}
fun copy_construct(old: *reduction) {
from = old->from
sym.copy_construct(&old->sym)
length = old->length
nullable_parts = old->nullable_parts
label = old->label
}
fun operator=(other: reduction):void {
destruct()
copy_construct(&other)
}
fun destruct() {
sym.destruct()
}
}
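// Render a syntax tree as a Graphviz dot digraph; node names are the escaped symbol
// text made unique with a counter.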
fun syntax_tree_to_dot(root: *tree<symbol>): str {
var ret = str("digraph Kaken {\n")
var counter = 0
var node_name_map = map<*tree<symbol>, str>()
var get_name = fun(node: *tree<symbol>): str {
if (node_name_map.contains_key(node))
return node_name_map[node]
var escaped = str("")
node->data.to_string().data.for_each(fun(c: char) {
if (c != '"' && c != '\\')
escaped += c
else if (c == '"')
escaped += "\\\""
else if (c == '\\')
escaped += "\\\\"
})
escaped += to_string(counter++)
node_name_map.set(node, escaped)
return escaped
}
var helper: fun(*tree<symbol>):void = fun(node: *tree<symbol>) {
node->children.for_each(fun(child: *tree<symbol>) {
if (!child)
return; // where on earth does the null come from
ret += str("\"") + get_name(node) + "\" -> \"" + get_name(child) + "\"\n";
helper(child)
})
}
if (root)
helper(root)
return ret + "}"
}