2015-08-06 17:38:41 -04:00
|
|
|
import grammer:*
|
|
|
|
|
import symbol:*
|
|
|
|
|
import lexer:*
|
|
|
|
|
import tree:*
|
|
|
|
|
import vector:*
|
|
|
|
|
import stack:*
|
|
|
|
|
import map:*
|
|
|
|
|
import util:*
|
|
|
|
|
import string:*
|
|
|
|
|
import mem:*
|
|
|
|
|
import io:*
|
2015-08-06 02:42:40 -04:00
|
|
|
|
|
|
|
|
// A generalized (GLR-style) parser driven by a precomputed parse table.
// Uses a graph-structured stack (gss), per-input-position "frontiers", and
// shared parse-tree nodes (SPPFStepNodes) — the special handling of rules
// that "fully reduce to null" suggests an RNGLR-style algorithm, but that
// is inferred from structure/naming; TODO confirm against the design notes.
obj parser (Object) {
    // Token stream produced by the lexer (eof symbol appended at the end).
    var input: vector<symbol>
    // The grammar, including its parse table and rules (copied at construction).
    var gram: grammer
    // Graph-structured stack; member deliberately shares the name of its type.
    var gss: gss
    // Worklist of pending reductions for the current input position.
    var to_reduce: stack<reduction>
    // Worklist of pending shifts: (gss node to shift from, state to shift to).
    var to_shift: stack< pair<*tree<int>, int> >
    // Per-step shared tree nodes: (node, frontier index it was created for).
    var SPPFStepNodes: vector< pair<*tree<symbol>, int> >
    // Tracks which tree nodes have been packed (not used in the visible code).
    var packed_map: map<*tree<symbol>, bool>

    // Initialize all members; copies the supplied grammar. Returns this.
    fun construct(grammerIn: grammer): *parser {
        input.construct()
        gram.copy_construct(&grammerIn)
        gss.construct()
        to_reduce.construct()
        to_shift.construct()
        SPPFStepNodes.construct()
        packed_map.construct()
        return this
    }

    // Member-wise deep copy from old.
    fun copy_construct(old: *parser) {
        input.copy_construct(&old->input)
        gram.copy_construct(&old->gram)
        gss.copy_construct(&old->gss)
        to_reduce.copy_construct(&old->to_reduce)
        to_shift.copy_construct(&old->to_shift)
        SPPFStepNodes.copy_construct(&old->SPPFStepNodes)
        packed_map.copy_construct(&old->packed_map)
    }

    // Assignment: destroy current state, then copy-construct from old.
    fun operator=(old: ref parser) {
        destruct()
        copy_construct(&old)
    }

    // Tear down all members (mirror of construct).
    fun destruct() {
        input.destruct()
        gram.destruct()
        gss.destruct()
        to_reduce.destruct()
        to_shift.destruct()
        SPPFStepNodes.destruct()
        packed_map.destruct()
    }

    // Lex inputStr and parse it; `name` is used only for diagnostics.
    // Returns the root of the resulting parse tree on success, or a null
    // tree pointer on lexing or parsing failure.
    fun parse_input(inputStr: string, name: string): *tree<symbol> {
        // Reset all per-parse state so the parser object is reusable.
        input.clear()
        gss.clear()
        to_reduce.clear()
        to_shift.clear()
        SPPFStepNodes.clear()
        packed_map.clear()

        // if the zero state contains any reductions for state 0 and eof, then
        // it must be reducing to the goal state
        println("checking the bidness")
        if (inputStr == "" && gram.parse_table.get(0, eof_symbol()).contains(action(reduce, 0))) {
            println("Accept on no input for ")
            println(name)
            // Empty input accepted: return a trivial null-symbol tree.
            return new<tree<symbol>>()->construct(null_symbol())
        }

        // Tokenize the whole input up front.
        var lex = lexer(gram.terminals)
        lex.set_input(inputStr)
        var current_symbol.construct(): symbol
        for (current_symbol = lex.next(); current_symbol != eof_symbol() && current_symbol != invalid_symbol(); current_symbol = lex.next();) {
            /*println("current_symbol is ")*/
            /*println(current_symbol.to_string())*/
            input.addEnd(current_symbol)
        }
        // Append the terminating symbol (eof on success, invalid on lex error).
        input.addEnd(current_symbol)
        if (current_symbol == invalid_symbol()) {
            println("lexing failed for ")
            println(name)
            return null<tree<symbol>>()
        }

        // Seed the GSS with the start state in frontier 0.
        var v0 = gss.new_node(0)
        gss.add_to_frontier(0, v0)

        var null_symbol_tree = null<tree<symbol>>()

        /*println("looking up")*/
        /*println(input[0].to_string())*/
        // Seed the worklists from the table entries for (state 0, first token).
        gram.parse_table.get(0, input[0]).for_each(fun(act: action) {
            println("for each action")
            act.print()
            if (act.act == push)
                to_shift.push(make_pair(v0, act.state_or_rule))
            else if (act.act == reduce && fully_reduces_to_null(gram.rules[act.state_or_rule]))
                to_reduce.push(reduction(v0, gram.rules[act.state_or_rule].lhs, 0, null_symbol_tree, null_symbol_tree))
        })

        // Main loop: one iteration per input position. Drain all reductions
        // for this position, then perform the batched shifts into the next.
        for (var i = 0; i < input.size; i++;) {
            if (gss.frontier_is_empty(i)) {
                // No surviving stack tops at this position: syntax error.
                print(i)
                print("th frontier is empty in file '")
                print(name)
                print("' with txt ")
                print(input[i].to_string())
                println()
                return null<tree<symbol>>()
            }
            // Step nodes are only shared within a single position.
            SPPFStepNodes.clear()
            print("to_reduce size: ")
            println(to_reduce.size())
            print("to_shift size: ")
            println(to_shift.size())
            while (to_reduce.size())
                reducer(i)
            shifter(i)
        }

        // Accept iff the accepting state is present in the final frontier;
        // the tree is the edge label between it and the start node.
        var acc_state = gss.frontier_get_acc_state(input.size-1)
        if (acc_state) {
            println("ACCEPTED!")
            return gss.get_edge(acc_state, v0)
        }

        println("REJECTED")
        println("parsing (not lexing) failed for ")
        println(name)
        return null<tree<symbol>>()
    }

    // Perform one pending reduction at input position i. May enqueue further
    // reductions and shifts as new GSS nodes/edges are created.
    fun reducer(i: int) {
        println("reducing")
        var curr_reduction = to_reduce.pop()
        // Walk back length-1 edges from the reduction's origin node (length 0
        // is a nullable reduction and walks no edges).
        gss.get_reachable_paths(curr_reduction.from, max(0, curr_reduction.length-1)).
        for_each(fun(path: ref vector<*tree<int>>) {
            println("in get_reachable_paths for_each loop")
            // Collect the tree labels along the path, in rule order.
            var path_edges = range(path.size-1).map(fun(indx: int): *tree<symbol> { return gss.get_edge(path[indx], path[indx+1]);}).reverse()
            println("got path edges")
            if (curr_reduction.length != 0)
                path_edges.addEnd(curr_reduction.label)
            var curr_reached = path.last()
            // State to goto after reducing to the rule's lhs.
            var shift_to = gram.parse_table.get_shift(curr_reached->data, curr_reduction.sym).state_or_rule
            println("got shift to")
            var new_label = null<tree<symbol>>()
            if (curr_reduction.length == 0) {
                // Nullable reduction: reuse the prebuilt nullable tree.
                new_label = curr_reduction.nullable_parts
            } else {
                // Reuse a step node for (symbol, frontier) if one exists,
                // sharing subtrees between ambiguous derivations.
                var reached_frontier = gss.get_containing_frontier(curr_reached)
                for (var j = 0; j < SPPFStepNodes.size; j++;) {
                    if (SPPFStepNodes[j].second == reached_frontier
                        && SPPFStepNodes[j].first->data == curr_reduction.sym) {
                        new_label = SPPFStepNodes[j].first
                        break
                    }
                }
                if (!new_label) {
                    new_label = new<tree<symbol>>()->construct(curr_reduction.sym)
                    SPPFStepNodes.addEnd(make_pair(new_label, reached_frontier))
                }
            }
            var shift_to_node = gss.in_frontier(i, shift_to)
            if (shift_to_node) {
                // Target node already exists; only add a new edge (and the
                // reductions it enables) if the edge is new.
                if (!gss.has_edge(shift_to_node, curr_reached)) {
                    gss.add_edge(shift_to_node, curr_reached, new_label)
                    // do non-null reductions
                    if (curr_reduction.length) {
                        gram.parse_table.get(shift_to, input[i]).for_each(fun(act: action) {
                            var reduce_rule = gram.rules[act.state_or_rule]
                            if (act.act == reduce && !fully_reduces_to_null(reduce_rule))
                                to_reduce.push(reduction(curr_reached, reduce_rule.lhs,
                                                         reduce_rule.position,
                                                         new<tree<symbol>>()->construct(null_symbol()),
                                                         new_label))
                        })
                    }
                }
            } else {
                // Create the target node in this frontier and enqueue every
                // action available from it on the current token.
                shift_to_node = gss.new_node(shift_to)
                gss.add_to_frontier(i, shift_to_node)
                gss.add_edge(shift_to_node, curr_reached, new_label)
                gram.parse_table.get(shift_to, input[i]).for_each(fun(act: action) {
                    if (act.act == push) {
                        to_shift.push(make_pair(shift_to_node, act.state_or_rule))
                    } else {
                        var action_rule = gram.rules[act.state_or_rule]
                        if (fully_reduces_to_null(action_rule)) {
                            to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0,
                                                     new<tree<symbol>>()->construct(null_symbol()),
                                                     null<tree<symbol>>() ))
                        } else if (curr_reduction.length == 0) {
                            to_reduce.push(reduction(curr_reached, action_rule.lhs, action_rule.position,
                                                     new<tree<symbol>>()->construct(null_symbol()),
                                                     new_label ))
                        }
                    }
                })
            }
            // Attach the gathered children (plus any nullable tail) to the
            // node labeling this reduction's edge.
            if (curr_reduction.length)
                add_children(new_label, path_edges, curr_reduction.nullable_parts)
        })
    }

    // Perform all batched shifts from position i into frontier i+1.
    fun shifter(i: int) {
        println("shifting")
        if (i == input.size)
            return; // darn ambiguity
        print("shifting on ")
        println(input[i].to_string())
        // Shifts discovered while processing frontier i+1 belong to the next
        // call; collect them separately and swap in at the end.
        var next_shifts = stack< pair<*tree<int>, int> >()
        // One shared leaf node for this terminal, reused by every shift.
        var new_label = new<tree<symbol>>()->construct(input[i])
        while (!to_shift.empty()) {
            println("to_shift not empty")
            var shift = to_shift.pop()
            println("post pop")
            var shift_to_node = gss.in_frontier(i+1, shift.second)
            println("post in_frontier")
            if (shift_to_node) {
                // Node exists: just add the edge's consequences (non-null
                // reductions on the lookahead token).
                print("already in frontier ")
                println(i+1)
                gram.parse_table.get_reduces(shift.second, input[i+1]).for_each(fun(action: action) {
                    var reduce_rule = gram.rules[action.state_or_rule]
                    if (!fully_reduces_to_null(reduce_rule)) {
                        to_reduce.push(reduction(shift.first, reduce_rule.lhs, reduce_rule.position,
                                                 new<tree<symbol>>()->construct(null_symbol()),
                                                 new_label ))
                    }
                })
            } else {
                // Create the node in the next frontier and enqueue all of its
                // actions on the lookahead token.
                print("adding to frontier ")
                println(i+1)
                shift_to_node = gss.new_node(shift.second)
                gss.add_to_frontier(i+1, shift_to_node)
                println("post add to frontier")
                gss.add_edge(shift_to_node, shift.first, new_label)
                println("post add edger")
                gram.parse_table.get(shift.second, input[i+1]).for_each(fun(action: action) {
                    println("looking at an action")
                    if (action.act == push) {
                        println("is push")
                        next_shifts.push(make_pair(shift_to_node, action.state_or_rule))
                    } else {
                        println("is reduce")
                        var action_rule = gram.rules[action.state_or_rule]
                        if (!fully_reduces_to_null(action_rule)) {
                            println("does not reduce to null")
                            to_reduce.push(reduction(shift.first, action_rule.lhs, action_rule.position,
                                                     new<tree<symbol>>()->construct(null_symbol()),
                                                     new_label ))
                        } else {
                            println("does reduce to null")
                            to_reduce.push(reduction(shift_to_node, action_rule.lhs, 0,
                                                     new<tree<symbol>>()->construct(null_symbol()),
                                                     null<tree<symbol>>() ))
                        }
                    }
                })
            }
        }
        to_shift = next_shifts
    }

    // Attach children (and any nullable tail) to parent.
    // NOTE(review): unimplemented stub — parse trees currently get no
    // children attached; confirm whether this is intentional work-in-progress.
    fun add_children(parent: *tree<symbol>, children: vector<*tree<symbol>>, nullable_parts: *tree<symbol>) {
    }

    // True when rule r (at position 0) can derive the empty string, i.e. the
    // first set of its rhs contains the null symbol.
    fun fully_reduces_to_null(r: ref rule): bool {
        return r.position == 0 && gram.first_vector(r.rhs).contains(null_symbol())
    }
}
|
|
|
|
|
|
|
|
|
|
// Graph-structured stack for the GLR parser. Nodes are tree<int> holding a
// parse state; `data` groups nodes into frontiers (one per input position);
// `edges` maps a (start, end) node pair to the parse-tree label on that edge.
obj gss (Object) {
    // data[frontier] = the GSS nodes created for that input position.
    var data: vector<vector<*tree<int>>>
    // Edge labels keyed by (start node, end node).
    var edges: map< pair<*tree<int>, *tree<int>>, *tree<symbol> >

    // Initialize members. Returns this.
    fun construct(): *gss {
        data.construct()
        edges.construct()
        // BUG FIX: declared to return *gss but previously fell off the end
        // without returning, leaving the result undefined for any caller that
        // uses the conventional p = new<gss>()->construct() pattern.
        return this
    }

    // Member-wise copy from old.
    fun copy_construct(old: *gss) {
        data.copy_construct(&old->data)
        edges.copy_construct(&old->edges)
    }

    // Tear down members.
    fun destruct() {
        data.destruct()
        edges.destruct()
    }

    // Remove all frontiers and edges so the structure can be reused.
    fun clear() {
        data.clear()
        edges.clear()
    }

    // Allocate a fresh, unattached node for the given parse state.
    fun new_node(state: int): *tree<int> {
        return new<tree<int>>()->construct(state)
    }

    // Append node to the given frontier, growing the frontier list as needed.
    fun add_to_frontier(frontier: int, node: *tree<int>) {
        while(data.size <= frontier)
            data.addEnd(vector<*tree<int>>())
        data[frontier].addEnd(node)
    }

    // True when the frontier does not exist yet or holds no nodes.
    fun frontier_is_empty(frontier: int): bool {
        return frontier >= data.size || data[frontier].size == 0
    }

    // Return the accepting node in the frontier, or null if absent.
    fun frontier_get_acc_state(frontier: int): *tree<int> {
        // the accepting state is always state 1, for now
        return in_frontier(frontier, 1)
    }

    // Find the node for `state` in the given frontier; null if not present
    // (or if the frontier does not exist).
    fun in_frontier(frontier: int, state: int): *tree<int> {
        if (frontier >= data.size)
            return null<tree<int>>()
        for (var i = 0; i < data[frontier].size; i++;)
            if (data[frontier][i]->data == state)
                return data[frontier][i]
        return null<tree<int>>()
    }

    // The parse-tree label on the edge start -> end.
    fun get_edge(start: *tree<int>, end: *tree<int>): *tree<symbol> {
        return edges[make_pair(start, end)]
    }

    // True when an edge start -> end already exists.
    fun has_edge(start: *tree<int>, end: *tree<int>): bool {
        // could also look in map, but this is faster...
        return start->children.find(end) != -1
    }

    // Add an edge start -> end labeled with the given tree.
    fun add_edge(start: *tree<int>, end: *tree<int>, edge: *tree<symbol>) {
        start->children.add(end)
        edges.set(make_pair(start,end), edge)
    }

    // Linear search for the frontier holding node; -1 if not found.
    fun get_containing_frontier(node: *tree<int>): int {
        for (var i = 0; i < data.size; i++;)
            if (data[i].contains(node))
                return i
        return -1
    }

    // All paths of exactly `length` edges starting at `start`, following the
    // children links; each path includes both endpoints.
    fun get_reachable_paths(start: *tree<int>, length: int): vector<vector<*tree<int>>> {
        var paths = vector<vector<*tree<int>>>()
        var recursive_path_find: fun(*tree<int>, int, vector<*tree<int>>):void = fun(start: *tree<int>, length: int, current_path: vector<*tree<int>>) {
            current_path.addEnd(start)
            if (!length) {
                paths.addEnd(current_path)
                return
            }
            start->children.for_each(fun(child: *tree<int>) {
                recursive_path_find(child, length-1, current_path)
            })
        }
        recursive_path_find(start, length, vector<*tree<int>>())
        return paths
    }
}
|
|
|
|
|
|
2015-08-06 17:38:41 -04:00
|
|
|
// Free-function factory: builds a reduction value from its five fields
// (origin node, reduced-to symbol, rule length, prebuilt nullable tree,
// and edge label) and returns it by value.
fun reduction(f: *tree<int>, s: symbol, l: int, n: *tree<symbol>, label:*tree<symbol>): reduction {
    var result.construct(f, s, l, n, label): reduction
    return result
}
|
|
|
|
|
|
|
|
|
|
// A pending reduction queued by the parser: reduce `length` symbols ending at
// GSS node `from` to the nonterminal `sym`, with `nullable_parts` holding the
// prebuilt tree for any nullable suffix and `label` the tree on the edge the
// reduction was triggered over.
obj reduction (Object) {
    // GSS node the reduction path starts from.
    var from: *tree<int>
    // Nonterminal being reduced to (the rule's lhs).
    var sym: symbol
    // Number of symbols consumed by the reduction (0 for nullable rules).
    var length: int
    // Prebuilt tree for the nullable portion of the rule.
    var nullable_parts: *tree<symbol>
    // Tree labeling the edge that triggered this reduction.
    var label: *tree<symbol>

    // Initialize all five fields. Returns this.
    // BUG FIX: the last parameter was previously named `label`, shadowing the
    // member, so `label = label` was a self-assignment and the member was
    // never set — it was then read uninitialized via curr_reduction.label in
    // parser.reducer. Renaming the parameter makes the assignment hit the
    // member (call sites are positional and unaffected).
    fun construct(f: *tree<int>, s: symbol, l: int, n: *tree<symbol>, labelIn: *tree<symbol>): *reduction {
        from = f
        sym.copy_construct(&s)
        length = l
        nullable_parts = n
        label = labelIn
        return this
    }

    // Member-wise copy from old.
    fun copy_construct(old: *reduction) {
        from = old->from
        sym.copy_construct(&old->sym)
        length = old->length
        nullable_parts = old->nullable_parts
        label = old->label
    }

    // Assignment: destroy current state, then copy-construct from other.
    fun operator=(other: reduction):void {
        destruct()
        copy_construct(&other)
    }

    // Only sym owns resources; pointers are not owned by this object.
    fun destruct() {
        sym.destruct()
    }
}
|