More work, finishing the parse_input and lots of reducer

This commit is contained in:
Nathan Braswell
2015-08-06 17:38:41 -04:00
parent 1f119af8ad
commit 674e7e6538
13 changed files with 315 additions and 75 deletions

View File

@@ -1385,8 +1385,8 @@ NodeTree<ASTData>* ASTTransformation::generateThis(NodeTree<ASTData>* scope) {
std::vector<NodeTree<ASTData>*> ASTTransformation::scopeLookup(NodeTree<ASTData>* scope, std::string lookup, bool includeModules, std::set<NodeTree<ASTData>*> visited) {
std::cout << "Scp]|[e looking up " << lookup << std::endl;
std::cout << "current: " << scope->getDataRef()->toString() << std::endl;
for (auto i : scope->getDataRef()->scope)
std::cout << "\t" << i.first << std::endl;
//for (auto i : scope->getDataRef()->scope)
//std::cout << "\t" << i.first << std::endl;
//std::cout << i.first << " : " << i.second->toString() << std::endl;
// Don't visit this node again when looking for the same lookup. Note that we don't prevent coming back for the scope operator, as that should be able to come back.
if (visited.find(scope) != visited.end())

View File

@@ -57,7 +57,7 @@ std::string CGenerator::generateClassStruct(NodeTree<ASTData>* from) {
std::string objectString = "struct __struct_dummy_" + scopePrefix(from) + CifyName(data.symbol.getName()) + "__ {\n";
tabLevel++;
for (int i = 0; i < children.size(); i++) {
std::cout << children[i]->getName() << std::endl;
//std::cout << children[i]->getName() << std::endl;
if (children[i]->getName() != "function")
objectString += tabs() + generate(children[i], nullptr).oneString() + "\n";
}
@@ -312,7 +312,7 @@ CCodeTriple CGenerator::generate(NodeTree<ASTData>* from, NodeTree<ASTData>* enc
}
//If we're in an object method, and our enclosing scope is that object, we're a member of the object and should use the this reference.
if (enclosingObject && enclosingObject->getDataRef()->scope.find(data.symbol.getName()) != enclosingObject->getDataRef()->scope.end())
preName += "this->";
preName = "(" + preName + "this)->"; // incase this is a closed over this that is referencing another thing (I think this happens for a.b when a is supposed to be closed over but isn't)
// dereference references, but only if inside a function and not if this is a closed over variable
if (enclosingFunction && data.valueType->is_reference && !closed) {
preName += "(*";
@@ -417,12 +417,10 @@ CCodeTriple CGenerator::generate(NodeTree<ASTData>* from, NodeTree<ASTData>* enc
// If it's a block, because it's also a statement a semicolon will be emitted even though
// we don't want it to be, as if (a) {b}; else {c}; is not legal C, but if (a) {b} else {c}; is.
if (children[1]->getChildren()[0]->getDataRef()->type == code_block) {
std::cout << "Then statement is a block, emitting the block not the statement so no trailing semicolon" << std::endl;
output += generate(children[1]->getChildren()[0], enclosingObject, justFuncName, enclosingFunction).oneString();
} else {
// ALSO we always emit blocks now, to handle cases like defer when several statements need to be
// run in C even though it is a single Kraken statement
std::cout << "Then statement is a simple statement, regular emitting the statement so trailing semicolon" << std::endl;
output += "{ " + generate(children[1], enclosingObject, justFuncName, enclosingFunction).oneString() + " }";
}
// Always emit blocks here too
@@ -685,11 +683,11 @@ CCodeTriple CGenerator::generate(NodeTree<ASTData>* from, NodeTree<ASTData>* enc
//The comma lets the upper function call know we already started the param list
//Note that we got here from a function call. We just pass up this special case and let them finish with the parentheses
} else {
std::cout << "Is not in scope or not type" << std::endl;
//std::cout << "Is not in scope or not type" << std::endl;
return "((" + generate(children[1], enclosingObject, true, enclosingFunction) + ")" + name + functionName + ")";
}
} else {
std::cout << "Is not in scope or not type" << std::endl;
//std::cout << "Is not in scope or not type" << std::endl;
return "((" + generate(children[1], enclosingObject, true, enclosingFunction) + ")" + name + functionName + ")";
}
} else {

View File

@@ -199,7 +199,7 @@ std::string Type::toString(bool showTraits) {
if (is_reference)
typeString = "ref " + typeString;
for (int i = 0; i < indirection; i++)
typeString += "*";
typeString = "*" + typeString;
if (indirection < 0)
typeString += "negative indirection: " + intToString(indirection);
if (traits.size() && showTraits) {

View File

@@ -475,31 +475,48 @@ obj table (Object) {
while (include_state >= items.size)
items.addEnd(map::map<symbol::symbol, vector::vector<action>>())
}
// Table keys are always rebuilt from just a symbol's name and terminal flag,
// so that any other data attached to the symbol can't cause a lookup in the
// table to miss.
fun clean_symbol(sym: ref symbol::symbol): symbol::symbol {
    var stripped = symbol::symbol(sym.name, sym.terminal)
    return stripped
}
fun add_push(from_state: int, on_symbol: ref symbol::symbol, to_state: int) {
expand_to(from_state)
if (items[from_state].contains_key(on_symbol))
items[from_state][on_symbol].addEnd(action(push, to_state))
var cleaned_symbol = clean_symbol(on_symbol)
if (items[from_state].contains_key(cleaned_symbol))
items[from_state][cleaned_symbol].addEnd(action(push, to_state))
else
items[from_state].set(on_symbol, vector::vector(action(push, to_state)))
items[from_state].set(cleaned_symbol, vector::vector(action(push, to_state)))
}
fun add_reduce(from_state: int, on_symbol: ref symbol::symbol, by_rule_no: int) {
expand_to(from_state)
if (items[from_state].contains_key(on_symbol))
items[from_state][on_symbol].addEnd(action(reduce, by_rule_no))
var cleaned_symbol = clean_symbol(on_symbol)
if (items[from_state].contains_key(cleaned_symbol))
items[from_state][cleaned_symbol].addEnd(action(reduce, by_rule_no))
else
items[from_state].set(on_symbol, vector::vector(action(reduce, by_rule_no)))
items[from_state].set(cleaned_symbol, vector::vector(action(reduce, by_rule_no)))
}
fun add_accept(from_state: int, on_symbol: ref symbol::symbol) {
expand_to(from_state)
if (items[from_state].contains_key(on_symbol))
items[from_state][on_symbol].addEnd(action(accept, 0))
var cleaned_symbol = clean_symbol(on_symbol)
if (items[from_state].contains_key(cleaned_symbol))
items[from_state][cleaned_symbol].addEnd(action(accept, 0))
else
items[from_state].set(on_symbol, vector::vector(action(accept, 0)))
items[from_state].set(cleaned_symbol, vector::vector(action(accept, 0)))
}
fun get(state: int, sym: symbol::symbol): vector::vector<action> {
return items[state][sym]
fun get(state: int, on_symbol: symbol::symbol): vector::vector<action> {
var cleaned_symbol = clean_symbol(on_symbol)
return items[state][cleaned_symbol]
}
fun print_string(): string::string {
// Looks up the actions for (state, on_symbol) and returns the first one whose
// kind is push. If there is none, a message is printed and action(-1,-1) is
// returned instead.
fun get_shift(state: int, on_symbol: symbol::symbol): action {
    var candidates = get(state, on_symbol)
    for (var idx = 0; idx < candidates.size; idx++;) {
        if (candidates[idx].act == push) {
            return candidates[idx]
        }
    }
    io::println("tried to get a shift when none existed")
    return action(-1,-1)
}
fun print_string() {
/*return string::string("woo a table of size: ") + items.size*/
io::print("woo a table of size: ")
io::println(items.size)

View File

@@ -62,13 +62,13 @@ obj lexer (Object) {
}
fun next(): symbol::symbol {
if (position >= input.length())
return symbol::symbol("$EOF$", true)
return symbol::eof_symbol()
var max = regs.map(fun(reg_pair: util::pair<string::string,regex::regex>): util::pair<int, string::string> {
return util::make_pair(reg_pair.second.long_match(input.slice(position, -1)), reg_pair.first); })
.max(fun(first: util::pair<int, string::string>, second: util::pair<int, string::string>): bool
{ return first.first < second.first; })
if (max.first < 0)
return symbol::symbol("$INVALID$", true)
return symbol::invalid_symbol()
position += max.first
return symbol::symbol(max.second, true, input.slice(position-max.first, position))
}

View File

@@ -7,6 +7,14 @@ __if_comp__ __C__ simple_passthrough """
/* we have template versions so we don't have to cast (because we don't have that yet) */
// Returns a null pointer typed as *T.
// When compiling to C, the body is handed straight through as C code that
// returns (void*)0. No body is provided here for any other target.
fun null<T>(): *T {
__if_comp__ __C__ {
simple_passthrough(::) """
return (void*)0;
"""
}
}
fun malloc<T>(size: int): *T {
var memPtr: *T;
__if_comp__ __C__ {

View File

@@ -1,26 +1,28 @@
import grammer
import symbol
import tree
import vector
import stack
import map
import util
import string
import io
import mem
import grammer:*
import symbol:*
import lexer:*
import tree:*
import vector:*
import stack:*
import map:*
import util:*
import string:*
import mem:*
import io:*
obj parser (Object) {
var input: vector::vector<symbol::symbol>
var gram: grammer::grammer
// gss
var to_reduce: stack::stack<reduction>
var to_shift: stack::stack< util::pair<*tree::tree<int>, int> >
var SPPFStepNodes: vector::vector< util::pair<*tree::tree<symbol::symbol>, int> >
var packed_map: map::map<*tree::tree<symbol::symbol>, bool>
var input: vector<symbol>
var gram: grammer
var gss: gss
var to_reduce: stack<reduction>
var to_shift: stack< pair<*tree<int>, int> >
var SPPFStepNodes: vector< pair<*tree<symbol>, int> >
var packed_map: map<*tree<symbol>, bool>
fun construct(grammerIn: grammer::grammer): *parser {
fun construct(grammerIn: grammer): *parser {
input.construct()
gram.copy_construct(&grammerIn)
gss.construct()
to_reduce.construct()
to_shift.construct()
SPPFStepNodes.construct()
@@ -30,6 +32,7 @@ obj parser (Object) {
fun copy_construct(old: *parser) {
input.copy_construct(&old->input)
gram.copy_construct(&old->gram)
gss.copy_construct(&old->gss)
to_reduce.copy_construct(&old->to_reduce)
to_shift.copy_construct(&old->to_shift)
SPPFStepNodes.copy_construct(&old->SPPFStepNodes)
@@ -42,15 +45,16 @@ obj parser (Object) {
fun destruct() {
input.destruct()
gram.destruct()
gss.destruct()
to_reduce.destruct()
to_shift.destruct()
SPPFStepNodes.destruct()
packed_map.destruct()
}
fun parse_input(inputStr: string::string, name: string::string): *tree::tree<symbol::symbol> {
fun parse_input(inputStr: string, name: string): *tree<symbol> {
input.clear()
// gss.clear
gss.clear()
to_reduce.clear()
to_shift.clear()
SPPFStepNodes.clear()
@@ -58,31 +62,215 @@ obj parser (Object) {
// if the zero state contains any reductions for state 0 and eof, then
// it must be reducing to the goal state
io::println("checking the bidness")
if (inputStr == "" && gram.parse_table.get(0, symbol::eof_symbol()).contains(grammer::action(grammer::reduce, 0))) {
io::println("Accept on no input for ")
io::println(name)
return mem::new<tree::tree<symbol::symbol>>()->construct(symbol::null_symbol())
println("checking the bidness")
if (inputStr == "" && gram.parse_table.get(0, eof_symbol()).contains(action(reduce, 0))) {
println("Accept on no input for ")
println(name)
return new<tree<symbol>>()->construct(null_symbol())
}
io::println("failed for ")
io::println(name)
return mem::new<tree::tree<symbol::symbol>>()->construct(symbol::null_symbol())
var lex = lexer(gram.terminals)
lex.set_input(inputStr)
var current_symbol.construct(): symbol
for (current_symbol = lex.next(); current_symbol != eof_symbol() && current_symbol != invalid_symbol(); current_symbol = lex.next();) {
/*println("current_symbol is ")*/
/*println(current_symbol.to_string())*/
input.addEnd(current_symbol)
}
if (current_symbol == invalid_symbol()) {
println("lexing failed for ")
println(name)
return null<tree<symbol>>()
}
var v0 = gss.new_node(0)
gss.add_to_frontier(0, v0)
var null_symbol_tree = null<tree<symbol>>()
/*println("looking up")*/
/*println(input[0].to_string())*/
gram.parse_table.get(0, input[0]).for_each(fun(act: action) {
/*println("for each action")*/
if (act.act == push)
to_shift.push(make_pair(v0, act.state_or_rule))
else if (act.act == reduce && fully_reduces_to_null(gram.rules[act.state_or_rule]))
to_reduce.push(reduction(v0, gram.rules[act.state_or_rule].lhs, 0, null_symbol_tree, null_symbol_tree))
})
for (var i = 0; i < input.size; i++;) {
if (gss.frontier_is_empty(i)) {
print(i)
print(" frontier is empty in file '")
print(name)
print("' with txt ")
print(input[i].to_string())
println()
return null<tree<symbol>>()
}
SPPFStepNodes.clear()
while (to_reduce.size())
reducer(i)
shifter(i)
}
var acc_state = gss.frontier_get_acc_state(input.size-1)
if (acc_state) {
println("ACCEPTED!")
return gss.get_edge(acc_state, v0)
}
println("REJECTED")
println("parsing (not lexing) failed for ")
println(name)
return null<tree<symbol>>()
}
// Pops one pending reduction off to_reduce and applies it at input position i.
//
// For every gss path of length max(0, length-1) starting at the reduction's
// `from` node, this:
//   1. looks up the state to go to from the node at the end of the path
//      (parse_table.get_shift on that node's state and the reduction's symbol),
//   2. picks the label for the new edge: the reduction's nullable_parts for a
//      zero-length reduction, otherwise an SPPFStepNodes entry for
//      (symbol, frontier of the reached node), created if it doesn't exist yet,
//   3. connects the node for that state in frontier i (creating it if needed)
//      to the reached node with that label.
fun reducer(i: int) {
    println("reducing")
    var curr_reduction = to_reduce.pop()
    gss.get_reachable_paths(curr_reduction.from, max(0, curr_reduction.length-1)).
    for_each(fun(path: ref vector<*tree<int>>) {
        // edge labels along the path, reversed, plus the reduction's own label
        // for non-empty reductions
        // NOTE(review): path_edges isn't read again in this function yet
        var path_edges = range(path.size-1).map(fun(indx: int): *tree<symbol> { return gss.get_edge(path[indx], path[indx+1]);}).reverse()
        if (curr_reduction.length != 0)
            path_edges.addEnd(curr_reduction.label)
        var curr_reached = path.last()
        var shift_to = gram.parse_table.get_shift(curr_reached->data, curr_reduction.sym).state_or_rule
        var new_label = null<tree<symbol>>()
        if (curr_reduction.length == 0) {
            new_label = curr_reduction.nullable_parts
        } else {
            // reuse the node already made this step for the same symbol and
            // frontier, if there is one
            var reached_frontier = gss.get_containing_frontier(curr_reached)
            for (var j = 0; j < SPPFStepNodes.size; j++;) {
                if (SPPFStepNodes[j].second == reached_frontier
                    && SPPFStepNodes[j].first->data == curr_reduction.sym) {
                    new_label = SPPFStepNodes[j].first
                    break
                }
            }
            if (!new_label) {
                new_label = new<tree<symbol>>()->construct(curr_reduction.sym)
                SPPFStepNodes.addEnd(make_pair(new_label, reached_frontier))
            }
        }
        var shift_to_node = gss.in_frontier(i, shift_to)
        if (shift_to_node) {
            if (!gss.has_edge(shift_to_node, curr_reached)) {
                gss.add_edge(shift_to_node, curr_reached, new_label)
                // do non-null reductions
                if (curr_reduction.length) {
                    gram.parse_table.get(shift_to, input[i]).for_each(fun(act: action) {
                        // Only a reduce action's state_or_rule is a rule number,
                        // so check the kind of action before indexing gram.rules
                        // with it (for a push it is a state number).
                        if (act.act == reduce) {
                            var reduce_rule = gram.rules[act.state_or_rule]
                            if (!fully_reduces_to_null(reduce_rule)) {
                                to_reduce.push(reduction(curr_reached, reduce_rule.lhs,
                                                         reduce_rule.position,
                                                         new<tree<symbol>>()->construct(null_symbol()),
                                                         new_label))
                            }
                        }
                    })
                }
            }
        } else {
            // NOTE(review): no follow-up reductions are queued for a freshly
            // created node here -- confirm whether that's still to be added
            shift_to_node = gss.new_node(shift_to)
            gss.add_to_frontier(i, shift_to_node)
            gss.add_edge(shift_to_node, curr_reached, new_label)
        }
    })
}
// Shift step for input position i. Currently an empty stub: it takes the
// position and does nothing.
fun shifter(i: int) {
}
// True only when rule r is at position 0 and gram.first_vector of its rhs
// contains the null symbol. The first_vector lookup is skipped entirely when
// the position isn't 0.
fun fully_reduces_to_null(r: ref rule): bool {
    if (r.position != 0)
        return false
    return gram.first_vector(r.rhs).contains(null_symbol())
}
}
fun reduction(f: *tree::tree<int>, s: symbol::symbol, l: int, n: *tree::tree<symbol::symbol>, label:*tree::tree<symbol::symbol>): reduction {
// The parser's stack structure (presumably a graph structured stack, going by
// the name): nodes are *tree<int> holding a parser state, grouped into numbered
// frontiers, with a *tree<symbol> label stored for every edge between two nodes.
//
// NOTE(review): nodes come from new<tree<int>>() in new_node, and nothing in
// this object deletes them -- clear() and destruct() only touch the containers.
// Confirm who owns the nodes.
obj gss (Object) {
    // data[f] is the list of nodes in frontier f
    var data: vector<vector<*tree<int>>>
    // edge labels, keyed by the (start, end) node pair
    var edges: map< pair<*tree<int>, *tree<int>>, *tree<symbol> >

    // Constructs both containers and returns this, matching the declared
    // *gss return type so construct() can be chained off of new.
    fun construct(): *gss {
        data.construct()
        edges.construct()
        return this
    }
    fun copy_construct(old: *gss) {
        data.copy_construct(&old->data)
        edges.copy_construct(&old->edges)
    }
    fun destruct() {
        data.destruct()
        edges.destruct()
    }
    // Empties the frontiers and the edge map.
    fun clear() {
        data.clear()
        edges.clear()
    }
    // Allocates a new node for the given state. The node is not placed in any
    // frontier; use add_to_frontier for that.
    fun new_node(state: int): *tree<int> {
        return new<tree<int>>()->construct(state)
    }
    // Adds node to the given frontier, growing data with empty frontiers as
    // needed so that the index exists.
    fun add_to_frontier(frontier: int, node: *tree<int>) {
        while(data.size <= frontier)
            data.addEnd(vector<*tree<int>>())
        data[frontier].addEnd(node)
    }
    // A frontier that was never created counts as empty.
    fun frontier_is_empty(frontier: int): bool {
        return frontier >= data.size || data[frontier].size == 0
    }
    fun frontier_get_acc_state(frontier: int): *tree<int> {
        // the accepting state is always state 1, for now
        return in_frontier(frontier, 1)
    }
    // Returns the node in the given frontier whose state is `state`, or null
    // if there isn't one. A frontier index outside of data (negative, or one
    // that was never created) also gives null instead of indexing out of bounds.
    fun in_frontier(frontier: int, state: int): *tree<int> {
        if (frontier < 0 || frontier >= data.size)
            return null<tree<int>>()
        for (var i = 0; i < data[frontier].size; i++;)
            if (data[frontier][i]->data == state)
                return data[frontier][i]
        return null<tree<int>>()
    }
    // Label stored for the edge start -> end.
    fun get_edge(start: *tree<int>, end: *tree<int>): *tree<symbol> {
        return edges[make_pair(start, end)]
    }
    fun has_edge(start: *tree<int>, end: *tree<int>): bool {
        // could also look in map, but this is faster...
        return start->children.find(end) != -1
    }
    // Records the edge both as a child link on start and as a label in edges.
    fun add_edge(start: *tree<int>, end: *tree<int>, edge: *tree<symbol>) {
        start->children.add(end)
        edges.set(make_pair(start,end), edge)
    }
    // Index of the first frontier containing node, or -1 if none does.
    fun get_containing_frontier(node: *tree<int>): int {
        for (var i = 0; i < data.size; i++;)
            if (data[i].contains(node))
                return i
        return -1
    }
    // Collects every path that follows exactly `length` child links from start.
    // Each path lists its nodes in order, beginning with start, so a length of
    // 0 gives the single path [start].
    fun get_reachable_paths(start: *tree<int>, length: int): vector<vector<*tree<int>>> {
        var paths = vector<vector<*tree<int>>>()
        // current_path is taken by value, so every branch of the recursion
        // extends its own copy
        var recursive_path_find: fun(*tree<int>, int, vector<*tree<int>>):void = fun(start: *tree<int>, length: int, current_path: vector<*tree<int>>) {
            current_path.addEnd(start)
            if (!length) {
                paths.addEnd(current_path)
                return
            }
            start->children.for_each(fun(child: *tree<int>) {
                recursive_path_find(child, length-1, current_path)
            })
        }
        recursive_path_find(start, length, vector<*tree<int>>())
        return paths
    }
}
// Convenience function for making a reduction by value: constructs one with
// the five arguments passed through unchanged, in order, and returns it.
fun reduction(f: *tree<int>, s: symbol, l: int, n: *tree<symbol>, label:*tree<symbol>): reduction {
var toRet.construct(f,s,l,n,label): reduction
return toRet
}
obj reduction (Object) {
var from: *tree::tree<int>
var sym: symbol::symbol
var from: *tree<int>
var sym: symbol
var length: int
var nullable_parts: *tree::tree<symbol::symbol>
var label: *tree::tree<symbol::symbol>
var nullable_parts: *tree<symbol>
var label: *tree<symbol>
fun construct(f: *tree::tree<int>, s: symbol::symbol, l: int, n: *tree::tree<symbol::symbol>, label:*tree::tree<symbol::symbol>): *reduction {
fun construct(f: *tree<int>, s: symbol, l: int, n: *tree<symbol>, label:*tree<symbol>): *reduction {
from = f
sym.copy_construct(&s)
length = l

View File

@@ -8,6 +8,10 @@ fun eof_symbol(): symbol {
var toRet.construct(string::string("$EOF$"), false, string::string("$EOF$")): symbol
return toRet
}
// Builds the "$INVALID$" symbol: constructed with "$INVALID$" as the first
// and third arguments and false as the second, the same shape eof_symbol
// uses for "$EOF$".
fun invalid_symbol(): symbol {
var toRet.construct(string::string("$INVALID$"), false, string::string("$INVALID$")): symbol
return toRet
}
fun symbol(nameIn: *char, terminalIn: bool): symbol {
var toRet.construct(string::string(nameIn), terminalIn, string::string("no_value")): symbol

View File

@@ -1,12 +1,13 @@
import mem
import vector
fun greater<T>(a: T, b: T): T {
// Returns the larger of a and b as judged by operator>.
// When a is not greater than b (including when they compare equal), b is returned.
fun max<T>(a: T, b: T): T {
    if (!(a > b)) {
        return b;
    }
    return a;
}
fun lesser<T>(a: T, b: T): T {
fun min<T>(a: T, b: T): T {
if (a > b)
return b;
return a;
@@ -42,6 +43,12 @@ obj pair<T,U> (Object) {
mem::maybe_destruct(&first)
mem::maybe_destruct(&second)
}
// Member-wise equality: both first and second must compare equal.
// The extra template parameter V (standing in for U) is the usual trick of
// adding an unnecessary template so that this isn't generated unless it's used.
fun operator==<V>(other: ref pair<T,V>): bool {
    if (!(first == other.first))
        return false
    return second == other.second
}
}
fun range(end:int): range {
@@ -71,6 +78,12 @@ obj range {
for (var i = begin; i < end; i+= step;)
func(i)
}
// Calls func on each value of the range (begin, begin+step, ... while < end)
// and returns the results, in order, as a vector.
fun map<T>(func: fun(int): T): vector::vector<T> {
    // Size the result up front, but never ask for a capacity below 1:
    // (end-begin)/step + 1 drops to zero or below for an inverted range
    // (end < begin), where the loop below adds nothing anyway.
    var initial_capacity = (end-begin)/step + 1
    if (initial_capacity < 1)
        initial_capacity = 1
    var ret.construct(initial_capacity) : vector::vector<T>
    for (var i = begin; i < end; i+= step;)
        ret.addEnd(func(i))
    return ret
}
fun any_true(func: fun(int):bool):bool {
for (var i = begin; i < end; i+= step;)
if (func(i))

View File

@@ -70,22 +70,28 @@ obj vector<T> (Object) {
}
fun clone(): vector<T> {
var newVec.construct(): vector<T>
var newVec.construct(size): vector<T>
for (var i = 0; i < size; i++;)
newVec.addEnd(data[i])
return newVec
}
// Returns a new vector holding this vector's elements in the opposite order.
// This vector is left as it was.
fun reverse(): vector<T> {
    var reversed.construct(size): vector<T>
    // offset counts back from the end: size-1, size-2, ..., 0
    for (var offset = 1; offset <= size; offset++;) {
        reversed.addEnd(data[size-offset])
    }
    return reversed
}
fun resize(newSize: int): bool {
var newData: *T = new<T>(newSize);
if (!newData)
return false;
for (var i: int = 0; i < lesser<int>(size, newSize); i++;)
for (var i: int = 0; i < min<int>(size, newSize); i++;)
maybe_copy_construct(&newData[i], &data[i]);
delete(data, size);
data = newData;
available = newSize;
size = lesser(size, newSize)
size = min(size, newSize)
return true;
}
@@ -102,6 +108,12 @@ obj vector<T> (Object) {
fun at(index: int): ref T { return get(index); }
fun operator[](index: int): ref T { return get(index); }
// Reference to the element at index 0, fetched through get (so get's bounds
// handling applies).
fun first(): ref T {
    var front_index = 0
    return get(front_index)
}
// Reference to the element at index size-1, fetched through get (so get's
// bounds handling applies).
fun last(): ref T {
    var back_index = size - 1
    return get(back_index)
}
fun get(index: int): ref T {
if (index < 0 || index >= size) {
println("Vector access out of bounds! Retuning 0th element as sanest option");
@@ -192,13 +204,13 @@ obj vector<T> (Object) {
data[i] = func(data[i])
}
fun map<U>(func: fun(T):U):vector<U> {
var newVec.construct(): vector<U>
var newVec.construct(size): vector<U>
for (var i = 0; i < size; i++;)
newVec.addEnd(func(data[i]))
return newVec
}
fun flatten_map<U>(func: fun(T):vector<U>):vector<U> {
var newVec.construct(): vector<U>
var newVec.construct(size): vector<U>
for (var i = 0; i < size; i++;) {
var to_add = func(data[i])
for (var j = 0; j < to_add.size; j++;)

View File

@@ -46,12 +46,12 @@ fun main():int {
/*returndaaaaaaaaaaaaaa"))*/
//lex.set_input(string("hibyed"))
println("woo lexing:")
//range(8).for_each(fun(i: int) { println(lex.next().to_string()); } )
/*range(8).for_each(fun(i: int) { println(lex.next().to_string()); } )*/
/*range(80).for_each(fun(i: int) { println(lex.next().to_string()); } )*/
println(a.to_string())
a.calculate_state_automaton()
var parse.construct(a): parser
var result = parse.parse_input(string(""), string("fun name"))
var result = parse.parse_input(string("ad"), string("fun name"))
/*var parse.construct(): parser*/
return 0
}

View File

@@ -3,20 +3,20 @@ test: test true
old contributed tests
b: b true
b: b true
$EOF$: no_value true
$EOF$: $EOF$ false
a*: aaa true
b: b true
a*: aa true
b: b true
b: b true
$EOF$: no_value true
$EOF$: $EOF$ false
a|b: b true
$INVALID$: no_value true
$INVALID$: $INVALID$ false
xyzzy: xyzzy true
$EOF$: no_value true
$EOF$: $EOF$ false
(i|n|t|e)+: intent true
$EOF$: no_value true
$EOF$: $EOF$ false

View File

@@ -26,10 +26,10 @@ obj test(Object) {
}
fun main():int {
println(lesser(1,2))
println(lesser(7.0,8.0))
println(greater(1,2))
println(greater(7.0,8.0))
println(min(1,2))
println(min(7.0,8.0))
println(max(1,2))
println(max(7.0,8.0))
range(3,13, 3).for_each(fun(i: int) { print(i); })
println()