diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 0d8a125..55f7b3d 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -1385,8 +1385,8 @@ NodeTree* ASTTransformation::generateThis(NodeTree* scope) { std::vector*> ASTTransformation::scopeLookup(NodeTree* scope, std::string lookup, bool includeModules, std::set*> visited) { std::cout << "Scp]|[e looking up " << lookup << std::endl; std::cout << "current: " << scope->getDataRef()->toString() << std::endl; - for (auto i : scope->getDataRef()->scope) - std::cout << "\t" << i.first << std::endl; + //for (auto i : scope->getDataRef()->scope) + //std::cout << "\t" << i.first << std::endl; //std::cout << i.first << " : " << i.second->toString() << std::endl; // Don't visit this node again when looking for the smae lookup. Note that we don't prevent coming back for the scope operator, as that should be able to come back. if (visited.find(scope) != visited.end()) diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 9825551..6e5a026 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -57,7 +57,7 @@ std::string CGenerator::generateClassStruct(NodeTree* from) { std::string objectString = "struct __struct_dummy_" + scopePrefix(from) + CifyName(data.symbol.getName()) + "__ {\n"; tabLevel++; for (int i = 0; i < children.size(); i++) { - std::cout << children[i]->getName() << std::endl; + //std::cout << children[i]->getName() << std::endl; if (children[i]->getName() != "function") objectString += tabs() + generate(children[i], nullptr).oneString() + "\n"; } @@ -312,7 +312,7 @@ CCodeTriple CGenerator::generate(NodeTree* from, NodeTree* enc } //If we're in an object method, and our enclosing scope is that object, we're a member of the object and should use the this reference. if (enclosingObject && enclosingObject->getDataRef()->scope.find(data.symbol.getName()) != enclosingObject->getDataRef()->scope.end()) - preName += "this->"; + preName = "(" + preName + "this)->"; // incase this is a closed over this that is referencing another thing (I think this happens for a.b when a is supposed to be closed over but isn't) // dereference references, but only if inside a function and not if this is a closed over variable if (enclosingFunction && data.valueType->is_reference && !closed) { preName += "(*"; @@ -417,12 +417,10 @@ CCodeTriple CGenerator::generate(NodeTree* from, NodeTree* enc // If it's a block, because it's also a statement a semicolon will be emitted even though // we don't want it to be, as if (a) {b}; else {c}; is not legal C, but if (a) {b} else {c}; is. if (children[1]->getChildren()[0]->getDataRef()->type == code_block) { - std::cout << "Then statement is a block, emitting the block not the statement so no trailing semicolon" << std::endl; output += generate(children[1]->getChildren()[0], enclosingObject, justFuncName, enclosingFunction).oneString(); } else { // ALSO we always emit blocks now, to handle cases like defer when several statements need to be // run in C even though it is a single Kraken statement - std::cout << "Then statement is a simple statement, regular emitting the statement so trailing semicolon" << std::endl; output += "{ " + generate(children[1], enclosingObject, justFuncName, enclosingFunction).oneString() + " }"; } // Always emit blocks here too @@ -685,11 +683,11 @@ CCodeTriple CGenerator::generate(NodeTree* from, NodeTree* enc //The comma lets the upper function call know we already started the param list //Note that we got here from a function call. We just pass up this special case and let them finish with the perentheses } else { - std::cout << "Is not in scope or not type" << std::endl; + //std::cout << "Is not in scope or not type" << std::endl; return "((" + generate(children[1], enclosingObject, true, enclosingFunction) + ")" + name + functionName + ")"; } } else { - std::cout << "Is not in scope or not type" << std::endl; + //std::cout << "Is not in scope or not type" << std::endl; return "((" + generate(children[1], enclosingObject, true, enclosingFunction) + ")" + name + functionName + ")"; } } else { diff --git a/src/Type.cpp b/src/Type.cpp index e33cee2..3da76a6 100644 --- a/src/Type.cpp +++ b/src/Type.cpp @@ -199,7 +199,7 @@ std::string Type::toString(bool showTraits) { if (is_reference) typeString = "ref " + typeString; for (int i = 0; i < indirection; i++) - typeString += "*"; + typeString = "*" + typeString; if (indirection < 0) typeString += "negative indirection: " + intToString(indirection); if (traits.size() && showTraits) { diff --git a/stdlib/grammer.krak b/stdlib/grammer.krak index 65f3bf1..0137ce4 100644 --- a/stdlib/grammer.krak +++ b/stdlib/grammer.krak @@ -475,31 +475,48 @@ obj table (Object) { while (include_state >= items.size) items.addEnd(map::map>()) } + // we always "clean" the symbol before using it so that having different data doesn't + // prevent us from finding the symbol in the table + fun clean_symbol(sym: ref symbol::symbol): symbol::symbol { + return symbol::symbol(sym.name, sym.terminal) + } fun add_push(from_state: int, on_symbol: ref symbol::symbol, to_state: int) { expand_to(from_state) - if (items[from_state].contains_key(on_symbol)) - items[from_state][on_symbol].addEnd(action(push, to_state)) + var cleaned_symbol = clean_symbol(on_symbol) + if (items[from_state].contains_key(cleaned_symbol)) + items[from_state][cleaned_symbol].addEnd(action(push, to_state)) else - items[from_state].set(on_symbol, vector::vector(action(push, to_state))) + items[from_state].set(cleaned_symbol, vector::vector(action(push, to_state))) } fun add_reduce(from_state: int, on_symbol: ref symbol::symbol, by_rule_no: int) { expand_to(from_state) - if (items[from_state].contains_key(on_symbol)) - items[from_state][on_symbol].addEnd(action(reduce, by_rule_no)) + var cleaned_symbol = clean_symbol(on_symbol) + if (items[from_state].contains_key(cleaned_symbol)) + items[from_state][cleaned_symbol].addEnd(action(reduce, by_rule_no)) else - items[from_state].set(on_symbol, vector::vector(action(reduce, by_rule_no))) + items[from_state].set(cleaned_symbol, vector::vector(action(reduce, by_rule_no))) } fun add_accept(from_state: int, on_symbol: ref symbol::symbol) { expand_to(from_state) - if (items[from_state].contains_key(on_symbol)) - items[from_state][on_symbol].addEnd(action(accept, 0)) + var cleaned_symbol = clean_symbol(on_symbol) + if (items[from_state].contains_key(cleaned_symbol)) + items[from_state][cleaned_symbol].addEnd(action(accept, 0)) else - items[from_state].set(on_symbol, vector::vector(action(accept, 0))) + items[from_state].set(cleaned_symbol, vector::vector(action(accept, 0))) } - fun get(state: int, sym: symbol::symbol): vector::vector { - return items[state][sym] + fun get(state: int, on_symbol: symbol::symbol): vector::vector { + var cleaned_symbol = clean_symbol(on_symbol) + return items[state][cleaned_symbol] } - fun print_string(): string::string { + fun get_shift(state: int, on_symbol: symbol::symbol): action { + var actions = get(state, on_symbol) + for (var i = 0; i < actions.size; i++;) + if (actions[i].act == push) + return actions[i] + io::println("tried to get a shift when none existed") + return action(-1,-1) + } + fun print_string() { /*return string::string("woo a table of size: ") + items.size*/ io::print("woo a table of size: ") io::println(items.size) diff --git a/stdlib/lexer.krak b/stdlib/lexer.krak index 7cf6cda..d966a0d 100644 --- a/stdlib/lexer.krak +++ b/stdlib/lexer.krak @@ -62,13 +62,13 @@ obj lexer (Object) { } fun next(): symbol::symbol { if (position >= input.length()) - return symbol::symbol("$EOF$", true) + return symbol::eof_symbol() var max = regs.map(fun(reg_pair: util::pair): util::pair { return util::make_pair(reg_pair.second.long_match(input.slice(position, -1)), reg_pair.first); }) .max(fun(first: util::pair, second: util::pair): bool { return first.first < second.first; }) if (max.first < 0) - return symbol::symbol("$INVALID$", true) + return symbol::invalid_symbol() position += max.first return symbol::symbol(max.second, true, input.slice(position-max.first, position)) } diff --git a/stdlib/mem.krak b/stdlib/mem.krak index 5299652..23da7cc 100644 --- a/stdlib/mem.krak +++ b/stdlib/mem.krak @@ -7,6 +7,14 @@ __if_comp__ __C__ simple_passthrough """ /* we have a template versions so we don't have to cast (because we don't have that yet) */ +fun null(): *T { + __if_comp__ __C__ { + simple_passthrough(::) """ + return (void*)0; + """ + } +} + fun malloc(size: int): *T { var memPtr: *T; __if_comp__ __C__ { diff --git a/stdlib/parser.krak b/stdlib/parser.krak index 9b50a7a..9d13d0d 100644 --- a/stdlib/parser.krak +++ b/stdlib/parser.krak @@ -1,26 +1,28 @@ -import grammer -import symbol -import tree -import vector -import stack -import map -import util -import string -import io -import mem +import grammer:* +import symbol:* +import lexer:* +import tree:* +import vector:* +import stack:* +import map:* +import util:* +import string:* +import mem:* +import io:* obj parser (Object) { - var input: vector::vector - var gram: grammer::grammer - // gss - var to_reduce: stack::stack - var to_shift: stack::stack< util::pair<*tree::tree, int> > - var SPPFStepNodes: vector::vector< util::pair<*tree::tree, int> > - var packed_map: map::map<*tree::tree, bool> + var input: vector + var gram: grammer + var gss: gss + var to_reduce: stack + var to_shift: stack< pair<*tree, int> > + var SPPFStepNodes: vector< pair<*tree, int> > + var packed_map: map<*tree, bool> - fun construct(grammerIn: grammer::grammer): *parser { + fun construct(grammerIn: grammer): *parser { input.construct() gram.copy_construct(&grammerIn) + gss.construct() to_reduce.construct() to_shift.construct() SPPFStepNodes.construct() @@ -30,6 +32,7 @@ obj parser (Object) { fun copy_construct(old: *parser) { input.copy_construct(&old->input) gram.copy_construct(&old->gram) + gss.copy_construct(&old->gss) to_reduce.copy_construct(&old->to_reduce) to_shift.copy_construct(&old->to_shift) SPPFStepNodes.copy_construct(&old->SPPFStepNodes) @@ -42,15 +45,16 @@ obj parser (Object) { fun destruct() { input.destruct() gram.destruct() + gss.destruct() to_reduce.destruct() to_shift.destruct() SPPFStepNodes.destruct() packed_map.destruct() } - fun parse_input(inputStr: string::string, name: string::string): *tree::tree { + fun parse_input(inputStr: string, name: string): *tree { input.clear() - // gss.clear + gss.clear() to_reduce.clear() to_shift.clear() SPPFStepNodes.clear() @@ -58,31 +62,215 @@ obj parser (Object) { // if the zero state contains any reductions for state 0 and eof, then // it must be reducing to the goal state - io::println("checking the bidness") - if (inputStr == "" && gram.parse_table.get(0, symbol::eof_symbol()).contains(grammer::action(grammer::reduce, 0))) { - io::println("Accept on no input for ") - io::println(name) - return mem::new>()->construct(symbol::null_symbol()) + println("checking the bidness") + if (inputStr == "" && gram.parse_table.get(0, eof_symbol()).contains(action(reduce, 0))) { + println("Accept on no input for ") + println(name) + return new>()->construct(null_symbol()) } - io::println("failed for ") - io::println(name) - return mem::new>()->construct(symbol::null_symbol()) + + var lex = lexer(gram.terminals) + lex.set_input(inputStr) + var current_symbol.construct(): symbol + for (current_symbol = lex.next(); current_symbol != eof_symbol() && current_symbol != invalid_symbol(); current_symbol = lex.next();) { + /*println("current_symbol is ")*/ + /*println(current_symbol.to_string())*/ + input.addEnd(current_symbol) + } + if (current_symbol == invalid_symbol()) { + println("lexing failed for ") + println(name) + return null>() + } + + var v0 = gss.new_node(0) + gss.add_to_frontier(0, v0) + + var null_symbol_tree = null>() + + /*println("looking up")*/ + /*println(input[0].to_string())*/ + gram.parse_table.get(0, input[0]).for_each(fun(act: action) { + /*println("for each action")*/ + if (act.act == push) + to_shift.push(make_pair(v0, act.state_or_rule)) + else if (act.act == reduce && fully_reduces_to_null(gram.rules[act.state_or_rule])) + to_reduce.push(reduction(v0, gram.rules[act.state_or_rule].lhs, 0, null_symbol_tree, null_symbol_tree)) + }) + + + for (var i = 0; i < input.size; i++;) { + if (gss.frontier_is_empty(i)) { + print(i) + print(" frontier is empty in file '") + print(name) + print("' with txt ") + print(input[i].to_string()) + println() + return null>() + } + SPPFStepNodes.clear() + while (to_reduce.size()) + reducer(i) + shifter(i) + } + var acc_state = gss.frontier_get_acc_state(input.size-1) + if (acc_state) { + println("ACCEPTED!") + return gss.get_edge(acc_state, v0) + } + + println("REJECTED") + println("parsing (not lexing) failed for ") + println(name) + return null>() + } + fun reducer(i: int) { + println("reducing") + var curr_reduction = to_reduce.pop() + gss.get_reachable_paths(curr_reduction.from, max(0, curr_reduction.length-1)). + for_each(fun(path: ref vector<*tree>) { + var path_edges = range(path.size-1).map(fun(indx: int): *tree { return gss.get_edge(path[indx], path[indx+1]);}).reverse() + if (curr_reduction.length != 0) + path_edges.addEnd(curr_reduction.label) + var curr_reached = path.last() + var shift_to = gram.parse_table.get_shift(curr_reached->data, curr_reduction.sym).state_or_rule + var new_label = null>() + if (curr_reduction.length == 0) { + new_label = curr_reduction.nullable_parts + } else { + var reached_frontier = gss.get_containing_frontier(curr_reached) + for (var j = 0; j < SPPFStepNodes.size; j++;) { + if (SPPFStepNodes[j].second == reached_frontier + && SPPFStepNodes[j].first->data == curr_reduction.sym) { + new_label = SPPFStepNodes[j].first + break + } + } + if (!new_label) { + new_label = new>()->construct(curr_reduction.sym) + SPPFStepNodes.addEnd(make_pair(new_label, reached_frontier)) + } + } + var shift_to_node = gss.in_frontier(i, shift_to) + if (shift_to_node) { + if (!gss.has_edge(shift_to_node, curr_reached)) { + gss.add_edge(shift_to_node, curr_reached, new_label) + // do non-null reductions + if (curr_reduction.length) { + gram.parse_table.get(shift_to, input[i]).for_each(fun(act: action) { + var reduce_rule = gram.rules[act.state_or_rule] + if (act.act == reduce && !fully_reduces_to_null(reduce_rule)) + to_reduce.push(reduction(curr_reached, reduce_rule.lhs, + reduce_rule.position, + new>()->construct(null_symbol()), + new_label)) + }) + } + } + } else { + shift_to_node = gss.new_node(shift_to) + gss.add_to_frontier(i, shift_to_node) + gss.add_edge(shift_to_node, curr_reached, new_label) + } + }) + } + fun shifter(i: int) { + } + + fun fully_reduces_to_null(r: ref rule): bool { + return r.position == 0 && gram.first_vector(r.rhs).contains(null_symbol()) } } -fun reduction(f: *tree::tree, s: symbol::symbol, l: int, n: *tree::tree, label:*tree::tree): reduction { +obj gss (Object) { + var data: vector>> + var edges: map< pair<*tree, *tree>, *tree > + + fun construct(): *gss { + data.construct() + edges.construct() + } + fun copy_construct(old: *gss) { + data.copy_construct(&old->data) + edges.copy_construct(&old->edges) + } + fun destruct() { + data.destruct() + edges.destruct() + } + fun clear() { + data.clear() + edges.clear() + } + fun new_node(state: int): *tree { + return new>()->construct(state) + } + fun add_to_frontier(frontier: int, node: *tree) { + while(data.size <= frontier) + data.addEnd(vector<*tree>()) + data[frontier].addEnd(node) + } + fun frontier_is_empty(frontier: int): bool { + return frontier >= data.size || data[frontier].size == 0 + } + fun frontier_get_acc_state(frontier: int): *tree { + // the accepting state is always state 1, for now + return in_frontier(frontier, 1) + } + fun in_frontier(frontier: int, state: int): *tree { + for (var i = 0; i < data[frontier].size; i++;) + if (data[frontier][i]->data == state) + return data[frontier][i] + return null>() + } + fun get_edge(start: *tree, end: *tree): *tree { + return edges[make_pair(start, end)] + } + fun has_edge(start: *tree, end: *tree): bool { + // could also look in map, but this is faster... + return start->children.find(end) != -1 + } + fun add_edge(start: *tree, end: *tree, edge: *tree) { + start->children.add(end) + edges.set(make_pair(start,end), edge) + } + fun get_containing_frontier(node: *tree): int { + for (var i = 0; i < data.size; i++;) + if (data[i].contains(node)) + return i + return -1 + } + fun get_reachable_paths(start: *tree, length: int): vector>> { + var paths = vector>>() + var recursive_path_find: fun(*tree, int, vector<*tree>):void = fun(start: *tree, length: int, current_path: vector<*tree>) { + current_path.addEnd(start) + if (!length) { + paths.addEnd(current_path) + return + } + start->children.for_each(fun(child: *tree) { + recursive_path_find(child, length-1, current_path) + }) + } + recursive_path_find(start, length, vector<*tree>()) + return paths + } +} + +fun reduction(f: *tree, s: symbol, l: int, n: *tree, label:*tree): reduction { var toRet.construct(f,s,l,n,label): reduction return toRet } obj reduction (Object) { - var from: *tree::tree - var sym: symbol::symbol + var from: *tree + var sym: symbol var length: int - var nullable_parts: *tree::tree - var label: *tree::tree + var nullable_parts: *tree + var label: *tree - fun construct(f: *tree::tree, s: symbol::symbol, l: int, n: *tree::tree, label:*tree::tree): *reduction { + fun construct(f: *tree, s: symbol, l: int, n: *tree, label:*tree): *reduction { from = f sym.copy_construct(&s) length = l diff --git a/stdlib/symbol.krak b/stdlib/symbol.krak index 3e62ee9..6769469 100644 --- a/stdlib/symbol.krak +++ b/stdlib/symbol.krak @@ -8,6 +8,10 @@ fun eof_symbol(): symbol { var toRet.construct(string::string("$EOF$"), false, string::string("$EOF$")): symbol return toRet } +fun invalid_symbol(): symbol { + var toRet.construct(string::string("$INVALID$"), false, string::string("$INVALID$")): symbol + return toRet +} fun symbol(nameIn: *char, terminalIn: bool): symbol { var toRet.construct(string::string(nameIn), terminalIn, string::string("no_value")): symbol diff --git a/stdlib/util.krak b/stdlib/util.krak index 93182ff..bfcf60a 100644 --- a/stdlib/util.krak +++ b/stdlib/util.krak @@ -1,12 +1,13 @@ import mem +import vector -fun greater(a: T, b: T): T { +fun max(a: T, b: T): T { if (a > b) return a; return b; } -fun lesser(a: T, b: T): T { +fun min(a: T, b: T): T { if (a > b) return b; return a; @@ -42,6 +43,12 @@ obj pair (Object) { mem::maybe_destruct(&first) mem::maybe_destruct(&second) } + + // the old unnecessary template to prevent generation + // if not used trick (in this case, changing out U with V) + fun operator==(other: ref pair): bool { + return first == other.first && second == other.second + } } fun range(end:int): range { @@ -71,6 +78,12 @@ obj range { for (var i = begin; i < end; i+= step;) func(i) } + fun map(func: fun(int): T): vector::vector { + var ret.construct( (end-begin)/step + 1 ) : vector::vector + for (var i = begin; i < end; i+= step;) + ret.addEnd(func(i)) + return ret + } fun any_true(func: fun(int):bool):bool { for (var i = begin; i < end; i+= step;) if (func(i)) diff --git a/stdlib/vector.krak b/stdlib/vector.krak index 32fefae..56a9464 100644 --- a/stdlib/vector.krak +++ b/stdlib/vector.krak @@ -70,22 +70,28 @@ obj vector (Object) { } fun clone(): vector { - var newVec.construct(): vector + var newVec.construct(size): vector for (var i = 0; i < size; i++;) newVec.addEnd(data[i]) return newVec } + fun reverse(): vector { + var newVec.construct(size): vector + for (var i = 0; i < size; i++;) + newVec.addEnd(data[(size-i)-1]) + return newVec + } fun resize(newSize: int): bool { var newData: *T = new(newSize); if (!newData) return false; - for (var i: int = 0; i < lesser(size, newSize); i++;) + for (var i: int = 0; i < min(size, newSize); i++;) maybe_copy_construct(&newData[i], &data[i]); delete(data, size); data = newData; available = newSize; - size = lesser(size, newSize) + size = min(size, newSize) return true; } @@ -102,6 +108,12 @@ obj vector (Object) { fun at(index: int): ref T { return get(index); } fun operator[](index: int): ref T { return get(index); } + fun first(): ref T { + return get(0) + } + fun last(): ref T { + return get(size-1) + } fun get(index: int): ref T { if (index < 0 || index >= size) { println("Vector access out of bounds! Retuning 0th element as sanest option"); @@ -192,13 +204,13 @@ obj vector (Object) { data[i] = func(data[i]) } fun map(func: fun(T):U):vector { - var newVec.construct(): vector + var newVec.construct(size): vector for (var i = 0; i < size; i++;) newVec.addEnd(func(data[i])) return newVec } fun flatten_map(func: fun(T):vector):vector { - var newVec.construct(): vector + var newVec.construct(size): vector for (var i = 0; i < size; i++;) { var to_add = func(data[i]) for (var j = 0; j < to_add.size; j++;) diff --git a/tests/test_grammer.krak b/tests/test_grammer.krak index 3a42fbb..d11a886 100644 --- a/tests/test_grammer.krak +++ b/tests/test_grammer.krak @@ -46,12 +46,12 @@ fun main():int { /*returndaaaaaaaaaaaaaa"))*/ //lex.set_input(string("hibyed")) println("woo lexing:") - //range(8).for_each(fun(i: int) { println(lex.next().to_string()); } ) + /*range(8).for_each(fun(i: int) { println(lex.next().to_string()); } )*/ /*range(80).for_each(fun(i: int) { println(lex.next().to_string()); } )*/ println(a.to_string()) a.calculate_state_automaton() var parse.construct(a): parser - var result = parse.parse_input(string(""), string("fun name")) + var result = parse.parse_input(string("ad"), string("fun name")) /*var parse.construct(): parser*/ return 0 } diff --git a/tests/test_lexer.expected_results b/tests/test_lexer.expected_results index b2bc90f..641330d 100644 --- a/tests/test_lexer.expected_results +++ b/tests/test_lexer.expected_results @@ -3,20 +3,20 @@ test: test true old contributed tests b: b true b: b true -$EOF$: no_value true +$EOF$: $EOF$ false a*: aaa true b: b true a*: aa true b: b true b: b true -$EOF$: no_value true +$EOF$: $EOF$ false a|b: b true -$INVALID$: no_value true +$INVALID$: $INVALID$ false xyzzy: xyzzy true -$EOF$: no_value true +$EOF$: $EOF$ false (i|n|t|e)+: intent true -$EOF$: no_value true +$EOF$: $EOF$ false diff --git a/tests/test_util.krak b/tests/test_util.krak index 6dc261f..054bd57 100644 --- a/tests/test_util.krak +++ b/tests/test_util.krak @@ -26,10 +26,10 @@ obj test(Object) { } fun main():int { - println(lesser(1,2)) - println(lesser(7.0,8.0)) - println(greater(1,2)) - println(greater(7.0,8.0)) + println(min(1,2)) + println(min(7.0,8.0)) + println(max(1,2)) + println(max(7.0,8.0)) range(3,13, 3).for_each(fun(i: int) { print(i); }) println()