From 8c490908d475c5c0f4ae5518757b3be3fd960014 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Mon, 13 Jul 2015 12:16:30 -0400 Subject: [PATCH] Saving work pre-references --- CMakeLists.txt | 2 +- krakenGrammer.kgm | 8 +- src/ASTTransformation.cpp | 5 ++ src/CGenerator.cpp | 18 ++++- stdlib/grammer.krak | 98 +++++++++++++++++++---- stdlib/lexer.krak | 16 ++-- stdlib/set.krak | 6 ++ stdlib/vector.krak | 1 - tests/grammer.kgm | 3 + tests/test_grammer.krak | 38 +++++++-- tests/test_set.expected_results | 5 ++ tests/test_set.krak | 7 ++ tests/test_short_circuit.expected_results | 8 ++ tests/test_short_circuit.krak | 38 +++++++++ 14 files changed, 221 insertions(+), 32 deletions(-) create mode 100644 tests/test_short_circuit.expected_results create mode 100644 tests/test_short_circuit.krak diff --git a/CMakeLists.txt b/CMakeLists.txt index 41c0cd6..5803f04 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required (VERSION 2.6) project(Kraken) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -g") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include) diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index 0910487..b84579c 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -65,9 +65,11 @@ right_shift = ">" ">" ; overloadable_operator = "\+" | "-" | "\*" | "/" | "%" | "^" | "&" | "\|" | "~" | "!" 
| "," | "=" | "\+\+" | "--" | "<<" | right_shift | "==" | "!=" | "&&" | "\|\|" | "\+=" | "-=" | "/=" | "%=" | "^=" | "&=" | "\|=" | "\*=" | "<<=" | ">>=" | "->" | "\(" "\)" | "[]" | "[]=" ; func_identifier = identifier | identifier overloadable_operator ; # allow omitting of return type (automatic void) -typed_return = dec_type | ; -function = "fun" WS func_identifier WS template_dec WS "\(" WS opt_typed_parameter_list WS "\)" WS typed_return WS statement | "fun" WS func_identifier WS "\(" WS opt_typed_parameter_list WS "\)" WS typed_return WS statement ; -lambda = "fun" WS "\(" WS opt_typed_parameter_list WS "\)" WS typed_return WS statement ; + +# HACKY - typed_return has its own internal whitespace so as not to make WS typed_return-reduces to null WS ambiguous +typed_return = WS dec_type | ; +function = "fun" WS func_identifier WS template_dec WS "\(" WS opt_typed_parameter_list WS "\)" typed_return WS statement | "fun" WS func_identifier WS "\(" WS opt_typed_parameter_list WS "\)" typed_return WS statement ; +lambda = "fun" WS "\(" WS opt_typed_parameter_list WS "\)" typed_return WS statement ; opt_typed_parameter_list = typed_parameter_list | ; typed_parameter_list = typed_parameter_list WS "," WS typed_parameter | typed_parameter ; diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 0a9e4d5..106ac7b 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -389,7 +389,12 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree } else { auto possibleMatches = scopeLookup(scope, lookupName); if (!possibleMatches.size()) { + std::cerr << std::endl; std::cerr << "scope lookup error! 
Could not find " << lookupName << " in identifier (scopeLookup)" << std::endl; + std::cerr << "lookup failedin file " << getUpperTranslationUnit(scope)->getDataRef()->symbol.getName() << std::endl; + std::cerr << "note that this might not be the file where the error is" << std::endl; + std::cerr << "obj.non_existant_member would fail in the file that defines obj's type, for instance" << std::endl; + std::cerr << std::endl; throw "LOOKUP ERROR: " + lookupName; } // can't cull out functiokns b/c we might want them as values diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index e11d600..f82ed0d 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -636,9 +636,23 @@ CCodeTriple CGenerator::generate(NodeTree* from, NodeTree* enc if (name == "[]") return "(" + generate(children[1], enclosingObject, true, enclosingFunction) + ")[" + generate(children[2],enclosingObject, true, enclosingFunction) + "]"; if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!=" - || name == "<" || name == ">" || name == "%" || name == "=" || name == "+=" || name == "-=" || name == "*=" || name == "/=" || name == "||" - || name == "&&") { + || name == "<" || name == ">" || name == "%" || name == "=" || name == "+=" || name == "-=" || name == "*=" || name == "/=") { return "((" + generate(children[1], enclosingObject, true, enclosingFunction) + ")" + name + "(" + generate(children[2], enclosingObject, true, enclosingFunction) + "))"; + } else if (name == "&&" || name == "||") { + // b/c short circuiting, these have to be done separately + CCodeTriple lhs = generate(children[1], enclosingObject, true, enclosingFunction); + CCodeTriple rhs = generate(children[2], enclosingObject, true, enclosingFunction); + output.preValue = lhs.preValue; + std::string shortcircuit_result = "shortcircuit_result" + getID(); + output.preValue += "bool " + shortcircuit_result + " = " + lhs.value + ";\n"; + output.preValue += 
lhs.postValue; + output.preValue += "if (" + std::string(name == "||" ? "!":"") + shortcircuit_result + ") { \n"; + output.preValue += rhs.preValue; + output.preValue += shortcircuit_result + " = " + rhs.value + ";\n"; + output.preValue += rhs.postValue; + output.preValue += "}\n"; + output.value = shortcircuit_result; + return output; } else if (name == "." || name == "->") { if (children.size() == 1) return "/*dot operation with one child*/" + generate(children[0], enclosingObject, true, enclosingFunction).oneString() + "/*end one child*/"; diff --git a/stdlib/grammer.krak b/stdlib/grammer.krak index b9c4ff9..b84b352 100644 --- a/stdlib/grammer.krak +++ b/stdlib/grammer.krak @@ -1,6 +1,7 @@ import string import vector import set +import map import symbol import regex import io @@ -68,9 +69,10 @@ fun load_grammer(gram_str: string::string): grammer { rightSide = vector::vector() doLeftSide = true } else { - if (doLeftSide) + if (doLeftSide) { leftSide = symbol::symbol(word, true) - else + gram.non_terminals.add(leftSide) + } else { if (word[0] == '"') { // ok, we support both plain terminals "hia*" // and decorated terminals "hia*":hi_with_as @@ -78,14 +80,19 @@ fun load_grammer(gram_str: string::string): grammer { // the end of the string var last_quote = word.length()-1 while(word[last_quote] != '"') last_quote-- - rightSide.add(symbol::symbol(word.slice(1,last_quote), true)) - if (last_quote != word.length()-1) - gram.regexs.add(util::make_pair(word.slice(last_quote+2, -1), regex::regex(word.slice(1,last_quote)))) - else - gram.regexs.add(util::make_pair(word, regex::regex(word.slice(1,last_quote)))) + if (last_quote != word.length()-1) { + rightSide.add(symbol::symbol(word.slice(last_quote+2, -1), true)) + gram.terminals.add(util::make_pair(symbol::symbol(word.slice(last_quote+2, -1), true), regex::regex(word.slice(1,last_quote)))) + } else { + rightSide.add(symbol::symbol(word, true)) + gram.terminals.add(util::make_pair(symbol::symbol(word, true), 
regex::regex(word.slice(1,last_quote)))) + } } else { - rightSide.add(symbol::symbol(word, false)) + var non_term = symbol::symbol(word, false) + rightSide.add(non_term) + gram.non_terminals.add(non_term) } + } doLeftSide = false } }) @@ -94,15 +101,21 @@ fun load_grammer(gram_str: string::string): grammer { obj grammer (Object) { var rules: vector::vector - var regexs: vector::vector> + var non_terminals: set::set + var terminals: vector::vector> + var first_set_map: map::map> fun construct(): *grammer { rules.construct() - regexs.construct() + non_terminals.construct() + terminals.construct() + first_set_map.construct() } fun copy_construct(old: *grammer) { rules.copy_construct(&old->rules) - regexs.copy_construct(&old->regexs) + non_terminals.copy_construct(&old->non_terminals) + terminals.copy_construct(&old->terminals) + first_set_map.copy_construct(&old->first_set_map) } fun operator=(other: grammer) { destruct() @@ -110,14 +123,71 @@ obj grammer (Object) { } fun destruct() { rules.destruct() - regexs.destruct() + non_terminals.destruct() + terminals.destruct() + first_set_map.destruct() + } + + fun calculate_first_set() { + // the first set of a terminal is itself + terminals.for_each( fun(terminal: util::pair) + first_set_map[terminal.first] = set::set(terminal.first) + ) + // start out the non-terminals as empty sets + non_terminals.for_each( fun(non_terminal: symbol::symbol) + first_set_map[non_terminal] = set::set() + ) + var first_helper = fun(rhs: vector::vector): set::set { + var toRet = set::set() + rhs.for_each(fun(sym: symbol::symbol) { + toRet.add(first_set_map[sym]) + }) + return toRet + } + var changed = true + while (changed) { + io::println("//////////current state of map/////////////") + first_set_map.keys.for_each(fun(sym: symbol::symbol) { + io::print("for ") + io::println(sym.to_string()) + io::println("map is:") + first_set_map[sym].for_each(fun(look: symbol::symbol) { + io::print("lookahead: "); io::println(look.to_string()) + }) + }) + 
changed = false + rules.for_each( fun(r: rule) { + var rule_lookahead = first_helper(r.rhs) + if (!changed) { + io::println(r.to_string()) + changed = !first_set_map[r.lhs].contains(rule_lookahead) + io::print("changed: "); io::println(changed) + io::print("\tcurrent lookahead is sized:") + io::println(first_set_map[r.lhs].size()) + io::println("\tcurrent lookahead is:") + first_set_map[r.lhs].for_each(fun(look: symbol::symbol) { + io::print("\t\tlookahead: "); io::println(look.to_string()) + }) + io::println() + io::print("\rule lookahead is sized:") + io::println(rule_lookahead.size()) + io::println("\trule lookahead is:") + rule_lookahead.for_each(fun(look: symbol::symbol) { + io::print("\t\tlookahead: "); io::println(look.to_string()) + }) + } + first_set_map[r.lhs].add(rule_lookahead) + }) + } } fun to_string(): string::string { var result = string::string("grammer rules:") rules.for_each( fun(i : rule) { result += string::string("\n\t") + i.to_string(); } ) - result += "\nregexs:" - regexs.for_each( fun(i : util::pair) { result += string::string("\n\t") + i.first + ": " + i.second.regexString; } ) + result += "\nnon_terminals:" + non_terminals.for_each( fun(i : symbol::symbol) { result += string::string("\n\t") + i.to_string(); } ) + result += "\nterminals:" + terminals.for_each( fun(i : util::pair) { result += string::string("\n\t") + i.first.to_string() + ": " + i.second.regexString; } ) return result } } diff --git a/stdlib/lexer.krak b/stdlib/lexer.krak index bce56b4..7cf6cda 100644 --- a/stdlib/lexer.krak +++ b/stdlib/lexer.krak @@ -5,14 +5,20 @@ import vector import util fun lexer(regs: vector::vector): lexer { - return lexer(regs.map( fun(reg: regex::regex): util::pair { - return util::make_pair(reg.regexString,reg) - })) + /*var toRet:lexer*/ + var toRet.construct() :lexer + regs.for_each( fun(reg: regex::regex) { + toRet.add_regex(util::make_pair(reg.regexString, reg)); + }) + return toRet } -fun lexer(regs: vector::vector>): lexer { +fun lexer(regs: 
vector::vector>): lexer { + /*var toRet:lexer*/ var toRet.construct() :lexer - regs.for_each( fun(reg: util::pair) toRet.add_regex(reg); ) + regs.for_each( fun(reg: util::pair) + toRet.add_regex(util::make_pair(reg.first.name, reg.second)); + ) return toRet } diff --git a/stdlib/set.krak b/stdlib/set.krak index 9f1330a..bbe4ad2 100644 --- a/stdlib/set.krak +++ b/stdlib/set.krak @@ -44,6 +44,9 @@ obj set (Object) { fun size():int { return data.size } + fun contains(items: set): bool { + return items.size() == 0 || !items.any_true( fun(item: T): bool return !contains(item); ) + } fun contains(item: T): bool { return data.find(item) != -1 } @@ -65,5 +68,8 @@ obj set (Object) { fun for_each(func: fun(T):void) { data.for_each(func) } + fun any_true(func: fun(T):bool):bool { + return data.any_true(func) + } } diff --git a/stdlib/vector.krak b/stdlib/vector.krak index 4c7f664..f399f55 100644 --- a/stdlib/vector.krak +++ b/stdlib/vector.krak @@ -25,7 +25,6 @@ obj vector (Object) { return this; } - fun copy_construct(old: *vector): void { construct() for (var i = 0; i < old->size; i++;) diff --git a/tests/grammer.kgm b/tests/grammer.kgm index dcc387f..25eefb2 100644 --- a/tests/grammer.kgm +++ b/tests/grammer.kgm @@ -11,3 +11,6 @@ d = "has ll\" \\\"y8\" \\\\" ; d = "has space" ; +d = e ; +e = f | ; +f = ; diff --git a/tests/test_grammer.krak b/tests/test_grammer.krak index d46016a..3ffa009 100644 --- a/tests/test_grammer.krak +++ b/tests/test_grammer.krak @@ -3,15 +3,41 @@ import grammer:* import lexer:* import string:* import util:* +import symbol:* fun main():int { - var a = load_grammer(read_file(string("../krakenGrammer.kgm"))) - /*var a = load_grammer(read_file(string("grammer.kgm")))*/ + + /*var a = load_grammer(read_file(string("../krakenGrammer.kgm")))*/ + var a = load_grammer(read_file(string("grammer.kgm"))) println(a.to_string()) - var lex = lexer(a.regexs) - lex.set_input(read_file(string("test_grammer.krak"))) - /*lex.set_input(string("ccdahas spacedhas*/ 
-/*returndaaaaaaaaaaaaaa"))*/ + /*a.calculate_first_set()*/ + println("///////////////////START FIRST SET/////////////") + println("//TERMINALS//") + a.terminals.for_each( fun(terminal: util::pair) { + var set_str = string::string("{ ") + a.first_set_map[terminal.first].for_each( fun(sym: symbol::symbol) { + set_str += sym.to_string() + " " + }) + set_str += "}" + print(terminal.first.to_string() + " first: " + set_str + "\n") + }) + println("//NON TERMINALS//") + a.non_terminals.for_each( fun(non_terminal: symbol::symbol) { + var set_str = string::string("{ ") + a.first_set_map[non_terminal].for_each( fun(sym: symbol::symbol) { + set_str += sym.to_string() + " " + }) + set_str += "}" + print(non_terminal.to_string() + " first: " + set_str + "\n") + println() + }) + println("///////////////////END FIRST SET/////////////") + + var lex = lexer(a.terminals) + + /*lex.set_input(read_file(string("test_grammer.krak")))*/ + lex.set_input(string("ccdahas spacedhas +returndaaaaaaaaaaaaaa")) println("woo lexing:") range(8).for_each(fun(i: int) { println(lex.next().to_string()); } ) /*range(80).for_each(fun(i: int) { println(lex.next().to_string()); } )*/ diff --git a/tests/test_set.expected_results b/tests/test_set.expected_results index ba67780..7844457 100644 --- a/tests/test_set.expected_results +++ b/tests/test_set.expected_results @@ -9,5 +9,10 @@ false false true true +contains set: +false +false +true +all: 4 5 diff --git a/tests/test_set.krak b/tests/test_set.krak index 813bfba..01f56fa 100644 --- a/tests/test_set.krak +++ b/tests/test_set.krak @@ -1,5 +1,6 @@ import io:* import set:* +import vector_literals:* fun main():int { var s = set(3) @@ -19,6 +20,12 @@ fun main():int { println(s.contains(4)) println(s.contains(5)) + println("contains set:") + println(s.contains(from_vector(vector(1,2,3)))) + println(s.contains(from_vector(vector(4,5,3)))) + println(s.contains(from_vector(vector(4,5)))) + + println("all:") s.for_each( fun(it: int) println(it); ) return 0 } 
diff --git a/tests/test_short_circuit.expected_results b/tests/test_short_circuit.expected_results new file mode 100644 index 0000000..104e3c1 --- /dev/null +++ b/tests/test_short_circuit.expected_results @@ -0,0 +1,8 @@ +early or +was true +early and + +late or +false_extra: was true +late and +true_extra: diff --git a/tests/test_short_circuit.krak b/tests/test_short_circuit.krak new file mode 100644 index 0000000..4d67e92 --- /dev/null +++ b/tests/test_short_circuit.krak @@ -0,0 +1,38 @@ +import io:* + +fun is_true():bool { + return true +} + +fun is_true_extra():bool { + print("true_extra: ") + return true +} + +fun is_false():bool { + return false +} + +fun is_false_extra():bool { + print("false_extra: ") + return false +} + +fun main():int { + println("early or") + if (is_true() || is_false_extra()) + println("was true") + println("early and") + if (is_false() && is_true_extra()) + println("was false") + println() + println("late or") + if (is_false_extra() || is_true()) + println("was true") + println("late and") + if (is_true_extra() && is_false()) + println("was false") + println() + return 0 +} +