From 8c490908d475c5c0f4ae5518757b3be3fd960014 Mon Sep 17 00:00:00 2001
From: Nathan Braswell <miloignis@gmail.com>
Date: Mon, 13 Jul 2015 12:16:30 -0400
Subject: [PATCH] Saving work pre-references

---
 CMakeLists.txt                            |  2 +-
 krakenGrammer.kgm                         |  8 +-
 src/ASTTransformation.cpp                 |  5 ++
 src/CGenerator.cpp                        | 18 ++++-
 stdlib/grammer.krak                       | 98 +++++++++++++++++++----
 stdlib/lexer.krak                         | 16 ++--
 stdlib/set.krak                           |  6 ++
 stdlib/vector.krak                        |  1 -
 tests/grammer.kgm                         |  3 +
 tests/test_grammer.krak                   | 38 +++++++--
 tests/test_set.expected_results           |  5 ++
 tests/test_set.krak                       |  7 ++
 tests/test_short_circuit.expected_results |  8 ++
 tests/test_short_circuit.krak             | 38 +++++++++
 14 files changed, 221 insertions(+), 32 deletions(-)
 create mode 100644 tests/test_short_circuit.expected_results
 create mode 100644 tests/test_short_circuit.krak
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 41c0cd6..5803f04 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required (VERSION 2.6)
 project(Kraken)
 
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -g")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
 
 set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include)
diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm
index 0910487..b84579c 100644
--- a/krakenGrammer.kgm
+++ b/krakenGrammer.kgm
@@ -65,9 +65,11 @@ right_shift = ">" ">" ;
 overloadable_operator = "\+" | "-" | "\*" | "/" | "%" | "^" | "&" | "\|" | "~" | "!" | "," | "=" | "\+\+" | "--" | "<<" | right_shift | "==" | "!=" | "&&" | "\|\|" | "\+=" | "-=" | "/=" | "%=" | "^=" | "&=" | "\|=" | "\*=" | "<<=" | ">>=" | "->" | "\(" "\)" | "[]" | "[]=" ;
 func_identifier = identifier | identifier overloadable_operator ;
 # allow omitting of return type (automatic void)
-typed_return = dec_type | ;
-function = "fun" WS func_identifier WS template_dec WS "\(" WS opt_typed_parameter_list WS "\)" WS typed_return WS statement | "fun" WS func_identifier WS "\(" WS opt_typed_parameter_list WS "\)" WS typed_return WS statement ;
-lambda = "fun" WS "\(" WS opt_typed_parameter_list WS "\)" WS typed_return WS statement ;
+
+# HACKY - typed_return has its own internal whitespace so as not to make "WS typed_return WS" ambiguous when typed_return reduces to null
+typed_return = WS dec_type | ;
+function = "fun" WS func_identifier WS template_dec WS "\(" WS opt_typed_parameter_list WS "\)" typed_return WS statement | "fun" WS func_identifier WS "\(" WS opt_typed_parameter_list WS "\)" typed_return WS statement ;
+lambda = "fun" WS "\(" WS opt_typed_parameter_list WS "\)" typed_return WS statement ;
 
 opt_typed_parameter_list = typed_parameter_list | ;
 typed_parameter_list = typed_parameter_list WS "," WS typed_parameter | typed_parameter ;
diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp
index 0a9e4d5..106ac7b 100644
--- a/src/ASTTransformation.cpp
+++ b/src/ASTTransformation.cpp
@@ -389,7 +389,12 @@ NodeTree<ASTData>* ASTTransformation::transform(NodeTree<Symbol>* from, NodeTree
         } else {
             auto possibleMatches = scopeLookup(scope, lookupName);
             if (!possibleMatches.size()) {
+                std::cerr << std::endl;
 			    std::cerr << "scope lookup error! Could not find " << lookupName << " in identifier (scopeLookup)" << std::endl;
+                std::cerr << "lookup failed in file " << getUpperTranslationUnit(scope)->getDataRef()->symbol.getName() << std::endl;
+                std::cerr << "note that this might not be the file where the error is" << std::endl;
+                std::cerr << "obj.non_existant_member would fail in the file that defines obj's type, for instance" << std::endl;
+                std::cerr << std::endl;
 			    throw "LOOKUP ERROR: " + lookupName;
             }
             // can't cull out functiokns b/c we might want them as values
diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp
index e11d600..f82ed0d 100644
--- a/src/CGenerator.cpp
+++ b/src/CGenerator.cpp
@@ -636,9 +636,23 @@ CCodeTriple CGenerator::generate(NodeTree<ASTData>* from, NodeTree<ASTData>* enc
 				if (name == "[]")
 					return "(" + generate(children[1], enclosingObject, true, enclosingFunction) + ")[" + generate(children[2],enclosingObject, true, enclosingFunction) + "]";
 				if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!="
-					|| name == "<" || name == ">" || name == "%" || name == "=" || name == "+=" || name == "-=" || name == "*=" || name == "/=" || name == "||"
-					|| name == "&&") {
+					|| name == "<" || name == ">" || name == "%" || name == "=" || name == "+=" || name == "-=" || name == "*=" || name == "/=") {
 					return "((" + generate(children[1], enclosingObject, true, enclosingFunction) + ")" + name + "(" + generate(children[2], enclosingObject, true, enclosingFunction) + "))";
+                } else if (name == "&&" || name == "||") {
+                    // b/c of short circuiting, these have to be done separately
+                    CCodeTriple lhs = generate(children[1], enclosingObject, true, enclosingFunction);
+                    CCodeTriple rhs = generate(children[2], enclosingObject, true, enclosingFunction);
+                    output.preValue = lhs.preValue;
+                    std::string shortcircuit_result = "shortcircuit_result" + getID();
+                    output.preValue += "bool " + shortcircuit_result + " = " + lhs.value + ";\n";
+                    output.preValue += lhs.postValue;
+                    output.preValue += "if (" + std::string(name == "||" ? "!":"") + shortcircuit_result + ") { \n";
+                    output.preValue += rhs.preValue;
+                    output.preValue += shortcircuit_result + " = " + rhs.value + ";\n";
+                    output.preValue += rhs.postValue;
+                    output.preValue += "}\n";
+                    output.value = shortcircuit_result;
+                    return output;
                 } else if (name == "." || name == "->") {
 					if (children.size() == 1)
 					 	return "/*dot operation with one child*/" + generate(children[0], enclosingObject, true, enclosingFunction).oneString() + "/*end one child*/";
diff --git a/stdlib/grammer.krak b/stdlib/grammer.krak
index b9c4ff9..b84b352 100644
--- a/stdlib/grammer.krak
+++ b/stdlib/grammer.krak
@@ -1,6 +1,7 @@
 import string
 import vector
 import set
+import map
 import symbol
 import regex
 import io
@@ -68,9 +69,10 @@ fun load_grammer(gram_str: string::string): grammer {
             rightSide = vector::vector<symbol::symbol>()
             doLeftSide = true
         } else {
-            if (doLeftSide)
+            if (doLeftSide) {
                 leftSide = symbol::symbol(word, true)
-            else
+                gram.non_terminals.add(leftSide)
+            } else {
                 if (word[0] == '"') {
                     // ok, we support both plain terminals "hia*"
                     // and decorated terminals "hia*":hi_with_as
@@ -78,14 +80,19 @@ fun load_grammer(gram_str: string::string): grammer {
                     // the end of the string
                     var last_quote = word.length()-1
                     while(word[last_quote] != '"') last_quote--
-                    rightSide.add(symbol::symbol(word.slice(1,last_quote), true))
-                    if (last_quote != word.length()-1)
-                        gram.regexs.add(util::make_pair(word.slice(last_quote+2, -1), regex::regex(word.slice(1,last_quote))))
-                    else
-                        gram.regexs.add(util::make_pair(word, regex::regex(word.slice(1,last_quote))))
+                    if (last_quote != word.length()-1) {
+                        rightSide.add(symbol::symbol(word.slice(last_quote+2, -1), true))
+                        gram.terminals.add(util::make_pair(symbol::symbol(word.slice(last_quote+2, -1), true), regex::regex(word.slice(1,last_quote))))
+                    } else {
+                        rightSide.add(symbol::symbol(word, true))
+                        gram.terminals.add(util::make_pair(symbol::symbol(word, true), regex::regex(word.slice(1,last_quote))))
+                    }
                 } else {
-                    rightSide.add(symbol::symbol(word, false))
+                    var non_term = symbol::symbol(word, false)
+                    rightSide.add(non_term)
+                    gram.non_terminals.add(non_term)
                 }
+            }
             doLeftSide = false
         }
     })
@@ -94,15 +101,21 @@ fun load_grammer(gram_str: string::string): grammer {
 
 obj grammer (Object) {
     var rules: vector::vector<rule>
-    var regexs: vector::vector<util::pair<string::string, regex::regex>>
+    var non_terminals: set::set<symbol::symbol>
+    var terminals: vector::vector<util::pair<symbol::symbol, regex::regex>>
+    var first_set_map: map::map<symbol::symbol, set::set<symbol::symbol>>
 
     fun construct(): *grammer {
         rules.construct()
-        regexs.construct()
+        non_terminals.construct()
+        terminals.construct()
+        first_set_map.construct()
     }
     fun copy_construct(old: *grammer) {
         rules.copy_construct(&old->rules)
-        regexs.copy_construct(&old->regexs)
+        non_terminals.copy_construct(&old->non_terminals)
+        terminals.copy_construct(&old->terminals)
+        first_set_map.copy_construct(&old->first_set_map)
     }
     fun operator=(other: grammer) {
         destruct()
@@ -110,14 +123,71 @@ obj grammer (Object) {
     }
     fun destruct() {
         rules.destruct()
-        regexs.destruct()
+        non_terminals.destruct()
+        terminals.destruct()
+        first_set_map.destruct()
+    }
+
+    fun calculate_first_set() {
+        // fills first_set_map: seed each terminal with itself (its own first set), then re-run the rules below until a pass leaves `changed` false
+        terminals.for_each( fun(terminal: util::pair<symbol::symbol, regex::regex>)
+            first_set_map[terminal.first] = set::set(terminal.first)
+        )
+        // start out the non-terminals as empty sets; the rules pass below adds each rule's lookahead to its lhs entry
+        non_terminals.for_each( fun(non_terminal: symbol::symbol)
+            first_set_map[non_terminal] = set::set<symbol::symbol>()
+        )
+        var first_helper = fun(rhs: vector::vector<symbol::symbol>): set::set<symbol::symbol> {
+            var toRet = set::set<symbol::symbol>()
+            rhs.for_each(fun(sym: symbol::symbol) {
+                toRet.add(first_set_map[sym])
+            })
+            return toRet
+        }
+        var changed = true
+        while (changed) {
+            io::println("//////////current state of map/////////////")
+            first_set_map.keys.for_each(fun(sym: symbol::symbol) {
+                io::print("for ")
+                io::println(sym.to_string())
+                io::println("map is:")
+                first_set_map[sym].for_each(fun(look: symbol::symbol) {
+                    io::print("lookahead: "); io::println(look.to_string())
+                })
+            })
+            changed = false
+            rules.for_each( fun(r: rule) {
+                var rule_lookahead = first_helper(r.rhs)
+                if (!changed) {
+                    io::println(r.to_string())
+                    changed = !first_set_map[r.lhs].contains(rule_lookahead)
+                    io::print("changed: "); io::println(changed)
+                    io::print("\tcurrent lookahead is sized:")
+                    io::println(first_set_map[r.lhs].size())
+                    io::println("\tcurrent lookahead is:")
+                    first_set_map[r.lhs].for_each(fun(look: symbol::symbol) {
+                        io::print("\t\tlookahead: "); io::println(look.to_string())
+                    })
+                    io::println()
+                    io::print("\trule lookahead is sized:")
+                    io::println(rule_lookahead.size())
+                    io::println("\trule lookahead is:")
+                    rule_lookahead.for_each(fun(look: symbol::symbol) {
+                        io::print("\t\tlookahead: "); io::println(look.to_string())
+                    })
+                }
+                first_set_map[r.lhs].add(rule_lookahead)
+            })
+        }
+    }
 
     fun to_string(): string::string {
         var result = string::string("grammer rules:")
         rules.for_each( fun(i : rule) { result += string::string("\n\t") + i.to_string(); } )
-        result += "\nregexs:"
-        regexs.for_each( fun(i : util::pair<string::string, regex::regex>) { result += string::string("\n\t") + i.first + ": " + i.second.regexString; } )
+        result += "\nnon_terminals:"
+        non_terminals.for_each( fun(i : symbol::symbol) { result += string::string("\n\t") + i.to_string(); } )
+        result += "\nterminals:"
+        terminals.for_each( fun(i : util::pair<symbol::symbol, regex::regex>) { result += string::string("\n\t") + i.first.to_string() + ": " + i.second.regexString; } )
         return result
     }
 }
diff --git a/stdlib/lexer.krak b/stdlib/lexer.krak
index bce56b4..7cf6cda 100644
--- a/stdlib/lexer.krak
+++ b/stdlib/lexer.krak
@@ -5,14 +5,20 @@ import vector
 import util
 
 fun lexer(regs: vector::vector<regex::regex>): lexer {
-    return lexer(regs.map( fun(reg: regex::regex): util::pair<string::string, regex::regex> {
-        return util::make_pair(reg.regexString,reg)
-    }))
+    /*var toRet:lexer*/
+    var toRet.construct() :lexer
+    regs.for_each( fun(reg: regex::regex) {
+        toRet.add_regex(util::make_pair(reg.regexString, reg));
+    })
+    return toRet
 }
 
-fun lexer(regs: vector::vector<util::pair<string::string, regex::regex>>): lexer {
+fun lexer(regs: vector::vector<util::pair<symbol::symbol, regex::regex>>): lexer {
+    /*var toRet:lexer*/
     var toRet.construct() :lexer
-    regs.for_each( fun(reg: util::pair<string::string, regex::regex>) toRet.add_regex(reg); )
+    regs.for_each( fun(reg: util::pair<symbol::symbol, regex::regex>)
+        toRet.add_regex(util::make_pair(reg.first.name, reg.second));
+    )
     return toRet
 }
 
diff --git a/stdlib/set.krak b/stdlib/set.krak
index 9f1330a..bbe4ad2 100644
--- a/stdlib/set.krak
+++ b/stdlib/set.krak
@@ -44,6 +44,9 @@ obj set<T> (Object) {
     fun size():int {
         return data.size
     }
+    fun contains(items: set<T>): bool {
+        return items.size() == 0 || !items.any_true( fun(item: T): bool return !contains(item); )
+    }
     fun contains(item: T): bool {
         return data.find(item) != -1
     }
@@ -65,5 +68,8 @@ obj set<T> (Object) {
     fun for_each(func: fun(T):void) {
         data.for_each(func)
     }
+    fun any_true(func: fun(T):bool):bool {
+        return data.any_true(func)
+    }
 }
 
diff --git a/stdlib/vector.krak b/stdlib/vector.krak
index 4c7f664..f399f55 100644
--- a/stdlib/vector.krak
+++ b/stdlib/vector.krak
@@ -25,7 +25,6 @@ obj vector<T> (Object) {
         return this;
     }
 
-
     fun copy_construct(old: *vector<T>): void {
         construct()
         for (var i = 0; i < old->size; i++;)
diff --git a/tests/grammer.kgm b/tests/grammer.kgm
index dcc387f..25eefb2 100644
--- a/tests/grammer.kgm
+++ b/tests/grammer.kgm
@@ -11,3 +11,6 @@ d = "has
 ll\"
 \\\"y8\" \\\\" ;
 d   =    "has space"      ;
+d = e ;
+e = f | ;
+f = ;
diff --git a/tests/test_grammer.krak b/tests/test_grammer.krak
index d46016a..3ffa009 100644
--- a/tests/test_grammer.krak
+++ b/tests/test_grammer.krak
@@ -3,15 +3,41 @@ import grammer:*
 import lexer:*
 import string:*
 import util:*
+import symbol:*
 
 fun main():int {
-    var a = load_grammer(read_file(string("../krakenGrammer.kgm")))
-    /*var a = load_grammer(read_file(string("grammer.kgm")))*/
+
+    /*var a = load_grammer(read_file(string("../krakenGrammer.kgm")))*/
+    var a = load_grammer(read_file(string("grammer.kgm")))
     println(a.to_string())
-    var lex = lexer(a.regexs)
-    lex.set_input(read_file(string("test_grammer.krak")))
-    /*lex.set_input(string("ccdahas spacedhas*/
-/*returndaaaaaaaaaaaaaa"))*/
+    /*a.calculate_first_set()*/
+    println("///////////////////START FIRST SET/////////////")
+    println("//TERMINALS//")
+    a.terminals.for_each( fun(terminal: util::pair<symbol::symbol, regex::regex>) {
+        var set_str = string::string("{ ")
+        a.first_set_map[terminal.first].for_each( fun(sym: symbol::symbol) {
+            set_str += sym.to_string() + " "
+        })
+        set_str += "}"
+        print(terminal.first.to_string() + " first: " + set_str + "\n")
+    })
+    println("//NON TERMINALS//")
+    a.non_terminals.for_each( fun(non_terminal: symbol::symbol) {
+        var set_str = string::string("{ ")
+        a.first_set_map[non_terminal].for_each( fun(sym: symbol::symbol) {
+            set_str += sym.to_string() + " "
+        })
+        set_str += "}"
+        print(non_terminal.to_string() + " first: " + set_str + "\n")
+        println()
+    })
+    println("///////////////////END FIRST SET/////////////")
+
+    var lex = lexer(a.terminals)
+
+    /*lex.set_input(read_file(string("test_grammer.krak")))*/
+    lex.set_input(string("ccdahas spacedhas
+returndaaaaaaaaaaaaaa"))
     println("woo lexing:")
     range(8).for_each(fun(i: int) { println(lex.next().to_string()); } )
     /*range(80).for_each(fun(i: int) { println(lex.next().to_string()); } )*/
diff --git a/tests/test_set.expected_results b/tests/test_set.expected_results
index ba67780..7844457 100644
--- a/tests/test_set.expected_results
+++ b/tests/test_set.expected_results
@@ -9,5 +9,10 @@ false
 false
 true
 true
+contains set:
+false
+false
+true
+all:
 4
 5
diff --git a/tests/test_set.krak b/tests/test_set.krak
index 813bfba..01f56fa 100644
--- a/tests/test_set.krak
+++ b/tests/test_set.krak
@@ -1,5 +1,6 @@
 import io:*
 import set:*
+import vector_literals:*
 
 fun main():int {
     var s = set(3)
@@ -19,6 +20,12 @@ fun main():int {
     println(s.contains(4))
     println(s.contains(5))
 
+    println("contains set:")
+    println(s.contains(from_vector(vector(1,2,3))))
+    println(s.contains(from_vector(vector(4,5,3))))
+    println(s.contains(from_vector(vector(4,5))))
+
+    println("all:")
     s.for_each( fun(it: int) println(it); )
     return 0
 }
diff --git a/tests/test_short_circuit.expected_results b/tests/test_short_circuit.expected_results
new file mode 100644
index 0000000..104e3c1
--- /dev/null
+++ b/tests/test_short_circuit.expected_results
@@ -0,0 +1,8 @@
+early or
+was true
+early and
+
+late or
+false_extra: was true
+late and
+true_extra: 
diff --git a/tests/test_short_circuit.krak b/tests/test_short_circuit.krak
new file mode 100644
index 0000000..4d67e92
--- /dev/null
+++ b/tests/test_short_circuit.krak
@@ -0,0 +1,38 @@
+import io:*
+
+fun is_true():bool {
+    return true
+}
+
+fun is_true_extra():bool {
+    print("true_extra: ")
+    return true
+}
+
+fun is_false():bool {
+    return false
+}
+
+fun is_false_extra():bool {
+    print("false_extra: ")
+    return false
+}
+
+fun main():int {
+    println("early or")
+    if (is_true() || is_false_extra())
+        println("was true")
+    println("early and")
+    if (is_false() && is_true_extra())
+        println("was false")
+    println()
+    println("late or")
+    if (is_false_extra() || is_true())
+        println("was true")
+    println("late and")
+    if (is_true_extra() && is_false())
+        println("was false")
+    println()
+    return 0
+}
+