From 07e54f67fb2336b7934253ab8325bbcc3acd6c79 Mon Sep 17 00:00:00 2001
From: Nathan Braswell <miloignis@gmail.com>
Date: Wed, 8 Jul 2015 13:43:06 -0400
Subject: [PATCH] Changed regex to reference count internal structure instead
 of cloning because it took way too long. Added terminal decorators to grammer
 and lexer

---
 src/ASTTransformation.cpp |  2 ++
 src/CGenerator.cpp        |  4 ++--
 stdlib/grammer.krak       | 20 +++++++++++++++-----
 stdlib/lexer.krak         | 24 ++++++++++++++++++------
 stdlib/regex.krak         | 21 +++++++++++++++++----
 tests/grammer.kgm         |  4 ++--
 6 files changed, 56 insertions(+), 19 deletions(-)
diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp
index 6702db4..0a9e4d5 100644
--- a/src/ASTTransformation.cpp
+++ b/src/ASTTransformation.cpp
@@ -1099,6 +1099,7 @@ NodeTree<ASTData>* ASTTransformation::templateClassLookup(NodeTree<ASTData>* sco
     }
     if (!mostFittingTemplates.size()) {
         std::cout << "No template classes fit for " << lookup << "!" << std::endl;
+        std::cerr << "in file " << getUpperTranslationUnit(scope)->getDataRef()->symbol.getName() << std::endl;
         throw "No matching template classes";
     } else if (mostFittingTemplates.size() > 1) {
         std::cout << "Multiple template classes fit with equal number of traits satisfied for " << lookup << "!" << std::endl;
@@ -1304,6 +1305,7 @@ NodeTree<ASTData>* ASTTransformation::templateFunctionLookup(NodeTree<ASTData>*
         for (auto t : types)
             std::cerr << t.toString() + ", ";
         std::cerr << ")!" << std::endl;
+        std::cerr << "in file " << getUpperTranslationUnit(scope)->getDataRef()->symbol.getName() << std::endl;
         throw "No matching template functions";
     } else if (mostFittingTemplates.size() > 1) {
         std::cerr << "Multiple template functions fit with equal number of traits satisfied for " << lookup << "!" << std::endl;
diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp
index 76782a0..e11d600 100644
--- a/src/CGenerator.cpp
+++ b/src/CGenerator.cpp
@@ -652,7 +652,7 @@ CCodeTriple CGenerator::generate(NodeTree<ASTData>* from, NodeTree<ASTData>* enc
                             if (unaliasedTypeDef) { //Test to see if the function's a member of this type_def, or if this is an alias, of the original type. Get this original type if it exists.
 					 		    std::string nameDecoration;
 					 		    std::vector<NodeTree<ASTData>*> functionDefChildren = children[2]->getChildren(); //The function def is the rhs of the access operation
-					 		    std::cout << "Decorating (in access-should be object) " << name << " " << functionDefChildren.size() << std::endl;
+								 //std::cout << "Decorating (in access-should be object) " << name << " " << functionDefChildren.size() << std::endl;
 					 		    for (int i = 0; i < (functionDefChildren.size() > 0 ? functionDefChildren.size()-1 : 0); i++)
 					 		    	nameDecoration += "_" + ValueTypeToCTypeDecoration(functionDefChildren[i]->getData().valueType);
                                 // Note that we only add scoping to the object, as this specifies our member function too
@@ -680,7 +680,7 @@ CCodeTriple CGenerator::generate(NodeTree<ASTData>* from, NodeTree<ASTData>* enc
                     } else {
                         //It's a normal function call, not a special one or a method or anything. Name decorate.
                         std::vector<NodeTree<ASTData>*> functionDefChildren = children[0]->getChildren();
-                        std::cout << "Decorating (none-special)" << name << " " << functionDefChildren.size() << std::endl;
+                        //std::cout << "Decorating (none-special)" << name << " " << functionDefChildren.size() << std::endl;
                         std::string nameDecoration;
                         for (int i = 0; i < (functionDefChildren.size() > 0 ? functionDefChildren.size()-1 : 0); i++)
                             nameDecoration += "_" + ValueTypeToCTypeDecoration(functionDefChildren[i]->getData().valueType);
diff --git a/stdlib/grammer.krak b/stdlib/grammer.krak
index a00e6ff..b9c4ff9 100644
--- a/stdlib/grammer.krak
+++ b/stdlib/grammer.krak
@@ -4,6 +4,7 @@ import set
 import symbol
 import regex
 import io
+import util
 
 fun split_into_words(gram_str: string::string): vector::vector<string::string> {
     var out.construct(): vector::vector<string>
@@ -56,6 +57,7 @@ fun load_grammer(gram_str: string::string): grammer {
     /*})*/
     /*return gram*/
     split_into_words(gram_str).for_each(fun(word: string::string) {
+        io::print("word: "); io::println(word)
         if (word == "=") {
             // do nothing
         } else if (word == "|") {
@@ -70,9 +72,17 @@ fun load_grammer(gram_str: string::string): grammer {
                 leftSide = symbol::symbol(word, true)
             else
                 if (word[0] == '"') {
-                    rightSide.add(symbol::symbol(word.slice(1,-2), true))
-                    /*gram.regexs.add_unique(regex::regex(word.slice(1,-2)))*/
-                    gram.regexs.add(regex::regex(word.slice(1,-2)))
+                    // ok, we support both plain terminals "hia*"
+                    // and decorated terminals "hia*":hi_with_as
+                    // so first check to find the ending " and see if it's
+                    // the end of the string
+                    var last_quote = word.length()-1
+                    while(word[last_quote] != '"') last_quote--
+                    rightSide.add(symbol::symbol(word.slice(1,last_quote), true))
+                    if (last_quote != word.length()-1)
+                        gram.regexs.add(util::make_pair(word.slice(last_quote+2, -1), regex::regex(word.slice(1,last_quote))))
+                    else
+                        gram.regexs.add(util::make_pair(word, regex::regex(word.slice(1,last_quote))))
                 } else {
                     rightSide.add(symbol::symbol(word, false))
                 }
@@ -84,7 +94,7 @@ fun load_grammer(gram_str: string::string): grammer {
 
 obj grammer (Object) {
     var rules: vector::vector<rule>
-    var regexs: vector::vector<regex::regex>
+    var regexs: vector::vector<util::pair<string::string, regex::regex>>
 
     fun construct(): *grammer {
         rules.construct()
@@ -107,7 +117,7 @@ obj grammer (Object) {
         var result = string::string("grammer rules:")
         rules.for_each( fun(i : rule) { result += string::string("\n\t") + i.to_string(); } )
         result += "\nregexs:"
-        regexs.for_each( fun(i : regex::regex) { result += string::string("\n\t") + i.regexString; } )
+        regexs.for_each( fun(i : util::pair<string::string, regex::regex>) { result += string::string("\n\t") + i.first + ": " + i.second.regexString; } )
         return result
     }
 }
diff --git a/stdlib/lexer.krak b/stdlib/lexer.krak
index fc9503a..bce56b4 100644
--- a/stdlib/lexer.krak
+++ b/stdlib/lexer.krak
@@ -5,13 +5,19 @@ import vector
 import util
 
 fun lexer(regs: vector::vector<regex::regex>): lexer {
+    return lexer(regs.map( fun(reg: regex::regex): util::pair<string::string, regex::regex> {
+        return util::make_pair(reg.regexString,reg)
+    }))
+}
+
+fun lexer(regs: vector::vector<util::pair<string::string, regex::regex>>): lexer {
     var toRet.construct() :lexer
-    regs.for_each( fun(reg: regex::regex) toRet.add_regex(reg); )
+    regs.for_each( fun(reg: util::pair<string::string, regex::regex>) toRet.add_regex(reg); )
     return toRet
 }
 
 obj lexer (Object) {
-    var regs: vector::vector<regex::regex>
+    var regs: vector::vector<util::pair<string::string, regex::regex>>
     var input: string::string
     var position: int
     fun construct(): *lexer {
@@ -33,11 +39,17 @@ obj lexer (Object) {
         destruct()
         copy_construct(&old)
     }
-    fun add_regex(newOne: regex::regex) {
+    fun add_regex(name: string::string, newOne: regex::regex) {
+        regs.add(util::make_pair(name,newOne))
+    }
+    fun add_regex(newOne: util::pair<string::string,regex::regex>) {
         regs.add(newOne)
     }
+    fun add_regex(newOne: regex::regex) {
+        regs.add(util::make_pair(newOne.regexString, newOne))
+    }
     fun add_regex(newOne: *char) {
-        regs.add(regex::regex(newOne))
+        regs.add(util::make_pair(string::string(newOne), regex::regex(newOne)))
     }
     fun set_input(in: string::string) {
         input = in
@@ -45,8 +57,8 @@ obj lexer (Object) {
     fun next(): symbol::symbol {
         if (position >= input.length())
             return symbol::symbol("$EOF$", true)
-        var max = regs.map(fun(reg: regex::regex): util::pair<int, string::string> {
-                return util::make_pair(reg.long_match(input.slice(position, -1)), reg.regexString); })
+        var max = regs.map(fun(reg_pair: util::pair<string::string,regex::regex>): util::pair<int, string::string> {
+                return util::make_pair(reg_pair.second.long_match(input.slice(position, -1)), reg_pair.first); })
                 .max(fun(first: util::pair<int, string::string>, second: util::pair<int, string::string>): bool
                         { return first.first < second.first; })
         if (max.first < 0)
diff --git a/stdlib/regex.krak b/stdlib/regex.krak
index e06247b..f205b61 100644
--- a/stdlib/regex.krak
+++ b/stdlib/regex.krak
@@ -43,9 +43,16 @@ obj regexState (Object) {
 obj regex (Object) {
     var regexString: string::string
     var begin: *regexState
+    var referenceCounter: *int
 
+    fun construct(): *regex {
+        regexString.construct()
+        return this
+    }
     fun construct(regexStringIn: string::string): *regex {
         regexString.copy_construct(&regexStringIn)
+        referenceCounter = mem::new<int>()
+        *referenceCounter = 1
 
         var beginningAndEnd = compile(regexStringIn)
         // init our begin, and the end state as the next state of each end
@@ -56,9 +63,11 @@ obj regex (Object) {
     }
 
     fun copy_construct(old:*regex):void {
-        construct(old->regexString)
-        /*begin = old->begin*/
-        /*regexString.copy_construct(&old->regexString)*/
+        regexString.copy_construct(&old->regexString)
+        begin = old->begin
+        referenceCounter = old->referenceCounter
+        *referenceCounter += 1
+        /*construct(old->regexString)*/
         /*begin = mem::safe_recursive_clone(old->begin, fun(it: *regexState, cloner: fun(*regexState):*regexState, register: fun(*regexState):void): void {*/
             /*var newOne = mem::new<regexState>()->construct(it->character)*/
             /*register(newOne)*/
@@ -70,7 +79,11 @@ obj regex (Object) {
 
     fun destruct():void {
         regexString.destruct()
-        mem::safe_recursive_delete(begin, fun(it: *regexState): set::set<*regexState> { return set::from_vector(it->next_states); } )
+        *referenceCounter -= 1
+        if (*referenceCounter == 0) {
+            mem::safe_recursive_delete(begin, fun(it: *regexState): set::set<*regexState> { return set::from_vector(it->next_states); } )
+            mem::delete(referenceCounter)
+        }
     }
 
     fun operator==(other: regex):bool {
diff --git a/tests/grammer.kgm b/tests/grammer.kgm
index d35e1bc..dcc387f 100644
--- a/tests/grammer.kgm
+++ b/tests/grammer.kgm
@@ -1,7 +1,7 @@
 # comment
 a = b ;
-b = "c" ;
-b = c "d" ;
+b = "c":named_c ;
+b = c "d":dname ;
 c = "a" | d ;
 d = "has space" ;
 d = "has