Tons of stuff. Regex still a work in progress, along with related template member function scoping bugs

2015-06-09 20:02:02 -04:00
parent 47bc52f00c
commit d90cb4b6db
14 changed files with 78 additions and 28 deletions
@@ -4,6 +4,7 @@ project(Kraken)


 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")

 set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include)

@@ -61,6 +61,8 @@ class RNGLRParser: public Parser {

 		std::vector<NodeTree<Symbol>*> nullableParts;
 		std::map<NodeTree<Symbol>, bool> packedMap;
+
+        std::map<ParseRule*, bool> reduceToNullMap;
 };

 #endif
@@ -15,7 +15,7 @@ class RegExState {

 		void addNext(RegExState* nextState);
 		bool characterIs(char inCharacter);
-		std::vector<RegExState*>* advance(char advanceCharacter);
+		std::vector<RegExState*> advance(char advanceCharacter);
 		std::vector<RegExState*> getNextStates();

 		bool isGoal();
@@ -108,9 +108,9 @@ expression = expression WS "<<" WS term | expression WS right_shift WS shiftand
 shiftand = shiftand WS "-" WS term | shiftand WS "\+" WS term | term ;
 term = term WS "/" WS factor | term WS "\*" WS factor | term WS "%" WS factor | factor ;
 factor = "\+\+" WS unarad | unarad WS "\+\+" | "--" WS unarad | unarad WS "--" | "\+" WS unarad | "-" WS unarad | "!" WS unarad | "~" WS unarad | "\(" WS type WS "\)" WS unarad | "\*" WS unarad | "&" WS unarad | unarad ;
-unarad = number | scoped_identifier | scoped_identifier WS template_inst | function_call | bool | string | character | "\(" WS boolean_expression WS "\)" | access_operation | unarad WS "[" WS expression WS "]" | lambda ; 
+unarad = number | scoped_identifier | scoped_identifier WS template_inst | access_operation | function_call | bool | string | character | "\(" WS boolean_expression WS "\)" | unarad WS "[" WS expression WS "]" | lambda ; 
 number = integer | floating_literal ;
-access_operation = unarad "." identifier | unarad "->" identifier ;
+access_operation = unarad "." identifier | unarad "->" identifier | unarad "." identifier WS template_inst | unarad "->" identifier WS template_inst ;

 assignment_statement = factor WS "=" WS boolean_expression | factor WS "\+=" WS boolean_expression | factor WS "-=" WS boolean_expression | factor WS "\*=" WS boolean_expression | factor WS "/=" WS boolean_expression ;
 # if it's being assigned to, we allow type inferencing
@@ -267,7 +267,7 @@ void ASTTransformation::thirdPass(NodeTree<ASTData>* ast, NodeTree<Symbol>* pars
 	//Note that this pass can instantiate class AND function templates
 	for (NodeTree<Symbol>* i : children) {
 		if (i->getDataRef()->getName() == "type_def") {
-			if (i->getChildren()[1]->getData().getName() == "template_dec") // It's a template
+			if (i->getChildren().size() > 1 && i->getChildren()[1]->getData().getName() == "template_dec") // It's a template
 				continue;	//We've already set up the class templates
 			std::vector<NodeTree<Symbol>*> typedefChildren = i->getChildren();
 			std::string name = concatSymbolTree(typedefChildren[0]);
@@ -751,6 +751,7 @@ NodeTree<ASTData>* ASTTransformation::transform(NodeTree<Symbol>* from, NodeTree
        std::cerr << "///////////////////////////////////////////////////////////////////////////////" << std::endl;
        std::cerr << "Ambigious program when parsed by this grammer! This is a bug, please report it." << std::endl;
        std::cerr << "///////////////////////////////////////////////////////////////////////////////" << std::endl;
+        std::cerr << concatSymbolTree(from) << std::endl;
        throw "Ambigious parse!";
    } else {
        // Should get rid of this eventually. Right now it handles cases like sign, alpha, a comma, etc
@@ -961,6 +962,8 @@ NodeTree<ASTData>* ASTTransformation::templateClassLookup(NodeTree<ASTData>* sco
    auto possibleMatches = scopeLookup(scope, lookup);
    std::cout << "Template Class instantiation has " << possibleMatches.size() << " possible matches." << std::endl;
    for (auto i : possibleMatches) {
+        if (i->getDataRef()->type != type_def)
+            continue;
 	    NodeTree<Symbol>* templateSyntaxTree = i->getDataRef()->valueType->templateDefinition;

        auto nameTraitsPairs = makeTemplateNameTraitPairs(templateSyntaxTree->getChildren()[1]);
@@ -1108,6 +1111,8 @@ NodeTree<ASTData>* ASTTransformation::templateFunctionLookup(NodeTree<ASTData>*
    std::cout << "Template Function instantiation has " << possibleMatches.size() << " possible matches." << std::endl;
    int index = 1;
    for (auto i : possibleMatches) {
+        if (i->getDataRef()->type != function)
+            continue;
        std::cout << "Possibility " << index++ << std::endl;
 	    NodeTree<Symbol>* templateSyntaxTree = i->getDataRef()->valueType->templateDefinition;
        if (!templateSyntaxTree) {
@@ -1239,6 +1244,10 @@ std::vector<NodeTree<ASTData>*> ASTTransformation::scopeLookup(NodeTree<ASTData>

 std::vector<NodeTree<ASTData>*> ASTTransformation::scopeLookup(NodeTree<ASTData>* scope, std::string lookup, bool includeModules, std::set<NodeTree<ASTData>*> visited) {
    std::cout << "Scp]|[e looking up " << lookup << std::endl;
+    std::cout << "current: " << scope->getDataRef()->toString() << std::endl;
+    for (auto i : scope->getDataRef()->scope)
+        std::cout << "\t" << i.first << std::endl;
+        //std::cout << i.first << " : " << i.second->toString() << std::endl;
    // Don't visit this node again when looking for the same lookup. Note that we don't prevent coming back for the scope operator, as that should be able to come back.
    if (visited.find(scope) != visited.end())
        return std::vector<NodeTree<ASTData>*>();
@@ -1510,7 +1519,7 @@ NodeTree<ASTData>* ASTTransformation::findOrInstantiateFunctionTemplate(std::vec
        auto unsliced = children[1]->getChildren();
        std::vector<NodeTree<Symbol>*> templateParamInstantiationNodes = slice(unsliced, 1 , -2, 2);//skip <, >, and commas
        for (int i = 0; i < templateParamInstantiationNodes.size(); i++) {
-            Type* instType = typeFromTypeNode(templateParamInstantiationNodes[i],scope, templateTypeReplacements);
+            Type* instType = typeFromTypeNode(templateParamInstantiationNodes[i], scope, templateTypeReplacements);
            instTypeString += (instTypeString == "" ? instType->toString() : "," + instType->toString());
            templateActualTypes.push_back(instType);
        }
@@ -1559,7 +1568,8 @@ NodeTree<ASTData>* ASTTransformation::findOrInstantiateFunctionTemplate(std::vec
 		std::cout << ", " << i << " : " << templateChildren[i]->getDataRef()->getName();
 	std::cout << std::endl;

-	instantiatedFunction = new NodeTree<ASTData>("function", ASTData(function, Symbol(scopelessFullyInstantiatedName, true), typeFromTypeNode(templateChildren[templateChildren.size()-2], scope, newTemplateTypeReplacement)));
+    // return type should be looked up in template's scope
+	instantiatedFunction = new NodeTree<ASTData>("function", ASTData(function, Symbol(scopelessFullyInstantiatedName, true), typeFromTypeNode(templateChildren[templateChildren.size()-2], templateDefinition, newTemplateTypeReplacement)));
    addToScope("~enclosing_scope", templateDefinition->getDataRef()->scope["~enclosing_scope"][0], instantiatedFunction);
    addToScope(scopelessFullyInstantiatedName, instantiatedFunction, templateDefinition->getDataRef()->scope["~enclosing_scope"][0]);
    templateDefinition->getDataRef()->scope["~enclosing_scope"][0]->addChild(instantiatedFunction); // Add this object the the highest scope's
@@ -426,8 +426,13 @@ bool RNGLRParser::fullyReducesToNull(ParseRule* rule) {
 }

 bool RNGLRParser::reducesToNull(ParseRule* rule) {
+    auto itr = reduceToNullMap.find(rule);
+    if (itr != reduceToNullMap.end())
+        return itr->second;
 	std::vector<Symbol> avoidList;
-	return reducesToNull(rule, avoidList);
+	auto val = reducesToNull(rule, avoidList);
+    reduceToNullMap[rule] = val;
+	return val;
 }

 bool RNGLRParser::reducesToNull(ParseRule* rule, std::vector<Symbol> avoidList) {
@@ -148,15 +148,10 @@ int RegEx::longMatch(std::string stringToMatch) {
 		//Go through every current state. Check to see if it is goal, if so update last goal.
 		//Also, add each state's advance to nextStates
 		for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
-			if (currentStates[j]->isGoal()) {
+			if (currentStates[j]->isGoal())
 				lastMatch = i;
-				//std::cout << "Hit goal at " << i << " character: " << stringToMatch[i-1] << std::endl;
-			} else {
-				//std::cout << "currentState " << j << ", " << currentStates[j]->toString() << " is not goal" <<std::endl;
-			}
-			std::vector<RegExState*>* addStates = currentStates[j]->advance(stringToMatch.at(i));
-			nextStates.insert(nextStates.end(), addStates->begin(), addStates->end());
-			delete addStates;
+			std::vector<RegExState*> addStates = currentStates[j]->advance(stringToMatch.at(i));
+			nextStates.insert(nextStates.end(), addStates.begin(), addStates.end());
 		}
 		//Now, clear our current states and add each one of our addStates if it is not already in current states

@@ -20,11 +20,11 @@ bool RegExState::characterIs(char inCharacter) {
 	return character == inCharacter;
 }

-std::vector<RegExState*>* RegExState::advance(char advanceCharacter) {
-	std::vector<RegExState*>* advanceStates = new std::vector<RegExState*>();
+std::vector<RegExState*> RegExState::advance(char advanceCharacter) {
+	std::vector<RegExState*> advanceStates;
 	for (std::vector<RegExState*>::size_type i = 0; i < nextStates.size(); i++) {
 		if (nextStates[i] != NULL && nextStates[i]->characterIs(advanceCharacter))
-			advanceStates->push_back(nextStates[i]);
+			advanceStates.push_back(nextStates[i]);
 	}
 	return advanceStates;
 }
@@ -0,0 +1,11 @@
+
+fun to_char<T>(in: T) : char {
+    var out:char
+	__if_comp__ __C__ {
+		simple_passthrough(in = in: out = out:) """
+        char out = (char) in;
+		"""
+	}
+	return out;
+}
+
@@ -1,6 +1,8 @@
 import io
 import vector
 import string
+import mem
+import conversions

 fun regex(in: char*):regex {
    return regex(string::string(in))
@@ -19,11 +21,11 @@ obj regexState(Object) {
        return this
    }
    fun construct(): regexState* {
-        return construct(0)
+        return construct(conversions::to_char(0))
    }
    fun copy_construct(old:regexState*): void {
-        character = regexState->character
-        next_states.copy_construct(&regexState->next_states)
+        character = old->character
+        next_states.copy_construct(&old->next_states)
    }
    fun destruct():void {
        next_states.destruct()
@@ -37,17 +39,16 @@ obj regex(Object) {
    var regexString: string::string
    var begin: regexState
    fun construct(regexStringIn: string::string): regex* {
-        regexState.construct()
+        begin.construct()
        regexString.copy_construct(&regexStringIn)

        var traverse = &begin
        for (var i = 0; i < regexString.length(); i++;) {
-            var next = new<regexState>()->construct(regexString[i])
+            var next = mem::new<regexState>()->construct(regexString[i])
            traverse->next_states->add(next)
            traverse = next
        }
-        traverse->next_states->add(new<regexState>()->construct(1))
-
+        traverse->next_states->add(mem::new<regexState>()->construct(conversions::to_char(1)))
        return this
    }
    fun copy_construct(old:regex*):void {
@@ -61,13 +62,16 @@ obj regex(Object) {
    fun long_match(to_match: char*): int { return long_match(string::string(to_match)); }
    fun long_match(to_match: string::string): int {
        var next = vector::vector(&begin)
+        //var next.construct() :vector::vector<regexState*>
        var longest = 0
        for (var i = 0; i < to_match.length(); i++;) {
            if (next.size == 0)
                return longest
            if (next.any_true(fun(state: regexState*):bool { return state->character == 1; }))
                longest = i
-            next = next.flatten_map(fun(state: regexState*): vector::vector<regexState*> { return state->match(to_match[i]); })
+            //next = next.flatten_map<regexState*>(fun(state: regexState*): vector::vector<regexState*> { return state->match(to_match[i]); })
+            next = next.flatten_map<regexState*>(fun(state: regexState*): vector::vector<regexState*> { return state->match('a'); })
+            //next = next.flatten_map(fun(state: regexState*): vector::vector<regexState*> { return state->match('a'); })
        }
        if (next.any_true(fun(state: regexState*):bool { return state->character == 1; }))
            return to_match.length()
@@ -1,5 +1,5 @@
-import vector;
-import mem;
+import vector
+import mem

 fun string(in:char*):string {
    var out:string = in
@@ -2,6 +2,12 @@ import mem:*;
 import util:*;
 import io:*;

+fun vector<T>(in:T):vector<T> {
+    var out.construct():vector<T>
+    out.add(in)
+    return out
+}
+
 obj vector<T> (Object) {
    var data: T*;
    var size: int;
@@ -125,6 +131,13 @@ obj vector<T> (Object) {
        }
        return newVec
    }
+    fun filter(func: fun(T):bool):vector<T> {
+        var newVec.construct(): vector<T>
+        for (var i = 0; i < size; i++;)
+            if (func(data[i]))
+                newVec.addEnd(data[i])
+        return newVec
+    }
    fun any_true(func: fun(T):bool):bool {
        for (var i = 0; i < size; i++;)
            if (func(data[i]))
@@ -0,0 +1 @@
+A
@@ -0,0 +1,8 @@
+import io:*
+import conversions:*
+
+
+fun main():int {
+    println(to_char(65))
+    return 0
+}