From 07e54f67fb2336b7934253ab8325bbcc3acd6c79 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Wed, 8 Jul 2015 13:43:06 -0400 Subject: [PATCH] Changed regex to reference count internal structure instead of cloning because it took way too long. Added terminal decorators to grammer and lexer --- src/ASTTransformation.cpp | 2 ++ src/CGenerator.cpp | 4 ++-- stdlib/grammer.krak | 20 +++++++++++++++----- stdlib/lexer.krak | 24 ++++++++++++++++++------ stdlib/regex.krak | 21 +++++++++++++++++---- tests/grammer.kgm | 4 ++-- 6 files changed, 56 insertions(+), 19 deletions(-) diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 6702db4..0a9e4d5 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -1099,6 +1099,7 @@ NodeTree* ASTTransformation::templateClassLookup(NodeTree* sco } if (!mostFittingTemplates.size()) { std::cout << "No template classes fit for " << lookup << "!" << std::endl; + std::cerr << "in file " << getUpperTranslationUnit(scope)->getDataRef()->symbol.getName() << std::endl; throw "No matching template classes"; } else if (mostFittingTemplates.size() > 1) { std::cout << "Multiple template classes fit with equal number of traits satisfied for " << lookup << "!" << std::endl; @@ -1304,6 +1305,7 @@ NodeTree* ASTTransformation::templateFunctionLookup(NodeTree* for (auto t : types) std::cerr << t.toString() + ", "; std::cerr << ")!" << std::endl; + std::cerr << "in file " << getUpperTranslationUnit(scope)->getDataRef()->symbol.getName() << std::endl; throw "No matching template functions"; } else if (mostFittingTemplates.size() > 1) { std::cerr << "Multiple template functions fit with equal number of traits satisfied for " << lookup << "!" 
<< std::endl; diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 76782a0..e11d600 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -652,7 +652,7 @@ CCodeTriple CGenerator::generate(NodeTree* from, NodeTree* enc if (unaliasedTypeDef) { //Test to see if the function's a member of this type_def, or if this is an alias, of the original type. Get this original type if it exists. std::string nameDecoration; std::vector*> functionDefChildren = children[2]->getChildren(); //The function def is the rhs of the access operation - std::cout << "Decorating (in access-should be object) " << name << " " << functionDefChildren.size() << std::endl; + //std::cout << "Decorating (in access-should be object) " << name << " " << functionDefChildren.size() << std::endl; for (int i = 0; i < (functionDefChildren.size() > 0 ? functionDefChildren.size()-1 : 0); i++) nameDecoration += "_" + ValueTypeToCTypeDecoration(functionDefChildren[i]->getData().valueType); // Note that we only add scoping to the object, as this specifies our member function too @@ -680,7 +680,7 @@ CCodeTriple CGenerator::generate(NodeTree* from, NodeTree* enc } else { //It's a normal function call, not a special one or a method or anything. Name decorate. std::vector*> functionDefChildren = children[0]->getChildren(); - std::cout << "Decorating (none-special)" << name << " " << functionDefChildren.size() << std::endl; + //std::cout << "Decorating (none-special)" << name << " " << functionDefChildren.size() << std::endl; std::string nameDecoration; for (int i = 0; i < (functionDefChildren.size() > 0 ? 
functionDefChildren.size()-1 : 0); i++) nameDecoration += "_" + ValueTypeToCTypeDecoration(functionDefChildren[i]->getData().valueType); diff --git a/stdlib/grammer.krak b/stdlib/grammer.krak index a00e6ff..b9c4ff9 100644 --- a/stdlib/grammer.krak +++ b/stdlib/grammer.krak @@ -4,6 +4,7 @@ import set import symbol import regex import io +import util fun split_into_words(gram_str: string::string): vector::vector { var out.construct(): vector::vector @@ -56,6 +57,7 @@ fun load_grammer(gram_str: string::string): grammer { /*})*/ /*return gram*/ split_into_words(gram_str).for_each(fun(word: string::string) { + io::print("word: "); io::println(word) if (word == "=") { // do nothing } else if (word == "|") { @@ -70,9 +72,17 @@ fun load_grammer(gram_str: string::string): grammer { leftSide = symbol::symbol(word, true) else if (word[0] == '"') { - rightSide.add(symbol::symbol(word.slice(1,-2), true)) - /*gram.regexs.add_unique(regex::regex(word.slice(1,-2)))*/ - gram.regexs.add(regex::regex(word.slice(1,-2))) + // ok, we support both plain terminals "hia*" + // and decorated terminals "hia*":hi_with_as + // so first check to find the ending " and see if it's + // the end of the string + var last_quote = word.length()-1 + while(word[last_quote] != '"') last_quote-- + rightSide.add(symbol::symbol(word.slice(1,last_quote), true)) + if (last_quote != word.length()-1) + gram.regexs.add(util::make_pair(word.slice(last_quote+2, -1), regex::regex(word.slice(1,last_quote)))) + else + gram.regexs.add(util::make_pair(word, regex::regex(word.slice(1,last_quote)))) } else { rightSide.add(symbol::symbol(word, false)) } @@ -84,7 +94,7 @@ fun load_grammer(gram_str: string::string): grammer { obj grammer (Object) { var rules: vector::vector - var regexs: vector::vector + var regexs: vector::vector> fun construct(): *grammer { rules.construct() @@ -107,7 +117,7 @@ obj grammer (Object) { var result = string::string("grammer rules:") rules.for_each( fun(i : rule) { result += 
string::string("\n\t") + i.to_string(); } ) result += "\nregexs:" - regexs.for_each( fun(i : regex::regex) { result += string::string("\n\t") + i.regexString; } ) + regexs.for_each( fun(i : util::pair) { result += string::string("\n\t") + i.first + ": " + i.second.regexString; } ) return result } } diff --git a/stdlib/lexer.krak b/stdlib/lexer.krak index fc9503a..bce56b4 100644 --- a/stdlib/lexer.krak +++ b/stdlib/lexer.krak @@ -5,13 +5,19 @@ import vector import util fun lexer(regs: vector::vector): lexer { + return lexer(regs.map( fun(reg: regex::regex): util::pair { + return util::make_pair(reg.regexString,reg) + })) +} + +fun lexer(regs: vector::vector>): lexer { var toRet.construct() :lexer - regs.for_each( fun(reg: regex::regex) toRet.add_regex(reg); ) + regs.for_each( fun(reg: util::pair) toRet.add_regex(reg); ) return toRet } obj lexer (Object) { - var regs: vector::vector + var regs: vector::vector> var input: string::string var position: int fun construct(): *lexer { @@ -33,11 +39,17 @@ obj lexer (Object) { destruct() copy_construct(&old) } - fun add_regex(newOne: regex::regex) { + fun add_regex(name: string::string, newOne: regex::regex) { + regs.add(util::make_pair(name,newOne)) + } + fun add_regex(newOne: util::pair) { regs.add(newOne) } + fun add_regex(newOne: regex::regex) { + regs.add(util::make_pair(newOne.regexString, newOne)) + } fun add_regex(newOne: *char) { - regs.add(regex::regex(newOne)) + regs.add(util::make_pair(string::string(newOne), regex::regex(newOne))) } fun set_input(in: string::string) { input = in @@ -45,8 +57,8 @@ obj lexer (Object) { fun next(): symbol::symbol { if (position >= input.length()) return symbol::symbol("$EOF$", true) - var max = regs.map(fun(reg: regex::regex): util::pair { - return util::make_pair(reg.long_match(input.slice(position, -1)), reg.regexString); }) + var max = regs.map(fun(reg_pair: util::pair): util::pair { + return util::make_pair(reg_pair.second.long_match(input.slice(position, -1)), reg_pair.first); 
}) .max(fun(first: util::pair, second: util::pair): bool { return first.first < second.first; }) if (max.first < 0) diff --git a/stdlib/regex.krak b/stdlib/regex.krak index e06247b..f205b61 100644 --- a/stdlib/regex.krak +++ b/stdlib/regex.krak @@ -43,9 +43,16 @@ obj regexState (Object) { obj regex (Object) { var regexString: string::string var begin: *regexState + var referenceCounter: *int + fun construct(): *regex { + regexString.construct() + return this + } fun construct(regexStringIn: string::string): *regex { regexString.copy_construct(®exStringIn) + referenceCounter = mem::new() + *referenceCounter = 1 var beginningAndEnd = compile(regexStringIn) // init our begin, and the end state as the next state of each end @@ -56,9 +63,11 @@ obj regex (Object) { } fun copy_construct(old:*regex):void { - construct(old->regexString) - /*begin = old->begin*/ - /*regexString.copy_construct(&old->regexString)*/ + regexString.copy_construct(&old->regexString) + begin = old->begin + referenceCounter = old->referenceCounter + *referenceCounter += 1 + /*construct(old->regexString)*/ /*begin = mem::safe_recursive_clone(old->begin, fun(it: *regexState, cloner: fun(*regexState):*regexState, register: fun(*regexState):void): void {*/ /*var newOne = mem::new()->construct(it->character)*/ /*register(newOne)*/ @@ -70,7 +79,11 @@ obj regex (Object) { fun destruct():void { regexString.destruct() - mem::safe_recursive_delete(begin, fun(it: *regexState): set::set<*regexState> { return set::from_vector(it->next_states); } ) + *referenceCounter -= 1 + if (*referenceCounter == 0) { + mem::safe_recursive_delete(begin, fun(it: *regexState): set::set<*regexState> { return set::from_vector(it->next_states); } ) + mem::delete(referenceCounter) + } } fun operator==(other: regex):bool { diff --git a/tests/grammer.kgm b/tests/grammer.kgm index d35e1bc..dcc387f 100644 --- a/tests/grammer.kgm +++ b/tests/grammer.kgm @@ -1,7 +1,7 @@ # comment a = b ; -b = "c" ; -b = c "d" ; +b = "c":named_c ; +b = c 
"d":dname ; c = "a" | d ; d = "has space" ; d = "has