From d38fd32323fe372873e2742f44e415afcf525c92 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Thu, 7 Nov 2013 22:19:33 -0500 Subject: [PATCH 01/25] More AST stuff. Now supports boolean stuff --- krakenGrammer.kgm | 21 +++++++++++---------- main.cpp | 2 ++ src/ASTTransformation.cpp | 29 +++++++++++++++++++++++++++-- src/CGenerator.cpp | 8 ++++---- 4 files changed, 44 insertions(+), 16 deletions(-) diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index 4689ab8..27c818d 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -28,29 +28,30 @@ typed_parameter = type WS parameter ; opt_parameter_list = parameter_list | ; parameter_list = parameter_list WS parameter | parameter ; -parameter = expression ; +parameter = boolean_expression ; code_block = "{" WS statement_list WS "}" ; statement_list = statement_list WS statement | statement ; -statement = if_statement | return_statement | expression WS ";" | boolean_expression WS ";" | assignment_statement WS ";" | declaration_statement WS ";" | code_block ; +statement = if_statement | return_statement | boolean_expression WS ";" | boolean_expression WS ";" | assignment_statement WS ";" | declaration_statement WS ";" | code_block ; function_call = scope identifier "\(" WS opt_parameter_list WS "\)" ; scope = scope identifier "::" | ; -if_statement = "if" WS boolean_expression WS statement | "if" WS "\(" WS boolean_expression WS "\)" WS statement ; +if_statement = "if" WS boolean_expression WS statement ; + +return_statement = "return" WS boolean_expression WS ";" ; boolean_expression = boolean_expression WS "\|\|" WS and_boolean_expression | and_boolean_expression ; and_boolean_expression = and_boolean_expression "&&" bool_exp | bool_exp ; -bool_exp = "!" WS bool_exp | expression WS "==" WS expression | bool ; - -return_statement = "return" WS "\(" WS expression WS "\)" WS ";" | "return" WS expression WS ";" ; +bool_exp = "!" 
WS bool_exp | expression WS comparator WS expression | bool | expression ; +comparator = "==" | "<=" | ">=" | "!=" ; expression = expression WS "-" WS term | expression WS "\+" WS term | term ; term = term WS forward_slash WS factor | term WS "\*" WS factor | factor ; -factor = number | identifier | function_call | bool | string ; +factor = number | identifier | function_call | bool | string | "\(" WS boolean_expression WS "\)" ; number = integer | float | double ; -assignment_statement = identifier WS "=" WS expression ; -declaration_statement = type WS identifier WS "=" WS expression ; +assignment_statement = identifier WS "=" WS boolean_expression ; +declaration_statement = type WS identifier WS "=" WS boolean_expression ; alphanumeric = alphanumeric numeric | alphanumeric alpha | numeric | alpha ; hexadecimal = "0x(1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)+" ; @@ -61,4 +62,4 @@ double = sign numeric "." numeric | sign numeric "." numeric "d" ; bool = "true" | "false" | "True" | "False" ; alpha = "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|_)+" ; numeric = "(0|1|2|3|4|5|6|7|8|9)+" ; -string = "\"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|!|_|-| | |\\|/|\||0|1|2|3|4|5|6|7|8|9)+\"" ; \ No newline at end of file +string = "\"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|!|\?|_|-| | |\\|/|\||0|1|2|3|4|5|6|7|8|9)+\"" ; \ No newline at end of file diff --git a/main.cpp b/main.cpp index f94a99f..a072818 100644 --- a/main.cpp +++ b/main.cpp @@ -121,6 +121,8 @@ int main(int argc, char* argv[]) { preASTTransforms.push_back(new RemovalTransformation(Symbol(";", true))); preASTTransforms.push_back(new RemovalTransformation(Symbol("{", true))); preASTTransforms.push_back(new RemovalTransformation(Symbol("}", true))); + preASTTransforms.push_back(new RemovalTransformation(Symbol("(", true))); + 
preASTTransforms.push_back(new RemovalTransformation(Symbol(")", true))); preASTTransforms.push_back(new RemovalTransformation(Symbol("import", true))); //Don't need the actual text of the symbol preASTTransforms.push_back(new RemovalTransformation(Symbol("interpreter_directive", false))); preASTTransforms.push_back(new RemovalTransformation(Symbol("if", true))); diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 8c51ef9..36cca8a 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -34,6 +34,33 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { newNode = transform(children[1]); //Transform to get the identifier newNode->getDataRef()->valueType = ASTData::strToType(concatSymbolTree(children[0])); //Get the type (left child) and set our new identifer to be that type return newNode; + } else if (name == "boolean_expression") { + //If this is an actual part of an expression, not just a premoted term + if (children.size() > 1) { + std::string functionCallName = concatSymbolTree(children[1]); + newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); + skipChildren.insert(1); + } else { + return transform(children[0]); //Just a promoted term, so do child + } + } else if (name == "and_boolean_expression") { + //If this is an actual part of an expression, not just a premoted bool_exp + if (children.size() > 1) { + std::string functionCallName = concatSymbolTree(children[1]); + newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); + skipChildren.insert(1); + } else { + return transform(children[0]); //Just a promoted bool_exp, so do child + } + } else if (name == "bool_exp") { + //If this is an actual part of an expression, not just a premoted bool_exp. 
+ if (children.size() > 1) { + std::string functionCallName = concatSymbolTree(children[1]); + newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); + skipChildren.insert(1); + } else { + return transform(children[0]); //Just a promoted bool_exp, so do child + } } else if (name == "expression") { //If this is an actual part of an expression, not just a premoted term if (children.size() > 1) { @@ -54,8 +81,6 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { } } else if (name == "factor") { return transform(children[0]); //Just a premoted number or function call or something, so use it instead - } else if (name == "boolean_expression") { - newNode = new NodeTree(name, ASTData(boolean_expression)); } else if (name == "statement") { newNode = new NodeTree(name, ASTData(statement)); } else if (name == "if_statement") { diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 04c9d79..abef745 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -26,7 +26,7 @@ std::string CGenerator::generate(NodeTree* from) { //Do nothing break; case import: - return "#include \"" + data.symbol.getName() + "\"\n"; + return "#include <" + data.symbol.getName() + ">\n"; break; case identifier: return data.symbol.getName(); @@ -42,7 +42,7 @@ std::string CGenerator::generate(NodeTree* from) { return output; break; case code_block: - output += tabs() + "{\n"; + output += "{\n"; tabLevel++; for (int i = 0; i < children.size(); i++) output += generate(children[i]); @@ -60,7 +60,7 @@ std::string CGenerator::generate(NodeTree* from) { return tabs() + generate(children[0]) + ";\n"; break; case if_statement: - output += "if (" + generate(children[0]) + ") \n" + generate(children[1]); + output += "if (" + generate(children[0]) + ")\n\t" + generate(children[1]); if (children.size() > 2) output += " else " + generate(children[2]); return output; @@ -76,7 +76,7 @@ std::string CGenerator::generate(NodeTree* from) { //Handle operators 
specially for now. Will later replace with //Inlined functions in the standard library std::string name = data.symbol.getName(); - if (name == "+" || name == "-" || name == "*" || name == "/") { + if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!=") { return "((" + generate(children[0]) + ")" + name + "(" + generate(children[1]) + "))"; } output += data.symbol.getName() + "("; From f273deaedc0ddcfaf22a73578a7604b622337c0d Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Wed, 18 Dec 2013 18:05:21 -0600 Subject: [PATCH 02/25] FINALLY fixed an error that took weeks. Turned out that the ParseRule was shallow copied, and the lookahead was not copied correctly. So it got extended and thus skipped over the state when it should have been redone. --- include/ASTData.h | 2 +- krakenGrammer.kgm | 34 +++++++++++++++++++++------------- main.cpp | 2 +- src/ASTData.cpp | 20 ++++---------------- src/ASTTransformation.cpp | 34 ++++++++++++++++++++++------------ src/CGenerator.cpp | 16 +++++++--------- src/ParseRule.cpp | 7 ++++++- src/Parser.cpp | 12 ++++++++---- src/RNGLRParser.cpp | 10 ++++++++-- src/State.cpp | 10 ++++------ src/Table.cpp | 7 ++++--- 11 files changed, 86 insertions(+), 68 deletions(-) diff --git a/include/ASTData.h b/include/ASTData.h index 7daf15d..b025ea1 100644 --- a/include/ASTData.h +++ b/include/ASTData.h @@ -12,7 +12,7 @@ enum ASTType {undef, translation_unit, interpreter_directive, import, identifier, function, code_block, typed_parameter, expression, boolean_expression, statement, - if_statement, return_statement, assignment_statement, declaration_statement, + if_statement, while_loop, for_loop, return_statement, assignment_statement, declaration_statement, function_call, value}; enum ValueType {none, boolean, integer, floating, double_percision, char_string }; diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index 27c818d..386b738 100644 --- a/krakenGrammer.kgm +++ 
b/krakenGrammer.kgm @@ -27,30 +27,38 @@ typed_parameter_list = typed_parameter_list WS "," WS typed_parameter | typed_pa typed_parameter = type WS parameter ; opt_parameter_list = parameter_list | ; -parameter_list = parameter_list WS parameter | parameter ; +parameter_list = parameter_list WS "," WS parameter | parameter ; parameter = boolean_expression ; -code_block = "{" WS statement_list WS "}" ; -statement_list = statement_list WS statement | statement ; -statement = if_statement | return_statement | boolean_expression WS ";" | boolean_expression WS ";" | assignment_statement WS ";" | declaration_statement WS ";" | code_block ; -function_call = scope identifier "\(" WS opt_parameter_list WS "\)" ; -scope = scope identifier "::" | ; +if_statement = "if" WS "\(" WS boolean_expression WS "\)" WS statement ; -if_statement = "if" WS boolean_expression WS statement ; +while_loop = "while" WS boolean_expression WS statement ; + +for_update = "\(" WS statement_list WS "\)" ; +for_loop = "for" WS for_update WS statement ; return_statement = "return" WS boolean_expression WS ";" ; +code_block = "{" WS statement_list WS "}" ; + +statement_list = statement_list WS statement | statement ; +statement = if_statement | while_loop | for_loop | return_statement | boolean_expression WS ";" | assignment_statement WS ";" | declaration_statement WS ";" | code_block ; +function_call = scope identifier "\(" WS opt_parameter_list WS "\)" ; +scope = scope identifier "::" | ; + boolean_expression = boolean_expression WS "\|\|" WS and_boolean_expression | and_boolean_expression ; and_boolean_expression = and_boolean_expression "&&" bool_exp | bool_exp ; bool_exp = "!" 
WS bool_exp | expression WS comparator WS expression | bool | expression ; -comparator = "==" | "<=" | ">=" | "!=" ; +comparator = "==" | "<=" | ">=" | "!=" | "<" | ">" ; -expression = expression WS "-" WS term | expression WS "\+" WS term | term ; -term = term WS forward_slash WS factor | term WS "\*" WS factor | factor ; -factor = number | identifier | function_call | bool | string | "\(" WS boolean_expression WS "\)" ; +expression = expression WS "<<" WS term | expression WS ">>" WS shiftand | shiftand ; +shiftand = shiftand WS "-" WS term | shiftand WS "\+" WS term | term ; +term = term WS forward_slash WS factor | term WS "\*" WS factor | term WS "%" WS factor | factor ; +factor = "\+\+" WS unarad | unarad WS "\+\+" | "--" WS unarad | unarad WS "--" | "\+" WS unarad | "-" WS unarad | "!" WS unarad | "~" WS unarad | "\(" WS type WS "\)" WS unarad | "\*" WS unarad | "&" WS unarad | unarad ; +unarad = number | identifier | function_call | bool | string | "\(" WS boolean_expression WS "\)" ; number = integer | float | double ; -assignment_statement = identifier WS "=" WS boolean_expression ; +assignment_statement = identifier WS "=" WS boolean_expression | identifier WS "+=" WS boolean_expression | identifier WS "-=" WS boolean_expression | identifier WS "\*=" WS boolean_expression | identifier WS "/=" WS boolean_expression ; declaration_statement = type WS identifier WS "=" WS boolean_expression ; alphanumeric = alphanumeric numeric | alphanumeric alpha | numeric | alpha ; @@ -60,6 +68,6 @@ integer = sign numeric | sign hexadecimal | "null" ; float = sign numeric "." numeric "f" ; double = sign numeric "." numeric | sign numeric "." 
numeric "d" ; bool = "true" | "false" | "True" | "False" ; -alpha = "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|_)+" ; +alpha = "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|_)+" ; numeric = "(0|1|2|3|4|5|6|7|8|9)+" ; string = "\"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|!|\?|_|-| | |\\|/|\||0|1|2|3|4|5|6|7|8|9)+\"" ; \ No newline at end of file diff --git a/main.cpp b/main.cpp index a072818..0cce3f8 100644 --- a/main.cpp +++ b/main.cpp @@ -88,7 +88,7 @@ int main(int argc, char* argv[]) { //std::cout << "Doing stateSetToString from Main" << std::endl; // std::cout << "\n\n\n\n\n\n\n\n\n\nState Set toString" << std::endl; // std::cout << parser.stateSetToString() << std::endl; - // std::cout << "finished stateSetToString from Main" << std::endl; + // std::cout << "finished stateSetToString from Main" << std::endl; // std::cout << "\n\n\n\n\n\n\n\n\n\nTable" << std::endl; // std::cout << parser.tableToString() << std::endl; // std::cout << "\n\n\n\n\n\n\n\n\n\nGrammer Input File" << std::endl; diff --git a/src/ASTData.cpp b/src/ASTData.cpp index 7a4bb39..81c0bda 100644 --- a/src/ASTData.cpp +++ b/src/ASTData.cpp @@ -66,52 +66,40 @@ std::string ASTData::ASTTypeToString(ASTType type) { switch (type) { case translation_unit: return "translation_unit"; - break; case interpreter_directive: return "interpreter_directive"; - break; case identifier: return "identifier"; - break; case import: return "import"; - break; case function: return "function"; - break; case code_block: return "code_block"; - break; case typed_parameter: return "typed_parameter"; - break; case expression: return "expression"; - break; case boolean_expression: return "boolean_expression"; - break; case statement: return "statement"; - break; case if_statement: return "if_statement"; - break; + case while_loop: + 
return "while_loop"; + case for_loop: + return "for_loop"; case return_statement: return "return_statement"; - break; case assignment_statement: return "assignment_statement"; - break; case declaration_statement: return "declaration_statement"; - break; case function_call: return "function_call"; - break; case value: return "value"; - break; default: return "unknown_ASTType"; } diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 36cca8a..2802b20 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -61,23 +61,15 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { } else { return transform(children[0]); //Just a promoted bool_exp, so do child } - } else if (name == "expression") { - //If this is an actual part of an expression, not just a premoted term + //Here's the order of ops stuff + } else if (name == "expression" || name == "shiftand" || name == "term" || name == "unarad") { + //If this is an actual part of an expression, not just a premoted child if (children.size() > 1) { std::string functionCallName = concatSymbolTree(children[1]); newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); skipChildren.insert(1); } else { - return transform(children[0]); //Just a promoted term, so do child - } - } else if (name == "term") { - //If this is an actual part of an expression, not just a premoted factor - if (children.size() > 1) { - std::string functionCallName = concatSymbolTree(children[1]); - newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); - skipChildren.insert(1); - } else { - return transform(children[0]); //Just a promoted factor, so do child + return transform(children[0]); //Just a promoted child, so do it instead } } else if (name == "factor") { return transform(children[0]); //Just a premoted number or function call or something, so use it instead @@ -85,10 +77,28 @@ NodeTree* ASTTransformation::transform(NodeTree* 
from) { newNode = new NodeTree(name, ASTData(statement)); } else if (name == "if_statement") { newNode = new NodeTree(name, ASTData(if_statement)); + } else if (name == "while_loop") { + newNode = new NodeTree(name, ASTData(while_loop)); + } else if (name == "for_loop") { + newNode = new NodeTree(name, ASTData(for_loop)); } else if (name == "return_statement") { newNode = new NodeTree(name, ASTData(return_statement)); } else if (name == "assignment_statement") { newNode = new NodeTree(name, ASTData(assignment_statement)); + std::string assignFuncName = concatSymbolTree(children[1]); + if (assignFuncName == "=") { + newNode->addChild(transform(children[0])); + newNode->addChild(transform(children[2])); + } else { + //For assignments like += or *=, expand the syntatic sugar. + NodeTree* lhs = transform(children[0]); + NodeTree* childCall = new NodeTree(assignFuncName, ASTData(function_call, Symbol(assignFuncName, true))); + childCall->addChild(lhs); + childCall->addChild(transform(children[2])); + newNode->addChild(lhs); + newNode->addChild(childCall); + } + return newNode; } else if (name == "declaration_statement") { newNode = new NodeTree(name, ASTData(declaration_statement)); NodeTree* newIdentifier = transform(children[1]); //Transform the identifier diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index abef745..45e25e3 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -27,10 +27,8 @@ std::string CGenerator::generate(NodeTree* from) { break; case import: return "#include <" + data.symbol.getName() + ">\n"; - break; case identifier: return data.symbol.getName(); - break; case function: output += "\n" + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + "("; for (int i = 0; i < children.size()-1; i++) { @@ -40,7 +38,6 @@ std::string CGenerator::generate(NodeTree* from) { } output+= ")\n" + generate(children[children.size()-1]); return output; - break; case code_block: output += "{\n"; tabLevel++; @@ -49,22 +46,23 @@ std::string 
CGenerator::generate(NodeTree* from) { tabLevel--; output += tabs() + "}"; return output; - break; case expression: output += " " + data.symbol.getName() + ", "; - break; case boolean_expression: output += " " + data.symbol.getName() + " "; - break; case statement: return tabs() + generate(children[0]) + ";\n"; - break; case if_statement: output += "if (" + generate(children[0]) + ")\n\t" + generate(children[1]); if (children.size() > 2) output += " else " + generate(children[2]); return output; - break; + case while_loop: + output += "if (" + generate(children[0]) + ")\n\t" + generate(children[1]); + return output; + case for_loop: + output += "if (" + generate(children[0]) + ")\n\t" + generate(children[1]); + return output; case return_statement: return "return " + generate(children[0]); case assignment_statement: @@ -76,7 +74,7 @@ std::string CGenerator::generate(NodeTree* from) { //Handle operators specially for now. Will later replace with //Inlined functions in the standard library std::string name = data.symbol.getName(); - if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!=") { + if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!=" || name == "%") { return "((" + generate(children[0]) + ")" + name + "(" + generate(children[1]) + "))"; } output += data.symbol.getName() + "("; diff --git a/src/ParseRule.cpp b/src/ParseRule.cpp index ea96c50..e27208a 100644 --- a/src/ParseRule.cpp +++ b/src/ParseRule.cpp @@ -29,7 +29,12 @@ const bool ParseRule::operator!=(const ParseRule &other) { } ParseRule* ParseRule::clone() { - return( new ParseRule(leftHandle, pointerIndex, rightSide, lookahead) ); + std::vector* newLookahead = NULL; + if (lookahead) { + newLookahead = new std::vector(); + *newLookahead = *lookahead; + } + return( new ParseRule(leftHandle, pointerIndex, rightSide, newLookahead) ); } void 
ParseRule::setLeftHandle(Symbol leftHandle) { diff --git a/src/Parser.cpp b/src/Parser.cpp index 83cbfaa..f8236c5 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -88,7 +88,7 @@ void Parser::createStateSet() { std::queue* toDo = new std::queue(); toDo->push(zeroState); //std::cout << "Begining for main set for loop" << std::endl; - while (toDo->front()) { + while (toDo->size()) { //closure closure(toDo->front()); //Add the new states @@ -181,7 +181,7 @@ std::vector* Parser::incrementiveFollowSet(ParseRule* rule) { } } followSet->insert(followSet->end(), symbolFirstSet->begin(), symbolFirstSet->end()); - //delete symbolFirstSet; + delete symbolFirstSet; rule->advancePointer(); } if (rule->isAtEnd()) { @@ -209,10 +209,13 @@ void Parser::closure(State* state) { std::vector* stateTotal = state->getTotal(); for (std::vector::size_type i = 0; i < stateTotal->size(); i++) { ParseRule* currentStateRule = (*stateTotal)[i]; + //If it's at it's end, move on. We can't advance it. + if(currentStateRule->isAtEnd()) + continue; for (std::vector::size_type j = 0; j < loadedGrammer.size(); j++) { //If the current symbol in the rule is not null (rule completed) and it equals a grammer's left side ParseRule* currentGramRule = loadedGrammer[j]->clone(); - if ( !currentStateRule->isAtEnd() && currentStateRule->getAtNextIndex() == currentGramRule->getLeftSide()) { + if (currentStateRule->getAtNextIndex() == currentGramRule->getLeftSide()) { //std::cout << (*stateTotal)[i]->getAtNextIndex()->toString() << " has an applicable production " << loadedGrammer[j]->toString() << std::endl; //Now, add the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set. 
//std::cout << "Setting lookahead for " << currentGramRule->toString() << " in state " << state->toString() << std::endl; @@ -225,6 +228,7 @@ void Parser::closure(State* state) { //std::cout << (*stateTotal)[k]->toString() << std::endl; (*stateTotal)[k]->addLookahead(currentGramRule->getLookahead()); isAlreadyInState = true; + delete currentGramRule; break; } } @@ -311,7 +315,7 @@ void Parser::addStates(std::vector< State* >* stateSets, State* state, std::queu std::string Parser::stateSetToString() { std::string concat = ""; for (std::vector< State *>::size_type i = 0; i < stateSets.size(); i++) { - concat += stateSets[i]->toString(); + concat += intToString(i) + " is " + stateSets[i]->toString(); } return concat; } diff --git a/src/RNGLRParser.cpp b/src/RNGLRParser.cpp index b413321..ec64df5 100644 --- a/src/RNGLRParser.cpp +++ b/src/RNGLRParser.cpp @@ -81,7 +81,10 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { std::cout << "Nearby is:" << std::endl; int range = 5; for (int j = (i-range >= 0 ? i-range : 0); j < (i+range < input.size() ? i+range : input.size()); j++) - std::cout << input[j].toString() << " "; + if (j == i) + std::cout << "||*||*||" << input[j].toString() << "||*||*|| "; + else + std::cout << input[j].toString() << " "; std::cout << std::endl; break; } @@ -339,11 +342,13 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state, std: //if (newStates[i]->basisEquals(*((*stateSets)[j]))) { stateAlreadyInAllStates = true; //If it does exist, we should add it as the shift/goto in the action table + //std::cout << "newStates[" << i << "] == stateSets[" << j << "]" << std::endl; if (!((*stateSets)[j]->basisEquals(*(newStates[i])))) toDo->push((*stateSets)[j]); (*stateSets)[j]->combineStates(*(newStates[i])); + //std::cout << j << "\t Hay, doing an inside loop state reductions!" 
<< std::endl; addStateReductionsToTable((*stateSets)[j]); table.add(stateNum(state), currStateSymbol, new ParseAction(ParseAction::SHIFT, j)); @@ -368,8 +373,9 @@ void RNGLRParser::addStateReductionsToTable(State* state) { //Also, this really only needs to be done for the state's basis, but we're already iterating through, so... std::vector* lookahead = (*currStateTotal)[i]->getLookahead(); if ((*currStateTotal)[i]->isAtEnd()) { - for (std::vector::size_type j = 0; j < lookahead->size(); j++) + for (std::vector::size_type j = 0; j < lookahead->size(); j++) { table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i])); + } //If this has an appropriate ruduction to null, get the reduce trees out } else if (reducesToNull((*currStateTotal)[i])) { //std::cout << (*currStateTotal)[i]->toString() << " REDUCES TO NULL" << std::endl; diff --git a/src/State.cpp b/src/State.cpp index b77caf1..012d468 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -79,12 +79,9 @@ void State::combineStates(State &other) { std::vector* State::getTotal() { total.clear(); - for (std::vector::size_type i = 0; i < basis.size(); i++) { - total.push_back(basis[i]); - } - for (std::vector::size_type i = 0; i < remaining.size(); i++) { - total.push_back(remaining[i]); - } + //std::cout << "Vector will be " << basis.size() << " + " << remaining.size() << std::endl; + total.insert(total.begin(), basis.begin(), basis.end()); + total.insert(total.end(), remaining.begin(), remaining.end()); return(&total); } std::vector* State::getBasis() { @@ -111,6 +108,7 @@ void State::addRuleCombineLookahead(ParseRule* rule) { if (rule->equalsExceptLookahead(*(total[i]))) { total[i]->addLookahead(rule->getLookahead()); alreadyIn = true; + break; } } if (!alreadyIn) diff --git a/src/Table.cpp b/src/Table.cpp index 690ebd2..a94d6b2 100644 --- a/src/Table.cpp +++ b/src/Table.cpp @@ -128,7 +128,7 @@ std::vector* Table::get(int state, Symbol token) { action->push_back(new 
ParseAction(ParseAction::ACCEPT)); } - //If ourside the symbol range of this state (same as NULL), reject + //If outside the symbol range of this state (same as NULL), reject if ( symbolIndex >= table[state]->size() ) { action = new std::vector(); action->push_back(new ParseAction(ParseAction::REJECT)); @@ -141,7 +141,7 @@ std::vector* Table::get(int state, Symbol token) { } //Otherwise, we have something, so return it - return (action); + return action; } ParseAction* Table::getShift(int state, Symbol token) { @@ -163,8 +163,9 @@ std::string Table::toString() { concat += "\n"; for (std::vector< std::vector< std::vector< ParseRule* >* >* >::size_type i = 0; i < table.size(); i++) { - concat += intToString(i) + "\t"; + concat += intToString(i) + " is the state\t"; for (std::vector< std::vector< ParseRule* >* >::size_type j = 0; j < table[i]->size(); j++) { + concat += "for " + symbolIndexVec[j].toString() + " do "; if ( (*(table[i]))[j] != NULL) { for (std::vector< ParseRule* >::size_type k = 0; k < (*(table[i]))[j]->size(); k++) { concat += (*((*(table[i]))[j]))[k]->toString() + "\t"; From 6ad406e42de51e2184102d1c22133f4e08eae415 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Thu, 19 Dec 2013 10:39:36 -0600 Subject: [PATCH 03/25] Small fixes to the grammer, ASTTransformation and CGenerator. Should now be ready to begin implementation of multiple files, conditional inclusion, and code passthrough. 
--- include/CGenerator.h | 2 ++ include/util.h | 1 + krakenGrammer.kgm | 11 +++++------ main.cpp | 2 ++ src/ASTTransformation.cpp | 7 +++---- src/CGenerator.cpp | 9 ++++++--- src/util.cpp | 9 +++++++++ 7 files changed, 28 insertions(+), 13 deletions(-) diff --git a/include/CGenerator.h b/include/CGenerator.h index 9b43889..f18fadd 100644 --- a/include/CGenerator.h +++ b/include/CGenerator.h @@ -7,6 +7,8 @@ #include "NodeTree.h" #include "ASTData.h" +#include "util.h" + class CGenerator { public: diff --git a/include/util.h b/include/util.h index 2041601..42b3d8d 100644 --- a/include/util.h +++ b/include/util.h @@ -11,5 +11,6 @@ std::string intToString(int theInt); std::string replaceExEscape(std::string first, std::string search, std::string replace); +std::string strSlice(std::string str, int begin, int end); #endif diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index 386b738..63618c5 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -34,15 +34,14 @@ if_statement = "if" WS "\(" WS boolean_expression WS "\)" WS statement ; while_loop = "while" WS boolean_expression WS statement ; -for_update = "\(" WS statement_list WS "\)" ; -for_loop = "for" WS for_update WS statement ; +for_loop = "for" WS "\(" WS statement WS boolean_expression WS ";" WS statement WS "\)" WS statement ; -return_statement = "return" WS boolean_expression WS ";" ; +return_statement = "return" WS boolean_expression ; code_block = "{" WS statement_list WS "}" ; statement_list = statement_list WS statement | statement ; -statement = if_statement | while_loop | for_loop | return_statement | boolean_expression WS ";" | assignment_statement WS ";" | declaration_statement WS ";" | code_block ; +statement = if_statement | while_loop | for_loop | return_statement WS ";" | boolean_expression WS ";" | assignment_statement WS ";" | declaration_statement WS ";" | code_block ; function_call = scope identifier "\(" WS opt_parameter_list WS "\)" ; scope = scope identifier "::" | ; @@ -58,7 +57,7 @@ 
factor = "\+\+" WS unarad | unarad WS "\+\+" | "--" WS unarad | unarad WS "--" | unarad = number | identifier | function_call | bool | string | "\(" WS boolean_expression WS "\)" ; number = integer | float | double ; -assignment_statement = identifier WS "=" WS boolean_expression | identifier WS "+=" WS boolean_expression | identifier WS "-=" WS boolean_expression | identifier WS "\*=" WS boolean_expression | identifier WS "/=" WS boolean_expression ; +assignment_statement = identifier WS "=" WS boolean_expression | identifier WS "\+=" WS boolean_expression | identifier WS "-=" WS boolean_expression | identifier WS "\*=" WS boolean_expression | identifier WS "/=" WS boolean_expression ; declaration_statement = type WS identifier WS "=" WS boolean_expression ; alphanumeric = alphanumeric numeric | alphanumeric alpha | numeric | alpha ; @@ -70,4 +69,4 @@ double = sign numeric "." numeric | sign numeric "." numeric "d" ; bool = "true" | "false" | "True" | "False" ; alpha = "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|_)+" ; numeric = "(0|1|2|3|4|5|6|7|8|9)+" ; -string = "\"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|!|\?|_|-| | |\\|/|\||0|1|2|3|4|5|6|7|8|9)+\"" ; \ No newline at end of file +string = "\"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|%| | |\\|/|\||\(|\)|0|1|2|3|4|5|6|7|8|9)+\"" ; \ No newline at end of file diff --git a/main.cpp b/main.cpp index 0cce3f8..955a7c6 100644 --- a/main.cpp +++ b/main.cpp @@ -16,12 +16,14 @@ #include "ASTData.h" #include "CGenerator.h" +#include "util.h" int main(int argc, char* argv[]) { if (argc == 2 && std::string(argv[1]) == "--test") { StringReader::test(); RegEx::test(); Lexer::test(); + //std::cout << strSlice("123", 0, -1) << std::endl; return 0; } diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 
2802b20..9ccd943 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -62,17 +62,16 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { return transform(children[0]); //Just a promoted bool_exp, so do child } //Here's the order of ops stuff - } else if (name == "expression" || name == "shiftand" || name == "term" || name == "unarad") { + } else if (name == "expression" || name == "shiftand" || name == "term" || name == "factor" || name == "unarad") { //If this is an actual part of an expression, not just a premoted child if (children.size() > 1) { std::string functionCallName = concatSymbolTree(children[1]); + std::cout << functionCallName << std::endl; newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); skipChildren.insert(1); } else { return transform(children[0]); //Just a promoted child, so do it instead } - } else if (name == "factor") { - return transform(children[0]); //Just a premoted number or function call or something, so use it instead } else if (name == "statement") { newNode = new NodeTree(name, ASTData(statement)); } else if (name == "if_statement") { @@ -92,7 +91,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { } else { //For assignments like += or *=, expand the syntatic sugar. 
NodeTree* lhs = transform(children[0]); - NodeTree* childCall = new NodeTree(assignFuncName, ASTData(function_call, Symbol(assignFuncName, true))); + NodeTree* childCall = new NodeTree(assignFuncName.substr(0,1), ASTData(function_call, Symbol(assignFuncName.substr(0,1), true))); childCall->addChild(lhs); childCall->addChild(transform(children[2])); newNode->addChild(lhs); diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 45e25e3..53575cf 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -58,10 +58,11 @@ std::string CGenerator::generate(NodeTree* from) { output += " else " + generate(children[2]); return output; case while_loop: - output += "if (" + generate(children[0]) + ")\n\t" + generate(children[1]); + output += "while (" + generate(children[0]) + ")\n\t" + generate(children[1]); return output; case for_loop: - output += "if (" + generate(children[0]) + ")\n\t" + generate(children[1]); + //The strSlice's are there to get ride of an unwanted return and an unwanted semicolon + output += "for (" + strSlice(generate(children[0]),0,-2) + generate(children[1]) + ";" + strSlice(generate(children[2]),0,-3) + ")\n\t" + generate(children[3]); return output; case return_statement: return "return " + generate(children[0]); @@ -74,7 +75,9 @@ std::string CGenerator::generate(NodeTree* from) { //Handle operators specially for now. 
Will later replace with //Inlined functions in the standard library std::string name = data.symbol.getName(); - if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!=" || name == "%") { + if (name == "++" || name == "--") + return generate(children[0]) + name; + if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!=" || name == "<" || name == ">" || name == "%" || name == "+=" || name == "-=" || name == "*=" || name == "/=") { return "((" + generate(children[0]) + ")" + name + "(" + generate(children[1]) + "))"; } output += data.symbol.getName() + "("; diff --git a/src/util.cpp b/src/util.cpp index b71e6b0..ce48395 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -31,3 +31,12 @@ std::string replaceExEscape(std::string first, std::string search, std::string r } return first; } + +//String slicing is crazy useful. substr isn't bad, but slicing with negative indicies is wonderful +std::string strSlice(std::string str, int begin, int end) { + if (begin < 0) + begin += str.length()+1; + if (end < 0) + end += str.length()+1; + return str.substr(begin, end-begin); +} From 935cc6f9687cb36bec1916c50e834f440bc87025 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Sun, 22 Dec 2013 01:34:59 -0600 Subject: [PATCH 04/25] Added generator-dependent compilation and simple passthrough that allows us to have non-cheated emitted, printing c-code for the first time! (no typechecking or anything yet, but we'll get there). It's also still rough. 
--- include/ASTData.h | 4 ++-- include/CGenerator.h | 1 + krakenGrammer.kgm | 25 ++++++++++++++++--------- main.cpp | 8 ++++---- src/ASTData.cpp | 16 +++++++++------- src/ASTTransformation.cpp | 6 +++++- src/CGenerator.cpp | 24 ++++++++++++++++-------- src/RNGLRParser.cpp | 2 +- src/util.cpp | 2 +- 9 files changed, 55 insertions(+), 33 deletions(-) diff --git a/include/ASTData.h b/include/ASTData.h index b025ea1..593c84b 100644 --- a/include/ASTData.h +++ b/include/ASTData.h @@ -13,8 +13,8 @@ enum ASTType {undef, translation_unit, interpreter_directive, import, identifier function, code_block, typed_parameter, expression, boolean_expression, statement, if_statement, while_loop, for_loop, return_statement, assignment_statement, declaration_statement, - function_call, value}; -enum ValueType {none, boolean, integer, floating, double_percision, char_string }; + if_comp, simple_passthrough, function_call, value}; +enum ValueType {none, void_type, boolean, integer, floating, double_percision, char_string }; class ASTData { diff --git a/include/CGenerator.h b/include/CGenerator.h index f18fadd..b819f77 100644 --- a/include/CGenerator.h +++ b/include/CGenerator.h @@ -16,6 +16,7 @@ class CGenerator { ~CGenerator(); std::string generate(NodeTree* from); static std::string ValueTypeToCType(ValueType type); + std::string generatorString; private: std::string tabs(); int tabLevel; diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index 63618c5..bb027b5 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -1,9 +1,9 @@ Goal = translation_unit ; -translation_unit = interpreter_directive WS opt_import_list WS function_list WS ; +translation_unit = interpreter_directive WS unorderd_list_part WS ; +unorderd_list_part = import_list WS unorderd_list_part | function WS unorderd_list_part | if_comp WS unorderd_list_part | simple_passthrough WS unorderd_list_part | import_list | function | if_comp | simple_passthrough ; -type = "\*" WS type | "void" | "int" | "float" | "double" | 
"char" | identifier ; +type = type WS "\*" | "void" | "int" | "float" | "double" | "char" | identifier ; -opt_import_list = import_list | ; import_list = import_list WS import | import ; import = "import" WS identifier WS ";" ; @@ -14,12 +14,16 @@ forward_slash = "/" ; back_slash = "\\" ; WS = "( | | -)+" | ; +)+" | WS comment WS | ; + +if_comp = "__if_comp__" WS identifier WS if_comp_pred ; +if_comp_pred = code_block | simple_passthrough ; +simple_passthrough = "comp_simple_passthrough" WS triple_quoted_string ; +triple_quoted_string = "\"\"\"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|;|%|=|\+| | | +|\\|/|\||\(|\)|\"|#|<|\*|>|0|1|2|3|4|5|6|7|8|9)+\"\"\"" ; identifier = alpha | alpha alphanumeric ; -function_list = function_list WS function | function ; - function = type WS identifier WS "\(" WS opt_typed_parameter_list WS "\)" WS code_block ; opt_typed_parameter_list = typed_parameter_list | ; @@ -36,12 +40,12 @@ while_loop = "while" WS boolean_expression WS statement ; for_loop = "for" WS "\(" WS statement WS boolean_expression WS ";" WS statement WS "\)" WS statement ; -return_statement = "return" WS boolean_expression ; +return_statement = "return" | "return" WS boolean_expression ; code_block = "{" WS statement_list WS "}" ; statement_list = statement_list WS statement | statement ; -statement = if_statement | while_loop | for_loop | return_statement WS ";" | boolean_expression WS ";" | assignment_statement WS ";" | declaration_statement WS ";" | code_block ; +statement = if_statement | while_loop | for_loop | return_statement WS ";" | boolean_expression WS ";" | assignment_statement WS ";" | declaration_statement WS ";" | code_block | if_comp | simple_passthrough ; function_call = scope identifier "\(" WS opt_parameter_list WS "\)" ; scope = scope identifier "::" | ; @@ -69,4 +73,7 @@ double = sign numeric "." numeric | sign numeric "." 
numeric "d" ; bool = "true" | "false" | "True" | "False" ; alpha = "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|_)+" ; numeric = "(0|1|2|3|4|5|6|7|8|9)+" ; -string = "\"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|%| | |\\|/|\||\(|\)|0|1|2|3|4|5|6|7|8|9)+\"" ; \ No newline at end of file +string = triple_quoted_string | "\"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|%| | |\\|/|\||\(|\)|0|1|2|3|4|5|6|7|8|9)+\"" ; + +comment = "//(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|;|%|=|\+| | |\\|/|\||\(|\)|\*|0|1|2|3|4|5|6|7|8|9)+ +" | "/\*(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|%|=|\+| | |\\|/|\||\(|\)|0|1|2|3|4|5|6|7|8|9)+\*/" ; \ No newline at end of file diff --git a/main.cpp b/main.cpp index 955a7c6..2b1ca82 100644 --- a/main.cpp +++ b/main.cpp @@ -134,10 +134,11 @@ int main(int argc, char* argv[]) { preASTTransforms.push_back(new CollapseTransformation(Symbol("opt_parameter_list", false))); preASTTransforms.push_back(new CollapseTransformation(Symbol("opt_import_list", false))); preASTTransforms.push_back(new CollapseTransformation(Symbol("import_list", false))); - preASTTransforms.push_back(new CollapseTransformation(Symbol("function_list", false))); preASTTransforms.push_back(new CollapseTransformation(Symbol("statement_list", false))); preASTTransforms.push_back(new CollapseTransformation(Symbol("parameter_list", false))); preASTTransforms.push_back(new CollapseTransformation(Symbol("typed_parameter_list", false))); + preASTTransforms.push_back(new CollapseTransformation(Symbol("unorderd_list_part", false))); + preASTTransforms.push_back(new CollapseTransformation(Symbol("if_comp_pred", false))); for 
(int i = 0; i < preASTTransforms.size(); i++) { parseTree = preASTTransforms[i]->transform(parseTree); @@ -145,7 +146,6 @@ int main(int argc, char* argv[]) { preASTTransforms.erase(preASTTransforms.begin(), preASTTransforms.end()); NodeTree* AST = ASTTransformation().transform(parseTree); - //NodeTree* AST = (new ASTTransformation())->transform(parseTree); if (parseTree) { outFileTransformed << parseTree->DOTGraphString() << std::endl; @@ -162,13 +162,13 @@ int main(int argc, char* argv[]) { outFileAST.close(); //Do type checking, scope creation, etc. here. - //None at this time, instead going strait to C in this first (more naive) version + //None at this time, instead going straight to C in this first (more naive) version //Code generation //For right now, just C std::string c_code = CGenerator().generate(AST); outFileC << c_code << std::endl; - outFileC.close(); + outFileC.close(); return(0); } diff --git a/src/ASTData.cpp b/src/ASTData.cpp index 81c0bda..5e5ee66 100644 --- a/src/ASTData.cpp +++ b/src/ASTData.cpp @@ -24,7 +24,9 @@ std::string ASTData::toString() { } ValueType ASTData::strToType(std::string type) { - if (type == "bool") + if (type == "void") + return void_type; + else if (type == "bool") return boolean; else if (type == "int") return integer; @@ -41,22 +43,18 @@ std::string ASTData::ValueTypeToString(ValueType type) { switch (type) { case none: return "none"; - break; + case void_type: + return "void"; case boolean: return "bool"; - break; case integer: return "int"; - break; case floating: return "float"; - break; case double_percision: return "double"; - break; case char_string: return "string"; - break; default: return "unknown_ValueType"; } @@ -96,6 +94,10 @@ std::string ASTData::ASTTypeToString(ASTType type) { return "assignment_statement"; case declaration_statement: return "declaration_statement"; + case if_comp: + return "if_comp"; + case simple_passthrough: + return "simple_passthrough"; case function_call: return "function_call"; case 
value: diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 9ccd943..52d17c0 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -105,6 +105,10 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { newNode->addChild(newIdentifier); skipChildren.insert(0); //These, the type and the identifier, have been taken care of. skipChildren.insert(1); + } else if (name == "if_comp") { + newNode = new NodeTree(name, ASTData(if_comp)); + } else if (name == "simple_passthrough") { + newNode = new NodeTree(name, ASTData(simple_passthrough)); } else if (name == "function_call") { //children[0] is scope std::string functionCallName = concatSymbolTree(children[1]); @@ -122,7 +126,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(from), true), floating)); } else if (name == "double") { newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(from), true), double_percision)); - } else if (name == "string") { + } else if (name == "string" || name == "triple_quoted_string") { newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), char_string)); } else { return new NodeTree(); diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 53575cf..2f68091 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -1,6 +1,6 @@ #include "CGenerator.h" -CGenerator::CGenerator() { +CGenerator::CGenerator() : generatorString("__C__") { tabLevel = 0; } CGenerator::~CGenerator() { @@ -26,7 +26,8 @@ std::string CGenerator::generate(NodeTree* from) { //Do nothing break; case import: - return "#include <" + data.symbol.getName() + ">\n"; + return "/* would import \"" + data.symbol.getName() + "\" but....*/\n"; + //return "#include <" + data.symbol.getName() + ">\n"; case identifier: return data.symbol.getName(); case function: @@ -34,7 +35,7 @@ std::string CGenerator::generate(NodeTree* from) { for (int i = 0; i < 
children.size()-1; i++) { if (i > 0) output += ", "; - output += ASTData::ValueTypeToString(children[i]->getData().valueType) + " " + generate(children[i]); + output += ValueTypeToCType(children[i]->getData().valueType) + " " + generate(children[i]); } output+= ")\n" + generate(children[children.size()-1]); return output; @@ -65,11 +66,20 @@ std::string CGenerator::generate(NodeTree* from) { output += "for (" + strSlice(generate(children[0]),0,-2) + generate(children[1]) + ";" + strSlice(generate(children[2]),0,-3) + ")\n\t" + generate(children[3]); return output; case return_statement: - return "return " + generate(children[0]); + if (children.size()) + return "return " + generate(children[0]); + else + return "return"; case assignment_statement: return generate(children[0]) + " = " + generate(children[1]); case declaration_statement: return ASTData::ValueTypeToString(children[0]->getData().valueType) + " " + generate(children[0]) + " = " + generate(children[1]); + case if_comp: + if (generate(children[0]) == generatorString) + return generate(children[1]); + return ""; + case simple_passthrough: + return strSlice(generate(children[0]), 3, -4); case function_call: { //Handle operators specially for now. Will later replace with @@ -104,22 +114,20 @@ std::string CGenerator::ValueTypeToCType(ValueType type) { switch (type) { case none: return "none"; - break; + case void_type: + return "void"; case boolean: return "bool"; - break; case integer: return "int"; break; case floating: return "float"; - break; case double_percision: return "double"; break; case char_string: return "char*"; - break; default: return "unknown_ValueType"; } diff --git a/src/RNGLRParser.cpp b/src/RNGLRParser.cpp index ec64df5..392bc30 100644 --- a/src/RNGLRParser.cpp +++ b/src/RNGLRParser.cpp @@ -79,7 +79,7 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { std::cout << "Frontier " << i << " is empty." 
<< std::endl; std::cout << "Failed on " << input[i].toString() << std::endl; std::cout << "Nearby is:" << std::endl; - int range = 5; + const int range = 10; for (int j = (i-range >= 0 ? i-range : 0); j < (i+range < input.size() ? i+range : input.size()); j++) if (j == i) std::cout << "||*||*||" << input[j].toString() << "||*||*|| "; diff --git a/src/util.cpp b/src/util.cpp index ce48395..03d5cd3 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -8,7 +8,7 @@ std::string intToString(int theInt) { std::string replaceExEscape(std::string first, std::string search, std::string replace) { size_t pos = 0; - while (pos < first.size()-search.size()) { + while (pos <= first.size()-search.size()) { pos = first.find(search, pos); if (pos == std::string::npos) break; From 15674fec2a15bd3fa0764ce5d8d0d4720a9d5ed7 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Mon, 23 Dec 2013 01:26:24 -0600 Subject: [PATCH 05/25] Added Type class, bettered types a bit, made address of and dereference operators work. 
--- CMakeLists.txt | 2 +- include/ASTData.h | 12 +++---- include/CGenerator.h | 3 +- include/Type.h | 29 ++++++++++++++++ krakenGrammer.kgm | 5 +-- main.cpp | 62 +++++++++++++++++---------------- src/ASTData.cpp | 43 ++--------------------- src/ASTTransformation.cpp | 46 +++++++++++++++++------- src/CGenerator.cpp | 38 +++++++++++++------- src/Type.cpp | 73 +++++++++++++++++++++++++++++++++++++++ 10 files changed, 207 insertions(+), 106 deletions(-) create mode 100644 include/Type.h create mode 100644 src/Type.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 4083ad4..e0fcec1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project(Kraken) set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include) -set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/GraphStructuredStack.cpp src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp src/ASTData.cpp src/ASTTransformation.cpp src/CGenerator.cpp ) +set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/GraphStructuredStack.cpp src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp src/ASTData.cpp src/ASTTransformation.cpp src/CGenerator.cpp src/Type.cpp ) include_directories( ${MY_INCLUDES} ) diff --git a/include/ASTData.h b/include/ASTData.h index 593c84b..e511b98 100644 --- a/include/ASTData.h +++ b/include/ASTData.h @@ -3,7 +3,9 @@ #include #include + #include "Symbol.h" +#include "Type.h" #ifndef NULL #define NULL 0 @@ -14,21 +16,17 @@ enum ASTType {undef, translation_unit, interpreter_directive, import, identifier typed_parameter, expression, boolean_expression, statement, if_statement, while_loop, for_loop, return_statement, assignment_statement, declaration_statement, if_comp, simple_passthrough, function_call, value}; -enum ValueType 
{none, void_type, boolean, integer, floating, double_percision, char_string }; - class ASTData { public: ASTData(); - ASTData(ASTType type, ValueType valueType = none); - ASTData(ASTType type, Symbol symbol, ValueType valueType = none); + ASTData(ASTType type, Type valueType = Type()); + ASTData(ASTType type, Symbol symbol, Type valueType = Type()); ~ASTData(); std::string toString(); static std::string ASTTypeToString(ASTType type); - static std::string ValueTypeToString(ValueType type); - static ValueType strToType(std::string type); ASTType type; - ValueType valueType; + Type valueType; Symbol symbol; private: diff --git a/include/CGenerator.h b/include/CGenerator.h index b819f77..35d26a7 100644 --- a/include/CGenerator.h +++ b/include/CGenerator.h @@ -6,6 +6,7 @@ #include "NodeTree.h" #include "ASTData.h" +#include "Type.h" #include "util.h" @@ -15,7 +16,7 @@ class CGenerator { CGenerator(); ~CGenerator(); std::string generate(NodeTree* from); - static std::string ValueTypeToCType(ValueType type); + static std::string ValueTypeToCType(Type type); std::string generatorString; private: std::string tabs(); diff --git a/include/Type.h b/include/Type.h new file mode 100644 index 0000000..4727f49 --- /dev/null +++ b/include/Type.h @@ -0,0 +1,29 @@ +#ifndef TYPE_H +#define TYPE_H + +#ifndef NULL +#define NULL 0 +#endif + +#include +#include + +#include "util.h" + +enum ValueType {none, void_type, boolean, integer, floating, double_percision, character }; + + +class Type { + public: + Type(); + Type(ValueType typeIn, int indirectionIn); + Type(ValueType typeIn); + Type(std::string typeIn); + ~Type(); + std::string toString(); + ValueType baseType; + int indirection; + private: +}; + +#endif \ No newline at end of file diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index bb027b5..419e345 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -28,7 +28,7 @@ function = type WS identifier WS "\(" WS opt_typed_parameter_list WS "\)" WS cod opt_typed_parameter_list = 
typed_parameter_list | ; typed_parameter_list = typed_parameter_list WS "," WS typed_parameter | typed_parameter ; -typed_parameter = type WS parameter ; +typed_parameter = type WS identifier ; opt_parameter_list = parameter_list | ; parameter_list = parameter_list WS "," WS parameter | parameter ; @@ -58,7 +58,7 @@ expression = expression WS "<<" WS term | expression WS ">>" WS shiftand | shift shiftand = shiftand WS "-" WS term | shiftand WS "\+" WS term | term ; term = term WS forward_slash WS factor | term WS "\*" WS factor | term WS "%" WS factor | factor ; factor = "\+\+" WS unarad | unarad WS "\+\+" | "--" WS unarad | unarad WS "--" | "\+" WS unarad | "-" WS unarad | "!" WS unarad | "~" WS unarad | "\(" WS type WS "\)" WS unarad | "\*" WS unarad | "&" WS unarad | unarad ; -unarad = number | identifier | function_call | bool | string | "\(" WS boolean_expression WS "\)" ; +unarad = number | identifier | function_call | bool | string | character | "\(" WS boolean_expression WS "\)" ; number = integer | float | double ; assignment_statement = identifier WS "=" WS boolean_expression | identifier WS "\+=" WS boolean_expression | identifier WS "-=" WS boolean_expression | identifier WS "\*=" WS boolean_expression | identifier WS "/=" WS boolean_expression ; @@ -71,6 +71,7 @@ integer = sign numeric | sign hexadecimal | "null" ; float = sign numeric "." numeric "f" ; double = sign numeric "." numeric | sign numeric "." 
numeric "d" ; bool = "true" | "false" | "True" | "False" ; +character = "'(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|%| | |\\|/|\||\(|\)|0|1|2|3|4|5|6|7|8|9)'" ; alpha = "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|_)+" ; numeric = "(0|1|2|3|4|5|6|7|8|9)+" ; string = triple_quoted_string | "\"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|%| | |\\|/|\||\(|\)|0|1|2|3|4|5|6|7|8|9)+\"" ; diff --git a/main.cpp b/main.cpp index 2b1ca82..f5f9b67 100644 --- a/main.cpp +++ b/main.cpp @@ -113,37 +113,41 @@ int main(int argc, char* argv[]) { } outFile.close(); - //Pre AST Transformations - std::vector*> preASTTransforms; //Remove Transformations - preASTTransforms.push_back(new RemovalTransformation(Symbol("WS", false))); - preASTTransforms.push_back(new RemovalTransformation(Symbol("\\(", true))); - preASTTransforms.push_back(new RemovalTransformation(Symbol("\\)", true))); - preASTTransforms.push_back(new RemovalTransformation(Symbol("::", true))); - preASTTransforms.push_back(new RemovalTransformation(Symbol(";", true))); - preASTTransforms.push_back(new RemovalTransformation(Symbol("{", true))); - preASTTransforms.push_back(new RemovalTransformation(Symbol("}", true))); - preASTTransforms.push_back(new RemovalTransformation(Symbol("(", true))); - preASTTransforms.push_back(new RemovalTransformation(Symbol(")", true))); - preASTTransforms.push_back(new RemovalTransformation(Symbol("import", true))); //Don't need the actual text of the symbol - preASTTransforms.push_back(new RemovalTransformation(Symbol("interpreter_directive", false))); - preASTTransforms.push_back(new RemovalTransformation(Symbol("if", true))); - preASTTransforms.push_back(new RemovalTransformation(Symbol("while", true))); - //Collapse Transformations - preASTTransforms.push_back(new 
CollapseTransformation(Symbol("opt_typed_parameter_list", false))); - preASTTransforms.push_back(new CollapseTransformation(Symbol("opt_parameter_list", false))); - preASTTransforms.push_back(new CollapseTransformation(Symbol("opt_import_list", false))); - preASTTransforms.push_back(new CollapseTransformation(Symbol("import_list", false))); - preASTTransforms.push_back(new CollapseTransformation(Symbol("statement_list", false))); - preASTTransforms.push_back(new CollapseTransformation(Symbol("parameter_list", false))); - preASTTransforms.push_back(new CollapseTransformation(Symbol("typed_parameter_list", false))); - preASTTransforms.push_back(new CollapseTransformation(Symbol("unorderd_list_part", false))); - preASTTransforms.push_back(new CollapseTransformation(Symbol("if_comp_pred", false))); + std::vector removeSymbols; + removeSymbols.push_back(Symbol("WS", false)); + removeSymbols.push_back(Symbol("\\(", true)); + removeSymbols.push_back(Symbol("\\)", true)); + removeSymbols.push_back(Symbol("::", true)); + removeSymbols.push_back(Symbol(";", true)); + removeSymbols.push_back(Symbol("{", true)); + removeSymbols.push_back(Symbol("}", true)); + removeSymbols.push_back(Symbol("(", true)); + removeSymbols.push_back(Symbol(")", true)); + removeSymbols.push_back(Symbol("import", true)); //Don't need the actual text of the symbol + removeSymbols.push_back(Symbol("interpreter_directive", false)); + removeSymbols.push_back(Symbol("if", true)); + removeSymbols.push_back(Symbol("while", true)); + + for (int i = 0; i < removeSymbols.size(); i++) + parseTree = RemovalTransformation(removeSymbols[i]).transform(parseTree); + + //Collapse Transformations + std::vector collapseSymbols; + + collapseSymbols.push_back(Symbol("opt_typed_parameter_list", false)); + collapseSymbols.push_back(Symbol("opt_parameter_list", false)); + collapseSymbols.push_back(Symbol("opt_import_list", false)); + collapseSymbols.push_back(Symbol("import_list", false)); + 
collapseSymbols.push_back(Symbol("statement_list", false)); + collapseSymbols.push_back(Symbol("parameter_list", false)); + collapseSymbols.push_back(Symbol("typed_parameter_list", false)); + collapseSymbols.push_back(Symbol("unorderd_list_part", false)); + collapseSymbols.push_back(Symbol("if_comp_pred", false)); + + for (int i = 0; i < collapseSymbols.size(); i++) + parseTree = CollapseTransformation(collapseSymbols[i]).transform(parseTree); - for (int i = 0; i < preASTTransforms.size(); i++) { - parseTree = preASTTransforms[i]->transform(parseTree); - } - preASTTransforms.erase(preASTTransforms.begin(), preASTTransforms.end()); NodeTree* AST = ASTTransformation().transform(parseTree); diff --git a/src/ASTData.cpp b/src/ASTData.cpp index 5e5ee66..0d1335b 100644 --- a/src/ASTData.cpp +++ b/src/ASTData.cpp @@ -4,12 +4,12 @@ ASTData::ASTData() { this->type = undef; } -ASTData::ASTData(ASTType type, ValueType valueType) { +ASTData::ASTData(ASTType type, Type valueType) { this->type = type; this->valueType = valueType; } -ASTData::ASTData(ASTType type, Symbol symbol, ValueType valueType) { +ASTData::ASTData(ASTType type, Symbol symbol, Type valueType) { this->type = type; this->valueType = valueType; this->symbol = symbol; @@ -20,44 +20,7 @@ ASTData::~ASTData() { } std::string ASTData::toString() { - return ASTTypeToString(type) + (symbol.isTerminal() ? " " + symbol.toString() : "") + (valueType ? 
" " + ValueTypeToString(valueType) : ""); -} - -ValueType ASTData::strToType(std::string type) { - if (type == "void") - return void_type; - else if (type == "bool") - return boolean; - else if (type == "int") - return integer; - else if (type == "float") - return floating; - else if (type == "double") - return double_percision; - else if (type == "string") - return char_string; - else return none; -} - -std::string ASTData::ValueTypeToString(ValueType type) { - switch (type) { - case none: - return "none"; - case void_type: - return "void"; - case boolean: - return "bool"; - case integer: - return "int"; - case floating: - return "float"; - case double_percision: - return "double"; - case char_string: - return "string"; - default: - return "unknown_ValueType"; - } + return ASTTypeToString(type) + (symbol.isTerminal() ? " " + symbol.toString() : "") + " " + valueType.toString(); } std::string ASTData::ASTTypeToString(ASTType type) { diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 52d17c0..ea22d29 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -25,14 +25,14 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { } else if (name == "identifier") { newNode = new NodeTree(name, ASTData(identifier, Symbol(concatSymbolTree(children[0]), true))); } else if (name == "function") { - newNode = new NodeTree(name, ASTData(function, Symbol(concatSymbolTree(children[1]), true), ASTData::strToType(concatSymbolTree(children[0])))); + newNode = new NodeTree(name, ASTData(function, Symbol(concatSymbolTree(children[1]), true), Type(concatSymbolTree(children[0])))); skipChildren.insert(0); skipChildren.insert(1); } else if (name == "code_block") { newNode = new NodeTree(name, ASTData(code_block)); } else if (name == "typed_parameter") { newNode = transform(children[1]); //Transform to get the identifier - newNode->getDataRef()->valueType = ASTData::strToType(concatSymbolTree(children[0])); //Get the type (left child) and set our 
new identifer to be that type + newNode->getDataRef()->valueType = Type(concatSymbolTree(children[0])); //Get the type (left child) and set our new identifer to be that type return newNode; } else if (name == "boolean_expression") { //If this is an actual part of an expression, not just a premoted term @@ -62,16 +62,31 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { return transform(children[0]); //Just a promoted bool_exp, so do child } //Here's the order of ops stuff - } else if (name == "expression" || name == "shiftand" || name == "term" || name == "factor" || name == "unarad") { + } else if (name == "expression" || name == "shiftand" || name == "term" || name == "unarad") { //unarad can ride through, it should always just be a promoted child //If this is an actual part of an expression, not just a premoted child - if (children.size() > 1) { + if (children.size() > 2) { std::string functionCallName = concatSymbolTree(children[1]); - std::cout << functionCallName << std::endl; newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); skipChildren.insert(1); } else { return transform(children[0]); //Just a promoted child, so do it instead } + } else if (name == "factor") { //Do factor here, as it has all the weird unary operators + //If this is an actual part of an expression, not just a premoted child + //NO SUPPORT FOR CASTING YET + if (children.size() == 2) { + std::string funcName = concatSymbolTree(children[0]); + int funcNum; + if (funcName == "*" || funcName == "&" || funcName == "++" || funcName == "--" || funcName == "-" || funcName == "!" 
|| funcName == "~") + funcNum = 0; + else + funcName = concatSymbolTree(children[1]), funcNum = 1; + + newNode = new NodeTree(funcName, ASTData(function_call, Symbol(funcName, true))); + skipChildren.insert(funcNum); + } else { + return transform(children[0]); //Just a promoted child, so do it instead + } } else if (name == "statement") { newNode = new NodeTree(name, ASTData(statement)); } else if (name == "if_statement") { @@ -101,7 +116,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { } else if (name == "declaration_statement") { newNode = new NodeTree(name, ASTData(declaration_statement)); NodeTree* newIdentifier = transform(children[1]); //Transform the identifier - newIdentifier->getDataRef()->valueType = ASTData::strToType(concatSymbolTree(children[0]));//set the type of the identifier + newIdentifier->getDataRef()->valueType = Type(concatSymbolTree(children[0]));//set the type of the identifier newNode->addChild(newIdentifier); skipChildren.insert(0); //These, the type and the identifier, have been taken care of. 
skipChildren.insert(1); @@ -116,18 +131,23 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { skipChildren.insert(1); } else if (name == "parameter") { return transform(children[0]); //Don't need a parameter node, just the value - } else if (name == "bool") { - newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), boolean)); + } else if (name == "parameter") { + return transform(children[0]); //Don't need a parameter node, just the value + } else if (name == "type") { + std::string theConcat = concatSymbolTree(from); //We have no symbol, so this will concat our children + newNode = new NodeTree(name, ASTData(value, Symbol(theConcat, true), Type(theConcat))); } else if (name == "number") { return transform(children[0]); } else if (name == "integer") { - newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(from), true), integer)); + newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(from), true), Type(integer))); } else if (name == "float") { - newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(from), true), floating)); + newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(from), true), Type(floating))); } else if (name == "double") { - newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(from), true), double_percision)); + newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(from), true), Type(double_percision))); + } else if (name == "char") { + newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), Type(character))); //Indirection of 1 for array } else if (name == "string" || name == "triple_quoted_string") { - newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), char_string)); + newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), Type(character, 1))); //Indirection of 1 for array } else { return new NodeTree(); } @@ 
-153,7 +173,7 @@ std::string ASTTransformation::concatSymbolTree(NodeTree* root) { concatString += ourValue; std::vector*> children = root->getChildren(); for (int i = 0; i < children.size(); i++) { - concatString = concatSymbolTree(children[i]); + concatString += concatSymbolTree(children[i]); } return concatString; } diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 2f68091..2b4645d 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -73,7 +73,7 @@ std::string CGenerator::generate(NodeTree* from) { case assignment_statement: return generate(children[0]) + " = " + generate(children[1]); case declaration_statement: - return ASTData::ValueTypeToString(children[0]->getData().valueType) + " " + generate(children[0]) + " = " + generate(children[1]); + return ValueTypeToCType(children[0]->getData().valueType) + " " + generate(children[0]) + " = " + generate(children[1]); case if_comp: if (generate(children[0]) == generatorString) return generate(children[1]); @@ -87,6 +87,8 @@ std::string CGenerator::generate(NodeTree* from) { std::string name = data.symbol.getName(); if (name == "++" || name == "--") return generate(children[0]) + name; + if (name == "*" && children.size() == 1) //Is dereference, not multiplication + return "*(" + generate(children[0]) + ")"; if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!=" || name == "<" || name == ">" || name == "%" || name == "+=" || name == "-=" || name == "*=" || name == "/=") { return "((" + generate(children[0]) + ")" + name + "(" + generate(children[1]) + "))"; } @@ -110,25 +112,35 @@ std::string CGenerator::generate(NodeTree* from) { return output; } -std::string CGenerator::ValueTypeToCType(ValueType type) { - switch (type) { +std::string CGenerator::ValueTypeToCType(Type type) { + std::string return_type; + switch (type.baseType) { case none: - return "none"; + return_type = "none"; + break; case void_type: - return "void"; + 
return_type = "void"; + break; case boolean: - return "bool"; + return_type = "bool"; + break; case integer: - return "int"; + return_type = "int"; break; case floating: - return "float"; - case double_percision: - return "double"; + return_type = "float"; + break; + case double_percision: + return_type = "double"; + break; + case character: + return_type = "char"; break; - case char_string: - return "char*"; default: - return "unknown_ValueType"; + return_type = "unknown_ValueType"; + break; } + for (int i = 0; i < type.indirection; i++) + return_type += "*"; + return return_type; } diff --git a/src/Type.cpp b/src/Type.cpp new file mode 100644 index 0000000..f8df255 --- /dev/null +++ b/src/Type.cpp @@ -0,0 +1,73 @@ +#include "Type.h" + +Type::Type() { + indirection = 0; + baseType = none; +} + +Type::Type(ValueType typeIn) { + indirection = 0; + baseType = typeIn; +} + +Type::Type(ValueType typeIn, int indirectionIn) { + indirection = indirectionIn; + baseType = typeIn; +} + +Type::Type(std::string typeIn) { + indirection = 0; + while (typeIn[typeIn.size() - indirection - 1] == '*') indirection++; + std::string edited = strSlice(typeIn, 0, -(indirection + 1)); + if (edited == "void") + baseType = void_type; + else if (edited == "bool") + baseType = boolean; + else if (edited == "int") + baseType = integer; + else if (edited == "float") + baseType = floating; + else if (edited == "double") + baseType = double_percision; + else if (edited == "char") + baseType = character; + else + baseType = none; + std::cout << ":ALKJF:LSKDJF:SDJF:LKSJDF\t\t\t" << typeIn << "\t" << edited << std::endl; +} + + +Type::~Type() { +} + +std::string Type::toString() { + std::string typeString; + switch (baseType) { + case none: + typeString = "none"; + break; + case void_type: + typeString = "void"; + break; + case boolean: + typeString = "bool"; + break; + case integer: + typeString = "int"; + break; + case floating: + typeString = "float"; + break; + case double_percision: + 
typeString = "double"; + break; + case character: + typeString = "char"; + break; + default: + typeString = "unknown_type"; + } + for (int i = 0; i < indirection; i++) + typeString += "*"; + return typeString; +} From 98b899b8a98e37d4fe23f7de08c84c2e34902515 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Fri, 27 Dec 2013 13:05:07 -0600 Subject: [PATCH 06/25] Added rough but working scoping. --- CMakeLists.txt | 2 + include/ASTData.h | 3 +- include/ASTTransformation.h | 6 +- krakenGrammer.kgm | 4 +- main.cpp | 6 +- src/ASTTransformation.cpp | 172 +++++++++++++++++++++++++++--------- src/CGenerator.cpp | 30 +++++-- 7 files changed, 170 insertions(+), 53 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e0fcec1..ad4cfeb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,8 @@ cmake_minimum_required (VERSION 2.6) project(Kraken) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include) set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/GraphStructuredStack.cpp src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp src/ASTData.cpp src/ASTTransformation.cpp src/CGenerator.cpp src/Type.cpp ) diff --git a/include/ASTData.h b/include/ASTData.h index e511b98..9c39d04 100644 --- a/include/ASTData.h +++ b/include/ASTData.h @@ -2,7 +2,7 @@ #define ASTDATA_H #include -#include +#include #include "Symbol.h" #include "Type.h" @@ -28,6 +28,7 @@ class ASTData { ASTType type; Type valueType; Symbol symbol; + std::map*> scope; private: }; diff --git a/include/ASTTransformation.h b/include/ASTTransformation.h index 4ecd57f..c9d1c41 100644 --- a/include/ASTTransformation.h +++ b/include/ASTTransformation.h @@ -1,6 +1,9 @@ #ifndef ASTTRANSFORMATION_H #define ASTTRANSFORMATION_H +#include +#include + #include "ASTData.h" #include "NodeTransformation.h" @@ -9,8 +12,9 @@ class 
ASTTransformation: public NodeTransformation { ASTTransformation(); ~ASTTransformation(); virtual NodeTree* transform(NodeTree* from); + NodeTree* transform(NodeTree* from, NodeTree* scope); std::string concatSymbolTree(NodeTree* root); - + NodeTree* scopeLookup(NodeTree* scope, std::string lookup); private: //Nothing }; diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index 419e345..9ea5314 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -76,5 +76,5 @@ alpha = "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I| numeric = "(0|1|2|3|4|5|6|7|8|9)+" ; string = triple_quoted_string | "\"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|%| | |\\|/|\||\(|\)|0|1|2|3|4|5|6|7|8|9)+\"" ; -comment = "//(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|;|%|=|\+| | |\\|/|\||\(|\)|\*|0|1|2|3|4|5|6|7|8|9)+ -" | "/\*(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|%|=|\+| | |\\|/|\||\(|\)|0|1|2|3|4|5|6|7|8|9)+\*/" ; \ No newline at end of file +comment = "//(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|;|%|=|\+| | |\\|/|\||\(|\)|\*|\"|0|1|2|3|4|5|6|7|8|9)+ +" | "/\*(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|%|=|\+| | |\\|/|\||\(|\)|\"|0|1|2|3|4|5|6|7|8|9)+\*/" ; \ No newline at end of file diff --git a/main.cpp b/main.cpp index f5f9b67..8a7986f 100644 --- a/main.cpp +++ b/main.cpp @@ -128,6 +128,8 @@ int main(int argc, char* argv[]) { removeSymbols.push_back(Symbol("interpreter_directive", false)); removeSymbols.push_back(Symbol("if", true)); removeSymbols.push_back(Symbol("while", true)); + removeSymbols.push_back(Symbol("__if_comp__", true)); + 
removeSymbols.push_back(Symbol("comp_simple_passthrough", true)); for (int i = 0; i < removeSymbols.size(); i++) parseTree = RemovalTransformation(removeSymbols[i]).transform(parseTree); @@ -148,9 +150,6 @@ int main(int argc, char* argv[]) { for (int i = 0; i < collapseSymbols.size(); i++) parseTree = CollapseTransformation(collapseSymbols[i]).transform(parseTree); - - NodeTree* AST = ASTTransformation().transform(parseTree); - if (parseTree) { outFileTransformed << parseTree->DOTGraphString() << std::endl; } else { @@ -158,6 +157,7 @@ int main(int argc, char* argv[]) { } outFileTransformed.close(); + NodeTree* AST = ASTTransformation().transform(parseTree); if (AST) { outFileAST << AST->DOTGraphString() << std::endl; } else { diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index ea22d29..118e44c 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -9,6 +9,11 @@ ASTTransformation::~ASTTransformation() { } NodeTree* ASTTransformation::transform(NodeTree* from) { + //Set up top scope + return transform(from, NULL); +} + +NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree* scope) { Symbol current = from->getData(); std::string name = current.getName(); NodeTree* newNode; @@ -17,59 +22,93 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { if (name == "translation_unit") { newNode = new NodeTree(name, ASTData(translation_unit)); + scope = newNode; + //Temporary scope fix + scope->getDataRef()->scope["+"] = new NodeTree(); + scope->getDataRef()->scope["-"] = new NodeTree(); + scope->getDataRef()->scope["*"] = new NodeTree(); + scope->getDataRef()->scope["&"] = new NodeTree(); + scope->getDataRef()->scope["=="] = new NodeTree(); + scope->getDataRef()->scope["--"] = new NodeTree(); + scope->getDataRef()->scope["++"] = new NodeTree(); + scope->getDataRef()->scope["<="] = new NodeTree(); + scope->getDataRef()->scope[">="] = new NodeTree(); + scope->getDataRef()->scope["*="] = new NodeTree(); + 
scope->getDataRef()->scope["+="] = new NodeTree(); + scope->getDataRef()->scope["-="] = new NodeTree(); + scope->getDataRef()->scope["<"] = new NodeTree(); + scope->getDataRef()->scope[">"] = new NodeTree(); } else if (name == "interpreter_directive") { newNode = new NodeTree(name, ASTData(interpreter_directive)); } else if (name == "import" && !current.isTerminal()) { newNode = new NodeTree(name, ASTData(import, Symbol(concatSymbolTree(children[0]), true))); + //Add to scope? + // + // return newNode; // Don't need children of import } else if (name == "identifier") { - newNode = new NodeTree(name, ASTData(identifier, Symbol(concatSymbolTree(children[0]), true))); + std::string lookupName = concatSymbolTree(children[0]); + std::cout << "scope lookup from identifier" << std::endl; + newNode = scopeLookup(scope, lookupName); + if (newNode == NULL) { + std::cout << "scope lookup error! Could not find " << lookupName << std::endl; + throw "LOOKUP ERROR: " + lookupName; + } + //newNode = new NodeTree(name, ASTData(identifier, Symbol(concatSymbolTree(children[0]), true))); } else if (name == "function") { - newNode = new NodeTree(name, ASTData(function, Symbol(concatSymbolTree(children[1]), true), Type(concatSymbolTree(children[0])))); + std::string functionName = concatSymbolTree(children[1]); + newNode = new NodeTree(name, ASTData(function, Symbol(functionName, true), Type(concatSymbolTree(children[0])))); skipChildren.insert(0); skipChildren.insert(1); + scope->getDataRef()->scope[functionName] = newNode; + newNode->getDataRef()->scope["~enclosing_scope"] = scope; + scope = newNode; } else if (name == "code_block") { newNode = new NodeTree(name, ASTData(code_block)); + newNode->getDataRef()->scope["~enclosing_scope"] = scope; + scope = newNode; } else if (name == "typed_parameter") { - newNode = transform(children[1]); //Transform to get the identifier - newNode->getDataRef()->valueType = Type(concatSymbolTree(children[0])); //Get the type (left child) and set our 
new identifer to be that type + //newNode = transform(children[1]); //Transform to get the identifier + std::string parameterName = concatSymbolTree(children[1]); + std::string typeString = concatSymbolTree(children[0]);//Get the type (left child) and set our new identifer to be that type + newNode = new NodeTree("identifier", ASTData(identifier, Symbol(parameterName, true), Type(typeString))); + scope->getDataRef()->scope[parameterName] = newNode; return newNode; - } else if (name == "boolean_expression") { + } else if (name == "boolean_expression" || name == "and_boolean_expression" || name == "bool_exp") { //If this is an actual part of an expression, not just a premoted term if (children.size() > 1) { std::string functionCallName = concatSymbolTree(children[1]); + std::cout << "scope lookup from boolen_expression or similar" << std::endl; + NodeTree* function = scopeLookup(scope, functionCallName); + if (function == NULL) { + std::cout << "scope lookup error! Could not find " << functionCallName << std::endl; + throw "LOOKUP ERROR: " + functionCallName; + } newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); + newNode->addChild(function); // First child of function call is a link to the function definition skipChildren.insert(1); } else { - return transform(children[0]); //Just a promoted term, so do child - } - } else if (name == "and_boolean_expression") { - //If this is an actual part of an expression, not just a premoted bool_exp - if (children.size() > 1) { - std::string functionCallName = concatSymbolTree(children[1]); - newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); - skipChildren.insert(1); - } else { - return transform(children[0]); //Just a promoted bool_exp, so do child - } - } else if (name == "bool_exp") { - //If this is an actual part of an expression, not just a premoted bool_exp. 
- if (children.size() > 1) { - std::string functionCallName = concatSymbolTree(children[1]); - newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); - skipChildren.insert(1); - } else { - return transform(children[0]); //Just a promoted bool_exp, so do child + std::cout << children.size() << std::endl; + if (children.size() == 0) + return new NodeTree(); + return transform(children[0], scope); //Just a promoted term, so do child } //Here's the order of ops stuff } else if (name == "expression" || name == "shiftand" || name == "term" || name == "unarad") { //unarad can ride through, it should always just be a promoted child //If this is an actual part of an expression, not just a premoted child if (children.size() > 2) { std::string functionCallName = concatSymbolTree(children[1]); + std::cout << "scope lookup from expression or similar" << std::endl; + NodeTree* function = scopeLookup(scope, functionCallName); + if (function == NULL) { + std::cout << "scope lookup error! Could not find " << functionCallName << std::endl; + throw "LOOKUP ERROR: " + functionCallName; + } newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); + newNode->addChild(function); // First child of function call is a link to the function definition skipChildren.insert(1); } else { - return transform(children[0]); //Just a promoted child, so do it instead + return transform(children[0], scope); //Just a promoted child, so do it instead } } else if (name == "factor") { //Do factor here, as it has all the weird unary operators //If this is an actual part of an expression, not just a premoted child @@ -82,10 +121,17 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { else funcName = concatSymbolTree(children[1]), funcNum = 1; + std::cout << "scope lookup from factor" << std::endl; + NodeTree* function = scopeLookup(scope, funcName); + if (function == NULL) { + std::cout << "scope lookup error! 
Could not find " << funcName << std::endl; + throw "LOOKUP ERROR: " + funcName; + } newNode = new NodeTree(funcName, ASTData(function_call, Symbol(funcName, true))); + newNode->addChild(function); skipChildren.insert(funcNum); } else { - return transform(children[0]); //Just a promoted child, so do it instead + return transform(children[0], scope); //Just a promoted child, so do it instead } } else if (name == "statement") { newNode = new NodeTree(name, ASTData(statement)); @@ -101,43 +147,66 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { newNode = new NodeTree(name, ASTData(assignment_statement)); std::string assignFuncName = concatSymbolTree(children[1]); if (assignFuncName == "=") { - newNode->addChild(transform(children[0])); - newNode->addChild(transform(children[2])); + newNode->addChild(transform(children[0], scope)); + newNode->addChild(transform(children[2], scope)); } else { //For assignments like += or *=, expand the syntatic sugar. - NodeTree* lhs = transform(children[0]); - NodeTree* childCall = new NodeTree(assignFuncName.substr(0,1), ASTData(function_call, Symbol(assignFuncName.substr(0,1), true))); + NodeTree* lhs = transform(children[0], scope); + std::string functionName = assignFuncName.substr(0,1); + NodeTree* childCall = new NodeTree(functionName, ASTData(function_call, Symbol(functionName, true))); + NodeTree* functionDef = scopeLookup(scope, functionName); + if (functionDef == NULL) { + std::cout << "scope lookup error! 
Could not find " << functionName << std::endl; + throw "LOOKUP ERROR: " + functionName; + } + childCall->addChild(functionDef); //First child of function call is definition of the function childCall->addChild(lhs); - childCall->addChild(transform(children[2])); + childCall->addChild(transform(children[2], scope)); newNode->addChild(lhs); newNode->addChild(childCall); } return newNode; } else if (name == "declaration_statement") { newNode = new NodeTree(name, ASTData(declaration_statement)); - NodeTree* newIdentifier = transform(children[1]); //Transform the identifier - newIdentifier->getDataRef()->valueType = Type(concatSymbolTree(children[0]));//set the type of the identifier + + // NodeTree* newIdentifier = transform(children[1], scope); //Transform the identifier + // newIdentifier->getDataRef()->valueType = Type(concatSymbolTree(children[0]));//set the type of the identifier + + std::string newIdentifierStr = concatSymbolTree(children[1]); + std::string typeString = concatSymbolTree(children[0]);//Get the type (left child) and set our new identifer to be that type + NodeTree* newIdentifier = new NodeTree("identifier", ASTData(identifier, Symbol(newIdentifierStr, true), Type(typeString))); + scope->getDataRef()->scope[newIdentifierStr] = newIdentifier; + newNode->addChild(newIdentifier); skipChildren.insert(0); //These, the type and the identifier, have been taken care of. skipChildren.insert(1); } else if (name == "if_comp") { newNode = new NodeTree(name, ASTData(if_comp)); + newNode->addChild(new NodeTree("identifier", ASTData(identifier, Symbol(concatSymbolTree(children[0]),true)))); + skipChildren.insert(0); //Don't do the identifier. The identifier lookup will fail. That's why we do it here. 
} else if (name == "simple_passthrough") { newNode = new NodeTree(name, ASTData(simple_passthrough)); } else if (name == "function_call") { //children[0] is scope std::string functionCallName = concatSymbolTree(children[1]); newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); + std::cout << "scope lookup from function_call" << std::endl; + NodeTree* function = scopeLookup(scope, functionCallName); + if (function == NULL) { + std::cout << "scope lookup error! Could not find " << functionCallName << std::endl; + throw "LOOKUP ERROR: " + functionCallName; + } + newNode->addChild(function); skipChildren.insert(1); } else if (name == "parameter") { - return transform(children[0]); //Don't need a parameter node, just the value + return transform(children[0], scope); //Don't need a parameter node, just the value } else if (name == "parameter") { - return transform(children[0]); //Don't need a parameter node, just the value + return transform(children[0], scope); //Don't need a parameter node, just the value } else if (name == "type") { std::string theConcat = concatSymbolTree(from); //We have no symbol, so this will concat our children newNode = new NodeTree(name, ASTData(value, Symbol(theConcat, true), Type(theConcat))); } else if (name == "number") { - return transform(children[0]); + return transform(children[0], scope); } else if (name == "integer") { newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(from), true), Type(integer))); } else if (name == "float") { @@ -145,7 +214,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { } else if (name == "double") { newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(from), true), Type(double_percision))); } else if (name == "char") { - newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), Type(character))); //Indirection of 1 for array + newNode = new NodeTree(name, ASTData(value, 
Symbol(concatSymbolTree(children[0]), true), Type(character, 1))); //Indirection of 1 for array } else if (name == "string" || name == "triple_quoted_string") { newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), Type(character, 1))); //Indirection of 1 for array } else { @@ -155,8 +224,8 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { // In general, iterate through children and do them. Might not do this for all children. for (int i = 0; i < children.size(); i++) { if (skipChildren.find(i) == skipChildren.end()) { - NodeTree* transChild = transform(children[i]); - if (transChild->getData().type) //Only add the children that have a real ASTData::ASTType, that is, legit ASTData. + NodeTree* transChild = transform(children[i], scope); + if (transChild->getDataRef()->type) //Only add the children that have a real ASTData::ASTType, that is, legit ASTData. newNode->addChild(transChild); else delete transChild; @@ -168,7 +237,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { std::string ASTTransformation::concatSymbolTree(NodeTree* root) { std::string concatString; - std::string ourValue = root->getData().getValue(); + std::string ourValue = root->getDataRef()->getValue(); if (ourValue != "NoValue") concatString += ourValue; std::vector*> children = root->getChildren(); @@ -177,3 +246,26 @@ std::string ASTTransformation::concatSymbolTree(NodeTree* root) { } return concatString; } + +NodeTree* ASTTransformation::scopeLookup(NodeTree* scope, std::string lookup) { + //Seach the map + auto scopeMap = scope->getDataRef()->scope; + std::cout << "scope size: " << scopeMap.size() << ", scope from " << scope->getName() << std::endl; + for (auto i = scopeMap.begin(); i != scopeMap.end(); i++) + std::cout << i->first << " : " << i-> second << " - " << i->second->getName() << std::endl; + + auto elementIterator = scopeMap.find(lookup); + if (elementIterator != scopeMap.end()) { + std::cout << "lookup of " << lookup << " 
succeded in first scope!" << std::endl; + return elementIterator->second; + } + std::cout << "lookup of " << lookup << " failed in first scope, checking for upper scope" << std::endl; + //if it doesn't exist, try the enclosing scope if it exists. + auto enclosingIterator = scopeMap.find("~enclosing_scope"); + if (enclosingIterator != scopeMap.end()) { + std::cout << "upper scope exists, searching it for " << lookup << std::endl; + return scopeLookup(enclosingIterator->second, lookup); + } + std::cout << "upper scope does not exist" << std::endl; + return NULL; +} diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 2b4645d..260a12d 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -20,7 +20,22 @@ std::string CGenerator::generate(NodeTree* from) { std::string output = ""; switch (data.type) { case translation_unit: - //Do nothing + //Declare everything in translation unit scope here. (allows stuff from other files, automatic forward declarations) + for (auto i = data.scope.begin(); i != data.scope.end(); i++) { + NodeTree* declaration = i->second; + ASTData declarationData = i->second->getData(); + switch(declarationData.type) { + case identifier: + output += ValueTypeToCType(declarationData.valueType) + " " + declarationData.symbol.getName() + "; /*identifier*/\n"; + break; + case function: + output += "/*func*/\n"; + break; + default: + std::cout << "Declaration? of unknown type " << ASTData::ASTTypeToString(declarationData.type) << " in translation unit scope" << std::endl; + output += "/*unknown declaration*/\n"; + } + } break; case interpreter_directive: //Do nothing @@ -82,18 +97,21 @@ std::string CGenerator::generate(NodeTree* from) { return strSlice(generate(children[0]), 3, -4); case function_call: { + //NOTE: The first (0th) child of a function call node is the declaration of the function + //Handle operators specially for now. 
Will later replace with //Inlined functions in the standard library std::string name = data.symbol.getName(); + std::cout << name << " == " << children[0]->getData().symbol.getName() << std::endl; if (name == "++" || name == "--") - return generate(children[0]) + name; - if (name == "*" && children.size() == 1) //Is dereference, not multiplication - return "*(" + generate(children[0]) + ")"; + return generate(children[1]) + name; + if (name == "*" && children.size() == 2) //Is dereference, not multiplication + return "*(" + generate(children[1]) + ")"; if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!=" || name == "<" || name == ">" || name == "%" || name == "+=" || name == "-=" || name == "*=" || name == "/=") { - return "((" + generate(children[0]) + ")" + name + "(" + generate(children[1]) + "))"; + return "((" + generate(children[1]) + ")" + name + "(" + generate(children[2]) + "))"; } output += data.symbol.getName() + "("; - for (int i = 0; i < children.size(); i++) + for (int i = 1; i < children.size(); i++) //children[0] is the declaration if (i < children.size()-1) output += generate(children[i]) + ", "; else output += generate(children[i]); From dd9c8059ff245ea9ecb54d390dab66818679f908 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Sat, 28 Dec 2013 21:54:22 -0500 Subject: [PATCH 07/25] Clean up, some small additions. 
--- krakenGrammer.kgm | 2 +- main.cpp | 4 ++-- src/ASTTransformation.cpp | 10 +++++----- src/CGenerator.cpp | 19 +++++++++++++------ 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index 9ea5314..b7e2fe6 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -1,6 +1,6 @@ Goal = translation_unit ; translation_unit = interpreter_directive WS unorderd_list_part WS ; -unorderd_list_part = import_list WS unorderd_list_part | function WS unorderd_list_part | if_comp WS unorderd_list_part | simple_passthrough WS unorderd_list_part | import_list | function | if_comp | simple_passthrough ; +unorderd_list_part = import_list WS unorderd_list_part | function WS unorderd_list_part | if_comp WS unorderd_list_part | simple_passthrough WS unorderd_list_part | declaration_statement WS ";" WS unorderd_list_part | import_list | function | if_comp | simple_passthrough | declaration_statement WS ";" ; type = type WS "\*" | "void" | "int" | "float" | "double" | "char" | identifier ; diff --git a/main.cpp b/main.cpp index 8a7986f..360c2f5 100644 --- a/main.cpp +++ b/main.cpp @@ -36,9 +36,9 @@ int main(int argc, char* argv[]) { return(1); } - grammerInFile.open(argv[2]); + grammerInFile.open(grammerFileString); if (!grammerInFile.is_open()) { - std::cout << "Problem opening grammerInFile " << argv[2] << "\n"; + std::cout << "Problem opening grammerInFile " << grammerFileString << "\n"; return(1); } diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 118e44c..3097243 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -250,22 +250,22 @@ std::string ASTTransformation::concatSymbolTree(NodeTree* root) { NodeTree* ASTTransformation::scopeLookup(NodeTree* scope, std::string lookup) { //Seach the map auto scopeMap = scope->getDataRef()->scope; - std::cout << "scope size: " << scopeMap.size() << ", scope from " << scope->getName() << std::endl; + //std::cout << "scope size: " << scopeMap.size() << 
", scope from " << scope->getName() << std::endl; for (auto i = scopeMap.begin(); i != scopeMap.end(); i++) std::cout << i->first << " : " << i-> second << " - " << i->second->getName() << std::endl; auto elementIterator = scopeMap.find(lookup); if (elementIterator != scopeMap.end()) { - std::cout << "lookup of " << lookup << " succeded in first scope!" << std::endl; + // std::cout << "lookup of " << lookup << " succeded in first scope!" << std::endl; return elementIterator->second; } - std::cout << "lookup of " << lookup << " failed in first scope, checking for upper scope" << std::endl; + //std::cout << "lookup of " << lookup << " failed in first scope, checking for upper scope" << std::endl; //if it doesn't exist, try the enclosing scope if it exists. auto enclosingIterator = scopeMap.find("~enclosing_scope"); if (enclosingIterator != scopeMap.end()) { - std::cout << "upper scope exists, searching it for " << lookup << std::endl; + // std::cout << "upper scope exists, searching it for " << lookup << std::endl; return scopeLookup(enclosingIterator->second, lookup); } - std::cout << "upper scope does not exist" << std::endl; + //std::cout << "upper scope does not exist" << std::endl; return NULL; } diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 260a12d..007180a 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -23,17 +23,24 @@ std::string CGenerator::generate(NodeTree* from) { //Declare everything in translation unit scope here. 
(allows stuff from other files, automatic forward declarations) for (auto i = data.scope.begin(); i != data.scope.end(); i++) { NodeTree* declaration = i->second; + std::vector*> decChildren = declaration->getChildren(); ASTData declarationData = i->second->getData(); switch(declarationData.type) { case identifier: output += ValueTypeToCType(declarationData.valueType) + " " + declarationData.symbol.getName() + "; /*identifier*/\n"; break; case function: - output += "/*func*/\n"; + output += "\n" + ValueTypeToCType(declarationData.valueType) + " " + declarationData.symbol.getName() + "("; + for (int j = 0; j < decChildren.size()-1; j++) { + if (j > 0) + output += ", "; + output += ValueTypeToCType(decChildren[j]->getData().valueType) + " " + generate(decChildren[j]); + } + output+= "); /*func*/\n"; break; default: - std::cout << "Declaration? of unknown type " << ASTData::ASTTypeToString(declarationData.type) << " in translation unit scope" << std::endl; - output += "/*unknown declaration*/\n"; + std::cout << "Declaration? 
named " << declaration->getName() << " of unknown type " << ASTData::ASTTypeToString(declarationData.type) << " in translation unit scope" << std::endl; + output += "/*unknown declaration named " + declaration->getName() + "*/\n"; } } break; @@ -77,8 +84,8 @@ std::string CGenerator::generate(NodeTree* from) { output += "while (" + generate(children[0]) + ")\n\t" + generate(children[1]); return output; case for_loop: - //The strSlice's are there to get ride of an unwanted return and an unwanted semicolon - output += "for (" + strSlice(generate(children[0]),0,-2) + generate(children[1]) + ";" + strSlice(generate(children[2]),0,-3) + ")\n\t" + generate(children[3]); + //The strSlice's are there to get ride of an unwanted return and an unwanted semicolon(s) + output += "for (" + strSlice(generate(children[0]),0,-3) + generate(children[1]) + ";" + strSlice(generate(children[2]),0,-3) + ")\n\t" + generate(children[3]); return output; case return_statement: if (children.size()) @@ -88,7 +95,7 @@ std::string CGenerator::generate(NodeTree* from) { case assignment_statement: return generate(children[0]) + " = " + generate(children[1]); case declaration_statement: - return ValueTypeToCType(children[0]->getData().valueType) + " " + generate(children[0]) + " = " + generate(children[1]); + return ValueTypeToCType(children[0]->getData().valueType) + " " + generate(children[0]) + " = " + generate(children[1]) + ";"; case if_comp: if (generate(children[0]) == generatorString) return generate(children[1]); From 34553489512836ca94f4be6214ce881f145e4227 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Sat, 28 Dec 2013 21:55:43 -0500 Subject: [PATCH 08/25] Rookie mistake --- main.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/main.cpp b/main.cpp index 360c2f5..ae991e1 100644 --- a/main.cpp +++ b/main.cpp @@ -27,7 +27,7 @@ int main(int argc, char* argv[]) { return 0; } - std::ifstream programInFile, grammerInFile; + std::ifstream programInFile, 
grammerInFile, compiledGrammerInFile; std::ofstream outFile, outFileTransformed, outFileAST, outFileC; programInFile.open(argv[1]); @@ -36,6 +36,14 @@ int main(int argc, char* argv[]) { return(1); } + std::string grammerFileString = argv[2]; + + // compiledGrammerInFile.open(grammerFileString + ".comp"); + // if (!compiledGrammerInFile.is_open()) { + // std::cout << "Problem opening compiledGrammerInFile " << grammerFileString + ".comp" << "\n"; + // return(1); + // } + grammerInFile.open(grammerFileString); if (!grammerInFile.is_open()) { std::cout << "Problem opening grammerInFile " << grammerFileString << "\n"; From 005659b7b79d7e6d54089baee17c0a7f5b3e77e5 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Tue, 31 Dec 2013 02:53:52 -0600 Subject: [PATCH 09/25] Added caching of the RNGLR table. It is automatically regenerated whenever the grammer changes. Right now it has dropped compiling the test file from 30 seconds to less than one second. --- Kraken_Compiled_Grammer_file_format.txt | 41 +++++ include/Parser.h | 2 + include/Table.h | 4 + main.cpp | 68 +++++++- src/Parser.cpp | 10 ++ src/Table.cpp | 197 ++++++++++++++++++++++++ 6 files changed, 314 insertions(+), 8 deletions(-) create mode 100644 Kraken_Compiled_Grammer_file_format.txt diff --git a/Kraken_Compiled_Grammer_file_format.txt b/Kraken_Compiled_Grammer_file_format.txt new file mode 100644 index 0000000..32a40ac --- /dev/null +++ b/Kraken_Compiled_Grammer_file_format.txt @@ -0,0 +1,41 @@ +Kraken Compiled Grammer file format (.kgm.comp) + +This file is generated on first run, and regenerated everytime the grammer changes. +It contains the RNGLR table generated from the specified grammer so that it does not +have to be remade every time Kraken is run, saving a lot of time. +(at time of writing, non-cached: ~30 seconds, cached: <1 second) + + +This is a binary format. The first bytes are a magic number (KRAK in asci) + +The next bytes are an unsigned integer indicating how many characters follow. 
+Next are these characters, which are the grammer file as one long string. + +Next is the parse table length, followed by the table itself, exported with the table's export method. +It can be imported with the import method. +Note that within the parse table's data are parse actions, and within that, Symbols. + +The format: (more or less) +____________________ +|KRAK +|length_of_grammer_text +|GRAMMER_TEXT +|PARSE_TABLE +|-|length_of_symbol_index_vector +|-|SYMBOL_INDEX_VECTOR +|-|length_of_out_table_vector +|-|OUT_TABLE_VECTOR +|-|-|length_of_mid_table_vector +|-|-|MID_TABLE_VECTOR +|-|-|-|length_of_in_table_vector +|-|-|-|IN_TABLE_VECTOR +|-|-|-|-|length_of_parse_action +|-|-|-|-|PARSE_ACTION +|-|-|-|-|-|ActionType +|-|-|-|-|-|ParseRule__if_exists +|-|-|-|-|-|-|pointerIndex +|-|-|-|-|-|-|Symbol_left_handel +|-|-|-|-|-|-|rightside_vector_symbol +|-|-|-|-|-|shiftState +____________________ + diff --git a/include/Parser.h b/include/Parser.h index 0e876d1..6f1cc49 100644 --- a/include/Parser.h +++ b/include/Parser.h @@ -32,6 +32,8 @@ class Parser { virtual std::string grammerToDOT(); std::string tableToString(); + void exportTable(std::ofstream &file); + void importTable(char* tableData); protected: std::vector* firstSet(Symbol token); diff --git a/include/Table.h b/include/Table.h index fbfd3b9..6b8ce88 100644 --- a/include/Table.h +++ b/include/Table.h @@ -1,3 +1,5 @@ +#include + #include "util.h" #include "ParseRule.h" #include "ParseAction.h" @@ -11,6 +13,8 @@ class Table { public: Table(); ~Table(); + void exportTable(std::ofstream &file); + void importTable(char* tableData); void setSymbols(Symbol EOFSymbol, Symbol nullSymbol); void add(int stateNum, Symbol tranSymbol, ParseAction* action); void remove(int stateNum, Symbol tranSymbol); diff --git a/main.cpp b/main.cpp index ae991e1..a6ee9d4 100644 --- a/main.cpp +++ b/main.cpp @@ -3,6 +3,8 @@ #include #include +#include + #include "NodeTree.h" #include "Symbol.h" #include "Lexer.h" @@ -28,7 +30,7 @@ int 
main(int argc, char* argv[]) { } std::ifstream programInFile, grammerInFile, compiledGrammerInFile; - std::ofstream outFile, outFileTransformed, outFileAST, outFileC; + std::ofstream outFile, outFileTransformed, outFileAST, outFileC, compiledGrammerOutFile; programInFile.open(argv[1]); if (!programInFile.is_open()) { @@ -38,18 +40,18 @@ int main(int argc, char* argv[]) { std::string grammerFileString = argv[2]; - // compiledGrammerInFile.open(grammerFileString + ".comp"); - // if (!compiledGrammerInFile.is_open()) { - // std::cout << "Problem opening compiledGrammerInFile " << grammerFileString + ".comp" << "\n"; - // return(1); - // } - grammerInFile.open(grammerFileString); if (!grammerInFile.is_open()) { std::cout << "Problem opening grammerInFile " << grammerFileString << "\n"; return(1); } + compiledGrammerInFile.open(grammerFileString + ".comp", std::ios::binary | std::ios::ate); + if (!compiledGrammerInFile.is_open()) { + std::cout << "Problem opening compiledGrammerInFile " << grammerFileString + ".comp" << "\n"; + //return(1); + } + outFile.open(argv[3]); if (!outFile.is_open()) { std::cout << "Probelm opening output file " << argv[3] << "\n"; @@ -93,7 +95,57 @@ int main(int argc, char* argv[]) { parser.loadGrammer(grammerInputFileString); //std::cout << "Creating State Set from Main" << std::endl; std::cout << "\nState Set" << std::endl; - parser.createStateSet(); + + //Start binary stuff + bool compGramGood = false; + if (compiledGrammerInFile.is_open()) { + std::cout << "Compiled grammer file exists, reading it in" << std::endl; + std::streampos compGramSize = compiledGrammerInFile.tellg(); + char* binaryTablePointer = new char [compGramSize]; + compiledGrammerInFile.seekg(0, std::ios::beg); + compiledGrammerInFile.read(binaryTablePointer, compGramSize); + compiledGrammerInFile.close(); + if (binaryTablePointer[0] == 'K' && binaryTablePointer[1] == 'R' && binaryTablePointer[2] == 'A' && binaryTablePointer[3] == 'K') { + std::cout << "Valid Kraken 
Compiled Grammer File" << std::endl; + int gramStringLength = *((int*)(binaryTablePointer+4)); + std::cout << "The grammer string is stored to be " << gramStringLength << " characters long, gramString is " + << grammerInputFileString.length() << " long. Remember 1 extra for null terminator!" << std::endl; + if (grammerInputFileString.length() != gramStringLength-1 || + (strncmp(grammerInputFileString.c_str(), (binaryTablePointer+4+sizeof(int)), gramStringLength) != 0)) { + //(one less for null terminator that is stored) + std::cout << "The Grammer has been changed, will re-create" << std::endl; + } else { + compGramGood = true; + std::cout << "grammer file good" << std::endl; + //int tableLength = *((int*)(binaryTablePointer + 4 + sizeof(int) + gramStringLength)); + parser.importTable(binaryTablePointer + 4 + sizeof(int) + gramStringLength); //Load table starting at the table section + } + } else { + std::cout << grammerFileString << ".comp is NOT A Valid Kraken Compiled Grammer File, aborting" << std::endl; + return -1; + } + delete binaryTablePointer; + } + + if (!compGramGood) { + //The load failed because either the file does not exist or it is not up-to-date. + std::cout << "Compiled grammer file does not exist or is not up-to-date, generating table and writing it out" << std::endl; + compiledGrammerOutFile.open(grammerFileString + ".comp", std::ios::binary); + if (!compiledGrammerOutFile.is_open()) + std::cout << "Could not open compiled file to write either!" 
<< std::endl; + compiledGrammerOutFile.write("KRAK", sizeof(char)*4); + int* intBuffer = new int; + *intBuffer = grammerInputFileString.length()+1; + compiledGrammerOutFile.write((char*)intBuffer, sizeof(int)); + delete intBuffer; + compiledGrammerOutFile.write(grammerInputFileString.c_str(), grammerInputFileString.length()+1); //Don't forget null terminator + + parser.createStateSet(); + parser.exportTable(compiledGrammerOutFile); + compiledGrammerOutFile.close(); + } + //End binary stuff + //std::cout << "finished State Set from Main" << std::endl; //std::cout << "Doing stateSetToString from Main" << std::endl; // std::cout << "\n\n\n\n\n\n\n\n\n\nState Set toString" << std::endl; diff --git a/src/Parser.cpp b/src/Parser.cpp index f8236c5..5133c32 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -7,6 +7,16 @@ Parser::Parser() : EOFSymbol("$EOF$", true), nullSymbol("$NULL$", true), invalid Parser::~Parser() { } +void Parser::exportTable(std::ofstream &file) { + //Do table + table.exportTable(file); +} +void Parser::importTable(char* tableData) { + //Do table + table.importTable(tableData); + return; +} + Symbol Parser::getOrAddSymbol(std::string symbolString, bool isTerminal) { Symbol symbol; std::pair entry = std::make_pair(symbolString, isTerminal); diff --git a/src/Table.cpp b/src/Table.cpp index a94d6b2..0c8b8fe 100644 --- a/src/Table.cpp +++ b/src/Table.cpp @@ -8,6 +8,203 @@ Table::~Table() { // } +void Table::exportTable(std::ofstream &file) { + //Save symbolIndexVec + int size = symbolIndexVec.size(); + file.write((char*)&size, sizeof(int)); + for (int i = 0; i < symbolIndexVec.size(); i++) { + //Save the name + std::string symbolName = symbolIndexVec[i].getName(); //Get the string + size = symbolName.size()+1; + file.write((char*)&size, sizeof(int)); //Save size of string + file.write((char*)(symbolName.c_str()), size); //Save the string + + //Save the value + std::string symbolValue = symbolIndexVec[i].getValue(); //Get the string + size = 
symbolValue.size()+1; + file.write((char*)&size, sizeof(int)); //Save size of string + file.write((char*)(symbolValue.c_str()), size); //Save the string + + bool isTerminal = symbolIndexVec[i].isTerminal(); + file.write((char*)&isTerminal, sizeof(bool)); //Save the true false + } + + //Save the actual table + size = table.size(); + file.write((char*)&size, sizeof(int)); + for (int i = 0; i < table.size(); i++) { + //each item is a middle vector + //std::vector< std::vector< std::vector* >* > table; + std::vector< std::vector* >* middleVector = table[i]; + int middleVectorSize = middleVector->size(); + file.write((char*)&middleVectorSize, sizeof(int)); + + for (int j = 0; j < middleVectorSize; j++) { + //each item is an inner vector + std::vector* innerVector = (*middleVector)[j]; + int innerVectorSize = 0; + if (innerVector) + innerVectorSize = innerVector->size(); + else + innerVectorSize = 0; + file.write((char*)&innerVectorSize, sizeof(int)); + + for (int k = 0; k < innerVectorSize; k++) { + //Save the type + ParseAction* toSave = (*innerVector)[k]; + ParseAction::ActionType actionType = toSave->action; + file.write((char*)&actionType, sizeof(ParseAction::ActionType)); + //Save the reduce rule if necessary + if (actionType == ParseAction::REDUCE) { + //Save the reduce rule + ParseRule* rule = toSave->reduceRule; + //int pointer index + int ptrIndx = rule->getIndex(); + file.write((char*)&ptrIndx, sizeof(int)); + + //Symbol leftHandle + Symbol leftHandle = rule->getLeftSide(); + //Save the name + std::string symbolName = leftHandle.getName(); //Get the string + size = symbolName.size()+1; + file.write((char*)&size, sizeof(int)); //Save size of string + file.write((char*)(symbolName.c_str()), size); //Save the string + + //Save the value + std::string symbolValue = leftHandle.getValue(); //Get the string + size = symbolValue.size()+1; + file.write((char*)&size, sizeof(int)); //Save size of string + file.write((char*)(symbolValue.c_str()), size); //Save the string 
+ + bool isTerminal = leftHandle.isTerminal(); + file.write((char*)&isTerminal, sizeof(bool)); //Save the true false + + //std::vector* lookahead; + //Should not need + + //std::vector rightSide; + std::vector rightSide = rule->getRightSide(); + size = rightSide.size(); + std::cout << leftHandle.toString() << std::endl; + file.write((char*)&size, sizeof(int)); + for (int l = 0; l < rightSide.size(); l++) { + //Save the name + symbolName = rightSide[l].getName(); //Get the string + size = symbolName.size()+1; + file.write((char*)&size, sizeof(int)); //Save size of string + file.write((char*)(symbolName.c_str()), size); //Save the string + // + //Save the value + symbolValue = rightSide[l].getValue(); //Get the string + size = symbolValue.size()+1; + file.write((char*)&size, sizeof(int)); //Save size of string + file.write((char*)(symbolValue.c_str()), size); //Save the string + // + isTerminal = rightSide[l].isTerminal(); + file.write((char*)&isTerminal, sizeof(bool)); //Save the true false + } + } + int shiftState = toSave->shiftState; + file.write((char*)&shiftState, sizeof(int)); + } + } + + } +} + +void Table::importTable(char* tableData) { + //Load symbolIndexVec + + int size = *((int*)tableData); + tableData += sizeof(int); + for (int i = 0; i < size; i++) { + int stringLen = *((int*)tableData); + tableData += sizeof(int); + std::string symbolName = std::string(tableData); + tableData += stringLen*sizeof(char); + stringLen = *((int*)tableData); + tableData += sizeof(int); + std::string symbolValue = std::string(tableData); + tableData += stringLen*sizeof(char); + + bool isTerminal = *((bool*)tableData); + tableData += sizeof(bool); + + symbolIndexVec.push_back(Symbol(symbolName, isTerminal, symbolValue)); + } + + //Now for the actual table + int tableSize = *((int*)tableData); + tableData += sizeof(int); + for (int i = 0; i < tableSize; i++) { + //each item is a middle vector + std::vector< std::vector* >* middleVector = new std::vector< std::vector* >(); + 
table.push_back(middleVector); + + int middleVectorSize = *((int*)tableData); + tableData += sizeof(int); + for (int j = 0; j < middleVectorSize; j++) { + //each item is an inner vector + std::vector* innerVector = new std::vector(); + middleVector->push_back(innerVector); + int innerVectorSize = *((int*)tableData); + tableData += sizeof(int); + for (int k = 0; k < innerVectorSize; k++) { + //each item is a ParseRule + ParseAction::ActionType action = *((ParseAction::ActionType*)tableData); + tableData += sizeof(ParseAction::ActionType); + //If reduce, import the reduce rule + ParseRule* reduceRule = NULL; + if (action == ParseAction::REDUCE) { + int ptrIndx = *((int*)tableData); + tableData += sizeof(int); + + size = *((int*)tableData); + tableData += sizeof(int); + std::string leftHandleName = std::string(tableData); + tableData += size*sizeof(char); + size = *((int*)tableData); + tableData += sizeof(int); + std::string leftHandleValue = std::string(tableData); + tableData += size*sizeof(char); + + bool isTerminal = *((bool*)tableData); + tableData += sizeof(bool); + + //right side + std::vector rightSide; + size = *((int*)tableData); + tableData += sizeof(int); + for (int l = 0; l < size; l++) { + int inStringLen = *((int*)tableData); + tableData += sizeof(int); + std::string inSymbolName = std::string(tableData); + tableData += inStringLen*sizeof(char); + + inStringLen = *((int*)tableData); + tableData += sizeof(int); + std::string inSymbolValue = std::string(tableData); + tableData += inStringLen*sizeof(char); + + bool inIsTerminal = *((bool*)tableData); + tableData += sizeof(bool); + rightSide.push_back(Symbol(inSymbolName, inIsTerminal, inSymbolValue)); + } + reduceRule = new ParseRule(Symbol(leftHandleName, isTerminal, leftHandleValue), ptrIndx, rightSide, NULL); + } + int shiftState = *((int*)tableData); + tableData += sizeof(int); + + //And push the new action back + if (reduceRule) + innerVector->push_back(new ParseAction(action, reduceRule)); + else + 
innerVector->push_back(new ParseAction(action, shiftState)); + } + } + } +} + void Table::setSymbols(Symbol EOFSymbol, Symbol nullSymbol) { this->EOFSymbol = EOFSymbol; this->nullSymbol = nullSymbol; From dbf1820428070042da4f4ae33d333a61c7b88c23 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Tue, 31 Dec 2013 23:43:49 -0600 Subject: [PATCH 10/25] Can import other files now. (in same directory, use filename without .krak file extention) Right now, still need to compile both files with kraken. --- CMakeLists.txt | 2 +- include/ASTTransformation.h | 7 +- include/GraphStructuredStack.h | 1 + include/Importer.h | 28 ++++++++ include/Lexer.h | 1 + krakenGrammer.kgm | 2 +- main.cpp | 115 ++++----------------------------- src/ASTTransformation.cpp | 25 ++++--- src/CGenerator.cpp | 5 +- src/GraphStructuredStack.cpp | 5 ++ src/Importer.cpp | 113 ++++++++++++++++++++++++++++++++ src/Lexer.cpp | 4 ++ src/RNGLRParser.cpp | 11 +++- 13 files changed, 203 insertions(+), 116 deletions(-) create mode 100644 include/Importer.h create mode 100644 src/Importer.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index ad4cfeb..db3eb07 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include) -set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/GraphStructuredStack.cpp src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp src/ASTData.cpp src/ASTTransformation.cpp src/CGenerator.cpp src/Type.cpp ) +set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/GraphStructuredStack.cpp src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp src/ASTData.cpp src/ASTTransformation.cpp src/CGenerator.cpp src/Type.cpp 
src/Importer.cpp ) include_directories( ${MY_INCLUDES} ) diff --git a/include/ASTTransformation.h b/include/ASTTransformation.h index c9d1c41..f93ce65 100644 --- a/include/ASTTransformation.h +++ b/include/ASTTransformation.h @@ -6,16 +6,19 @@ #include "ASTData.h" #include "NodeTransformation.h" +#include "Importer.h" + +class Importer; class ASTTransformation: public NodeTransformation { public: - ASTTransformation(); + ASTTransformation(Importer* importerIn); ~ASTTransformation(); virtual NodeTree* transform(NodeTree* from); NodeTree* transform(NodeTree* from, NodeTree* scope); std::string concatSymbolTree(NodeTree* root); NodeTree* scopeLookup(NodeTree* scope, std::string lookup); private: - //Nothing + Importer * importer; }; #endif diff --git a/include/GraphStructuredStack.h b/include/GraphStructuredStack.h index 302fd60..e79a8fe 100644 --- a/include/GraphStructuredStack.h +++ b/include/GraphStructuredStack.h @@ -25,6 +25,7 @@ class GraphStructuredStack { bool hasEdge(NodeTree* start, NodeTree* end); NodeTree* getEdge(NodeTree* start, NodeTree* end); void addEdge(NodeTree* start, NodeTree* end, NodeTree* edge); + void clear(); std::string toString(); private: diff --git a/include/Importer.h b/include/Importer.h new file mode 100644 index 0000000..187d054 --- /dev/null +++ b/include/Importer.h @@ -0,0 +1,28 @@ +#ifndef __IMPORTER__H_ +#define __IMPORTER__H_ + +#include +#include +#include +#include + +#include "Parser.h" +#include "NodeTree.h" +#include "ASTData.h" +#include "Symbol.h" +#include "RemovalTransformation.h" +#include "CollapseTransformation.h" +#include "ASTTransformation.h" + +class Importer { + public: + Importer(Parser* parserIn); + ~Importer(); + NodeTree* import(std::string fileName); + private: + Parser* parser; + std::vector removeSymbols; + std::vector collapseSymbols; +}; + +#endif \ No newline at end of file diff --git a/include/Lexer.h b/include/Lexer.h index 02223a9..1574a02 100644 --- a/include/Lexer.h +++ b/include/Lexer.h @@ -16,6 
+16,7 @@ class Lexer { void addRegEx(std::string regExString); void setInput(std::string inputString); Symbol next(); + void reset(); static void test(); private: std::vector regExs; diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index b7e2fe6..2234034 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -18,7 +18,7 @@ WS = "( | | if_comp = "__if_comp__" WS identifier WS if_comp_pred ; if_comp_pred = code_block | simple_passthrough ; -simple_passthrough = "comp_simple_passthrough" WS triple_quoted_string ; +simple_passthrough = "__simple_passthrough__" WS triple_quoted_string ; triple_quoted_string = "\"\"\"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|;|%|=|\+| | | |\\|/|\||\(|\)|\"|#|<|\*|>|0|1|2|3|4|5|6|7|8|9)+\"\"\"" ; diff --git a/main.cpp b/main.cpp index a6ee9d4..41de216 100644 --- a/main.cpp +++ b/main.cpp @@ -11,10 +11,7 @@ #include "LALRParser.h" #include "RNGLRParser.h" -#include "NodeTransformation.h" -#include "RemovalTransformation.h" -#include "CollapseTransformation.h" -#include "ASTTransformation.h" +#include "Importer.h" #include "ASTData.h" #include "CGenerator.h" @@ -29,16 +26,12 @@ int main(int argc, char* argv[]) { return 0; } - std::ifstream programInFile, grammerInFile, compiledGrammerInFile; - std::ofstream outFile, outFileTransformed, outFileAST, outFileC, compiledGrammerOutFile; - - programInFile.open(argv[1]); - if (!programInFile.is_open()) { - std::cout << "Problem opening programInFile " << argv[1] << "\n"; - return(1); - } - + std::string programName = argv[1]; std::string grammerFileString = argv[2]; + std::string outputName = argv[3]; + + std::ifstream grammerInFile, compiledGrammerInFile; + std::ofstream outFileC, compiledGrammerOutFile; grammerInFile.open(grammerFileString); if (!grammerInFile.is_open()) { @@ -52,31 +45,13 @@ int main(int argc, char* argv[]) { //return(1); } - outFile.open(argv[3]); - if (!outFile.is_open()) { - std::cout << 
"Probelm opening output file " << argv[3] << "\n"; - return(1); - } - - outFileTransformed.open((std::string(argv[3]) + ".transformed.dot").c_str()); - if (!outFileTransformed.is_open()) { - std::cout << "Probelm opening second output file " << std::string(argv[3]) + ".transformed.dot" << "\n"; - return(1); - } - - outFileAST.open((std::string(argv[3]) + ".AST.dot").c_str()); - if (!outFileAST.is_open()) { - std::cout << "Probelm opening second output file " << std::string(argv[3]) + ".AST.dot" << "\n"; - return(1); - } - - outFileC.open((std::string(argv[3]) + ".c").c_str()); + outFileC.open((outputName + ".c").c_str()); if (!outFileC.is_open()) { - std::cout << "Probelm opening third output file " << std::string(argv[3]) + ".c" << "\n"; + std::cout << "Probelm opening third output file " << outputName + ".c" << "\n"; return(1); } //Read the input file into a string - std::string programInputFileString, grammerInputFileString; + std::string grammerInputFileString; std::string line; while(grammerInFile.good()) { getline(grammerInFile, line); @@ -84,12 +59,6 @@ int main(int argc, char* argv[]) { } grammerInFile.close(); - while(programInFile.good()) { - getline(programInFile, line); - programInputFileString.append(line+"\n"); - } - programInFile.close(); - //LALRParser parser; RNGLRParser parser; parser.loadGrammer(grammerInputFileString); @@ -105,6 +74,7 @@ int main(int argc, char* argv[]) { compiledGrammerInFile.seekg(0, std::ios::beg); compiledGrammerInFile.read(binaryTablePointer, compGramSize); compiledGrammerInFile.close(); + //Check magic number if (binaryTablePointer[0] == 'K' && binaryTablePointer[1] == 'R' && binaryTablePointer[2] == 'A' && binaryTablePointer[3] == 'K') { std::cout << "Valid Kraken Compiled Grammer File" << std::endl; int gramStringLength = *((int*)(binaryTablePointer+4)); @@ -162,70 +132,11 @@ int main(int argc, char* argv[]) { //outFile << parser.grammerToDOT() << std::endl; std::cout << "\nParsing" << std::endl; - std::cout << 
programInputFileString << std::endl; - NodeTree* parseTree = parser.parseInput(programInputFileString); + Importer importer(&parser); - if (parseTree) { - //std::cout << parseTree->DOTGraphString() << std::endl; - outFile << parseTree->DOTGraphString() << std::endl; - } else { - std::cout << "ParseTree returned from parser is NULL!" << std::endl; - } - outFile.close(); + NodeTree* AST = importer.import(programName); - //Remove Transformations - std::vector removeSymbols; - removeSymbols.push_back(Symbol("WS", false)); - removeSymbols.push_back(Symbol("\\(", true)); - removeSymbols.push_back(Symbol("\\)", true)); - removeSymbols.push_back(Symbol("::", true)); - removeSymbols.push_back(Symbol(";", true)); - removeSymbols.push_back(Symbol("{", true)); - removeSymbols.push_back(Symbol("}", true)); - removeSymbols.push_back(Symbol("(", true)); - removeSymbols.push_back(Symbol(")", true)); - removeSymbols.push_back(Symbol("import", true)); //Don't need the actual text of the symbol - removeSymbols.push_back(Symbol("interpreter_directive", false)); - removeSymbols.push_back(Symbol("if", true)); - removeSymbols.push_back(Symbol("while", true)); - removeSymbols.push_back(Symbol("__if_comp__", true)); - removeSymbols.push_back(Symbol("comp_simple_passthrough", true)); - - for (int i = 0; i < removeSymbols.size(); i++) - parseTree = RemovalTransformation(removeSymbols[i]).transform(parseTree); - - //Collapse Transformations - std::vector collapseSymbols; - - collapseSymbols.push_back(Symbol("opt_typed_parameter_list", false)); - collapseSymbols.push_back(Symbol("opt_parameter_list", false)); - collapseSymbols.push_back(Symbol("opt_import_list", false)); - collapseSymbols.push_back(Symbol("import_list", false)); - collapseSymbols.push_back(Symbol("statement_list", false)); - collapseSymbols.push_back(Symbol("parameter_list", false)); - collapseSymbols.push_back(Symbol("typed_parameter_list", false)); - collapseSymbols.push_back(Symbol("unorderd_list_part", false)); - 
collapseSymbols.push_back(Symbol("if_comp_pred", false)); - - for (int i = 0; i < collapseSymbols.size(); i++) - parseTree = CollapseTransformation(collapseSymbols[i]).transform(parseTree); - - if (parseTree) { - outFileTransformed << parseTree->DOTGraphString() << std::endl; - } else { - std::cout << "Tree returned from transformation is NULL!" << std::endl; - } - outFileTransformed.close(); - - NodeTree* AST = ASTTransformation().transform(parseTree); - if (AST) { - outFileAST << AST->DOTGraphString() << std::endl; - } else { - std::cout << "Tree returned from ASTTransformation is NULL!" << std::endl; - } - outFileAST.close(); - - //Do type checking, scope creation, etc. here. + //Do optomization, etc. here. //None at this time, instead going straight to C in this first (more naive) version //Code generation diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 3097243..7ca4937 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -1,7 +1,8 @@ #include "ASTTransformation.h" -ASTTransformation::ASTTransformation() { +ASTTransformation::ASTTransformation(Importer *importerIn) { // + importer = importerIn; } ASTTransformation::~ASTTransformation() { @@ -28,23 +29,31 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree scope->getDataRef()->scope["-"] = new NodeTree(); scope->getDataRef()->scope["*"] = new NodeTree(); scope->getDataRef()->scope["&"] = new NodeTree(); - scope->getDataRef()->scope["=="] = new NodeTree(); scope->getDataRef()->scope["--"] = new NodeTree(); scope->getDataRef()->scope["++"] = new NodeTree(); + scope->getDataRef()->scope["=="] = new NodeTree(); scope->getDataRef()->scope["<="] = new NodeTree(); scope->getDataRef()->scope[">="] = new NodeTree(); + scope->getDataRef()->scope["<"] = new NodeTree(); + scope->getDataRef()->scope[">"] = new NodeTree(); + scope->getDataRef()->scope["&&"] = new NodeTree(); + scope->getDataRef()->scope["||"] = new NodeTree(); + scope->getDataRef()->scope["!"] = new 
NodeTree(); scope->getDataRef()->scope["*="] = new NodeTree(); scope->getDataRef()->scope["+="] = new NodeTree(); scope->getDataRef()->scope["-="] = new NodeTree(); - scope->getDataRef()->scope["<"] = new NodeTree(); - scope->getDataRef()->scope[">"] = new NodeTree(); + } else if (name == "interpreter_directive") { newNode = new NodeTree(name, ASTData(interpreter_directive)); } else if (name == "import" && !current.isTerminal()) { - newNode = new NodeTree(name, ASTData(import, Symbol(concatSymbolTree(children[0]), true))); - //Add to scope? - // - // + std::string toImport = concatSymbolTree(children[0]); + newNode = new NodeTree(name, ASTData(import, Symbol(toImport, true))); + //Do the imported file too + NodeTree* outsideTranslationUnit = importer->import(toImport + ".krak"); + scope->getDataRef()->scope[toImport] = outsideTranslationUnit; //Put this transation_unit in the scope as it's files name + //Now add it to scope + for (auto i = outsideTranslationUnit->getDataRef()->scope.begin(); i != outsideTranslationUnit->getDataRef()->scope.end(); i++) + scope->getDataRef()->scope[i->first] = i->second; return newNode; // Don't need children of import } else if (name == "identifier") { std::string lookupName = concatSymbolTree(children[0]); diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 007180a..1b9a108 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -114,7 +114,10 @@ std::string CGenerator::generate(NodeTree* from) { return generate(children[1]) + name; if (name == "*" && children.size() == 2) //Is dereference, not multiplication return "*(" + generate(children[1]) + ")"; - if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!=" || name == "<" || name == ">" || name == "%" || name == "+=" || name == "-=" || name == "*=" || name == "/=") { + if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!=" + || name 
== "<" || name == ">" || name == "%" || name == "+=" || name == "-=" || name == "*=" || name == "/=" || name == "||" + || name == "&&" || name == "!" ) { + return "((" + generate(children[1]) + ")" + name + "(" + generate(children[2]) + "))"; } output += data.symbol.getName() + "("; diff --git a/src/GraphStructuredStack.cpp b/src/GraphStructuredStack.cpp index fb37d18..dcdf2ff 100644 --- a/src/GraphStructuredStack.cpp +++ b/src/GraphStructuredStack.cpp @@ -128,3 +128,8 @@ std::string GraphStructuredStack::toString() { } return tostring; } + +void GraphStructuredStack::clear() { + gss.clear(); + edges.clear(); +} diff --git a/src/Importer.cpp b/src/Importer.cpp new file mode 100644 index 0000000..f4a1cf9 --- /dev/null +++ b/src/Importer.cpp @@ -0,0 +1,113 @@ +#include "Importer.h" + +Importer::Importer(Parser* parserIn) { + //constructor + parser = parserIn; + + removeSymbols.push_back(Symbol("WS", false)); + removeSymbols.push_back(Symbol("\\(", true)); + removeSymbols.push_back(Symbol("\\)", true)); + removeSymbols.push_back(Symbol("::", true)); + removeSymbols.push_back(Symbol(";", true)); + removeSymbols.push_back(Symbol("{", true)); + removeSymbols.push_back(Symbol("}", true)); + removeSymbols.push_back(Symbol("(", true)); + removeSymbols.push_back(Symbol(")", true)); + removeSymbols.push_back(Symbol("import", true)); //Don't need the actual text of the symbol + removeSymbols.push_back(Symbol("interpreter_directive", false)); + removeSymbols.push_back(Symbol("if", true)); + removeSymbols.push_back(Symbol("while", true)); + removeSymbols.push_back(Symbol("__if_comp__", true)); + removeSymbols.push_back(Symbol("comp_simple_passthrough", true)); + + collapseSymbols.push_back(Symbol("opt_typed_parameter_list", false)); + collapseSymbols.push_back(Symbol("opt_parameter_list", false)); + collapseSymbols.push_back(Symbol("opt_import_list", false)); + collapseSymbols.push_back(Symbol("import_list", false)); + collapseSymbols.push_back(Symbol("statement_list", false)); 
+ collapseSymbols.push_back(Symbol("parameter_list", false)); + collapseSymbols.push_back(Symbol("typed_parameter_list", false)); + collapseSymbols.push_back(Symbol("unorderd_list_part", false)); + collapseSymbols.push_back(Symbol("if_comp_pred", false)); +} + +Importer::~Importer() { + //destructor +} + +NodeTree* Importer::import(std::string fileName) { + std::ifstream programInFile; + std::ofstream outFile, outFileTransformed, outFileAST; + + std::string outputName = fileName + "out"; + + programInFile.open(fileName); + if (!programInFile.is_open()) { + std::cout << "Problem opening programInFile " << fileName << "\n"; + return NULL; + } + + outFile.open(outputName); + if (!outFile.is_open()) { + std::cout << "Probelm opening output file " << outputName << "\n"; + return NULL; + } + + outFileTransformed.open((outputName + ".transformed.dot").c_str()); + if (!outFileTransformed.is_open()) { + std::cout << "Probelm opening second output file " << outputName + ".transformed.dot" << "\n"; + return NULL; + } + + outFileAST.open((outputName + ".AST.dot").c_str()); + if (!outFileAST.is_open()) { + std::cout << "Probelm opening second output file " << outputName + ".AST.dot" << "\n"; + return NULL; + } + //ljklj + std::string programInputFileString, line; + while(programInFile.good()) { + getline(programInFile, line); + programInputFileString.append(line+"\n"); + } + programInFile.close(); + + std::cout << programInputFileString << std::endl; + NodeTree* parseTree = parser->parseInput(programInputFileString); + + if (parseTree) { + //std::cout << parseTree->DOTGraphString() << std::endl; + outFile << parseTree->DOTGraphString() << std::endl; + } else { + std::cout << "ParseTree returned from parser is NULL!" 
<< std::endl; + } + outFile.close(); + + //Remove Transformations + + for (int i = 0; i < removeSymbols.size(); i++) + parseTree = RemovalTransformation(removeSymbols[i]).transform(parseTree); + + //Collapse Transformations + + for (int i = 0; i < collapseSymbols.size(); i++) + parseTree = CollapseTransformation(collapseSymbols[i]).transform(parseTree); + + if (parseTree) { + outFileTransformed << parseTree->DOTGraphString() << std::endl; + } else { + std::cout << "Tree returned from transformation is NULL!" << std::endl; + } + outFileTransformed.close(); + + NodeTree* AST = ASTTransformation(this).transform(parseTree); + + if (AST) { + outFileAST << AST->DOTGraphString() << std::endl; + } else { + std::cout << "Tree returned from ASTTransformation is NULL!" << std::endl; + } + outFileAST.close(); + + return AST; +} \ No newline at end of file diff --git a/src/Lexer.cpp b/src/Lexer.cpp index b9cb367..b6f7033 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -114,3 +114,7 @@ void Lexer::test() { std::cout << "Lexer tests passed\n"; } + +void Lexer::reset() { + currentPosition = 0; +} diff --git a/src/RNGLRParser.cpp b/src/RNGLRParser.cpp index 392bc30..33989ff 100644 --- a/src/RNGLRParser.cpp +++ b/src/RNGLRParser.cpp @@ -9,6 +9,13 @@ RNGLRParser::~RNGLRParser() { } NodeTree* RNGLRParser::parseInput(std::string inputString) { + input.clear(); + gss.clear(); + while(!toReduce.empty()) toReduce.pop(); + while(!toShift.empty()) toReduce.pop(); + SPPFStepNodes.clear(); + nullableParts.clear(); + packedMap.clear(); //Check for no tokens bool accepting = false; @@ -27,6 +34,7 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { return new NodeTree(); } + lexer.reset(); lexer.setInput(inputString); //Now fully lex our input because this algorithm was designed in that manner and simplifies this first implementation. //It could be converted to on-line later. 
@@ -42,7 +50,8 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { input.push_back(currentToken); } - std::cout << "\nDone with Lexing\n" << std::endl; + std::cout << "\nDone with Lexing, length:" << input.size() << std::endl; + std::cout << input[0].toString() << std::endl; // for (int i = 0; i < input.size(); i++) From 53b45f360d823bb5ee8f9332850e3a0075279096 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Wed, 1 Jan 2014 17:29:19 -0600 Subject: [PATCH 11/25] Now generates for all files that have been imported. CGenerator uses this to generate all files AND a shell script with the compile command to compile the generated C file. --- include/CGenerator.h | 3 +++ include/Importer.h | 2 ++ main.cpp | 22 ++++++++++++++-------- src/ASTTransformation.cpp | 16 ++++++++-------- src/CGenerator.cpp | 25 +++++++++++++++++++++++-- src/Importer.cpp | 16 +++++++++++++--- src/Parser.cpp | 2 +- src/RNGLRParser.cpp | 20 ++++++++++---------- src/StringReader.cpp | 2 +- src/Table.cpp | 4 ++-- src/Type.cpp | 2 +- 11 files changed, 78 insertions(+), 36 deletions(-) diff --git a/include/CGenerator.h b/include/CGenerator.h index 35d26a7..eab5c53 100644 --- a/include/CGenerator.h +++ b/include/CGenerator.h @@ -3,6 +3,7 @@ #include #include +#include #include "NodeTree.h" #include "ASTData.h" @@ -15,8 +16,10 @@ class CGenerator { public: CGenerator(); ~CGenerator(); + void generateCompSet(std::map*> ASTs, std::string outputName); std::string generate(NodeTree* from); static std::string ValueTypeToCType(Type type); + std::string generatorString; private: std::string tabs(); diff --git a/include/Importer.h b/include/Importer.h index 187d054..c0b4f55 100644 --- a/include/Importer.h +++ b/include/Importer.h @@ -19,10 +19,12 @@ class Importer { Importer(Parser* parserIn); ~Importer(); NodeTree* import(std::string fileName); + std::map*> getASTMap(); private: Parser* parser; std::vector removeSymbols; std::vector collapseSymbols; + std::map*> imported; }; #endif \ No newline 
at end of file diff --git a/main.cpp b/main.cpp index 41de216..9ca06ef 100644 --- a/main.cpp +++ b/main.cpp @@ -31,7 +31,7 @@ int main(int argc, char* argv[]) { std::string outputName = argv[3]; std::ifstream grammerInFile, compiledGrammerInFile; - std::ofstream outFileC, compiledGrammerOutFile; + std::ofstream /*outFileC,*/ compiledGrammerOutFile; grammerInFile.open(grammerFileString); if (!grammerInFile.is_open()) { @@ -44,12 +44,13 @@ int main(int argc, char* argv[]) { std::cout << "Problem opening compiledGrammerInFile " << grammerFileString + ".comp" << "\n"; //return(1); } - +/* outFileC.open((outputName + ".c").c_str()); if (!outFileC.is_open()) { std::cout << "Probelm opening third output file " << outputName + ".c" << "\n"; return(1); } + */ //Read the input file into a string std::string grammerInputFileString; std::string line; @@ -63,7 +64,7 @@ int main(int argc, char* argv[]) { RNGLRParser parser; parser.loadGrammer(grammerInputFileString); //std::cout << "Creating State Set from Main" << std::endl; - std::cout << "\nState Set" << std::endl; + //std::cout << "\nState Set" << std::endl; //Start binary stuff bool compGramGood = false; @@ -78,15 +79,15 @@ int main(int argc, char* argv[]) { if (binaryTablePointer[0] == 'K' && binaryTablePointer[1] == 'R' && binaryTablePointer[2] == 'A' && binaryTablePointer[3] == 'K') { std::cout << "Valid Kraken Compiled Grammer File" << std::endl; int gramStringLength = *((int*)(binaryTablePointer+4)); - std::cout << "The grammer string is stored to be " << gramStringLength << " characters long, gramString is " - << grammerInputFileString.length() << " long. Remember 1 extra for null terminator!" << std::endl; + //std::cout << "The grammer string is stored to be " << gramStringLength << " characters long, gramString is " + //<< grammerInputFileString.length() << " long. Remember 1 extra for null terminator!" 
<< std::endl; if (grammerInputFileString.length() != gramStringLength-1 || (strncmp(grammerInputFileString.c_str(), (binaryTablePointer+4+sizeof(int)), gramStringLength) != 0)) { //(one less for null terminator that is stored) std::cout << "The Grammer has been changed, will re-create" << std::endl; } else { compGramGood = true; - std::cout << "grammer file good" << std::endl; + std::cout << "Grammer file is up to date." << std::endl; //int tableLength = *((int*)(binaryTablePointer + 4 + sizeof(int) + gramStringLength)); parser.importTable(binaryTablePointer + 4 + sizeof(int) + gramStringLength); //Load table starting at the table section } @@ -134,17 +135,22 @@ int main(int argc, char* argv[]) { Importer importer(&parser); - NodeTree* AST = importer.import(programName); + /*NodeTree* AST =*/ + importer.import(programName); + std::map*> ASTs =importer.getASTMap(); //Do optomization, etc. here. //None at this time, instead going straight to C in this first (more naive) version //Code generation //For right now, just C + + CGenerator().generateCompSet(ASTs, outputName); + /* std::string c_code = CGenerator().generate(AST); outFileC << c_code << std::endl; outFileC.close(); - +*/ return(0); } diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 7ca4937..0f0ad42 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -57,7 +57,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree return newNode; // Don't need children of import } else if (name == "identifier") { std::string lookupName = concatSymbolTree(children[0]); - std::cout << "scope lookup from identifier" << std::endl; + //std::cout << "scope lookup from identifier" << std::endl; newNode = scopeLookup(scope, lookupName); if (newNode == NULL) { std::cout << "scope lookup error! 
Could not find " << lookupName << std::endl; @@ -87,7 +87,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree //If this is an actual part of an expression, not just a premoted term if (children.size() > 1) { std::string functionCallName = concatSymbolTree(children[1]); - std::cout << "scope lookup from boolen_expression or similar" << std::endl; + //std::cout << "scope lookup from boolen_expression or similar" << std::endl; NodeTree* function = scopeLookup(scope, functionCallName); if (function == NULL) { std::cout << "scope lookup error! Could not find " << functionCallName << std::endl; @@ -97,7 +97,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree newNode->addChild(function); // First child of function call is a link to the function definition skipChildren.insert(1); } else { - std::cout << children.size() << std::endl; + //std::cout << children.size() << std::endl; if (children.size() == 0) return new NodeTree(); return transform(children[0], scope); //Just a promoted term, so do child @@ -107,7 +107,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree //If this is an actual part of an expression, not just a premoted child if (children.size() > 2) { std::string functionCallName = concatSymbolTree(children[1]); - std::cout << "scope lookup from expression or similar" << std::endl; + //std::cout << "scope lookup from expression or similar" << std::endl; NodeTree* function = scopeLookup(scope, functionCallName); if (function == NULL) { std::cout << "scope lookup error! Could not find " << functionCallName << std::endl; @@ -130,7 +130,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree else funcName = concatSymbolTree(children[1]), funcNum = 1; - std::cout << "scope lookup from factor" << std::endl; + //std::cout << "scope lookup from factor" << std::endl; NodeTree* function = scopeLookup(scope, funcName); if (function == NULL) { std::cout << "scope lookup error! 
Could not find " << funcName << std::endl; @@ -199,7 +199,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree //children[0] is scope std::string functionCallName = concatSymbolTree(children[1]); newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); - std::cout << "scope lookup from function_call" << std::endl; + //std::cout << "scope lookup from function_call" << std::endl; NodeTree* function = scopeLookup(scope, functionCallName); if (function == NULL) { std::cout << "scope lookup error! Could not find " << functionCallName << std::endl; @@ -260,8 +260,8 @@ NodeTree* ASTTransformation::scopeLookup(NodeTree* scope, std: //Seach the map auto scopeMap = scope->getDataRef()->scope; //std::cout << "scope size: " << scopeMap.size() << ", scope from " << scope->getName() << std::endl; - for (auto i = scopeMap.begin(); i != scopeMap.end(); i++) - std::cout << i->first << " : " << i-> second << " - " << i->second->getName() << std::endl; + // for (auto i = scopeMap.begin(); i != scopeMap.end(); i++) + // std::cout << i->first << " : " << i-> second << " - " << i->second->getName() << std::endl; auto elementIterator = scopeMap.find(lookup); if (elementIterator != scopeMap.end()) { diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 1b9a108..6e6912a 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -7,6 +7,27 @@ CGenerator::~CGenerator() { } +void CGenerator::generateCompSet(std::map*> ASTs, std::string outputName) { + //Generate an entire set of files + std::string buildString = "#!/bin/sh\ncc -std=c99 "; + for (auto i = ASTs.begin(); i != ASTs.end(); i++) { + buildString += i->first + ".c "; + std::ofstream outputCFile; + outputCFile.open(i->first + ".c"); + if (outputCFile.is_open()) { + outputCFile << generate(i->second); + } else { + std::cout << "Cannot open file " << i->first << ".c" << std::endl; + } + outputCFile.close(); + } + buildString += "-o " + outputName; + std::ofstream 
outputBuild; + outputBuild.open(outputName + ".sh"); + outputBuild << buildString; + outputBuild.close(); +} + std::string CGenerator::tabs() { std::string returnTabs; for (int i = 0; i < tabLevel; i++) @@ -39,7 +60,7 @@ std::string CGenerator::generate(NodeTree* from) { output+= "); /*func*/\n"; break; default: - std::cout << "Declaration? named " << declaration->getName() << " of unknown type " << ASTData::ASTTypeToString(declarationData.type) << " in translation unit scope" << std::endl; + //std::cout << "Declaration? named " << declaration->getName() << " of unknown type " << ASTData::ASTTypeToString(declarationData.type) << " in translation unit scope" << std::endl; output += "/*unknown declaration named " + declaration->getName() + "*/\n"; } } @@ -109,7 +130,7 @@ std::string CGenerator::generate(NodeTree* from) { //Handle operators specially for now. Will later replace with //Inlined functions in the standard library std::string name = data.symbol.getName(); - std::cout << name << " == " << children[0]->getData().symbol.getName() << std::endl; + //std::cout << name << " == " << children[0]->getData().symbol.getName() << std::endl; if (name == "++" || name == "--") return generate(children[1]) + name; if (name == "*" && children.size() == 2) //Is dereference, not multiplication diff --git a/src/Importer.cpp b/src/Importer.cpp index f4a1cf9..7cac9ab 100644 --- a/src/Importer.cpp +++ b/src/Importer.cpp @@ -36,6 +36,10 @@ Importer::~Importer() { } NodeTree* Importer::import(std::string fileName) { + //Check to see if we've already done it + if (imported.find(fileName) != imported.end()) + return imported[fileName]; + std::ifstream programInFile; std::ofstream outFile, outFileTransformed, outFileAST; @@ -64,7 +68,7 @@ NodeTree* Importer::import(std::string fileName) { std::cout << "Probelm opening second output file " << outputName + ".AST.dot" << "\n"; return NULL; } - //ljklj + std::string programInputFileString, line; while(programInFile.good()) { 
getline(programInFile, line); @@ -72,7 +76,7 @@ NodeTree* Importer::import(std::string fileName) { } programInFile.close(); - std::cout << programInputFileString << std::endl; + //std::cout << programInputFileString << std::endl; NodeTree* parseTree = parser->parseInput(programInputFileString); if (parseTree) { @@ -109,5 +113,11 @@ NodeTree* Importer::import(std::string fileName) { } outFileAST.close(); + imported[fileName] = AST; + return AST; -} \ No newline at end of file +} + +std::map*> Importer::getASTMap() { + return imported; +} diff --git a/src/Parser.cpp b/src/Parser.cpp index 5133c32..5971829 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -78,7 +78,7 @@ void Parser::loadGrammer(std::string grammerInputString) { //Get next token currToken = reader.word(); } - std::cout << "Parsed!\n"; + //std::cout << "Parsed!\n"; // for (std::vector::size_type i = 0; i < loadedGrammer.size(); i++) // std::cout << loadedGrammer[i]->toString() << std::endl; diff --git a/src/RNGLRParser.cpp b/src/RNGLRParser.cpp index 33989ff..274558f 100644 --- a/src/RNGLRParser.cpp +++ b/src/RNGLRParser.cpp @@ -50,8 +50,8 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { input.push_back(currentToken); } - std::cout << "\nDone with Lexing, length:" << input.size() << std::endl; - std::cout << input[0].toString() << std::endl; + // std::cout << "\nDone with Lexing, length:" << input.size() << std::endl; + // std::cout << input[0].toString() << std::endl; // for (int i = 0; i < input.size(); i++) @@ -59,13 +59,13 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { // std::cout << std::endl; - std::cout << "Setting up 0th frontier, first actions, toShift, toReduce" << std::endl; + //std::cout << "Setting up 0th frontier, first actions, toShift, toReduce" << std::endl; //Frontier 0, new node with state 0 NodeTree* v0 = gss.newNode(0); gss.addToFrontier(0,v0); - std::cout << "Done setting up new frontier" << std::endl; + //std::cout << "Done setting up new 
frontier" << std::endl; std::vector firstActions = *(table.get(0, input[0])); for (std::vector::size_type i = 0; i < firstActions.size(); i++) { @@ -80,7 +80,7 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { // std::cout << "GSS:\n" << gss.toString() << std::endl; - std::cout << "Starting parse loop" << std::endl; + //std::cout << "Starting parse loop" << std::endl; for (int i = 0; i < input.size(); i++) { // std::cout << "Checking if frontier " << i << " is empty" << std::endl; @@ -110,7 +110,7 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { shifter(i); //std::cout << "GSS:\n" << gss.toString() << std::endl; } - std::cout << "Done with parsing loop, checking for acceptance" << std::endl; + //std::cout << "Done with parsing loop, checking for acceptance" << std::endl; NodeTree* accState = gss.frontierGetAccState(input.size()-1); if (accState) { std::cout << "Accepted!" << std::endl; @@ -143,7 +143,7 @@ void RNGLRParser::reducer(int i) { //The end of the current path NodeTree* currentReached = currentPath[currentPath.size()-1]; - std::cout << "Getting the shfit state for state " << currentReached->getData() << " and symbol " << reduction.symbol.toString() << std::endl; + //std::cout << "Getting the shift state for state " << currentReached->getData() << " and symbol " << reduction.symbol.toString() << std::endl; int toState = table.getShift(currentReached->getData(), reduction.symbol)->shiftState; //If reduction length is 0, then we make the new label the appropriate nullable parts @@ -189,7 +189,7 @@ void RNGLRParser::reducer(int i) { //std::cout << "Adding shifts and reductions for a state that did not exist" << std::endl; std::vector actions = *(table.get(toState, input[i])); for (std::vector::size_type k = 0; k < actions.size(); k++) { - std::cout << "Action is " << actions[k]->toString() << std::endl; + //std::cout << "Action is " << actions[k]->toString() << std::endl; if (actions[k]->action == ParseAction::SHIFT) { 
toShift.push(std::make_pair(toStateNode, actions[k]->shiftState)); } else if (actions[k]->action == ParseAction::REDUCE && fullyReducesToNull(actions[k]->reduceRule)) { @@ -213,7 +213,7 @@ void RNGLRParser::shifter(int i) { while (!toShift.empty()) { std::pair*, int> shift = toShift.front(); toShift.pop(); - std::cout << "Current potential shift from " << shift.first->getData() << " to " << shift.second << std::endl; + //std::cout << "Current potential shift from " << shift.first->getData() << " to " << shift.second << std::endl; NodeTree* shiftTo = gss.inFrontier(i+1, shift.second); if (shiftTo) { //std::cout << "State already existed, just adding edge" << std::endl; @@ -232,7 +232,7 @@ void RNGLRParser::shifter(int i) { gss.addEdge(shiftTo, shift.first, newLabel); std::vector actions = *(table.get(shift.second, input[i+1])); for (std::vector::size_type j = 0; j < actions.size(); j++) { - std::cout << "Adding action " << actions[j]->toString() << " to either nextShifts or toReduce" << std::endl; + //std::cout << "Adding action " << actions[j]->toString() << " to either nextShifts or toReduce" << std::endl; //Shift if (actions[j]->action == ParseAction::SHIFT) { nextShifts.push(std::make_pair(shiftTo, actions[j]->shiftState)); diff --git a/src/StringReader.cpp b/src/StringReader.cpp index 29772b4..f4b6e50 100644 --- a/src/StringReader.cpp +++ b/src/StringReader.cpp @@ -75,7 +75,7 @@ std::string StringReader::getTokens(const char *stop_chars, bool truncateEnd) { //End of String end_reached = true; - std::cout << "Reached end of file!\n"; + //std::cout << "Reached end of file!\n"; return ""; } else { diff --git a/src/Table.cpp b/src/Table.cpp index 0c8b8fe..ddbc421 100644 --- a/src/Table.cpp +++ b/src/Table.cpp @@ -303,7 +303,7 @@ std::vector* Table::get(int state, Symbol token) { return NULL; } - std::cout << "Get for state: " << state << ", and Symbol: " << token.toString() << std::endl; + //std::cout << "Get for state: " << state << ", and Symbol: " << 
token.toString() << std::endl; if (state < 0 || state >= table.size()) { std::cout << "State bad: " << state << std::endl; return NULL; @@ -312,7 +312,7 @@ std::vector* Table::get(int state, Symbol token) { std::vector* action = NULL; if (symbolIndex < 0 || symbolIndex >= table[state]->size()) { - std::cout << "Symbol bad for this state: " << token.toString() << ". This is a reject." << std::endl; + //std::cout << "Symbol bad for this state: " << token.toString() << ". This is a reject." << std::endl; } else { action = (*(table[state]))[symbolIndex]; } diff --git a/src/Type.cpp b/src/Type.cpp index f8df255..3da4601 100644 --- a/src/Type.cpp +++ b/src/Type.cpp @@ -33,7 +33,7 @@ Type::Type(std::string typeIn) { baseType = character; else baseType = none; - std::cout << ":ALKJF:LSKDJF:SDJF:LKSJDF\t\t\t" << typeIn << "\t" << edited << std::endl; + //std::cout << ":ALKJF:LSKDJF:SDJF:LKSJDF\t\t\t" << typeIn << "\t" << edited << std::endl; } From 82df9b15927d76a66a59084c1845fc5ff5157367 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Wed, 1 Jan 2014 17:40:07 -0600 Subject: [PATCH 12/25] Update README.md Update README.md to reflect the current state of the project. --- README.md | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 699f03d..07ffdc7 100644 --- a/README.md +++ b/README.md @@ -4,20 +4,26 @@ Kraken The Kraken Programming Language The Kraken Programming Language is in its infancy. -Currently, it consists of a RNGLALR parser written in C++ and a very experimental grammer that is evolving quickly. -When compiled, the kraken program (as it is not yet a compiler) will take in a text file to be parsed, the grammer file to use, and a filename to output a DOT file to. -Kraken will then generate the RN parsing tables from the grammer and then parse the input and export a DOT file that can be renderd into a graph using Graphviz. 
+Currently, it consists of a RNGLALR parser written in C++, an experimental grammer that is evolving, and a C code generator. +When compiled, the kraken compiler will take in a text file to be parsed, the grammer file to use, and an output file name. +Kraken will then generate the RN parsing tables from the grammer OR load them from a binary file if Kraken has been run with this exact version of the grammer before. Then it will parse the input and export DOT files for every .krak file in the project (these can be rendered into a graph using Graphviz), a C file for every file in the project, and a .sh script containing the compiler command to compile the C files together into a binary. It is invoked in this way: -kraken inputTextFile inputGrammerFile outputFile.dot +kraken inputTextFile inputGrammerFile outputName + +Dependencies +============ It is built using CMake, which is also its only dependency. +Goals +===== + It has the following design goals: - -Compiled - -Clean - -Fast (both running and writing) - -Good for Systems (including Operating Systems) programming - -Minimal "magic" code. (no runtime, other libraries automatically included) +* Compiled +* Clean +* Fast (both running and writing) +* Good for Systems (including Operating Systems) programming +* Minimal "magic" code. (no runtime, other libraries automatically included) It is inspired by C/C++, Python, and Go. From 0297f29dcd792bad1393c58f2712c469ec9c08b6 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Tue, 7 Jan 2014 13:14:58 -0500 Subject: [PATCH 13/25] Save state before re-write of RegEx.
--- include/ASTData.h | 13 +++++----- include/ASTTransformation.h | 2 ++ include/CGenerator.h | 2 +- include/Type.h | 8 +++++- krakenGrammer.kgm | 25 ++++++++++++------- src/ASTData.cpp | 9 ++++--- src/ASTTransformation.cpp | 50 +++++++++++++++++++++++++++++-------- src/CGenerator.cpp | 33 +++++++++++++++++++----- src/Importer.cpp | 1 + src/Lexer.cpp | 1 + src/RegEx.cpp | 9 +++++++ src/Table.cpp | 2 +- src/Type.cpp | 41 +++++++++++++++--------------- 13 files changed, 139 insertions(+), 57 deletions(-) diff --git a/include/ASTData.h b/include/ASTData.h index 9c39d04..cd11e88 100644 --- a/include/ASTData.h +++ b/include/ASTData.h @@ -5,28 +5,29 @@ #include #include "Symbol.h" +//Circular dependency +class Type; #include "Type.h" #ifndef NULL #define NULL 0 #endif -enum ASTType {undef, translation_unit, interpreter_directive, import, identifier, - function, code_block, - typed_parameter, expression, boolean_expression, statement, +enum ASTType {undef, translation_unit, interpreter_directive, import, identifier, type_def, + function, code_block, typed_parameter, expression, boolean_expression, statement, if_statement, while_loop, for_loop, return_statement, assignment_statement, declaration_statement, if_comp, simple_passthrough, function_call, value}; class ASTData { public: ASTData(); - ASTData(ASTType type, Type valueType = Type()); - ASTData(ASTType type, Symbol symbol, Type valueType = Type()); + ASTData(ASTType type, Type *valueType = NULL); + ASTData(ASTType type, Symbol symbol, Type *valueType = NULL); ~ASTData(); std::string toString(); static std::string ASTTypeToString(ASTType type); ASTType type; - Type valueType; + Type* valueType; Symbol symbol; std::map*> scope; private: diff --git a/include/ASTTransformation.h b/include/ASTTransformation.h index f93ce65..31aaa3e 100644 --- a/include/ASTTransformation.h +++ b/include/ASTTransformation.h @@ -4,6 +4,7 @@ #include #include +#include "Type.h" #include "ASTData.h" #include "NodeTransformation.h" #include 
"Importer.h" @@ -18,6 +19,7 @@ class ASTTransformation: public NodeTransformation { NodeTree* transform(NodeTree* from, NodeTree* scope); std::string concatSymbolTree(NodeTree* root); NodeTree* scopeLookup(NodeTree* scope, std::string lookup); + Type* typeFromString(std::string type, NodeTree* scope); private: Importer * importer; }; diff --git a/include/CGenerator.h b/include/CGenerator.h index eab5c53..00d4c05 100644 --- a/include/CGenerator.h +++ b/include/CGenerator.h @@ -18,7 +18,7 @@ class CGenerator { ~CGenerator(); void generateCompSet(std::map*> ASTs, std::string outputName); std::string generate(NodeTree* from); - static std::string ValueTypeToCType(Type type); + static std::string ValueTypeToCType(Type *type); std::string generatorString; private: diff --git a/include/Type.h b/include/Type.h index 4727f49..a360d7f 100644 --- a/include/Type.h +++ b/include/Type.h @@ -8,6 +8,9 @@ #include #include +//Circular dependency +class ASTData; +#include "ASTData.h" #include "util.h" enum ValueType {none, void_type, boolean, integer, floating, double_percision, character }; @@ -18,10 +21,13 @@ class Type { Type(); Type(ValueType typeIn, int indirectionIn); Type(ValueType typeIn); - Type(std::string typeIn); + Type(NodeTree* typeDefinitionIn); + Type(NodeTree* typeDefinitionIn, int indirectionIn); + Type(ValueType typeIn, NodeTree* typeDefinitionIn, int indirectionIn); ~Type(); std::string toString(); ValueType baseType; + NodeTree* typeDefinition; int indirection; private: }; diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index 2234034..43dff1b 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -1,10 +1,9 @@ Goal = translation_unit ; translation_unit = interpreter_directive WS unorderd_list_part WS ; -unorderd_list_part = import_list WS unorderd_list_part | function WS unorderd_list_part | if_comp WS unorderd_list_part | simple_passthrough WS unorderd_list_part | declaration_statement WS ";" WS unorderd_list_part | import_list | function | if_comp | 
simple_passthrough | declaration_statement WS ";" ; +unorderd_list_part = import WS unorderd_list_part | function WS unorderd_list_part | type_def WS ";" WS unorderd_list_part | if_comp WS unorderd_list_part | simple_passthrough WS unorderd_list_part | declaration_statement WS ";" WS unorderd_list_part | import | function | type_def WS ";" | if_comp | simple_passthrough | declaration_statement WS ";" ; type = type WS "\*" | "void" | "int" | "float" | "double" | "char" | identifier ; -import_list = import_list WS import | import ; import = "import" WS identifier WS ";" ; interpreter_directive = "#!" WS path | ; @@ -19,8 +18,8 @@ WS = "( | | if_comp = "__if_comp__" WS identifier WS if_comp_pred ; if_comp_pred = code_block | simple_passthrough ; simple_passthrough = "__simple_passthrough__" WS triple_quoted_string ; -triple_quoted_string = "\"\"\"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|;|%|=|\+| | | -|\\|/|\||\(|\)|\"|#|<|\*|>|0|1|2|3|4|5|6|7|8|9)+\"\"\"" ; + +triple_quoted_string = "((b)|a)*" ; identifier = alpha | alpha alphanumeric ; @@ -34,6 +33,11 @@ opt_parameter_list = parameter_list | ; parameter_list = parameter_list WS "," WS parameter | parameter ; parameter = boolean_expression ; +type_def = "typedef" WS identifier WS type | "typedef" WS identifier WS "{" WS class_innerds WS "}" | "typedef" WS identifier WS "{" WS declaration_block WS "}" ; +class_innerds = visibility_block WS class_innerds | visibility_block ; +visibility_block = "public:" WS declaration_block | "protected:" WS declaration_block | "private:" WS declaration_block ; +declaration_block = declaration_statement WS ";" WS declaration_block | function WS declaration_block | declaration_statement WS ";" | function ; + if_statement = "if" WS "\(" WS boolean_expression WS "\)" WS statement ; while_loop = "while" WS boolean_expression WS statement ; @@ -62,7 +66,7 @@ unarad = number | identifier | function_call | bool | 
string | character | "\(" number = integer | float | double ; assignment_statement = identifier WS "=" WS boolean_expression | identifier WS "\+=" WS boolean_expression | identifier WS "-=" WS boolean_expression | identifier WS "\*=" WS boolean_expression | identifier WS "/=" WS boolean_expression ; -declaration_statement = type WS identifier WS "=" WS boolean_expression ; +declaration_statement = type WS identifier WS "=" WS boolean_expression | type WS identifier ; alphanumeric = alphanumeric numeric | alphanumeric alpha | numeric | alpha ; hexadecimal = "0x(1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)+" ; @@ -71,10 +75,13 @@ integer = sign numeric | sign hexadecimal | "null" ; float = sign numeric "." numeric "f" ; double = sign numeric "." numeric | sign numeric "." numeric "d" ; bool = "true" | "false" | "True" | "False" ; -character = "'(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|%| | |\\|/|\||\(|\)|0|1|2|3|4|5|6|7|8|9)'" ; +character = "'(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )'" ; alpha = "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|_)+" ; numeric = "(0|1|2|3|4|5|6|7|8|9)+" ; -string = triple_quoted_string | "\"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|%| | |\\|/|\||\(|\)|0|1|2|3|4|5|6|7|8|9)+\"" ; +string = triple_quoted_string | "\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )*\"" ; -comment = "//(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|;|%|=|\+| | 
|\\|/|\||\(|\)|\*|\"|0|1|2|3|4|5|6|7|8|9)+ -" | "/\*(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|%|=|\+| | |\\|/|\||\(|\)|\"|0|1|2|3|4|5|6|7|8|9)+\*/" ; \ No newline at end of file +comment = "//(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )* +" | "/\*(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )*\*/" ; diff --git a/src/ASTData.cpp b/src/ASTData.cpp index 0d1335b..f8fd0c2 100644 --- a/src/ASTData.cpp +++ b/src/ASTData.cpp @@ -2,14 +2,15 @@ ASTData::ASTData() { this->type = undef; + this->valueType = NULL; } -ASTData::ASTData(ASTType type, Type valueType) { +ASTData::ASTData(ASTType type, Type *valueType) { this->type = type; this->valueType = valueType; } -ASTData::ASTData(ASTType type, Symbol symbol, Type valueType) { +ASTData::ASTData(ASTType type, Symbol symbol, Type *valueType) { this->type = type; this->valueType = valueType; this->symbol = symbol; @@ -20,7 +21,7 @@ ASTData::~ASTData() { } std::string ASTData::toString() { - return ASTTypeToString(type) + (symbol.isTerminal() ? " " + symbol.toString() : "") + " " + valueType.toString(); + return ASTTypeToString(type) + (symbol.isTerminal() ? " " + symbol.toString() : "") + " " + (valueType ? 
valueType->toString() : "no_type"); } std::string ASTData::ASTTypeToString(ASTType type) { @@ -35,6 +36,8 @@ std::string ASTData::ASTTypeToString(ASTType type) { return "import"; case function: return "function"; + case type_def: + return "type_def"; case code_block: return "code_block"; case typed_parameter: diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 0f0ad42..8f7a917 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -64,9 +64,14 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree throw "LOOKUP ERROR: " + lookupName; } //newNode = new NodeTree(name, ASTData(identifier, Symbol(concatSymbolTree(children[0]), true))); + } else if (name == "type_def") { + std::string typeAlias = concatSymbolTree(children[0]); + newNode = new NodeTree(name, ASTData(type_def, Symbol(typeAlias, true, typeAlias), typeFromString(concatSymbolTree(children[1]), scope))); + scope->getDataRef()->scope[typeAlias] = newNode; + return newNode; } else if (name == "function") { std::string functionName = concatSymbolTree(children[1]); - newNode = new NodeTree(name, ASTData(function, Symbol(functionName, true), Type(concatSymbolTree(children[0])))); + newNode = new NodeTree(name, ASTData(function, Symbol(functionName, true), typeFromString(concatSymbolTree(children[0]), scope))); skipChildren.insert(0); skipChildren.insert(1); scope->getDataRef()->scope[functionName] = newNode; @@ -80,7 +85,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree //newNode = transform(children[1]); //Transform to get the identifier std::string parameterName = concatSymbolTree(children[1]); std::string typeString = concatSymbolTree(children[0]);//Get the type (left child) and set our new identifer to be that type - newNode = new NodeTree("identifier", ASTData(identifier, Symbol(parameterName, true), Type(typeString))); + newNode = new NodeTree("identifier", ASTData(identifier, Symbol(parameterName, true), typeFromString(typeString, 
scope))); scope->getDataRef()->scope[parameterName] = newNode; return newNode; } else if (name == "boolean_expression" || name == "and_boolean_expression" || name == "bool_exp") { @@ -180,10 +185,9 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree // NodeTree* newIdentifier = transform(children[1], scope); //Transform the identifier // newIdentifier->getDataRef()->valueType = Type(concatSymbolTree(children[0]));//set the type of the identifier - std::string newIdentifierStr = concatSymbolTree(children[1]); std::string typeString = concatSymbolTree(children[0]);//Get the type (left child) and set our new identifer to be that type - NodeTree* newIdentifier = new NodeTree("identifier", ASTData(identifier, Symbol(newIdentifierStr, true), Type(typeString))); + NodeTree* newIdentifier = new NodeTree("identifier", ASTData(identifier, Symbol(newIdentifierStr, true), typeFromString(typeString, scope))); scope->getDataRef()->scope[newIdentifierStr] = newIdentifier; newNode->addChild(newIdentifier); @@ -213,19 +217,19 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree return transform(children[0], scope); //Don't need a parameter node, just the value } else if (name == "type") { std::string theConcat = concatSymbolTree(from); //We have no symbol, so this will concat our children - newNode = new NodeTree(name, ASTData(value, Symbol(theConcat, true), Type(theConcat))); + newNode = new NodeTree(name, ASTData(value, Symbol(theConcat, true), typeFromString(theConcat, scope))); } else if (name == "number") { return transform(children[0], scope); } else if (name == "integer") { - newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(from), true), Type(integer))); + newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(from), true), new Type(integer))); } else if (name == "float") { - newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(from), true), Type(floating))); + newNode = new NodeTree(name, 
ASTData(value, Symbol(concatSymbolTree(from), true), new Type(floating))); } else if (name == "double") { - newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(from), true), Type(double_percision))); + newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(from), true), new Type(double_percision))); } else if (name == "char") { - newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), Type(character, 1))); //Indirection of 1 for array + newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), new Type(character, 1))); //Indirection of 1 for array } else if (name == "string" || name == "triple_quoted_string") { - newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), Type(character, 1))); //Indirection of 1 for array + newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), new Type(character, 1))); //Indirection of 1 for array } else { return new NodeTree(); } @@ -278,3 +282,29 @@ NodeTree* ASTTransformation::scopeLookup(NodeTree* scope, std: //std::cout << "upper scope does not exist" << std::endl; return NULL; } + +Type* ASTTransformation::typeFromString(std::string typeIn, NodeTree* scope) { + int indirection = 0; + ValueType baseType; + NodeTree* typeDefinition = NULL; + while (typeIn[typeIn.size() - indirection - 1] == '*') indirection++; + std::string edited = strSlice(typeIn, 0, -(indirection + 1)); + if (edited == "void") + baseType = void_type; + else if (edited == "bool") + baseType = boolean; + else if (edited == "int") + baseType = integer; + else if (edited == "float") + baseType = floating +; else if (edited == "double") + baseType = double_percision; + else if (edited == "char") + baseType = character; + else { + baseType = none; + typeDefinition = scopeLookup(scope, edited); + std::cout << "scopeLookup of type " << edited << " returned " << typeDefinition << std::endl; + } + return new 
Type(baseType, typeDefinition, indirection); +} diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 6e6912a..3257091 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -41,6 +41,10 @@ std::string CGenerator::generate(NodeTree* from) { std::string output = ""; switch (data.type) { case translation_unit: + //Do here because we may need the typedefs before the declarations of variables + for (int i = 0; i < children.size(); i++) + if (children[i]->getDataRef()->type == type_def) + output += generate(children[i]) + "\n"; //Declare everything in translation unit scope here. (allows stuff from other files, automatic forward declarations) for (auto i = data.scope.begin(); i != data.scope.end(); i++) { NodeTree* declaration = i->second; @@ -57,13 +61,22 @@ std::string CGenerator::generate(NodeTree* from) { output += ", "; output += ValueTypeToCType(decChildren[j]->getData().valueType) + " " + generate(decChildren[j]); } - output+= "); /*func*/\n"; + output += "); /*func*/\n"; + break; + case type_def: + //type + output += "/*typedef " + declarationData.symbol.getName() + " */\n"; break; default: //std::cout << "Declaration? 
named " << declaration->getName() << " of unknown type " << ASTData::ASTTypeToString(declarationData.type) << " in translation unit scope" << std::endl; output += "/*unknown declaration named " + declaration->getName() + "*/\n"; } } + //Do here because we need the newlines + for (int i = 0; i < children.size(); i++) + if (children[i]->getDataRef()->type != type_def) + output += generate(children[i]) + "\n"; + return output; break; case interpreter_directive: //Do nothing @@ -73,6 +86,8 @@ std::string CGenerator::generate(NodeTree* from) { //return "#include <" + data.symbol.getName() + ">\n"; case identifier: return data.symbol.getName(); + case type_def: + return "typedef " + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + ";"; case function: output += "\n" + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + "("; for (int i = 0; i < children.size()-1; i++) { @@ -116,7 +131,10 @@ std::string CGenerator::generate(NodeTree* from) { case assignment_statement: return generate(children[0]) + " = " + generate(children[1]); case declaration_statement: - return ValueTypeToCType(children[0]->getData().valueType) + " " + generate(children[0]) + " = " + generate(children[1]) + ";"; + if (children.size() == 1) + return ValueTypeToCType(children[0]->getData().valueType) + " " + generate(children[0]) + ";"; + else + return ValueTypeToCType(children[0]->getData().valueType) + " " + generate(children[0]) + " = " + generate(children[1]) + ";"; case if_comp: if (generate(children[0]) == generatorString) return generate(children[1]); @@ -161,11 +179,14 @@ std::string CGenerator::generate(NodeTree* from) { return output; } -std::string CGenerator::ValueTypeToCType(Type type) { +std::string CGenerator::ValueTypeToCType(Type *type) { std::string return_type; - switch (type.baseType) { + switch (type->baseType) { case none: - return_type = "none"; + if (type->typeDefinition) + return_type = type->typeDefinition->getDataRef()->symbol.getName(); + else + 
return_type = "none"; break; case void_type: return_type = "void"; @@ -189,7 +210,7 @@ std::string CGenerator::ValueTypeToCType(Type type) { return_type = "unknown_ValueType"; break; } - for (int i = 0; i < type.indirection; i++) + for (int i = 0; i < type->indirection; i++) return_type += "*"; return return_type; } diff --git a/src/Importer.cpp b/src/Importer.cpp index 7cac9ab..76d8231 100644 --- a/src/Importer.cpp +++ b/src/Importer.cpp @@ -19,6 +19,7 @@ Importer::Importer(Parser* parserIn) { removeSymbols.push_back(Symbol("while", true)); removeSymbols.push_back(Symbol("__if_comp__", true)); removeSymbols.push_back(Symbol("comp_simple_passthrough", true)); + removeSymbols.push_back(Symbol("typedef", true)); collapseSymbols.push_back(Symbol("opt_typed_parameter_list", false)); collapseSymbols.push_back(Symbol("opt_parameter_list", false)); diff --git a/src/Lexer.cpp b/src/Lexer.cpp index b6f7033..9aa6059 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -20,6 +20,7 @@ void Lexer::setInput(std::string inputString) { } void Lexer::addRegEx(std::string regExString) { + std::cout << regExString << " at lexer" << std::endl; regExs.push_back(new RegEx(regExString)); } diff --git a/src/RegEx.cpp b/src/RegEx.cpp index b54f6ab..3b36158 100644 --- a/src/RegEx.cpp +++ b/src/RegEx.cpp @@ -2,8 +2,10 @@ #include RegEx::RegEx(std::string inPattern) { + std::cout << inPattern << " at rexex" << std::endl; pattern = inPattern; construct(); + std::cout << inPattern << " at rexex post" << std::endl; deperenthesize(); } @@ -310,5 +312,12 @@ void RegEx::test() { assert(re.longMatch("ab") == 1); } + { + RegEx re("((ab)|c)*"); + assert(re.longMatch("ababc") == 5); + assert(re.longMatch("ad") == 1); + assert(re.longMatch("ababccd") == 6); + } + std::cout << "RegEx tests pass\n"; } diff --git a/src/Table.cpp b/src/Table.cpp index ddbc421..5f37e1f 100644 --- a/src/Table.cpp +++ b/src/Table.cpp @@ -85,7 +85,7 @@ void Table::exportTable(std::ofstream &file) { //std::vector rightSide; 
std::vector rightSide = rule->getRightSide(); size = rightSide.size(); - std::cout << leftHandle.toString() << std::endl; + //std::cout << leftHandle.toString() << std::endl; file.write((char*)&size, sizeof(int)); for (int l = 0; l < rightSide.size(); l++) { //Save the name diff --git a/src/Type.cpp b/src/Type.cpp index 3da4601..83b638f 100644 --- a/src/Type.cpp +++ b/src/Type.cpp @@ -15,27 +15,22 @@ Type::Type(ValueType typeIn, int indirectionIn) { baseType = typeIn; } -Type::Type(std::string typeIn) { +Type::Type(NodeTree* typeDefinitionIn) { indirection = 0; - while (typeIn[typeIn.size() - indirection - 1] == '*') indirection++; - std::string edited = strSlice(typeIn, 0, -(indirection + 1)); - if (edited == "void") - baseType = void_type; - else if (edited == "bool") - baseType = boolean; - else if (edited == "int") - baseType = integer; - else if (edited == "float") - baseType = floating; - else if (edited == "double") - baseType = double_percision; - else if (edited == "char") - baseType = character; - else - baseType = none; - //std::cout << ":ALKJF:LSKDJF:SDJF:LKSJDF\t\t\t" << typeIn << "\t" << edited << std::endl; + baseType = none; + typeDefinition = typeDefinitionIn; +} +Type::Type(NodeTree* typeDefinitionIn, int indirectionIn) { + indirection = indirectionIn; + baseType = none; + typeDefinition = typeDefinitionIn; } +Type::Type(ValueType typeIn, NodeTree* typeDefinitionIn, int indirectionIn) { + baseType = typeIn; + indirection = indirectionIn; + typeDefinition = typeDefinitionIn; +} Type::~Type() { } @@ -44,7 +39,10 @@ std::string Type::toString() { std::string typeString; switch (baseType) { case none: - typeString = "none"; + if (typeDefinition) + typeString = typeDefinition->getDataRef()->symbol.getName(); + else + typeString = "none"; break; case void_type: typeString = "void"; @@ -65,7 +63,10 @@ std::string Type::toString() { typeString = "char"; break; default: - typeString = "unknown_type"; + if (typeDefinition) + typeString = 
typeDefinition->getDataRef()->symbol.getName(); + else + typeString = "unknown_type"; } for (int i = 0; i < indirection; i++) typeString += "*"; From 0d47a039862873387be143fe485512e869245fce Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Tue, 7 Jan 2014 21:31:56 -0500 Subject: [PATCH 14/25] Fixed regex! Much simpler and sensible implementation too. --- include/RegEx.h | 3 +- include/util.h | 1 + krakenGrammer.kgm | 8 +- src/Lexer.cpp | 1 - src/RegEx.cpp | 234 ++++++++++++++-------------------------------- src/util.cpp | 12 +++ 6 files changed, 92 insertions(+), 167 deletions(-) diff --git a/include/RegEx.h b/include/RegEx.h index 7a9e7de..2e18c35 100644 --- a/include/RegEx.h +++ b/include/RegEx.h @@ -16,8 +16,7 @@ class RegEx { RegEx(std::string inPattern); ~RegEx(); - void construct(); - void deperenthesize(); + RegExState* construct(std::vector* ending, std::string pattern); int longMatch(std::string stringToMatch); std::string getPattern(); std::string toString(); diff --git a/include/util.h b/include/util.h index 42b3d8d..4cf10a6 100644 --- a/include/util.h +++ b/include/util.h @@ -12,5 +12,6 @@ std::string intToString(int theInt); std::string replaceExEscape(std::string first, std::string search, std::string replace); std::string strSlice(std::string str, int begin, int end); +int findPerenEnd(std::string str, int i); #endif diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index 43dff1b..d437ec0 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -19,7 +19,13 @@ if_comp = "__if_comp__" WS identifier WS if_comp_pred ; if_comp_pred = code_block | simple_passthrough ; simple_passthrough = "__simple_passthrough__" WS triple_quoted_string ; -triple_quoted_string = "((b)|a)*" ; +triple_quoted_string = "\"\"\"((\"\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )+)|(\"(`|1|2|3|4|5|6|7|8|9|0|-|=| 
|q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )+))*(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )*(((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )+\")|((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )+\"\")|((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )+))*\"\"\"" ; identifier = alpha | alpha alphanumeric ; diff --git a/src/Lexer.cpp b/src/Lexer.cpp index 9aa6059..b6f7033 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -20,7 +20,6 @@ void Lexer::setInput(std::string inputString) { } void Lexer::addRegEx(std::string regExString) { - std::cout << regExString << " at lexer" << std::endl; regExs.push_back(new RegEx(regExString)); } diff --git a/src/RegEx.cpp b/src/RegEx.cpp index 3b36158..5a408a6 100644 --- a/src/RegEx.cpp +++ b/src/RegEx.cpp @@ -2,49 +2,56 @@ #include RegEx::RegEx(std::string inPattern) { - std::cout << inPattern << " at rexex" << std::endl; pattern = inPattern; - construct(); - std::cout << inPattern << " at rexex post" << std::endl; - deperenthesize(); + std::vector ending; + begin = construct(&ending, inPattern); + //last one is goal state, add it to the end of all of these last states + for (std::vector::size_type i = 0; i < ending.size(); i++) + ending[i]->addNext(NULL); } -void RegEx::construct() { 
- std::vector previousStates; - std::vector currentStates; - std::stack, std::vector > > perenStack; +RegExState* RegEx::construct(std::vector* ending, std::string pattern) { + //In the RegEx re-write, instead of doing complicated unperenthesising, we keep track of both the "front" and the "end" of a state. + //(these could be different if the state is perenthesezed) + std::vector previousStatesBegin; + std::vector previousStatesEnd; + std::vector currentStatesBegin; + std::vector currentStatesEnd; + bool alternating = false; - begin = new RegExState(); - currentStates.push_back(begin); + RegExState* begin = new RegExState(); + currentStatesBegin.push_back(begin); + currentStatesEnd.push_back(begin); + for (int i = 0; i < pattern.length(); i++) { switch (pattern[i]) { case '*': { //std::cout << "Star at " << i << " in " << pattern << std::endl; - // for (std::vector::size_type j = 0; j < currentStates.size(); j++) - // for (std::vector::size_type k = 0; k < currentStates.size(); k++) - // currentStates[j]->addNext(currentStates[k]); - currentStates[currentStates.size()-1]->addNext(currentStates[currentStates.size()-1]); + //NOTE: Because of the re-write, this is necessary again + for (std::vector::size_type j = 0; j < currentStatesEnd.size(); j++) + for (std::vector::size_type k = 0; k < currentStatesBegin.size(); k++) + currentStatesEnd[j]->addNext(currentStatesBegin[k]); //Make the ends point to the beginnings //add all previous states to current states to enable skipping over the starred item - currentStates.insert(currentStates.end(), previousStates.begin(), previousStates.end()); + currentStatesBegin.insert(currentStatesBegin.end(), previousStatesBegin.begin(), previousStatesBegin.end()); + currentStatesEnd.insert(currentStatesEnd.end(), previousStatesEnd.begin(), previousStatesEnd.end()); } break; case '+': { //std::cout << "Plus at " << i << " in " << pattern << std::endl; - //OtherThingy - //current->addNext(current); - // for (std::vector::size_type j = 0; 
j < currentStates.size(); j++) - // for (std::vector::size_type k = 0; k < currentStates.size(); k++) - // currentStates[j]->addNext(currentStates[k]); - currentStates[currentStates.size()-1]->addNext(currentStates[currentStates.size()-1]); + //NOTE: Because of the re-write, this is necessary again + for (std::vector::size_type j = 0; j < currentStatesEnd.size(); j++) + for (std::vector::size_type k = 0; k < currentStatesBegin.size(); k++) + currentStatesEnd[j]->addNext(currentStatesBegin[k]); //Make the ends point to the beginnings } break; case '?': { //std::cout << "Question at " << i << " in " << pattern << std::endl; //add all previous states to current states to enable skipping over the questioned item - currentStates.insert(currentStates.end(), previousStates.begin(), previousStates.end()); + currentStatesBegin.insert(currentStatesBegin.end(), previousStatesBegin.begin(), previousStatesBegin.end()); + currentStatesEnd.insert(currentStatesEnd.end(), previousStatesEnd.begin(), previousStatesEnd.end()); } break; case '|': @@ -59,59 +66,31 @@ void RegEx::construct() { { //std::cout << "Begin peren at " << i << " in " << pattern << std::endl; //perentheses - //Create a peren node with an inner empty node - RegExState* next = new RegExState(new RegExState()); - + std::vector innerEnds; + int perenEnd = findPerenEnd(pattern, i); + RegExState* innerBegin = construct(&innerEnds, strSlice(pattern, i+1, perenEnd)); + i = perenEnd; + std::vector innerBegins = *(innerBegin->getNextStates()); if (alternating) { - for (std::vector::size_type j = 0; j < previousStates.size(); j++) - previousStates[j]->addNext(next); - - //Save both current states here as well as the current preren - std::vector savePreviousStates = previousStates; - currentStates.push_back(next); - std::vector saveCurrentStates = currentStates; - perenStack.push(std::make_pair(savePreviousStates, saveCurrentStates)); - - previousStates.clear(); - currentStates.clear(); - 
currentStates.push_back(next->getInner()); - alternating = false; + for (std::vector::size_type j = 0; j < previousStatesEnd.size(); j++) + for (std::vector::size_type k = 0; k < innerBegins.size(); k++) + previousStatesEnd[j]->addNext(innerBegins[k]); + currentStatesBegin.insert(currentStatesBegin.end(), innerBegins.begin(), innerBegins.end()); + currentStatesEnd.insert(currentStatesEnd.end(), innerEnds.begin(), innerEnds.end()); } else { - for (std::vector::size_type j = 0; j < currentStates.size(); j++) - currentStates[j]->addNext(next); - - //Save both current states here as well as the current preren - std::vector savePreviousStates = currentStates; - currentStates.clear(); - currentStates.push_back(next); - std::vector saveCurrentStates = currentStates; - perenStack.push(std::make_pair(savePreviousStates, saveCurrentStates)); - - previousStates.clear(); - currentStates.clear(); - currentStates.push_back(next->getInner()); + for (std::vector::size_type j = 0; j < currentStatesEnd.size(); j++) + for (std::vector::size_type k = 0; k < innerBegins.size(); k++) + currentStatesEnd[j]->addNext(innerBegins[k]); + previousStatesBegin = currentStatesBegin; + previousStatesEnd = currentStatesEnd; + currentStatesBegin = innerBegins; + currentStatesEnd = innerEnds; } - //std::cout << "Peren is " << next << " Inner is " << currentStates[0] << " = " << next->getInner() << std::endl; + alternating = false; } break; - case ')': - { - //std::cout << "End peren at " << i << " in " << pattern << std::endl; - //perentheses - //Pop off the states that will now be the previous states and the peren node which will now be the current node - std::pair, std::vector > savedPair = perenStack.top(); - perenStack.pop(); - //Make the it so - previousStates = savedPair.first; - //Make sure the end of the inner stuff points back to the peren node - for (std::vector::size_type j = 0; j < currentStates.size(); j++) - currentStates[j]->addNext(savedPair.second[savedPair.second.size()-1]); - 
//currentStates[j]->addNext(*(savedPair.second.end())); - currentStates.clear(); - currentStates = savedPair.second; - } - break; + // ) does not need a case as we skip over it after finding it in ('s case case '\\': { @@ -126,109 +105,33 @@ void RegEx::construct() { RegExState* next = new RegExState(pattern[i]); //If we're alternating, add next as the next for each previous state, and add self to currentStates if (alternating) { - for (std::vector::size_type j = 0; j < previousStates.size(); j++) { - previousStates[j]->addNext(next); - //std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << previousStates[j] << std::endl; - } - currentStates.push_back(next); + for (std::vector::size_type j = 0; j < previousStatesEnd.size(); j++) + previousStatesEnd[j]->addNext(next); + currentStatesBegin.push_back(next); + currentStatesEnd.push_back(next); alternating = false; } else { //If we're not alternating, add next as next for all the current states, make the current states the new //previous states, and add ourself as the new current state. 
- for (std::vector::size_type j = 0; j < currentStates.size(); j++) { - currentStates[j]->addNext(next); - //std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << currentStates[j] << std::endl; - } - previousStates.clear(); - previousStates = currentStates; - currentStates.clear(); - currentStates.push_back(next); + for (std::vector::size_type j = 0; j < currentStatesEnd.size(); j++) + currentStatesEnd[j]->addNext(next); + + previousStatesBegin.clear(); + previousStatesEnd.clear(); + previousStatesBegin = currentStatesBegin; + previousStatesEnd = currentStatesEnd; + currentStatesBegin.clear(); + currentStatesEnd.clear(); + currentStatesBegin.push_back(next); + currentStatesEnd.push_back(next); } } } } - //last one is goal state - for (std::vector::size_type i = 0; i < currentStates.size(); i++) - currentStates[i]->addNext(NULL); + (*ending) = currentStatesEnd; + return(begin); } -void RegEx::deperenthesize() { - //std::cout << "About to de-perenthesize " << begin->toString() << std::endl; - - //Now go through and expand the peren nodes to regular nodes - std::vector processedStates; - std::vector statesToProcess; - statesToProcess.push_back(begin); - for (std::vector::size_type i = 0; i < statesToProcess.size(); i++) { - //Don't process null (sucess) state - if (statesToProcess[i] == NULL) - continue; - std::vector* nextStates = statesToProcess[i]->getNextStates(); - for (std::vector::size_type j = 0; j < nextStates->size(); j++) { - if ((*nextStates)[j] != NULL && (*nextStates)[j]->getInner() != NULL) { - //Fix all the next references pointing to the peren node to point to the inner nodes. 
(if more than one, push back to add others) - std::vector* insideNextStates = (*nextStates)[j]->getInner()->getNextStates(); - //std::cout << "insideNextStates = " << insideNextStates << " [0] " << (*insideNextStates)[0] << std::endl; - RegExState* perenState = (*nextStates)[j]; - (*nextStates)[j] = (*insideNextStates)[0]; - //std::cout << "So now nextstates[j] = " << (*nextStates)[j] << std::endl; - for (std::vector::size_type k = 1; k < insideNextStates->size(); k++) - nextStates->push_back((*insideNextStates)[k]); - //std::cout << "Replaced beginning: " << begin->toString() << std::endl; - //Now, if the peren node is self-referential (has a repitition operator after i), fix it's self-references in the same manner - std::vector* perenNextNodes = perenState->getNextStates(); - for (std::vector::size_type k = 0; k < perenNextNodes->size(); k++) { - if ((*perenNextNodes)[k] == perenState) { - (*perenNextNodes)[k] = (*insideNextStates)[0]; - for (std::vector::size_type l = 1; l < insideNextStates->size(); l++) - perenNextNodes->push_back((*insideNextStates)[l]); - } - } - //std::cout << "Fixed self-references: " << begin->toString() << std::endl; - //Need to fix the end too - std::vector traversalList; - traversalList.push_back(perenState->getInner()); - for (std::vector::size_type k = 0; k < traversalList.size(); k++) { - std::vector* nextTraversalStates = traversalList[k]->getNextStates(); - //std::cout << "Traversing! 
nextTraversalStates from traversalList " << traversalList[k] << " char = " << traversalList[k]->getCharacter() << std::endl; - //std::cout << "with children:" << std::endl; - //for (std::vector::size_type l = 0; l < nextTraversalStates->size(); l++) - // std::cout << "\t\"" << (*nextTraversalStates)[l]->getCharacter() << "\"" << std::endl; - //std::cout << std::endl; - for (std::vector::size_type l = 0; l < nextTraversalStates->size(); l++) { - //If this node is equal to the peren node we came from, then that means we've reached the end of the inner part of the peren - //And we now replace this reference with the next nodes from the peren node - //std::cout << "Traversal Next is on " << (*nextTraversalStates)[l]->getCharacter() << std::endl; - if ((*nextTraversalStates)[l] == perenState) { - // std::cout << "nextTraversalStates[l] = to perenState!" << std::endl; - std::vector endPerenNextStates = *(perenState->getNextStates()); - (*nextTraversalStates)[l] = endPerenNextStates[0]; - for (std::vector::size_type n = 1; n < endPerenNextStates.size(); n++) - nextTraversalStates->push_back(endPerenNextStates[n]); - //Now make sure we don't now try to continue through and end up processing stuff we just replaced the peren reference with - break; - } else { - traversalList.push_back((*nextTraversalStates)[l]); - } - } - } - } - } - //Now add all these next states to process, only if they haven't already been processed - for (std::vector::size_type j = 0; j < nextStates->size(); j++) { - bool inCurrStates = false; - for (std::vector::size_type k = 0; k < statesToProcess.size(); k++) { - if ((*nextStates)[j] == statesToProcess[k]) - inCurrStates = true; - } - if (!inCurrStates) { - statesToProcess.push_back((*nextStates)[j]); - //std::cout << (*nextStates)[j] << "Is not in states to process" << std::endl; - } - } - } - //std::cout << "Finished de-perenthesization " << begin->toString() << std::endl; -} RegEx::~RegEx() { //No cleanup necessary @@ -315,9 +218,14 @@ void 
RegEx::test() { { RegEx re("((ab)|c)*"); assert(re.longMatch("ababc") == 5); - assert(re.longMatch("ad") == 1); + assert(re.longMatch("ad") == 0); assert(re.longMatch("ababccd") == 6); } + { + RegEx re("bbb((bba+)|(ba+))*a*((a+b)|(a+bb)|(a+))*bbb") ; + std::cout << re.longMatch("bbbababbbaaaaaaaaaaaaaaaaaaabbb") << std::endl; + + } std::cout << "RegEx tests pass\n"; } diff --git a/src/util.cpp b/src/util.cpp index 03d5cd3..a08e776 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -40,3 +40,15 @@ std::string strSlice(std::string str, int begin, int end) { end += str.length()+1; return str.substr(begin, end-begin); } + +int findPerenEnd(std::string str, int i) { + int numHangingOpen = 0; + for (; i< str.length(); i++) { + if (str[i] == '(') + numHangingOpen++; + else if (str[i] == ')') + numHangingOpen--; + if (numHangingOpen == 0) + return i; + } +} From 00ef8f0a7c8c979c933b482ed6237d452338b75e Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Tue, 7 Jan 2014 21:43:39 -0500 Subject: [PATCH 15/25] Fixed the triple string RegEx. Had included quotes in the main part of the RegEx by accident. 
--- krakenGrammer.kgm | 12 ++++++------ src/ASTTransformation.cpp | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index d437ec0..faa26eb 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -20,12 +20,12 @@ if_comp_pred = code_block | simple_passthrough ; simple_passthrough = "__simple_passthrough__" WS triple_quoted_string ; triple_quoted_string = "\"\"\"((\"\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| -|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )+)|(\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| -|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )+))*(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| -|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )*(((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| -|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )+\")|((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| -|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )+\"\")|((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| -|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )+))*\"\"\"" ; +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+)|(\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| 
+|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+))*(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )*(((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+\")|((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+\"\")|((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+))*\"\"\"" ; identifier = alpha | alpha alphanumeric ; diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 8f7a917..85a0cc1 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -304,7 +304,7 @@ Type* ASTTransformation::typeFromString(std::string typeIn, NodeTree* s else { baseType = none; typeDefinition = scopeLookup(scope, edited); - std::cout << "scopeLookup of type " << edited << " returned " << typeDefinition << std::endl; + //std::cout << "scopeLookup of type " << edited << " returned " << typeDefinition << std::endl; } return new Type(baseType, typeDefinition, indirection); } From b59fd26d4a29cc361b0bde3edf78141fd606a4d8 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Sat, 18 Jan 2014 15:28:17 -0500 Subject: [PATCH 16/25] Add triple quoted string regex for posterity. 
--- true_triple_quoted_string_regex.txt | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 true_triple_quoted_string_regex.txt diff --git a/true_triple_quoted_string_regex.txt b/true_triple_quoted_string_regex.txt new file mode 100644 index 0000000..a14076b --- /dev/null +++ b/true_triple_quoted_string_regex.txt @@ -0,0 +1,9 @@ +This is the true regex for triple quoted strings, but it segfaults my regex code.... + +triple_quoted_string = "\"\"\"((\"\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+)|(\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+))*(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )*(((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+\")|((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+\"\")|((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )+))*\"\"\"" ; \ No newline at end of file From 62933af8f8f8bf65e46afabd4fdea0cceaae6827 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Sun, 19 Jan 2014 18:20:52 -0500 Subject: [PATCH 17/25] Basic structures work, prep work for function pointers and other stuff,a couple of bugs fixed. 
--- include/State.h | 1 + include/util.h | 3 +++ krakenGrammer.kgm | 8 +++--- main.cpp | 5 ++-- src/ASTTransformation.cpp | 56 ++++++++++++++++++++++++++++++--------- src/CGenerator.cpp | 16 ++++++++--- src/Importer.cpp | 1 + src/RNGLRParser.cpp | 1 + src/RegEx.cpp | 3 +-- src/State.cpp | 4 +++ src/Type.cpp | 3 +++ src/util.cpp | 19 +++++++++++++ 12 files changed, 95 insertions(+), 25 deletions(-) diff --git a/include/State.h b/include/State.h index 7a6a52c..13bd3e0 100644 --- a/include/State.h +++ b/include/State.h @@ -33,6 +33,7 @@ class State { void addParents(std::vector* parents); std::vector* getParents(); std::vector* getDeepParents(int depth); + int getNumber(); std::vector basis; diff --git a/include/util.h b/include/util.h index 4cf10a6..7e95b9e 100644 --- a/include/util.h +++ b/include/util.h @@ -8,10 +8,13 @@ #include #include #include +#include std::string intToString(int theInt); std::string replaceExEscape(std::string first, std::string search, std::string replace); std::string strSlice(std::string str, int begin, int end); int findPerenEnd(std::string str, int i); +std::vector split(const std::string &str, char delim); +std::string join(const std::vector &strVec, std::string joinStr); #endif diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index faa26eb..610816d 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -56,8 +56,7 @@ code_block = "{" WS statement_list WS "}" ; statement_list = statement_list WS statement | statement ; statement = if_statement | while_loop | for_loop | return_statement WS ";" | boolean_expression WS ";" | assignment_statement WS ";" | declaration_statement WS ";" | code_block | if_comp | simple_passthrough ; -function_call = scope identifier "\(" WS opt_parameter_list WS "\)" ; -scope = scope identifier "::" | ; +function_call = unarad "\(" WS opt_parameter_list WS "\)" ; boolean_expression = boolean_expression WS "\|\|" WS and_boolean_expression | and_boolean_expression ; and_boolean_expression = 
and_boolean_expression "&&" bool_exp | bool_exp ; @@ -68,10 +67,11 @@ expression = expression WS "<<" WS term | expression WS ">>" WS shiftand | shift shiftand = shiftand WS "-" WS term | shiftand WS "\+" WS term | term ; term = term WS forward_slash WS factor | term WS "\*" WS factor | term WS "%" WS factor | factor ; factor = "\+\+" WS unarad | unarad WS "\+\+" | "--" WS unarad | unarad WS "--" | "\+" WS unarad | "-" WS unarad | "!" WS unarad | "~" WS unarad | "\(" WS type WS "\)" WS unarad | "\*" WS unarad | "&" WS unarad | unarad ; -unarad = number | identifier | function_call | bool | string | character | "\(" WS boolean_expression WS "\)" ; +unarad = number | identifier | function_call | bool | string | character | "\(" WS boolean_expression WS "\)" | access_operation ; number = integer | float | double ; +access_operation = expression "." identifier | expression "->" identifier ; -assignment_statement = identifier WS "=" WS boolean_expression | identifier WS "\+=" WS boolean_expression | identifier WS "-=" WS boolean_expression | identifier WS "\*=" WS boolean_expression | identifier WS "/=" WS boolean_expression ; +assignment_statement = factor WS "=" WS boolean_expression | factor WS "\+=" WS boolean_expression | factor WS "-=" WS boolean_expression | factor WS "\*=" WS boolean_expression | factor WS "/=" WS boolean_expression ; declaration_statement = type WS identifier WS "=" WS boolean_expression | type WS identifier ; alphanumeric = alphanumeric numeric | alphanumeric alpha | numeric | alpha ; diff --git a/main.cpp b/main.cpp index 9ca06ef..c0f6036 100644 --- a/main.cpp +++ b/main.cpp @@ -104,14 +104,15 @@ int main(int argc, char* argv[]) { compiledGrammerOutFile.open(grammerFileString + ".comp", std::ios::binary); if (!compiledGrammerOutFile.is_open()) std::cout << "Could not open compiled file to write either!" 
<< std::endl; - compiledGrammerOutFile.write("KRAK", sizeof(char)*4); + compiledGrammerOutFile.write("KRAK", sizeof(char)*4); //Let us know when we load it that this is a kraken grammer file, but don't write out + compiledGrammerOutFile.flush(); // the grammer txt until we create the set, so that if we fail creating it it won't look valid + parser.createStateSet(); int* intBuffer = new int; *intBuffer = grammerInputFileString.length()+1; compiledGrammerOutFile.write((char*)intBuffer, sizeof(int)); delete intBuffer; compiledGrammerOutFile.write(grammerInputFileString.c_str(), grammerInputFileString.length()+1); //Don't forget null terminator - parser.createStateSet(); parser.exportTable(compiledGrammerOutFile); compiledGrammerOutFile.close(); } diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 85a0cc1..0543fb1 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -42,6 +42,8 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree scope->getDataRef()->scope["*="] = new NodeTree(); scope->getDataRef()->scope["+="] = new NodeTree(); scope->getDataRef()->scope["-="] = new NodeTree(); + scope->getDataRef()->scope["."] = new NodeTree(); + scope->getDataRef()->scope["->"] = new NodeTree(); } else if (name == "interpreter_directive") { newNode = new NodeTree(name, ASTData(interpreter_directive)); @@ -56,19 +58,31 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree scope->getDataRef()->scope[i->first] = i->second; return newNode; // Don't need children of import } else if (name == "identifier") { - std::string lookupName = concatSymbolTree(children[0]); + //Make sure we get the entire name + std::string lookupName = concatSymbolTree(from); //std::cout << "scope lookup from identifier" << std::endl; newNode = scopeLookup(scope, lookupName); if (newNode == NULL) { std::cout << "scope lookup error! 
Could not find " << lookupName << std::endl; throw "LOOKUP ERROR: " + lookupName; + } else if (newNode->getDataRef()->symbol.getName() !=lookupName) { + //This happens when the lookup name denotes a member of an object, i.e. obj.foo + //The newNode points to obj, not foo. } //newNode = new NodeTree(name, ASTData(identifier, Symbol(concatSymbolTree(children[0]), true))); } else if (name == "type_def") { std::string typeAlias = concatSymbolTree(children[0]); - newNode = new NodeTree(name, ASTData(type_def, Symbol(typeAlias, true, typeAlias), typeFromString(concatSymbolTree(children[1]), scope))); + //If it is an alisis of a type + if (children[1]->getData().getName() == "type") { + newNode = new NodeTree(name, ASTData(type_def, Symbol(typeAlias, true, typeAlias), typeFromString(concatSymbolTree(children[1]), scope))); + skipChildren.insert(1); //Don't want any children, it's unnecessary for ailising + } else { //Is a struct or class + newNode = new NodeTree(name, ASTData(type_def, Symbol(typeAlias, true, typeAlias))); + newNode->getDataRef()->valueType = new Type(newNode); //Type is self-referential since this is the definition + } scope->getDataRef()->scope[typeAlias] = newNode; - return newNode; + skipChildren.insert(0); //Identifier lookup will be ourselves, as we just added ourselves to the scope + //return newNode; } else if (name == "function") { std::string functionName = concatSymbolTree(children[1]); newNode = new NodeTree(name, ASTData(function, Symbol(functionName, true), typeFromString(concatSymbolTree(children[0]), scope))); @@ -108,7 +122,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree return transform(children[0], scope); //Just a promoted term, so do child } //Here's the order of ops stuff - } else if (name == "expression" || name == "shiftand" || name == "term" || name == "unarad") { //unarad can ride through, it should always just be a promoted child + } else if (name == "expression" || name == "shiftand" || name == "term" || 
name == "unarad" || name == "access_operation") { //unarad can ride through, it should always just be a promoted child //If this is an actual part of an expression, not just a premoted child if (children.size() > 2) { std::string functionCallName = concatSymbolTree(children[1]); @@ -187,8 +201,18 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree // newIdentifier->getDataRef()->valueType = Type(concatSymbolTree(children[0]));//set the type of the identifier std::string newIdentifierStr = concatSymbolTree(children[1]); std::string typeString = concatSymbolTree(children[0]);//Get the type (left child) and set our new identifer to be that type - NodeTree* newIdentifier = new NodeTree("identifier", ASTData(identifier, Symbol(newIdentifierStr, true), typeFromString(typeString, scope))); + Type* identifierType = typeFromString(typeString, scope); + NodeTree* newIdentifier = new NodeTree("identifier", ASTData(identifier, Symbol(newIdentifierStr, true), identifierType)); scope->getDataRef()->scope[newIdentifierStr] = newIdentifier; + //Now we don't do this thing + // if (identifierType->typeDefinition) { + // //Is a custom type. Populate this declaration's scope with it's inner declarations + // std::vector*> definitions = identifierType->typeDefinition->getChildren(); + // for (auto i : definitions) { + // //Point to the identifier. May need to change so it points to the declaration or something, with new declarations..... + // newIdentifier->getDataRef()->scope[i->get(0)->getDataRef()->symbol.getName()] = i->get(0); //make each declaration's name point to it's definition, like above + // } + // } newNode->addChild(newIdentifier); skipChildren.insert(0); //These, the type and the identifier, have been taken care of. 
@@ -200,8 +224,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree } else if (name == "simple_passthrough") { newNode = new NodeTree(name, ASTData(simple_passthrough)); } else if (name == "function_call") { - //children[0] is scope - std::string functionCallName = concatSymbolTree(children[1]); + std::string functionCallName = concatSymbolTree(children[0]); newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); //std::cout << "scope lookup from function_call" << std::endl; NodeTree* function = scopeLookup(scope, functionCallName); @@ -210,7 +233,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree throw "LOOKUP ERROR: " + functionCallName; } newNode->addChild(function); - skipChildren.insert(1); + skipChildren.insert(0); } else if (name == "parameter") { return transform(children[0], scope); //Don't need a parameter node, just the value } else if (name == "parameter") { @@ -261,12 +284,18 @@ std::string ASTTransformation::concatSymbolTree(NodeTree* root) { } NodeTree* ASTTransformation::scopeLookup(NodeTree* scope, std::string lookup) { - //Seach the map + //First, if it is a struct or object, get it's base. + std::vector splitString = split(lookup, '.'); + if (splitString.size() > 1) { + std::string base = splitString[0]; + // NodeTree* baseDef = scopeLookup(scope, base); + // splitString.erase(splitString.begin()); //Get rid of the base in the split str + // //Now the base is the scope. + // return scopeLookup(baseDef, join(splitString, ".")); //So the joined version doesn't have the base. 
+ return scopeLookup(scope, base); + } + //Search the map auto scopeMap = scope->getDataRef()->scope; - //std::cout << "scope size: " << scopeMap.size() << ", scope from " << scope->getName() << std::endl; - // for (auto i = scopeMap.begin(); i != scopeMap.end(); i++) - // std::cout << i->first << " : " << i-> second << " - " << i->second->getName() << std::endl; - auto elementIterator = scopeMap.find(lookup); if (elementIterator != scopeMap.end()) { // std::cout << "lookup of " << lookup << " succeded in first scope!" << std::endl; @@ -280,6 +309,7 @@ NodeTree* ASTTransformation::scopeLookup(NodeTree* scope, std: return scopeLookup(enclosingIterator->second, lookup); } //std::cout << "upper scope does not exist" << std::endl; + std::cout << "could not find " << lookup << std::endl; return NULL; } diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 3257091..5d88a66 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -87,7 +87,15 @@ std::string CGenerator::generate(NodeTree* from) { case identifier: return data.symbol.getName(); case type_def: - return "typedef " + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + ";"; + if (children.size() == 0) { + return "typedef " + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + ";"; + } else { + std::string objectString = "typedef struct __struct_dummy_" + data.symbol.getName() + "__ {\n"; + for (int i = 0; i < children.size(); i++) + objectString += generate(children[i]) + "\n"; + objectString += "} " + data.symbol.getName() + ";"; + return objectString; + } case function: output += "\n" + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + "("; for (int i = 0; i < children.size()-1; i++) { @@ -155,10 +163,10 @@ std::string CGenerator::generate(NodeTree* from) { return "*(" + generate(children[1]) + ")"; if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!=" || name == "<" || name == ">" || name == 
"%" || name == "+=" || name == "-=" || name == "*=" || name == "/=" || name == "||" - || name == "&&" || name == "!" ) { - + || name == "&&" || name == "!" ) return "((" + generate(children[1]) + ")" + name + "(" + generate(children[2]) + "))"; - } + else if (name == "." || name == "->") + return "((" + generate(children[1]) + ")" + name + generate(children[2]) + ")"; output += data.symbol.getName() + "("; for (int i = 1; i < children.size(); i++) //children[0] is the declaration if (i < children.size()-1) diff --git a/src/Importer.cpp b/src/Importer.cpp index 76d8231..d158470 100644 --- a/src/Importer.cpp +++ b/src/Importer.cpp @@ -30,6 +30,7 @@ Importer::Importer(Parser* parserIn) { collapseSymbols.push_back(Symbol("typed_parameter_list", false)); collapseSymbols.push_back(Symbol("unorderd_list_part", false)); collapseSymbols.push_back(Symbol("if_comp_pred", false)); + collapseSymbols.push_back(Symbol("declaration_block", false)); } Importer::~Importer() { diff --git a/src/RNGLRParser.cpp b/src/RNGLRParser.cpp index 274558f..f42f189 100644 --- a/src/RNGLRParser.cpp +++ b/src/RNGLRParser.cpp @@ -377,6 +377,7 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state, std: void RNGLRParser::addStateReductionsToTable(State* state) { std::vector* currStateTotal = state->getTotal(); + //std::cout << currStateTotal->size() << "::" << state->getNumber() << std::endl; for (std::vector::size_type i = 0; i < currStateTotal->size(); i++) { //See if reduce //Also, this really only needs to be done for the state's basis, but we're already iterating through, so... 
diff --git a/src/RegEx.cpp b/src/RegEx.cpp index 5a408a6..18acabe 100644 --- a/src/RegEx.cpp +++ b/src/RegEx.cpp @@ -223,8 +223,7 @@ void RegEx::test() { } { RegEx re("bbb((bba+)|(ba+))*a*((a+b)|(a+bb)|(a+))*bbb") ; - std::cout << re.longMatch("bbbababbbaaaaaaaaaaaaaaaaaaabbb") << std::endl; - + assert(re.longMatch("bbbababbbaaaaaaaaaaaaaaaaaaabbb") == 9); } std::cout << "RegEx tests pass\n"; diff --git a/src/State.cpp b/src/State.cpp index 012d468..fc8621b 100644 --- a/src/State.cpp +++ b/src/State.cpp @@ -158,4 +158,8 @@ std::vector* State::getDeepParents(int depth) { recursiveParents->insert(recursiveParents->end(), recursiveParentsToAdd->begin(), recursiveParentsToAdd->end()); } return recursiveParents; +} + +int State::getNumber() { + return number; } \ No newline at end of file diff --git a/src/Type.cpp b/src/Type.cpp index 83b638f..613324d 100644 --- a/src/Type.cpp +++ b/src/Type.cpp @@ -3,16 +3,19 @@ Type::Type() { indirection = 0; baseType = none; + typeDefinition = NULL; } Type::Type(ValueType typeIn) { indirection = 0; baseType = typeIn; + typeDefinition = NULL; } Type::Type(ValueType typeIn, int indirectionIn) { indirection = indirectionIn; baseType = typeIn; + typeDefinition = NULL; } Type::Type(NodeTree* typeDefinitionIn) { diff --git a/src/util.cpp b/src/util.cpp index a08e776..cb1fa4c 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -52,3 +52,22 @@ int findPerenEnd(std::string str, int i) { return i; } } + +std::vector split(const std::string &str, char delim) { + std::stringstream ss(str); + std::string word; + std::vector splitVec; + while (std::getline(ss, word, delim)) + splitVec.push_back(word); + return splitVec; +} + +std::string join(const std::vector &strVec, std::string joinStr) { + if (strVec.size() == 0) + return ""; + std::string joinedStr = strVec[0]; + for (int i = 1; i < strVec.size(); i++) + joinedStr += joinStr + strVec[i]; + return joinedStr; +} + From 1791738cd93c860ae8594405af5c101ac1e76c3c Mon Sep 17 00:00:00 2001 From: Nathan 
Braswell Date: Mon, 3 Feb 2014 11:41:25 -0500 Subject: [PATCH 18/25] Working on objects and scoping. To finish, need to actually implement decent propogation of types --- include/CGenerator.h | 3 +- src/ASTTransformation.cpp | 101 +++++++++++++++++----------- src/CGenerator.cpp | 136 +++++++++++++++++++++++++++----------- 3 files changed, 161 insertions(+), 79 deletions(-) diff --git a/include/CGenerator.h b/include/CGenerator.h index 00d4c05..09142d6 100644 --- a/include/CGenerator.h +++ b/include/CGenerator.h @@ -17,8 +17,9 @@ class CGenerator { CGenerator(); ~CGenerator(); void generateCompSet(std::map*> ASTs, std::string outputName); - std::string generate(NodeTree* from); + std::string generate(NodeTree* from, NodeTree* enclosingObject = NULL); static std::string ValueTypeToCType(Type *type); + std::string generateObjectMethod(NodeTree* enclosingObject, NodeTree* from); std::string generatorString; private: diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 0543fb1..58d829e 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -17,7 +17,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree* scope) { Symbol current = from->getData(); std::string name = current.getName(); - NodeTree* newNode; + NodeTree* newNode = NULL; std::vector*> children = from->getChildren(); std::set skipChildren; @@ -25,25 +25,26 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree newNode = new NodeTree(name, ASTData(translation_unit)); scope = newNode; //Temporary scope fix - scope->getDataRef()->scope["+"] = new NodeTree(); - scope->getDataRef()->scope["-"] = new NodeTree(); - scope->getDataRef()->scope["*"] = new NodeTree(); - scope->getDataRef()->scope["&"] = new NodeTree(); - scope->getDataRef()->scope["--"] = new NodeTree(); - scope->getDataRef()->scope["++"] = new NodeTree(); - scope->getDataRef()->scope["=="] = new NodeTree(); - 
scope->getDataRef()->scope["<="] = new NodeTree(); - scope->getDataRef()->scope[">="] = new NodeTree(); - scope->getDataRef()->scope["<"] = new NodeTree(); - scope->getDataRef()->scope[">"] = new NodeTree(); - scope->getDataRef()->scope["&&"] = new NodeTree(); - scope->getDataRef()->scope["||"] = new NodeTree(); - scope->getDataRef()->scope["!"] = new NodeTree(); - scope->getDataRef()->scope["*="] = new NodeTree(); - scope->getDataRef()->scope["+="] = new NodeTree(); - scope->getDataRef()->scope["-="] = new NodeTree(); - scope->getDataRef()->scope["."] = new NodeTree(); - scope->getDataRef()->scope["->"] = new NodeTree(); + Type placeholderType; + scope->getDataRef()->scope["+"] = new NodeTree("function", ASTData(function, Symbol("+", true), &placeholderType)); + scope->getDataRef()->scope["-"] = new NodeTree("function", ASTData(function, Symbol("-", true), &placeholderType)); + scope->getDataRef()->scope["*"] = new NodeTree("function", ASTData(function, Symbol("*", true), &placeholderType)); + scope->getDataRef()->scope["&"] = new NodeTree("function", ASTData(function, Symbol("&", true), &placeholderType)); + scope->getDataRef()->scope["--"] = new NodeTree("function", ASTData(function, Symbol("--", true), &placeholderType)); + scope->getDataRef()->scope["++"] = new NodeTree("function", ASTData(function, Symbol("++", true), &placeholderType)); + scope->getDataRef()->scope["=="] = new NodeTree("function", ASTData(function, Symbol("==", true), &placeholderType)); + scope->getDataRef()->scope["<="] = new NodeTree("function", ASTData(function, Symbol("<=", true), &placeholderType)); + scope->getDataRef()->scope[">="] = new NodeTree("function", ASTData(function, Symbol(">=", true), &placeholderType)); + scope->getDataRef()->scope["<"] = new NodeTree("function", ASTData(function, Symbol("<", true), &placeholderType)); + scope->getDataRef()->scope[">"] = new NodeTree("function", ASTData(function, Symbol(">", true), &placeholderType)); + scope->getDataRef()->scope["&&"] = 
new NodeTree("function", ASTData(function, Symbol("&&", true), &placeholderType)); + scope->getDataRef()->scope["||"] = new NodeTree("function", ASTData(function, Symbol("||", true), &placeholderType)); + scope->getDataRef()->scope["!"] = new NodeTree("function", ASTData(function, Symbol("!", true), &placeholderType)); + scope->getDataRef()->scope["*="] = new NodeTree("function", ASTData(function, Symbol("*=", true), &placeholderType)); + scope->getDataRef()->scope["+="] = new NodeTree("function", ASTData(function, Symbol("+=", true), &placeholderType)); + scope->getDataRef()->scope["-="] = new NodeTree("function", ASTData(function, Symbol("-=", true), &placeholderType)); + scope->getDataRef()->scope["."] = new NodeTree("function", ASTData(function, Symbol(".", true), &placeholderType)); + scope->getDataRef()->scope["->"] = new NodeTree("function", ASTData(function, Symbol("->", true), &placeholderType)); } else if (name == "interpreter_directive") { newNode = new NodeTree(name, ASTData(interpreter_directive)); @@ -63,7 +64,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree //std::cout << "scope lookup from identifier" << std::endl; newNode = scopeLookup(scope, lookupName); if (newNode == NULL) { - std::cout << "scope lookup error! Could not find " << lookupName << std::endl; + std::cout << "scope lookup error! Could not find " << lookupName << " in identifier " << std::endl; throw "LOOKUP ERROR: " + lookupName; } else if (newNode->getDataRef()->symbol.getName() !=lookupName) { //This happens when the lookup name denotes a member of an object, i.e. 
obj.foo @@ -81,6 +82,8 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree newNode->getDataRef()->valueType = new Type(newNode); //Type is self-referential since this is the definition } scope->getDataRef()->scope[typeAlias] = newNode; + newNode->getDataRef()->scope["~enclosing_scope"] = scope; + scope = newNode; skipChildren.insert(0); //Identifier lookup will be ourselves, as we just added ourselves to the scope //return newNode; } else if (name == "function") { @@ -91,6 +94,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree scope->getDataRef()->scope[functionName] = newNode; newNode->getDataRef()->scope["~enclosing_scope"] = scope; scope = newNode; + std::cout << "finished function " << functionName << std::endl; } else if (name == "code_block") { newNode = new NodeTree(name, ASTData(code_block)); newNode->getDataRef()->scope["~enclosing_scope"] = scope; @@ -101,19 +105,19 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree std::string typeString = concatSymbolTree(children[0]);//Get the type (left child) and set our new identifer to be that type newNode = new NodeTree("identifier", ASTData(identifier, Symbol(parameterName, true), typeFromString(typeString, scope))); scope->getDataRef()->scope[parameterName] = newNode; + newNode->getDataRef()->scope["~enclosing_scope"] = scope; return newNode; } else if (name == "boolean_expression" || name == "and_boolean_expression" || name == "bool_exp") { //If this is an actual part of an expression, not just a premoted term if (children.size() > 1) { - std::string functionCallName = concatSymbolTree(children[1]); - //std::cout << "scope lookup from boolen_expression or similar" << std::endl; - NodeTree* function = scopeLookup(scope, functionCallName); + std::string functionCallString = concatSymbolTree(children[1]); + NodeTree* function = scopeLookup(scope, functionCallString); if (function == NULL) { - std::cout << "scope lookup error! 
Could not find " << functionCallName << std::endl; - throw "LOOKUP ERROR: " + functionCallName; + std::cout << "scope lookup error! Could not find " << functionCallString << " in boolean stuff " << std::endl; + throw "LOOKUP ERROR: " + functionCallString; } - newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); - newNode->addChild(function); // First child of function call is a link to the function definition + newNode = new NodeTree(functionCallString, ASTData(function_call)); + newNode->addChild(function); // First child of function call is a link to the function skipChildren.insert(1); } else { //std::cout << children.size() << std::endl; @@ -129,12 +133,25 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree //std::cout << "scope lookup from expression or similar" << std::endl; NodeTree* function = scopeLookup(scope, functionCallName); if (function == NULL) { - std::cout << "scope lookup error! Could not find " << functionCallName << std::endl; + std::cout << "scope lookup error! 
Could not find " << functionCallName << " in expression " << std::endl; throw "LOOKUP ERROR: " + functionCallName; } newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); newNode->addChild(function); // First child of function call is a link to the function definition - skipChildren.insert(1); + NodeTree* lhs = transform(children[0], scope); + NodeTree* rhs;// = transform(children[2], scope); + if (name == "access_operation") + rhs = transform(children[2], lhs->getDataRef()->valueType->typeDefinition); //If an access operation, then the right side will be in the lhs's type's scope + else + rhs = transform(children[2], scope); + + if (name == "access_operation") + std::cout << "Access Operation: " << lhs->getDataRef()->symbol.getName() << " : " << rhs->getDataRef()->symbol.getName() << std::endl; + + newNode->addChild(lhs); + newNode->addChild(rhs); + return newNode; + //skipChildren.insert(1); } else { return transform(children[0], scope); //Just a promoted child, so do it instead } @@ -152,7 +169,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree //std::cout << "scope lookup from factor" << std::endl; NodeTree* function = scopeLookup(scope, funcName); if (function == NULL) { - std::cout << "scope lookup error! Could not find " << funcName << std::endl; + std::cout << "scope lookup error! Could not find " << funcName << " in factor " << std::endl; throw "LOOKUP ERROR: " + funcName; } newNode = new NodeTree(funcName, ASTData(function_call, Symbol(funcName, true))); @@ -184,7 +201,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree NodeTree* childCall = new NodeTree(functionName, ASTData(function_call, Symbol(functionName, true))); NodeTree* functionDef = scopeLookup(scope, functionName); if (functionDef == NULL) { - std::cout << "scope lookup error! Could not find " << functionName << std::endl; + std::cout << "scope lookup error! 
Could not find " << functionName << " in assignment_statement " << std::endl; throw "LOOKUP ERROR: " + functionName; } childCall->addChild(functionDef); //First child of function call is definition of the function @@ -204,6 +221,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree Type* identifierType = typeFromString(typeString, scope); NodeTree* newIdentifier = new NodeTree("identifier", ASTData(identifier, Symbol(newIdentifierStr, true), identifierType)); scope->getDataRef()->scope[newIdentifierStr] = newIdentifier; + newNode->getDataRef()->scope["~enclosing_scope"] = scope; //Now we don't do this thing // if (identifierType->typeDefinition) { // //Is a custom type. Populate this declaration's scope with it's inner declarations @@ -226,12 +244,19 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree } else if (name == "function_call") { std::string functionCallName = concatSymbolTree(children[0]); newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); - //std::cout << "scope lookup from function_call" << std::endl; - NodeTree* function = scopeLookup(scope, functionCallName); - if (function == NULL) { - std::cout << "scope lookup error! Could not find " << functionCallName << std::endl; - throw "LOOKUP ERROR: " + functionCallName; - } + std::cout << "scope lookup from function_call: " << functionCallName << std::endl; + for (auto i : children) + std::cout << i << " : " << i->getName() << " : " << i->getDataRef()->getName() << std::endl; + //NodeTree* function = scopeLookup(scope, functionCallName); + NodeTree* function = transform(children[0], scope);/* scopeLookup(scope, functionCallName);*/ + std::cout << "The thing: " << function << " : " << function->getName() << std::endl; + for (auto i : function->getChildren()) + std::cout << i->getName() << " "; + std::cout << std::endl; + // if (function == NULL) { + // std::cout << "scope lookup error! 
Could not find " << functionCallName << " in function_call " << std::endl; + // throw "LOOKUP ERROR: " + functionCallName; + // } newNode->addChild(function); skipChildren.insert(0); } else if (name == "parameter") { diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 5d88a66..1fa959b 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -35,16 +35,17 @@ std::string CGenerator::tabs() { return returnTabs; } -std::string CGenerator::generate(NodeTree* from) { +//The enclosing object is for when we're generating the inside of object methods. They allow us to check scope lookups against the object we're in +std::string CGenerator::generate(NodeTree* from, NodeTree* enclosingObject) { ASTData data = from->getData(); std::vector*> children = from->getChildren(); - std::string output = ""; + std::string output = ""; switch (data.type) { case translation_unit: //Do here because we may need the typedefs before the declarations of variables for (int i = 0; i < children.size(); i++) if (children[i]->getDataRef()->type == type_def) - output += generate(children[i]) + "\n"; + output += generate(children[i], enclosingObject) + "\n"; //Declare everything in translation unit scope here. 
(allows stuff from other files, automatic forward declarations) for (auto i = data.scope.begin(); i != data.scope.end(); i++) { NodeTree* declaration = i->second; @@ -55,11 +56,15 @@ std::string CGenerator::generate(NodeTree* from) { output += ValueTypeToCType(declarationData.valueType) + " " + declarationData.symbol.getName() + "; /*identifier*/\n"; break; case function: + if (decChildren.size() == 0) { //Not a real function, must be a built in passthrough { + output += "/* built in function: " + declarationData.toString() + " */\n"; + break; + } output += "\n" + ValueTypeToCType(declarationData.valueType) + " " + declarationData.symbol.getName() + "("; for (int j = 0; j < decChildren.size()-1; j++) { if (j > 0) output += ", "; - output += ValueTypeToCType(decChildren[j]->getData().valueType) + " " + generate(decChildren[j]); + output += ValueTypeToCType(decChildren[j]->getData().valueType) + " " + generate(decChildren[j], enclosingObject); } output += "); /*func*/\n"; break; @@ -75,7 +80,7 @@ std::string CGenerator::generate(NodeTree* from) { //Do here because we need the newlines for (int i = 0; i < children.size(); i++) if (children[i]->getDataRef()->type != type_def) - output += generate(children[i]) + "\n"; + output += generate(children[i], enclosingObject) + "\n"; return output; break; case interpreter_directive: @@ -85,31 +90,47 @@ std::string CGenerator::generate(NodeTree* from) { return "/* would import \"" + data.symbol.getName() + "\" but....*/\n"; //return "#include <" + data.symbol.getName() + ">\n"; case identifier: + { + //If we're in an object method, and our enclosing scope is that object, we're a member of the object and should use the self reference. 
+ if (enclosingObject && enclosingObject->getDataRef()->scope.find(data.symbol.getName()) != enclosingObject->getDataRef()->scope.end()) { + return "self->" + data.symbol.getName(); + } return data.symbol.getName(); + } case type_def: if (children.size() == 0) { return "typedef " + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + ";"; } else { std::string objectString = "typedef struct __struct_dummy_" + data.symbol.getName() + "__ {\n"; - for (int i = 0; i < children.size(); i++) - objectString += generate(children[i]) + "\n"; + std::string postString; //The functions have to be outside the struct definition + for (int i = 0; i < children.size(); i++) { + std::cout << children[i]->getName() << std::endl; + if (children[i]->getName() == "function") //If object method + postString += generateObjectMethod(from, children[i]) + "\n"; + else + objectString += generate(children[i], enclosingObject) + "\n"; + } objectString += "} " + data.symbol.getName() + ";"; - return objectString; + return objectString + postString; //Functions come after the declaration of the struct } case function: output += "\n" + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + "("; for (int i = 0; i < children.size()-1; i++) { if (i > 0) output += ", "; - output += ValueTypeToCType(children[i]->getData().valueType) + " " + generate(children[i]); + output += ValueTypeToCType(children[i]->getData().valueType) + " " + generate(children[i], enclosingObject); } - output+= ")\n" + generate(children[children.size()-1]); + output+= ")\n" + generate(children[children.size()-1], enclosingObject); return output; case code_block: output += "{\n"; tabLevel++; - for (int i = 0; i < children.size(); i++) - output += generate(children[i]); + for (int i = 0; i < children.size(); i++) { + std::cout << "Line " << i << std::endl; + std::string line = generate(children[i], enclosingObject); + std::cout << line << std::endl; + output += line; + } tabLevel--; output += tabs() + "}"; 
return output; @@ -118,60 +139,82 @@ std::string CGenerator::generate(NodeTree* from) { case boolean_expression: output += " " + data.symbol.getName() + " "; case statement: - return tabs() + generate(children[0]) + ";\n"; + return tabs() + generate(children[0], enclosingObject) + ";\n"; case if_statement: - output += "if (" + generate(children[0]) + ")\n\t" + generate(children[1]); + output += "if (" + generate(children[0], enclosingObject) + ")\n\t" + generate(children[1], enclosingObject); if (children.size() > 2) - output += " else " + generate(children[2]); + output += " else " + generate(children[2], enclosingObject); return output; case while_loop: - output += "while (" + generate(children[0]) + ")\n\t" + generate(children[1]); + output += "while (" + generate(children[0], enclosingObject) + ")\n\t" + generate(children[1], enclosingObject); return output; case for_loop: //The strSlice's are there to get ride of an unwanted return and an unwanted semicolon(s) - output += "for (" + strSlice(generate(children[0]),0,-3) + generate(children[1]) + ";" + strSlice(generate(children[2]),0,-3) + ")\n\t" + generate(children[3]); + output += "for (" + strSlice(generate(children[0], enclosingObject),0,-3) + generate(children[1], enclosingObject) + ";" + strSlice(generate(children[2], enclosingObject),0,-3) + ")\n\t" + generate(children[3], enclosingObject); return output; case return_statement: if (children.size()) - return "return " + generate(children[0]); + return "return " + generate(children[0], enclosingObject); else return "return"; case assignment_statement: - return generate(children[0]) + " = " + generate(children[1]); + return generate(children[0], enclosingObject) + " = " + generate(children[1], enclosingObject); case declaration_statement: if (children.size() == 1) - return ValueTypeToCType(children[0]->getData().valueType) + " " + generate(children[0]) + ";"; + return ValueTypeToCType(children[0]->getData().valueType) + " " + generate(children[0], 
enclosingObject) + ";"; else - return ValueTypeToCType(children[0]->getData().valueType) + " " + generate(children[0]) + " = " + generate(children[1]) + ";"; + return ValueTypeToCType(children[0]->getData().valueType) + " " + generate(children[0], enclosingObject) + " = " + generate(children[1], enclosingObject) + ";"; case if_comp: - if (generate(children[0]) == generatorString) - return generate(children[1]); + if (generate(children[0], enclosingObject) == generatorString) + return generate(children[1], enclosingObject); return ""; case simple_passthrough: - return strSlice(generate(children[0]), 3, -4); + return strSlice(generate(children[0], enclosingObject), 3, -4); case function_call: { //NOTE: The first (0th) child of a function call node is the declaration of the function //Handle operators specially for now. Will later replace with //Inlined functions in the standard library - std::string name = data.symbol.getName(); - //std::cout << name << " == " << children[0]->getData().symbol.getName() << std::endl; - if (name == "++" || name == "--") - return generate(children[1]) + name; - if (name == "*" && children.size() == 2) //Is dereference, not multiplication - return "*(" + generate(children[1]) + ")"; - if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!=" - || name == "<" || name == ">" || name == "%" || name == "+=" || name == "-=" || name == "*=" || name == "/=" || name == "||" - || name == "&&" || name == "!" ) - return "((" + generate(children[1]) + ")" + name + "(" + generate(children[2]) + "))"; - else if (name == "." 
|| name == "->") - return "((" + generate(children[1]) + ")" + name + generate(children[2]) + ")"; - output += data.symbol.getName() + "("; + // std::string name = data.symbol.getName(); + // std::cout << name << " == " << children[0]->getData().symbol.getName() << std::endl; + std::string name = children[0]->getDataRef()->symbol.getName(); + ASTType funcType = children[0]->getDataRef()->type; + std::cout << "Doing function: " << name << std::endl; + //Test for specail functions only if what we're testing is, indeed, the definition, not a function call that returns a callable function pointer + if (funcType == function) { + if (name == "++" || name == "--") + return generate(children[1], enclosingObject) + name; + if (name == "*" && children.size() == 2) //Is dereference, not multiplication + return "*(" + generate(children[1], enclosingObject) + ")"; + if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!=" + || name == "<" || name == ">" || name == "%" || name == "+=" || name == "-=" || name == "*=" || name == "/=" || name == "||" + || name == "&&" || name == "!" ) + return "((" + generate(children[1], enclosingObject) + ")" + name + "(" + generate(children[2], enclosingObject) + "))"; + else if (name == "." || name == "->") { + if (children.size() == 1) + return "/*dot operation with one child*/" + generate(children[0], enclosingObject) + "/*end one child*/"; + //If this is accessing an actual function, just output the name. 
No actual function definition should be in an access operation + if (children[2]->getDataRef()->type == function) + return "((" + generate(children[1], enclosingObject) + ")" + name + children[2]->getDataRef()->symbol.getName() + ")"; + return "((" + generate(children[1], enclosingObject) + ")" + name + generate(children[2], enclosingObject) + ")"; + } + } + //output += data.symbol.getName() + "("; + if (funcType == function) { + output += name + "("; + std::cout << "Is a function, outputting name!" << std::cout; + if (enclosingObject && enclosingObject->getDataRef()->scope.find(name) != enclosingObject->getDataRef()->scope.end()) { + //So, it is part of the enclosing object's namespace, so it's (for now) a member function and we need to pass in an implicit self reference + output += ValueTypeToCType(enclosingObject->getDataRef()->valueType) + "* self, "; + } + } else { + output += generate(children[0], enclosingObject) + "("; + } for (int i = 1; i < children.size(); i++) //children[0] is the declaration if (i < children.size()-1) - output += generate(children[i]) + ", "; - else output += generate(children[i]); + output += generate(children[i], enclosingObject) + ", "; + else output += generate(children[i], enclosingObject); output += ") "; return output; } @@ -182,11 +225,24 @@ std::string CGenerator::generate(NodeTree* from) { std::cout << "Nothing!" 
<< std::endl; } for (int i = 0; i < children.size(); i++) - output += generate(children[i]); + output += generate(children[i], enclosingObject); return output; } +std::string CGenerator::generateObjectMethod(NodeTree* enclosingObject, NodeTree* from) { + std::string output; + ASTData data = from->getData(); + Type enclosingObjectType = *(enclosingObject->getDataRef()->valueType); //Copy a new type so we can turn it into a pointer + enclosingObjectType.indirection++; + std::vector*> children = from->getChildren(); + output += "\n" + ValueTypeToCType(data.valueType) + " " + enclosingObject->getDataRef()->symbol.getName() +"__" + data.symbol.getName() + "(" + ValueTypeToCType(&enclosingObjectType) + " self"; + for (int i = 0; i < children.size()-1; i++) + output += ", " + ValueTypeToCType(children[i]->getData().valueType) + " " + generate(children[i]); + output+= ")\n" + generate(children[children.size()-1], enclosingObject); //Pass in the object so we can + return output; +} + std::string CGenerator::ValueTypeToCType(Type *type) { std::string return_type; switch (type->baseType) { From 37cffac9cdad8412b3a97d9ce14e9a2a3298b4bc Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Tue, 18 Feb 2014 21:55:00 -0500 Subject: [PATCH 19/25] Objects work now! We have methods! 
--- include/ASTTransformation.h | 1 + main.cpp | 2 +- src/ASTData.cpp | 4 ++- src/ASTTransformation.cpp | 68 ++++++++++++++++++++++--------------- src/CGenerator.cpp | 40 ++++++++++++++-------- 5 files changed, 70 insertions(+), 45 deletions(-) diff --git a/include/ASTTransformation.h b/include/ASTTransformation.h index 31aaa3e..a5cf01e 100644 --- a/include/ASTTransformation.h +++ b/include/ASTTransformation.h @@ -23,4 +23,5 @@ class ASTTransformation: public NodeTransformation { private: Importer * importer; }; + #endif diff --git a/main.cpp b/main.cpp index c0f6036..90028d0 100644 --- a/main.cpp +++ b/main.cpp @@ -138,7 +138,7 @@ int main(int argc, char* argv[]) { /*NodeTree* AST =*/ importer.import(programName); - std::map*> ASTs =importer.getASTMap(); + std::map*> ASTs = importer.getASTMap(); //Do optomization, etc. here. //None at this time, instead going straight to C in this first (more naive) version diff --git a/src/ASTData.cpp b/src/ASTData.cpp index f8fd0c2..22fc30d 100644 --- a/src/ASTData.cpp +++ b/src/ASTData.cpp @@ -21,7 +21,9 @@ ASTData::~ASTData() { } std::string ASTData::toString() { - return ASTTypeToString(type) + (symbol.isTerminal() ? " " + symbol.toString() : "") + " " + (valueType ? valueType->toString() : "no_type"); + return ASTTypeToString(type) + " " + + (symbol.isTerminal() ? " " + symbol.toString() : "") + " " + + (valueType ? 
valueType->toString() : "no_type"); } std::string ASTData::ASTTypeToString(ASTType type) { diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 58d829e..241fa05 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -24,27 +24,26 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree if (name == "translation_unit") { newNode = new NodeTree(name, ASTData(translation_unit)); scope = newNode; - //Temporary scope fix - Type placeholderType; - scope->getDataRef()->scope["+"] = new NodeTree("function", ASTData(function, Symbol("+", true), &placeholderType)); - scope->getDataRef()->scope["-"] = new NodeTree("function", ASTData(function, Symbol("-", true), &placeholderType)); - scope->getDataRef()->scope["*"] = new NodeTree("function", ASTData(function, Symbol("*", true), &placeholderType)); - scope->getDataRef()->scope["&"] = new NodeTree("function", ASTData(function, Symbol("&", true), &placeholderType)); - scope->getDataRef()->scope["--"] = new NodeTree("function", ASTData(function, Symbol("--", true), &placeholderType)); - scope->getDataRef()->scope["++"] = new NodeTree("function", ASTData(function, Symbol("++", true), &placeholderType)); - scope->getDataRef()->scope["=="] = new NodeTree("function", ASTData(function, Symbol("==", true), &placeholderType)); - scope->getDataRef()->scope["<="] = new NodeTree("function", ASTData(function, Symbol("<=", true), &placeholderType)); - scope->getDataRef()->scope[">="] = new NodeTree("function", ASTData(function, Symbol(">=", true), &placeholderType)); - scope->getDataRef()->scope["<"] = new NodeTree("function", ASTData(function, Symbol("<", true), &placeholderType)); - scope->getDataRef()->scope[">"] = new NodeTree("function", ASTData(function, Symbol(">", true), &placeholderType)); - scope->getDataRef()->scope["&&"] = new NodeTree("function", ASTData(function, Symbol("&&", true), &placeholderType)); - scope->getDataRef()->scope["||"] = new NodeTree("function", ASTData(function, 
Symbol("||", true), &placeholderType)); - scope->getDataRef()->scope["!"] = new NodeTree("function", ASTData(function, Symbol("!", true), &placeholderType)); - scope->getDataRef()->scope["*="] = new NodeTree("function", ASTData(function, Symbol("*=", true), &placeholderType)); - scope->getDataRef()->scope["+="] = new NodeTree("function", ASTData(function, Symbol("+=", true), &placeholderType)); - scope->getDataRef()->scope["-="] = new NodeTree("function", ASTData(function, Symbol("-=", true), &placeholderType)); - scope->getDataRef()->scope["."] = new NodeTree("function", ASTData(function, Symbol(".", true), &placeholderType)); - scope->getDataRef()->scope["->"] = new NodeTree("function", ASTData(function, Symbol("->", true), &placeholderType)); + //Temporary scope fix, use placeholder type + scope->getDataRef()->scope["+"] = new NodeTree("function", ASTData(function, Symbol("+", true), NULL)); + scope->getDataRef()->scope["-"] = new NodeTree("function", ASTData(function, Symbol("-", true), NULL)); + scope->getDataRef()->scope["*"] = new NodeTree("function", ASTData(function, Symbol("*", true), NULL)); + scope->getDataRef()->scope["&"] = new NodeTree("function", ASTData(function, Symbol("&", true), NULL)); + scope->getDataRef()->scope["--"] = new NodeTree("function", ASTData(function, Symbol("--", true), NULL)); + scope->getDataRef()->scope["++"] = new NodeTree("function", ASTData(function, Symbol("++", true), NULL)); + scope->getDataRef()->scope["=="] = new NodeTree("function", ASTData(function, Symbol("==", true), NULL)); + scope->getDataRef()->scope["<="] = new NodeTree("function", ASTData(function, Symbol("<=", true), NULL)); + scope->getDataRef()->scope[">="] = new NodeTree("function", ASTData(function, Symbol(">=", true), NULL)); + scope->getDataRef()->scope["<"] = new NodeTree("function", ASTData(function, Symbol("<", true), NULL)); + scope->getDataRef()->scope[">"] = new NodeTree("function", ASTData(function, Symbol(">", true), NULL)); + 
scope->getDataRef()->scope["&&"] = new NodeTree("function", ASTData(function, Symbol("&&", true), NULL)); + scope->getDataRef()->scope["||"] = new NodeTree("function", ASTData(function, Symbol("||", true), NULL)); + scope->getDataRef()->scope["!"] = new NodeTree("function", ASTData(function, Symbol("!", true), NULL)); + scope->getDataRef()->scope["*="] = new NodeTree("function", ASTData(function, Symbol("*=", true), NULL)); + scope->getDataRef()->scope["+="] = new NodeTree("function", ASTData(function, Symbol("+=", true), NULL)); + scope->getDataRef()->scope["-="] = new NodeTree("function", ASTData(function, Symbol("-=", true), NULL)); + scope->getDataRef()->scope["."] = new NodeTree("function", ASTData(function, Symbol(".", true), NULL)); + scope->getDataRef()->scope["->"] = new NodeTree("function", ASTData(function, Symbol("->", true), NULL)); } else if (name == "interpreter_directive") { newNode = new NodeTree(name, ASTData(interpreter_directive)); @@ -116,7 +115,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree std::cout << "scope lookup error! 
Could not find " << functionCallString << " in boolean stuff " << std::endl; throw "LOOKUP ERROR: " + functionCallString; } - newNode = new NodeTree(functionCallString, ASTData(function_call)); + newNode = new NodeTree(functionCallString, ASTData(function_call, function->getDataRef()->valueType)); newNode->addChild(function); // First child of function call is a link to the function skipChildren.insert(1); } else { @@ -150,6 +149,14 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree newNode->addChild(lhs); newNode->addChild(rhs); + std::cout << functionCallName << " - " << function->getName() << " has value type " << function->getDataRef()->valueType << " and rhs " << rhs->getDataRef()->valueType << std::endl; + if (function->getDataRef()->valueType) + newNode->getDataRef()->valueType = function->getDataRef()->valueType; + else if (rhs->getDataRef()->valueType) + newNode->getDataRef()->valueType = rhs->getDataRef()->valueType; + else + newNode->getDataRef()->valueType = NULL; + std::cout << "function call to " << functionCallName << " - " << function->getName() << " is now " << newNode->getDataRef()->valueType << std::endl; return newNode; //skipChildren.insert(1); } else { @@ -160,11 +167,11 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree //NO SUPPORT FOR CASTING YET if (children.size() == 2) { std::string funcName = concatSymbolTree(children[0]); - int funcNum; + NodeTree* param; if (funcName == "*" || funcName == "&" || funcName == "++" || funcName == "--" || funcName == "-" || funcName == "!" 
|| funcName == "~") - funcNum = 0; + param = transform(children[1], scope); else - funcName = concatSymbolTree(children[1]), funcNum = 1; + funcName = concatSymbolTree(children[1]), param = transform(children[0], scope); //std::cout << "scope lookup from factor" << std::endl; NodeTree* function = scopeLookup(scope, funcName); @@ -174,7 +181,13 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree } newNode = new NodeTree(funcName, ASTData(function_call, Symbol(funcName, true))); newNode->addChild(function); - skipChildren.insert(funcNum); + newNode->addChild(param); + if (function->getDataRef()->valueType) + newNode->getDataRef()->valueType = function->getDataRef()->valueType; + else + newNode->getDataRef()->valueType = param->getDataRef()->valueType; + + return newNode; } else { return transform(children[0], scope); //Just a promoted child, so do it instead } @@ -258,11 +271,10 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree // throw "LOOKUP ERROR: " + functionCallName; // } newNode->addChild(function); + newNode->getDataRef()->valueType = function->getDataRef()->valueType; skipChildren.insert(0); } else if (name == "parameter") { return transform(children[0], scope); //Don't need a parameter node, just the value - } else if (name == "parameter") { - return transform(children[0], scope); //Don't need a parameter node, just the value } else if (name == "type") { std::string theConcat = concatSymbolTree(from); //We have no symbol, so this will concat our children newNode = new NodeTree(name, ASTData(value, Symbol(theConcat, true), typeFromString(theConcat, scope))); diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 1fa959b..fd22588 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -194,22 +194,32 @@ std::string CGenerator::generate(NodeTree* from, NodeTree* enc else if (name == "." 
|| name == "->") { if (children.size() == 1) return "/*dot operation with one child*/" + generate(children[0], enclosingObject) + "/*end one child*/"; - //If this is accessing an actual function, just output the name. No actual function definition should be in an access operation - if (children[2]->getDataRef()->type == function) - return "((" + generate(children[1], enclosingObject) + ")" + name + children[2]->getDataRef()->symbol.getName() + ")"; - return "((" + generate(children[1], enclosingObject) + ")" + name + generate(children[2], enclosingObject) + ")"; - } - } - //output += data.symbol.getName() + "("; - if (funcType == function) { - output += name + "("; - std::cout << "Is a function, outputting name!" << std::cout; - if (enclosingObject && enclosingObject->getDataRef()->scope.find(name) != enclosingObject->getDataRef()->scope.end()) { - //So, it is part of the enclosing object's namespace, so it's (for now) a member function and we need to pass in an implicit self reference - output += ValueTypeToCType(enclosingObject->getDataRef()->valueType) + "* self, "; + //If this is accessing an actual function, find the function in scope and take the appropriate action. Probabally an object method + if (children[2]->getDataRef()->type == function) { + std::string functionName = children[2]->getDataRef()->symbol.getName(); + NodeTree* possibleObjectType = children[1]->getDataRef()->valueType->typeDefinition; + //If is an object method, generate it like one. Needs extension/modification for inheritence + if (possibleObjectType && possibleObjectType->getDataRef()->scope.find(functionName) != possibleObjectType->getDataRef()->scope.end()) { + return possibleObjectType->getDataRef()->symbol.getName() +"__" + functionName + "(" + (name == "." ? "&" : "") + generate(children[1], enclosingObject) + ","; + //The comma lets the upper function call know we already started the param list + //Note that we got here from a function call. 
We just pass up this special case and let them finish with the perentheses + std::cout << "Is in scope or not type!" << std::endl; + } else { + std::cout << "Is not in scope or not type" << std::endl; + return "((" + generate(children[1], enclosingObject) + ")" + name + functionName + ")"; + } + } else { + return "((" + generate(children[1], enclosingObject) + ")" + name + generate(children[2], enclosingObject) + ")"; + } + } else { + output += name + "("; } } else { - output += generate(children[0], enclosingObject) + "("; + std::string functionCallSource = generate(children[0], enclosingObject); + if (functionCallSource[functionCallSource.size()-1] == ',') //If it's a member method, it's already started the parameter list. + output += children.size() > 1 ? functionCallSource : functionCallSource.substr(0, functionCallSource.size()-1); + else + output += functionCallSource + "("; } for (int i = 1; i < children.size(); i++) //children[0] is the declaration if (i < children.size()-1) @@ -233,7 +243,7 @@ std::string CGenerator::generate(NodeTree* from, NodeTree* enc std::string CGenerator::generateObjectMethod(NodeTree* enclosingObject, NodeTree* from) { std::string output; ASTData data = from->getData(); - Type enclosingObjectType = *(enclosingObject->getDataRef()->valueType); //Copy a new type so we can turn it into a pointer + Type enclosingObjectType = *(enclosingObject->getDataRef()->valueType); //Copy a new type so we can turn it into a pointer if we need to enclosingObjectType.indirection++; std::vector*> children = from->getChildren(); output += "\n" + ValueTypeToCType(data.valueType) + " " + enclosingObject->getDataRef()->symbol.getName() +"__" + data.symbol.getName() + "(" + ValueTypeToCType(&enclosingObjectType) + " self"; From 7f902880c59e68eef613b19662de08d28d73f80f Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Thu, 20 Feb 2014 18:24:04 -0500 Subject: [PATCH 20/25] Cleaned up and added line number to error messages --- include/RNGLRParser.h | 2 ++ 
src/RNGLRParser.cpp | 18 +++++++++++++++--- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/include/RNGLRParser.h b/include/RNGLRParser.h index 3e921a3..6b285e7 100644 --- a/include/RNGLRParser.h +++ b/include/RNGLRParser.h @@ -39,6 +39,8 @@ class RNGLRParser: public Parser { std::vector*> getPathEdges(std::vector*> path); + int findLine(int tokenNum); //Get the line number for a token, used for error reporting + std::vector input; GraphStructuredStack gss; //start node, lefthand side of the reduction, reduction length diff --git a/src/RNGLRParser.cpp b/src/RNGLRParser.cpp index f42f189..cbd111f 100644 --- a/src/RNGLRParser.cpp +++ b/src/RNGLRParser.cpp @@ -85,8 +85,9 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { for (int i = 0; i < input.size(); i++) { // std::cout << "Checking if frontier " << i << " is empty" << std::endl; if (gss.frontierIsEmpty(i)) { - std::cout << "Frontier " << i << " is empty." << std::endl; - std::cout << "Failed on " << input[i].toString() << std::endl; + //std::cout << "Frontier " << i << " is empty." << std::endl; + std::cout << "Parsing failed on " << input[i].toString() << std::endl; + std::cout << "Problem is on line: " << findLine(i) << std::endl; std::cout << "Nearby is:" << std::endl; const int range = 10; for (int j = (i-range >= 0 ? i-range : 0); j < (i+range < input.size() ? i+range : input.size()); j++) @@ -118,7 +119,7 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { } std::cout << "Rejected!" << std::endl; - std::cout << "GSS:\n" << gss.toString() << std::endl; + // std::cout << "GSS:\n" << gss.toString() << std::endl; return NULL; } @@ -492,3 +493,14 @@ std::vector*> RNGLRParser::getPathEdges(std::vector Date: Thu, 6 Mar 2014 13:13:40 -0500 Subject: [PATCH 21/25] Just got paranoid about saving all this work. Almost finished operator overloading, but everything is slightly broken right now. 
--- include/ASTData.h | 4 +- include/ASTTransformation.h | 7 +- include/CGenerator.h | 1 + include/NodeTransformation.h | 2 +- include/NodeTree.h | 2 +- include/ParseAction.h | 2 +- include/ParseRule.h | 2 +- include/State.h | 2 +- include/Symbol.h | 2 +- include/Type.h | 4 +- include/util.h | 2 +- src/ASTTransformation.cpp | 245 ++++++++++++++++++++++------------- src/CGenerator.cpp | 136 +++++++++++++------ src/Importer.cpp | 1 + src/Type.cpp | 8 ++ 15 files changed, 273 insertions(+), 147 deletions(-) diff --git a/include/ASTData.h b/include/ASTData.h index cd11e88..48bee63 100644 --- a/include/ASTData.h +++ b/include/ASTData.h @@ -10,7 +10,7 @@ class Type; #include "Type.h" #ifndef NULL -#define NULL 0 +#define NULL ((void*)0) #endif enum ASTType {undef, translation_unit, interpreter_directive, import, identifier, type_def, @@ -29,7 +29,7 @@ class ASTData { ASTType type; Type* valueType; Symbol symbol; - std::map*> scope; + std::map*>> scope; private: }; diff --git a/include/ASTTransformation.h b/include/ASTTransformation.h index a5cf01e..f65e95f 100644 --- a/include/ASTTransformation.h +++ b/include/ASTTransformation.h @@ -16,12 +16,15 @@ class ASTTransformation: public NodeTransformation { ASTTransformation(Importer* importerIn); ~ASTTransformation(); virtual NodeTree* transform(NodeTree* from); - NodeTree* transform(NodeTree* from, NodeTree* scope); + NodeTree* transform(NodeTree* from, NodeTree* scope, std::vector types = std::vector()); + std::vector*> transformChildren(std::vector*> children, std::set skipChildren, NodeTree* scope, std::vector types); + std::vector mapNodesToTypes(std::vector*> nodes); std::string concatSymbolTree(NodeTree* root); - NodeTree* scopeLookup(NodeTree* scope, std::string lookup); + NodeTree* scopeLookup(NodeTree* scope, std::string lookup, std::vector types = std::vector()); Type* typeFromString(std::string type, NodeTree* scope); private: Importer * importer; + std::map*>> languageLevelScope; }; #endif diff --git 
a/include/CGenerator.h b/include/CGenerator.h index 09142d6..9df6248 100644 --- a/include/CGenerator.h +++ b/include/CGenerator.h @@ -19,6 +19,7 @@ class CGenerator { void generateCompSet(std::map*> ASTs, std::string outputName); std::string generate(NodeTree* from, NodeTree* enclosingObject = NULL); static std::string ValueTypeToCType(Type *type); + static std::string ValueTypeToCTypeDecoration(Type *type); std::string generateObjectMethod(NodeTree* enclosingObject, NodeTree* from); std::string generatorString; diff --git a/include/NodeTransformation.h b/include/NodeTransformation.h index dc18105..ff51944 100644 --- a/include/NodeTransformation.h +++ b/include/NodeTransformation.h @@ -4,7 +4,7 @@ #include "NodeTree.h" #ifndef NULL -#define NULL 0 +#define NULL ((void*)0) #endif template diff --git a/include/NodeTree.h b/include/NodeTree.h index 0f82dac..b7573f7 100644 --- a/include/NodeTree.h +++ b/include/NodeTree.h @@ -2,7 +2,7 @@ #define NODETREE_H #ifndef NULL -#define NULL 0 +#define NULL ((void*)0) #endif #include diff --git a/include/ParseAction.h b/include/ParseAction.h index 4d62256..175dd61 100644 --- a/include/ParseAction.h +++ b/include/ParseAction.h @@ -2,7 +2,7 @@ #define PARSE_ACTION_H #ifndef NULL -#define NULL 0 +#define NULL ((void*)0) #endif #include "util.h" diff --git a/include/ParseRule.h b/include/ParseRule.h index fc56bea..dd3ac37 100644 --- a/include/ParseRule.h +++ b/include/ParseRule.h @@ -2,7 +2,7 @@ #define PARSERULE_H #ifndef NULL -#define NULL 0 +#define NULL ((void*)0) #endif #include "Symbol.h" diff --git a/include/State.h b/include/State.h index 13bd3e0..bb2d08e 100644 --- a/include/State.h +++ b/include/State.h @@ -2,7 +2,7 @@ #define STATE_H #ifndef NULL -#define NULL 0 +#define NULL ((void*)0) #endif #include "util.h" diff --git a/include/Symbol.h b/include/Symbol.h index 1d306b1..8516555 100644 --- a/include/Symbol.h +++ b/include/Symbol.h @@ -2,7 +2,7 @@ #define SYMBOL_H #ifndef NULL -#define NULL 0 +#define NULL ((void*)0) 
#endif #include "NodeTree.h" diff --git a/include/Type.h b/include/Type.h index a360d7f..882e143 100644 --- a/include/Type.h +++ b/include/Type.h @@ -2,7 +2,7 @@ #define TYPE_H #ifndef NULL -#define NULL 0 +#define NULL ((void*)0) #endif #include @@ -25,6 +25,8 @@ class Type { Type(NodeTree* typeDefinitionIn, int indirectionIn); Type(ValueType typeIn, NodeTree* typeDefinitionIn, int indirectionIn); ~Type(); + bool const operator==(const Type &other)const; + bool const operator!=(const Type &other)const; std::string toString(); ValueType baseType; NodeTree* typeDefinition; diff --git a/include/util.h b/include/util.h index 7e95b9e..6ca6b1e 100644 --- a/include/util.h +++ b/include/util.h @@ -2,7 +2,7 @@ #define UTIL_H #ifndef NULL -#define NULL 0 +#define NULL ((void*)0) #endif #include diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 241fa05..ec74e2a 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -1,8 +1,28 @@ #include "ASTTransformation.h" ASTTransformation::ASTTransformation(Importer *importerIn) { - // importer = importerIn; + //Set up language level special scope. 
(the final scope checked) + //Note the NULL type + languageLevelScope["+"].push_back( new NodeTree("function", ASTData(function, Symbol("+", true), NULL))); + languageLevelScope["-"].push_back(new NodeTree("function", ASTData(function, Symbol("-", true), NULL))); + languageLevelScope["*"].push_back(new NodeTree("function", ASTData(function, Symbol("*", true), NULL))); + languageLevelScope["&"].push_back(new NodeTree("function", ASTData(function, Symbol("&", true), NULL))); + languageLevelScope["--"].push_back(new NodeTree("function", ASTData(function, Symbol("--", true), NULL))); + languageLevelScope["++"].push_back(new NodeTree("function", ASTData(function, Symbol("++", true), NULL))); + languageLevelScope["=="].push_back(new NodeTree("function", ASTData(function, Symbol("==", true), NULL))); + languageLevelScope["<="].push_back(new NodeTree("function", ASTData(function, Symbol("<=", true), NULL))); + languageLevelScope[">="].push_back(new NodeTree("function", ASTData(function, Symbol(">=", true), NULL))); + languageLevelScope["<"].push_back(new NodeTree("function", ASTData(function, Symbol("<", true), NULL))); + languageLevelScope[">"].push_back(new NodeTree("function", ASTData(function, Symbol(">", true), NULL))); + languageLevelScope["&&"].push_back(new NodeTree("function", ASTData(function, Symbol("&&", true), NULL))); + languageLevelScope["||"].push_back(new NodeTree("function", ASTData(function, Symbol("||", true), NULL))); + languageLevelScope["!"].push_back(new NodeTree("function", ASTData(function, Symbol("!", true), NULL))); + languageLevelScope["*="].push_back(new NodeTree("function", ASTData(function, Symbol("*=", true), NULL))); + languageLevelScope["+="].push_back(new NodeTree("function", ASTData(function, Symbol("+=", true), NULL))); + languageLevelScope["-="].push_back(new NodeTree("function", ASTData(function, Symbol("-=", true), NULL))); + languageLevelScope["."].push_back(new NodeTree("function", ASTData(function, Symbol(".", true), NULL))); + 
languageLevelScope["->"].push_back(new NodeTree("function", ASTData(function, Symbol("->", true), NULL))); } ASTTransformation::~ASTTransformation() { @@ -11,10 +31,10 @@ ASTTransformation::~ASTTransformation() { NodeTree* ASTTransformation::transform(NodeTree* from) { //Set up top scope - return transform(from, NULL); + return transform(from, NULL, std::vector()); } -NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree* scope) { +NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree* scope, std::vector types) { Symbol current = from->getData(); std::string name = current.getName(); NodeTree* newNode = NULL; @@ -24,27 +44,6 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree if (name == "translation_unit") { newNode = new NodeTree(name, ASTData(translation_unit)); scope = newNode; - //Temporary scope fix, use placeholder type - scope->getDataRef()->scope["+"] = new NodeTree("function", ASTData(function, Symbol("+", true), NULL)); - scope->getDataRef()->scope["-"] = new NodeTree("function", ASTData(function, Symbol("-", true), NULL)); - scope->getDataRef()->scope["*"] = new NodeTree("function", ASTData(function, Symbol("*", true), NULL)); - scope->getDataRef()->scope["&"] = new NodeTree("function", ASTData(function, Symbol("&", true), NULL)); - scope->getDataRef()->scope["--"] = new NodeTree("function", ASTData(function, Symbol("--", true), NULL)); - scope->getDataRef()->scope["++"] = new NodeTree("function", ASTData(function, Symbol("++", true), NULL)); - scope->getDataRef()->scope["=="] = new NodeTree("function", ASTData(function, Symbol("==", true), NULL)); - scope->getDataRef()->scope["<="] = new NodeTree("function", ASTData(function, Symbol("<=", true), NULL)); - scope->getDataRef()->scope[">="] = new NodeTree("function", ASTData(function, Symbol(">=", true), NULL)); - scope->getDataRef()->scope["<"] = new NodeTree("function", ASTData(function, Symbol("<", true), NULL)); - scope->getDataRef()->scope[">"] = new 
NodeTree("function", ASTData(function, Symbol(">", true), NULL)); - scope->getDataRef()->scope["&&"] = new NodeTree("function", ASTData(function, Symbol("&&", true), NULL)); - scope->getDataRef()->scope["||"] = new NodeTree("function", ASTData(function, Symbol("||", true), NULL)); - scope->getDataRef()->scope["!"] = new NodeTree("function", ASTData(function, Symbol("!", true), NULL)); - scope->getDataRef()->scope["*="] = new NodeTree("function", ASTData(function, Symbol("*=", true), NULL)); - scope->getDataRef()->scope["+="] = new NodeTree("function", ASTData(function, Symbol("+=", true), NULL)); - scope->getDataRef()->scope["-="] = new NodeTree("function", ASTData(function, Symbol("-=", true), NULL)); - scope->getDataRef()->scope["."] = new NodeTree("function", ASTData(function, Symbol(".", true), NULL)); - scope->getDataRef()->scope["->"] = new NodeTree("function", ASTData(function, Symbol("->", true), NULL)); - } else if (name == "interpreter_directive") { newNode = new NodeTree(name, ASTData(interpreter_directive)); } else if (name == "import" && !current.isTerminal()) { @@ -52,16 +51,17 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree newNode = new NodeTree(name, ASTData(import, Symbol(toImport, true))); //Do the imported file too NodeTree* outsideTranslationUnit = importer->import(toImport + ".krak"); - scope->getDataRef()->scope[toImport] = outsideTranslationUnit; //Put this transation_unit in the scope as it's files name + scope->getDataRef()->scope[toImport].push_back(outsideTranslationUnit); //Put this transation_unit in the scope as it's files name //Now add it to scope for (auto i = outsideTranslationUnit->getDataRef()->scope.begin(); i != outsideTranslationUnit->getDataRef()->scope.end(); i++) - scope->getDataRef()->scope[i->first] = i->second; + for (auto j : i->second) + scope->getDataRef()->scope[i->first].push_back(j); return newNode; // Don't need children of import } else if (name == "identifier") { //Make sure we get the entire 
name std::string lookupName = concatSymbolTree(from); - //std::cout << "scope lookup from identifier" << std::endl; - newNode = scopeLookup(scope, lookupName); + std::cout << "Looking up: " << lookupName << std::endl; + newNode = scopeLookup(scope, lookupName, types); if (newNode == NULL) { std::cout << "scope lookup error! Could not find " << lookupName << " in identifier " << std::endl; throw "LOOKUP ERROR: " + lookupName; @@ -80,8 +80,8 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree newNode = new NodeTree(name, ASTData(type_def, Symbol(typeAlias, true, typeAlias))); newNode->getDataRef()->valueType = new Type(newNode); //Type is self-referential since this is the definition } - scope->getDataRef()->scope[typeAlias] = newNode; - newNode->getDataRef()->scope["~enclosing_scope"] = scope; + scope->getDataRef()->scope[typeAlias].push_back(newNode); + newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope); scope = newNode; skipChildren.insert(0); //Identifier lookup will be ourselves, as we just added ourselves to the scope //return newNode; @@ -90,66 +90,80 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree newNode = new NodeTree(name, ASTData(function, Symbol(functionName, true), typeFromString(concatSymbolTree(children[0]), scope))); skipChildren.insert(0); skipChildren.insert(1); - scope->getDataRef()->scope[functionName] = newNode; - newNode->getDataRef()->scope["~enclosing_scope"] = scope; + scope->getDataRef()->scope[functionName].push_back(newNode); + newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope); scope = newNode; + + // auto transChildren = transformChildren(children, skipChildren, scope, types); + // std::cout << functionName << " "; + // for (auto i : transChildren) + // std::cout << "||" << i->getDataRef()->toString() << "|| "; + // std::cout << "??||" << std::endl; + // newNode->addChildren(transChildren); + // return newNode; + std::cout << "finished function " << functionName << 
std::endl; } else if (name == "code_block") { newNode = new NodeTree(name, ASTData(code_block)); - newNode->getDataRef()->scope["~enclosing_scope"] = scope; + newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope); scope = newNode; } else if (name == "typed_parameter") { //newNode = transform(children[1]); //Transform to get the identifier std::string parameterName = concatSymbolTree(children[1]); std::string typeString = concatSymbolTree(children[0]);//Get the type (left child) and set our new identifer to be that type newNode = new NodeTree("identifier", ASTData(identifier, Symbol(parameterName, true), typeFromString(typeString, scope))); - scope->getDataRef()->scope[parameterName] = newNode; - newNode->getDataRef()->scope["~enclosing_scope"] = scope; + scope->getDataRef()->scope[parameterName].push_back(newNode); + newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope); return newNode; } else if (name == "boolean_expression" || name == "and_boolean_expression" || name == "bool_exp") { //If this is an actual part of an expression, not just a premoted term if (children.size() > 1) { + //We do children first so we can do appropriate scope searching with types (yay operator overloading!) + skipChildren.insert(1); + std::vector*> transformedChildren = transformChildren(children, skipChildren, scope, types); std::string functionCallString = concatSymbolTree(children[1]); - NodeTree* function = scopeLookup(scope, functionCallString); + NodeTree* function = scopeLookup(scope, functionCallString, mapNodesToTypes(transformedChildren)); if (function == NULL) { std::cout << "scope lookup error! 
Could not find " << functionCallString << " in boolean stuff " << std::endl; throw "LOOKUP ERROR: " + functionCallString; } newNode = new NodeTree(functionCallString, ASTData(function_call, function->getDataRef()->valueType)); newNode->addChild(function); // First child of function call is a link to the function - skipChildren.insert(1); + newNode->addChildren(transformedChildren); } else { //std::cout << children.size() << std::endl; if (children.size() == 0) return new NodeTree(); - return transform(children[0], scope); //Just a promoted term, so do child + return transform(children[0], scope, types); //Just a promoted term, so do child } //Here's the order of ops stuff } else if (name == "expression" || name == "shiftand" || name == "term" || name == "unarad" || name == "access_operation") { //unarad can ride through, it should always just be a promoted child //If this is an actual part of an expression, not just a premoted child if (children.size() > 2) { + NodeTree* lhs = transform(children[0], scope); //LHS does not inherit types + NodeTree* rhs; + if (name == "access_operation") + rhs = transform(children[2], lhs->getDataRef()->valueType->typeDefinition, types); //If an access operation, then the right side will be in the lhs's type's scope + else + rhs = transform(children[2], scope, types); + std::string functionCallName = concatSymbolTree(children[1]); //std::cout << "scope lookup from expression or similar" << std::endl; - NodeTree* function = scopeLookup(scope, functionCallName); + std::vector*> transformedChildren; transformedChildren.push_back(lhs); transformedChildren.push_back(rhs); + NodeTree* function = scopeLookup(scope, functionCallName, mapNodesToTypes(transformedChildren)); if (function == NULL) { std::cout << "scope lookup error! 
Could not find " << functionCallName << " in expression " << std::endl; throw "LOOKUP ERROR: " + functionCallName; } newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); newNode->addChild(function); // First child of function call is a link to the function definition - NodeTree* lhs = transform(children[0], scope); - NodeTree* rhs;// = transform(children[2], scope); - if (name == "access_operation") - rhs = transform(children[2], lhs->getDataRef()->valueType->typeDefinition); //If an access operation, then the right side will be in the lhs's type's scope - else - rhs = transform(children[2], scope); + newNode->addChild(lhs); + newNode->addChild(rhs); if (name == "access_operation") std::cout << "Access Operation: " << lhs->getDataRef()->symbol.getName() << " : " << rhs->getDataRef()->symbol.getName() << std::endl; - - newNode->addChild(lhs); - newNode->addChild(rhs); std::cout << functionCallName << " - " << function->getName() << " has value type " << function->getDataRef()->valueType << " and rhs " << rhs->getDataRef()->valueType << std::endl; + //Set the value of this function call if (function->getDataRef()->valueType) newNode->getDataRef()->valueType = function->getDataRef()->valueType; else if (rhs->getDataRef()->valueType) @@ -160,21 +174,22 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree return newNode; //skipChildren.insert(1); } else { - return transform(children[0], scope); //Just a promoted child, so do it instead + return transform(children[0], scope, types); //Just a promoted child, so do it instead } } else if (name == "factor") { //Do factor here, as it has all the weird unary operators - //If this is an actual part of an expression, not just a premoted child + //If this is an actual part of an expression, not just a premoted child //NO SUPPORT FOR CASTING YET if (children.size() == 2) { std::string funcName = concatSymbolTree(children[0]); NodeTree* param; if (funcName == "*" || 
funcName == "&" || funcName == "++" || funcName == "--" || funcName == "-" || funcName == "!" || funcName == "~") - param = transform(children[1], scope); + param = transform(children[1], scope, types); else - funcName = concatSymbolTree(children[1]), param = transform(children[0], scope); + funcName = concatSymbolTree(children[1]), param = transform(children[0], scope, types); //std::cout << "scope lookup from factor" << std::endl; - NodeTree* function = scopeLookup(scope, funcName); + std::vector*> transformedChildren; transformedChildren.push_back(param); + NodeTree* function = scopeLookup(scope, funcName, mapNodesToTypes(transformedChildren)); if (function == NULL) { std::cout << "scope lookup error! Could not find " << funcName << " in factor " << std::endl; throw "LOOKUP ERROR: " + funcName; @@ -189,7 +204,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree return newNode; } else { - return transform(children[0], scope); //Just a promoted child, so do it instead + return transform(children[0], scope, types); //Just a promoted child, so do it instead } } else if (name == "statement") { newNode = new NodeTree(name, ASTData(statement)); @@ -205,21 +220,23 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree newNode = new NodeTree(name, ASTData(assignment_statement)); std::string assignFuncName = concatSymbolTree(children[1]); if (assignFuncName == "=") { - newNode->addChild(transform(children[0], scope)); - newNode->addChild(transform(children[2], scope)); + newNode->addChild(transform(children[0], scope, types)); + newNode->addChild(transform(children[2], scope, types)); } else { //For assignments like += or *=, expand the syntatic sugar. 
- NodeTree* lhs = transform(children[0], scope); + NodeTree* lhs = transform(children[0], scope, types); + NodeTree* rhs = transform(children[2], scope, types); + std::vector*> transformedChildren; transformedChildren.push_back(lhs); transformedChildren.push_back(rhs); std::string functionName = assignFuncName.substr(0,1); NodeTree* childCall = new NodeTree(functionName, ASTData(function_call, Symbol(functionName, true))); - NodeTree* functionDef = scopeLookup(scope, functionName); + NodeTree* functionDef = scopeLookup(scope, functionName, mapNodesToTypes(transformedChildren)); if (functionDef == NULL) { std::cout << "scope lookup error! Could not find " << functionName << " in assignment_statement " << std::endl; throw "LOOKUP ERROR: " + functionName; } childCall->addChild(functionDef); //First child of function call is definition of the function childCall->addChild(lhs); - childCall->addChild(transform(children[2], scope)); + childCall->addChild(rhs); newNode->addChild(lhs); newNode->addChild(childCall); } @@ -233,8 +250,8 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree std::string typeString = concatSymbolTree(children[0]);//Get the type (left child) and set our new identifer to be that type Type* identifierType = typeFromString(typeString, scope); NodeTree* newIdentifier = new NodeTree("identifier", ASTData(identifier, Symbol(newIdentifierStr, true), identifierType)); - scope->getDataRef()->scope[newIdentifierStr] = newIdentifier; - newNode->getDataRef()->scope["~enclosing_scope"] = scope; + scope->getDataRef()->scope[newIdentifierStr].push_back(newIdentifier); + newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope); //Now we don't do this thing // if (identifierType->typeDefinition) { // //Is a custom type. 
Populate this declaration's scope with it's inner declarations @@ -257,29 +274,32 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree } else if (name == "function_call") { std::string functionCallName = concatSymbolTree(children[0]); newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); - std::cout << "scope lookup from function_call: " << functionCallName << std::endl; - for (auto i : children) - std::cout << i << " : " << i->getName() << " : " << i->getDataRef()->getName() << std::endl; - //NodeTree* function = scopeLookup(scope, functionCallName); - NodeTree* function = transform(children[0], scope);/* scopeLookup(scope, functionCallName);*/ - std::cout << "The thing: " << function << " : " << function->getName() << std::endl; - for (auto i : function->getChildren()) - std::cout << i->getName() << " "; - std::cout << std::endl; // if (function == NULL) { // std::cout << "scope lookup error! Could not find " << functionCallName << " in function_call " << std::endl; // throw "LOOKUP ERROR: " + functionCallName; // } + skipChildren.insert(0); + std::vector*> transformedChildren = transformChildren(children, skipChildren, scope, types); + std::cout << "scope lookup from function_call: " << functionCallName << std::endl; + for (auto i : children) + std::cout << i << " : " << i->getName() << " : " << i->getDataRef()->getName() << std::endl; + + NodeTree* function = transform(children[0], scope, mapNodesToTypes(transformedChildren)); + std::cout << "The thing: " << function << " : " << function->getName() << std::endl; + for (auto i : function->getChildren()) + std::cout << i->getName() << " "; + std::cout << std::endl; newNode->addChild(function); newNode->getDataRef()->valueType = function->getDataRef()->valueType; - skipChildren.insert(0); + newNode->addChildren(transformedChildren); + return newNode; } else if (name == "parameter") { - return transform(children[0], scope); //Don't need a parameter node, 
just the value + return transform(children[0], scope, types); //Don't need a parameter node, just the value } else if (name == "type") { std::string theConcat = concatSymbolTree(from); //We have no symbol, so this will concat our children newNode = new NodeTree(name, ASTData(value, Symbol(theConcat, true), typeFromString(theConcat, scope))); } else if (name == "number") { - return transform(children[0], scope); + return transform(children[0], scope, types); } else if (name == "integer") { newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(from), true), new Type(integer))); } else if (name == "float") { @@ -294,20 +314,42 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree return new NodeTree(); } - // In general, iterate through children and do them. Might not do this for all children. + //Do all children but the ones we skip for (int i = 0; i < children.size(); i++) { if (skipChildren.find(i) == skipChildren.end()) { - NodeTree* transChild = transform(children[i], scope); + NodeTree* transChild = transform(children[i], scope, types); if (transChild->getDataRef()->type) //Only add the children that have a real ASTData::ASTType, that is, legit ASTData. newNode->addChild(transChild); else delete transChild; } } - return newNode; } +//We use this functionality a lot at different places +std::vector*> ASTTransformation::transformChildren(std::vector*> children, std::set skipChildren, NodeTree* scope, std::vector types) { + std::vector*> transformedChildren; + // In general, iterate through children and do them. Might not do this for all children. + for (int i = 0; i < children.size(); i++) { + if (skipChildren.find(i) == skipChildren.end()) { + NodeTree* transChild = transform(children[i], scope, types); + if (transChild->getDataRef()->type) //Only add the children that have a real ASTData::ASTType, that is, legit ASTData. 
+ transformedChildren.push_back(transChild); + else + delete transChild; + } + } + return transformedChildren; +} + +std::vector ASTTransformation::mapNodesToTypes(std::vector*> nodes) { + std::vector types; + for (auto i : nodes) + types.push_back(*(i->getDataRef()->valueType)); + return types; +} + std::string ASTTransformation::concatSymbolTree(NodeTree* root) { std::string concatString; std::string ourValue = root->getDataRef()->getValue(); @@ -320,33 +362,52 @@ std::string ASTTransformation::concatSymbolTree(NodeTree* root) { return concatString; } -NodeTree* ASTTransformation::scopeLookup(NodeTree* scope, std::string lookup) { - //First, if it is a struct or object, get it's base. - std::vector splitString = split(lookup, '.'); - if (splitString.size() > 1) { - std::string base = splitString[0]; - // NodeTree* baseDef = scopeLookup(scope, base); - // splitString.erase(splitString.begin()); //Get rid of the base in the split str - // //Now the base is the scope. - // return scopeLookup(baseDef, join(splitString, ".")); //So the joined version doesn't have the base. - return scopeLookup(scope, base); - } +NodeTree* ASTTransformation::scopeLookup(NodeTree* scope, std::string lookup, std::vector types) { //Search the map auto scopeMap = scope->getDataRef()->scope; auto elementIterator = scopeMap.find(lookup); + // if (elementIterator != scopeMap.end()) { - // std::cout << "lookup of " << lookup << " succeded in first scope!" << std::endl; - return elementIterator->second; + for (auto i = elementIterator->second.begin(); i != elementIterator->second.end(); i++) { + //Types and functions cannot have the same name, and types very apparently do not have parameter types, so check and short-circuit + if ((*i)->getDataRef()->type == type_def) + return *i; + //return *i; + std::vector*> children = (*i)->getChildren(); + if (types.size() != (children.size() > 0) ? 
children.size()-1 : 0) { + std::cout << "Type sizes do not match between two " << lookup << "(" << types.size() << "," << ((children.size() > 0) ? children.size()-1 : 0) << "), types are: "; + for (auto j : types) + std::cout << j.toString() << " "; + std::cout << std::endl; + continue; + } + bool typesMatch = true; + for (int j = 0; j < types.size(); j++) { + if (types[j] != *(children[j]->getDataRef()->valueType)) { + typesMatch = false; + std::cout << "Types do not match between two " << lookup << std::endl; + break; + } + } + if (typesMatch) + return *i; + } } - //std::cout << "lookup of " << lookup << " failed in first scope, checking for upper scope" << std::endl; + //if it doesn't exist, try the enclosing scope if it exists. auto enclosingIterator = scopeMap.find("~enclosing_scope"); if (enclosingIterator != scopeMap.end()) { // std::cout << "upper scope exists, searching it for " << lookup << std::endl; - return scopeLookup(enclosingIterator->second, lookup); + return scopeLookup(enclosingIterator->second[0], lookup, types); } //std::cout << "upper scope does not exist" << std::endl; - std::cout << "could not find " << lookup << std::endl; + std::cout << "could not find " << lookup << " in standard scope, checking for operator" << std::endl; + //Note that we don't check for types. At some point we should, as we don't know how to add objects/structs without overloaded operators, etc + elementIterator = languageLevelScope.find(lookup); + if (elementIterator != languageLevelScope.end()) { + std::cout << "found it at language level as operator." << std::endl; + return elementIterator->second[0]; + } return NULL; } diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index fd22588..5fda00a 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -48,33 +48,39 @@ std::string CGenerator::generate(NodeTree* from, NodeTree* enc output += generate(children[i], enclosingObject) + "\n"; //Declare everything in translation unit scope here. 
(allows stuff from other files, automatic forward declarations) for (auto i = data.scope.begin(); i != data.scope.end(); i++) { - NodeTree* declaration = i->second; - std::vector*> decChildren = declaration->getChildren(); - ASTData declarationData = i->second->getData(); - switch(declarationData.type) { - case identifier: - output += ValueTypeToCType(declarationData.valueType) + " " + declarationData.symbol.getName() + "; /*identifier*/\n"; - break; - case function: - if (decChildren.size() == 0) { //Not a real function, must be a built in passthrough { - output += "/* built in function: " + declarationData.toString() + " */\n"; + for (auto overloadedMembers : i->second) { + NodeTree* declaration = overloadedMembers; + std::vector*> decChildren = declaration->getChildren(); + ASTData declarationData = declaration->getData(); + switch(declarationData.type) { + case identifier: + output += ValueTypeToCType(declarationData.valueType) + " " + declarationData.symbol.getName() + "; /*identifier*/\n"; + break; + case function: + { + if (decChildren.size() == 0) { //Not a real function, must be a built in passthrough { + output += "/* built in function: " + declarationData.toString() + " */\n"; + break; + } + output += "\n" + ValueTypeToCType(declarationData.valueType) + " "; + std::string nameDecoration, parameters; + for (int j = 0; j < decChildren.size()-1; j++) { + if (j > 0) + parameters += ", "; + parameters += ValueTypeToCType(decChildren[j]->getData().valueType) + " " + generate(decChildren[j], enclosingObject); + nameDecoration += ValueTypeToCTypeDecoration(decChildren[j]->getData().valueType) + "_"; + } + output += nameDecoration + declarationData.symbol.getName() + "(" + parameters + "); /*func*/\n"; break; } - output += "\n" + ValueTypeToCType(declarationData.valueType) + " " + declarationData.symbol.getName() + "("; - for (int j = 0; j < decChildren.size()-1; j++) { - if (j > 0) - output += ", "; - output += 
ValueTypeToCType(decChildren[j]->getData().valueType) + " " + generate(decChildren[j], enclosingObject); - } - output += "); /*func*/\n"; - break; - case type_def: - //type - output += "/*typedef " + declarationData.symbol.getName() + " */\n"; - break; - default: - //std::cout << "Declaration? named " << declaration->getName() << " of unknown type " << ASTData::ASTTypeToString(declarationData.type) << " in translation unit scope" << std::endl; - output += "/*unknown declaration named " + declaration->getName() + "*/\n"; + case type_def: + //type + output += "/*typedef " + declarationData.symbol.getName() + " */\n"; + break; + default: + //std::cout << "Declaration? named " << declaration->getName() << " of unknown type " << ASTData::ASTTypeToString(declarationData.type) << " in translation unit scope" << std::endl; + output += "/*unknown declaration named " + declaration->getName() + "*/\n"; + } } } //Do here because we need the newlines @@ -92,9 +98,12 @@ std::string CGenerator::generate(NodeTree* from, NodeTree* enc case identifier: { //If we're in an object method, and our enclosing scope is that object, we're a member of the object and should use the self reference. 
- if (enclosingObject && enclosingObject->getDataRef()->scope.find(data.symbol.getName()) != enclosingObject->getDataRef()->scope.end()) { - return "self->" + data.symbol.getName(); - } + std::string preName; + if (enclosingObject && enclosingObject->getDataRef()->scope.find(data.symbol.getName()) != enclosingObject->getDataRef()->scope.end()) + preName += "self->"; + if (false) + for (int j = 0; j < children.size()-1; j++) + preName += ValueTypeToCType(children[j]->getData().valueType) + "_"; return data.symbol.getName(); } case type_def: @@ -114,21 +123,25 @@ std::string CGenerator::generate(NodeTree* from, NodeTree* enc return objectString + postString; //Functions come after the declaration of the struct } case function: - output += "\n" + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + "("; - for (int i = 0; i < children.size()-1; i++) { - if (i > 0) - output += ", "; - output += ValueTypeToCType(children[i]->getData().valueType) + " " + generate(children[i], enclosingObject); + { + output += "\n" + ValueTypeToCType(data.valueType) + " "; + std::string nameDecoration, parameters; + for (int j = 0; j < children.size()-1; j++) { + if (j > 0) + parameters += ", "; + parameters += ValueTypeToCType(children[j]->getData().valueType) + " " + generate(children[j], enclosingObject); + nameDecoration += ValueTypeToCTypeDecoration(children[j]->getData().valueType) + "_"; } - output+= ")\n" + generate(children[children.size()-1], enclosingObject); + output += nameDecoration + data.symbol.getName() + "(" + parameters + ")\n" + generate(children[children.size()-1], enclosingObject); return output; + } case code_block: output += "{\n"; tabLevel++; for (int i = 0; i < children.size(); i++) { - std::cout << "Line " << i << std::endl; + //std::cout << "Line " << i << std::endl; std::string line = generate(children[i], enclosingObject); - std::cout << line << std::endl; + //std::cout << line << std::endl; output += line; } tabLevel--; @@ -200,19 +213,19 @@ 
std::string CGenerator::generate(NodeTree* from, NodeTree* enc NodeTree* possibleObjectType = children[1]->getDataRef()->valueType->typeDefinition; //If is an object method, generate it like one. Needs extension/modification for inheritence if (possibleObjectType && possibleObjectType->getDataRef()->scope.find(functionName) != possibleObjectType->getDataRef()->scope.end()) { - return possibleObjectType->getDataRef()->symbol.getName() +"__" + functionName + "(" + (name == "." ? "&" : "") + generate(children[1], enclosingObject) + ","; +HERE return possibleObjectType->getDataRef()->symbol.getName() +"__" + functionName + "(" + (name == "." ? "&" : "") + generate(children[1], enclosingObject) + ","; //The comma lets the upper function call know we already started the param list - //Note that we got here from a function call. We just pass up this special case and let them finish with the perentheses - std::cout << "Is in scope or not type!" << std::endl; + //Note that we got here from a function call. 
We just pass up this special case and let them finish with the perentheses } else { std::cout << "Is not in scope or not type" << std::endl; - return "((" + generate(children[1], enclosingObject) + ")" + name + functionName + ")"; +WTHISTHIS return "((" + generate(children[1], enclosingObject) + ")" + name + functionName + ")"; } } else { - return "((" + generate(children[1], enclosingObject) + ")" + name + generate(children[2], enclosingObject) + ")"; +ALSOWTH return "((" + generate(children[1], enclosingObject) + ")" + name + generate(children[2], enclosingObject) + ")"; } } else { - output += name + "("; + //It's a normal function call, not a special one or a method or anything +HERE output += name + "("; } } else { std::string functionCallSource = generate(children[0], enclosingObject); @@ -224,7 +237,8 @@ std::string CGenerator::generate(NodeTree* from, NodeTree* enc for (int i = 1; i < children.size(); i++) //children[0] is the declaration if (i < children.size()-1) output += generate(children[i], enclosingObject) + ", "; - else output += generate(children[i], enclosingObject); + else + output += generate(children[i], enclosingObject); output += ") "; return output; } @@ -288,3 +302,39 @@ std::string CGenerator::ValueTypeToCType(Type *type) { return_type += "*"; return return_type; } + +std::string CGenerator::ValueTypeToCTypeDecoration(Type *type) { + std::string return_type; + switch (type->baseType) { + case none: + if (type->typeDefinition) + return_type = type->typeDefinition->getDataRef()->symbol.getName(); + else + return_type = "none"; + break; + case void_type: + return_type = "void"; + break; + case boolean: + return_type = "bool"; + break; + case integer: + return_type = "int"; + break; + case floating: + return_type = "float"; + break; + case double_percision: + return_type = "double"; + break; + case character: + return_type = "char"; + break; + default: + return_type = "unknown_ValueType"; + break; + } + for (int i = 0; i < type->indirection; 
i++) + return_type += "_P__"; + return return_type; +} diff --git a/src/Importer.cpp b/src/Importer.cpp index d158470..88c367f 100644 --- a/src/Importer.cpp +++ b/src/Importer.cpp @@ -106,6 +106,7 @@ NodeTree* Importer::import(std::string fileName) { } outFileTransformed.close(); + //Call with ourself to allow the transformation to call us to import files that it needs NodeTree* AST = ASTTransformation(this).transform(parseTree); if (AST) { diff --git a/src/Type.cpp b/src/Type.cpp index 613324d..2e40328 100644 --- a/src/Type.cpp +++ b/src/Type.cpp @@ -38,6 +38,14 @@ Type::Type(ValueType typeIn, NodeTree* typeDefinitionIn, int indirectio Type::~Type() { } +const bool Type::operator==(const Type &other) const { + return( baseType == other.baseType && indirection == other.indirection && typeDefinition == other.typeDefinition); +} + +const bool Type::operator!=(const Type &other) const { + return(!this->operator==(other)); +} + std::string Type::toString() { std::string typeString; switch (baseType) { From 51e04498d6a3b2ee15657844bff111f4de3fe87c Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Fri, 7 Mar 2014 14:17:07 -0500 Subject: [PATCH 22/25] Function overloading works\! 
--- src/CGenerator.cpp | 43 ++++++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 5fda00a..78b8c7d 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -68,9 +68,9 @@ std::string CGenerator::generate(NodeTree* from, NodeTree* enc if (j > 0) parameters += ", "; parameters += ValueTypeToCType(decChildren[j]->getData().valueType) + " " + generate(decChildren[j], enclosingObject); - nameDecoration += ValueTypeToCTypeDecoration(decChildren[j]->getData().valueType) + "_"; + nameDecoration += "_" + ValueTypeToCTypeDecoration(decChildren[j]->getData().valueType); } - output += nameDecoration + declarationData.symbol.getName() + "(" + parameters + "); /*func*/\n"; + output += declarationData.symbol.getName() + nameDecoration + "(" + parameters + "); /*func*/\n"; break; } case type_def: @@ -130,9 +130,9 @@ std::string CGenerator::generate(NodeTree* from, NodeTree* enc if (j > 0) parameters += ", "; parameters += ValueTypeToCType(children[j]->getData().valueType) + " " + generate(children[j], enclosingObject); - nameDecoration += ValueTypeToCTypeDecoration(children[j]->getData().valueType) + "_"; + nameDecoration += "_" + ValueTypeToCTypeDecoration(children[j]->getData().valueType); } - output += nameDecoration + data.symbol.getName() + "(" + parameters + ")\n" + generate(children[children.size()-1], enclosingObject); + output += data.symbol.getName() + nameDecoration + "(" + parameters + ")\n" + generate(children[children.size()-1], enclosingObject); return output; } case code_block: @@ -213,21 +213,33 @@ std::string CGenerator::generate(NodeTree* from, NodeTree* enc NodeTree* possibleObjectType = children[1]->getDataRef()->valueType->typeDefinition; //If is an object method, generate it like one. 
Needs extension/modification for inheritence if (possibleObjectType && possibleObjectType->getDataRef()->scope.find(functionName) != possibleObjectType->getDataRef()->scope.end()) { -HERE return possibleObjectType->getDataRef()->symbol.getName() +"__" + functionName + "(" + (name == "." ? "&" : "") + generate(children[1], enclosingObject) + ","; + std::string nameDecoration; + std::vector*> functionDefChildren = children[2]->getChildren(); //The function def is the rhs of the access operation + std::cout << "Decorating (in access-should be object) " << name << " " << functionDefChildren.size() << std::endl; + for (int i = 0; i < (functionDefChildren.size() > 0 ? functionDefChildren.size()-1 : 0); i++) + nameDecoration += "_" + ValueTypeToCTypeDecoration(functionDefChildren[i]->getData().valueType); +/*HERE*/ return possibleObjectType->getDataRef()->symbol.getName() +"__" + functionName + nameDecoration + "(" + (name == "." ? "&" : "") + generate(children[1], enclosingObject) + ","; //The comma lets the upper function call know we already started the param list //Note that we got here from a function call. We just pass up this special case and let them finish with the perentheses } else { std::cout << "Is not in scope or not type" << std::endl; -WTHISTHIS return "((" + generate(children[1], enclosingObject) + ")" + name + functionName + ")"; + return "((" + generate(children[1], enclosingObject) + ")" + name + functionName + ")"; } } else { -ALSOWTH return "((" + generate(children[1], enclosingObject) + ")" + name + generate(children[2], enclosingObject) + ")"; + return "((" + generate(children[1], enclosingObject) + ")" + name + generate(children[2], enclosingObject) + ")"; } } else { - //It's a normal function call, not a special one or a method or anything -HERE output += name + "("; + //It's a normal function call, not a special one or a method or anything. Name decorate. 
+ std::vector*> functionDefChildren = children[0]->getChildren(); + std::cout << "Decorating (none-special)" << name << " " << functionDefChildren.size() << std::endl; + std::string nameDecoration; + for (int i = 0; i < (functionDefChildren.size() > 0 ? functionDefChildren.size()-1 : 0); i++) + nameDecoration += "_" + ValueTypeToCTypeDecoration(functionDefChildren[i]->getData().valueType); +/*HERE*/ output += name + nameDecoration + "("; } } else { + //This part handles cases where our definition isn't the function definition (that is, it is probabally the return from another function) + //It's probabally the result of an access function call (. or ->) to access an object method. std::string functionCallSource = generate(children[0], enclosingObject); if (functionCallSource[functionCallSource.size()-1] == ',') //If it's a member method, it's already started the parameter list. output += children.size() > 1 ? functionCallSource : functionCallSource.substr(0, functionCallSource.size()-1); @@ -260,10 +272,14 @@ std::string CGenerator::generateObjectMethod(NodeTree* enclosingObject, Type enclosingObjectType = *(enclosingObject->getDataRef()->valueType); //Copy a new type so we can turn it into a pointer if we need to enclosingObjectType.indirection++; std::vector*> children = from->getChildren(); - output += "\n" + ValueTypeToCType(data.valueType) + " " + enclosingObject->getDataRef()->symbol.getName() +"__" + data.symbol.getName() + "(" + ValueTypeToCType(&enclosingObjectType) + " self"; - for (int i = 0; i < children.size()-1; i++) - output += ", " + ValueTypeToCType(children[i]->getData().valueType) + " " + generate(children[i]); - output+= ")\n" + generate(children[children.size()-1], enclosingObject); //Pass in the object so we can + std::string nameDecoration, parameters; + for (int i = 0; i < children.size()-1; i++) { + parameters += ", " + ValueTypeToCType(children[i]->getData().valueType) + " " + generate(children[i]); + nameDecoration += "_" + 
ValueTypeToCTypeDecoration(children[i]->getData().valueType); + } + output += "\n" + ValueTypeToCType(data.valueType) + " " + enclosingObject->getDataRef()->symbol.getName() +"__" + + data.symbol.getName() + nameDecoration + "(" + ValueTypeToCType(&enclosingObjectType) + " self" + parameters + ")\n" + + generate(children[children.size()-1], enclosingObject); //Pass in the object so we can return output; } @@ -338,3 +354,4 @@ std::string CGenerator::ValueTypeToCTypeDecoration(Type *type) { return_type += "_P__"; return return_type; } + From 6eee808f966773044c5d4c8fa18e586d92c42bd4 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Sat, 8 Mar 2014 16:13:09 -0500 Subject: [PATCH 23/25] Fixed a lot of stuff to do with objects and method calls. --- include/util.h | 7 +++++++ krakenGrammer.kgm | 2 +- src/CGenerator.cpp | 10 ++++++++-- src/util.cpp | 1 + 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/include/util.h b/include/util.h index 6ca6b1e..2be7301 100644 --- a/include/util.h +++ b/include/util.h @@ -16,5 +16,12 @@ std::string strSlice(std::string str, int begin, int end); int findPerenEnd(std::string str, int i); std::vector split(const std::string &str, char delim); std::string join(const std::vector &strVec, std::string joinStr); +template +bool contains(std::vector vec, T item) { + for (auto i : vec) + if (i == item) + return true; + return false; +} #endif diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index 610816d..f396a63 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -69,7 +69,7 @@ term = term WS forward_slash WS factor | term WS "\*" WS factor | term WS "%" WS factor = "\+\+" WS unarad | unarad WS "\+\+" | "--" WS unarad | unarad WS "--" | "\+" WS unarad | "-" WS unarad | "!" 
WS unarad | "~" WS unarad | "\(" WS type WS "\)" WS unarad | "\*" WS unarad | "&" WS unarad | unarad ; unarad = number | identifier | function_call | bool | string | character | "\(" WS boolean_expression WS "\)" | access_operation ; number = integer | float | double ; -access_operation = expression "." identifier | expression "->" identifier ; +access_operation = unarad "." identifier | unarad "->" identifier ; assignment_statement = factor WS "=" WS boolean_expression | factor WS "\+=" WS boolean_expression | factor WS "-=" WS boolean_expression | factor WS "\*=" WS boolean_expression | factor WS "/=" WS boolean_expression ; declaration_statement = type WS identifier WS "=" WS boolean_expression | type WS identifier ; diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 78b8c7d..962e664 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -104,7 +104,7 @@ std::string CGenerator::generate(NodeTree* from, NodeTree* enc if (false) for (int j = 0; j < children.size()-1; j++) preName += ValueTypeToCType(children[j]->getData().valueType) + "_"; - return data.symbol.getName(); + return preName + data.symbol.getName(); } case type_def: if (children.size() == 0) { @@ -235,7 +235,13 @@ std::string CGenerator::generate(NodeTree* from, NodeTree* enc std::string nameDecoration; for (int i = 0; i < (functionDefChildren.size() > 0 ? functionDefChildren.size()-1 : 0); i++) nameDecoration += "_" + ValueTypeToCTypeDecoration(functionDefChildren[i]->getData().valueType); + //Check to see if we're inside of an object and this is a method call + bool isSelfObjectMethod = enclosingObject && contains(enclosingObject->getChildren(), children[0]); + if (isSelfObjectMethod) + output += enclosingObject->getDataRef()->symbol.getName() +"__"; /*HERE*/ output += name + nameDecoration + "("; + if (isSelfObjectMethod) + output += children.size() > 1 ? 
"self," : "self"; } } else { //This part handles cases where our definition isn't the function definition (that is, it is probabally the return from another function) @@ -277,7 +283,7 @@ std::string CGenerator::generateObjectMethod(NodeTree* enclosingObject, parameters += ", " + ValueTypeToCType(children[i]->getData().valueType) + " " + generate(children[i]); nameDecoration += "_" + ValueTypeToCTypeDecoration(children[i]->getData().valueType); } - output += "\n" + ValueTypeToCType(data.valueType) + " " + enclosingObject->getDataRef()->symbol.getName() +"__" + output += "\n" + ValueTypeToCType(data.valueType) + " " + enclosingObject->getDataRef()->symbol.getName() +"__" + data.symbol.getName() + nameDecoration + "(" + ValueTypeToCType(&enclosingObjectType) + " self" + parameters + ")\n" + generate(children[children.size()-1], enclosingObject); //Pass in the object so we can return output; diff --git a/src/util.cpp b/src/util.cpp index cb1fa4c..a3c7802 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -71,3 +71,4 @@ std::string join(const std::vector &strVec, std::string joinStr) { return joinedStr; } + From 663b1246802c3931aa0951b6402ee8736882280e Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Sun, 9 Mar 2014 03:13:08 -0400 Subject: [PATCH 24/25] NOW it should be fixed. 
--- src/CGenerator.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 962e664..641ba4d 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -226,7 +226,8 @@ std::string CGenerator::generate(NodeTree* from, NodeTree* enc return "((" + generate(children[1], enclosingObject) + ")" + name + functionName + ")"; } } else { - return "((" + generate(children[1], enclosingObject) + ")" + name + generate(children[2], enclosingObject) + ")"; + //return "((" + generate(children[1], enclosingObject) + ")" + name + generate(children[2], enclosingObject) + ")"; + return "((" + generate(children[1], enclosingObject) + ")" + name + generate(children[2]) + ")"; } } else { //It's a normal function call, not a special one or a method or anything. Name decorate. From 3728a849de777c8ca62dacaf45959f783e71b940 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Fri, 14 Mar 2014 15:55:45 -0400 Subject: [PATCH 25/25] Work in progress commit. Working on method operator overloading, fixed c-style block comments. 
--- include/ASTTransformation.h | 1 + include/CGenerator.h | 1 + include/util.h | 12 +++++++ krakenGrammer.kgm | 8 +++-- src/ASTTransformation.cpp | 64 +++++++++++++++++++++++++++++++------ src/CGenerator.cpp | 58 +++++++++++++++++++++++++++++---- 6 files changed, 125 insertions(+), 19 deletions(-) diff --git a/include/ASTTransformation.h b/include/ASTTransformation.h index f65e95f..fccf125 100644 --- a/include/ASTTransformation.h +++ b/include/ASTTransformation.h @@ -20,6 +20,7 @@ class ASTTransformation: public NodeTransformation { std::vector*> transformChildren(std::vector*> children, std::set skipChildren, NodeTree* scope, std::vector types); std::vector mapNodesToTypes(std::vector*> nodes); std::string concatSymbolTree(NodeTree* root); + NodeTree* scopeLookup(NodeTree* scope, std::string lookup, std::vector*> nodes); NodeTree* scopeLookup(NodeTree* scope, std::string lookup, std::vector types = std::vector()); Type* typeFromString(std::string type, NodeTree* scope); private: diff --git a/include/CGenerator.h b/include/CGenerator.h index 9df6248..fe8e8b8 100644 --- a/include/CGenerator.h +++ b/include/CGenerator.h @@ -20,6 +20,7 @@ class CGenerator { std::string generate(NodeTree* from, NodeTree* enclosingObject = NULL); static std::string ValueTypeToCType(Type *type); static std::string ValueTypeToCTypeDecoration(Type *type); + static std::string CifyFunctionName(std::string name); std::string generateObjectMethod(NodeTree* enclosingObject, NodeTree* from); std::string generatorString; diff --git a/include/util.h b/include/util.h index 2be7301..7f17f51 100644 --- a/include/util.h +++ b/include/util.h @@ -24,4 +24,16 @@ bool contains(std::vector vec, T item) { return false; } +template +std::vector slice(std::vector vec, int begin, int end) { + std::vector toReturn; + if (begin < 0) + begin += vec.size()+1; + if (end < 0) + end += vec.size()+1; + for (int i = begin; i < end; i++) + toReturn.push_back(vec[i]); + return toReturn; +} + #endif diff --git 
a/krakenGrammer.kgm b/krakenGrammer.kgm index f396a63..9438a5a 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -29,7 +29,9 @@ triple_quoted_string = "\"\"\"((\"\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i identifier = alpha | alpha alphanumeric ; -function = type WS identifier WS "\(" WS opt_typed_parameter_list WS "\)" WS code_block ; +overloadable_operator = "\+" | "-" | "\*" | "/" | "%" | "^" | "&" | "\|" | "~" | "\!" | "," | "=" | "\+\+" | "--" | "<<" | ">>" | "==" | "!=" | "&&" | "\|\|" | "\+=" | "-=" | "/=" | "%=" | "^=" | "&=" | "\|=" | "\*=" | "<<=" | ">>=" | "->" ; +func_identifier = identifier | identifier overloadable_operator ; +function = type WS func_identifier WS "\(" WS opt_typed_parameter_list WS "\)" WS code_block ; opt_typed_parameter_list = typed_parameter_list | ; typed_parameter_list = typed_parameter_list WS "," WS typed_parameter | typed_parameter ; @@ -89,5 +91,5 @@ string = triple_quoted_string | "\"(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i| |z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|Z|X|C|V|B|N|M|<|>|\?| )*\"" ; comment = "//(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )* -" | "/\*(`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| -|z|x|c|v|b|n|m|,|.|/|~|!|@|#|$|%|^|&|\*|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )*\*/" ; +" | "(/\*+((`|1|2|3|4|5|6|7|8|9|0|-|=| |q|w|e|r|t|y|u|i|o|p|[|]|\\|a|s|d|f|g|h|j|k|l|;|'| +|z|x|c|v|b|n|m|,|.|~|!|@|#|$|%|^|&|\(|\)|_|\+|Q|W|E|R|T|Y|U|I|O|P|{|}|\||A|S|D|F|G|H|J|K|L|:|\"|Z|X|C|V|B|N|M|<|>|\?| )/*\**)+\*/)|(/\*\*/)" ; diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index ec74e2a..c91572a 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -122,7 +122,7 @@ NodeTree* 
ASTTransformation::transform(NodeTree* from, NodeTree skipChildren.insert(1); std::vector*> transformedChildren = transformChildren(children, skipChildren, scope, types); std::string functionCallString = concatSymbolTree(children[1]); - NodeTree* function = scopeLookup(scope, functionCallString, mapNodesToTypes(transformedChildren)); + NodeTree* function = scopeLookup(scope, functionCallString, transformedChildren); if (function == NULL) { std::cout << "scope lookup error! Could not find " << functionCallString << " in boolean stuff " << std::endl; throw "LOOKUP ERROR: " + functionCallString; @@ -150,7 +150,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree std::string functionCallName = concatSymbolTree(children[1]); //std::cout << "scope lookup from expression or similar" << std::endl; std::vector*> transformedChildren; transformedChildren.push_back(lhs); transformedChildren.push_back(rhs); - NodeTree* function = scopeLookup(scope, functionCallName, mapNodesToTypes(transformedChildren)); + NodeTree* function = scopeLookup(scope, functionCallName, transformedChildren); if (function == NULL) { std::cout << "scope lookup error! Could not find " << functionCallName << " in expression " << std::endl; throw "LOOKUP ERROR: " + functionCallName; @@ -189,7 +189,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree //std::cout << "scope lookup from factor" << std::endl; std::vector*> transformedChildren; transformedChildren.push_back(param); - NodeTree* function = scopeLookup(scope, funcName, mapNodesToTypes(transformedChildren)); + NodeTree* function = scopeLookup(scope, funcName, transformedChildren); if (function == NULL) { std::cout << "scope lookup error! 
Could not find " << funcName << " in factor " << std::endl; throw "LOOKUP ERROR: " + funcName; @@ -229,7 +229,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree std::vector*> transformedChildren; transformedChildren.push_back(lhs); transformedChildren.push_back(rhs); std::string functionName = assignFuncName.substr(0,1); NodeTree* childCall = new NodeTree(functionName, ASTData(function_call, Symbol(functionName, true))); - NodeTree* functionDef = scopeLookup(scope, functionName, mapNodesToTypes(transformedChildren)); + NodeTree* functionDef = scopeLookup(scope, functionName, transformedChildren); if (functionDef == NULL) { std::cout << "scope lookup error! Could not find " << functionName << " in assignment_statement " << std::endl; throw "LOOKUP ERROR: " + functionName; @@ -362,10 +362,53 @@ std::string ASTTransformation::concatSymbolTree(NodeTree* root) { return concatString; } +//Overloaded with the actual children to allow us to handle operator methods +NodeTree* ASTTransformation::scopeLookup(NodeTree* scope, std::string lookup, std::vector*> nodes) { + // + auto LLElementIterator = languageLevelScope.find(lookup); + if (LLElementIterator != languageLevelScope.end()) { + std::cout << "Checking for early method level operator overload" << std::endl; + std::string lookupOp = "operator" + lookup; + for (auto i : nodes) + std::cout << i->getDataRef()->toString() << " "; + std::cout << std::endl; + NodeTree* operatorMethod = NULL; + if (nodes[0]->getDataRef()->valueType && nodes[0]->getDataRef()->valueType->typeDefinition) + operatorMethod = scopeLookup(nodes[0]->getDataRef()->valueType->typeDefinition, lookupOp, mapNodesToTypes(slice(nodes,1,-1))); + if (operatorMethod) { + //Ok, so we construct + std::cout << "Early method level operator was found" << std::endl; + //return operatorMethod; + newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); + newNode->addChild(function); // First child of function 
call is a link to the function definition + newNode->addChild(lhs); + newNode->addChild(rhs); + + + //Set the value of this function call + if (function->getDataRef()->valueType) + newNode->getDataRef()->valueType = function->getDataRef()->valueType; + else if (rhs->getDataRef()->valueType) + newNode->getDataRef()->valueType = rhs->getDataRef()->valueType; + else + newNode->getDataRef()->valueType = NULL; + } + std::cout << "Early method level operator was NOT found" << std::endl; + } + return scopeLookup(scope, lookup, mapNodesToTypes(nodes)); +} + NodeTree* ASTTransformation::scopeLookup(NodeTree* scope, std::string lookup, std::vector types) { + //We first search the languageLevelScope to see if it's an operator. If so, we modifiy the lookup with a preceding "operator" + auto LLElementIterator = languageLevelScope.find(lookup); + if (LLElementIterator != languageLevelScope.end()) + lookup = "operator" + lookup; //Search the map auto scopeMap = scope->getDataRef()->scope; auto elementIterator = scopeMap.find(lookup); + for (auto i : scopeMap) + std::cout << i.first << " "; + std::cout << std::endl; // if (elementIterator != scopeMap.end()) { for (auto i = elementIterator->second.begin(); i != elementIterator->second.end(); i++) { @@ -374,7 +417,7 @@ NodeTree* ASTTransformation::scopeLookup(NodeTree* scope, std: return *i; //return *i; std::vector*> children = (*i)->getChildren(); - if (types.size() != (children.size() > 0) ? children.size()-1 : 0) { + if (types.size() != ((children.size() > 0) ? children.size()-1 : 0)) { std::cout << "Type sizes do not match between two " << lookup << "(" << types.size() << "," << ((children.size() > 0) ? 
children.size()-1 : 0) << "), types are: "; for (auto j : types) std::cout << j.toString() << " "; @@ -398,16 +441,19 @@ NodeTree* ASTTransformation::scopeLookup(NodeTree* scope, std: auto enclosingIterator = scopeMap.find("~enclosing_scope"); if (enclosingIterator != scopeMap.end()) { // std::cout << "upper scope exists, searching it for " << lookup << std::endl; - return scopeLookup(enclosingIterator->second[0], lookup, types); + NodeTree* upperResult = scopeLookup(enclosingIterator->second[0], lookup, types); + if (upperResult) + return upperResult; } //std::cout << "upper scope does not exist" << std::endl; std::cout << "could not find " << lookup << " in standard scope, checking for operator" << std::endl; //Note that we don't check for types. At some point we should, as we don't know how to add objects/structs without overloaded operators, etc - elementIterator = languageLevelScope.find(lookup); - if (elementIterator != languageLevelScope.end()) { + //Also, we've already searched for the element because this is also how we keep track of operator overloading + if (LLElementIterator != languageLevelScope.end()) { std::cout << "found it at language level as operator." 
<< std::endl; - return elementIterator->second[0]; + return LLElementIterator->second[0]; } + std::cout << "Did not find, returning NULL" << std::endl; return NULL; } diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 641ba4d..c6c7723 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -70,7 +70,7 @@ std::string CGenerator::generate(NodeTree* from, NodeTree* enc parameters += ValueTypeToCType(decChildren[j]->getData().valueType) + " " + generate(decChildren[j], enclosingObject); nameDecoration += "_" + ValueTypeToCTypeDecoration(decChildren[j]->getData().valueType); } - output += declarationData.symbol.getName() + nameDecoration + "(" + parameters + "); /*func*/\n"; + output += CifyFunctionName(declarationData.symbol.getName()) + nameDecoration + "(" + parameters + "); /*func*/\n"; break; } case type_def: @@ -104,7 +104,7 @@ std::string CGenerator::generate(NodeTree* from, NodeTree* enc if (false) for (int j = 0; j < children.size()-1; j++) preName += ValueTypeToCType(children[j]->getData().valueType) + "_"; - return preName + data.symbol.getName(); + return preName + CifyFunctionName(data.symbol.getName()); //Cifying does nothing if not an operator overload } case type_def: if (children.size() == 0) { @@ -132,7 +132,7 @@ std::string CGenerator::generate(NodeTree* from, NodeTree* enc parameters += ValueTypeToCType(children[j]->getData().valueType) + " " + generate(children[j], enclosingObject); nameDecoration += "_" + ValueTypeToCTypeDecoration(children[j]->getData().valueType); } - output += data.symbol.getName() + nameDecoration + "(" + parameters + ")\n" + generate(children[children.size()-1], enclosingObject); + output += CifyFunctionName(data.symbol.getName()) + nameDecoration + "(" + parameters + ")\n" + generate(children[children.size()-1], enclosingObject); return output; } case code_block: @@ -218,7 +218,7 @@ std::string CGenerator::generate(NodeTree* from, NodeTree* enc std::cout << "Decorating (in access-should be object) " << name << " " 
<< functionDefChildren.size() << std::endl; for (int i = 0; i < (functionDefChildren.size() > 0 ? functionDefChildren.size()-1 : 0); i++) nameDecoration += "_" + ValueTypeToCTypeDecoration(functionDefChildren[i]->getData().valueType); -/*HERE*/ return possibleObjectType->getDataRef()->symbol.getName() +"__" + functionName + nameDecoration + "(" + (name == "." ? "&" : "") + generate(children[1], enclosingObject) + ","; +/*HERE*/ return possibleObjectType->getDataRef()->symbol.getName() +"__" + CifyFunctionName(functionName) + nameDecoration + "(" + (name == "." ? "&" : "") + generate(children[1], enclosingObject) + ","; //The comma lets the upper function call know we already started the param list //Note that we got here from a function call. We just pass up this special case and let them finish with the perentheses } else { @@ -240,7 +240,7 @@ std::string CGenerator::generate(NodeTree* from, NodeTree* enc bool isSelfObjectMethod = enclosingObject && contains(enclosingObject->getChildren(), children[0]); if (isSelfObjectMethod) output += enclosingObject->getDataRef()->symbol.getName() +"__"; -/*HERE*/ output += name + nameDecoration + "("; +/*HERE*/ output += CifyFunctionName(name) + nameDecoration + "("; if (isSelfObjectMethod) output += children.size() > 1 ? 
"self," : "self"; } @@ -285,8 +285,8 @@ std::string CGenerator::generateObjectMethod(NodeTree* enclosingObject, nameDecoration += "_" + ValueTypeToCTypeDecoration(children[i]->getData().valueType); } output += "\n" + ValueTypeToCType(data.valueType) + " " + enclosingObject->getDataRef()->symbol.getName() +"__" - + data.symbol.getName() + nameDecoration + "(" + ValueTypeToCType(&enclosingObjectType) + " self" + parameters + ")\n" - + generate(children[children.size()-1], enclosingObject); //Pass in the object so we can + + CifyFunctionName(data.symbol.getName()) + nameDecoration + "(" + ValueTypeToCType(&enclosingObjectType) + + " self" + parameters + ")\n" + generate(children[children.size()-1], enclosingObject); //Pass in the object so we can properly handle access to member stuff return output; } @@ -362,3 +362,47 @@ std::string CGenerator::ValueTypeToCTypeDecoration(Type *type) { return return_type; } +std::string CGenerator::CifyFunctionName(std::string name) { + std::string operatorsToReplace[] = { "+", "plus", + "-", "minus", + "*", "star", + "/", "div", + "%", "mod", + "^", "carat", + "&", "amprsd", + "|", "pipe", + "~", "tilde", + "!", "exclamationpt", + ",", "comma", + "=", "equals", + "++", "doubleplus", + "--", "doubleminus", + "<<", "doubleleft", + ">>", "doubleright", + "==", "doubleequals", + "!=", "notequals", + "&&", "doubleamprsnd", + "||", "doublepipe", + "+=", "plusequals", + "-=", "minusequals", + "/=", "divequals", + "%=", "modequals", + "^=", "caratequals", + "&=", "amprsdequals", + "|=", "pipeequals", + "*=", "starequals", + "<<=", "doublerightequals", + ">>=", "doubleleftequals", + "->", "arrow" }; + int length = sizeof(operatorsToReplace)/sizeof(std::string); + //std::cout << "Length is " << length << std::endl; + for (int i = 0; i < length; i+= 2) { + size_t foundPos = name.find(operatorsToReplace[i]); + while(foundPos != std::string::npos) { + name = strSlice(name, 0, foundPos) + "_" + operatorsToReplace[i+1] + "_" + strSlice(name, 
foundPos+operatorsToReplace[i].length(), -1); + foundPos = name.find(operatorsToReplace[i]); + } + } + return name; +} +