From 98b899b8a98e37d4fe23f7de08c84c2e34902515 Mon Sep 17 00:00:00 2001 From: Nathan Braswell Date: Fri, 27 Dec 2013 13:05:07 -0600 Subject: [PATCH] Added rough but working scoping. --- CMakeLists.txt | 2 + include/ASTData.h | 3 +- include/ASTTransformation.h | 6 +- krakenGrammer.kgm | 4 +- main.cpp | 6 +- src/ASTTransformation.cpp | 172 +++++++++++++++++++++++++++--------- src/CGenerator.cpp | 30 +++++-- 7 files changed, 170 insertions(+), 53 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e0fcec1..ad4cfeb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,8 @@ cmake_minimum_required (VERSION 2.6) project(Kraken) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include) set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/GraphStructuredStack.cpp src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp src/ASTData.cpp src/ASTTransformation.cpp src/CGenerator.cpp src/Type.cpp ) diff --git a/include/ASTData.h b/include/ASTData.h index e511b98..9c39d04 100644 --- a/include/ASTData.h +++ b/include/ASTData.h @@ -2,7 +2,7 @@ #define ASTDATA_H #include -#include +#include #include "Symbol.h" #include "Type.h" @@ -28,6 +28,7 @@ class ASTData { ASTType type; Type valueType; Symbol symbol; + std::map*> scope; private: }; diff --git a/include/ASTTransformation.h b/include/ASTTransformation.h index 4ecd57f..c9d1c41 100644 --- a/include/ASTTransformation.h +++ b/include/ASTTransformation.h @@ -1,6 +1,9 @@ #ifndef ASTTRANSFORMATION_H #define ASTTRANSFORMATION_H +#include +#include + #include "ASTData.h" #include "NodeTransformation.h" @@ -9,8 +12,9 @@ class ASTTransformation: public NodeTransformation { ASTTransformation(); ~ASTTransformation(); virtual NodeTree* transform(NodeTree* from); + NodeTree* transform(NodeTree* from, NodeTree* scope); std::string concatSymbolTree(NodeTree* root); - + NodeTree* scopeLookup(NodeTree* scope, std::string lookup); private: //Nothing }; diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index 419e345..9ea5314 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -76,5 +76,5 @@ alpha = "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I| numeric = "(0|1|2|3|4|5|6|7|8|9)+" ; string = triple_quoted_string | "\"(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|%| | |\\|/|\||\(|\)|0|1|2|3|4|5|6|7|8|9)+\"" ; -comment = "//(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|;|%|=|\+| | |\\|/|\||\(|\)|\*|0|1|2|3|4|5|6|7|8|9)+ -" | "/\*(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|%|=|\+| | |\\|/|\||\(|\)|0|1|2|3|4|5|6|7|8|9)+\*/" ; \ No newline at end of file +comment = "//(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|;|%|=|\+| | |\\|/|\||\(|\)|\*|\"|0|1|2|3|4|5|6|7|8|9)+ +" | "/\*(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|.|,|!|\?|_|-|:|%|=|\+| | |\\|/|\||\(|\)|\"|0|1|2|3|4|5|6|7|8|9)+\*/" ; \ No newline at end of file diff --git a/main.cpp b/main.cpp index f5f9b67..8a7986f 100644 --- a/main.cpp +++ b/main.cpp @@ -128,6 +128,8 @@ int main(int argc, char* argv[]) { removeSymbols.push_back(Symbol("interpreter_directive", false)); removeSymbols.push_back(Symbol("if", true)); removeSymbols.push_back(Symbol("while", true)); + removeSymbols.push_back(Symbol("__if_comp__", true)); + removeSymbols.push_back(Symbol("comp_simple_passthrough", true)); for (int i = 0; i < removeSymbols.size(); i++) parseTree = RemovalTransformation(removeSymbols[i]).transform(parseTree); @@ -148,9 +150,6 @@ int main(int argc, char* argv[]) { for (int i = 0; i < collapseSymbols.size(); i++) parseTree = CollapseTransformation(collapseSymbols[i]).transform(parseTree); - - NodeTree* AST = ASTTransformation().transform(parseTree); - if (parseTree) { outFileTransformed << parseTree->DOTGraphString() << std::endl; } else { @@ -158,6 +157,7 @@ int main(int argc, char* argv[]) { } outFileTransformed.close(); + NodeTree* AST = ASTTransformation().transform(parseTree); if (AST) { outFileAST << AST->DOTGraphString() << std::endl; } else { diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index ea22d29..118e44c 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -9,6 +9,11 @@ ASTTransformation::~ASTTransformation() { } NodeTree* ASTTransformation::transform(NodeTree* from) { + //Set up top scope + return transform(from, NULL); +} + +NodeTree* ASTTransformation::transform(NodeTree* from, NodeTree* scope) { Symbol current = from->getData(); std::string name = current.getName(); NodeTree* newNode; @@ -17,59 +22,93 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { if (name == "translation_unit") { newNode = new NodeTree(name, ASTData(translation_unit)); + scope = newNode; + //Temporary scope fix + scope->getDataRef()->scope["+"] = new NodeTree(); + scope->getDataRef()->scope["-"] = new NodeTree(); + scope->getDataRef()->scope["*"] = new NodeTree(); + scope->getDataRef()->scope["&"] = new NodeTree(); + scope->getDataRef()->scope["=="] = new NodeTree(); + scope->getDataRef()->scope["--"] = new NodeTree(); + scope->getDataRef()->scope["++"] = new NodeTree(); + scope->getDataRef()->scope["<="] = new NodeTree(); + scope->getDataRef()->scope[">="] = new NodeTree(); + scope->getDataRef()->scope["*="] = new NodeTree(); + scope->getDataRef()->scope["+="] = new NodeTree(); + scope->getDataRef()->scope["-="] = new NodeTree(); + scope->getDataRef()->scope["<"] = new NodeTree(); + scope->getDataRef()->scope[">"] = new NodeTree(); } else if (name == "interpreter_directive") { newNode = new NodeTree(name, ASTData(interpreter_directive)); } else if (name == "import" && !current.isTerminal()) { newNode = new NodeTree(name, ASTData(import, Symbol(concatSymbolTree(children[0]), true))); + //Add to scope? + // + // return newNode; // Don't need children of import } else if (name == "identifier") { - newNode = new NodeTree(name, ASTData(identifier, Symbol(concatSymbolTree(children[0]), true))); + std::string lookupName = concatSymbolTree(children[0]); + std::cout << "scope lookup from identifier" << std::endl; + newNode = scopeLookup(scope, lookupName); + if (newNode == NULL) { + std::cout << "scope lookup error! Could not find " << lookupName << std::endl; + throw "LOOKUP ERROR: " + lookupName; + } + //newNode = new NodeTree(name, ASTData(identifier, Symbol(concatSymbolTree(children[0]), true))); } else if (name == "function") { - newNode = new NodeTree(name, ASTData(function, Symbol(concatSymbolTree(children[1]), true), Type(concatSymbolTree(children[0])))); + std::string functionName = concatSymbolTree(children[1]); + newNode = new NodeTree(name, ASTData(function, Symbol(functionName, true), Type(concatSymbolTree(children[0])))); skipChildren.insert(0); skipChildren.insert(1); + scope->getDataRef()->scope[functionName] = newNode; + newNode->getDataRef()->scope["~enclosing_scope"] = scope; + scope = newNode; } else if (name == "code_block") { newNode = new NodeTree(name, ASTData(code_block)); + newNode->getDataRef()->scope["~enclosing_scope"] = scope; + scope = newNode; } else if (name == "typed_parameter") { - newNode = transform(children[1]); //Transform to get the identifier - newNode->getDataRef()->valueType = Type(concatSymbolTree(children[0])); //Get the type (left child) and set our new identifer to be that type + //newNode = transform(children[1]); //Transform to get the identifier + std::string parameterName = concatSymbolTree(children[1]); + std::string typeString = concatSymbolTree(children[0]);//Get the type (left child) and set our new identifer to be that type + newNode = new NodeTree("identifier", ASTData(identifier, Symbol(parameterName, true), Type(typeString))); + scope->getDataRef()->scope[parameterName] = newNode; return newNode; - } else if (name == "boolean_expression") { + } else if (name == "boolean_expression" || name == "and_boolean_expression" || name == "bool_exp") { //If this is an actual part of an expression, not just a premoted term if (children.size() > 1) { std::string functionCallName = concatSymbolTree(children[1]); + std::cout << "scope lookup from boolen_expression or similar" << std::endl; + NodeTree* function = scopeLookup(scope, functionCallName); + if (function == NULL) { + std::cout << "scope lookup error! Could not find " << functionCallName << std::endl; + throw "LOOKUP ERROR: " + functionCallName; + } newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); + newNode->addChild(function); // First child of function call is a link to the function definition skipChildren.insert(1); } else { - return transform(children[0]); //Just a promoted term, so do child - } - } else if (name == "and_boolean_expression") { - //If this is an actual part of an expression, not just a premoted bool_exp - if (children.size() > 1) { - std::string functionCallName = concatSymbolTree(children[1]); - newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); - skipChildren.insert(1); - } else { - return transform(children[0]); //Just a promoted bool_exp, so do child - } - } else if (name == "bool_exp") { - //If this is an actual part of an expression, not just a premoted bool_exp. - if (children.size() > 1) { - std::string functionCallName = concatSymbolTree(children[1]); - newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); - skipChildren.insert(1); - } else { - return transform(children[0]); //Just a promoted bool_exp, so do child + std::cout << children.size() << std::endl; + if (children.size() == 0) + return new NodeTree(); + return transform(children[0], scope); //Just a promoted term, so do child } //Here's the order of ops stuff } else if (name == "expression" || name == "shiftand" || name == "term" || name == "unarad") { //unarad can ride through, it should always just be a promoted child //If this is an actual part of an expression, not just a premoted child if (children.size() > 2) { std::string functionCallName = concatSymbolTree(children[1]); + std::cout << "scope lookup from expression or similar" << std::endl; + NodeTree* function = scopeLookup(scope, functionCallName); + if (function == NULL) { + std::cout << "scope lookup error! Could not find " << functionCallName << std::endl; + throw "LOOKUP ERROR: " + functionCallName; + } newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); + newNode->addChild(function); // First child of function call is a link to the function definition skipChildren.insert(1); } else { - return transform(children[0]); //Just a promoted child, so do it instead + return transform(children[0], scope); //Just a promoted child, so do it instead } } else if (name == "factor") { //Do factor here, as it has all the weird unary operators //If this is an actual part of an expression, not just a premoted child @@ -82,10 +121,17 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { else funcName = concatSymbolTree(children[1]), funcNum = 1; + std::cout << "scope lookup from factor" << std::endl; + NodeTree* function = scopeLookup(scope, funcName); + if (function == NULL) { + std::cout << "scope lookup error! Could not find " << funcName << std::endl; + throw "LOOKUP ERROR: " + funcName; + } newNode = new NodeTree(funcName, ASTData(function_call, Symbol(funcName, true))); + newNode->addChild(function); skipChildren.insert(funcNum); } else { - return transform(children[0]); //Just a promoted child, so do it instead + return transform(children[0], scope); //Just a promoted child, so do it instead } } else if (name == "statement") { newNode = new NodeTree(name, ASTData(statement)); @@ -101,43 +147,66 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { newNode = new NodeTree(name, ASTData(assignment_statement)); std::string assignFuncName = concatSymbolTree(children[1]); if (assignFuncName == "=") { - newNode->addChild(transform(children[0])); - newNode->addChild(transform(children[2])); + newNode->addChild(transform(children[0], scope)); + newNode->addChild(transform(children[2], scope)); } else { //For assignments like += or *=, expand the syntatic sugar. - NodeTree* lhs = transform(children[0]); - NodeTree* childCall = new NodeTree(assignFuncName.substr(0,1), ASTData(function_call, Symbol(assignFuncName.substr(0,1), true))); + NodeTree* lhs = transform(children[0], scope); + std::string functionName = assignFuncName.substr(0,1); + NodeTree* childCall = new NodeTree(functionName, ASTData(function_call, Symbol(functionName, true))); + NodeTree* functionDef = scopeLookup(scope, functionName); + if (functionDef == NULL) { + std::cout << "scope lookup error! Could not find " << functionName << std::endl; + throw "LOOKUP ERROR: " + functionName; + } + childCall->addChild(functionDef); //First child of function call is definition of the function childCall->addChild(lhs); - childCall->addChild(transform(children[2])); + childCall->addChild(transform(children[2], scope)); newNode->addChild(lhs); newNode->addChild(childCall); } return newNode; } else if (name == "declaration_statement") { newNode = new NodeTree(name, ASTData(declaration_statement)); - NodeTree* newIdentifier = transform(children[1]); //Transform the identifier - newIdentifier->getDataRef()->valueType = Type(concatSymbolTree(children[0]));//set the type of the identifier + + // NodeTree* newIdentifier = transform(children[1], scope); //Transform the identifier + // newIdentifier->getDataRef()->valueType = Type(concatSymbolTree(children[0]));//set the type of the identifier + + std::string newIdentifierStr = concatSymbolTree(children[1]); + std::string typeString = concatSymbolTree(children[0]);//Get the type (left child) and set our new identifer to be that type + NodeTree* newIdentifier = new NodeTree("identifier", ASTData(identifier, Symbol(newIdentifierStr, true), Type(typeString))); + scope->getDataRef()->scope[newIdentifierStr] = newIdentifier; + newNode->addChild(newIdentifier); skipChildren.insert(0); //These, the type and the identifier, have been taken care of. skipChildren.insert(1); } else if (name == "if_comp") { newNode = new NodeTree(name, ASTData(if_comp)); + newNode->addChild(new NodeTree("identifier", ASTData(identifier, Symbol(concatSymbolTree(children[0]),true)))); + skipChildren.insert(0); //Don't do the identifier. The identifier lookup will fail. That's why we do it here. } else if (name == "simple_passthrough") { newNode = new NodeTree(name, ASTData(simple_passthrough)); } else if (name == "function_call") { //children[0] is scope std::string functionCallName = concatSymbolTree(children[1]); newNode = new NodeTree(functionCallName, ASTData(function_call, Symbol(functionCallName, true))); + std::cout << "scope lookup from function_call" << std::endl; + NodeTree* function = scopeLookup(scope, functionCallName); + if (function == NULL) { + std::cout << "scope lookup error! Could not find " << functionCallName << std::endl; + throw "LOOKUP ERROR: " + functionCallName; + } + newNode->addChild(function); skipChildren.insert(1); } else if (name == "parameter") { - return transform(children[0]); //Don't need a parameter node, just the value + return transform(children[0], scope); //Don't need a parameter node, just the value } else if (name == "parameter") { - return transform(children[0]); //Don't need a parameter node, just the value + return transform(children[0], scope); //Don't need a parameter node, just the value } else if (name == "type") { std::string theConcat = concatSymbolTree(from); //We have no symbol, so this will concat our children newNode = new NodeTree(name, ASTData(value, Symbol(theConcat, true), Type(theConcat))); } else if (name == "number") { - return transform(children[0]); + return transform(children[0], scope); } else if (name == "integer") { newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(from), true), Type(integer))); } else if (name == "float") { @@ -145,7 +214,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { } else if (name == "double") { newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(from), true), Type(double_percision))); } else if (name == "char") { - newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), Type(character))); //Indirection of 1 for array + newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), Type(character, 1))); //Indirection of 1 for array } else if (name == "string" || name == "triple_quoted_string") { newNode = new NodeTree(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), Type(character, 1))); //Indirection of 1 for array } else { @@ -155,8 +224,8 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { // In general, iterate through children and do them. Might not do this for all children. for (int i = 0; i < children.size(); i++) { if (skipChildren.find(i) == skipChildren.end()) { - NodeTree* transChild = transform(children[i]); - if (transChild->getData().type) //Only add the children that have a real ASTData::ASTType, that is, legit ASTData. + NodeTree* transChild = transform(children[i], scope); + if (transChild->getDataRef()->type) //Only add the children that have a real ASTData::ASTType, that is, legit ASTData. newNode->addChild(transChild); else delete transChild; @@ -168,7 +237,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { std::string ASTTransformation::concatSymbolTree(NodeTree* root) { std::string concatString; - std::string ourValue = root->getData().getValue(); + std::string ourValue = root->getDataRef()->getValue(); if (ourValue != "NoValue") concatString += ourValue; std::vector*> children = root->getChildren(); @@ -177,3 +246,26 @@ std::string ASTTransformation::concatSymbolTree(NodeTree* root) { } return concatString; } + +NodeTree* ASTTransformation::scopeLookup(NodeTree* scope, std::string lookup) { + //Seach the map + auto scopeMap = scope->getDataRef()->scope; + std::cout << "scope size: " << scopeMap.size() << ", scope from " << scope->getName() << std::endl; + for (auto i = scopeMap.begin(); i != scopeMap.end(); i++) + std::cout << i->first << " : " << i-> second << " - " << i->second->getName() << std::endl; + + auto elementIterator = scopeMap.find(lookup); + if (elementIterator != scopeMap.end()) { + std::cout << "lookup of " << lookup << " succeded in first scope!" << std::endl; + return elementIterator->second; + } + std::cout << "lookup of " << lookup << " failed in first scope, checking for upper scope" << std::endl; + //if it doesn't exist, try the enclosing scope if it exists. + auto enclosingIterator = scopeMap.find("~enclosing_scope"); + if (enclosingIterator != scopeMap.end()) { + std::cout << "upper scope exists, searching it for " << lookup << std::endl; + return scopeLookup(enclosingIterator->second, lookup); + } + std::cout << "upper scope does not exist" << std::endl; + return NULL; +} diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp index 2b4645d..260a12d 100644 --- a/src/CGenerator.cpp +++ b/src/CGenerator.cpp @@ -20,7 +20,22 @@ std::string CGenerator::generate(NodeTree* from) { std::string output = ""; switch (data.type) { case translation_unit: - //Do nothing + //Declare everything in translation unit scope here. (allows stuff from other files, automatic forward declarations) + for (auto i = data.scope.begin(); i != data.scope.end(); i++) { + NodeTree* declaration = i->second; + ASTData declarationData = i->second->getData(); + switch(declarationData.type) { + case identifier: + output += ValueTypeToCType(declarationData.valueType) + " " + declarationData.symbol.getName() + "; /*identifier*/\n"; + break; + case function: + output += "/*func*/\n"; + break; + default: + std::cout << "Declaration? of unknown type " << ASTData::ASTTypeToString(declarationData.type) << " in translation unit scope" << std::endl; + output += "/*unknown declaration*/\n"; + } + } break; case interpreter_directive: //Do nothing @@ -82,18 +97,21 @@ std::string CGenerator::generate(NodeTree* from) { return strSlice(generate(children[0]), 3, -4); case function_call: { + //NOTE: The first (0th) child of a function call node is the declaration of the function + //Handle operators specially for now. Will later replace with //Inlined functions in the standard library std::string name = data.symbol.getName(); + std::cout << name << " == " << children[0]->getData().symbol.getName() << std::endl; if (name == "++" || name == "--") - return generate(children[0]) + name; - if (name == "*" && children.size() == 1) //Is dereference, not multiplication - return "*(" + generate(children[0]) + ")"; + return generate(children[1]) + name; + if (name == "*" && children.size() == 2) //Is dereference, not multiplication + return "*(" + generate(children[1]) + ")"; if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!=" || name == "<" || name == ">" || name == "%" || name == "+=" || name == "-=" || name == "*=" || name == "/=") { - return "((" + generate(children[0]) + ")" + name + "(" + generate(children[1]) + "))"; + return "((" + generate(children[1]) + ")" + name + "(" + generate(children[2]) + "))"; } output += data.symbol.getName() + "("; - for (int i = 0; i < children.size(); i++) + for (int i = 1; i < children.size(); i++) //children[0] is the declaration if (i < children.size()-1) output += generate(children[i]) + ", "; else output += generate(children[i]);