diff --git a/CMakeLists.txt b/CMakeLists.txt index 4b933c9..4083ad4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project(Kraken) set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include) -set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/GraphStructuredStack.cpp src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp src/ASTData.cpp src/ASTTransformation.cpp ) +set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/GraphStructuredStack.cpp src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp src/ASTData.cpp src/ASTTransformation.cpp src/CGenerator.cpp ) include_directories( ${MY_INCLUDES} ) diff --git a/include/ASTData.h b/include/ASTData.h index b603224..7daf15d 100644 --- a/include/ASTData.h +++ b/include/ASTData.h @@ -12,8 +12,8 @@ enum ASTType {undef, translation_unit, interpreter_directive, import, identifier, function, code_block, typed_parameter, expression, boolean_expression, statement, - if_statement, return_statement, assignment_statement, function_call, - value}; + if_statement, return_statement, assignment_statement, declaration_statement, + function_call, value}; enum ValueType {none, boolean, integer, floating, double_percision, char_string }; diff --git a/include/CGenerator.h b/include/CGenerator.h new file mode 100644 index 0000000..9b43889 --- /dev/null +++ b/include/CGenerator.h @@ -0,0 +1,21 @@ +#ifndef CGENERATOR_H +#define CGENERATOR_H + +#include +#include + +#include "NodeTree.h" +#include "ASTData.h" + + +class CGenerator { + public: + CGenerator(); + ~CGenerator(); + std::string generate(NodeTree* from); + static std::string ValueTypeToCType(ValueType type); + private: + std::string tabs(); + int tabLevel; +}; +#endif \ No newline at end of file diff --git 
a/include/CollapseTransformation.h b/include/CollapseTransformation.h index a9b3226..a4c8c88 100644 --- a/include/CollapseTransformation.h +++ b/include/CollapseTransformation.h @@ -41,10 +41,8 @@ NodeTree* CollapseTransformation::transform(NodeTree* from) { if (children[i]->getData() == toCollapse) { node->removeChild(children[i]); std::vector*> newChildren = children[i]->getChildren(); - node->addChildren(newChildren); + node->insertChildren(i,newChildren); toProcess.push(node); //Do this node again - // for (int j = 0; j < newChildren.size(); j++) - // toProcess.push(newChildren[j]); } else toProcess.push(children[i]); diff --git a/include/NodeTree.h b/include/NodeTree.h index fdb56e0..0f82dac 100644 --- a/include/NodeTree.h +++ b/include/NodeTree.h @@ -27,8 +27,11 @@ class NodeTree { std::vector*> getParents(); void addChild(NodeTree* child); + void insertChild(int i, NodeTree* child); void addChildren(std::vector*>* children); void addChildren(std::vector*> children); + void insertChildren(int index, std::vector*>* children); + void insertChildren(int index, std::vector*> children); int findChild(NodeTree* child); void removeChild(NodeTree* child); void removeChild(int index); @@ -40,6 +43,7 @@ class NodeTree { void setName(std::string); T getData() const; + T* getDataRef(); void setData(T data); int size(); @@ -130,6 +134,14 @@ void NodeTree::addChild(NodeTree* child) { children.push_back(child); } +template +void NodeTree::insertChild(int i, NodeTree* child) { + if (!child) + throw "Help, NULL child"; + if (findChild(child) == -1) + children.insert(children.begin()+i,child); +} + template void NodeTree::addChildren(std::vector*>* children) { for (typename std::vector*>::size_type i = 0; i < children->size(); i++) @@ -142,6 +154,18 @@ void NodeTree::addChildren(std::vector*> children) { addChild(children[i]); } +template +void NodeTree::insertChildren(int index, std::vector*>* children) { + for (typename std::vector*>::size_type i = 0; i < children->size(); 
i++) + insertChild(index+i,(*children)[i]); +} + +template +void NodeTree::insertChildren(int index, std::vector*> children) { + for (typename std::vector*>::size_type i = 0; i < children.size(); i++) + insertChild(index+i, children[i]); +} + template int NodeTree::findChild(NodeTree* child) { for (int i = 0; i < children.size(); i++) { @@ -207,6 +231,11 @@ T NodeTree::getData() const { return data; } +template +T* NodeTree::getDataRef() { + return &data; +} + template void NodeTree::setData(T data) { this->data = data; diff --git a/include/Parser.h b/include/Parser.h index fc43972..0e876d1 100644 --- a/include/Parser.h +++ b/include/Parser.h @@ -49,10 +49,9 @@ class Parser { std::vector< State* > stateSets; - //The EOFSymbol, a pointer because of use in table, etc Symbol EOFSymbol; - //The nullSymbol, ditto with above. Also used in comparisons Symbol nullSymbol; + Symbol invalidSymbol; Table table; diff --git a/krakenGrammer.kgm b/krakenGrammer.kgm index 0c42b82..4689ab8 100644 --- a/krakenGrammer.kgm +++ b/krakenGrammer.kgm @@ -23,7 +23,7 @@ function_list = function_list WS function | function ; function = type WS identifier WS "\(" WS opt_typed_parameter_list WS "\)" WS code_block ; opt_typed_parameter_list = typed_parameter_list | ; -typed_parameter_list = typed_parameter_list WS typed_parameter | typed_parameter ; +typed_parameter_list = typed_parameter_list WS "," WS typed_parameter | typed_parameter ; typed_parameter = type WS parameter ; opt_parameter_list = parameter_list | ; @@ -32,7 +32,7 @@ parameter = expression ; code_block = "{" WS statement_list WS "}" ; statement_list = statement_list WS statement | statement ; -statement = if_statement | return_statement | expression WS ";" | boolean_expression WS ";" | assignment_statement WS ";" | code_block ; +statement = if_statement | return_statement | expression WS ";" | boolean_expression WS ";" | assignment_statement WS ";" | declaration_statement WS ";" | code_block ; function_call = scope identifier 
"\(" WS opt_parameter_list WS "\)" ; scope = scope identifier "::" | ; @@ -50,6 +50,7 @@ factor = number | identifier | function_call | bool | string ; number = integer | float | double ; assignment_statement = identifier WS "=" WS expression ; +declaration_statement = type WS identifier WS "=" WS expression ; alphanumeric = alphanumeric numeric | alphanumeric alpha | numeric | alpha ; hexadecimal = "0x(1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)+" ; diff --git a/main.cpp b/main.cpp index a6d0fe0..f94a99f 100644 --- a/main.cpp +++ b/main.cpp @@ -14,6 +14,7 @@ #include "CollapseTransformation.h" #include "ASTTransformation.h" #include "ASTData.h" +#include "CGenerator.h" int main(int argc, char* argv[]) { @@ -25,7 +26,7 @@ int main(int argc, char* argv[]) { } std::ifstream programInFile, grammerInFile; - std::ofstream outFile, outFileTransformed, outFileAST; + std::ofstream outFile, outFileTransformed, outFileAST, outFileC; programInFile.open(argv[1]); if (!programInFile.is_open()) { @@ -57,6 +58,11 @@ int main(int argc, char* argv[]) { return(1); } + outFileC.open((std::string(argv[3]) + ".c").c_str()); + if (!outFileC.is_open()) { + std::cout << "Probelm opening third output file " << std::string(argv[3]) + ".c" << "\n"; + return(1); + } //Read the input file into a string std::string programInputFileString, grammerInputFileString; std::string line; @@ -64,11 +70,13 @@ int main(int argc, char* argv[]) { getline(grammerInFile, line); grammerInputFileString.append(line+"\n"); } + grammerInFile.close(); while(programInFile.good()) { getline(programInFile, line); programInputFileString.append(line+"\n"); } + programInFile.close(); //LALRParser parser; RNGLRParser parser; @@ -101,6 +109,7 @@ int main(int argc, char* argv[]) { } else { std::cout << "ParseTree returned from parser is NULL!" 
<< std::endl; } + outFile.close(); //Pre AST Transformations std::vector*> preASTTransforms; @@ -124,6 +133,7 @@ int main(int argc, char* argv[]) { preASTTransforms.push_back(new CollapseTransformation(Symbol("function_list", false))); preASTTransforms.push_back(new CollapseTransformation(Symbol("statement_list", false))); preASTTransforms.push_back(new CollapseTransformation(Symbol("parameter_list", false))); + preASTTransforms.push_back(new CollapseTransformation(Symbol("typed_parameter_list", false))); for (int i = 0; i < preASTTransforms.size(); i++) { parseTree = preASTTransforms[i]->transform(parseTree); @@ -138,20 +148,24 @@ int main(int argc, char* argv[]) { } else { std::cout << "Tree returned from transformation is NULL!" << std::endl; } - + outFileTransformed.close(); if (AST) { outFileAST << AST->DOTGraphString() << std::endl; } else { std::cout << "Tree returned from ASTTransformation is NULL!" << std::endl; } - - programInFile.close(); - grammerInFile.close(); - outFile.close(); - outFileTransformed.close(); outFileAST.close(); + //Do type checking, scope creation, etc. here. 
+ //None at this time, instead going straight to C in this first (more naive) version + + //Code generation + //For right now, just C + std::string c_code = CGenerator().generate(AST); + outFileC << c_code << std::endl; + outFileC.close(); + return(0); } diff --git a/src/ASTData.cpp b/src/ASTData.cpp index 51c5ad6..7a4bb39 100644 --- a/src/ASTData.cpp +++ b/src/ASTData.cpp @@ -43,19 +43,19 @@ std::string ASTData::ValueTypeToString(ValueType type) { return "none"; break; case boolean: - return "boolean"; + return "bool"; break; case integer: - return "integer"; + return "int"; break; case floating: - return "floating"; + return "float"; break; case double_percision: - return "double_percision"; + return "double"; break; case char_string: - return "char_string"; + return "string"; break; default: return "unknown_ValueType"; @@ -103,6 +103,9 @@ std::string ASTData::ASTTypeToString(ASTType type) { case assignment_statement: return "assignment_statement"; break; + case declaration_statement: + return "declaration_statement"; + break; case function_call: return "function_call"; break; diff --git a/src/ASTTransformation.cpp b/src/ASTTransformation.cpp index 44932c2..8c51ef9 100644 --- a/src/ASTTransformation.cpp +++ b/src/ASTTransformation.cpp @@ -31,7 +31,9 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { } else if (name == "code_block") { newNode = new NodeTree(name, ASTData(code_block)); } else if (name == "typed_parameter") { - newNode = new NodeTree(name, ASTData(typed_parameter)); + newNode = transform(children[1]); //Transform to get the identifier + newNode->getDataRef()->valueType = ASTData::strToType(concatSymbolTree(children[0])); //Get the type (left child) and set our new identifier to be that type + return newNode; } else if (name == "expression") { //If this is an actual part of an expression, not just a premoted term if (children.size() > 1) { @@ -62,6 +64,13 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { newNode = new NodeTree(name, 
ASTData(return_statement)); } else if (name == "assignment_statement") { newNode = new NodeTree(name, ASTData(assignment_statement)); + } else if (name == "declaration_statement") { + newNode = new NodeTree(name, ASTData(declaration_statement)); + NodeTree* newIdentifier = transform(children[1]); //Transform the identifier + newIdentifier->getDataRef()->valueType = ASTData::strToType(concatSymbolTree(children[0]));//set the type of the identifier + newNode->addChild(newIdentifier); + skipChildren.insert(0); //These, the type and the identifier, have been taken care of. + skipChildren.insert(1); } else if (name == "function_call") { //children[0] is scope std::string functionCallName = concatSymbolTree(children[1]); @@ -89,7 +98,7 @@ NodeTree* ASTTransformation::transform(NodeTree* from) { for (int i = 0; i < children.size(); i++) { if (skipChildren.find(i) == skipChildren.end()) { NodeTree* transChild = transform(children[i]); - if (transChild->getData().type) + if (transChild->getData().type) //Only add the children that have a real ASTData::ASTType, that is, legit ASTData. 
newNode->addChild(transChild); else delete transChild; diff --git a/src/CGenerator.cpp b/src/CGenerator.cpp new file mode 100644 index 0000000..04c9d79 --- /dev/null +++ b/src/CGenerator.cpp @@ -0,0 +1,125 @@ +#include "CGenerator.h" + +CGenerator::CGenerator() { + tabLevel = 0; +} +CGenerator::~CGenerator() { + +} + +std::string CGenerator::tabs() { + std::string returnTabs; + for (int i = 0; i < tabLevel; i++) + returnTabs += "\t"; + return returnTabs; +} + +std::string CGenerator::generate(NodeTree* from) { + ASTData data = from->getData(); + std::vector*> children = from->getChildren(); + std::string output = ""; + switch (data.type) { + case translation_unit: + //Do nothing + break; + case interpreter_directive: + //Do nothing + break; + case import: + return "#include \"" + data.symbol.getName() + "\"\n"; + break; + case identifier: + return data.symbol.getName(); + break; + case function: + output += "\n" + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + "("; + for (int i = 0; i < children.size()-1; i++) { + if (i > 0) + output += ", "; + output += ASTData::ValueTypeToString(children[i]->getData().valueType) + " " + generate(children[i]); + } + output+= ")\n" + generate(children[children.size()-1]); + return output; + break; + case code_block: + output += tabs() + "{\n"; + tabLevel++; + for (int i = 0; i < children.size(); i++) + output += generate(children[i]); + tabLevel--; + output += tabs() + "}"; + return output; + break; + case expression: + output += " " + data.symbol.getName() + ", "; + break; + case boolean_expression: + output += " " + data.symbol.getName() + " "; + break; + case statement: + return tabs() + generate(children[0]) + ";\n"; + break; + case if_statement: + output += "if (" + generate(children[0]) + ") \n" + generate(children[1]); + if (children.size() > 2) + output += " else " + generate(children[2]); + return output; + break; + case return_statement: + return "return " + generate(children[0]); + case 
assignment_statement: + return generate(children[0]) + " = " + generate(children[1]); + case declaration_statement: + return ASTData::ValueTypeToString(children[0]->getData().valueType) + " " + generate(children[0]) + " = " + generate(children[1]); + case function_call: + { + //Handle operators specially for now. Will later replace with + //Inlined functions in the standard library + std::string name = data.symbol.getName(); + if (name == "+" || name == "-" || name == "*" || name == "/") { + return "((" + generate(children[0]) + ")" + name + "(" + generate(children[1]) + "))"; + } + output += data.symbol.getName() + "("; + for (int i = 0; i < children.size(); i++) + if (i < children.size()-1) + output += generate(children[i]) + ", "; + else output += generate(children[i]); + output += ") "; + return output; + } + case value: + return data.symbol.getName(); + + default: + std::cout << "Nothing!" << std::endl; + } + for (int i = 0; i < children.size(); i++) + output += generate(children[i]); + + return output; +} + +std::string CGenerator::ValueTypeToCType(ValueType type) { + switch (type) { + case none: + return "none"; + break; + case boolean: + return "bool"; + break; + case integer: + return "int"; + break; + case floating: + return "float"; + break; + case double_percision: + return "double"; + break; + case char_string: + return "char*"; + break; + default: + return "unknown_ValueType"; + } +} diff --git a/src/Lexer.cpp b/src/Lexer.cpp index a8dccea..ce6ad93 100644 --- a/src/Lexer.cpp +++ b/src/Lexer.cpp @@ -45,9 +45,9 @@ Symbol Lexer::next() { //std::cout << "Current at is \"" << input.substr(currentPosition) << "\" currentPos is " << currentPosition <getPattern(), true, eatenString); } else { - //std::cout << "Found no applicable regex" << std::endl; - //std::cout << "Remaining is ||" << input.substr(currentPosition) << "||" << std::endl; - return Symbol(); + std::cout << "Found no applicable regex" << std::endl; + std::cout << "Remaining is ||" << 
input.substr(currentPosition) << "||" << std::endl; + return Symbol("$INVALID$", true); } } diff --git a/src/Parser.cpp b/src/Parser.cpp index 048416b..83cbfaa 100644 --- a/src/Parser.cpp +++ b/src/Parser.cpp @@ -1,6 +1,6 @@ #include "Parser.h" -Parser::Parser() : EOFSymbol("$EOF$", true), nullSymbol("$NULL$", true){ +Parser::Parser() : EOFSymbol("$EOF$", true), nullSymbol("$NULL$", true), invalidSymbol("$INVALID$", true){ table.setSymbols(EOFSymbol, nullSymbol); } diff --git a/src/RNGLRParser.cpp b/src/RNGLRParser.cpp index e58f825..b413321 100644 --- a/src/RNGLRParser.cpp +++ b/src/RNGLRParser.cpp @@ -34,6 +34,11 @@ NodeTree* RNGLRParser::parseInput(std::string inputString) { input.push_back(currentToken); while (currentToken != EOFSymbol) { currentToken = lexer.next(); + //std::cout << "CurrentToken is " << currentToken.toString() << std::endl; + if (currentToken == invalidSymbol) { + std::cout << "Invalid Symbol!" << std::endl; + throw "Invalid Symbol, cannot lex"; + } input.push_back(currentToken); }