Lots of work on a CGenerator, AST, and language improvements

This commit is contained in:
Nathan Braswell
2013-11-01 02:52:18 -04:00
parent ededb069c1
commit 77f2b0a3e5
14 changed files with 232 additions and 28 deletions

View File

@@ -4,7 +4,7 @@ project(Kraken)
set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include) set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include)
set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/GraphStructuredStack.cpp src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp src/ASTData.cpp src/ASTTransformation.cpp ) set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/GraphStructuredStack.cpp src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp src/ASTData.cpp src/ASTTransformation.cpp src/CGenerator.cpp )
include_directories( ${MY_INCLUDES} ) include_directories( ${MY_INCLUDES} )

View File

@@ -12,8 +12,8 @@
enum ASTType {undef, translation_unit, interpreter_directive, import, identifier, enum ASTType {undef, translation_unit, interpreter_directive, import, identifier,
function, code_block, function, code_block,
typed_parameter, expression, boolean_expression, statement, typed_parameter, expression, boolean_expression, statement,
if_statement, return_statement, assignment_statement, function_call, if_statement, return_statement, assignment_statement, declaration_statement,
value}; function_call, value};
enum ValueType {none, boolean, integer, floating, double_percision, char_string }; enum ValueType {none, boolean, integer, floating, double_percision, char_string };

21
include/CGenerator.h Normal file
View File

@@ -0,0 +1,21 @@
#ifndef CGENERATOR_H
#define CGENERATOR_H
#include <string>
#include <iostream>
#include "NodeTree.h"
#include "ASTData.h"
//Turns an AST (a NodeTree of ASTData) into C source text.
class CGenerator {
public:
//Starts with an indentation level of zero.
CGenerator();
~CGenerator();
//Returns the C code generated for the tree rooted at from, recursing through its children.
std::string generate(NodeTree<ASTData>* from);
//Returns the C spelling used for the given ValueType (for example char_string becomes "char*").
static std::string ValueTypeToCType(ValueType type);
private:
//Returns one tab character per current indentation level.
std::string tabs();
//Current indentation depth; generate raises it while emitting the inside of a code_block.
int tabLevel;
};
#endif

View File

@@ -41,10 +41,8 @@ NodeTree<T>* CollapseTransformation<T>::transform(NodeTree<T>* from) {
if (children[i]->getData() == toCollapse) { if (children[i]->getData() == toCollapse) {
node->removeChild(children[i]); node->removeChild(children[i]);
std::vector<NodeTree<T>*> newChildren = children[i]->getChildren(); std::vector<NodeTree<T>*> newChildren = children[i]->getChildren();
node->addChildren(newChildren); node->insertChildren(i,newChildren);
toProcess.push(node); //Do this node again toProcess.push(node); //Do this node again
// for (int j = 0; j < newChildren.size(); j++)
// toProcess.push(newChildren[j]);
} }
else else
toProcess.push(children[i]); toProcess.push(children[i]);

View File

@@ -27,8 +27,11 @@ class NodeTree {
std::vector<NodeTree<T>*> getParents(); std::vector<NodeTree<T>*> getParents();
void addChild(NodeTree<T>* child); void addChild(NodeTree<T>* child);
void insertChild(int i, NodeTree<T>* child);
void addChildren(std::vector<NodeTree<T>*>* children); void addChildren(std::vector<NodeTree<T>*>* children);
void addChildren(std::vector<NodeTree<T>*> children); void addChildren(std::vector<NodeTree<T>*> children);
void insertChildren(int index, std::vector<NodeTree<T>*>* children);
void insertChildren(int index, std::vector<NodeTree<T>*> children);
int findChild(NodeTree<T>* child); int findChild(NodeTree<T>* child);
void removeChild(NodeTree<T>* child); void removeChild(NodeTree<T>* child);
void removeChild(int index); void removeChild(int index);
@@ -40,6 +43,7 @@ class NodeTree {
void setName(std::string); void setName(std::string);
T getData() const; T getData() const;
T* getDataRef();
void setData(T data); void setData(T data);
int size(); int size();
@@ -130,6 +134,14 @@ void NodeTree<T>::addChild(NodeTree<T>* child) {
children.push_back(child); children.push_back(child);
} }
//Inserts child into this node's child list so that it ends up at position i.
//  i     - position to insert at; must satisfy 0 <= i <= current number of children.
//  child - node to insert; must not be NULL.
//A node that is already a child is left where it is and nothing is inserted.
//Throws a C string if child is NULL or if an insertion is needed and i is out of range.
template<class T>
void NodeTree<T>::insertChild(int i, NodeTree<T>* child) {
	if (!child)
		throw "Help, NULL child";
	//Already present: keep the existing no-op behaviour regardless of i.
	if (findChild(child) != -1)
		return;
	//children.begin()+i is only a valid insertion point for 0 <= i <= size.
	if (i < 0 || i > static_cast<int>(children.size()))
		throw "Help, child index out of range";
	children.insert(children.begin()+i, child);
}
template<class T> template<class T>
void NodeTree<T>::addChildren(std::vector<NodeTree<T>*>* children) { void NodeTree<T>::addChildren(std::vector<NodeTree<T>*>* children) {
for (typename std::vector<NodeTree<T>*>::size_type i = 0; i < children->size(); i++) for (typename std::vector<NodeTree<T>*>::size_type i = 0; i < children->size(); i++)
@@ -142,6 +154,18 @@ void NodeTree<T>::addChildren(std::vector<NodeTree<T>*> children) {
addChild(children[i]); addChild(children[i]);
} }
//Inserts every node of *children, in order, starting at position index.
//Nodes that are already children of this node are skipped by insertChild; the
//insertion position only advances past nodes that were really inserted, so the
//new nodes stay contiguous and the position never runs ahead of the child list.
template<class T>
void NodeTree<T>::insertChildren(int index, std::vector<NodeTree<T>*>* children) {
	typedef typename std::vector<NodeTree<T>*>::size_type Count;
	int position = index;
	for (Count i = 0; i < children->size(); i++) {
		//The parameter hides the member of the same name, so reach the member through this->.
		Count sizeBefore = this->children.size();
		insertChild(position, (*children)[i]);
		if (this->children.size() > sizeBefore)
			position++;
	}
}
//Inserts every node of children, in order, starting at position index.
//Nodes that are already children of this node are skipped by insertChild; the
//insertion position only advances past nodes that were really inserted, so the
//new nodes stay contiguous and the position never runs ahead of the child list.
template<class T>
void NodeTree<T>::insertChildren(int index, std::vector<NodeTree<T>*> children) {
	typedef typename std::vector<NodeTree<T>*>::size_type Count;
	int position = index;
	for (Count i = 0; i < children.size(); i++) {
		//The parameter hides the member of the same name, so reach the member through this->.
		Count sizeBefore = this->children.size();
		insertChild(position, children[i]);
		if (this->children.size() > sizeBefore)
			position++;
	}
}
template<class T> template<class T>
int NodeTree<T>::findChild(NodeTree<T>* child) { int NodeTree<T>::findChild(NodeTree<T>* child) {
for (int i = 0; i < children.size(); i++) { for (int i = 0; i < children.size(); i++) {
@@ -207,6 +231,11 @@ T NodeTree<T>::getData() const {
return data; return data;
} }
//Returns a pointer to the data stored in this node (getData returns a copy instead),
//so callers can modify the stored value in place.
template<class T>
T* NodeTree<T>::getDataRef() {
	T* stored = &(this->data);
	return stored;
}
template<class T> template<class T>
void NodeTree<T>::setData(T data) { void NodeTree<T>::setData(T data) {
this->data = data; this->data = data;

View File

@@ -49,10 +49,9 @@ class Parser {
std::vector< State* > stateSets; std::vector< State* > stateSets;
//The EOFSymbol, a pointer because of use in table, etc
Symbol EOFSymbol; Symbol EOFSymbol;
//The nullSymbol, ditto with above. Also used in comparisons
Symbol nullSymbol; Symbol nullSymbol;
Symbol invalidSymbol;
Table table; Table table;

View File

@@ -23,7 +23,7 @@ function_list = function_list WS function | function ;
function = type WS identifier WS "\(" WS opt_typed_parameter_list WS "\)" WS code_block ; function = type WS identifier WS "\(" WS opt_typed_parameter_list WS "\)" WS code_block ;
opt_typed_parameter_list = typed_parameter_list | ; opt_typed_parameter_list = typed_parameter_list | ;
typed_parameter_list = typed_parameter_list WS typed_parameter | typed_parameter ; typed_parameter_list = typed_parameter_list WS "," WS typed_parameter | typed_parameter ;
typed_parameter = type WS parameter ; typed_parameter = type WS parameter ;
opt_parameter_list = parameter_list | ; opt_parameter_list = parameter_list | ;
@@ -32,7 +32,7 @@ parameter = expression ;
code_block = "{" WS statement_list WS "}" ; code_block = "{" WS statement_list WS "}" ;
statement_list = statement_list WS statement | statement ; statement_list = statement_list WS statement | statement ;
statement = if_statement | return_statement | expression WS ";" | boolean_expression WS ";" | assignment_statement WS ";" | code_block ; statement = if_statement | return_statement | expression WS ";" | boolean_expression WS ";" | assignment_statement WS ";" | declaration_statement WS ";" | code_block ;
function_call = scope identifier "\(" WS opt_parameter_list WS "\)" ; function_call = scope identifier "\(" WS opt_parameter_list WS "\)" ;
scope = scope identifier "::" | ; scope = scope identifier "::" | ;
@@ -50,6 +50,7 @@ factor = number | identifier | function_call | bool | string ;
number = integer | float | double ; number = integer | float | double ;
assignment_statement = identifier WS "=" WS expression ; assignment_statement = identifier WS "=" WS expression ;
declaration_statement = type WS identifier WS "=" WS expression ;
alphanumeric = alphanumeric numeric | alphanumeric alpha | numeric | alpha ; alphanumeric = alphanumeric numeric | alphanumeric alpha | numeric | alpha ;
hexadecimal = "0x(1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)+" ; hexadecimal = "0x(1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)+" ;

View File

@@ -14,6 +14,7 @@
#include "CollapseTransformation.h" #include "CollapseTransformation.h"
#include "ASTTransformation.h" #include "ASTTransformation.h"
#include "ASTData.h" #include "ASTData.h"
#include "CGenerator.h"
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
@@ -25,7 +26,7 @@ int main(int argc, char* argv[]) {
} }
std::ifstream programInFile, grammerInFile; std::ifstream programInFile, grammerInFile;
std::ofstream outFile, outFileTransformed, outFileAST; std::ofstream outFile, outFileTransformed, outFileAST, outFileC;
programInFile.open(argv[1]); programInFile.open(argv[1]);
if (!programInFile.is_open()) { if (!programInFile.is_open()) {
@@ -57,6 +58,11 @@ int main(int argc, char* argv[]) {
return(1); return(1);
} }
outFileC.open((std::string(argv[3]) + ".c").c_str());
if (!outFileC.is_open()) {
std::cout << "Probelm opening third output file " << std::string(argv[3]) + ".c" << "\n";
return(1);
}
//Read the input file into a string //Read the input file into a string
std::string programInputFileString, grammerInputFileString; std::string programInputFileString, grammerInputFileString;
std::string line; std::string line;
@@ -64,11 +70,13 @@ int main(int argc, char* argv[]) {
getline(grammerInFile, line); getline(grammerInFile, line);
grammerInputFileString.append(line+"\n"); grammerInputFileString.append(line+"\n");
} }
grammerInFile.close();
while(programInFile.good()) { while(programInFile.good()) {
getline(programInFile, line); getline(programInFile, line);
programInputFileString.append(line+"\n"); programInputFileString.append(line+"\n");
} }
programInFile.close();
//LALRParser parser; //LALRParser parser;
RNGLRParser parser; RNGLRParser parser;
@@ -101,6 +109,7 @@ int main(int argc, char* argv[]) {
} else { } else {
std::cout << "ParseTree returned from parser is NULL!" << std::endl; std::cout << "ParseTree returned from parser is NULL!" << std::endl;
} }
outFile.close();
//Pre AST Transformations //Pre AST Transformations
std::vector<NodeTransformation<Symbol, Symbol>*> preASTTransforms; std::vector<NodeTransformation<Symbol, Symbol>*> preASTTransforms;
@@ -124,6 +133,7 @@ int main(int argc, char* argv[]) {
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("function_list", false))); preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("function_list", false)));
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("statement_list", false))); preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("statement_list", false)));
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("parameter_list", false))); preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("parameter_list", false)));
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("typed_parameter_list", false)));
for (int i = 0; i < preASTTransforms.size(); i++) { for (int i = 0; i < preASTTransforms.size(); i++) {
parseTree = preASTTransforms[i]->transform(parseTree); parseTree = preASTTransforms[i]->transform(parseTree);
@@ -138,20 +148,24 @@ int main(int argc, char* argv[]) {
} else { } else {
std::cout << "Tree returned from transformation is NULL!" << std::endl; std::cout << "Tree returned from transformation is NULL!" << std::endl;
} }
outFileTransformed.close();
if (AST) { if (AST) {
outFileAST << AST->DOTGraphString() << std::endl; outFileAST << AST->DOTGraphString() << std::endl;
} else { } else {
std::cout << "Tree returned from ASTTransformation is NULL!" << std::endl; std::cout << "Tree returned from ASTTransformation is NULL!" << std::endl;
} }
programInFile.close();
grammerInFile.close();
outFile.close();
outFileTransformed.close();
outFileAST.close(); outFileAST.close();
//Do type checking, scope creation, etc. here.
//None at this time, instead going strait to C in this first (more naive) version
//Code generation
//For right now, just C
std::string c_code = CGenerator().generate(AST);
outFileC << c_code << std::endl;
outFileC.close();
return(0); return(0);
} }

View File

@@ -43,19 +43,19 @@ std::string ASTData::ValueTypeToString(ValueType type) {
return "none"; return "none";
break; break;
case boolean: case boolean:
return "boolean"; return "bool";
break; break;
case integer: case integer:
return "integer"; return "int";
break; break;
case floating: case floating:
return "floating"; return "float";
break; break;
case double_percision: case double_percision:
return "double_percision"; return "double";
break; break;
case char_string: case char_string:
return "char_string"; return "string";
break; break;
default: default:
return "unknown_ValueType"; return "unknown_ValueType";
@@ -103,6 +103,9 @@ std::string ASTData::ASTTypeToString(ASTType type) {
case assignment_statement: case assignment_statement:
return "assignment_statement"; return "assignment_statement";
break; break;
case declaration_statement:
return "declaration_statement";
break;
case function_call: case function_call:
return "function_call"; return "function_call";
break; break;

View File

@@ -31,7 +31,9 @@ NodeTree<ASTData>* ASTTransformation::transform(NodeTree<Symbol>* from) {
} else if (name == "code_block") { } else if (name == "code_block") {
newNode = new NodeTree<ASTData>(name, ASTData(code_block)); newNode = new NodeTree<ASTData>(name, ASTData(code_block));
} else if (name == "typed_parameter") { } else if (name == "typed_parameter") {
newNode = new NodeTree<ASTData>(name, ASTData(typed_parameter)); newNode = transform(children[1]); //Transform to get the identifier
newNode->getDataRef()->valueType = ASTData::strToType(concatSymbolTree(children[0])); //Get the type (left child) and set our new identifer to be that type
return newNode;
} else if (name == "expression") { } else if (name == "expression") {
//If this is an actual part of an expression, not just a premoted term //If this is an actual part of an expression, not just a premoted term
if (children.size() > 1) { if (children.size() > 1) {
@@ -62,6 +64,13 @@ NodeTree<ASTData>* ASTTransformation::transform(NodeTree<Symbol>* from) {
newNode = new NodeTree<ASTData>(name, ASTData(return_statement)); newNode = new NodeTree<ASTData>(name, ASTData(return_statement));
} else if (name == "assignment_statement") { } else if (name == "assignment_statement") {
newNode = new NodeTree<ASTData>(name, ASTData(assignment_statement)); newNode = new NodeTree<ASTData>(name, ASTData(assignment_statement));
} else if (name == "declaration_statement") {
newNode = new NodeTree<ASTData>(name, ASTData(declaration_statement));
NodeTree<ASTData>* newIdentifier = transform(children[1]); //Transform the identifier
newIdentifier->getDataRef()->valueType = ASTData::strToType(concatSymbolTree(children[0]));//set the type of the identifier
newNode->addChild(newIdentifier);
skipChildren.insert(0); //These, the type and the identifier, have been taken care of.
skipChildren.insert(1);
} else if (name == "function_call") { } else if (name == "function_call") {
//children[0] is scope //children[0] is scope
std::string functionCallName = concatSymbolTree(children[1]); std::string functionCallName = concatSymbolTree(children[1]);
@@ -89,7 +98,7 @@ NodeTree<ASTData>* ASTTransformation::transform(NodeTree<Symbol>* from) {
for (int i = 0; i < children.size(); i++) { for (int i = 0; i < children.size(); i++) {
if (skipChildren.find(i) == skipChildren.end()) { if (skipChildren.find(i) == skipChildren.end()) {
NodeTree<ASTData>* transChild = transform(children[i]); NodeTree<ASTData>* transChild = transform(children[i]);
if (transChild->getData().type) if (transChild->getData().type) //Only add the children that have a real ASTData::ASTType, that is, legit ASTData.
newNode->addChild(transChild); newNode->addChild(transChild);
else else
delete transChild; delete transChild;

125
src/CGenerator.cpp Normal file
View File

@@ -0,0 +1,125 @@
#include "CGenerator.h"
//A fresh generator starts at indentation level zero.
CGenerator::CGenerator() : tabLevel(0) {
}
//Nothing to clean up.
CGenerator::~CGenerator() {}
//Builds the indentation prefix: one tab character per current tabLevel,
//or an empty string when tabLevel is zero or negative.
std::string CGenerator::tabs() {
	if (tabLevel <= 0)
		return std::string();
	return std::string(static_cast<std::string::size_type>(tabLevel), '\t');
}
std::string CGenerator::generate(NodeTree<ASTData>* from) {
ASTData data = from->getData();
std::vector<NodeTree<ASTData>*> children = from->getChildren();
std::string output = "";
switch (data.type) {
case translation_unit:
//Do nothing
break;
case interpreter_directive:
//Do nothing
break;
case import:
return "#include \"" + data.symbol.getName() + "\"\n";
break;
case identifier:
return data.symbol.getName();
break;
case function:
output += "\n" + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + "(";
for (int i = 0; i < children.size()-1; i++) {
if (i > 0)
output += ", ";
output += ASTData::ValueTypeToString(children[i]->getData().valueType) + " " + generate(children[i]);
}
output+= ")\n" + generate(children[children.size()-1]);
return output;
break;
case code_block:
output += tabs() + "{\n";
tabLevel++;
for (int i = 0; i < children.size(); i++)
output += generate(children[i]);
tabLevel--;
output += tabs() + "}";
return output;
break;
case expression:
output += " " + data.symbol.getName() + ", ";
break;
case boolean_expression:
output += " " + data.symbol.getName() + " ";
break;
case statement:
return tabs() + generate(children[0]) + ";\n";
break;
case if_statement:
output += "if (" + generate(children[0]) + ") \n" + generate(children[1]);
if (children.size() > 2)
output += " else " + generate(children[2]);
return output;
break;
case return_statement:
return "return " + generate(children[0]);
case assignment_statement:
return generate(children[0]) + " = " + generate(children[1]);
case declaration_statement:
return ASTData::ValueTypeToString(children[0]->getData().valueType) + " " + generate(children[0]) + " = " + generate(children[1]);
case function_call:
{
//Handle operators specially for now. Will later replace with
//Inlined functions in the standard library
std::string name = data.symbol.getName();
if (name == "+" || name == "-" || name == "*" || name == "/") {
return "((" + generate(children[0]) + ")" + name + "(" + generate(children[1]) + "))";
}
output += data.symbol.getName() + "(";
for (int i = 0; i < children.size(); i++)
if (i < children.size()-1)
output += generate(children[i]) + ", ";
else output += generate(children[i]);
output += ") ";
return output;
}
case value:
return data.symbol.getName();
default:
std::cout << "Nothing!" << std::endl;
}
for (int i = 0; i < children.size(); i++)
output += generate(children[i]);
return output;
}
//Maps a ValueType to the type name written into generated C code.
//Values without a dedicated mapping yield "unknown_ValueType".
std::string CGenerator::ValueTypeToCType(ValueType type) {
	const char* cName = "unknown_ValueType";
	switch (type) {
		case none:
			cName = "none";
			break;
		case boolean:
			cName = "bool";
			break;
		case integer:
			cName = "int";
			break;
		case floating:
			cName = "float";
			break;
		case double_percision:
			cName = "double";
			break;
		case char_string:
			cName = "char*";
			break;
		default:
			break;
	}
	return cName;
}

View File

@@ -45,9 +45,9 @@ Symbol Lexer::next() {
//std::cout << "Current at is \"" << input.substr(currentPosition) << "\" currentPos is " << currentPosition <<std::endl; //std::cout << "Current at is \"" << input.substr(currentPosition) << "\" currentPos is " << currentPosition <<std::endl;
return Symbol(longestRegEx->getPattern(), true, eatenString); return Symbol(longestRegEx->getPattern(), true, eatenString);
} else { } else {
//std::cout << "Found no applicable regex" << std::endl; std::cout << "Found no applicable regex" << std::endl;
//std::cout << "Remaining is ||" << input.substr(currentPosition) << "||" << std::endl; std::cout << "Remaining is ||" << input.substr(currentPosition) << "||" << std::endl;
return Symbol(); return Symbol("$INVALID$", true);
} }
} }

View File

@@ -1,6 +1,6 @@
#include "Parser.h" #include "Parser.h"
Parser::Parser() : EOFSymbol("$EOF$", true), nullSymbol("$NULL$", true){ Parser::Parser() : EOFSymbol("$EOF$", true), nullSymbol("$NULL$", true), invalidSymbol("$INVALID$", true){
table.setSymbols(EOFSymbol, nullSymbol); table.setSymbols(EOFSymbol, nullSymbol);
} }

View File

@@ -34,6 +34,11 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
input.push_back(currentToken); input.push_back(currentToken);
while (currentToken != EOFSymbol) { while (currentToken != EOFSymbol) {
currentToken = lexer.next(); currentToken = lexer.next();
//std::cout << "CurrentToken is " << currentToken.toString() << std::endl;
if (currentToken == invalidSymbol) {
std::cout << "Invalid Symbol!" << std::endl;
throw "Invalid Symbol, cannot lex";
}
input.push_back(currentToken); input.push_back(currentToken);
} }