Lots of work on a CGenerator, AST and language improvements

This commit is contained in:
Nathan Braswell
2013-11-01 02:52:18 -04:00
parent ededb069c1
commit 77f2b0a3e5
14 changed files with 232 additions and 28 deletions

View File

@@ -4,7 +4,7 @@ project(Kraken)
set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include)
set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/GraphStructuredStack.cpp src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp src/ASTData.cpp src/ASTTransformation.cpp )
set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/GraphStructuredStack.cpp src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp src/ASTData.cpp src/ASTTransformation.cpp src/CGenerator.cpp )
include_directories( ${MY_INCLUDES} )

View File

@@ -12,8 +12,8 @@
enum ASTType {undef, translation_unit, interpreter_directive, import, identifier,
function, code_block,
typed_parameter, expression, boolean_expression, statement,
if_statement, return_statement, assignment_statement, function_call,
value};
if_statement, return_statement, assignment_statement, declaration_statement,
function_call, value};
enum ValueType {none, boolean, integer, floating, double_percision, char_string };

21
include/CGenerator.h Normal file
View File

@@ -0,0 +1,21 @@
#ifndef CGENERATOR_H
#define CGENERATOR_H
#include <string>
#include <iostream>
#include "NodeTree.h"
#include "ASTData.h"
//Turns an AST (a NodeTree of ASTData) into C source text.
//The only state kept between calls is the current indentation depth.
class CGenerator {
public:
//Starts with an indentation depth of zero.
CGenerator();
~CGenerator();
//Recursively walks the tree rooted at from and returns the generated C code as a string.
std::string generate(NodeTree<ASTData>* from);
//Maps a ValueType to the text used for it in the generated C code (e.g. char_string becomes "char*").
static std::string ValueTypeToCType(ValueType type);
private:
//Returns one tab character per level of tabLevel, used to indent emitted lines.
std::string tabs();
//Current nesting depth; generate() raises it while emitting the contents of a code_block and lowers it afterwards.
int tabLevel;
};
#endif

View File

@@ -41,10 +41,8 @@ NodeTree<T>* CollapseTransformation<T>::transform(NodeTree<T>* from) {
if (children[i]->getData() == toCollapse) {
node->removeChild(children[i]);
std::vector<NodeTree<T>*> newChildren = children[i]->getChildren();
node->addChildren(newChildren);
node->insertChildren(i,newChildren);
toProcess.push(node); //Do this node again
// for (int j = 0; j < newChildren.size(); j++)
// toProcess.push(newChildren[j]);
}
else
toProcess.push(children[i]);

View File

@@ -27,8 +27,11 @@ class NodeTree {
std::vector<NodeTree<T>*> getParents();
void addChild(NodeTree<T>* child);
void insertChild(int i, NodeTree<T>* child);
void addChildren(std::vector<NodeTree<T>*>* children);
void addChildren(std::vector<NodeTree<T>*> children);
void insertChildren(int index, std::vector<NodeTree<T>*>* children);
void insertChildren(int index, std::vector<NodeTree<T>*> children);
int findChild(NodeTree<T>* child);
void removeChild(NodeTree<T>* child);
void removeChild(int index);
@@ -40,6 +43,7 @@ class NodeTree {
void setName(std::string);
T getData() const;
T* getDataRef();
void setData(T data);
int size();
@@ -130,6 +134,14 @@ void NodeTree<T>::addChild(NodeTree<T>* child) {
children.push_back(child);
}
//Inserts child into this node's child list so that it ends up at position i.
//  i     - position to insert at; must be between 0 and the current number of children (inclusive).
//  child - node to insert; must not be NULL.
//A child that is already present (according to findChild) is left where it is and nothing is inserted.
//Throws a string literal if child is NULL or if i is outside the valid range.
template<class T>
void NodeTree<T>::insertChild(int i, NodeTree<T>* child) {
	if (!child)
		throw "Help, NULL child";
	//Already one of our children, nothing to do
	if (findChild(child) != -1)
		return;
	//vector::insert with a position outside [begin, end] is undefined behavior, so refuse it explicitly
	if (i < 0 || i > static_cast<int>(children.size()))
		throw "Help, child index out of range";
	children.insert(children.begin()+i,child);
}
template<class T>
void NodeTree<T>::addChildren(std::vector<NodeTree<T>*>* children) {
for (typename std::vector<NodeTree<T>*>::size_type i = 0; i < children->size(); i++)
@@ -142,6 +154,18 @@ void NodeTree<T>::addChildren(std::vector<NodeTree<T>*> children) {
addChild(children[i]);
}
//Inserts every node of *children into this node's child list, in order, starting at position index.
//  index    - position the first inserted node should occupy.
//  children - pointer to the nodes to insert (shadows the member of the same name, hence this->children below).
//insertChild silently skips nodes that are already children of this node, so the insertion position is only
//advanced when a node was really inserted. Otherwise a skipped duplicate would leave a gap and every later node
//would land one slot too far right (possibly past the end of the list).
template<class T>
void NodeTree<T>::insertChildren(int index, std::vector<NodeTree<T>*>* children) {
	int position = index;
	for (typename std::vector<NodeTree<T>*>::size_type i = 0; i < children->size(); i++) {
		typename std::vector<NodeTree<T>*>::size_type countBefore = this->children.size();
		insertChild(position, (*children)[i]);
		if (this->children.size() != countBefore)
			position++;
	}
}
//Inserts every node of children into this node's child list, in order, starting at position index.
//  index    - position the first inserted node should occupy.
//  children - the nodes to insert, passed by value (shadows the member of the same name, hence this->children below).
//insertChild silently skips nodes that are already children of this node, so the insertion position is only
//advanced when a node was really inserted. Otherwise a skipped duplicate would leave a gap and every later node
//would land one slot too far right (possibly past the end of the list).
template<class T>
void NodeTree<T>::insertChildren(int index, std::vector<NodeTree<T>*> children) {
	int position = index;
	for (typename std::vector<NodeTree<T>*>::size_type i = 0; i < children.size(); i++) {
		typename std::vector<NodeTree<T>*>::size_type countBefore = this->children.size();
		insertChild(position, children[i]);
		if (this->children.size() != countBefore)
			position++;
	}
}
template<class T>
int NodeTree<T>::findChild(NodeTree<T>* child) {
for (int i = 0; i < children.size(); i++) {
@@ -207,6 +231,11 @@ T NodeTree<T>::getData() const {
return data;
}
//Hands out a pointer to the data stored in this node so it can be changed in place
//(getData() returns a copy instead).
template<class T>
T* NodeTree<T>::getDataRef() {
	T* stored = &data;
	return stored;
}
template<class T>
void NodeTree<T>::setData(T data) {
this->data = data;

View File

@@ -49,10 +49,9 @@ class Parser {
std::vector< State* > stateSets;
//The EOFSymbol, a pointer because of use in table, etc
Symbol EOFSymbol;
//The nullSymbol, ditto with above. Also used in comparisons
Symbol nullSymbol;
Symbol invalidSymbol;
Table table;

View File

@@ -23,7 +23,7 @@ function_list = function_list WS function | function ;
function = type WS identifier WS "\(" WS opt_typed_parameter_list WS "\)" WS code_block ;
opt_typed_parameter_list = typed_parameter_list | ;
typed_parameter_list = typed_parameter_list WS typed_parameter | typed_parameter ;
typed_parameter_list = typed_parameter_list WS "," WS typed_parameter | typed_parameter ;
typed_parameter = type WS parameter ;
opt_parameter_list = parameter_list | ;
@@ -32,7 +32,7 @@ parameter = expression ;
code_block = "{" WS statement_list WS "}" ;
statement_list = statement_list WS statement | statement ;
statement = if_statement | return_statement | expression WS ";" | boolean_expression WS ";" | assignment_statement WS ";" | code_block ;
statement = if_statement | return_statement | expression WS ";" | boolean_expression WS ";" | assignment_statement WS ";" | declaration_statement WS ";" | code_block ;
function_call = scope identifier "\(" WS opt_parameter_list WS "\)" ;
scope = scope identifier "::" | ;
@@ -50,6 +50,7 @@ factor = number | identifier | function_call | bool | string ;
number = integer | float | double ;
assignment_statement = identifier WS "=" WS expression ;
declaration_statement = type WS identifier WS "=" WS expression ;
alphanumeric = alphanumeric numeric | alphanumeric alpha | numeric | alpha ;
hexadecimal = "0x(1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)+" ;

View File

@@ -14,6 +14,7 @@
#include "CollapseTransformation.h"
#include "ASTTransformation.h"
#include "ASTData.h"
#include "CGenerator.h"
int main(int argc, char* argv[]) {
@@ -25,7 +26,7 @@ int main(int argc, char* argv[]) {
}
std::ifstream programInFile, grammerInFile;
std::ofstream outFile, outFileTransformed, outFileAST;
std::ofstream outFile, outFileTransformed, outFileAST, outFileC;
programInFile.open(argv[1]);
if (!programInFile.is_open()) {
@@ -57,6 +58,11 @@ int main(int argc, char* argv[]) {
return(1);
}
outFileC.open((std::string(argv[3]) + ".c").c_str());
if (!outFileC.is_open()) {
std::cout << "Probelm opening third output file " << std::string(argv[3]) + ".c" << "\n";
return(1);
}
//Read the input file into a string
std::string programInputFileString, grammerInputFileString;
std::string line;
@@ -64,11 +70,13 @@ int main(int argc, char* argv[]) {
getline(grammerInFile, line);
grammerInputFileString.append(line+"\n");
}
grammerInFile.close();
while(programInFile.good()) {
getline(programInFile, line);
programInputFileString.append(line+"\n");
}
programInFile.close();
//LALRParser parser;
RNGLRParser parser;
@@ -101,6 +109,7 @@ int main(int argc, char* argv[]) {
} else {
std::cout << "ParseTree returned from parser is NULL!" << std::endl;
}
outFile.close();
//Pre AST Transformations
std::vector<NodeTransformation<Symbol, Symbol>*> preASTTransforms;
@@ -124,6 +133,7 @@ int main(int argc, char* argv[]) {
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("function_list", false)));
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("statement_list", false)));
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("parameter_list", false)));
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("typed_parameter_list", false)));
for (int i = 0; i < preASTTransforms.size(); i++) {
parseTree = preASTTransforms[i]->transform(parseTree);
@@ -138,20 +148,24 @@ int main(int argc, char* argv[]) {
} else {
std::cout << "Tree returned from transformation is NULL!" << std::endl;
}
outFileTransformed.close();
if (AST) {
outFileAST << AST->DOTGraphString() << std::endl;
} else {
std::cout << "Tree returned from ASTTransformation is NULL!" << std::endl;
}
programInFile.close();
grammerInFile.close();
outFile.close();
outFileTransformed.close();
outFileAST.close();
//Do type checking, scope creation, etc. here.
//None at this time, instead going straight to C in this first (more naive) version
//Code generation
//For right now, just C
std::string c_code = CGenerator().generate(AST);
outFileC << c_code << std::endl;
outFileC.close();
return(0);
}

View File

@@ -43,19 +43,19 @@ std::string ASTData::ValueTypeToString(ValueType type) {
return "none";
break;
case boolean:
return "boolean";
return "bool";
break;
case integer:
return "integer";
return "int";
break;
case floating:
return "floating";
return "float";
break;
case double_percision:
return "double_percision";
return "double";
break;
case char_string:
return "char_string";
return "string";
break;
default:
return "unknown_ValueType";
@@ -103,6 +103,9 @@ std::string ASTData::ASTTypeToString(ASTType type) {
case assignment_statement:
return "assignment_statement";
break;
case declaration_statement:
return "declaration_statement";
break;
case function_call:
return "function_call";
break;

View File

@@ -31,7 +31,9 @@ NodeTree<ASTData>* ASTTransformation::transform(NodeTree<Symbol>* from) {
} else if (name == "code_block") {
newNode = new NodeTree<ASTData>(name, ASTData(code_block));
} else if (name == "typed_parameter") {
newNode = new NodeTree<ASTData>(name, ASTData(typed_parameter));
newNode = transform(children[1]); //Transform to get the identifier
newNode->getDataRef()->valueType = ASTData::strToType(concatSymbolTree(children[0])); //Get the type (left child) and set our new identifer to be that type
return newNode;
} else if (name == "expression") {
//If this is an actual part of an expression, not just a promoted term
if (children.size() > 1) {
@@ -62,6 +64,13 @@ NodeTree<ASTData>* ASTTransformation::transform(NodeTree<Symbol>* from) {
newNode = new NodeTree<ASTData>(name, ASTData(return_statement));
} else if (name == "assignment_statement") {
newNode = new NodeTree<ASTData>(name, ASTData(assignment_statement));
} else if (name == "declaration_statement") {
newNode = new NodeTree<ASTData>(name, ASTData(declaration_statement));
NodeTree<ASTData>* newIdentifier = transform(children[1]); //Transform the identifier
newIdentifier->getDataRef()->valueType = ASTData::strToType(concatSymbolTree(children[0]));//set the type of the identifier
newNode->addChild(newIdentifier);
skipChildren.insert(0); //These, the type and the identifier, have been taken care of.
skipChildren.insert(1);
} else if (name == "function_call") {
//children[0] is scope
std::string functionCallName = concatSymbolTree(children[1]);
@@ -89,7 +98,7 @@ NodeTree<ASTData>* ASTTransformation::transform(NodeTree<Symbol>* from) {
for (int i = 0; i < children.size(); i++) {
if (skipChildren.find(i) == skipChildren.end()) {
NodeTree<ASTData>* transChild = transform(children[i]);
if (transChild->getData().type)
if (transChild->getData().type) //Only add the children that have a real ASTData::ASTType, that is, legit ASTData.
newNode->addChild(transChild);
else
delete transChild;

125
src/CGenerator.cpp Normal file
View File

@@ -0,0 +1,125 @@
#include "CGenerator.h"
//A fresh generator starts at indentation depth zero.
CGenerator::CGenerator() : tabLevel(0) {
}

//Nothing to clean up.
CGenerator::~CGenerator() {}
//Builds the indentation prefix for the current nesting depth: one tab character per level of tabLevel.
std::string CGenerator::tabs() {
	std::string indent;
	int remaining = tabLevel;
	while (remaining-- > 0)
		indent.push_back('\t');
	return indent;
}
//Recursively generates C source text for the AST rooted at from.
//  from - root of the (sub)tree to translate. May be NULL, in which case an empty string is returned;
//         main() hands over whatever the AST transformation produced, which can be NULL.
//Returns the generated C code for the node and its children.
//Node types without dedicated output (translation_unit, interpreter_directive, and the text emitted for
//expression / boolean_expression) fall through to the loop at the bottom, which concatenates the code
//generated for every child. Nodes that are missing an expected child produce an empty string in that
//child's place instead of indexing past the end of the child list.
std::string CGenerator::generate(NodeTree<ASTData>* from) {
	if (!from)
		return "";
	typedef std::vector<NodeTree<ASTData>*>::size_type ChildIndex;
	ASTData data = from->getData();
	std::vector<NodeTree<ASTData>*> children = from->getChildren();
	//The first two children, or NULL when absent; generate(NULL) yields "" so they are always safe to pass on
	NodeTree<ASTData>* first = children.size() > 0 ? children[0] : 0;
	NodeTree<ASTData>* second = children.size() > 1 ? children[1] : 0;
	std::string output = "";
	switch (data.type) {
		case translation_unit:
			//Do nothing
			break;
		case interpreter_directive:
			//Do nothing
			break;
		case import:
			return "#include \"" + data.symbol.getName() + "\"\n";
		case identifier:
			return data.symbol.getName();
		case function:
			output += "\n" + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + "(";
			//Every child but the last is a parameter, the last one is the body.
			//"i + 1 < size()" instead of "i < size()-1" so an empty child list cannot underflow the unsigned size.
			for (ChildIndex i = 0; i + 1 < children.size(); i++) {
				if (i > 0)
					output += ", ";
				//Parameter types must be spelled as C types, just like the return type above
				output += ValueTypeToCType(children[i]->getData().valueType) + " " + generate(children[i]);
			}
			output += ")\n";
			if (!children.empty())
				output += generate(children.back());
			return output;
		case code_block:
			output += tabs() + "{\n";
			tabLevel++;
			for (ChildIndex i = 0; i < children.size(); i++)
				output += generate(children[i]);
			tabLevel--;
			output += tabs() + "}";
			return output;
		case expression:
			output += " " + data.symbol.getName() + ", ";
			break;
		case boolean_expression:
			output += " " + data.symbol.getName() + " ";
			break;
		case statement:
			return tabs() + generate(first) + ";\n";
		case if_statement:
			output += "if (" + generate(first) + ") \n" + generate(second);
			if (children.size() > 2)
				output += " else " + generate(children[2]);
			return output;
		case return_statement:
			return "return " + generate(first);
		case assignment_statement:
			return generate(first) + " = " + generate(second);
		case declaration_statement:
			//The declared identifier (first child) carries the type; emit it as a C type
			return (first ? ValueTypeToCType(first->getData().valueType) : std::string()) + " " + generate(first) + " = " + generate(second);
		case function_call:
		{
			//Handle operators specially for now. Will later replace with
			//Inlined functions in the standard library
			std::string name = data.symbol.getName();
			if (name == "+" || name == "-" || name == "*" || name == "/") {
				return "((" + generate(first) + ")" + name + "(" + generate(second) + "))";
			}
			output += name + "(";
			//Comma separated argument list
			for (ChildIndex i = 0; i < children.size(); i++) {
				if (i > 0)
					output += ", ";
				output += generate(children[i]);
			}
			output += ") ";
			return output;
		}
		case value:
			return data.symbol.getName();
		default:
			std::cout << "Nothing!" << std::endl;
	}
	//Node types that did not return above just concatenate whatever their children generate
	for (ChildIndex i = 0; i < children.size(); i++)
		output += generate(children[i]);
	return output;
}
//Translates a ValueType into the type name written into the generated C code.
//Values not listed below yield "unknown_ValueType".
std::string CGenerator::ValueTypeToCType(ValueType type) {
	const char* cTypeName = "unknown_ValueType";
	switch (type) {
		case none:
			cTypeName = "none";
			break;
		case boolean:
			cTypeName = "bool";
			break;
		case integer:
			cTypeName = "int";
			break;
		case floating:
			cTypeName = "float";
			break;
		case double_percision:
			cTypeName = "double";
			break;
		case char_string:
			cTypeName = "char*";
			break;
		default:
			break;
	}
	return cTypeName;
}

View File

@@ -45,9 +45,9 @@ Symbol Lexer::next() {
//std::cout << "Current at is \"" << input.substr(currentPosition) << "\" currentPos is " << currentPosition <<std::endl;
return Symbol(longestRegEx->getPattern(), true, eatenString);
} else {
//std::cout << "Found no applicable regex" << std::endl;
//std::cout << "Remaining is ||" << input.substr(currentPosition) << "||" << std::endl;
return Symbol();
std::cout << "Found no applicable regex" << std::endl;
std::cout << "Remaining is ||" << input.substr(currentPosition) << "||" << std::endl;
return Symbol("$INVALID$", true);
}
}

View File

@@ -1,6 +1,6 @@
#include "Parser.h"
Parser::Parser() : EOFSymbol("$EOF$", true), nullSymbol("$NULL$", true){
Parser::Parser() : EOFSymbol("$EOF$", true), nullSymbol("$NULL$", true), invalidSymbol("$INVALID$", true){
table.setSymbols(EOFSymbol, nullSymbol);
}

View File

@@ -34,6 +34,11 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
input.push_back(currentToken);
while (currentToken != EOFSymbol) {
currentToken = lexer.next();
//std::cout << "CurrentToken is " << currentToken.toString() << std::endl;
if (currentToken == invalidSymbol) {
std::cout << "Invalid Symbol!" << std::endl;
throw "Invalid Symbol, cannot lex";
}
input.push_back(currentToken);
}