Lots of work on a CGenerator, AST and language improvements
This commit is contained in:
@@ -4,7 +4,7 @@ project(Kraken)
|
||||
|
||||
set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include)
|
||||
|
||||
set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/GraphStructuredStack.cpp src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp src/ASTData.cpp src/ASTTransformation.cpp )
|
||||
set( MY_SOURCES main.cpp src/Parser.cpp src/LALRParser.cpp src/GraphStructuredStack.cpp src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp src/RegEx.cpp src/RegExState.cpp src/Table.cpp src/ASTData.cpp src/ASTTransformation.cpp src/CGenerator.cpp )
|
||||
|
||||
include_directories( ${MY_INCLUDES} )
|
||||
|
||||
|
||||
@@ -12,8 +12,8 @@
|
||||
enum ASTType {undef, translation_unit, interpreter_directive, import, identifier,
|
||||
function, code_block,
|
||||
typed_parameter, expression, boolean_expression, statement,
|
||||
if_statement, return_statement, assignment_statement, function_call,
|
||||
value};
|
||||
if_statement, return_statement, assignment_statement, declaration_statement,
|
||||
function_call, value};
|
||||
enum ValueType {none, boolean, integer, floating, double_percision, char_string };
|
||||
|
||||
|
||||
|
||||
21
include/CGenerator.h
Normal file
21
include/CGenerator.h
Normal file
@@ -0,0 +1,21 @@
|
||||
#ifndef CGENERATOR_H
|
||||
#define CGENERATOR_H
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
#include "NodeTree.h"
|
||||
#include "ASTData.h"
|
||||
|
||||
|
||||
class CGenerator {
|
||||
public:
|
||||
CGenerator();
|
||||
~CGenerator();
|
||||
std::string generate(NodeTree<ASTData>* from);
|
||||
static std::string ValueTypeToCType(ValueType type);
|
||||
private:
|
||||
std::string tabs();
|
||||
int tabLevel;
|
||||
};
|
||||
#endif
|
||||
@@ -41,10 +41,8 @@ NodeTree<T>* CollapseTransformation<T>::transform(NodeTree<T>* from) {
|
||||
if (children[i]->getData() == toCollapse) {
|
||||
node->removeChild(children[i]);
|
||||
std::vector<NodeTree<T>*> newChildren = children[i]->getChildren();
|
||||
node->addChildren(newChildren);
|
||||
node->insertChildren(i,newChildren);
|
||||
toProcess.push(node); //Do this node again
|
||||
// for (int j = 0; j < newChildren.size(); j++)
|
||||
// toProcess.push(newChildren[j]);
|
||||
}
|
||||
else
|
||||
toProcess.push(children[i]);
|
||||
|
||||
@@ -27,8 +27,11 @@ class NodeTree {
|
||||
std::vector<NodeTree<T>*> getParents();
|
||||
|
||||
void addChild(NodeTree<T>* child);
|
||||
void insertChild(int i, NodeTree<T>* child);
|
||||
void addChildren(std::vector<NodeTree<T>*>* children);
|
||||
void addChildren(std::vector<NodeTree<T>*> children);
|
||||
void insertChildren(int index, std::vector<NodeTree<T>*>* children);
|
||||
void insertChildren(int index, std::vector<NodeTree<T>*> children);
|
||||
int findChild(NodeTree<T>* child);
|
||||
void removeChild(NodeTree<T>* child);
|
||||
void removeChild(int index);
|
||||
@@ -40,6 +43,7 @@ class NodeTree {
|
||||
void setName(std::string);
|
||||
|
||||
T getData() const;
|
||||
T* getDataRef();
|
||||
void setData(T data);
|
||||
|
||||
int size();
|
||||
@@ -130,6 +134,14 @@ void NodeTree<T>::addChild(NodeTree<T>* child) {
|
||||
children.push_back(child);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void NodeTree<T>::insertChild(int i, NodeTree<T>* child) {
|
||||
if (!child)
|
||||
throw "Help, NULL child";
|
||||
if (findChild(child) == -1)
|
||||
children.insert(children.begin()+i,child);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void NodeTree<T>::addChildren(std::vector<NodeTree<T>*>* children) {
|
||||
for (typename std::vector<NodeTree<T>*>::size_type i = 0; i < children->size(); i++)
|
||||
@@ -142,6 +154,18 @@ void NodeTree<T>::addChildren(std::vector<NodeTree<T>*> children) {
|
||||
addChild(children[i]);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void NodeTree<T>::insertChildren(int index, std::vector<NodeTree<T>*>* children) {
|
||||
for (typename std::vector<NodeTree<T>*>::size_type i = 0; i < children->size(); i++)
|
||||
insertChild(index+i,(*children)[i]);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void NodeTree<T>::insertChildren(int index, std::vector<NodeTree<T>*> children) {
|
||||
for (typename std::vector<NodeTree<T>*>::size_type i = 0; i < children.size(); i++)
|
||||
insertChild(index+i, children[i]);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
int NodeTree<T>::findChild(NodeTree<T>* child) {
|
||||
for (int i = 0; i < children.size(); i++) {
|
||||
@@ -207,6 +231,11 @@ T NodeTree<T>::getData() const {
|
||||
return data;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
T* NodeTree<T>::getDataRef() {
|
||||
return &data;
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void NodeTree<T>::setData(T data) {
|
||||
this->data = data;
|
||||
|
||||
@@ -49,10 +49,9 @@ class Parser {
|
||||
|
||||
std::vector< State* > stateSets;
|
||||
|
||||
//The EOFSymbol, a pointer because of use in table, etc
|
||||
Symbol EOFSymbol;
|
||||
//The nullSymbol, ditto with above. Also used in comparisons
|
||||
Symbol nullSymbol;
|
||||
Symbol invalidSymbol;
|
||||
|
||||
Table table;
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ function_list = function_list WS function | function ;
|
||||
function = type WS identifier WS "\(" WS opt_typed_parameter_list WS "\)" WS code_block ;
|
||||
|
||||
opt_typed_parameter_list = typed_parameter_list | ;
|
||||
typed_parameter_list = typed_parameter_list WS typed_parameter | typed_parameter ;
|
||||
typed_parameter_list = typed_parameter_list WS "," WS typed_parameter | typed_parameter ;
|
||||
typed_parameter = type WS parameter ;
|
||||
|
||||
opt_parameter_list = parameter_list | ;
|
||||
@@ -32,7 +32,7 @@ parameter = expression ;
|
||||
|
||||
code_block = "{" WS statement_list WS "}" ;
|
||||
statement_list = statement_list WS statement | statement ;
|
||||
statement = if_statement | return_statement | expression WS ";" | boolean_expression WS ";" | assignment_statement WS ";" | code_block ;
|
||||
statement = if_statement | return_statement | expression WS ";" | boolean_expression WS ";" | assignment_statement WS ";" | declaration_statement WS ";" | code_block ;
|
||||
function_call = scope identifier "\(" WS opt_parameter_list WS "\)" ;
|
||||
scope = scope identifier "::" | ;
|
||||
|
||||
@@ -50,6 +50,7 @@ factor = number | identifier | function_call | bool | string ;
|
||||
number = integer | float | double ;
|
||||
|
||||
assignment_statement = identifier WS "=" WS expression ;
|
||||
declaration_statement = type WS identifier WS "=" WS expression ;
|
||||
|
||||
alphanumeric = alphanumeric numeric | alphanumeric alpha | numeric | alpha ;
|
||||
hexadecimal = "0x(1|2|3|4|5|6|7|8|9|a|b|c|d|e|f)+" ;
|
||||
|
||||
28
main.cpp
28
main.cpp
@@ -14,6 +14,7 @@
|
||||
#include "CollapseTransformation.h"
|
||||
#include "ASTTransformation.h"
|
||||
#include "ASTData.h"
|
||||
#include "CGenerator.h"
|
||||
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
@@ -25,7 +26,7 @@ int main(int argc, char* argv[]) {
|
||||
}
|
||||
|
||||
std::ifstream programInFile, grammerInFile;
|
||||
std::ofstream outFile, outFileTransformed, outFileAST;
|
||||
std::ofstream outFile, outFileTransformed, outFileAST, outFileC;
|
||||
|
||||
programInFile.open(argv[1]);
|
||||
if (!programInFile.is_open()) {
|
||||
@@ -57,6 +58,11 @@ int main(int argc, char* argv[]) {
|
||||
return(1);
|
||||
}
|
||||
|
||||
outFileC.open((std::string(argv[3]) + ".c").c_str());
|
||||
if (!outFileC.is_open()) {
|
||||
std::cout << "Probelm opening third output file " << std::string(argv[3]) + ".c" << "\n";
|
||||
return(1);
|
||||
}
|
||||
//Read the input file into a string
|
||||
std::string programInputFileString, grammerInputFileString;
|
||||
std::string line;
|
||||
@@ -64,11 +70,13 @@ int main(int argc, char* argv[]) {
|
||||
getline(grammerInFile, line);
|
||||
grammerInputFileString.append(line+"\n");
|
||||
}
|
||||
grammerInFile.close();
|
||||
|
||||
while(programInFile.good()) {
|
||||
getline(programInFile, line);
|
||||
programInputFileString.append(line+"\n");
|
||||
}
|
||||
programInFile.close();
|
||||
|
||||
//LALRParser parser;
|
||||
RNGLRParser parser;
|
||||
@@ -101,6 +109,7 @@ int main(int argc, char* argv[]) {
|
||||
} else {
|
||||
std::cout << "ParseTree returned from parser is NULL!" << std::endl;
|
||||
}
|
||||
outFile.close();
|
||||
|
||||
//Pre AST Transformations
|
||||
std::vector<NodeTransformation<Symbol, Symbol>*> preASTTransforms;
|
||||
@@ -124,6 +133,7 @@ int main(int argc, char* argv[]) {
|
||||
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("function_list", false)));
|
||||
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("statement_list", false)));
|
||||
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("parameter_list", false)));
|
||||
preASTTransforms.push_back(new CollapseTransformation<Symbol>(Symbol("typed_parameter_list", false)));
|
||||
|
||||
for (int i = 0; i < preASTTransforms.size(); i++) {
|
||||
parseTree = preASTTransforms[i]->transform(parseTree);
|
||||
@@ -138,20 +148,24 @@ int main(int argc, char* argv[]) {
|
||||
} else {
|
||||
std::cout << "Tree returned from transformation is NULL!" << std::endl;
|
||||
}
|
||||
|
||||
outFileTransformed.close();
|
||||
|
||||
if (AST) {
|
||||
outFileAST << AST->DOTGraphString() << std::endl;
|
||||
} else {
|
||||
std::cout << "Tree returned from ASTTransformation is NULL!" << std::endl;
|
||||
}
|
||||
|
||||
programInFile.close();
|
||||
grammerInFile.close();
|
||||
outFile.close();
|
||||
outFileTransformed.close();
|
||||
outFileAST.close();
|
||||
|
||||
//Do type checking, scope creation, etc. here.
|
||||
//None at this time, instead going straight to C in this first (more naive) version
|
||||
|
||||
//Code generation
|
||||
//For right now, just C
|
||||
std::string c_code = CGenerator().generate(AST);
|
||||
outFileC << c_code << std::endl;
|
||||
outFileC.close();
|
||||
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
||||
@@ -43,19 +43,19 @@ std::string ASTData::ValueTypeToString(ValueType type) {
|
||||
return "none";
|
||||
break;
|
||||
case boolean:
|
||||
return "boolean";
|
||||
return "bool";
|
||||
break;
|
||||
case integer:
|
||||
return "integer";
|
||||
return "int";
|
||||
break;
|
||||
case floating:
|
||||
return "floating";
|
||||
return "float";
|
||||
break;
|
||||
case double_percision:
|
||||
return "double_percision";
|
||||
return "double";
|
||||
break;
|
||||
case char_string:
|
||||
return "char_string";
|
||||
return "string";
|
||||
break;
|
||||
default:
|
||||
return "unknown_ValueType";
|
||||
@@ -103,6 +103,9 @@ std::string ASTData::ASTTypeToString(ASTType type) {
|
||||
case assignment_statement:
|
||||
return "assignment_statement";
|
||||
break;
|
||||
case declaration_statement:
|
||||
return "declaration_statement";
|
||||
break;
|
||||
case function_call:
|
||||
return "function_call";
|
||||
break;
|
||||
|
||||
@@ -31,7 +31,9 @@ NodeTree<ASTData>* ASTTransformation::transform(NodeTree<Symbol>* from) {
|
||||
} else if (name == "code_block") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(code_block));
|
||||
} else if (name == "typed_parameter") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(typed_parameter));
|
||||
newNode = transform(children[1]); //Transform to get the identifier
|
||||
newNode->getDataRef()->valueType = ASTData::strToType(concatSymbolTree(children[0])); //Get the type (left child) and set our new identifer to be that type
|
||||
return newNode;
|
||||
} else if (name == "expression") {
|
||||
//If this is an actual part of an expression, not just a promoted term
|
||||
if (children.size() > 1) {
|
||||
@@ -62,6 +64,13 @@ NodeTree<ASTData>* ASTTransformation::transform(NodeTree<Symbol>* from) {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(return_statement));
|
||||
} else if (name == "assignment_statement") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(assignment_statement));
|
||||
} else if (name == "declaration_statement") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(declaration_statement));
|
||||
NodeTree<ASTData>* newIdentifier = transform(children[1]); //Transform the identifier
|
||||
newIdentifier->getDataRef()->valueType = ASTData::strToType(concatSymbolTree(children[0]));//set the type of the identifier
|
||||
newNode->addChild(newIdentifier);
|
||||
skipChildren.insert(0); //These, the type and the identifier, have been taken care of.
|
||||
skipChildren.insert(1);
|
||||
} else if (name == "function_call") {
|
||||
//children[0] is scope
|
||||
std::string functionCallName = concatSymbolTree(children[1]);
|
||||
@@ -89,7 +98,7 @@ NodeTree<ASTData>* ASTTransformation::transform(NodeTree<Symbol>* from) {
|
||||
for (int i = 0; i < children.size(); i++) {
|
||||
if (skipChildren.find(i) == skipChildren.end()) {
|
||||
NodeTree<ASTData>* transChild = transform(children[i]);
|
||||
if (transChild->getData().type)
|
||||
if (transChild->getData().type) //Only add the children that have a real ASTData::ASTType, that is, legit ASTData.
|
||||
newNode->addChild(transChild);
|
||||
else
|
||||
delete transChild;
|
||||
|
||||
125
src/CGenerator.cpp
Normal file
125
src/CGenerator.cpp
Normal file
@@ -0,0 +1,125 @@
|
||||
#include "CGenerator.h"
|
||||
|
||||
// Generation starts with a tab level of zero, i.e. no indentation.
CGenerator::CGenerator() : tabLevel(0) {
}
|
||||
// Intentionally empty: the destructor performs no cleanup.
CGenerator::~CGenerator() {}
|
||||
|
||||
// Builds the indentation prefix: one tab character per current tabLevel.
std::string CGenerator::tabs() {
	std::string indentation;
	int remaining = tabLevel;
	while (remaining-- > 0)
		indentation.push_back('\t');
	return indentation;
}
|
||||
|
||||
std::string CGenerator::generate(NodeTree<ASTData>* from) {
|
||||
ASTData data = from->getData();
|
||||
std::vector<NodeTree<ASTData>*> children = from->getChildren();
|
||||
std::string output = "";
|
||||
switch (data.type) {
|
||||
case translation_unit:
|
||||
//Do nothing
|
||||
break;
|
||||
case interpreter_directive:
|
||||
//Do nothing
|
||||
break;
|
||||
case import:
|
||||
return "#include \"" + data.symbol.getName() + "\"\n";
|
||||
break;
|
||||
case identifier:
|
||||
return data.symbol.getName();
|
||||
break;
|
||||
case function:
|
||||
output += "\n" + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + "(";
|
||||
for (int i = 0; i < children.size()-1; i++) {
|
||||
if (i > 0)
|
||||
output += ", ";
|
||||
output += ASTData::ValueTypeToString(children[i]->getData().valueType) + " " + generate(children[i]);
|
||||
}
|
||||
output+= ")\n" + generate(children[children.size()-1]);
|
||||
return output;
|
||||
break;
|
||||
case code_block:
|
||||
output += tabs() + "{\n";
|
||||
tabLevel++;
|
||||
for (int i = 0; i < children.size(); i++)
|
||||
output += generate(children[i]);
|
||||
tabLevel--;
|
||||
output += tabs() + "}";
|
||||
return output;
|
||||
break;
|
||||
case expression:
|
||||
output += " " + data.symbol.getName() + ", ";
|
||||
break;
|
||||
case boolean_expression:
|
||||
output += " " + data.symbol.getName() + " ";
|
||||
break;
|
||||
case statement:
|
||||
return tabs() + generate(children[0]) + ";\n";
|
||||
break;
|
||||
case if_statement:
|
||||
output += "if (" + generate(children[0]) + ") \n" + generate(children[1]);
|
||||
if (children.size() > 2)
|
||||
output += " else " + generate(children[2]);
|
||||
return output;
|
||||
break;
|
||||
case return_statement:
|
||||
return "return " + generate(children[0]);
|
||||
case assignment_statement:
|
||||
return generate(children[0]) + " = " + generate(children[1]);
|
||||
case declaration_statement:
|
||||
return ASTData::ValueTypeToString(children[0]->getData().valueType) + " " + generate(children[0]) + " = " + generate(children[1]);
|
||||
case function_call:
|
||||
{
|
||||
//Handle operators specially for now. Will later replace with
|
||||
//Inlined functions in the standard library
|
||||
std::string name = data.symbol.getName();
|
||||
if (name == "+" || name == "-" || name == "*" || name == "/") {
|
||||
return "((" + generate(children[0]) + ")" + name + "(" + generate(children[1]) + "))";
|
||||
}
|
||||
output += data.symbol.getName() + "(";
|
||||
for (int i = 0; i < children.size(); i++)
|
||||
if (i < children.size()-1)
|
||||
output += generate(children[i]) + ", ";
|
||||
else output += generate(children[i]);
|
||||
output += ") ";
|
||||
return output;
|
||||
}
|
||||
case value:
|
||||
return data.symbol.getName();
|
||||
|
||||
default:
|
||||
std::cout << "Nothing!" << std::endl;
|
||||
}
|
||||
for (int i = 0; i < children.size(); i++)
|
||||
output += generate(children[i]);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
/**
 * Maps a ValueType to the text emitted for it in generated C code.
 * Values without a mapping produce "unknown_ValueType".
 */
std::string CGenerator::ValueTypeToCType(ValueType type) {
	if (type == none)
		return "none";
	if (type == boolean)
		return "bool";
	if (type == integer)
		return "int";
	if (type == floating)
		return "float";
	if (type == double_percision)
		return "double";
	if (type == char_string)
		return "char*";
	return "unknown_ValueType";
}
|
||||
@@ -45,9 +45,9 @@ Symbol Lexer::next() {
|
||||
//std::cout << "Current at is \"" << input.substr(currentPosition) << "\" currentPos is " << currentPosition <<std::endl;
|
||||
return Symbol(longestRegEx->getPattern(), true, eatenString);
|
||||
} else {
|
||||
//std::cout << "Found no applicable regex" << std::endl;
|
||||
//std::cout << "Remaining is ||" << input.substr(currentPosition) << "||" << std::endl;
|
||||
return Symbol();
|
||||
std::cout << "Found no applicable regex" << std::endl;
|
||||
std::cout << "Remaining is ||" << input.substr(currentPosition) << "||" << std::endl;
|
||||
return Symbol("$INVALID$", true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#include "Parser.h"
|
||||
|
||||
Parser::Parser() : EOFSymbol("$EOF$", true), nullSymbol("$NULL$", true){
|
||||
Parser::Parser() : EOFSymbol("$EOF$", true), nullSymbol("$NULL$", true), invalidSymbol("$INVALID$", true){
|
||||
table.setSymbols(EOFSymbol, nullSymbol);
|
||||
}
|
||||
|
||||
|
||||
@@ -34,6 +34,11 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
|
||||
input.push_back(currentToken);
|
||||
while (currentToken != EOFSymbol) {
|
||||
currentToken = lexer.next();
|
||||
//std::cout << "CurrentToken is " << currentToken.toString() << std::endl;
|
||||
if (currentToken == invalidSymbol) {
|
||||
std::cout << "Invalid Symbol!" << std::endl;
|
||||
throw "Invalid Symbol, cannot lex";
|
||||
}
|
||||
input.push_back(currentToken);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user