This commit is contained in:
Nathan Braswell
2014-03-14 16:55:33 -04:00
39 changed files with 1734 additions and 506 deletions

View File

@@ -2,14 +2,15 @@
// Default constructor: builds a placeholder node whose AST type is `undef`
// and which carries no value type.
ASTData::ASTData() {
    valueType = NULL;
    type = undef;
}
ASTData::ASTData(ASTType type, ValueType valueType) {
ASTData::ASTData(ASTType type, Type *valueType) {
this->type = type;
this->valueType = valueType;
}
ASTData::ASTData(ASTType type, Symbol symbol, ValueType valueType) {
ASTData::ASTData(ASTType type, Symbol symbol, Type *valueType) {
this->type = type;
this->valueType = valueType;
this->symbol = symbol;
@@ -20,98 +21,55 @@ ASTData::~ASTData() {
}
std::string ASTData::toString() {
return ASTTypeToString(type) + (symbol.isTerminal() ? " " + symbol.toString() : "") + (valueType ? " " + ValueTypeToString(valueType) : "");
}
ValueType ASTData::strToType(std::string type) {
if (type == "bool")
return boolean;
else if (type == "int")
return integer;
else if (type == "float")
return floating;
else if (type == "double")
return double_percision;
else if (type == "string")
return char_string;
else return none;
}
std::string ASTData::ValueTypeToString(ValueType type) {
switch (type) {
case none:
return "none";
break;
case boolean:
return "bool";
break;
case integer:
return "int";
break;
case floating:
return "float";
break;
case double_percision:
return "double";
break;
case char_string:
return "string";
break;
default:
return "unknown_ValueType";
}
return ASTTypeToString(type) + " " +
(symbol.isTerminal() ? " " + symbol.toString() : "") + " " +
(valueType ? valueType->toString() : "no_type");
}
std::string ASTData::ASTTypeToString(ASTType type) {
switch (type) {
case translation_unit:
return "translation_unit";
break;
case interpreter_directive:
return "interpreter_directive";
break;
case identifier:
return "identifier";
break;
case import:
return "import";
break;
case function:
return "function";
break;
case type_def:
return "type_def";
case code_block:
return "code_block";
break;
case typed_parameter:
return "typed_parameter";
break;
case expression:
return "expression";
break;
case boolean_expression:
return "boolean_expression";
break;
case statement:
return "statement";
break;
case if_statement:
return "if_statement";
break;
case while_loop:
return "while_loop";
case for_loop:
return "for_loop";
case return_statement:
return "return_statement";
break;
case assignment_statement:
return "assignment_statement";
break;
case declaration_statement:
return "declaration_statement";
break;
case if_comp:
return "if_comp";
case simple_passthrough:
return "simple_passthrough";
case function_call:
return "function_call";
break;
case value:
return "value";
break;
default:
return "unknown_ASTType";
}

View File

@@ -1,7 +1,28 @@
#include "ASTTransformation.h"
ASTTransformation::ASTTransformation() {
//
ASTTransformation::ASTTransformation(Importer *importerIn) {
importer = importerIn;
//Set up language level special scope. (the final scope checked)
//Note the NULL type
languageLevelScope["+"].push_back( new NodeTree<ASTData>("function", ASTData(function, Symbol("+", true), NULL)));
languageLevelScope["-"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("-", true), NULL)));
languageLevelScope["*"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("*", true), NULL)));
languageLevelScope["&"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("&", true), NULL)));
languageLevelScope["--"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("--", true), NULL)));
languageLevelScope["++"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("++", true), NULL)));
languageLevelScope["=="].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("==", true), NULL)));
languageLevelScope["<="].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("<=", true), NULL)));
languageLevelScope[">="].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol(">=", true), NULL)));
languageLevelScope["<"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("<", true), NULL)));
languageLevelScope[">"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol(">", true), NULL)));
languageLevelScope["&&"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("&&", true), NULL)));
languageLevelScope["||"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("||", true), NULL)));
languageLevelScope["!"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("!", true), NULL)));
languageLevelScope["*="].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("*=", true), NULL)));
languageLevelScope["+="].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("+=", true), NULL)));
languageLevelScope["-="].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("-=", true), NULL)));
languageLevelScope["."].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol(".", true), NULL)));
languageLevelScope["->"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("->", true), NULL)));
}
ASTTransformation::~ASTTransformation() {
@@ -9,113 +30,455 @@ ASTTransformation::~ASTTransformation() {
}
// Entry point for transforming a parse tree: delegates to the recursive
// overload with no enclosing scope and an empty list of argument types.
NodeTree<ASTData>* ASTTransformation::transform(NodeTree<Symbol>* from) {
    NodeTree<ASTData>* const topScope = NULL;   // the top scope is set up by the recursive overload
    const std::vector<Type> noTypes;
    return transform(from, topScope, noTypes);
}
NodeTree<ASTData>* ASTTransformation::transform(NodeTree<Symbol>* from, NodeTree<ASTData>* scope, std::vector<Type> types) {
Symbol current = from->getData();
std::string name = current.getName();
NodeTree<ASTData>* newNode;
NodeTree<ASTData>* newNode = NULL;
std::vector<NodeTree<Symbol>*> children = from->getChildren();
std::set<int> skipChildren;
if (name == "translation_unit") {
newNode = new NodeTree<ASTData>(name, ASTData(translation_unit));
scope = newNode;
} else if (name == "interpreter_directive") {
newNode = new NodeTree<ASTData>(name, ASTData(interpreter_directive));
} else if (name == "import" && !current.isTerminal()) {
newNode = new NodeTree<ASTData>(name, ASTData(import, Symbol(concatSymbolTree(children[0]), true)));
std::string toImport = concatSymbolTree(children[0]);
newNode = new NodeTree<ASTData>(name, ASTData(import, Symbol(toImport, true)));
//Do the imported file too
NodeTree<ASTData>* outsideTranslationUnit = importer->import(toImport + ".krak");
scope->getDataRef()->scope[toImport].push_back(outsideTranslationUnit); //Put this transation_unit in the scope as it's files name
//Now add it to scope
for (auto i = outsideTranslationUnit->getDataRef()->scope.begin(); i != outsideTranslationUnit->getDataRef()->scope.end(); i++)
for (auto j : i->second)
scope->getDataRef()->scope[i->first].push_back(j);
return newNode; // Don't need children of import
} else if (name == "identifier") {
newNode = new NodeTree<ASTData>(name, ASTData(identifier, Symbol(concatSymbolTree(children[0]), true)));
//Make sure we get the entire name
std::string lookupName = concatSymbolTree(from);
std::cout << "Looking up: " << lookupName << std::endl;
newNode = scopeLookup(scope, lookupName, types);
if (newNode == NULL) {
std::cout << "scope lookup error! Could not find " << lookupName << " in identifier " << std::endl;
throw "LOOKUP ERROR: " + lookupName;
} else if (newNode->getDataRef()->symbol.getName() !=lookupName) {
//This happens when the lookup name denotes a member of an object, i.e. obj.foo
//The newNode points to obj, not foo.
}
//newNode = new NodeTree<ASTData>(name, ASTData(identifier, Symbol(concatSymbolTree(children[0]), true)));
} else if (name == "type_def") {
std::string typeAlias = concatSymbolTree(children[0]);
//If it is an alisis of a type
if (children[1]->getData().getName() == "type") {
newNode = new NodeTree<ASTData>(name, ASTData(type_def, Symbol(typeAlias, true, typeAlias), typeFromString(concatSymbolTree(children[1]), scope)));
skipChildren.insert(1); //Don't want any children, it's unnecessary for ailising
} else { //Is a struct or class
newNode = new NodeTree<ASTData>(name, ASTData(type_def, Symbol(typeAlias, true, typeAlias)));
newNode->getDataRef()->valueType = new Type(newNode); //Type is self-referential since this is the definition
}
scope->getDataRef()->scope[typeAlias].push_back(newNode);
newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope);
scope = newNode;
skipChildren.insert(0); //Identifier lookup will be ourselves, as we just added ourselves to the scope
//return newNode;
} else if (name == "function") {
newNode = new NodeTree<ASTData>(name, ASTData(function, Symbol(concatSymbolTree(children[1]), true), ASTData::strToType(concatSymbolTree(children[0]))));
std::string functionName = concatSymbolTree(children[1]);
newNode = new NodeTree<ASTData>(name, ASTData(function, Symbol(functionName, true), typeFromString(concatSymbolTree(children[0]), scope)));
skipChildren.insert(0);
skipChildren.insert(1);
scope->getDataRef()->scope[functionName].push_back(newNode);
newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope);
scope = newNode;
// auto transChildren = transformChildren(children, skipChildren, scope, types);
// std::cout << functionName << " ";
// for (auto i : transChildren)
// std::cout << "||" << i->getDataRef()->toString() << "|| ";
// std::cout << "??||" << std::endl;
// newNode->addChildren(transChildren);
// return newNode;
std::cout << "finished function " << functionName << std::endl;
} else if (name == "code_block") {
newNode = new NodeTree<ASTData>(name, ASTData(code_block));
newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope);
scope = newNode;
} else if (name == "typed_parameter") {
newNode = transform(children[1]); //Transform to get the identifier
newNode->getDataRef()->valueType = ASTData::strToType(concatSymbolTree(children[0])); //Get the type (left child) and set our new identifer to be that type
//newNode = transform(children[1]); //Transform to get the identifier
std::string parameterName = concatSymbolTree(children[1]);
std::string typeString = concatSymbolTree(children[0]);//Get the type (left child) and set our new identifer to be that type
newNode = new NodeTree<ASTData>("identifier", ASTData(identifier, Symbol(parameterName, true), typeFromString(typeString, scope)));
scope->getDataRef()->scope[parameterName].push_back(newNode);
newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope);
return newNode;
} else if (name == "expression") {
} else if (name == "boolean_expression" || name == "and_boolean_expression" || name == "bool_exp") {
//If this is an actual part of an expression, not just a premoted term
if (children.size() > 1) {
std::string functionCallName = concatSymbolTree(children[1]);
newNode = new NodeTree<ASTData>(functionCallName, ASTData(function_call, Symbol(functionCallName, true)));
//We do children first so we can do appropriate scope searching with types (yay operator overloading!)
skipChildren.insert(1);
std::vector<NodeTree<ASTData>*> transformedChildren = transformChildren(children, skipChildren, scope, types);
std::string functionCallString = concatSymbolTree(children[1]);
NodeTree<ASTData>* function = scopeLookup(scope, functionCallString, transformedChildren);
if (function == NULL) {
std::cout << "scope lookup error! Could not find " << functionCallString << " in boolean stuff " << std::endl;
throw "LOOKUP ERROR: " + functionCallString;
}
newNode = new NodeTree<ASTData>(functionCallString, ASTData(function_call, function->getDataRef()->valueType));
newNode->addChild(function); // First child of function call is a link to the function
newNode->addChildren(transformedChildren);
} else {
return transform(children[0]); //Just a promoted term, so do child
//std::cout << children.size() << std::endl;
if (children.size() == 0)
return new NodeTree<ASTData>();
return transform(children[0], scope, types); //Just a promoted term, so do child
}
} else if (name == "term") {
//If this is an actual part of an expression, not just a premoted factor
if (children.size() > 1) {
//Here's the order of ops stuff
} else if (name == "expression" || name == "shiftand" || name == "term" || name == "unarad" || name == "access_operation") { //unarad can ride through, it should always just be a promoted child
//If this is an actual part of an expression, not just a premoted child
if (children.size() > 2) {
NodeTree<ASTData>* lhs = transform(children[0], scope); //LHS does not inherit types
NodeTree<ASTData>* rhs;
if (name == "access_operation")
rhs = transform(children[2], lhs->getDataRef()->valueType->typeDefinition, types); //If an access operation, then the right side will be in the lhs's type's scope
else
rhs = transform(children[2], scope, types);
std::string functionCallName = concatSymbolTree(children[1]);
//std::cout << "scope lookup from expression or similar" << std::endl;
std::vector<NodeTree<ASTData>*> transformedChildren; transformedChildren.push_back(lhs); transformedChildren.push_back(rhs);
NodeTree<ASTData>* function = scopeLookup(scope, functionCallName, transformedChildren);
if (function == NULL) {
std::cout << "scope lookup error! Could not find " << functionCallName << " in expression " << std::endl;
throw "LOOKUP ERROR: " + functionCallName;
}
newNode = new NodeTree<ASTData>(functionCallName, ASTData(function_call, Symbol(functionCallName, true)));
skipChildren.insert(1);
newNode->addChild(function); // First child of function call is a link to the function definition
newNode->addChild(lhs);
newNode->addChild(rhs);
if (name == "access_operation")
std::cout << "Access Operation: " << lhs->getDataRef()->symbol.getName() << " : " << rhs->getDataRef()->symbol.getName() << std::endl;
std::cout << functionCallName << " - " << function->getName() << " has value type " << function->getDataRef()->valueType << " and rhs " << rhs->getDataRef()->valueType << std::endl;
//Set the value of this function call
if (function->getDataRef()->valueType)
newNode->getDataRef()->valueType = function->getDataRef()->valueType;
else if (rhs->getDataRef()->valueType)
newNode->getDataRef()->valueType = rhs->getDataRef()->valueType;
else
newNode->getDataRef()->valueType = NULL;
std::cout << "function call to " << functionCallName << " - " << function->getName() << " is now " << newNode->getDataRef()->valueType << std::endl;
return newNode;
//skipChildren.insert(1);
} else {
return transform(children[0]); //Just a promoted factor, so do child
return transform(children[0], scope, types); //Just a promoted child, so do it instead
}
} else if (name == "factor") { //Do factor here, as it has all the weird unary operators
//If this is an actual part of an expression, not just a premoted child
//NO SUPPORT FOR CASTING YET
if (children.size() == 2) {
std::string funcName = concatSymbolTree(children[0]);
NodeTree<ASTData>* param;
if (funcName == "*" || funcName == "&" || funcName == "++" || funcName == "--" || funcName == "-" || funcName == "!" || funcName == "~")
param = transform(children[1], scope, types);
else
funcName = concatSymbolTree(children[1]), param = transform(children[0], scope, types);
//std::cout << "scope lookup from factor" << std::endl;
std::vector<NodeTree<ASTData>*> transformedChildren; transformedChildren.push_back(param);
NodeTree<ASTData>* function = scopeLookup(scope, funcName, transformedChildren);
if (function == NULL) {
std::cout << "scope lookup error! Could not find " << funcName << " in factor " << std::endl;
throw "LOOKUP ERROR: " + funcName;
}
newNode = new NodeTree<ASTData>(funcName, ASTData(function_call, Symbol(funcName, true)));
newNode->addChild(function);
newNode->addChild(param);
if (function->getDataRef()->valueType)
newNode->getDataRef()->valueType = function->getDataRef()->valueType;
else
newNode->getDataRef()->valueType = param->getDataRef()->valueType;
return newNode;
} else {
return transform(children[0], scope, types); //Just a promoted child, so do it instead
}
} else if (name == "factor") {
return transform(children[0]); //Just a premoted number or function call or something, so use it instead
} else if (name == "boolean_expression") {
newNode = new NodeTree<ASTData>(name, ASTData(boolean_expression));
} else if (name == "statement") {
newNode = new NodeTree<ASTData>(name, ASTData(statement));
} else if (name == "if_statement") {
newNode = new NodeTree<ASTData>(name, ASTData(if_statement));
} else if (name == "while_loop") {
newNode = new NodeTree<ASTData>(name, ASTData(while_loop));
} else if (name == "for_loop") {
newNode = new NodeTree<ASTData>(name, ASTData(for_loop));
} else if (name == "return_statement") {
newNode = new NodeTree<ASTData>(name, ASTData(return_statement));
} else if (name == "assignment_statement") {
newNode = new NodeTree<ASTData>(name, ASTData(assignment_statement));
std::string assignFuncName = concatSymbolTree(children[1]);
if (assignFuncName == "=") {
newNode->addChild(transform(children[0], scope, types));
newNode->addChild(transform(children[2], scope, types));
} else {
//For assignments like += or *=, expand the syntatic sugar.
NodeTree<ASTData>* lhs = transform(children[0], scope, types);
NodeTree<ASTData>* rhs = transform(children[2], scope, types);
std::vector<NodeTree<ASTData>*> transformedChildren; transformedChildren.push_back(lhs); transformedChildren.push_back(rhs);
std::string functionName = assignFuncName.substr(0,1);
NodeTree<ASTData>* childCall = new NodeTree<ASTData>(functionName, ASTData(function_call, Symbol(functionName, true)));
NodeTree<ASTData>* functionDef = scopeLookup(scope, functionName, transformedChildren);
if (functionDef == NULL) {
std::cout << "scope lookup error! Could not find " << functionName << " in assignment_statement " << std::endl;
throw "LOOKUP ERROR: " + functionName;
}
childCall->addChild(functionDef); //First child of function call is definition of the function
childCall->addChild(lhs);
childCall->addChild(rhs);
newNode->addChild(lhs);
newNode->addChild(childCall);
}
return newNode;
} else if (name == "declaration_statement") {
newNode = new NodeTree<ASTData>(name, ASTData(declaration_statement));
NodeTree<ASTData>* newIdentifier = transform(children[1]); //Transform the identifier
newIdentifier->getDataRef()->valueType = ASTData::strToType(concatSymbolTree(children[0]));//set the type of the identifier
// NodeTree<ASTData>* newIdentifier = transform(children[1], scope); //Transform the identifier
// newIdentifier->getDataRef()->valueType = Type(concatSymbolTree(children[0]));//set the type of the identifier
std::string newIdentifierStr = concatSymbolTree(children[1]);
std::string typeString = concatSymbolTree(children[0]);//Get the type (left child) and set our new identifer to be that type
Type* identifierType = typeFromString(typeString, scope);
NodeTree<ASTData>* newIdentifier = new NodeTree<ASTData>("identifier", ASTData(identifier, Symbol(newIdentifierStr, true), identifierType));
scope->getDataRef()->scope[newIdentifierStr].push_back(newIdentifier);
newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope);
//Now we don't do this thing
// if (identifierType->typeDefinition) {
// //Is a custom type. Populate this declaration's scope with it's inner declarations
// std::vector<NodeTree<ASTData>*> definitions = identifierType->typeDefinition->getChildren();
// for (auto i : definitions) {
// //Point to the identifier. May need to change so it points to the declaration or something, with new declarations.....
// newIdentifier->getDataRef()->scope[i->get(0)->getDataRef()->symbol.getName()] = i->get(0); //make each declaration's name point to it's definition, like above
// }
// }
newNode->addChild(newIdentifier);
skipChildren.insert(0); //These, the type and the identifier, have been taken care of.
skipChildren.insert(1);
} else if (name == "if_comp") {
newNode = new NodeTree<ASTData>(name, ASTData(if_comp));
newNode->addChild(new NodeTree<ASTData>("identifier", ASTData(identifier, Symbol(concatSymbolTree(children[0]),true))));
skipChildren.insert(0); //Don't do the identifier. The identifier lookup will fail. That's why we do it here.
} else if (name == "simple_passthrough") {
newNode = new NodeTree<ASTData>(name, ASTData(simple_passthrough));
} else if (name == "function_call") {
//children[0] is scope
std::string functionCallName = concatSymbolTree(children[1]);
std::string functionCallName = concatSymbolTree(children[0]);
newNode = new NodeTree<ASTData>(functionCallName, ASTData(function_call, Symbol(functionCallName, true)));
skipChildren.insert(1);
// if (function == NULL) {
// std::cout << "scope lookup error! Could not find " << functionCallName << " in function_call " << std::endl;
// throw "LOOKUP ERROR: " + functionCallName;
// }
skipChildren.insert(0);
std::vector<NodeTree<ASTData>*> transformedChildren = transformChildren(children, skipChildren, scope, types);
std::cout << "scope lookup from function_call: " << functionCallName << std::endl;
for (auto i : children)
std::cout << i << " : " << i->getName() << " : " << i->getDataRef()->getName() << std::endl;
NodeTree<ASTData>* function = transform(children[0], scope, mapNodesToTypes(transformedChildren));
std::cout << "The thing: " << function << " : " << function->getName() << std::endl;
for (auto i : function->getChildren())
std::cout << i->getName() << " ";
std::cout << std::endl;
newNode->addChild(function);
newNode->getDataRef()->valueType = function->getDataRef()->valueType;
newNode->addChildren(transformedChildren);
return newNode;
} else if (name == "parameter") {
return transform(children[0]); //Don't need a parameter node, just the value
} else if (name == "bool") {
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), boolean));
return transform(children[0], scope, types); //Don't need a parameter node, just the value
} else if (name == "type") {
std::string theConcat = concatSymbolTree(from); //We have no symbol, so this will concat our children
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(theConcat, true), typeFromString(theConcat, scope)));
} else if (name == "number") {
return transform(children[0]);
return transform(children[0], scope, types);
} else if (name == "integer") {
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(from), true), integer));
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(from), true), new Type(integer)));
} else if (name == "float") {
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(from), true), floating));
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(from), true), new Type(floating)));
} else if (name == "double") {
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(from), true), double_percision));
} else if (name == "string") {
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), char_string));
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(from), true), new Type(double_percision)));
} else if (name == "char") {
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), new Type(character, 1))); //Indirection of 1 for array
} else if (name == "string" || name == "triple_quoted_string") {
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), new Type(character, 1))); //Indirection of 1 for array
} else {
return new NodeTree<ASTData>();
}
// In general, iterate through children and do them. Might not do this for all children.
//Do all children but the ones we skip
for (int i = 0; i < children.size(); i++) {
if (skipChildren.find(i) == skipChildren.end()) {
NodeTree<ASTData>* transChild = transform(children[i]);
if (transChild->getData().type) //Only add the children that have a real ASTData::ASTType, that is, legit ASTData.
NodeTree<ASTData>* transChild = transform(children[i], scope, types);
if (transChild->getDataRef()->type) //Only add the children that have a real ASTData::ASTType, that is, legit ASTData.
newNode->addChild(transChild);
else
delete transChild;
}
}
return newNode;
}
//We use this functionality a lot at different places
// Shared helper: transforms each parse-tree child whose index is not listed in
// skipChildren and returns the resulting AST nodes in their original order.
// A transformed child whose ASTData type is falsy (no real ASTType was
// assigned) is deleted instead of being returned.
std::vector<NodeTree<ASTData>*> ASTTransformation::transformChildren(std::vector<NodeTree<Symbol>*> children, std::set<int> skipChildren, NodeTree<ASTData>* scope, std::vector<Type> types) {
    std::vector<NodeTree<ASTData>*> results;
    int position = 0;
    for (auto child : children) {
        const bool skipped = skipChildren.count(position) != 0;
        ++position;
        if (skipped)
            continue;

        NodeTree<ASTData>* converted = transform(child, scope, types);
        if (!converted->getDataRef()->type) {
            // Not legit ASTData, so discard it.
            delete converted;
            continue;
        }
        results.push_back(converted);
    }
    return results;
}
std::vector<Type> ASTTransformation::mapNodesToTypes(std::vector<NodeTree<ASTData>*> nodes) {
std::vector<Type> types;
for (auto i : nodes)
types.push_back(*(i->getDataRef()->valueType));
return types;
}
std::string ASTTransformation::concatSymbolTree(NodeTree<Symbol>* root) {
std::string concatString;
std::string ourValue = root->getData().getValue();
std::string ourValue = root->getDataRef()->getValue();
if (ourValue != "NoValue")
concatString += ourValue;
std::vector<NodeTree<Symbol>*> children = root->getChildren();
for (int i = 0; i < children.size(); i++) {
concatString = concatSymbolTree(children[i]);
concatString += concatSymbolTree(children[i]);
}
return concatString;
}
//Overloaded with the actual children to allow us to handle operator methods
// Looks up `lookup` starting at `scope`, given the already-transformed argument nodes.
// When `lookup` is a key of languageLevelScope (i.e. one of the built-in operators), it first
// searches the type definition of the first argument, if it has one, for "operator" + lookup.
// There is no return inside that branch (it is commented out), so the function always ends by
// returning the result of the type-based scopeLookup overload on the original scope.
NodeTree<ASTData>* ASTTransformation::scopeLookup(NodeTree<ASTData>* scope, std::string lookup, std::vector<NodeTree<ASTData>*> nodes) {
//
auto LLElementIterator = languageLevelScope.find(lookup);
if (LLElementIterator != languageLevelScope.end()) {
std::cout << "Checking for early method level operator overload" << std::endl;
std::string lookupOp = "operator" + lookup;
// Debug dump of every argument node.
for (auto i : nodes)
std::cout << i->getDataRef()->toString() << " ";
std::cout << std::endl;
NodeTree<ASTData>* operatorMethod = NULL;
// NOTE(review): nodes[0] is read without checking that nodes is non-empty.
// Only a first argument with a user-defined type (a non-NULL typeDefinition) can supply an operator method.
// presumably slice(nodes,1,-1) yields every argument after the first - verify against the helper.
if (nodes[0]->getDataRef()->valueType && nodes[0]->getDataRef()->valueType->typeDefinition)
operatorMethod = scopeLookup(nodes[0]->getDataRef()->valueType->typeDefinition, lookupOp, mapNodesToTypes(slice(nodes,1,-1)));
if (operatorMethod) {
//Ok, so we construct
std::cout << "Early method level operator was found" << std::endl;
//return operatorMethod;
// NOTE(review): newNode, functionCallName, function, lhs and rhs are not declared anywhere in
// this function as shown; unless they are members declared elsewhere, this branch will not
// compile. It looks like it was carried over from the expression handling in transform() - confirm.
newNode = new NodeTree<ASTData>(functionCallName, ASTData(function_call, Symbol(functionCallName, true)));
newNode->addChild(function); // First child of function call is a link to the function definition
newNode->addChild(lhs);
newNode->addChild(rhs);
//Set the value of this function call
if (function->getDataRef()->valueType)
newNode->getDataRef()->valueType = function->getDataRef()->valueType;
else if (rhs->getDataRef()->valueType)
newNode->getDataRef()->valueType = rhs->getDataRef()->valueType;
else
newNode->getDataRef()->valueType = NULL;
}
// NOTE(review): this message is printed even when the operator method WAS found, because the
// branch above does not return.
std::cout << "Early method level operator was NOT found" << std::endl;
}
// Fall back to the ordinary lookup, matching on the value types of the argument nodes.
return scopeLookup(scope, lookup, mapNodesToTypes(nodes));
}
// Resolves `lookup` to a declaration node, searching `scope` and then its
// chain of "~enclosing_scope" entries, with the language-level operator table
// as the final fallback.
//
// scope  - node whose ASTData scope map is searched first; may be NULL, in
//          which case only the language-level table is consulted.
// lookup - name to resolve. If it is a key of languageLevelScope it is
//          rewritten to "operator" + lookup before the scopes are searched,
//          so user-defined operator overloads are found under that name.
// types  - argument types used to choose among overloads. A candidate matches
//          when it has exactly types.size() children besides its last one and
//          each of those children's value types equals the corresponding
//          entry. type_def candidates are returned without any type check.
//
// Returns the matching node, the language-level operator node if nothing in
// scope matched, or NULL if the name could not be resolved at all.
//
// Changes from the previous version: the scope map is accessed by reference
// instead of being copied on every (recursive) call, a NULL scope no longer
// crashes, and a candidate parameter without a value type is treated as a
// mismatch instead of being dereferenced.
NodeTree<ASTData>* ASTTransformation::scopeLookup(NodeTree<ASTData>* scope, std::string lookup, std::vector<Type> types) {
    //We first search the languageLevelScope to see if it's an operator. If so, we modify the lookup with a preceding "operator"
    auto LLElementIterator = languageLevelScope.find(lookup);
    if (LLElementIterator != languageLevelScope.end())
        lookup = "operator" + lookup;

    if (scope != NULL) {
        auto& scopeMap = scope->getDataRef()->scope;

        //Debug dump of every name visible directly in this scope
        for (const auto& entry : scopeMap)
            std::cout << entry.first << " ";
        std::cout << std::endl;

        auto elementIterator = scopeMap.find(lookup);
        if (elementIterator != scopeMap.end()) {
            for (auto candidate : elementIterator->second) {
                //Types and functions cannot have the same name, and types do not have parameter types, so check and short-circuit
                if (candidate->getDataRef()->type == type_def)
                    return candidate;

                std::vector<NodeTree<ASTData>*> children = candidate->getChildren();
                //The last child is not a parameter, so it is excluded from the count
                const size_t parameterCount = children.empty() ? 0 : children.size() - 1;
                if (types.size() != parameterCount) {
                    std::cout << "Type sizes do not match between two " << lookup << "(" << types.size() << "," << parameterCount << "), types are: ";
                    for (auto& argumentType : types)
                        std::cout << argumentType.toString() << " ";
                    std::cout << std::endl;
                    continue;
                }

                bool typesMatch = true;
                for (size_t j = 0; j < types.size(); j++) {
                    Type* parameterType = children[j]->getDataRef()->valueType;
                    //A parameter with no type cannot match any argument type
                    if (parameterType == NULL || types[j] != *parameterType) {
                        typesMatch = false;
                        std::cout << "Types do not match between two " << lookup << std::endl;
                        break;
                    }
                }
                if (typesMatch)
                    return candidate;
            }
        }

        //if it doesn't exist, try the enclosing scope if it exists.
        auto enclosingIterator = scopeMap.find("~enclosing_scope");
        if (enclosingIterator != scopeMap.end() && !enclosingIterator->second.empty()) {
            NodeTree<ASTData>* upperResult = scopeLookup(enclosingIterator->second[0], lookup, types);
            if (upperResult)
                return upperResult;
        }
    }

    std::cout << "could not find " << lookup << " in standard scope, checking for operator" << std::endl;
    //Note that we don't check for types. At some point we should, as we don't know how to add objects/structs without overloaded operators, etc
    //Also, we've already searched for the element because this is also how we keep track of operator overloading
    if (LLElementIterator != languageLevelScope.end()) {
        std::cout << "found it at language level as operator." << std::endl;
        return LLElementIterator->second[0];
    }
    std::cout << "Did not find, returning NULL" << std::endl;
    return NULL;
}
// Builds a heap-allocated Type from its textual spelling.
//
// typeIn - type name optionally followed by '*' characters; each trailing '*'
//          adds one level of indirection.
// scope  - scope used to resolve names that are not built-in types.
//
// The built-in names void, bool, int, float, double and char map to their
// ValueType; any other name gets base type `none` plus whatever scopeLookup
// finds for it (possibly NULL). The caller receives a new Type object.
//
// The '*'-counting loop is now bounded by the string length: previously an
// empty string or a string made only of '*' indexed before the start of the
// string.
// NOTE(review): strSlice is still handed such inputs unchanged - confirm it
// copes with an empty or fully-consumed string.
Type* ASTTransformation::typeFromString(std::string typeIn, NodeTree<ASTData>* scope) {
    int indirection = 0;
    while (static_cast<size_t>(indirection) < typeIn.size()
           && typeIn[typeIn.size() - indirection - 1] == '*')
        indirection++;

    //Strip the trailing '*'s to get the bare type name
    std::string edited = strSlice(typeIn, 0, -(indirection + 1));

    ValueType baseType;
    NodeTree<ASTData>* typeDefinition = NULL;
    if (edited == "void")
        baseType = void_type;
    else if (edited == "bool")
        baseType = boolean;
    else if (edited == "int")
        baseType = integer;
    else if (edited == "float")
        baseType = floating;
    else if (edited == "double")
        baseType = double_percision;
    else if (edited == "char")
        baseType = character;
    else {
        //Not a built-in, so it should name a type definition visible from this scope
        baseType = none;
        typeDefinition = scopeLookup(scope, edited);
    }
    return new Type(baseType, typeDefinition, indirection);
}

View File

@@ -1,12 +1,33 @@
#include "CGenerator.h"
CGenerator::CGenerator() {
CGenerator::CGenerator() : generatorString("__C__") {
tabLevel = 0;
}
CGenerator::~CGenerator() {
}
// Generates one C source file per AST and a shell build script for the set.
//
// ASTs       - maps a base file name to the root of its AST; each entry is
//              written to "<name>.c" in the current directory.
// outputName - name of the compiled binary; the build script is written to
//              "<outputName>.sh".
//
// A file that cannot be opened is reported on stdout and skipped. This now
// includes the build script, whose open failure used to be silently ignored.
// The script still lists every .c file, including any that failed to open.
void CGenerator::generateCompSet(std::map<std::string, NodeTree<ASTData>*> ASTs, std::string outputName) {
    std::string buildString = "#!/bin/sh\ncc -std=c99 ";
    for (const auto& unit : ASTs) {
        const std::string cFileName = unit.first + ".c";
        buildString += cFileName + " ";

        std::ofstream outputCFile(cFileName);   //closed automatically at end of iteration
        if (outputCFile.is_open())
            outputCFile << generate(unit.second);
        else
            std::cout << "Cannot open file " << cFileName << std::endl;
    }
    buildString += "-o " + outputName;

    const std::string scriptName = outputName + ".sh";
    std::ofstream outputBuild(scriptName);
    if (outputBuild.is_open())
        outputBuild << buildString;
    else
        std::cout << "Cannot open file " << scriptName << std::endl;
}
std::string CGenerator::tabs() {
std::string returnTabs;
for (int i = 0; i < tabLevel; i++)
@@ -14,76 +35,229 @@ std::string CGenerator::tabs() {
return returnTabs;
}
std::string CGenerator::generate(NodeTree<ASTData>* from) {
//The enclosing object is for when we're generating the inside of object methods. They allow us to check scope lookups against the object we're in
std::string CGenerator::generate(NodeTree<ASTData>* from, NodeTree<ASTData>* enclosingObject) {
ASTData data = from->getData();
std::vector<NodeTree<ASTData>*> children = from->getChildren();
std::string output = "";
std::string output = "";
switch (data.type) {
case translation_unit:
//Do nothing
//Do here because we may need the typedefs before the declarations of variables
for (int i = 0; i < children.size(); i++)
if (children[i]->getDataRef()->type == type_def)
output += generate(children[i], enclosingObject) + "\n";
//Declare everything in translation unit scope here. (allows stuff from other files, automatic forward declarations)
for (auto i = data.scope.begin(); i != data.scope.end(); i++) {
for (auto overloadedMembers : i->second) {
NodeTree<ASTData>* declaration = overloadedMembers;
std::vector<NodeTree<ASTData>*> decChildren = declaration->getChildren();
ASTData declarationData = declaration->getData();
switch(declarationData.type) {
case identifier:
output += ValueTypeToCType(declarationData.valueType) + " " + declarationData.symbol.getName() + "; /*identifier*/\n";
break;
case function:
{
if (decChildren.size() == 0) { //Not a real function, must be a built in passthrough {
output += "/* built in function: " + declarationData.toString() + " */\n";
break;
}
output += "\n" + ValueTypeToCType(declarationData.valueType) + " ";
std::string nameDecoration, parameters;
for (int j = 0; j < decChildren.size()-1; j++) {
if (j > 0)
parameters += ", ";
parameters += ValueTypeToCType(decChildren[j]->getData().valueType) + " " + generate(decChildren[j], enclosingObject);
nameDecoration += "_" + ValueTypeToCTypeDecoration(decChildren[j]->getData().valueType);
}
output += CifyFunctionName(declarationData.symbol.getName()) + nameDecoration + "(" + parameters + "); /*func*/\n";
break;
}
case type_def:
//type
output += "/*typedef " + declarationData.symbol.getName() + " */\n";
break;
default:
//std::cout << "Declaration? named " << declaration->getName() << " of unknown type " << ASTData::ASTTypeToString(declarationData.type) << " in translation unit scope" << std::endl;
output += "/*unknown declaration named " + declaration->getName() + "*/\n";
}
}
}
//Do here because we need the newlines
for (int i = 0; i < children.size(); i++)
if (children[i]->getDataRef()->type != type_def)
output += generate(children[i], enclosingObject) + "\n";
return output;
break;
case interpreter_directive:
//Do nothing
break;
case import:
return "#include \"" + data.symbol.getName() + "\"\n";
break;
return "/* would import \"" + data.symbol.getName() + "\" but....*/\n";
//return "#include <" + data.symbol.getName() + ">\n";
case identifier:
return data.symbol.getName();
break;
case function:
output += "\n" + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + "(";
for (int i = 0; i < children.size()-1; i++) {
if (i > 0)
output += ", ";
output += ASTData::ValueTypeToString(children[i]->getData().valueType) + " " + generate(children[i]);
{
//If we're in an object method, and our enclosing scope is that object, we're a member of the object and should use the self reference.
std::string preName;
if (enclosingObject && enclosingObject->getDataRef()->scope.find(data.symbol.getName()) != enclosingObject->getDataRef()->scope.end())
preName += "self->";
if (false)
for (int j = 0; j < children.size()-1; j++)
preName += ValueTypeToCType(children[j]->getData().valueType) + "_";
return preName + CifyFunctionName(data.symbol.getName()); //Cifying does nothing if not an operator overload
}
case type_def:
if (children.size() == 0) {
return "typedef " + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + ";";
} else {
std::string objectString = "typedef struct __struct_dummy_" + data.symbol.getName() + "__ {\n";
std::string postString; //The functions have to be outside the struct definition
for (int i = 0; i < children.size(); i++) {
std::cout << children[i]->getName() << std::endl;
if (children[i]->getName() == "function") //If object method
postString += generateObjectMethod(from, children[i]) + "\n";
else
objectString += generate(children[i], enclosingObject) + "\n";
}
objectString += "} " + data.symbol.getName() + ";";
return objectString + postString; //Functions come after the declaration of the struct
}
output+= ")\n" + generate(children[children.size()-1]);
case function:
{
output += "\n" + ValueTypeToCType(data.valueType) + " ";
std::string nameDecoration, parameters;
for (int j = 0; j < children.size()-1; j++) {
if (j > 0)
parameters += ", ";
parameters += ValueTypeToCType(children[j]->getData().valueType) + " " + generate(children[j], enclosingObject);
nameDecoration += "_" + ValueTypeToCTypeDecoration(children[j]->getData().valueType);
}
output += CifyFunctionName(data.symbol.getName()) + nameDecoration + "(" + parameters + ")\n" + generate(children[children.size()-1], enclosingObject);
return output;
break;
}
case code_block:
output += tabs() + "{\n";
output += "{\n";
tabLevel++;
for (int i = 0; i < children.size(); i++)
output += generate(children[i]);
for (int i = 0; i < children.size(); i++) {
//std::cout << "Line " << i << std::endl;
std::string line = generate(children[i], enclosingObject);
//std::cout << line << std::endl;
output += line;
}
tabLevel--;
output += tabs() + "}";
return output;
break;
case expression:
output += " " + data.symbol.getName() + ", ";
break;
case boolean_expression:
output += " " + data.symbol.getName() + " ";
break;
case statement:
return tabs() + generate(children[0]) + ";\n";
break;
return tabs() + generate(children[0], enclosingObject) + ";\n";
case if_statement:
output += "if (" + generate(children[0]) + ") \n" + generate(children[1]);
output += "if (" + generate(children[0], enclosingObject) + ")\n\t" + generate(children[1], enclosingObject);
if (children.size() > 2)
output += " else " + generate(children[2]);
output += " else " + generate(children[2], enclosingObject);
return output;
case while_loop:
output += "while (" + generate(children[0], enclosingObject) + ")\n\t" + generate(children[1], enclosingObject);
return output;
case for_loop:
//The strSlice calls are there to get rid of an unwanted return and unwanted semicolon(s)
output += "for (" + strSlice(generate(children[0], enclosingObject),0,-3) + generate(children[1], enclosingObject) + ";" + strSlice(generate(children[2], enclosingObject),0,-3) + ")\n\t" + generate(children[3], enclosingObject);
return output;
break;
case return_statement:
return "return " + generate(children[0]);
if (children.size())
return "return " + generate(children[0], enclosingObject);
else
return "return";
case assignment_statement:
return generate(children[0]) + " = " + generate(children[1]);
return generate(children[0], enclosingObject) + " = " + generate(children[1], enclosingObject);
case declaration_statement:
return ASTData::ValueTypeToString(children[0]->getData().valueType) + " " + generate(children[0]) + " = " + generate(children[1]);
if (children.size() == 1)
return ValueTypeToCType(children[0]->getData().valueType) + " " + generate(children[0], enclosingObject) + ";";
else
return ValueTypeToCType(children[0]->getData().valueType) + " " + generate(children[0], enclosingObject) + " = " + generate(children[1], enclosingObject) + ";";
case if_comp:
if (generate(children[0], enclosingObject) == generatorString)
return generate(children[1], enclosingObject);
return "";
case simple_passthrough:
return strSlice(generate(children[0], enclosingObject), 3, -4);
case function_call:
{
//NOTE: The first (0th) child of a function call node is the declaration of the function
//Handle operators specially for now. Will later replace with
//Inlined functions in the standard library
std::string name = data.symbol.getName();
if (name == "+" || name == "-" || name == "*" || name == "/") {
return "((" + generate(children[0]) + ")" + name + "(" + generate(children[1]) + "))";
// std::string name = data.symbol.getName();
// std::cout << name << " == " << children[0]->getData().symbol.getName() << std::endl;
std::string name = children[0]->getDataRef()->symbol.getName();
ASTType funcType = children[0]->getDataRef()->type;
std::cout << "Doing function: " << name << std::endl;
//Test for special functions only if what we're testing is, indeed, the definition, not a function call that returns a callable function pointer
if (funcType == function) {
if (name == "++" || name == "--")
return generate(children[1], enclosingObject) + name;
if (name == "*" && children.size() == 2) //Is dereference, not multiplication
return "*(" + generate(children[1], enclosingObject) + ")";
if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!="
|| name == "<" || name == ">" || name == "%" || name == "+=" || name == "-=" || name == "*=" || name == "/=" || name == "||"
|| name == "&&" || name == "!" )
return "((" + generate(children[1], enclosingObject) + ")" + name + "(" + generate(children[2], enclosingObject) + "))";
else if (name == "." || name == "->") {
if (children.size() == 1)
return "/*dot operation with one child*/" + generate(children[0], enclosingObject) + "/*end one child*/";
//If this is accessing an actual function, find the function in scope and take the appropriate action. Probably an object method
if (children[2]->getDataRef()->type == function) {
std::string functionName = children[2]->getDataRef()->symbol.getName();
NodeTree<ASTData>* possibleObjectType = children[1]->getDataRef()->valueType->typeDefinition;
//If it is an object method, generate it like one. Needs extension/modification for inheritance
if (possibleObjectType && possibleObjectType->getDataRef()->scope.find(functionName) != possibleObjectType->getDataRef()->scope.end()) {
std::string nameDecoration;
std::vector<NodeTree<ASTData>*> functionDefChildren = children[2]->getChildren(); //The function def is the rhs of the access operation
std::cout << "Decorating (in access-should be object) " << name << " " << functionDefChildren.size() << std::endl;
for (int i = 0; i < (functionDefChildren.size() > 0 ? functionDefChildren.size()-1 : 0); i++)
nameDecoration += "_" + ValueTypeToCTypeDecoration(functionDefChildren[i]->getData().valueType);
/*HERE*/ return possibleObjectType->getDataRef()->symbol.getName() +"__" + CifyFunctionName(functionName) + nameDecoration + "(" + (name == "." ? "&" : "") + generate(children[1], enclosingObject) + ",";
//The comma lets the upper function call know we already started the param list
//Note that we got here from a function call. We just pass up this special case and let them finish with the parentheses
} else {
std::cout << "Is not in scope or not type" << std::endl;
return "((" + generate(children[1], enclosingObject) + ")" + name + functionName + ")";
}
} else {
//return "((" + generate(children[1], enclosingObject) + ")" + name + generate(children[2], enclosingObject) + ")";
return "((" + generate(children[1], enclosingObject) + ")" + name + generate(children[2]) + ")";
}
} else {
//It's a normal function call, not a special one or a method or anything. Name decorate.
std::vector<NodeTree<ASTData>*> functionDefChildren = children[0]->getChildren();
std::cout << "Decorating (none-special)" << name << " " << functionDefChildren.size() << std::endl;
std::string nameDecoration;
for (int i = 0; i < (functionDefChildren.size() > 0 ? functionDefChildren.size()-1 : 0); i++)
nameDecoration += "_" + ValueTypeToCTypeDecoration(functionDefChildren[i]->getData().valueType);
//Check to see if we're inside of an object and this is a method call
bool isSelfObjectMethod = enclosingObject && contains(enclosingObject->getChildren(), children[0]);
if (isSelfObjectMethod)
output += enclosingObject->getDataRef()->symbol.getName() +"__";
/*HERE*/ output += CifyFunctionName(name) + nameDecoration + "(";
if (isSelfObjectMethod)
output += children.size() > 1 ? "self," : "self";
}
} else {
//This part handles cases where our definition isn't the function definition (that is, it is probably the return from another function)
//It's probably the result of an access function call (. or ->) to access an object method.
std::string functionCallSource = generate(children[0], enclosingObject);
if (functionCallSource[functionCallSource.size()-1] == ',') //If it's a member method, it's already started the parameter list.
output += children.size() > 1 ? functionCallSource : functionCallSource.substr(0, functionCallSource.size()-1);
else
output += functionCallSource + "(";
}
output += data.symbol.getName() + "(";
for (int i = 0; i < children.size(); i++)
for (int i = 1; i < children.size(); i++) //children[0] is the declaration
if (i < children.size()-1)
output += generate(children[i]) + ", ";
else output += generate(children[i]);
output += generate(children[i], enclosingObject) + ", ";
else
output += generate(children[i], enclosingObject);
output += ") ";
return output;
}
@@ -94,32 +268,141 @@ std::string CGenerator::generate(NodeTree<ASTData>* from) {
std::cout << "Nothing!" << std::endl;
}
for (int i = 0; i < children.size(); i++)
output += generate(children[i]);
output += generate(children[i], enclosingObject);
return output;
}
std::string CGenerator::ValueTypeToCType(ValueType type) {
switch (type) {
//Generates the C function for a method of an object.
//  enclosingObject - the type_def node of the object the method belongs to
//  from            - the function node of the method; its last child is the body and the children
//                    before it are the parameters
//The emitted function is named <object>__<method><decoration> and takes a pointer to the object as a
//leading "self" parameter, followed by the method's own parameters.
//Returns the generated C text. If the node has no children at all only a prototype is returned.
std::string CGenerator::generateObjectMethod(NodeTree<ASTData>* enclosingObject, NodeTree<ASTData>* from) {
	std::string output;
	ASTData data = from->getData();
	Type enclosingObjectType = *(enclosingObject->getDataRef()->valueType); //Copy a new type so we can turn it into a pointer if we need to
	enclosingObjectType.indirection++;
	std::vector<NodeTree<ASTData>*> children = from->getChildren();
	std::string nameDecoration, parameters;
	//Everything but the last child is a parameter. size()-1 is computed only when there are children,
	//because on an empty vector the unsigned subtraction wraps around to a huge count.
	const std::vector<NodeTree<ASTData>*>::size_type parameterCount = children.empty() ? 0 : children.size()-1;
	for (std::vector<NodeTree<ASTData>*>::size_type i = 0; i < parameterCount; i++) {
		parameters += ", " + ValueTypeToCType(children[i]->getData().valueType) + " " + generate(children[i]);
		nameDecoration += "_" + ValueTypeToCTypeDecoration(children[i]->getData().valueType);
	}
	output += "\n" + ValueTypeToCType(data.valueType) + " " + enclosingObject->getDataRef()->symbol.getName() +"__"
		+ CifyFunctionName(data.symbol.getName()) + nameDecoration + "(" + ValueTypeToCType(&enclosingObjectType)
		+ " self" + parameters + ")";
	//No children means there is no body node to generate, so finish the line as a prototype
	//instead of indexing past the end of the vector
	if (children.empty())
		return output + ";\n";
	output += "\n" + generate(children[children.size()-1], enclosingObject); //Pass in the object so we can properly handle access to member stuff
	return output;
}
std::string CGenerator::ValueTypeToCType(Type *type) {
std::string return_type;
switch (type->baseType) {
case none:
return "none";
if (type->typeDefinition)
return_type = type->typeDefinition->getDataRef()->symbol.getName();
else
return_type = "none";
break;
case void_type:
return_type = "void";
break;
case boolean:
return "bool";
return_type = "bool";
break;
case integer:
return "int";
return_type = "int";
break;
case floating:
return "float";
return_type = "float";
break;
case double_percision:
return "double";
return_type = "double";
break;
case char_string:
return "char*";
case character:
return_type = "char";
break;
default:
return "unknown_ValueType";
return_type = "unknown_ValueType";
break;
}
for (int i = 0; i < type->indirection; i++)
return_type += "*";
return return_type;
}
//Gives the text used for a type when it becomes part of a decorated function name.
//The base is the built in type's name, or the name of the type definition for a user defined type
//("none" when there is no definition), and "_P__" is appended once per level of indirection.
std::string CGenerator::ValueTypeToCTypeDecoration(Type *type) {
	std::string decoration;
	if (type->baseType == none) {
		//User defined types are decorated with the name they were defined with
		if (type->typeDefinition)
			decoration = type->typeDefinition->getDataRef()->symbol.getName();
		else
			decoration = "none";
	} else if (type->baseType == void_type) {
		decoration = "void";
	} else if (type->baseType == boolean) {
		decoration = "bool";
	} else if (type->baseType == integer) {
		decoration = "int";
	} else if (type->baseType == floating) {
		decoration = "float";
	} else if (type->baseType == double_percision) {
		decoration = "double";
	} else if (type->baseType == character) {
		decoration = "char";
	} else {
		decoration = "unknown_ValueType";
	}
	//A '*' can't be part of a C identifier, so each pointer level is spelled out instead
	for (int level = 0; level < type->indirection; level++)
		decoration += "_P__";
	return decoration;
}
//Turns a function name that may contain operator characters (an operator overload such as "operator+")
//into text usable in a C identifier by replacing every operator with "_<word>_".
//  name - the function name to convert
//Returns the converted name; a name containing none of the listed operators is returned unchanged.
//NOTE(review): "<" and ">" on their own have no entry, so they are left in the name as they are.
std::string CGenerator::CifyFunctionName(std::string name) {
	//Longer operators come first so they get their own word. When the single character entries are
	//tried first, "++" turns into two "plus"es, the multi character entries are never reached, and
	//"->" ends up as "_minus_>" which isn't an identifier.
	static const char* const operatorsToReplace[][2] = {
		{ "<<=", "doubleleftequals" },
		{ ">>=", "doublerightequals" },
		{ "++", "doubleplus" },
		{ "--", "doubleminus" },
		{ "<<", "doubleleft" },
		{ ">>", "doubleright" },
		{ "==", "doubleequals" },
		{ "!=", "notequals" },
		{ "&&", "doubleamprsnd" },
		{ "||", "doublepipe" },
		{ "+=", "plusequals" },
		{ "-=", "minusequals" },
		{ "/=", "divequals" },
		{ "%=", "modequals" },
		{ "^=", "caratequals" },
		{ "&=", "amprsdequals" },
		{ "|=", "pipeequals" },
		{ "*=", "starequals" },
		{ "->", "arrow" },
		{ "+", "plus" },
		{ "-", "minus" },
		{ "*", "star" },
		{ "/", "div" },
		{ "%", "mod" },
		{ "^", "carat" },
		{ "&", "amprsd" },
		{ "|", "pipe" },
		{ "~", "tilde" },
		{ "!", "exclamationpt" },
		{ ",", "comma" },
		{ "=", "equals" } };
	const size_t length = sizeof(operatorsToReplace)/sizeof(operatorsToReplace[0]);
	for (size_t i = 0; i < length; i++) {
		const std::string operatorText = operatorsToReplace[i][0];
		const std::string replacement = std::string("_") + operatorsToReplace[i][1] + "_";
		size_t foundPos = name.find(operatorText);
		while (foundPos != std::string::npos) {
			name.replace(foundPos, operatorText.length(), replacement);
			//The replacement has no operator characters in it, so searching can resume after it
			foundPos = name.find(operatorText, foundPos + replacement.length());
		}
	}
	return name;
}

View File

@@ -128,3 +128,8 @@ std::string GraphStructuredStack::toString() {
}
return tostring;
}
//Resets the stack to empty by clearing both the gss and edges containers.
//NOTE(review): only clear() is called on the containers; if they hold raw pointers that this class
//owns, what they point at is not freed here - confirm ownership.
void GraphStructuredStack::clear() {
gss.clear();
edges.clear();
}

126
src/Importer.cpp Normal file
View File

@@ -0,0 +1,126 @@
#include "Importer.h"
//Sets up an importer that parses with parserIn and fills in the two symbol lists that import()
//applies to every parse tree: removeSymbols (for the removal transformation) and collapseSymbols
//(for the collapse transformation). Symbols are added in the order they appear in the tables.
Importer::Importer(Parser* parserIn) {
	parser = parserIn;

	//Name and terminal flag of every symbol to remove
	struct RemovalEntry {
		const char* name;
		bool isTerminal;
	};
	static const RemovalEntry removals[] = {
		{ "WS", false },
		{ "\\(", true },
		{ "\\)", true },
		{ "::", true },
		{ ";", true },
		{ "{", true },
		{ "}", true },
		{ "(", true },
		{ ")", true },
		{ "import", true }, //Don't need the actual text of the symbol
		{ "interpreter_directive", false },
		{ "if", true },
		{ "while", true },
		{ "__if_comp__", true },
		{ "comp_simple_passthrough", true },
		{ "typedef", true } };
	const size_t removalCount = sizeof(removals)/sizeof(removals[0]);
	for (size_t r = 0; r < removalCount; r++)
		removeSymbols.push_back(Symbol(removals[r].name, removals[r].isTerminal));

	//Every symbol to collapse is created with a false terminal flag, so only the names are listed
	static const char* const collapses[] = {
		"opt_typed_parameter_list",
		"opt_parameter_list",
		"opt_import_list",
		"import_list",
		"statement_list",
		"parameter_list",
		"typed_parameter_list",
		"unorderd_list_part",
		"if_comp_pred",
		"declaration_block" };
	const size_t collapseCount = sizeof(collapses)/sizeof(collapses[0]);
	for (size_t c = 0; c < collapseCount; c++)
		collapseSymbols.push_back(Symbol(collapses[c], false));
}
//Destructor. The body is empty, so no explicit cleanup is performed here.
Importer::~Importer() {
//destructor
}
//Parses the file fileName and transforms it into an AST.
//  fileName - path of the source file to import
//Returns the AST for the file. A file that was already imported is not processed again; the AST
//stored for it is returned. Returns NULL if the source file or one of the debug output files can't
//be opened, or if the parser does not produce a parse tree.
//Along the way three debug files are written next to the source: "<fileName>out" (the raw parse tree),
//"<fileName>out.transformed.dot" (after removal/collapse) and "<fileName>out.AST.dot" (the AST).
NodeTree<ASTData>* Importer::import(std::string fileName) {
	//Check to see if we've already done it
	auto alreadyImported = imported.find(fileName);
	if (alreadyImported != imported.end())
		return alreadyImported->second;

	std::ifstream programInFile;
	std::ofstream outFile, outFileTransformed, outFileAST;

	std::string outputName = fileName + "out";

	programInFile.open(fileName);
	if (!programInFile.is_open()) {
		std::cout << "Problem opening programInFile " << fileName << "\n";
		return NULL;
	}

	outFile.open(outputName);
	if (!outFile.is_open()) {
		std::cout << "Probelm opening output file " << outputName << "\n";
		return NULL;
	}

	outFileTransformed.open((outputName + ".transformed.dot").c_str());
	if (!outFileTransformed.is_open()) {
		std::cout << "Probelm opening second output file " << outputName + ".transformed.dot" << "\n";
		return NULL;
	}

	outFileAST.open((outputName + ".AST.dot").c_str());
	if (!outFileAST.is_open()) {
		std::cout << "Probelm opening second output file " << outputName + ".AST.dot" << "\n";
		return NULL;
	}

	//Read the whole file into one string, a line at a time
	//NOTE(review): because good() is tested before getline, the last pass (the one that hits end of
	//file without reading anything) still appends a "\n". Left as is since the parser is fed this
	//exact text - confirm before changing.
	std::string programInputFileString, line;
	while(programInFile.good()) {
		getline(programInFile, line);
		programInputFileString.append(line+"\n");
	}
	programInFile.close();

	//std::cout << programInputFileString << std::endl;
	NodeTree<Symbol>* parseTree = parser->parseInput(programInputFileString);

	if (parseTree) {
		//std::cout << parseTree->DOTGraphString() << std::endl;
		outFile << parseTree->DOTGraphString() << std::endl;
	} else {
		//Without a parse tree there is nothing for the transformations below to work on, so stop here
		//instead of handing them a NULL tree. Nothing is stored in imported for this file.
		std::cout << "ParseTree returned from parser is NULL!" << std::endl;
		outFile.close();
		return NULL;
	}
	outFile.close();

	//Remove Transformations
	for (int i = 0; i < removeSymbols.size(); i++)
		parseTree = RemovalTransformation<Symbol>(removeSymbols[i]).transform(parseTree);

	//Collapse Transformations
	for (int i = 0; i < collapseSymbols.size(); i++)
		parseTree = CollapseTransformation<Symbol>(collapseSymbols[i]).transform(parseTree);

	if (parseTree) {
		outFileTransformed << parseTree->DOTGraphString() << std::endl;
	} else {
		std::cout << "Tree returned from transformation is NULL!" << std::endl;
	}
	outFileTransformed.close();

	//Call with ourself to allow the transformation to call us to import files that it needs
	NodeTree<ASTData>* AST = ASTTransformation(this).transform(parseTree);

	if (AST) {
		outFileAST << AST->DOTGraphString() << std::endl;
	} else {
		std::cout << "Tree returned from ASTTransformation is NULL!" << std::endl;
	}
	outFileAST.close();

	//Remember the result so importing the same file again returns it straight away
	imported[fileName] = AST;

	return AST;
}
//Returns the imported map (file name -> AST, filled in by import()). It is returned by value,
//so the caller gets a copy of the map; the NodeTree pointers in it are the same ones held here.
std::map<std::string, NodeTree<ASTData>*> Importer::getASTMap() {
return imported;
}

View File

@@ -114,3 +114,7 @@ void Lexer::test() {
std::cout << "Lexer tests passed\n";
}
//Sets currentPosition back to 0. No other lexer state is touched here.
void Lexer::reset() {
currentPosition = 0;
}

View File

@@ -29,7 +29,12 @@ const bool ParseRule::operator!=(const ParseRule &other) {
}
ParseRule* ParseRule::clone() {
return( new ParseRule(leftHandle, pointerIndex, rightSide, lookahead) );
std::vector<Symbol>* newLookahead = NULL;
if (lookahead) {
newLookahead = new std::vector<Symbol>();
*newLookahead = *lookahead;
}
return( new ParseRule(leftHandle, pointerIndex, rightSide, newLookahead) );
}
void ParseRule::setLeftHandle(Symbol leftHandle) {

View File

@@ -7,6 +7,16 @@ Parser::Parser() : EOFSymbol("$EOF$", true), nullSymbol("$NULL$", true), invalid
Parser::~Parser() {
}
//Exports the parse table to file. All of the work is handed off to table.exportTable.
void Parser::exportTable(std::ofstream &file) {
//Do table
table.exportTable(file);
}
//Imports the parse table from tableData. All of the work is handed off to table.importTable.
//NOTE(review): the layout expected in tableData is decided by table.importTable, which isn't shown here.
void Parser::importTable(char* tableData) {
//Do table
table.importTable(tableData);
return;
}
Symbol Parser::getOrAddSymbol(std::string symbolString, bool isTerminal) {
Symbol symbol;
std::pair<std::string, bool> entry = std::make_pair(symbolString, isTerminal);
@@ -68,7 +78,7 @@ void Parser::loadGrammer(std::string grammerInputString) {
//Get next token
currToken = reader.word();
}
std::cout << "Parsed!\n";
//std::cout << "Parsed!\n";
// for (std::vector<ParseRule*>::size_type i = 0; i < loadedGrammer.size(); i++)
// std::cout << loadedGrammer[i]->toString() << std::endl;
@@ -88,7 +98,7 @@ void Parser::createStateSet() {
std::queue<State*>* toDo = new std::queue<State*>();
toDo->push(zeroState);
//std::cout << "Begining for main set for loop" << std::endl;
while (toDo->front()) {
while (toDo->size()) {
//closure
closure(toDo->front());
//Add the new states
@@ -181,7 +191,7 @@ std::vector<Symbol>* Parser::incrementiveFollowSet(ParseRule* rule) {
}
}
followSet->insert(followSet->end(), symbolFirstSet->begin(), symbolFirstSet->end());
//delete symbolFirstSet;
delete symbolFirstSet;
rule->advancePointer();
}
if (rule->isAtEnd()) {
@@ -209,10 +219,13 @@ void Parser::closure(State* state) {
std::vector<ParseRule*>* stateTotal = state->getTotal();
for (std::vector<ParseRule*>::size_type i = 0; i < stateTotal->size(); i++) {
ParseRule* currentStateRule = (*stateTotal)[i];
//If it's at it's end, move on. We can't advance it.
if(currentStateRule->isAtEnd())
continue;
for (std::vector<ParseRule*>::size_type j = 0; j < loadedGrammer.size(); j++) {
//If the current symbol in the rule is not null (rule completed) and it equals a grammer's left side
ParseRule* currentGramRule = loadedGrammer[j]->clone();
if ( !currentStateRule->isAtEnd() && currentStateRule->getAtNextIndex() == currentGramRule->getLeftSide()) {
if (currentStateRule->getAtNextIndex() == currentGramRule->getLeftSide()) {
//std::cout << (*stateTotal)[i]->getAtNextIndex()->toString() << " has an applicable production " << loadedGrammer[j]->toString() << std::endl;
//Now, add the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set.
//std::cout << "Setting lookahead for " << currentGramRule->toString() << " in state " << state->toString() << std::endl;
@@ -225,6 +238,7 @@ void Parser::closure(State* state) {
//std::cout << (*stateTotal)[k]->toString() << std::endl;
(*stateTotal)[k]->addLookahead(currentGramRule->getLookahead());
isAlreadyInState = true;
delete currentGramRule;
break;
}
}
@@ -311,7 +325,7 @@ void Parser::addStates(std::vector< State* >* stateSets, State* state, std::queu
std::string Parser::stateSetToString() {
std::string concat = "";
for (std::vector< State *>::size_type i = 0; i < stateSets.size(); i++) {
concat += stateSets[i]->toString();
concat += intToString(i) + " is " + stateSets[i]->toString();
}
return concat;
}

View File

@@ -9,6 +9,13 @@ RNGLRParser::~RNGLRParser() {
}
NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
input.clear();
gss.clear();
//Throw away any reductions and shifts left queued by a previous parse. Each loop has to pop the
//queue it tests: popping toReduce while testing toShift pops an already empty queue and never
//empties toShift, so the loop would not end.
while(!toReduce.empty()) toReduce.pop();
while(!toShift.empty()) toShift.pop();
SPPFStepNodes.clear();
nullableParts.clear();
packedMap.clear();
//Check for no tokens
bool accepting = false;
@@ -27,6 +34,7 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
return new NodeTree<Symbol>();
}
lexer.reset();
lexer.setInput(inputString);
//Now fully lex our input because this algorithm was designed in that manner and simplifies this first implementation.
//It could be converted to on-line later.
@@ -42,7 +50,8 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
input.push_back(currentToken);
}
std::cout << "\nDone with Lexing\n" << std::endl;
// std::cout << "\nDone with Lexing, length:" << input.size() << std::endl;
// std::cout << input[0].toString() << std::endl;
// for (int i = 0; i < input.size(); i++)
@@ -50,13 +59,13 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
// std::cout << std::endl;
std::cout << "Setting up 0th frontier, first actions, toShift, toReduce" << std::endl;
//std::cout << "Setting up 0th frontier, first actions, toShift, toReduce" << std::endl;
//Frontier 0, new node with state 0
NodeTree<int>* v0 = gss.newNode(0);
gss.addToFrontier(0,v0);
std::cout << "Done setting up new frontier" << std::endl;
//std::cout << "Done setting up new frontier" << std::endl;
std::vector<ParseAction*> firstActions = *(table.get(0, input[0]));
for (std::vector<ParseAction*>::size_type i = 0; i < firstActions.size(); i++) {
@@ -71,17 +80,21 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
// std::cout << "GSS:\n" << gss.toString() << std::endl;
std::cout << "Starting parse loop" << std::endl;
//std::cout << "Starting parse loop" << std::endl;
for (int i = 0; i < input.size(); i++) {
// std::cout << "Checking if frontier " << i << " is empty" << std::endl;
if (gss.frontierIsEmpty(i)) {
std::cout << "Frontier " << i << " is empty." << std::endl;
std::cout << "Failed on " << input[i].toString() << std::endl;
//std::cout << "Frontier " << i << " is empty." << std::endl;
std::cout << "Parsing failed on " << input[i].toString() << std::endl;
std::cout << "Problem is on line: " << findLine(i) << std::endl;
std::cout << "Nearby is:" << std::endl;
int range = 5;
const int range = 10;
for (int j = (i-range >= 0 ? i-range : 0); j < (i+range < input.size() ? i+range : input.size()); j++)
std::cout << input[j].toString() << " ";
if (j == i)
std::cout << "||*||*||" << input[j].toString() << "||*||*|| ";
else
std::cout << input[j].toString() << " ";
std::cout << std::endl;
break;
}
@@ -98,7 +111,7 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
shifter(i);
//std::cout << "GSS:\n" << gss.toString() << std::endl;
}
std::cout << "Done with parsing loop, checking for acceptance" << std::endl;
//std::cout << "Done with parsing loop, checking for acceptance" << std::endl;
NodeTree<int>* accState = gss.frontierGetAccState(input.size()-1);
if (accState) {
std::cout << "Accepted!" << std::endl;
@@ -106,7 +119,7 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
}
std::cout << "Rejected!" << std::endl;
std::cout << "GSS:\n" << gss.toString() << std::endl;
// std::cout << "GSS:\n" << gss.toString() << std::endl;
return NULL;
}
@@ -131,7 +144,7 @@ void RNGLRParser::reducer(int i) {
//The end of the current path
NodeTree<int>* currentReached = currentPath[currentPath.size()-1];
std::cout << "Getting the shfit state for state " << currentReached->getData() << " and symbol " << reduction.symbol.toString() << std::endl;
//std::cout << "Getting the shift state for state " << currentReached->getData() << " and symbol " << reduction.symbol.toString() << std::endl;
int toState = table.getShift(currentReached->getData(), reduction.symbol)->shiftState;
//If reduction length is 0, then we make the new label the appropriate nullable parts
@@ -177,7 +190,7 @@ void RNGLRParser::reducer(int i) {
//std::cout << "Adding shifts and reductions for a state that did not exist" << std::endl;
std::vector<ParseAction*> actions = *(table.get(toState, input[i]));
for (std::vector<ParseAction*>::size_type k = 0; k < actions.size(); k++) {
std::cout << "Action is " << actions[k]->toString() << std::endl;
//std::cout << "Action is " << actions[k]->toString() << std::endl;
if (actions[k]->action == ParseAction::SHIFT) {
toShift.push(std::make_pair(toStateNode, actions[k]->shiftState));
} else if (actions[k]->action == ParseAction::REDUCE && fullyReducesToNull(actions[k]->reduceRule)) {
@@ -201,7 +214,7 @@ void RNGLRParser::shifter(int i) {
while (!toShift.empty()) {
std::pair<NodeTree<int>*, int> shift = toShift.front();
toShift.pop();
std::cout << "Current potential shift from " << shift.first->getData() << " to " << shift.second << std::endl;
//std::cout << "Current potential shift from " << shift.first->getData() << " to " << shift.second << std::endl;
NodeTree<int>* shiftTo = gss.inFrontier(i+1, shift.second);
if (shiftTo) {
//std::cout << "State already existed, just adding edge" << std::endl;
@@ -220,7 +233,7 @@ void RNGLRParser::shifter(int i) {
gss.addEdge(shiftTo, shift.first, newLabel);
std::vector<ParseAction*> actions = *(table.get(shift.second, input[i+1]));
for (std::vector<ParseAction*>::size_type j = 0; j < actions.size(); j++) {
std::cout << "Adding action " << actions[j]->toString() << " to either nextShifts or toReduce" << std::endl;
//std::cout << "Adding action " << actions[j]->toString() << " to either nextShifts or toReduce" << std::endl;
//Shift
if (actions[j]->action == ParseAction::SHIFT) {
nextShifts.push(std::make_pair(shiftTo, actions[j]->shiftState));
@@ -339,11 +352,13 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state, std:
//if (newStates[i]->basisEquals(*((*stateSets)[j]))) {
stateAlreadyInAllStates = true;
//If it does exist, we should add it as the shift/goto in the action table
//std::cout << "newStates[" << i << "] == stateSets[" << j << "]" << std::endl;
if (!((*stateSets)[j]->basisEquals(*(newStates[i]))))
toDo->push((*stateSets)[j]);
(*stateSets)[j]->combineStates(*(newStates[i]));
//std::cout << j << "\t Hay, doing an inside loop state reductions!" << std::endl;
addStateReductionsToTable((*stateSets)[j]);
table.add(stateNum(state), currStateSymbol, new ParseAction(ParseAction::SHIFT, j));
@@ -363,13 +378,15 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state, std:
void RNGLRParser::addStateReductionsToTable(State* state) {
std::vector<ParseRule*>* currStateTotal = state->getTotal();
//std::cout << currStateTotal->size() << "::" << state->getNumber() << std::endl;
for (std::vector<ParseRule*>::size_type i = 0; i < currStateTotal->size(); i++) {
//See if reduce
//Also, this really only needs to be done for the state's basis, but we're already iterating through, so...
std::vector<Symbol>* lookahead = (*currStateTotal)[i]->getLookahead();
if ((*currStateTotal)[i]->isAtEnd()) {
for (std::vector<Symbol>::size_type j = 0; j < lookahead->size(); j++)
for (std::vector<Symbol>::size_type j = 0; j < lookahead->size(); j++) {
table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i]));
}
//If this has an appropriate ruduction to null, get the reduce trees out
} else if (reducesToNull((*currStateTotal)[i])) {
//std::cout << (*currStateTotal)[i]->toString() << " REDUCES TO NULL" << std::endl;
@@ -476,3 +493,14 @@ std::vector<NodeTree<Symbol>*> RNGLRParser::getPathEdges(std::vector<NodeTree<in
pathEdges.push_back(gss.getEdge(path[i], path[i+1]));
return pathEdges;
}
// Returns how many '\n' characters occur in the values of the input tokens
// at indices [0, tokenNum), i.e. everything that precedes token tokenNum.
int RNGLRParser::findLine(int tokenNum) {
    int newlinesSeen = 0;
    int tokenIdx = 0;
    while (tokenIdx < tokenNum) {
        std::string text = input[tokenIdx].getValue();
        for (std::string::iterator ch = text.begin(); ch != text.end(); ++ch) {
            if (*ch == '\n')
                newlinesSeen += 1;
        }
        tokenIdx++;
    }
    return newlinesSeen;
}

View File

@@ -3,46 +3,55 @@
RegEx::RegEx(std::string inPattern) {
pattern = inPattern;
construct();
deperenthesize();
std::vector<RegExState*> ending;
begin = construct(&ending, inPattern);
//last one is goal state, add it to the end of all of these last states
for (std::vector<RegExState*>::size_type i = 0; i < ending.size(); i++)
ending[i]->addNext(NULL);
}
void RegEx::construct() {
std::vector<RegExState*> previousStates;
std::vector<RegExState*> currentStates;
std::stack<std::pair<std::vector<RegExState*>, std::vector<RegExState*> > > perenStack;
RegExState* RegEx::construct(std::vector<RegExState*>* ending, std::string pattern) {
//In the RegEx re-write, instead of doing complicated unperenthesising, we keep track of both the "front" and the "end" of a state.
//(these could be different if the state is perenthesezed)
std::vector<RegExState*> previousStatesBegin;
std::vector<RegExState*> previousStatesEnd;
std::vector<RegExState*> currentStatesBegin;
std::vector<RegExState*> currentStatesEnd;
bool alternating = false;
begin = new RegExState();
currentStates.push_back(begin);
RegExState* begin = new RegExState();
currentStatesBegin.push_back(begin);
currentStatesEnd.push_back(begin);
for (int i = 0; i < pattern.length(); i++) {
switch (pattern[i]) {
case '*':
{
//std::cout << "Star at " << i << " in " << pattern << std::endl;
// for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
// for (std::vector<RegExState*>::size_type k = 0; k < currentStates.size(); k++)
// currentStates[j]->addNext(currentStates[k]);
currentStates[currentStates.size()-1]->addNext(currentStates[currentStates.size()-1]);
//NOTE: Because of the re-write, this is necessary again
for (std::vector<RegExState*>::size_type j = 0; j < currentStatesEnd.size(); j++)
for (std::vector<RegExState*>::size_type k = 0; k < currentStatesBegin.size(); k++)
currentStatesEnd[j]->addNext(currentStatesBegin[k]); //Make the ends point to the beginnings
//add all previous states to current states to enable skipping over the starred item
currentStates.insert(currentStates.end(), previousStates.begin(), previousStates.end());
currentStatesBegin.insert(currentStatesBegin.end(), previousStatesBegin.begin(), previousStatesBegin.end());
currentStatesEnd.insert(currentStatesEnd.end(), previousStatesEnd.begin(), previousStatesEnd.end());
}
break;
case '+':
{
//std::cout << "Plus at " << i << " in " << pattern << std::endl;
//OtherThingy
//current->addNext(current);
// for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
// for (std::vector<RegExState*>::size_type k = 0; k < currentStates.size(); k++)
// currentStates[j]->addNext(currentStates[k]);
currentStates[currentStates.size()-1]->addNext(currentStates[currentStates.size()-1]);
//NOTE: Because of the re-write, this is necessary again
for (std::vector<RegExState*>::size_type j = 0; j < currentStatesEnd.size(); j++)
for (std::vector<RegExState*>::size_type k = 0; k < currentStatesBegin.size(); k++)
currentStatesEnd[j]->addNext(currentStatesBegin[k]); //Make the ends point to the beginnings
}
break;
case '?':
{
//std::cout << "Question at " << i << " in " << pattern << std::endl;
//add all previous states to current states to enable skipping over the questioned item
currentStates.insert(currentStates.end(), previousStates.begin(), previousStates.end());
currentStatesBegin.insert(currentStatesBegin.end(), previousStatesBegin.begin(), previousStatesBegin.end());
currentStatesEnd.insert(currentStatesEnd.end(), previousStatesEnd.begin(), previousStatesEnd.end());
}
break;
case '|':
@@ -57,59 +66,31 @@ void RegEx::construct() {
{
//std::cout << "Begin peren at " << i << " in " << pattern << std::endl;
//perentheses
//Create a peren node with an inner empty node
RegExState* next = new RegExState(new RegExState());
std::vector<RegExState*> innerEnds;
int perenEnd = findPerenEnd(pattern, i);
RegExState* innerBegin = construct(&innerEnds, strSlice(pattern, i+1, perenEnd));
i = perenEnd;
std::vector<RegExState*> innerBegins = *(innerBegin->getNextStates());
if (alternating) {
for (std::vector<RegExState*>::size_type j = 0; j < previousStates.size(); j++)
previousStates[j]->addNext(next);
//Save both current states here as well as the current preren
std::vector<RegExState*> savePreviousStates = previousStates;
currentStates.push_back(next);
std::vector<RegExState*> saveCurrentStates = currentStates;
perenStack.push(std::make_pair(savePreviousStates, saveCurrentStates));
previousStates.clear();
currentStates.clear();
currentStates.push_back(next->getInner());
alternating = false;
for (std::vector<RegExState*>::size_type j = 0; j < previousStatesEnd.size(); j++)
for (std::vector<RegExState*>::size_type k = 0; k < innerBegins.size(); k++)
previousStatesEnd[j]->addNext(innerBegins[k]);
currentStatesBegin.insert(currentStatesBegin.end(), innerBegins.begin(), innerBegins.end());
currentStatesEnd.insert(currentStatesEnd.end(), innerEnds.begin(), innerEnds.end());
} else {
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
currentStates[j]->addNext(next);
//Save both current states here as well as the current preren
std::vector<RegExState*> savePreviousStates = currentStates;
currentStates.clear();
currentStates.push_back(next);
std::vector<RegExState*> saveCurrentStates = currentStates;
perenStack.push(std::make_pair(savePreviousStates, saveCurrentStates));
previousStates.clear();
currentStates.clear();
currentStates.push_back(next->getInner());
for (std::vector<RegExState*>::size_type j = 0; j < currentStatesEnd.size(); j++)
for (std::vector<RegExState*>::size_type k = 0; k < innerBegins.size(); k++)
currentStatesEnd[j]->addNext(innerBegins[k]);
previousStatesBegin = currentStatesBegin;
previousStatesEnd = currentStatesEnd;
currentStatesBegin = innerBegins;
currentStatesEnd = innerEnds;
}
//std::cout << "Peren is " << next << " Inner is " << currentStates[0] << " = " << next->getInner() << std::endl;
alternating = false;
}
break;
case ')':
{
//std::cout << "End peren at " << i << " in " << pattern << std::endl;
//perentheses
//Pop off the states that will now be the previous states and the peren node which will now be the current node
std::pair<std::vector<RegExState*>, std::vector<RegExState*> > savedPair = perenStack.top();
perenStack.pop();
//Make the it so
previousStates = savedPair.first;
//Make sure the end of the inner stuff points back to the peren node
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
currentStates[j]->addNext(savedPair.second[savedPair.second.size()-1]);
//currentStates[j]->addNext(*(savedPair.second.end()));
currentStates.clear();
currentStates = savedPair.second;
}
break;
// ) does not need a case as we skip over it after finding it in ('s case
case '\\':
{
@@ -124,109 +105,33 @@ void RegEx::construct() {
RegExState* next = new RegExState(pattern[i]);
//If we're alternating, add next as the next for each previous state, and add self to currentStates
if (alternating) {
for (std::vector<RegExState*>::size_type j = 0; j < previousStates.size(); j++) {
previousStates[j]->addNext(next);
//std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << previousStates[j] << std::endl;
}
currentStates.push_back(next);
for (std::vector<RegExState*>::size_type j = 0; j < previousStatesEnd.size(); j++)
previousStatesEnd[j]->addNext(next);
currentStatesBegin.push_back(next);
currentStatesEnd.push_back(next);
alternating = false;
} else {
//If we're not alternating, add next as next for all the current states, make the current states the new
//previous states, and add ourself as the new current state.
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
currentStates[j]->addNext(next);
//std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << currentStates[j] << std::endl;
}
previousStates.clear();
previousStates = currentStates;
currentStates.clear();
currentStates.push_back(next);
for (std::vector<RegExState*>::size_type j = 0; j < currentStatesEnd.size(); j++)
currentStatesEnd[j]->addNext(next);
previousStatesBegin.clear();
previousStatesEnd.clear();
previousStatesBegin = currentStatesBegin;
previousStatesEnd = currentStatesEnd;
currentStatesBegin.clear();
currentStatesEnd.clear();
currentStatesBegin.push_back(next);
currentStatesEnd.push_back(next);
}
}
}
}
//last one is goal state
for (std::vector<RegExState*>::size_type i = 0; i < currentStates.size(); i++)
currentStates[i]->addNext(NULL);
(*ending) = currentStatesEnd;
return(begin);
}
// Rewrites the state graph reachable from `begin` so that no transition points
// at a "peren" placeholder node (a state whose getInner() is non-NULL).
// States are visited worklist-style via statesToProcess, which grows while it
// is being iterated. For every transition into a peren node:
//   1. the transition is redirected to the next-states of the peren's inner node,
//   2. self-references inside the peren node's own next list are redirected the same way,
//   3. transitions from inside the group back to the peren node are replaced by
//      the peren node's own next-states.
// NOTE(review): indexes [0] of the inner/peren next-state lists without checking
// that they are non-empty - presumably guaranteed by construct(); confirm.
void RegEx::deperenthesize() {
//std::cout << "About to de-perenthesize " << begin->toString() << std::endl;
//Now go through and expand the peren nodes to regular nodes
//NOTE: processedStates is declared but never used in this function
std::vector<RegExState*> processedStates;
std::vector<RegExState*> statesToProcess;
statesToProcess.push_back(begin);
for (std::vector<RegExState*>::size_type i = 0; i < statesToProcess.size(); i++) {
//Don't process null (success) state
if (statesToProcess[i] == NULL)
continue;
std::vector<RegExState*>* nextStates = statesToProcess[i]->getNextStates();
for (std::vector<RegExState*>::size_type j = 0; j < nextStates->size(); j++) {
if ((*nextStates)[j] != NULL && (*nextStates)[j]->getInner() != NULL) {
//Fix all the next references pointing to the peren node to point to the inner nodes. (if more than one, push back to add others)
std::vector<RegExState*>* insideNextStates = (*nextStates)[j]->getInner()->getNextStates();
//std::cout << "insideNextStates = " << insideNextStates << " [0] " << (*insideNextStates)[0] << std::endl;
//Remember the peren node before its slot in nextStates is overwritten
RegExState* perenState = (*nextStates)[j];
(*nextStates)[j] = (*insideNextStates)[0];
//std::cout << "So now nextstates[j] = " << (*nextStates)[j] << std::endl;
for (std::vector<RegExState*>::size_type k = 1; k < insideNextStates->size(); k++)
nextStates->push_back((*insideNextStates)[k]);
//std::cout << "Replaced beginning: " << begin->toString() << std::endl;
//Now, if the peren node is self-referential (has a repetition operator after it), fix its self-references in the same manner
std::vector<RegExState*>* perenNextNodes = perenState->getNextStates();
for (std::vector<RegExState*>::size_type k = 0; k < perenNextNodes->size(); k++) {
if ((*perenNextNodes)[k] == perenState) {
(*perenNextNodes)[k] = (*insideNextStates)[0];
for (std::vector<RegExState*>::size_type l = 1; l < insideNextStates->size(); l++)
perenNextNodes->push_back((*insideNextStates)[l]);
}
}
//std::cout << "Fixed self-references: " << begin->toString() << std::endl;
//Need to fix the end too
//Walk outward from the inner node; traversalList grows as new states are discovered
std::vector<RegExState*> traversalList;
traversalList.push_back(perenState->getInner());
for (std::vector<RegExState*>::size_type k = 0; k < traversalList.size(); k++) {
std::vector<RegExState*>* nextTraversalStates = traversalList[k]->getNextStates();
//std::cout << "Traversing! nextTraversalStates from traversalList " << traversalList[k] << " char = " << traversalList[k]->getCharacter() << std::endl;
//std::cout << "with children:" << std::endl;
//for (std::vector<RegExState*>::size_type l = 0; l < nextTraversalStates->size(); l++)
// std::cout << "\t\"" << (*nextTraversalStates)[l]->getCharacter() << "\"" << std::endl;
//std::cout << std::endl;
for (std::vector<RegExState*>::size_type l = 0; l < nextTraversalStates->size(); l++) {
//If this node is equal to the peren node we came from, then that means we've reached the end of the inner part of the peren
//And we now replace this reference with the next nodes from the peren node
//std::cout << "Traversal Next is on " << (*nextTraversalStates)[l]->getCharacter() << std::endl;
if ((*nextTraversalStates)[l] == perenState) {
// std::cout << "nextTraversalStates[l] = to perenState!" << std::endl;
//Work on a copy of the peren node's next list while nextTraversalStates is being modified
std::vector<RegExState*> endPerenNextStates = *(perenState->getNextStates());
(*nextTraversalStates)[l] = endPerenNextStates[0];
for (std::vector<RegExState*>::size_type n = 1; n < endPerenNextStates.size(); n++)
nextTraversalStates->push_back(endPerenNextStates[n]);
//Now make sure we don't now try to continue through and end up processing stuff we just replaced the peren reference with
break;
} else {
traversalList.push_back((*nextTraversalStates)[l]);
}
}
}
}
}
//Now add all these next states to process, only if they haven't already been processed
for (std::vector<RegExState*>::size_type j = 0; j < nextStates->size(); j++) {
bool inCurrStates = false;
for (std::vector<RegExState*>::size_type k = 0; k < statesToProcess.size(); k++) {
if ((*nextStates)[j] == statesToProcess[k])
inCurrStates = true;
}
if (!inCurrStates) {
statesToProcess.push_back((*nextStates)[j]);
//std::cout << (*nextStates)[j] << "Is not in states to process" << std::endl;
}
}
}
//std::cout << "Finished de-perenthesization " << begin->toString() << std::endl;
}
RegEx::~RegEx() {
//No cleanup necessary
@@ -310,5 +215,16 @@ void RegEx::test() {
assert(re.longMatch("ab") == 1);
}
{
RegEx re("((ab)|c)*");
assert(re.longMatch("ababc") == 5);
assert(re.longMatch("ad") == 0);
assert(re.longMatch("ababccd") == 6);
}
{
RegEx re("bbb((bba+)|(ba+))*a*((a+b)|(a+bb)|(a+))*bbb") ;
assert(re.longMatch("bbbababbbaaaaaaaaaaaaaaaaaaabbb") == 9);
}
std::cout << "RegEx tests pass\n";
}

View File

@@ -79,12 +79,9 @@ void State::combineStates(State &other) {
std::vector<ParseRule*>* State::getTotal() {
total.clear();
for (std::vector<ParseRule*>::size_type i = 0; i < basis.size(); i++) {
total.push_back(basis[i]);
}
for (std::vector<ParseRule*>::size_type i = 0; i < remaining.size(); i++) {
total.push_back(remaining[i]);
}
//std::cout << "Vector will be " << basis.size() << " + " << remaining.size() << std::endl;
total.insert(total.begin(), basis.begin(), basis.end());
total.insert(total.end(), remaining.begin(), remaining.end());
return(&total);
}
std::vector<ParseRule*>* State::getBasis() {
@@ -111,6 +108,7 @@ void State::addRuleCombineLookahead(ParseRule* rule) {
if (rule->equalsExceptLookahead(*(total[i]))) {
total[i]->addLookahead(rule->getLookahead());
alreadyIn = true;
break;
}
}
if (!alreadyIn)
@@ -160,4 +158,8 @@ std::vector<State*>* State::getDeepParents(int depth) {
recursiveParents->insert(recursiveParents->end(), recursiveParentsToAdd->begin(), recursiveParentsToAdd->end());
}
return recursiveParents;
}
int State::getNumber() {
return number;
}

View File

@@ -75,7 +75,7 @@ std::string StringReader::getTokens(const char *stop_chars, bool truncateEnd)
{
//End of String
end_reached = true;
std::cout << "Reached end of file!\n";
//std::cout << "Reached end of file!\n";
return "";
} else {

View File

@@ -8,6 +8,203 @@ Table::~Table() {
//
}
void Table::exportTable(std::ofstream &file) {
//Save symbolIndexVec
int size = symbolIndexVec.size();
file.write((char*)&size, sizeof(int));
for (int i = 0; i < symbolIndexVec.size(); i++) {
//Save the name
std::string symbolName = symbolIndexVec[i].getName(); //Get the string
size = symbolName.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolName.c_str()), size); //Save the string
//Save the value
std::string symbolValue = symbolIndexVec[i].getValue(); //Get the string
size = symbolValue.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolValue.c_str()), size); //Save the string
bool isTerminal = symbolIndexVec[i].isTerminal();
file.write((char*)&isTerminal, sizeof(bool)); //Save the true false
}
//Save the actual table
size = table.size();
file.write((char*)&size, sizeof(int));
for (int i = 0; i < table.size(); i++) {
//each item is a middle vector
//std::vector< std::vector< std::vector<ParseAction*>* >* > table;
std::vector< std::vector<ParseAction*>* >* middleVector = table[i];
int middleVectorSize = middleVector->size();
file.write((char*)&middleVectorSize, sizeof(int));
for (int j = 0; j < middleVectorSize; j++) {
//each item is an inner vector
std::vector<ParseAction*>* innerVector = (*middleVector)[j];
int innerVectorSize = 0;
if (innerVector)
innerVectorSize = innerVector->size();
else
innerVectorSize = 0;
file.write((char*)&innerVectorSize, sizeof(int));
for (int k = 0; k < innerVectorSize; k++) {
//Save the type
ParseAction* toSave = (*innerVector)[k];
ParseAction::ActionType actionType = toSave->action;
file.write((char*)&actionType, sizeof(ParseAction::ActionType));
//Save the reduce rule if necessary
if (actionType == ParseAction::REDUCE) {
//Save the reduce rule
ParseRule* rule = toSave->reduceRule;
//int pointer index
int ptrIndx = rule->getIndex();
file.write((char*)&ptrIndx, sizeof(int));
//Symbol leftHandle
Symbol leftHandle = rule->getLeftSide();
//Save the name
std::string symbolName = leftHandle.getName(); //Get the string
size = symbolName.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolName.c_str()), size); //Save the string
//Save the value
std::string symbolValue = leftHandle.getValue(); //Get the string
size = symbolValue.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolValue.c_str()), size); //Save the string
bool isTerminal = leftHandle.isTerminal();
file.write((char*)&isTerminal, sizeof(bool)); //Save the true false
//std::vector<Symbol>* lookahead;
//Should not need
//std::vector<Symbol> rightSide;
std::vector<Symbol> rightSide = rule->getRightSide();
size = rightSide.size();
//std::cout << leftHandle.toString() << std::endl;
file.write((char*)&size, sizeof(int));
for (int l = 0; l < rightSide.size(); l++) {
//Save the name
symbolName = rightSide[l].getName(); //Get the string
size = symbolName.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolName.c_str()), size); //Save the string
//
//Save the value
symbolValue = rightSide[l].getValue(); //Get the string
size = symbolValue.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolValue.c_str()), size); //Save the string
//
isTerminal = rightSide[l].isTerminal();
file.write((char*)&isTerminal, sizeof(bool)); //Save the true false
}
}
int shiftState = toSave->shiftState;
file.write((char*)&shiftState, sizeof(int));
}
}
}
}
void Table::importTable(char* tableData) {
//Load symbolIndexVec
int size = *((int*)tableData);
tableData += sizeof(int);
for (int i = 0; i < size; i++) {
int stringLen = *((int*)tableData);
tableData += sizeof(int);
std::string symbolName = std::string(tableData);
tableData += stringLen*sizeof(char);
stringLen = *((int*)tableData);
tableData += sizeof(int);
std::string symbolValue = std::string(tableData);
tableData += stringLen*sizeof(char);
bool isTerminal = *((bool*)tableData);
tableData += sizeof(bool);
symbolIndexVec.push_back(Symbol(symbolName, isTerminal, symbolValue));
}
//Now for the actual table
int tableSize = *((int*)tableData);
tableData += sizeof(int);
for (int i = 0; i < tableSize; i++) {
//each item is a middle vector
std::vector< std::vector<ParseAction*>* >* middleVector = new std::vector< std::vector<ParseAction*>* >();
table.push_back(middleVector);
int middleVectorSize = *((int*)tableData);
tableData += sizeof(int);
for (int j = 0; j < middleVectorSize; j++) {
//each item is an inner vector
std::vector<ParseAction*>* innerVector = new std::vector<ParseAction*>();
middleVector->push_back(innerVector);
int innerVectorSize = *((int*)tableData);
tableData += sizeof(int);
for (int k = 0; k < innerVectorSize; k++) {
//each item is a ParseRule
ParseAction::ActionType action = *((ParseAction::ActionType*)tableData);
tableData += sizeof(ParseAction::ActionType);
//If reduce, import the reduce rule
ParseRule* reduceRule = NULL;
if (action == ParseAction::REDUCE) {
int ptrIndx = *((int*)tableData);
tableData += sizeof(int);
size = *((int*)tableData);
tableData += sizeof(int);
std::string leftHandleName = std::string(tableData);
tableData += size*sizeof(char);
size = *((int*)tableData);
tableData += sizeof(int);
std::string leftHandleValue = std::string(tableData);
tableData += size*sizeof(char);
bool isTerminal = *((bool*)tableData);
tableData += sizeof(bool);
//right side
std::vector<Symbol> rightSide;
size = *((int*)tableData);
tableData += sizeof(int);
for (int l = 0; l < size; l++) {
int inStringLen = *((int*)tableData);
tableData += sizeof(int);
std::string inSymbolName = std::string(tableData);
tableData += inStringLen*sizeof(char);
inStringLen = *((int*)tableData);
tableData += sizeof(int);
std::string inSymbolValue = std::string(tableData);
tableData += inStringLen*sizeof(char);
bool inIsTerminal = *((bool*)tableData);
tableData += sizeof(bool);
rightSide.push_back(Symbol(inSymbolName, inIsTerminal, inSymbolValue));
}
reduceRule = new ParseRule(Symbol(leftHandleName, isTerminal, leftHandleValue), ptrIndx, rightSide, NULL);
}
int shiftState = *((int*)tableData);
tableData += sizeof(int);
//And push the new action back
if (reduceRule)
innerVector->push_back(new ParseAction(action, reduceRule));
else
innerVector->push_back(new ParseAction(action, shiftState));
}
}
}
}
void Table::setSymbols(Symbol EOFSymbol, Symbol nullSymbol) {
this->EOFSymbol = EOFSymbol;
this->nullSymbol = nullSymbol;
@@ -106,7 +303,7 @@ std::vector<ParseAction*>* Table::get(int state, Symbol token) {
return NULL;
}
std::cout << "Get for state: " << state << ", and Symbol: " << token.toString() << std::endl;
//std::cout << "Get for state: " << state << ", and Symbol: " << token.toString() << std::endl;
if (state < 0 || state >= table.size()) {
std::cout << "State bad: " << state << std::endl;
return NULL;
@@ -115,7 +312,7 @@ std::vector<ParseAction*>* Table::get(int state, Symbol token) {
std::vector<ParseAction*>* action = NULL;
if (symbolIndex < 0 || symbolIndex >= table[state]->size()) {
std::cout << "Symbol bad for this state: " << token.toString() << ". This is a reject." << std::endl;
//std::cout << "Symbol bad for this state: " << token.toString() << ". This is a reject." << std::endl;
} else {
action = (*(table[state]))[symbolIndex];
}
@@ -128,7 +325,7 @@ std::vector<ParseAction*>* Table::get(int state, Symbol token) {
action->push_back(new ParseAction(ParseAction::ACCEPT));
}
//If ourside the symbol range of this state (same as NULL), reject
//If outside the symbol range of this state (same as NULL), reject
if ( symbolIndex >= table[state]->size() ) {
action = new std::vector<ParseAction*>();
action->push_back(new ParseAction(ParseAction::REJECT));
@@ -141,7 +338,7 @@ std::vector<ParseAction*>* Table::get(int state, Symbol token) {
}
//Otherwise, we have something, so return it
return (action);
return action;
}
ParseAction* Table::getShift(int state, Symbol token) {
@@ -163,8 +360,9 @@ std::string Table::toString() {
concat += "\n";
for (std::vector< std::vector< std::vector< ParseRule* >* >* >::size_type i = 0; i < table.size(); i++) {
concat += intToString(i) + "\t";
concat += intToString(i) + " is the state\t";
for (std::vector< std::vector< ParseRule* >* >::size_type j = 0; j < table[i]->size(); j++) {
concat += "for " + symbolIndexVec[j].toString() + " do ";
if ( (*(table[i]))[j] != NULL) {
for (std::vector< ParseRule* >::size_type k = 0; k < (*(table[i]))[j]->size(); k++) {
concat += (*((*(table[i]))[j]))[k]->toString() + "\t";

85
src/Type.cpp Normal file
View File

@@ -0,0 +1,85 @@
#include "Type.h"
// Default: base type `none`, no indirection, no type definition.
Type::Type() {
    this->baseType = none;
    this->indirection = 0;
    this->typeDefinition = NULL;
}

// Built-in value type with no indirection.
Type::Type(ValueType typeIn) {
    this->baseType = typeIn;
    this->indirection = 0;
    this->typeDefinition = NULL;
}

// Built-in value type with the given level of indirection.
Type::Type(ValueType typeIn, int indirectionIn) {
    this->baseType = typeIn;
    this->indirection = indirectionIn;
    this->typeDefinition = NULL;
}

// Type described by a definition node; base type is `none`, no indirection.
Type::Type(NodeTree<ASTData>* typeDefinitionIn) {
    this->baseType = none;
    this->indirection = 0;
    this->typeDefinition = typeDefinitionIn;
}

// Type described by a definition node, with the given level of indirection.
Type::Type(NodeTree<ASTData>* typeDefinitionIn, int indirectionIn) {
    this->baseType = none;
    this->indirection = indirectionIn;
    this->typeDefinition = typeDefinitionIn;
}

// Fully specified: base type, definition node and level of indirection.
Type::Type(ValueType typeIn, NodeTree<ASTData>* typeDefinitionIn, int indirectionIn) {
    this->baseType = typeIn;
    this->typeDefinition = typeDefinitionIn;
    this->indirection = indirectionIn;
}

// Nothing to release here; typeDefinition is not deleted by this class.
Type::~Type() {
}
// Two types are equal when base type, indirection level and the
// typeDefinition pointer (compared by address) all match.
const bool Type::operator==(const Type &other) const {
    if (baseType != other.baseType)
        return false;
    if (indirection != other.indirection)
        return false;
    return typeDefinition == other.typeDefinition;
}
// Negation of operator==.
const bool Type::operator!=(const Type &other) const {
    return !(*this == other);
}
// Renders the type as text: the base type's name followed by one '*' per
// level of indirection. For base type `none`, and for any base type without
// its own case, the type definition's symbol name is used when a definition
// is present; otherwise "none" / "unknown_type" respectively.
std::string Type::toString() {
    std::string rendered;
    switch (baseType) {
        case void_type:
            rendered = "void";
            break;
        case boolean:
            rendered = "bool";
            break;
        case integer:
            rendered = "int";
            break;
        case floating:
            rendered = "float";
            break;
        case double_percision:
            rendered = "double";
            break;
        case character:
            rendered = "char";
            break;
        case none:
            if (typeDefinition == NULL)
                rendered = "none";
            else
                rendered = typeDefinition->getDataRef()->symbol.getName();
            break;
        default:
            if (typeDefinition == NULL)
                rendered = "unknown_type";
            else
                rendered = typeDefinition->getDataRef()->symbol.getName();
    }
    //One star per level of indirection (nothing is added for zero or negative levels)
    int starsLeft = indirection;
    while (starsLeft > 0) {
        rendered += "*";
        starsLeft--;
    }
    return rendered;
}

View File

@@ -8,7 +8,7 @@ std::string intToString(int theInt) {
std::string replaceExEscape(std::string first, std::string search, std::string replace) {
size_t pos = 0;
while (pos < first.size()-search.size()) {
while (pos <= first.size()-search.size()) {
pos = first.find(search, pos);
if (pos == std::string::npos)
break;
@@ -31,3 +31,44 @@ std::string replaceExEscape(std::string first, std::string search, std::string r
}
return first;
}
//String slicing is crazy useful. substr isn't bad, but slicing with negative indicies is wonderful
// Returns the characters of str in the half-open range [begin, end).
//
// Negative indices count from the end with an offset of length+1, so -1 means
// "one past the last character": strSlice(s, 0, -1) is the whole string and
// strSlice(s, -3, -1) is the last two characters.
//
// Out-of-range indices are clamped to [0, length], and an empty or inverted
// range (end <= begin) yields "". Without this, an inverted range made the
// negative length wrap to a huge unsigned count (returning the whole tail)
// and an out-of-range begin threw std::out_of_range.
std::string strSlice(std::string str, int begin, int end) {
    const int length = static_cast<int>(str.length());
    if (begin < 0)
        begin += length+1;
    if (end < 0)
        end += length+1;

    //Clamp whatever is still out of range after the negative-index adjustment
    if (begin < 0)
        begin = 0;
    if (end > length)
        end = length;

    if (end <= begin)
        return "";
    return str.substr(begin, end-begin);
}
// Scans str from index i and returns the index at which the count of open
// parentheses returns to zero - for an i that points at '(', that is the
// index of its matching ')'. If str[i] is not a parenthesis the count is
// already zero and i itself is returned.
//
// If the parentheses never balance (or i is outside the string), returns
// str.length(), i.e. one past the last character, so a caller that slices up
// to the result and resumes after it treats the group as running to the end
// of the string. The function used to fall off its end without returning a
// value in that case, which is undefined behaviour.
int findPerenEnd(std::string str, int i) {
    const int length = static_cast<int>(str.length());
    int numHangingOpen = 0;
    for (; i >= 0 && i < length; i++) {
        if (str[i] == '(')
            numHangingOpen++;
        else if (str[i] == ')')
            numHangingOpen--;
        if (numHangingOpen == 0)
            return i;
    }
    return length;
}
// Splits str at every occurrence of delim and returns the pieces in order.
// Empty pieces between adjacent delimiters (and before a leading delimiter)
// are kept, but a delimiter at the very end of str does not produce a
// trailing empty piece. An empty str yields an empty vector.
std::vector<std::string> split(const std::string &str, char delim) {
    std::vector<std::string> pieces;
    std::string::size_type start = 0;
    while (start < str.size()) {
        std::string::size_type stop = str.find(delim, start);
        if (stop == std::string::npos) {
            //No more delimiters: the rest of the string is the last piece
            pieces.push_back(str.substr(start));
            break;
        }
        pieces.push_back(str.substr(start, stop - start));
        start = stop + 1;
    }
    return pieces;
}
// Concatenates the strings in strVec, placing joinStr between each pair of
// neighbours. An empty vector yields "".
std::string join(const std::vector<std::string> &strVec, std::string joinStr) {
    std::string joinedStr;
    std::vector<std::string>::const_iterator piece = strVec.begin();
    if (piece == strVec.end())
        return joinedStr;
    joinedStr = *piece;
    for (++piece; piece != strVec.end(); ++piece) {
        joinedStr += joinStr;
        joinedStr += *piece;
    }
    return joinedStr;
}