Merge branch 'master' of https://www.github.com/Limvot/kraken
This commit is contained in:
@@ -2,14 +2,15 @@
|
||||
|
||||
// Default constructor: produces an ASTData with node kind `undef` and no
// value type attached. The symbol member is not assigned here.
ASTData::ASTData() {
    valueType = NULL;   // no Type object associated with this node yet
    type = undef;       // node kind is left undefined
}
|
||||
|
||||
ASTData::ASTData(ASTType type, ValueType valueType) {
|
||||
ASTData::ASTData(ASTType type, Type *valueType) {
|
||||
this->type = type;
|
||||
this->valueType = valueType;
|
||||
}
|
||||
|
||||
ASTData::ASTData(ASTType type, Symbol symbol, ValueType valueType) {
|
||||
ASTData::ASTData(ASTType type, Symbol symbol, Type *valueType) {
|
||||
this->type = type;
|
||||
this->valueType = valueType;
|
||||
this->symbol = symbol;
|
||||
@@ -20,98 +21,55 @@ ASTData::~ASTData() {
|
||||
}
|
||||
|
||||
std::string ASTData::toString() {
|
||||
return ASTTypeToString(type) + (symbol.isTerminal() ? " " + symbol.toString() : "") + (valueType ? " " + ValueTypeToString(valueType) : "");
|
||||
}
|
||||
|
||||
ValueType ASTData::strToType(std::string type) {
|
||||
if (type == "bool")
|
||||
return boolean;
|
||||
else if (type == "int")
|
||||
return integer;
|
||||
else if (type == "float")
|
||||
return floating;
|
||||
else if (type == "double")
|
||||
return double_percision;
|
||||
else if (type == "string")
|
||||
return char_string;
|
||||
else return none;
|
||||
}
|
||||
|
||||
std::string ASTData::ValueTypeToString(ValueType type) {
|
||||
switch (type) {
|
||||
case none:
|
||||
return "none";
|
||||
break;
|
||||
case boolean:
|
||||
return "bool";
|
||||
break;
|
||||
case integer:
|
||||
return "int";
|
||||
break;
|
||||
case floating:
|
||||
return "float";
|
||||
break;
|
||||
case double_percision:
|
||||
return "double";
|
||||
break;
|
||||
case char_string:
|
||||
return "string";
|
||||
break;
|
||||
default:
|
||||
return "unknown_ValueType";
|
||||
}
|
||||
return ASTTypeToString(type) + " " +
|
||||
(symbol.isTerminal() ? " " + symbol.toString() : "") + " " +
|
||||
(valueType ? valueType->toString() : "no_type");
|
||||
}
|
||||
|
||||
std::string ASTData::ASTTypeToString(ASTType type) {
|
||||
switch (type) {
|
||||
case translation_unit:
|
||||
return "translation_unit";
|
||||
break;
|
||||
case interpreter_directive:
|
||||
return "interpreter_directive";
|
||||
break;
|
||||
case identifier:
|
||||
return "identifier";
|
||||
break;
|
||||
case import:
|
||||
return "import";
|
||||
break;
|
||||
case function:
|
||||
return "function";
|
||||
break;
|
||||
case type_def:
|
||||
return "type_def";
|
||||
case code_block:
|
||||
return "code_block";
|
||||
break;
|
||||
case typed_parameter:
|
||||
return "typed_parameter";
|
||||
break;
|
||||
case expression:
|
||||
return "expression";
|
||||
break;
|
||||
case boolean_expression:
|
||||
return "boolean_expression";
|
||||
break;
|
||||
case statement:
|
||||
return "statement";
|
||||
break;
|
||||
case if_statement:
|
||||
return "if_statement";
|
||||
break;
|
||||
case while_loop:
|
||||
return "while_loop";
|
||||
case for_loop:
|
||||
return "for_loop";
|
||||
case return_statement:
|
||||
return "return_statement";
|
||||
break;
|
||||
case assignment_statement:
|
||||
return "assignment_statement";
|
||||
break;
|
||||
case declaration_statement:
|
||||
return "declaration_statement";
|
||||
break;
|
||||
case if_comp:
|
||||
return "if_comp";
|
||||
case simple_passthrough:
|
||||
return "simple_passthrough";
|
||||
case function_call:
|
||||
return "function_call";
|
||||
break;
|
||||
case value:
|
||||
return "value";
|
||||
break;
|
||||
default:
|
||||
return "unknown_ASTType";
|
||||
}
|
||||
|
||||
@@ -1,7 +1,28 @@
|
||||
#include "ASTTransformation.h"
|
||||
|
||||
ASTTransformation::ASTTransformation() {
|
||||
//
|
||||
ASTTransformation::ASTTransformation(Importer *importerIn) {
|
||||
importer = importerIn;
|
||||
//Set up language level special scope. (the final scope checked)
|
||||
//Note the NULL type
|
||||
languageLevelScope["+"].push_back( new NodeTree<ASTData>("function", ASTData(function, Symbol("+", true), NULL)));
|
||||
languageLevelScope["-"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("-", true), NULL)));
|
||||
languageLevelScope["*"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("*", true), NULL)));
|
||||
languageLevelScope["&"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("&", true), NULL)));
|
||||
languageLevelScope["--"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("--", true), NULL)));
|
||||
languageLevelScope["++"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("++", true), NULL)));
|
||||
languageLevelScope["=="].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("==", true), NULL)));
|
||||
languageLevelScope["<="].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("<=", true), NULL)));
|
||||
languageLevelScope[">="].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol(">=", true), NULL)));
|
||||
languageLevelScope["<"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("<", true), NULL)));
|
||||
languageLevelScope[">"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol(">", true), NULL)));
|
||||
languageLevelScope["&&"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("&&", true), NULL)));
|
||||
languageLevelScope["||"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("||", true), NULL)));
|
||||
languageLevelScope["!"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("!", true), NULL)));
|
||||
languageLevelScope["*="].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("*=", true), NULL)));
|
||||
languageLevelScope["+="].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("+=", true), NULL)));
|
||||
languageLevelScope["-="].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("-=", true), NULL)));
|
||||
languageLevelScope["."].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol(".", true), NULL)));
|
||||
languageLevelScope["->"].push_back(new NodeTree<ASTData>("function", ASTData(function, Symbol("->", true), NULL)));
|
||||
}
|
||||
|
||||
ASTTransformation::~ASTTransformation() {
|
||||
@@ -9,113 +30,455 @@ ASTTransformation::~ASTTransformation() {
|
||||
}
|
||||
|
||||
// Convenience entry point: transforms the parse tree rooted at `from` by
// delegating to the three-argument overload with a NULL starting scope and
// an empty list of types.
NodeTree<ASTData>* ASTTransformation::transform(NodeTree<Symbol>* from) {
    NodeTree<ASTData>* const startingScope = NULL;  // no enclosing scope at the top level
    const std::vector<Type> noTypes;                // nothing to match against yet
    return transform(from, startingScope, noTypes);
}
|
||||
|
||||
NodeTree<ASTData>* ASTTransformation::transform(NodeTree<Symbol>* from, NodeTree<ASTData>* scope, std::vector<Type> types) {
|
||||
Symbol current = from->getData();
|
||||
std::string name = current.getName();
|
||||
NodeTree<ASTData>* newNode;
|
||||
NodeTree<ASTData>* newNode = NULL;
|
||||
std::vector<NodeTree<Symbol>*> children = from->getChildren();
|
||||
std::set<int> skipChildren;
|
||||
|
||||
if (name == "translation_unit") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(translation_unit));
|
||||
scope = newNode;
|
||||
} else if (name == "interpreter_directive") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(interpreter_directive));
|
||||
} else if (name == "import" && !current.isTerminal()) {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(import, Symbol(concatSymbolTree(children[0]), true)));
|
||||
std::string toImport = concatSymbolTree(children[0]);
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(import, Symbol(toImport, true)));
|
||||
//Do the imported file too
|
||||
NodeTree<ASTData>* outsideTranslationUnit = importer->import(toImport + ".krak");
|
||||
scope->getDataRef()->scope[toImport].push_back(outsideTranslationUnit); //Put this transation_unit in the scope as it's files name
|
||||
//Now add it to scope
|
||||
for (auto i = outsideTranslationUnit->getDataRef()->scope.begin(); i != outsideTranslationUnit->getDataRef()->scope.end(); i++)
|
||||
for (auto j : i->second)
|
||||
scope->getDataRef()->scope[i->first].push_back(j);
|
||||
return newNode; // Don't need children of import
|
||||
} else if (name == "identifier") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(identifier, Symbol(concatSymbolTree(children[0]), true)));
|
||||
//Make sure we get the entire name
|
||||
std::string lookupName = concatSymbolTree(from);
|
||||
std::cout << "Looking up: " << lookupName << std::endl;
|
||||
newNode = scopeLookup(scope, lookupName, types);
|
||||
if (newNode == NULL) {
|
||||
std::cout << "scope lookup error! Could not find " << lookupName << " in identifier " << std::endl;
|
||||
throw "LOOKUP ERROR: " + lookupName;
|
||||
} else if (newNode->getDataRef()->symbol.getName() !=lookupName) {
|
||||
//This happens when the lookup name denotes a member of an object, i.e. obj.foo
|
||||
//The newNode points to obj, not foo.
|
||||
}
|
||||
//newNode = new NodeTree<ASTData>(name, ASTData(identifier, Symbol(concatSymbolTree(children[0]), true)));
|
||||
} else if (name == "type_def") {
|
||||
std::string typeAlias = concatSymbolTree(children[0]);
|
||||
//If it is an alisis of a type
|
||||
if (children[1]->getData().getName() == "type") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(type_def, Symbol(typeAlias, true, typeAlias), typeFromString(concatSymbolTree(children[1]), scope)));
|
||||
skipChildren.insert(1); //Don't want any children, it's unnecessary for ailising
|
||||
} else { //Is a struct or class
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(type_def, Symbol(typeAlias, true, typeAlias)));
|
||||
newNode->getDataRef()->valueType = new Type(newNode); //Type is self-referential since this is the definition
|
||||
}
|
||||
scope->getDataRef()->scope[typeAlias].push_back(newNode);
|
||||
newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope);
|
||||
scope = newNode;
|
||||
skipChildren.insert(0); //Identifier lookup will be ourselves, as we just added ourselves to the scope
|
||||
//return newNode;
|
||||
} else if (name == "function") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(function, Symbol(concatSymbolTree(children[1]), true), ASTData::strToType(concatSymbolTree(children[0]))));
|
||||
std::string functionName = concatSymbolTree(children[1]);
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(function, Symbol(functionName, true), typeFromString(concatSymbolTree(children[0]), scope)));
|
||||
skipChildren.insert(0);
|
||||
skipChildren.insert(1);
|
||||
scope->getDataRef()->scope[functionName].push_back(newNode);
|
||||
newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope);
|
||||
scope = newNode;
|
||||
|
||||
// auto transChildren = transformChildren(children, skipChildren, scope, types);
|
||||
// std::cout << functionName << " ";
|
||||
// for (auto i : transChildren)
|
||||
// std::cout << "||" << i->getDataRef()->toString() << "|| ";
|
||||
// std::cout << "??||" << std::endl;
|
||||
// newNode->addChildren(transChildren);
|
||||
// return newNode;
|
||||
|
||||
std::cout << "finished function " << functionName << std::endl;
|
||||
} else if (name == "code_block") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(code_block));
|
||||
newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope);
|
||||
scope = newNode;
|
||||
} else if (name == "typed_parameter") {
|
||||
newNode = transform(children[1]); //Transform to get the identifier
|
||||
newNode->getDataRef()->valueType = ASTData::strToType(concatSymbolTree(children[0])); //Get the type (left child) and set our new identifer to be that type
|
||||
//newNode = transform(children[1]); //Transform to get the identifier
|
||||
std::string parameterName = concatSymbolTree(children[1]);
|
||||
std::string typeString = concatSymbolTree(children[0]);//Get the type (left child) and set our new identifer to be that type
|
||||
newNode = new NodeTree<ASTData>("identifier", ASTData(identifier, Symbol(parameterName, true), typeFromString(typeString, scope)));
|
||||
scope->getDataRef()->scope[parameterName].push_back(newNode);
|
||||
newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope);
|
||||
return newNode;
|
||||
} else if (name == "expression") {
|
||||
} else if (name == "boolean_expression" || name == "and_boolean_expression" || name == "bool_exp") {
|
||||
//If this is an actual part of an expression, not just a premoted term
|
||||
if (children.size() > 1) {
|
||||
std::string functionCallName = concatSymbolTree(children[1]);
|
||||
newNode = new NodeTree<ASTData>(functionCallName, ASTData(function_call, Symbol(functionCallName, true)));
|
||||
//We do children first so we can do appropriate scope searching with types (yay operator overloading!)
|
||||
skipChildren.insert(1);
|
||||
std::vector<NodeTree<ASTData>*> transformedChildren = transformChildren(children, skipChildren, scope, types);
|
||||
std::string functionCallString = concatSymbolTree(children[1]);
|
||||
NodeTree<ASTData>* function = scopeLookup(scope, functionCallString, transformedChildren);
|
||||
if (function == NULL) {
|
||||
std::cout << "scope lookup error! Could not find " << functionCallString << " in boolean stuff " << std::endl;
|
||||
throw "LOOKUP ERROR: " + functionCallString;
|
||||
}
|
||||
newNode = new NodeTree<ASTData>(functionCallString, ASTData(function_call, function->getDataRef()->valueType));
|
||||
newNode->addChild(function); // First child of function call is a link to the function
|
||||
newNode->addChildren(transformedChildren);
|
||||
} else {
|
||||
return transform(children[0]); //Just a promoted term, so do child
|
||||
//std::cout << children.size() << std::endl;
|
||||
if (children.size() == 0)
|
||||
return new NodeTree<ASTData>();
|
||||
return transform(children[0], scope, types); //Just a promoted term, so do child
|
||||
}
|
||||
} else if (name == "term") {
|
||||
//If this is an actual part of an expression, not just a premoted factor
|
||||
if (children.size() > 1) {
|
||||
//Here's the order of ops stuff
|
||||
} else if (name == "expression" || name == "shiftand" || name == "term" || name == "unarad" || name == "access_operation") { //unarad can ride through, it should always just be a promoted child
|
||||
//If this is an actual part of an expression, not just a premoted child
|
||||
if (children.size() > 2) {
|
||||
NodeTree<ASTData>* lhs = transform(children[0], scope); //LHS does not inherit types
|
||||
NodeTree<ASTData>* rhs;
|
||||
if (name == "access_operation")
|
||||
rhs = transform(children[2], lhs->getDataRef()->valueType->typeDefinition, types); //If an access operation, then the right side will be in the lhs's type's scope
|
||||
else
|
||||
rhs = transform(children[2], scope, types);
|
||||
|
||||
std::string functionCallName = concatSymbolTree(children[1]);
|
||||
//std::cout << "scope lookup from expression or similar" << std::endl;
|
||||
std::vector<NodeTree<ASTData>*> transformedChildren; transformedChildren.push_back(lhs); transformedChildren.push_back(rhs);
|
||||
NodeTree<ASTData>* function = scopeLookup(scope, functionCallName, transformedChildren);
|
||||
if (function == NULL) {
|
||||
std::cout << "scope lookup error! Could not find " << functionCallName << " in expression " << std::endl;
|
||||
throw "LOOKUP ERROR: " + functionCallName;
|
||||
}
|
||||
newNode = new NodeTree<ASTData>(functionCallName, ASTData(function_call, Symbol(functionCallName, true)));
|
||||
skipChildren.insert(1);
|
||||
newNode->addChild(function); // First child of function call is a link to the function definition
|
||||
newNode->addChild(lhs);
|
||||
newNode->addChild(rhs);
|
||||
|
||||
if (name == "access_operation")
|
||||
std::cout << "Access Operation: " << lhs->getDataRef()->symbol.getName() << " : " << rhs->getDataRef()->symbol.getName() << std::endl;
|
||||
std::cout << functionCallName << " - " << function->getName() << " has value type " << function->getDataRef()->valueType << " and rhs " << rhs->getDataRef()->valueType << std::endl;
|
||||
//Set the value of this function call
|
||||
if (function->getDataRef()->valueType)
|
||||
newNode->getDataRef()->valueType = function->getDataRef()->valueType;
|
||||
else if (rhs->getDataRef()->valueType)
|
||||
newNode->getDataRef()->valueType = rhs->getDataRef()->valueType;
|
||||
else
|
||||
newNode->getDataRef()->valueType = NULL;
|
||||
std::cout << "function call to " << functionCallName << " - " << function->getName() << " is now " << newNode->getDataRef()->valueType << std::endl;
|
||||
return newNode;
|
||||
//skipChildren.insert(1);
|
||||
} else {
|
||||
return transform(children[0]); //Just a promoted factor, so do child
|
||||
return transform(children[0], scope, types); //Just a promoted child, so do it instead
|
||||
}
|
||||
} else if (name == "factor") { //Do factor here, as it has all the weird unary operators
|
||||
//If this is an actual part of an expression, not just a premoted child
|
||||
//NO SUPPORT FOR CASTING YET
|
||||
if (children.size() == 2) {
|
||||
std::string funcName = concatSymbolTree(children[0]);
|
||||
NodeTree<ASTData>* param;
|
||||
if (funcName == "*" || funcName == "&" || funcName == "++" || funcName == "--" || funcName == "-" || funcName == "!" || funcName == "~")
|
||||
param = transform(children[1], scope, types);
|
||||
else
|
||||
funcName = concatSymbolTree(children[1]), param = transform(children[0], scope, types);
|
||||
|
||||
//std::cout << "scope lookup from factor" << std::endl;
|
||||
std::vector<NodeTree<ASTData>*> transformedChildren; transformedChildren.push_back(param);
|
||||
NodeTree<ASTData>* function = scopeLookup(scope, funcName, transformedChildren);
|
||||
if (function == NULL) {
|
||||
std::cout << "scope lookup error! Could not find " << funcName << " in factor " << std::endl;
|
||||
throw "LOOKUP ERROR: " + funcName;
|
||||
}
|
||||
newNode = new NodeTree<ASTData>(funcName, ASTData(function_call, Symbol(funcName, true)));
|
||||
newNode->addChild(function);
|
||||
newNode->addChild(param);
|
||||
if (function->getDataRef()->valueType)
|
||||
newNode->getDataRef()->valueType = function->getDataRef()->valueType;
|
||||
else
|
||||
newNode->getDataRef()->valueType = param->getDataRef()->valueType;
|
||||
|
||||
return newNode;
|
||||
} else {
|
||||
return transform(children[0], scope, types); //Just a promoted child, so do it instead
|
||||
}
|
||||
} else if (name == "factor") {
|
||||
return transform(children[0]); //Just a premoted number or function call or something, so use it instead
|
||||
} else if (name == "boolean_expression") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(boolean_expression));
|
||||
} else if (name == "statement") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(statement));
|
||||
} else if (name == "if_statement") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(if_statement));
|
||||
} else if (name == "while_loop") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(while_loop));
|
||||
} else if (name == "for_loop") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(for_loop));
|
||||
} else if (name == "return_statement") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(return_statement));
|
||||
} else if (name == "assignment_statement") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(assignment_statement));
|
||||
std::string assignFuncName = concatSymbolTree(children[1]);
|
||||
if (assignFuncName == "=") {
|
||||
newNode->addChild(transform(children[0], scope, types));
|
||||
newNode->addChild(transform(children[2], scope, types));
|
||||
} else {
|
||||
//For assignments like += or *=, expand the syntatic sugar.
|
||||
NodeTree<ASTData>* lhs = transform(children[0], scope, types);
|
||||
NodeTree<ASTData>* rhs = transform(children[2], scope, types);
|
||||
std::vector<NodeTree<ASTData>*> transformedChildren; transformedChildren.push_back(lhs); transformedChildren.push_back(rhs);
|
||||
std::string functionName = assignFuncName.substr(0,1);
|
||||
NodeTree<ASTData>* childCall = new NodeTree<ASTData>(functionName, ASTData(function_call, Symbol(functionName, true)));
|
||||
NodeTree<ASTData>* functionDef = scopeLookup(scope, functionName, transformedChildren);
|
||||
if (functionDef == NULL) {
|
||||
std::cout << "scope lookup error! Could not find " << functionName << " in assignment_statement " << std::endl;
|
||||
throw "LOOKUP ERROR: " + functionName;
|
||||
}
|
||||
childCall->addChild(functionDef); //First child of function call is definition of the function
|
||||
childCall->addChild(lhs);
|
||||
childCall->addChild(rhs);
|
||||
newNode->addChild(lhs);
|
||||
newNode->addChild(childCall);
|
||||
}
|
||||
return newNode;
|
||||
} else if (name == "declaration_statement") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(declaration_statement));
|
||||
NodeTree<ASTData>* newIdentifier = transform(children[1]); //Transform the identifier
|
||||
newIdentifier->getDataRef()->valueType = ASTData::strToType(concatSymbolTree(children[0]));//set the type of the identifier
|
||||
|
||||
// NodeTree<ASTData>* newIdentifier = transform(children[1], scope); //Transform the identifier
|
||||
// newIdentifier->getDataRef()->valueType = Type(concatSymbolTree(children[0]));//set the type of the identifier
|
||||
std::string newIdentifierStr = concatSymbolTree(children[1]);
|
||||
std::string typeString = concatSymbolTree(children[0]);//Get the type (left child) and set our new identifer to be that type
|
||||
Type* identifierType = typeFromString(typeString, scope);
|
||||
NodeTree<ASTData>* newIdentifier = new NodeTree<ASTData>("identifier", ASTData(identifier, Symbol(newIdentifierStr, true), identifierType));
|
||||
scope->getDataRef()->scope[newIdentifierStr].push_back(newIdentifier);
|
||||
newNode->getDataRef()->scope["~enclosing_scope"].push_back(scope);
|
||||
//Now we don't do this thing
|
||||
// if (identifierType->typeDefinition) {
|
||||
// //Is a custom type. Populate this declaration's scope with it's inner declarations
|
||||
// std::vector<NodeTree<ASTData>*> definitions = identifierType->typeDefinition->getChildren();
|
||||
// for (auto i : definitions) {
|
||||
// //Point to the identifier. May need to change so it points to the declaration or something, with new declarations.....
|
||||
// newIdentifier->getDataRef()->scope[i->get(0)->getDataRef()->symbol.getName()] = i->get(0); //make each declaration's name point to it's definition, like above
|
||||
// }
|
||||
// }
|
||||
|
||||
newNode->addChild(newIdentifier);
|
||||
skipChildren.insert(0); //These, the type and the identifier, have been taken care of.
|
||||
skipChildren.insert(1);
|
||||
} else if (name == "if_comp") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(if_comp));
|
||||
newNode->addChild(new NodeTree<ASTData>("identifier", ASTData(identifier, Symbol(concatSymbolTree(children[0]),true))));
|
||||
skipChildren.insert(0); //Don't do the identifier. The identifier lookup will fail. That's why we do it here.
|
||||
} else if (name == "simple_passthrough") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(simple_passthrough));
|
||||
} else if (name == "function_call") {
|
||||
//children[0] is scope
|
||||
std::string functionCallName = concatSymbolTree(children[1]);
|
||||
std::string functionCallName = concatSymbolTree(children[0]);
|
||||
newNode = new NodeTree<ASTData>(functionCallName, ASTData(function_call, Symbol(functionCallName, true)));
|
||||
skipChildren.insert(1);
|
||||
// if (function == NULL) {
|
||||
// std::cout << "scope lookup error! Could not find " << functionCallName << " in function_call " << std::endl;
|
||||
// throw "LOOKUP ERROR: " + functionCallName;
|
||||
// }
|
||||
skipChildren.insert(0);
|
||||
std::vector<NodeTree<ASTData>*> transformedChildren = transformChildren(children, skipChildren, scope, types);
|
||||
std::cout << "scope lookup from function_call: " << functionCallName << std::endl;
|
||||
for (auto i : children)
|
||||
std::cout << i << " : " << i->getName() << " : " << i->getDataRef()->getName() << std::endl;
|
||||
|
||||
NodeTree<ASTData>* function = transform(children[0], scope, mapNodesToTypes(transformedChildren));
|
||||
std::cout << "The thing: " << function << " : " << function->getName() << std::endl;
|
||||
for (auto i : function->getChildren())
|
||||
std::cout << i->getName() << " ";
|
||||
std::cout << std::endl;
|
||||
newNode->addChild(function);
|
||||
newNode->getDataRef()->valueType = function->getDataRef()->valueType;
|
||||
newNode->addChildren(transformedChildren);
|
||||
return newNode;
|
||||
} else if (name == "parameter") {
|
||||
return transform(children[0]); //Don't need a parameter node, just the value
|
||||
} else if (name == "bool") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), boolean));
|
||||
return transform(children[0], scope, types); //Don't need a parameter node, just the value
|
||||
} else if (name == "type") {
|
||||
std::string theConcat = concatSymbolTree(from); //We have no symbol, so this will concat our children
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(theConcat, true), typeFromString(theConcat, scope)));
|
||||
} else if (name == "number") {
|
||||
return transform(children[0]);
|
||||
return transform(children[0], scope, types);
|
||||
} else if (name == "integer") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(from), true), integer));
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(from), true), new Type(integer)));
|
||||
} else if (name == "float") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(from), true), floating));
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(from), true), new Type(floating)));
|
||||
} else if (name == "double") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(from), true), double_percision));
|
||||
} else if (name == "string") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), char_string));
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(from), true), new Type(double_percision)));
|
||||
} else if (name == "char") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), new Type(character, 1))); //Indirection of 1 for array
|
||||
} else if (name == "string" || name == "triple_quoted_string") {
|
||||
newNode = new NodeTree<ASTData>(name, ASTData(value, Symbol(concatSymbolTree(children[0]), true), new Type(character, 1))); //Indirection of 1 for array
|
||||
} else {
|
||||
return new NodeTree<ASTData>();
|
||||
}
|
||||
|
||||
// In general, iterate through children and do them. Might not do this for all children.
|
||||
//Do all children but the ones we skip
|
||||
for (int i = 0; i < children.size(); i++) {
|
||||
if (skipChildren.find(i) == skipChildren.end()) {
|
||||
NodeTree<ASTData>* transChild = transform(children[i]);
|
||||
if (transChild->getData().type) //Only add the children that have a real ASTData::ASTType, that is, legit ASTData.
|
||||
NodeTree<ASTData>* transChild = transform(children[i], scope, types);
|
||||
if (transChild->getDataRef()->type) //Only add the children that have a real ASTData::ASTType, that is, legit ASTData.
|
||||
newNode->addChild(transChild);
|
||||
else
|
||||
delete transChild;
|
||||
}
|
||||
}
|
||||
|
||||
return newNode;
|
||||
}
|
||||
|
||||
//We use this functionality a lot at different places
|
||||
std::vector<NodeTree<ASTData>*> ASTTransformation::transformChildren(std::vector<NodeTree<Symbol>*> children, std::set<int> skipChildren, NodeTree<ASTData>* scope, std::vector<Type> types) {
|
||||
std::vector<NodeTree<ASTData>*> transformedChildren;
|
||||
// In general, iterate through children and do them. Might not do this for all children.
|
||||
for (int i = 0; i < children.size(); i++) {
|
||||
if (skipChildren.find(i) == skipChildren.end()) {
|
||||
NodeTree<ASTData>* transChild = transform(children[i], scope, types);
|
||||
if (transChild->getDataRef()->type) //Only add the children that have a real ASTData::ASTType, that is, legit ASTData.
|
||||
transformedChildren.push_back(transChild);
|
||||
else
|
||||
delete transChild;
|
||||
}
|
||||
}
|
||||
return transformedChildren;
|
||||
}
|
||||
|
||||
std::vector<Type> ASTTransformation::mapNodesToTypes(std::vector<NodeTree<ASTData>*> nodes) {
|
||||
std::vector<Type> types;
|
||||
for (auto i : nodes)
|
||||
types.push_back(*(i->getDataRef()->valueType));
|
||||
return types;
|
||||
}
|
||||
|
||||
// Flattens the symbol subtree rooted at `root` into a single string.
// The node's own value comes first (skipped when it is the literal
// "NoValue"), followed by the flattened text of each child subtree in order.
//
// Fixes relative to the previous text of this block:
//  - `ourValue` was declared twice in the same scope; only one declaration
//    (the getDataRef() form) is kept.
//  - each child was first assigned (`=`) and then appended (`+=`), which threw
//    away everything accumulated so far; children are now only appended.
std::string ASTTransformation::concatSymbolTree(NodeTree<Symbol>* root) {
    std::string concatString;

    std::string ourValue = root->getDataRef()->getValue();
    if (ourValue != "NoValue")
        concatString += ourValue;

    std::vector<NodeTree<Symbol>*> children = root->getChildren();
    for (auto child : children)
        concatString += concatSymbolTree(child);   // append, never overwrite

    return concatString;
}
|
||||
|
||||
//Overloaded with the actual children to allow us to handle operator methods
|
||||
NodeTree<ASTData>* ASTTransformation::scopeLookup(NodeTree<ASTData>* scope, std::string lookup, std::vector<NodeTree<ASTData>*> nodes) {
|
||||
//
|
||||
auto LLElementIterator = languageLevelScope.find(lookup);
|
||||
if (LLElementIterator != languageLevelScope.end()) {
|
||||
std::cout << "Checking for early method level operator overload" << std::endl;
|
||||
std::string lookupOp = "operator" + lookup;
|
||||
for (auto i : nodes)
|
||||
std::cout << i->getDataRef()->toString() << " ";
|
||||
std::cout << std::endl;
|
||||
NodeTree<ASTData>* operatorMethod = NULL;
|
||||
if (nodes[0]->getDataRef()->valueType && nodes[0]->getDataRef()->valueType->typeDefinition)
|
||||
operatorMethod = scopeLookup(nodes[0]->getDataRef()->valueType->typeDefinition, lookupOp, mapNodesToTypes(slice(nodes,1,-1)));
|
||||
if (operatorMethod) {
|
||||
//Ok, so we construct
|
||||
std::cout << "Early method level operator was found" << std::endl;
|
||||
//return operatorMethod;
|
||||
newNode = new NodeTree<ASTData>(functionCallName, ASTData(function_call, Symbol(functionCallName, true)));
|
||||
newNode->addChild(function); // First child of function call is a link to the function definition
|
||||
newNode->addChild(lhs);
|
||||
newNode->addChild(rhs);
|
||||
|
||||
|
||||
//Set the value of this function call
|
||||
if (function->getDataRef()->valueType)
|
||||
newNode->getDataRef()->valueType = function->getDataRef()->valueType;
|
||||
else if (rhs->getDataRef()->valueType)
|
||||
newNode->getDataRef()->valueType = rhs->getDataRef()->valueType;
|
||||
else
|
||||
newNode->getDataRef()->valueType = NULL;
|
||||
}
|
||||
std::cout << "Early method level operator was NOT found" << std::endl;
|
||||
}
|
||||
return scopeLookup(scope, lookup, mapNodesToTypes(nodes));
|
||||
}
|
||||
|
||||
//Resolves `lookup` to a declaration node visible from `scope`, or NULL if nothing matches.
//
//Search order:
//  1. If `lookup` is a key of languageLevelScope it is treated as an operator and the name
//     searched for becomes "operator" + lookup.
//  2. The candidates stored under that name in `scope`. A type_def candidate is returned
//     immediately. Any other candidate must have (number of children - 1) == types.size()
//     and the valueType of each of its first types.size() children must equal types[j].
//  3. The scope stored under "~enclosing_scope", searched recursively.
//  4. The first languageLevelScope entry for the operator found in step 1.
//
//The scope map is accessed by reference; it is not modified here.
NodeTree<ASTData>* ASTTransformation::scopeLookup(NodeTree<ASTData>* scope, std::string lookup, std::vector<Type> types) {
    //We first search the languageLevelScope to see if it's an operator. If so, we modify the lookup with a preceding "operator"
    auto LLElementIterator = languageLevelScope.find(lookup);
    if (LLElementIterator != languageLevelScope.end())
        lookup = "operator" + lookup;

    //Search the map. Bind by reference: copying the whole scope on every (recursive) lookup is pure overhead.
    auto& scopeMap = scope->getDataRef()->scope;
    auto elementIterator = scopeMap.find(lookup);

    //Debug dump of every name in this scope
    for (const auto& entry : scopeMap)
        std::cout << entry.first << " ";
    std::cout << std::endl;

    if (elementIterator != scopeMap.end()) {
        for (auto i = elementIterator->second.begin(); i != elementIterator->second.end(); i++) {
            //Types and functions cannot have the same name, and types very apparently do not have parameter types, so check and short-circuit
            if ((*i)->getDataRef()->type == type_def)
                return *i;

            std::vector<NodeTree<ASTData>*> children = (*i)->getChildren();
            //The last child is not a parameter, so a candidate with N children takes N-1 parameters
            const size_t parameterCount = (children.size() > 0) ? children.size()-1 : 0;
            if (types.size() != parameterCount) {
                std::cout << "Type sizes do not match between two " << lookup << "(" << types.size() << "," << parameterCount << "), types are: ";
                for (auto& j : types)
                    std::cout << j.toString() << " ";
                std::cout << std::endl;
                continue;
            }

            bool typesMatch = true;
            for (size_t j = 0; j < types.size(); j++) {
                //A parameter without a recorded type cannot match (and must not be dereferenced)
                Type* parameterType = children[j]->getDataRef()->valueType;
                if (!parameterType || types[j] != *parameterType) {
                    typesMatch = false;
                    std::cout << "Types do not match between two " << lookup << std::endl;
                    break;
                }
            }
            if (typesMatch)
                return *i;
        }
    }

    //if it doesn't exist, try the enclosing scope if it exists.
    auto enclosingIterator = scopeMap.find("~enclosing_scope");
    if (enclosingIterator != scopeMap.end()) {
        NodeTree<ASTData>* upperResult = scopeLookup(enclosingIterator->second[0], lookup, types);
        if (upperResult)
            return upperResult;
    }

    std::cout << "could not find " << lookup << " in standard scope, checking for operator" << std::endl;
    //Note that we don't check for types. At some point we should, as we don't know how to add objects/structs without overloaded operators, etc
    //Also, we've already searched for the element because this is also how we keep track of operator overloading
    if (LLElementIterator != languageLevelScope.end()) {
        std::cout << "found it at language level as operator." << std::endl;
        return LLElementIterator->second[0];
    }

    std::cout << "Did not find, returning NULL" << std::endl;
    return NULL;
}
|
||||
|
||||
//Builds a newly allocated Type from its textual form, e.g. "int", "char**" or a user type name.
//
//Every trailing '*' adds one level of indirection. The remaining text is matched against the
//built-in names (void, bool, int, float, double, char); anything else gets baseType `none`
//and is looked up in `scope` with scopeLookup, whose result (possibly NULL) becomes the
//type definition. The caller receives the result of `new Type(...)`.
Type* ASTTransformation::typeFromString(std::string typeIn, NodeTree<ASTData>* scope) {
    int indirection = 0;
    ValueType baseType = none;
    NodeTree<ASTData>* typeDefinition = NULL;

    //Count trailing stars. The bound check keeps an empty or all-'*' string from indexing before the start.
    while (static_cast<size_t>(indirection) < typeIn.size() && typeIn[typeIn.size() - indirection - 1] == '*')
        indirection++;

    //Strip the stars that were just counted
    std::string edited = strSlice(typeIn, 0, -(indirection + 1));

    if (edited == "void")
        baseType = void_type;
    else if (edited == "bool")
        baseType = boolean;
    else if (edited == "int")
        baseType = integer;
    else if (edited == "float")
        baseType = floating;
    else if (edited == "double")
        baseType = double_percision;
    else if (edited == "char")
        baseType = character;
    else {
        //Not a built-in: treat it as a user-defined type and resolve its definition
        baseType = none;
        typeDefinition = scopeLookup(scope, edited);
    }

    return new Type(baseType, typeDefinition, indirection);
}
|
||||
|
||||
@@ -1,12 +1,33 @@
|
||||
#include "CGenerator.h"
|
||||
|
||||
//Creates a generator with no indentation and the generator tag "__C__".
//generate() compares that tag against the predicate of an if_comp node.
CGenerator::CGenerator() : generatorString("__C__") {
    tabLevel = 0;
}
|
||||
//Destructor; the body is intentionally empty.
CGenerator::~CGenerator() {}
|
||||
|
||||
void CGenerator::generateCompSet(std::map<std::string, NodeTree<ASTData>*> ASTs, std::string outputName) {
|
||||
//Generate an entire set of files
|
||||
std::string buildString = "#!/bin/sh\ncc -std=c99 ";
|
||||
for (auto i = ASTs.begin(); i != ASTs.end(); i++) {
|
||||
buildString += i->first + ".c ";
|
||||
std::ofstream outputCFile;
|
||||
outputCFile.open(i->first + ".c");
|
||||
if (outputCFile.is_open()) {
|
||||
outputCFile << generate(i->second);
|
||||
} else {
|
||||
std::cout << "Cannot open file " << i->first << ".c" << std::endl;
|
||||
}
|
||||
outputCFile.close();
|
||||
}
|
||||
buildString += "-o " + outputName;
|
||||
std::ofstream outputBuild;
|
||||
outputBuild.open(outputName + ".sh");
|
||||
outputBuild << buildString;
|
||||
outputBuild.close();
|
||||
}
|
||||
|
||||
std::string CGenerator::tabs() {
|
||||
std::string returnTabs;
|
||||
for (int i = 0; i < tabLevel; i++)
|
||||
@@ -14,76 +35,229 @@ std::string CGenerator::tabs() {
|
||||
return returnTabs;
|
||||
}
|
||||
|
||||
std::string CGenerator::generate(NodeTree<ASTData>* from) {
|
||||
//The enclosing object is for when we're generating the inside of object methods. They allow us to check scope lookups against the object we're in
|
||||
std::string CGenerator::generate(NodeTree<ASTData>* from, NodeTree<ASTData>* enclosingObject) {
|
||||
ASTData data = from->getData();
|
||||
std::vector<NodeTree<ASTData>*> children = from->getChildren();
|
||||
std::string output = "";
|
||||
std::string output = "";
|
||||
switch (data.type) {
|
||||
case translation_unit:
|
||||
//Do nothing
|
||||
//Do here because we may need the typedefs before the declarations of variables
|
||||
for (int i = 0; i < children.size(); i++)
|
||||
if (children[i]->getDataRef()->type == type_def)
|
||||
output += generate(children[i], enclosingObject) + "\n";
|
||||
//Declare everything in translation unit scope here. (allows stuff from other files, automatic forward declarations)
|
||||
for (auto i = data.scope.begin(); i != data.scope.end(); i++) {
|
||||
for (auto overloadedMembers : i->second) {
|
||||
NodeTree<ASTData>* declaration = overloadedMembers;
|
||||
std::vector<NodeTree<ASTData>*> decChildren = declaration->getChildren();
|
||||
ASTData declarationData = declaration->getData();
|
||||
switch(declarationData.type) {
|
||||
case identifier:
|
||||
output += ValueTypeToCType(declarationData.valueType) + " " + declarationData.symbol.getName() + "; /*identifier*/\n";
|
||||
break;
|
||||
case function:
|
||||
{
|
||||
if (decChildren.size() == 0) { //Not a real function, must be a built in passthrough {
|
||||
output += "/* built in function: " + declarationData.toString() + " */\n";
|
||||
break;
|
||||
}
|
||||
output += "\n" + ValueTypeToCType(declarationData.valueType) + " ";
|
||||
std::string nameDecoration, parameters;
|
||||
for (int j = 0; j < decChildren.size()-1; j++) {
|
||||
if (j > 0)
|
||||
parameters += ", ";
|
||||
parameters += ValueTypeToCType(decChildren[j]->getData().valueType) + " " + generate(decChildren[j], enclosingObject);
|
||||
nameDecoration += "_" + ValueTypeToCTypeDecoration(decChildren[j]->getData().valueType);
|
||||
}
|
||||
output += CifyFunctionName(declarationData.symbol.getName()) + nameDecoration + "(" + parameters + "); /*func*/\n";
|
||||
break;
|
||||
}
|
||||
case type_def:
|
||||
//type
|
||||
output += "/*typedef " + declarationData.symbol.getName() + " */\n";
|
||||
break;
|
||||
default:
|
||||
//std::cout << "Declaration? named " << declaration->getName() << " of unknown type " << ASTData::ASTTypeToString(declarationData.type) << " in translation unit scope" << std::endl;
|
||||
output += "/*unknown declaration named " + declaration->getName() + "*/\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
//Do here because we need the newlines
|
||||
for (int i = 0; i < children.size(); i++)
|
||||
if (children[i]->getDataRef()->type != type_def)
|
||||
output += generate(children[i], enclosingObject) + "\n";
|
||||
return output;
|
||||
break;
|
||||
case interpreter_directive:
|
||||
//Do nothing
|
||||
break;
|
||||
case import:
|
||||
return "#include \"" + data.symbol.getName() + "\"\n";
|
||||
break;
|
||||
return "/* would import \"" + data.symbol.getName() + "\" but....*/\n";
|
||||
//return "#include <" + data.symbol.getName() + ">\n";
|
||||
case identifier:
|
||||
return data.symbol.getName();
|
||||
break;
|
||||
case function:
|
||||
output += "\n" + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + "(";
|
||||
for (int i = 0; i < children.size()-1; i++) {
|
||||
if (i > 0)
|
||||
output += ", ";
|
||||
output += ASTData::ValueTypeToString(children[i]->getData().valueType) + " " + generate(children[i]);
|
||||
{
|
||||
//If we're in an object method, and our enclosing scope is that object, we're a member of the object and should use the self reference.
|
||||
std::string preName;
|
||||
if (enclosingObject && enclosingObject->getDataRef()->scope.find(data.symbol.getName()) != enclosingObject->getDataRef()->scope.end())
|
||||
preName += "self->";
|
||||
if (false)
|
||||
for (int j = 0; j < children.size()-1; j++)
|
||||
preName += ValueTypeToCType(children[j]->getData().valueType) + "_";
|
||||
return preName + CifyFunctionName(data.symbol.getName()); //Cifying does nothing if not an operator overload
|
||||
}
|
||||
case type_def:
|
||||
if (children.size() == 0) {
|
||||
return "typedef " + ValueTypeToCType(data.valueType) + " " + data.symbol.getName() + ";";
|
||||
} else {
|
||||
std::string objectString = "typedef struct __struct_dummy_" + data.symbol.getName() + "__ {\n";
|
||||
std::string postString; //The functions have to be outside the struct definition
|
||||
for (int i = 0; i < children.size(); i++) {
|
||||
std::cout << children[i]->getName() << std::endl;
|
||||
if (children[i]->getName() == "function") //If object method
|
||||
postString += generateObjectMethod(from, children[i]) + "\n";
|
||||
else
|
||||
objectString += generate(children[i], enclosingObject) + "\n";
|
||||
}
|
||||
objectString += "} " + data.symbol.getName() + ";";
|
||||
return objectString + postString; //Functions come after the declaration of the struct
|
||||
}
|
||||
output+= ")\n" + generate(children[children.size()-1]);
|
||||
case function:
|
||||
{
|
||||
output += "\n" + ValueTypeToCType(data.valueType) + " ";
|
||||
std::string nameDecoration, parameters;
|
||||
for (int j = 0; j < children.size()-1; j++) {
|
||||
if (j > 0)
|
||||
parameters += ", ";
|
||||
parameters += ValueTypeToCType(children[j]->getData().valueType) + " " + generate(children[j], enclosingObject);
|
||||
nameDecoration += "_" + ValueTypeToCTypeDecoration(children[j]->getData().valueType);
|
||||
}
|
||||
output += CifyFunctionName(data.symbol.getName()) + nameDecoration + "(" + parameters + ")\n" + generate(children[children.size()-1], enclosingObject);
|
||||
return output;
|
||||
break;
|
||||
}
|
||||
case code_block:
|
||||
output += tabs() + "{\n";
|
||||
output += "{\n";
|
||||
tabLevel++;
|
||||
for (int i = 0; i < children.size(); i++)
|
||||
output += generate(children[i]);
|
||||
for (int i = 0; i < children.size(); i++) {
|
||||
//std::cout << "Line " << i << std::endl;
|
||||
std::string line = generate(children[i], enclosingObject);
|
||||
//std::cout << line << std::endl;
|
||||
output += line;
|
||||
}
|
||||
tabLevel--;
|
||||
output += tabs() + "}";
|
||||
return output;
|
||||
break;
|
||||
case expression:
|
||||
output += " " + data.symbol.getName() + ", ";
|
||||
break;
|
||||
case boolean_expression:
|
||||
output += " " + data.symbol.getName() + " ";
|
||||
break;
|
||||
case statement:
|
||||
return tabs() + generate(children[0]) + ";\n";
|
||||
break;
|
||||
return tabs() + generate(children[0], enclosingObject) + ";\n";
|
||||
case if_statement:
|
||||
output += "if (" + generate(children[0]) + ") \n" + generate(children[1]);
|
||||
output += "if (" + generate(children[0], enclosingObject) + ")\n\t" + generate(children[1], enclosingObject);
|
||||
if (children.size() > 2)
|
||||
output += " else " + generate(children[2]);
|
||||
output += " else " + generate(children[2], enclosingObject);
|
||||
return output;
|
||||
case while_loop:
|
||||
output += "while (" + generate(children[0], enclosingObject) + ")\n\t" + generate(children[1], enclosingObject);
|
||||
return output;
|
||||
case for_loop:
|
||||
//The strSlice calls are there to get rid of an unwanted return and unwanted semicolon(s)
|
||||
output += "for (" + strSlice(generate(children[0], enclosingObject),0,-3) + generate(children[1], enclosingObject) + ";" + strSlice(generate(children[2], enclosingObject),0,-3) + ")\n\t" + generate(children[3], enclosingObject);
|
||||
return output;
|
||||
break;
|
||||
case return_statement:
|
||||
return "return " + generate(children[0]);
|
||||
if (children.size())
|
||||
return "return " + generate(children[0], enclosingObject);
|
||||
else
|
||||
return "return";
|
||||
case assignment_statement:
|
||||
return generate(children[0]) + " = " + generate(children[1]);
|
||||
return generate(children[0], enclosingObject) + " = " + generate(children[1], enclosingObject);
|
||||
case declaration_statement:
|
||||
return ASTData::ValueTypeToString(children[0]->getData().valueType) + " " + generate(children[0]) + " = " + generate(children[1]);
|
||||
if (children.size() == 1)
|
||||
return ValueTypeToCType(children[0]->getData().valueType) + " " + generate(children[0], enclosingObject) + ";";
|
||||
else
|
||||
return ValueTypeToCType(children[0]->getData().valueType) + " " + generate(children[0], enclosingObject) + " = " + generate(children[1], enclosingObject) + ";";
|
||||
case if_comp:
|
||||
if (generate(children[0], enclosingObject) == generatorString)
|
||||
return generate(children[1], enclosingObject);
|
||||
return "";
|
||||
case simple_passthrough:
|
||||
return strSlice(generate(children[0], enclosingObject), 3, -4);
|
||||
case function_call:
|
||||
{
|
||||
//NOTE: The first (0th) child of a function call node is the declaration of the function
|
||||
|
||||
//Handle operators specially for now. Will later replace with
|
||||
//Inlined functions in the standard library
|
||||
std::string name = data.symbol.getName();
|
||||
if (name == "+" || name == "-" || name == "*" || name == "/") {
|
||||
return "((" + generate(children[0]) + ")" + name + "(" + generate(children[1]) + "))";
|
||||
// std::string name = data.symbol.getName();
|
||||
// std::cout << name << " == " << children[0]->getData().symbol.getName() << std::endl;
|
||||
std::string name = children[0]->getDataRef()->symbol.getName();
|
||||
ASTType funcType = children[0]->getDataRef()->type;
|
||||
std::cout << "Doing function: " << name << std::endl;
|
||||
//Test for special functions only if what we're testing is, indeed, the definition, not a function call that returns a callable function pointer
|
||||
if (funcType == function) {
|
||||
if (name == "++" || name == "--")
|
||||
return generate(children[1], enclosingObject) + name;
|
||||
if (name == "*" && children.size() == 2) //Is dereference, not multiplication
|
||||
return "*(" + generate(children[1], enclosingObject) + ")";
|
||||
if (name == "+" || name == "-" || name == "*" || name == "/" || name == "==" || name == ">=" || name == "<=" || name == "!="
|
||||
|| name == "<" || name == ">" || name == "%" || name == "+=" || name == "-=" || name == "*=" || name == "/=" || name == "||"
|
||||
|| name == "&&" || name == "!" )
|
||||
return "((" + generate(children[1], enclosingObject) + ")" + name + "(" + generate(children[2], enclosingObject) + "))";
|
||||
else if (name == "." || name == "->") {
|
||||
if (children.size() == 1)
|
||||
return "/*dot operation with one child*/" + generate(children[0], enclosingObject) + "/*end one child*/";
|
||||
//If this is accessing an actual function, find the function in scope and take the appropriate action. Probably an object method
|
||||
if (children[2]->getDataRef()->type == function) {
|
||||
std::string functionName = children[2]->getDataRef()->symbol.getName();
|
||||
NodeTree<ASTData>* possibleObjectType = children[1]->getDataRef()->valueType->typeDefinition;
|
||||
//If is an object method, generate it like one. Needs extension/modification for inheritence
|
||||
if (possibleObjectType && possibleObjectType->getDataRef()->scope.find(functionName) != possibleObjectType->getDataRef()->scope.end()) {
|
||||
std::string nameDecoration;
|
||||
std::vector<NodeTree<ASTData>*> functionDefChildren = children[2]->getChildren(); //The function def is the rhs of the access operation
|
||||
std::cout << "Decorating (in access-should be object) " << name << " " << functionDefChildren.size() << std::endl;
|
||||
for (int i = 0; i < (functionDefChildren.size() > 0 ? functionDefChildren.size()-1 : 0); i++)
|
||||
nameDecoration += "_" + ValueTypeToCTypeDecoration(functionDefChildren[i]->getData().valueType);
|
||||
/*HERE*/ return possibleObjectType->getDataRef()->symbol.getName() +"__" + CifyFunctionName(functionName) + nameDecoration + "(" + (name == "." ? "&" : "") + generate(children[1], enclosingObject) + ",";
|
||||
//The comma lets the upper function call know we already started the param list
|
||||
//Note that we got here from a function call. We just pass up this special case and let them finish with the parentheses
|
||||
} else {
|
||||
std::cout << "Is not in scope or not type" << std::endl;
|
||||
return "((" + generate(children[1], enclosingObject) + ")" + name + functionName + ")";
|
||||
}
|
||||
} else {
|
||||
//return "((" + generate(children[1], enclosingObject) + ")" + name + generate(children[2], enclosingObject) + ")";
|
||||
return "((" + generate(children[1], enclosingObject) + ")" + name + generate(children[2]) + ")";
|
||||
}
|
||||
} else {
|
||||
//It's a normal function call, not a special one or a method or anything. Name decorate.
|
||||
std::vector<NodeTree<ASTData>*> functionDefChildren = children[0]->getChildren();
|
||||
std::cout << "Decorating (none-special)" << name << " " << functionDefChildren.size() << std::endl;
|
||||
std::string nameDecoration;
|
||||
for (int i = 0; i < (functionDefChildren.size() > 0 ? functionDefChildren.size()-1 : 0); i++)
|
||||
nameDecoration += "_" + ValueTypeToCTypeDecoration(functionDefChildren[i]->getData().valueType);
|
||||
//Check to see if we're inside of an object and this is a method call
|
||||
bool isSelfObjectMethod = enclosingObject && contains(enclosingObject->getChildren(), children[0]);
|
||||
if (isSelfObjectMethod)
|
||||
output += enclosingObject->getDataRef()->symbol.getName() +"__";
|
||||
/*HERE*/ output += CifyFunctionName(name) + nameDecoration + "(";
|
||||
if (isSelfObjectMethod)
|
||||
output += children.size() > 1 ? "self," : "self";
|
||||
}
|
||||
} else {
|
||||
//This part handles cases where our definition isn't the function definition (that is, it is probabally the return from another function)
|
||||
//It's probabally the result of an access function call (. or ->) to access an object method.
|
||||
std::string functionCallSource = generate(children[0], enclosingObject);
|
||||
if (functionCallSource[functionCallSource.size()-1] == ',') //If it's a member method, it's already started the parameter list.
|
||||
output += children.size() > 1 ? functionCallSource : functionCallSource.substr(0, functionCallSource.size()-1);
|
||||
else
|
||||
output += functionCallSource + "(";
|
||||
}
|
||||
output += data.symbol.getName() + "(";
|
||||
for (int i = 0; i < children.size(); i++)
|
||||
for (int i = 1; i < children.size(); i++) //children[0] is the declaration
|
||||
if (i < children.size()-1)
|
||||
output += generate(children[i]) + ", ";
|
||||
else output += generate(children[i]);
|
||||
output += generate(children[i], enclosingObject) + ", ";
|
||||
else
|
||||
output += generate(children[i], enclosingObject);
|
||||
output += ") ";
|
||||
return output;
|
||||
}
|
||||
@@ -94,32 +268,141 @@ std::string CGenerator::generate(NodeTree<ASTData>* from) {
|
||||
std::cout << "Nothing!" << std::endl;
|
||||
}
|
||||
for (int i = 0; i < children.size(); i++)
|
||||
output += generate(children[i]);
|
||||
output += generate(children[i], enclosingObject);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
std::string CGenerator::ValueTypeToCType(ValueType type) {
|
||||
switch (type) {
|
||||
//Generates the C definition of a method `from` belonging to the object `enclosingObject`.
//
//The emitted function is named "<ObjectName>__<CifiedMethodName><decoration>", where the
//decoration appends "_<type>" for every parameter. Its first parameter is `self`, typed as
//the object's own type with one extra level of indirection; the declared parameters follow.
//The last child of `from` is generated as the body, with `enclosingObject` passed along so
//member accesses can be resolved against the object.
//
//A method node without children has no body to generate; a C comment is returned instead
//of indexing into the empty child list.
std::string CGenerator::generateObjectMethod(NodeTree<ASTData>* enclosingObject, NodeTree<ASTData>* from) {
    ASTData data = from->getData();
    std::vector<NodeTree<ASTData>*> children = from->getChildren();
    if (children.empty())
        return "/* object method " + CifyFunctionName(data.symbol.getName()) + " has no body */\n";

    Type enclosingObjectType = *(enclosingObject->getDataRef()->valueType); //Copy a new type so we can turn it into a pointer if we need to
    enclosingObjectType.indirection++;

    //Everything but the last child is a parameter
    const size_t bodyIndex = children.size() - 1;
    std::string nameDecoration, parameters;
    for (size_t i = 0; i < bodyIndex; i++) {
        parameters += ", " + ValueTypeToCType(children[i]->getData().valueType) + " " + generate(children[i]);
        nameDecoration += "_" + ValueTypeToCTypeDecoration(children[i]->getData().valueType);
    }

    std::string output;
    output += "\n" + ValueTypeToCType(data.valueType) + " " + enclosingObject->getDataRef()->symbol.getName() + "__"
        + CifyFunctionName(data.symbol.getName()) + nameDecoration + "(" + ValueTypeToCType(&enclosingObjectType)
        + " self" + parameters + ")\n" + generate(children[bodyIndex], enclosingObject); //Pass in the object so we can properly handle access to member stuff
    return output;
}
|
||||
|
||||
//Returns the C spelling of `type`: the base type name followed by one '*' per level of
//indirection. A `none` base type uses the symbol name of the attached type definition when
//there is one, and the literal "none" otherwise. Base types without a case below produce
//"unknown_ValueType". `type` is dereferenced unconditionally and must not be NULL.
std::string CGenerator::ValueTypeToCType(Type *type) {
    std::string return_type;
    switch (type->baseType) {
        case none:
            if (type->typeDefinition)
                return_type = type->typeDefinition->getDataRef()->symbol.getName();
            else
                return_type = "none";
            break;
        case void_type:
            return_type = "void";
            break;
        case boolean:
            return_type = "bool";
            break;
        case integer:
            return_type = "int";
            break;
        case floating:
            return_type = "float";
            break;
        case double_percision:
            return_type = "double";
            break;
        case character:
            return_type = "char";
            break;
        default:
            return_type = "unknown_ValueType";
            break;
    }
    //Every case must fall through to here so pointer types get their stars
    for (int i = 0; i < type->indirection; i++)
        return_type += "*";
    return return_type;
}
|
||||
|
||||
//Returns the identifier-safe spelling of `type` used when decorating function names:
//the base type name followed by "_P__" once per level of indirection. A `none` base type
//uses the symbol name of the attached type definition if present, else "none"; base types
//without a case below give "unknown_ValueType". `type` is dereferenced unconditionally.
std::string CGenerator::ValueTypeToCTypeDecoration(Type *type) {
    std::string decorated;
    switch (type->baseType) {
        case none:
            decorated = type->typeDefinition
                ? std::string(type->typeDefinition->getDataRef()->symbol.getName())
                : std::string("none");
            break;
        case void_type:         decorated = "void";              break;
        case boolean:           decorated = "bool";              break;
        case integer:           decorated = "int";               break;
        case floating:          decorated = "float";             break;
        case double_percision:  decorated = "double";            break;
        case character:         decorated = "char";              break;
        default:                decorated = "unknown_ValueType"; break;
    }

    //One pointer marker per level of indirection
    int remaining = type->indirection;
    while (remaining-- > 0)
        decorated += "_P__";
    return decorated;
}
|
||||
|
||||
//Turns a function name that may contain operator characters into a valid C identifier
//fragment by replacing every operator occurrence with "_<word>_" (e.g. "+" -> "_plus_").
//Names without any listed operator are returned unchanged.
//
//The table is ordered longest operator first. Replacing single characters first would
//rewrite "++" into "_plus__plus_" before the "++" entry could ever match, leaving every
//multi-character entry dead. The names for "<<=" and ">>=" follow "<<" (doubleleft) and
//">>" (doubleright).
std::string CGenerator::CifyFunctionName(std::string name) {
    static const char* const operatorsToReplace[][2] = {
        //three characters
        { "<<=", "doubleleftequals" },
        { ">>=", "doublerightequals" },
        //two characters
        { "++", "doubleplus" },
        { "--", "doubleminus" },
        { "<<", "doubleleft" },
        { ">>", "doubleright" },
        { "==", "doubleequals" },
        { "!=", "notequals" },
        { "&&", "doubleamprsnd" },
        { "||", "doublepipe" },
        { "+=", "plusequals" },
        { "-=", "minusequals" },
        { "/=", "divequals" },
        { "%=", "modequals" },
        { "^=", "caratequals" },
        { "&=", "amprsdequals" },
        { "|=", "pipeequals" },
        { "*=", "starequals" },
        { "->", "arrow" },
        //one character
        { "+", "plus" },
        { "-", "minus" },
        { "*", "star" },
        { "/", "div" },
        { "%", "mod" },
        { "^", "carat" },
        { "&", "amprsd" },
        { "|", "pipe" },
        { "~", "tilde" },
        { "!", "exclamationpt" },
        { ",", "comma" },
        { "=", "equals" }
    };

    for (const auto& entry : operatorsToReplace) {
        const std::string operatorText = entry[0];
        const std::string replacement = std::string("_") + entry[1] + "_";
        size_t foundPos = name.find(operatorText);
        while (foundPos != std::string::npos) {
            name.replace(foundPos, operatorText.length(), replacement);
            //Replacements contain no operator characters, so resume after the inserted text
            foundPos = name.find(operatorText, foundPos + replacement.length());
        }
    }
    return name;
}
|
||||
|
||||
|
||||
@@ -128,3 +128,8 @@ std::string GraphStructuredStack::toString() {
|
||||
}
|
||||
return tostring;
|
||||
}
|
||||
|
||||
//Empties the stack by calling clear() on gss and then on edges.
void GraphStructuredStack::clear() {
    gss.clear();
    edges.clear();
}
|
||||
|
||||
126
src/Importer.cpp
Normal file
126
src/Importer.cpp
Normal file
@@ -0,0 +1,126 @@
|
||||
#include "Importer.h"
|
||||
|
||||
//Stores the parser used for imports and fills the two symbol lists that import() applies
//to every parse tree: removeSymbols (fed to RemovalTransformation) and collapseSymbols
//(fed to CollapseTransformation). The lists are filled in the order given below.
Importer::Importer(Parser* parserIn) {
    parser = parserIn;

    //Symbol text plus the flag passed as the second Symbol constructor argument
    struct RemovalSpec {
        const char* text;
        bool flag;
    };
    static const RemovalSpec removals[] = {
        { "WS", false },
        { "\\(", true },
        { "\\)", true },
        { "::", true },
        { ";", true },
        { "{", true },
        { "}", true },
        { "(", true },
        { ")", true },
        { "import", true }, //Don't need the actual text of the symbol
        { "interpreter_directive", false },
        { "if", true },
        { "while", true },
        { "__if_comp__", true },
        { "comp_simple_passthrough", true },
        { "typedef", true }
    };
    for (const RemovalSpec& spec : removals)
        removeSymbols.push_back(Symbol(spec.text, spec.flag));

    //All collapse symbols are created with the flag set to false
    static const char* const collapses[] = {
        "opt_typed_parameter_list",
        "opt_parameter_list",
        "opt_import_list",
        "import_list",
        "statement_list",
        "parameter_list",
        "typed_parameter_list",
        "unorderd_list_part",
        "if_comp_pred",
        "declaration_block"
    };
    for (const char* text : collapses)
        collapseSymbols.push_back(Symbol(text, false));
}
|
||||
|
||||
//Destructor; the body is intentionally empty.
Importer::~Importer() {}
|
||||
|
||||
//Parses `fileName` and converts it into an AST, caching the result per file name.
//
//Steps: return the cached AST if this file was imported before; read the whole file;
//parse it; write the raw parse tree to "<fileName>out"; apply every removal and collapse
//transformation; write the result to "<fileName>out.transformed.dot"; run
//ASTTransformation (handing it this importer so it can import further files); write the
//AST to "<fileName>out.AST.dot"; cache and return the AST.
//
//Returns NULL without caching anything when the input or any of the three output files
//cannot be opened. NULL trees from later stages are reported on stdout but not treated
//as fatal here.
NodeTree<ASTData>* Importer::import(std::string fileName) {
    //Check to see if we've already done it
    auto alreadyImported = imported.find(fileName);
    if (alreadyImported != imported.end())
        return alreadyImported->second;

    const std::string outputName = fileName + "out";
    const std::string transformedName = outputName + ".transformed.dot";
    const std::string astName = outputName + ".AST.dot";

    std::ifstream programInFile(fileName);
    if (!programInFile.is_open()) {
        std::cout << "Problem opening programInFile " << fileName << "\n";
        return NULL;
    }

    std::ofstream outFile(outputName);
    if (!outFile.is_open()) {
        std::cout << "Probelm opening output file " << outputName << "\n";
        return NULL;
    }

    std::ofstream outFileTransformed(transformedName.c_str());
    if (!outFileTransformed.is_open()) {
        std::cout << "Probelm opening second output file " << transformedName << "\n";
        return NULL;
    }

    std::ofstream outFileAST(astName.c_str());
    if (!outFileAST.is_open()) {
        std::cout << "Probelm opening second output file " << astName << "\n";
        return NULL;
    }

    //Read the program line by line, terminating every line read with '\n'
    std::string programInputFileString;
    for (std::string line; programInFile.good(); ) {
        getline(programInFile, line);
        programInputFileString += line;
        programInputFileString += "\n";
    }
    programInFile.close();

    NodeTree<Symbol>* parseTree = parser->parseInput(programInputFileString);

    if (!parseTree)
        std::cout << "ParseTree returned from parser is NULL!" << std::endl;
    else
        outFile << parseTree->DOTGraphString() << std::endl;
    outFile.close();

    //Remove Transformations
    for (auto& symbolToRemove : removeSymbols)
        parseTree = RemovalTransformation<Symbol>(symbolToRemove).transform(parseTree);

    //Collapse Transformations
    for (auto& symbolToCollapse : collapseSymbols)
        parseTree = CollapseTransformation<Symbol>(symbolToCollapse).transform(parseTree);

    if (!parseTree)
        std::cout << "Tree returned from transformation is NULL!" << std::endl;
    else
        outFileTransformed << parseTree->DOTGraphString() << std::endl;
    outFileTransformed.close();

    //Call with ourself to allow the transformation to call us to import files that it needs
    NodeTree<ASTData>* AST = ASTTransformation(this).transform(parseTree);

    if (!AST)
        std::cout << "Tree returned from ASTTransformation is NULL!" << std::endl;
    else
        outFileAST << AST->DOTGraphString() << std::endl;
    outFileAST.close();

    imported[fileName] = AST;
    return AST;
}
|
||||
|
||||
//Returns, by value, the map from file name to AST that import() fills in.
std::map<std::string, NodeTree<ASTData>*> Importer::getASTMap() {
    return imported;
}
|
||||
@@ -114,3 +114,7 @@ void Lexer::test() {
|
||||
|
||||
std::cout << "Lexer tests passed\n";
|
||||
}
|
||||
|
||||
//Sets currentPosition back to 0.
void Lexer::reset() {
    currentPosition = 0;
}
|
||||
|
||||
@@ -29,7 +29,12 @@ const bool ParseRule::operator!=(const ParseRule &other) {
|
||||
}
|
||||
|
||||
//Returns a newly allocated copy of this rule. The lookahead vector, when present, is
//deep-copied so the clone does not share it with the original; a NULL lookahead stays NULL.
ParseRule* ParseRule::clone() {
    std::vector<Symbol>* newLookahead = NULL;
    if (lookahead)
        newLookahead = new std::vector<Symbol>(*lookahead);
    return new ParseRule(leftHandle, pointerIndex, rightSide, newLookahead);
}
|
||||
|
||||
void ParseRule::setLeftHandle(Symbol leftHandle) {
|
||||
|
||||
@@ -7,6 +7,16 @@ Parser::Parser() : EOFSymbol("$EOF$", true), nullSymbol("$NULL$", true), invalid
|
||||
//Destructor; the body is intentionally empty.
Parser::~Parser() {}
|
||||
|
||||
//Writes the parse table to `file` by forwarding to table.exportTable().
void Parser::exportTable(std::ofstream &file) {
    table.exportTable(file);
}
|
||||
//Loads the parse table from `tableData` by forwarding to table.importTable().
void Parser::importTable(char* tableData) {
    table.importTable(tableData);
}
|
||||
|
||||
Symbol Parser::getOrAddSymbol(std::string symbolString, bool isTerminal) {
|
||||
Symbol symbol;
|
||||
std::pair<std::string, bool> entry = std::make_pair(symbolString, isTerminal);
|
||||
@@ -68,7 +78,7 @@ void Parser::loadGrammer(std::string grammerInputString) {
|
||||
//Get next token
|
||||
currToken = reader.word();
|
||||
}
|
||||
std::cout << "Parsed!\n";
|
||||
//std::cout << "Parsed!\n";
|
||||
|
||||
// for (std::vector<ParseRule*>::size_type i = 0; i < loadedGrammer.size(); i++)
|
||||
// std::cout << loadedGrammer[i]->toString() << std::endl;
|
||||
@@ -88,7 +98,7 @@ void Parser::createStateSet() {
|
||||
std::queue<State*>* toDo = new std::queue<State*>();
|
||||
toDo->push(zeroState);
|
||||
//std::cout << "Begining for main set for loop" << std::endl;
|
||||
while (toDo->front()) {
|
||||
while (toDo->size()) {
|
||||
//closure
|
||||
closure(toDo->front());
|
||||
//Add the new states
|
||||
@@ -181,7 +191,7 @@ std::vector<Symbol>* Parser::incrementiveFollowSet(ParseRule* rule) {
|
||||
}
|
||||
}
|
||||
followSet->insert(followSet->end(), symbolFirstSet->begin(), symbolFirstSet->end());
|
||||
//delete symbolFirstSet;
|
||||
delete symbolFirstSet;
|
||||
rule->advancePointer();
|
||||
}
|
||||
if (rule->isAtEnd()) {
|
||||
@@ -209,10 +219,13 @@ void Parser::closure(State* state) {
|
||||
std::vector<ParseRule*>* stateTotal = state->getTotal();
|
||||
for (std::vector<ParseRule*>::size_type i = 0; i < stateTotal->size(); i++) {
|
||||
ParseRule* currentStateRule = (*stateTotal)[i];
|
||||
//If it's at it's end, move on. We can't advance it.
|
||||
if(currentStateRule->isAtEnd())
|
||||
continue;
|
||||
for (std::vector<ParseRule*>::size_type j = 0; j < loadedGrammer.size(); j++) {
|
||||
//If the current symbol in the rule is not null (rule completed) and it equals a grammer's left side
|
||||
ParseRule* currentGramRule = loadedGrammer[j]->clone();
|
||||
if ( !currentStateRule->isAtEnd() && currentStateRule->getAtNextIndex() == currentGramRule->getLeftSide()) {
|
||||
if (currentStateRule->getAtNextIndex() == currentGramRule->getLeftSide()) {
|
||||
//std::cout << (*stateTotal)[i]->getAtNextIndex()->toString() << " has an applicable production " << loadedGrammer[j]->toString() << std::endl;
|
||||
//Now, add the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set.
|
||||
//std::cout << "Setting lookahead for " << currentGramRule->toString() << " in state " << state->toString() << std::endl;
|
||||
@@ -225,6 +238,7 @@ void Parser::closure(State* state) {
|
||||
//std::cout << (*stateTotal)[k]->toString() << std::endl;
|
||||
(*stateTotal)[k]->addLookahead(currentGramRule->getLookahead());
|
||||
isAlreadyInState = true;
|
||||
delete currentGramRule;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -311,7 +325,7 @@ void Parser::addStates(std::vector< State* >* stateSets, State* state, std::queu
|
||||
std::string Parser::stateSetToString() {
|
||||
std::string concat = "";
|
||||
for (std::vector< State *>::size_type i = 0; i < stateSets.size(); i++) {
|
||||
concat += stateSets[i]->toString();
|
||||
concat += intToString(i) + " is " + stateSets[i]->toString();
|
||||
}
|
||||
return concat;
|
||||
}
|
||||
|
||||
@@ -9,6 +9,13 @@ RNGLRParser::~RNGLRParser() {
|
||||
}
|
||||
|
||||
//Parses inputString and returns the resulting tree (see the rest of the body).
//Only the reset prologue is shown in this span; the function continues below.
NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
    //Clear everything left over from a previous call so the parser can be reused
    input.clear();
    gss.clear();
    while (!toReduce.empty()) toReduce.pop();
    //Drain toShift itself. This loop used to pop toReduce, which never empties
    //toShift and pops an already-empty queue.
    while (!toShift.empty()) toShift.pop();
    SPPFStepNodes.clear();
    nullableParts.clear();
    packedMap.clear();
|
||||
|
||||
//Check for no tokens
|
||||
bool accepting = false;
|
||||
@@ -27,6 +34,7 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
|
||||
return new NodeTree<Symbol>();
|
||||
}
|
||||
|
||||
lexer.reset();
|
||||
lexer.setInput(inputString);
|
||||
//Now fully lex our input because this algorithm was designed in that manner and simplifies this first implementation.
|
||||
//It could be converted to on-line later.
|
||||
@@ -42,7 +50,8 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
|
||||
input.push_back(currentToken);
|
||||
}
|
||||
|
||||
std::cout << "\nDone with Lexing\n" << std::endl;
|
||||
// std::cout << "\nDone with Lexing, length:" << input.size() << std::endl;
|
||||
// std::cout << input[0].toString() << std::endl;
|
||||
|
||||
|
||||
// for (int i = 0; i < input.size(); i++)
|
||||
@@ -50,13 +59,13 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
|
||||
// std::cout << std::endl;
|
||||
|
||||
|
||||
std::cout << "Setting up 0th frontier, first actions, toShift, toReduce" << std::endl;
|
||||
//std::cout << "Setting up 0th frontier, first actions, toShift, toReduce" << std::endl;
|
||||
|
||||
//Frontier 0, new node with state 0
|
||||
NodeTree<int>* v0 = gss.newNode(0);
|
||||
gss.addToFrontier(0,v0);
|
||||
|
||||
std::cout << "Done setting up new frontier" << std::endl;
|
||||
//std::cout << "Done setting up new frontier" << std::endl;
|
||||
|
||||
std::vector<ParseAction*> firstActions = *(table.get(0, input[0]));
|
||||
for (std::vector<ParseAction*>::size_type i = 0; i < firstActions.size(); i++) {
|
||||
@@ -71,17 +80,21 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
|
||||
|
||||
// std::cout << "GSS:\n" << gss.toString() << std::endl;
|
||||
|
||||
std::cout << "Starting parse loop" << std::endl;
|
||||
//std::cout << "Starting parse loop" << std::endl;
|
||||
|
||||
for (int i = 0; i < input.size(); i++) {
|
||||
// std::cout << "Checking if frontier " << i << " is empty" << std::endl;
|
||||
if (gss.frontierIsEmpty(i)) {
|
||||
std::cout << "Frontier " << i << " is empty." << std::endl;
|
||||
std::cout << "Failed on " << input[i].toString() << std::endl;
|
||||
//std::cout << "Frontier " << i << " is empty." << std::endl;
|
||||
std::cout << "Parsing failed on " << input[i].toString() << std::endl;
|
||||
std::cout << "Problem is on line: " << findLine(i) << std::endl;
|
||||
std::cout << "Nearby is:" << std::endl;
|
||||
int range = 5;
|
||||
const int range = 10;
|
||||
for (int j = (i-range >= 0 ? i-range : 0); j < (i+range < input.size() ? i+range : input.size()); j++)
|
||||
std::cout << input[j].toString() << " ";
|
||||
if (j == i)
|
||||
std::cout << "||*||*||" << input[j].toString() << "||*||*|| ";
|
||||
else
|
||||
std::cout << input[j].toString() << " ";
|
||||
std::cout << std::endl;
|
||||
break;
|
||||
}
|
||||
@@ -98,7 +111,7 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
|
||||
shifter(i);
|
||||
//std::cout << "GSS:\n" << gss.toString() << std::endl;
|
||||
}
|
||||
std::cout << "Done with parsing loop, checking for acceptance" << std::endl;
|
||||
//std::cout << "Done with parsing loop, checking for acceptance" << std::endl;
|
||||
NodeTree<int>* accState = gss.frontierGetAccState(input.size()-1);
|
||||
if (accState) {
|
||||
std::cout << "Accepted!" << std::endl;
|
||||
@@ -106,7 +119,7 @@ NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString) {
|
||||
}
|
||||
|
||||
std::cout << "Rejected!" << std::endl;
|
||||
std::cout << "GSS:\n" << gss.toString() << std::endl;
|
||||
// std::cout << "GSS:\n" << gss.toString() << std::endl;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -131,7 +144,7 @@ void RNGLRParser::reducer(int i) {
|
||||
//The end of the current path
|
||||
NodeTree<int>* currentReached = currentPath[currentPath.size()-1];
|
||||
|
||||
std::cout << "Getting the shfit state for state " << currentReached->getData() << " and symbol " << reduction.symbol.toString() << std::endl;
|
||||
//std::cout << "Getting the shift state for state " << currentReached->getData() << " and symbol " << reduction.symbol.toString() << std::endl;
|
||||
int toState = table.getShift(currentReached->getData(), reduction.symbol)->shiftState;
|
||||
|
||||
//If reduction length is 0, then we make the new label the appropriate nullable parts
|
||||
@@ -177,7 +190,7 @@ void RNGLRParser::reducer(int i) {
|
||||
//std::cout << "Adding shifts and reductions for a state that did not exist" << std::endl;
|
||||
std::vector<ParseAction*> actions = *(table.get(toState, input[i]));
|
||||
for (std::vector<ParseAction*>::size_type k = 0; k < actions.size(); k++) {
|
||||
std::cout << "Action is " << actions[k]->toString() << std::endl;
|
||||
//std::cout << "Action is " << actions[k]->toString() << std::endl;
|
||||
if (actions[k]->action == ParseAction::SHIFT) {
|
||||
toShift.push(std::make_pair(toStateNode, actions[k]->shiftState));
|
||||
} else if (actions[k]->action == ParseAction::REDUCE && fullyReducesToNull(actions[k]->reduceRule)) {
|
||||
@@ -201,7 +214,7 @@ void RNGLRParser::shifter(int i) {
|
||||
while (!toShift.empty()) {
|
||||
std::pair<NodeTree<int>*, int> shift = toShift.front();
|
||||
toShift.pop();
|
||||
std::cout << "Current potential shift from " << shift.first->getData() << " to " << shift.second << std::endl;
|
||||
//std::cout << "Current potential shift from " << shift.first->getData() << " to " << shift.second << std::endl;
|
||||
NodeTree<int>* shiftTo = gss.inFrontier(i+1, shift.second);
|
||||
if (shiftTo) {
|
||||
//std::cout << "State already existed, just adding edge" << std::endl;
|
||||
@@ -220,7 +233,7 @@ void RNGLRParser::shifter(int i) {
|
||||
gss.addEdge(shiftTo, shift.first, newLabel);
|
||||
std::vector<ParseAction*> actions = *(table.get(shift.second, input[i+1]));
|
||||
for (std::vector<ParseAction*>::size_type j = 0; j < actions.size(); j++) {
|
||||
std::cout << "Adding action " << actions[j]->toString() << " to either nextShifts or toReduce" << std::endl;
|
||||
//std::cout << "Adding action " << actions[j]->toString() << " to either nextShifts or toReduce" << std::endl;
|
||||
//Shift
|
||||
if (actions[j]->action == ParseAction::SHIFT) {
|
||||
nextShifts.push(std::make_pair(shiftTo, actions[j]->shiftState));
|
||||
@@ -339,11 +352,13 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state, std:
|
||||
//if (newStates[i]->basisEquals(*((*stateSets)[j]))) {
|
||||
stateAlreadyInAllStates = true;
|
||||
//If it does exist, we should add it as the shift/goto in the action table
|
||||
//std::cout << "newStates[" << i << "] == stateSets[" << j << "]" << std::endl;
|
||||
|
||||
if (!((*stateSets)[j]->basisEquals(*(newStates[i]))))
|
||||
toDo->push((*stateSets)[j]);
|
||||
|
||||
(*stateSets)[j]->combineStates(*(newStates[i]));
|
||||
//std::cout << j << "\t Hay, doing an inside loop state reductions!" << std::endl;
|
||||
addStateReductionsToTable((*stateSets)[j]);
|
||||
table.add(stateNum(state), currStateSymbol, new ParseAction(ParseAction::SHIFT, j));
|
||||
|
||||
@@ -363,13 +378,15 @@ void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state, std:
|
||||
|
||||
void RNGLRParser::addStateReductionsToTable(State* state) {
|
||||
std::vector<ParseRule*>* currStateTotal = state->getTotal();
|
||||
//std::cout << currStateTotal->size() << "::" << state->getNumber() << std::endl;
|
||||
for (std::vector<ParseRule*>::size_type i = 0; i < currStateTotal->size(); i++) {
|
||||
//See if reduce
|
||||
//Also, this really only needs to be done for the state's basis, but we're already iterating through, so...
|
||||
std::vector<Symbol>* lookahead = (*currStateTotal)[i]->getLookahead();
|
||||
if ((*currStateTotal)[i]->isAtEnd()) {
|
||||
for (std::vector<Symbol>::size_type j = 0; j < lookahead->size(); j++)
|
||||
for (std::vector<Symbol>::size_type j = 0; j < lookahead->size(); j++) {
|
||||
table.add(stateNum(state), (*lookahead)[j], new ParseAction(ParseAction::REDUCE, (*currStateTotal)[i]));
|
||||
}
|
||||
//If this has an appropriate ruduction to null, get the reduce trees out
|
||||
} else if (reducesToNull((*currStateTotal)[i])) {
|
||||
//std::cout << (*currStateTotal)[i]->toString() << " REDUCES TO NULL" << std::endl;
|
||||
@@ -476,3 +493,14 @@ std::vector<NodeTree<Symbol>*> RNGLRParser::getPathEdges(std::vector<NodeTree<in
|
||||
pathEdges.push_back(gss.getEdge(path[i], path[i+1]));
|
||||
return pathEdges;
|
||||
}
|
||||
|
||||
int RNGLRParser::findLine(int tokenNum) {
|
||||
int lineNo = 0;
|
||||
for (int i = 0; i < tokenNum; i++) {
|
||||
std::string tokenString = input[i].getValue();
|
||||
for (int j = 0; j < tokenString.size(); j++)
|
||||
if (tokenString[j] == '\n')
|
||||
lineNo++;
|
||||
}
|
||||
return lineNo;
|
||||
}
|
||||
|
||||
236
src/RegEx.cpp
236
src/RegEx.cpp
@@ -3,46 +3,55 @@
|
||||
|
||||
//Builds the state machine for inPattern.
//construct() returns the start state and fills `ending` with the states that
//end the pattern; each of those then gets NULL added as a next state, which is
//how the goal state is represented.
//The leftover calls to the old no-argument construct() and to deperenthesize()
//are removed: only the two-argument construct() is used to build the machine.
RegEx::RegEx(std::string inPattern) {
    pattern = inPattern;
    std::vector<RegExState*> ending;
    begin = construct(&ending, inPattern);
    //last one is goal state, add it to the end of all of these last states
    for (std::vector<RegExState*>::size_type i = 0; i < ending.size(); i++)
        ending[i]->addNext(NULL);
}
|
||||
|
||||
void RegEx::construct() {
|
||||
std::vector<RegExState*> previousStates;
|
||||
std::vector<RegExState*> currentStates;
|
||||
std::stack<std::pair<std::vector<RegExState*>, std::vector<RegExState*> > > perenStack;
|
||||
RegExState* RegEx::construct(std::vector<RegExState*>* ending, std::string pattern) {
|
||||
//In the RegEx re-write, instead of doing complicated unperenthesising, we keep track of both the "front" and the "end" of a state.
|
||||
//(these could be different if the state is perenthesezed)
|
||||
std::vector<RegExState*> previousStatesBegin;
|
||||
std::vector<RegExState*> previousStatesEnd;
|
||||
std::vector<RegExState*> currentStatesBegin;
|
||||
std::vector<RegExState*> currentStatesEnd;
|
||||
|
||||
bool alternating = false;
|
||||
begin = new RegExState();
|
||||
currentStates.push_back(begin);
|
||||
RegExState* begin = new RegExState();
|
||||
currentStatesBegin.push_back(begin);
|
||||
currentStatesEnd.push_back(begin);
|
||||
|
||||
for (int i = 0; i < pattern.length(); i++) {
|
||||
switch (pattern[i]) {
|
||||
case '*':
|
||||
{
|
||||
//std::cout << "Star at " << i << " in " << pattern << std::endl;
|
||||
// for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
|
||||
// for (std::vector<RegExState*>::size_type k = 0; k < currentStates.size(); k++)
|
||||
// currentStates[j]->addNext(currentStates[k]);
|
||||
currentStates[currentStates.size()-1]->addNext(currentStates[currentStates.size()-1]);
|
||||
//NOTE: Because of the re-write, this is necessary again
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < currentStatesEnd.size(); j++)
|
||||
for (std::vector<RegExState*>::size_type k = 0; k < currentStatesBegin.size(); k++)
|
||||
currentStatesEnd[j]->addNext(currentStatesBegin[k]); //Make the ends point to the beginnings
|
||||
//add all previous states to current states to enable skipping over the starred item
|
||||
currentStates.insert(currentStates.end(), previousStates.begin(), previousStates.end());
|
||||
currentStatesBegin.insert(currentStatesBegin.end(), previousStatesBegin.begin(), previousStatesBegin.end());
|
||||
currentStatesEnd.insert(currentStatesEnd.end(), previousStatesEnd.begin(), previousStatesEnd.end());
|
||||
}
|
||||
break;
|
||||
case '+':
|
||||
{
|
||||
//std::cout << "Plus at " << i << " in " << pattern << std::endl;
|
||||
//OtherThingy
|
||||
//current->addNext(current);
|
||||
// for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
|
||||
// for (std::vector<RegExState*>::size_type k = 0; k < currentStates.size(); k++)
|
||||
// currentStates[j]->addNext(currentStates[k]);
|
||||
currentStates[currentStates.size()-1]->addNext(currentStates[currentStates.size()-1]);
|
||||
//NOTE: Because of the re-write, this is necessary again
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < currentStatesEnd.size(); j++)
|
||||
for (std::vector<RegExState*>::size_type k = 0; k < currentStatesBegin.size(); k++)
|
||||
currentStatesEnd[j]->addNext(currentStatesBegin[k]); //Make the ends point to the beginnings
|
||||
}
|
||||
break;
|
||||
case '?':
|
||||
{
|
||||
//std::cout << "Question at " << i << " in " << pattern << std::endl;
|
||||
//add all previous states to current states to enable skipping over the questioned item
|
||||
currentStates.insert(currentStates.end(), previousStates.begin(), previousStates.end());
|
||||
currentStatesBegin.insert(currentStatesBegin.end(), previousStatesBegin.begin(), previousStatesBegin.end());
|
||||
currentStatesEnd.insert(currentStatesEnd.end(), previousStatesEnd.begin(), previousStatesEnd.end());
|
||||
}
|
||||
break;
|
||||
case '|':
|
||||
@@ -57,59 +66,31 @@ void RegEx::construct() {
|
||||
{
|
||||
//std::cout << "Begin peren at " << i << " in " << pattern << std::endl;
|
||||
//perentheses
|
||||
//Create a peren node with an inner empty node
|
||||
RegExState* next = new RegExState(new RegExState());
|
||||
|
||||
std::vector<RegExState*> innerEnds;
|
||||
int perenEnd = findPerenEnd(pattern, i);
|
||||
RegExState* innerBegin = construct(&innerEnds, strSlice(pattern, i+1, perenEnd));
|
||||
i = perenEnd;
|
||||
std::vector<RegExState*> innerBegins = *(innerBegin->getNextStates());
|
||||
if (alternating) {
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < previousStates.size(); j++)
|
||||
previousStates[j]->addNext(next);
|
||||
|
||||
//Save both current states here as well as the current preren
|
||||
std::vector<RegExState*> savePreviousStates = previousStates;
|
||||
currentStates.push_back(next);
|
||||
std::vector<RegExState*> saveCurrentStates = currentStates;
|
||||
perenStack.push(std::make_pair(savePreviousStates, saveCurrentStates));
|
||||
|
||||
previousStates.clear();
|
||||
currentStates.clear();
|
||||
currentStates.push_back(next->getInner());
|
||||
alternating = false;
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < previousStatesEnd.size(); j++)
|
||||
for (std::vector<RegExState*>::size_type k = 0; k < innerBegins.size(); k++)
|
||||
previousStatesEnd[j]->addNext(innerBegins[k]);
|
||||
currentStatesBegin.insert(currentStatesBegin.end(), innerBegins.begin(), innerBegins.end());
|
||||
currentStatesEnd.insert(currentStatesEnd.end(), innerEnds.begin(), innerEnds.end());
|
||||
} else {
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
|
||||
currentStates[j]->addNext(next);
|
||||
|
||||
//Save both current states here as well as the current preren
|
||||
std::vector<RegExState*> savePreviousStates = currentStates;
|
||||
currentStates.clear();
|
||||
currentStates.push_back(next);
|
||||
std::vector<RegExState*> saveCurrentStates = currentStates;
|
||||
perenStack.push(std::make_pair(savePreviousStates, saveCurrentStates));
|
||||
|
||||
previousStates.clear();
|
||||
currentStates.clear();
|
||||
currentStates.push_back(next->getInner());
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < currentStatesEnd.size(); j++)
|
||||
for (std::vector<RegExState*>::size_type k = 0; k < innerBegins.size(); k++)
|
||||
currentStatesEnd[j]->addNext(innerBegins[k]);
|
||||
previousStatesBegin = currentStatesBegin;
|
||||
previousStatesEnd = currentStatesEnd;
|
||||
currentStatesBegin = innerBegins;
|
||||
currentStatesEnd = innerEnds;
|
||||
}
|
||||
//std::cout << "Peren is " << next << " Inner is " << currentStates[0] << " = " << next->getInner() << std::endl;
|
||||
alternating = false;
|
||||
}
|
||||
break;
|
||||
|
||||
case ')':
|
||||
{
|
||||
//std::cout << "End peren at " << i << " in " << pattern << std::endl;
|
||||
//perentheses
|
||||
//Pop off the states that will now be the previous states and the peren node which will now be the current node
|
||||
std::pair<std::vector<RegExState*>, std::vector<RegExState*> > savedPair = perenStack.top();
|
||||
perenStack.pop();
|
||||
//Make the it so
|
||||
previousStates = savedPair.first;
|
||||
//Make sure the end of the inner stuff points back to the peren node
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
|
||||
currentStates[j]->addNext(savedPair.second[savedPair.second.size()-1]);
|
||||
//currentStates[j]->addNext(*(savedPair.second.end()));
|
||||
currentStates.clear();
|
||||
currentStates = savedPair.second;
|
||||
}
|
||||
break;
|
||||
// ) does not need a case as we skip over it after finding it in ('s case
|
||||
|
||||
case '\\':
|
||||
{
|
||||
@@ -124,109 +105,33 @@ void RegEx::construct() {
|
||||
RegExState* next = new RegExState(pattern[i]);
|
||||
//If we're alternating, add next as the next for each previous state, and add self to currentStates
|
||||
if (alternating) {
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < previousStates.size(); j++) {
|
||||
previousStates[j]->addNext(next);
|
||||
//std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << previousStates[j] << std::endl;
|
||||
}
|
||||
currentStates.push_back(next);
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < previousStatesEnd.size(); j++)
|
||||
previousStatesEnd[j]->addNext(next);
|
||||
currentStatesBegin.push_back(next);
|
||||
currentStatesEnd.push_back(next);
|
||||
alternating = false;
|
||||
} else {
|
||||
//If we're not alternating, add next as next for all the current states, make the current states the new
|
||||
//previous states, and add ourself as the new current state.
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
|
||||
currentStates[j]->addNext(next);
|
||||
//std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << currentStates[j] << std::endl;
|
||||
}
|
||||
previousStates.clear();
|
||||
previousStates = currentStates;
|
||||
currentStates.clear();
|
||||
currentStates.push_back(next);
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < currentStatesEnd.size(); j++)
|
||||
currentStatesEnd[j]->addNext(next);
|
||||
|
||||
previousStatesBegin.clear();
|
||||
previousStatesEnd.clear();
|
||||
previousStatesBegin = currentStatesBegin;
|
||||
previousStatesEnd = currentStatesEnd;
|
||||
currentStatesBegin.clear();
|
||||
currentStatesEnd.clear();
|
||||
currentStatesBegin.push_back(next);
|
||||
currentStatesEnd.push_back(next);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
//last one is goal state
|
||||
for (std::vector<RegExState*>::size_type i = 0; i < currentStates.size(); i++)
|
||||
currentStates[i]->addNext(NULL);
|
||||
(*ending) = currentStatesEnd;
|
||||
return(begin);
|
||||
}
|
||||
|
||||
//Rewrites the state graph reachable from `begin` so that parenthesis ("peren")
//placeholder states are replaced by the states they wrap.
//
//Works through statesToProcess as a growing worklist seeded with `begin`.
//For every next-state reference that is non-NULL and has a non-NULL getInner():
//  1. the reference is replaced by the inner state's next states (first one in
//     place, the remainder appended),
//  2. any reference the peren state holds to itself is replaced the same way,
//  3. starting from the inner state, the states inside the parentheses are
//     walked and a reference back to the peren state is replaced by the peren
//     state's own next states.
//Afterwards the current state's next states are appended to the worklist unless
//they are already in it.
//
//NOTE(review): (*insideNextStates)[0] and endPerenNextStates[0] are read without
//checking that those vectors are non-empty - confirm this always holds.
//NOTE(review): traversalList entries are dereferenced without a NULL check and
//are not de-duplicated - confirm a NULL (goal) state or a cycle cannot be
//reached before the peren state is found.
void RegEx::deperenthesize() {
|
||||
//std::cout << "About to de-perenthesize " << begin->toString() << std::endl;
|
||||
|
||||
//Now go through and expand the peren nodes to regular nodes
|
||||
//processedStates is declared but never used anywhere in this function
std::vector<RegExState*> processedStates;
|
||||
std::vector<RegExState*> statesToProcess;
|
||||
statesToProcess.push_back(begin);
|
||||
for (std::vector<RegExState*>::size_type i = 0; i < statesToProcess.size(); i++) {
|
||||
//Don't process null (sucess) state
|
||||
if (statesToProcess[i] == NULL)
|
||||
continue;
|
||||
std::vector<RegExState*>* nextStates = statesToProcess[i]->getNextStates();
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < nextStates->size(); j++) {
|
||||
if ((*nextStates)[j] != NULL && (*nextStates)[j]->getInner() != NULL) {
|
||||
//Fix all the next references pointing to the peren node to point to the inner nodes. (if more than one, push back to add others)
|
||||
std::vector<RegExState*>* insideNextStates = (*nextStates)[j]->getInner()->getNextStates();
|
||||
//std::cout << "insideNextStates = " << insideNextStates << " [0] " << (*insideNextStates)[0] << std::endl;
|
||||
RegExState* perenState = (*nextStates)[j];
|
||||
(*nextStates)[j] = (*insideNextStates)[0];
|
||||
//std::cout << "So now nextstates[j] = " << (*nextStates)[j] << std::endl;
|
||||
for (std::vector<RegExState*>::size_type k = 1; k < insideNextStates->size(); k++)
|
||||
nextStates->push_back((*insideNextStates)[k]);
|
||||
//std::cout << "Replaced beginning: " << begin->toString() << std::endl;
|
||||
//Now, if the peren node is self-referential (has a repitition operator after i), fix it's self-references in the same manner
|
||||
std::vector<RegExState*>* perenNextNodes = perenState->getNextStates();
|
||||
for (std::vector<RegExState*>::size_type k = 0; k < perenNextNodes->size(); k++) {
|
||||
if ((*perenNextNodes)[k] == perenState) {
|
||||
(*perenNextNodes)[k] = (*insideNextStates)[0];
|
||||
for (std::vector<RegExState*>::size_type l = 1; l < insideNextStates->size(); l++)
|
||||
perenNextNodes->push_back((*insideNextStates)[l]);
|
||||
}
|
||||
}
|
||||
//std::cout << "Fixed self-references: " << begin->toString() << std::endl;
|
||||
//Need to fix the end too
|
||||
std::vector<RegExState*> traversalList;
|
||||
traversalList.push_back(perenState->getInner());
|
||||
for (std::vector<RegExState*>::size_type k = 0; k < traversalList.size(); k++) {
|
||||
std::vector<RegExState*>* nextTraversalStates = traversalList[k]->getNextStates();
|
||||
//std::cout << "Traversing! nextTraversalStates from traversalList " << traversalList[k] << " char = " << traversalList[k]->getCharacter() << std::endl;
|
||||
//std::cout << "with children:" << std::endl;
|
||||
//for (std::vector<RegExState*>::size_type l = 0; l < nextTraversalStates->size(); l++)
|
||||
// std::cout << "\t\"" << (*nextTraversalStates)[l]->getCharacter() << "\"" << std::endl;
|
||||
//std::cout << std::endl;
|
||||
for (std::vector<RegExState*>::size_type l = 0; l < nextTraversalStates->size(); l++) {
|
||||
//If this node is equal to the peren node we came from, then that means we've reached the end of the inner part of the peren
|
||||
//And we now replace this reference with the next nodes from the peren node
|
||||
//std::cout << "Traversal Next is on " << (*nextTraversalStates)[l]->getCharacter() << std::endl;
|
||||
if ((*nextTraversalStates)[l] == perenState) {
|
||||
// std::cout << "nextTraversalStates[l] = to perenState!" << std::endl;
|
||||
//Taken as a copy, so the push_back calls below cannot disturb the list being read
std::vector<RegExState*> endPerenNextStates = *(perenState->getNextStates());
|
||||
(*nextTraversalStates)[l] = endPerenNextStates[0];
|
||||
for (std::vector<RegExState*>::size_type n = 1; n < endPerenNextStates.size(); n++)
|
||||
nextTraversalStates->push_back(endPerenNextStates[n]);
|
||||
//Now make sure we don't now try to continue through and end up processing stuff we just replaced the peren reference with
|
||||
break;
|
||||
} else {
|
||||
traversalList.push_back((*nextTraversalStates)[l]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
//Now add all these next states to process, only if they haven't already been processed
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < nextStates->size(); j++) {
|
||||
bool inCurrStates = false;
|
||||
for (std::vector<RegExState*>::size_type k = 0; k < statesToProcess.size(); k++) {
|
||||
if ((*nextStates)[j] == statesToProcess[k])
|
||||
inCurrStates = true;
|
||||
}
|
||||
if (!inCurrStates) {
|
||||
statesToProcess.push_back((*nextStates)[j]);
|
||||
//std::cout << (*nextStates)[j] << "Is not in states to process" << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
//std::cout << "Finished de-perenthesization " << begin->toString() << std::endl;
|
||||
}
|
||||
|
||||
RegEx::~RegEx() {
|
||||
//No cleanup necessary
|
||||
@@ -310,5 +215,16 @@ void RegEx::test() {
|
||||
assert(re.longMatch("ab") == 1);
|
||||
}
|
||||
|
||||
{
|
||||
RegEx re("((ab)|c)*");
|
||||
assert(re.longMatch("ababc") == 5);
|
||||
assert(re.longMatch("ad") == 0);
|
||||
assert(re.longMatch("ababccd") == 6);
|
||||
}
|
||||
{
|
||||
RegEx re("bbb((bba+)|(ba+))*a*((a+b)|(a+bb)|(a+))*bbb") ;
|
||||
assert(re.longMatch("bbbababbbaaaaaaaaaaaaaaaaaaabbb") == 9);
|
||||
}
|
||||
|
||||
std::cout << "RegEx tests pass\n";
|
||||
}
|
||||
|
||||
@@ -79,12 +79,9 @@ void State::combineStates(State &other) {
|
||||
|
||||
//Rebuilds the member vector `total` as every rule in basis followed by every
//rule in remaining, and returns a pointer to it.
//The returned pointer refers to this State's own storage; its contents are
//overwritten by the next call.
//The element-by-element push_back loops are removed: together with the range
//inserts they added every rule twice.
std::vector<ParseRule*>* State::getTotal() {
    total.clear();
    //std::cout << "Vector will be " << basis.size() << " + " << remaining.size() << std::endl;
    total.insert(total.end(), basis.begin(), basis.end());
    total.insert(total.end(), remaining.begin(), remaining.end());
    return &total;
}
|
||||
std::vector<ParseRule*>* State::getBasis() {
|
||||
@@ -111,6 +108,7 @@ void State::addRuleCombineLookahead(ParseRule* rule) {
|
||||
if (rule->equalsExceptLookahead(*(total[i]))) {
|
||||
total[i]->addLookahead(rule->getLookahead());
|
||||
alreadyIn = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!alreadyIn)
|
||||
@@ -160,4 +158,8 @@ std::vector<State*>* State::getDeepParents(int depth) {
|
||||
recursiveParents->insert(recursiveParents->end(), recursiveParentsToAdd->begin(), recursiveParentsToAdd->end());
|
||||
}
|
||||
return recursiveParents;
|
||||
}
|
||||
|
||||
int State::getNumber() {
|
||||
return number;
|
||||
}
|
||||
@@ -75,7 +75,7 @@ std::string StringReader::getTokens(const char *stop_chars, bool truncateEnd)
|
||||
{
|
||||
//End of String
|
||||
end_reached = true;
|
||||
std::cout << "Reached end of file!\n";
|
||||
//std::cout << "Reached end of file!\n";
|
||||
return "";
|
||||
} else {
|
||||
|
||||
|
||||
208
src/Table.cpp
208
src/Table.cpp
@@ -8,6 +8,203 @@ Table::~Table() {
|
||||
//
|
||||
}
|
||||
|
||||
void Table::exportTable(std::ofstream &file) {
|
||||
//Save symbolIndexVec
|
||||
int size = symbolIndexVec.size();
|
||||
file.write((char*)&size, sizeof(int));
|
||||
for (int i = 0; i < symbolIndexVec.size(); i++) {
|
||||
//Save the name
|
||||
std::string symbolName = symbolIndexVec[i].getName(); //Get the string
|
||||
size = symbolName.size()+1;
|
||||
file.write((char*)&size, sizeof(int)); //Save size of string
|
||||
file.write((char*)(symbolName.c_str()), size); //Save the string
|
||||
|
||||
//Save the value
|
||||
std::string symbolValue = symbolIndexVec[i].getValue(); //Get the string
|
||||
size = symbolValue.size()+1;
|
||||
file.write((char*)&size, sizeof(int)); //Save size of string
|
||||
file.write((char*)(symbolValue.c_str()), size); //Save the string
|
||||
|
||||
bool isTerminal = symbolIndexVec[i].isTerminal();
|
||||
file.write((char*)&isTerminal, sizeof(bool)); //Save the true false
|
||||
}
|
||||
|
||||
//Save the actual table
|
||||
size = table.size();
|
||||
file.write((char*)&size, sizeof(int));
|
||||
for (int i = 0; i < table.size(); i++) {
|
||||
//each item is a middle vector
|
||||
//std::vector< std::vector< std::vector<ParseAction*>* >* > table;
|
||||
std::vector< std::vector<ParseAction*>* >* middleVector = table[i];
|
||||
int middleVectorSize = middleVector->size();
|
||||
file.write((char*)&middleVectorSize, sizeof(int));
|
||||
|
||||
for (int j = 0; j < middleVectorSize; j++) {
|
||||
//each item is an inner vector
|
||||
std::vector<ParseAction*>* innerVector = (*middleVector)[j];
|
||||
int innerVectorSize = 0;
|
||||
if (innerVector)
|
||||
innerVectorSize = innerVector->size();
|
||||
else
|
||||
innerVectorSize = 0;
|
||||
file.write((char*)&innerVectorSize, sizeof(int));
|
||||
|
||||
for (int k = 0; k < innerVectorSize; k++) {
|
||||
//Save the type
|
||||
ParseAction* toSave = (*innerVector)[k];
|
||||
ParseAction::ActionType actionType = toSave->action;
|
||||
file.write((char*)&actionType, sizeof(ParseAction::ActionType));
|
||||
//Save the reduce rule if necessary
|
||||
if (actionType == ParseAction::REDUCE) {
|
||||
//Save the reduce rule
|
||||
ParseRule* rule = toSave->reduceRule;
|
||||
//int pointer index
|
||||
int ptrIndx = rule->getIndex();
|
||||
file.write((char*)&ptrIndx, sizeof(int));
|
||||
|
||||
//Symbol leftHandle
|
||||
Symbol leftHandle = rule->getLeftSide();
|
||||
//Save the name
|
||||
std::string symbolName = leftHandle.getName(); //Get the string
|
||||
size = symbolName.size()+1;
|
||||
file.write((char*)&size, sizeof(int)); //Save size of string
|
||||
file.write((char*)(symbolName.c_str()), size); //Save the string
|
||||
|
||||
//Save the value
|
||||
std::string symbolValue = leftHandle.getValue(); //Get the string
|
||||
size = symbolValue.size()+1;
|
||||
file.write((char*)&size, sizeof(int)); //Save size of string
|
||||
file.write((char*)(symbolValue.c_str()), size); //Save the string
|
||||
|
||||
bool isTerminal = leftHandle.isTerminal();
|
||||
file.write((char*)&isTerminal, sizeof(bool)); //Save the true false
|
||||
|
||||
//std::vector<Symbol>* lookahead;
|
||||
//Should not need
|
||||
|
||||
//std::vector<Symbol> rightSide;
|
||||
std::vector<Symbol> rightSide = rule->getRightSide();
|
||||
size = rightSide.size();
|
||||
//std::cout << leftHandle.toString() << std::endl;
|
||||
file.write((char*)&size, sizeof(int));
|
||||
for (int l = 0; l < rightSide.size(); l++) {
|
||||
//Save the name
|
||||
symbolName = rightSide[l].getName(); //Get the string
|
||||
size = symbolName.size()+1;
|
||||
file.write((char*)&size, sizeof(int)); //Save size of string
|
||||
file.write((char*)(symbolName.c_str()), size); //Save the string
|
||||
//
|
||||
//Save the value
|
||||
symbolValue = rightSide[l].getValue(); //Get the string
|
||||
size = symbolValue.size()+1;
|
||||
file.write((char*)&size, sizeof(int)); //Save size of string
|
||||
file.write((char*)(symbolValue.c_str()), size); //Save the string
|
||||
//
|
||||
isTerminal = rightSide[l].isTerminal();
|
||||
file.write((char*)&isTerminal, sizeof(bool)); //Save the true false
|
||||
}
|
||||
}
|
||||
int shiftState = toSave->shiftState;
|
||||
file.write((char*)&shiftState, sizeof(int));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void Table::importTable(char* tableData) {
|
||||
//Load symbolIndexVec
|
||||
|
||||
int size = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
for (int i = 0; i < size; i++) {
|
||||
int stringLen = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
std::string symbolName = std::string(tableData);
|
||||
tableData += stringLen*sizeof(char);
|
||||
stringLen = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
std::string symbolValue = std::string(tableData);
|
||||
tableData += stringLen*sizeof(char);
|
||||
|
||||
bool isTerminal = *((bool*)tableData);
|
||||
tableData += sizeof(bool);
|
||||
|
||||
symbolIndexVec.push_back(Symbol(symbolName, isTerminal, symbolValue));
|
||||
}
|
||||
|
||||
//Now for the actual table
|
||||
int tableSize = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
for (int i = 0; i < tableSize; i++) {
|
||||
//each item is a middle vector
|
||||
std::vector< std::vector<ParseAction*>* >* middleVector = new std::vector< std::vector<ParseAction*>* >();
|
||||
table.push_back(middleVector);
|
||||
|
||||
int middleVectorSize = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
for (int j = 0; j < middleVectorSize; j++) {
|
||||
//each item is an inner vector
|
||||
std::vector<ParseAction*>* innerVector = new std::vector<ParseAction*>();
|
||||
middleVector->push_back(innerVector);
|
||||
int innerVectorSize = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
for (int k = 0; k < innerVectorSize; k++) {
|
||||
//each item is a ParseRule
|
||||
ParseAction::ActionType action = *((ParseAction::ActionType*)tableData);
|
||||
tableData += sizeof(ParseAction::ActionType);
|
||||
//If reduce, import the reduce rule
|
||||
ParseRule* reduceRule = NULL;
|
||||
if (action == ParseAction::REDUCE) {
|
||||
int ptrIndx = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
|
||||
size = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
std::string leftHandleName = std::string(tableData);
|
||||
tableData += size*sizeof(char);
|
||||
size = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
std::string leftHandleValue = std::string(tableData);
|
||||
tableData += size*sizeof(char);
|
||||
|
||||
bool isTerminal = *((bool*)tableData);
|
||||
tableData += sizeof(bool);
|
||||
|
||||
//right side
|
||||
std::vector<Symbol> rightSide;
|
||||
size = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
for (int l = 0; l < size; l++) {
|
||||
int inStringLen = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
std::string inSymbolName = std::string(tableData);
|
||||
tableData += inStringLen*sizeof(char);
|
||||
|
||||
inStringLen = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
std::string inSymbolValue = std::string(tableData);
|
||||
tableData += inStringLen*sizeof(char);
|
||||
|
||||
bool inIsTerminal = *((bool*)tableData);
|
||||
tableData += sizeof(bool);
|
||||
rightSide.push_back(Symbol(inSymbolName, inIsTerminal, inSymbolValue));
|
||||
}
|
||||
reduceRule = new ParseRule(Symbol(leftHandleName, isTerminal, leftHandleValue), ptrIndx, rightSide, NULL);
|
||||
}
|
||||
int shiftState = *((int*)tableData);
|
||||
tableData += sizeof(int);
|
||||
|
||||
//And push the new action back
|
||||
if (reduceRule)
|
||||
innerVector->push_back(new ParseAction(action, reduceRule));
|
||||
else
|
||||
innerVector->push_back(new ParseAction(action, shiftState));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Table::setSymbols(Symbol EOFSymbol, Symbol nullSymbol) {
|
||||
this->EOFSymbol = EOFSymbol;
|
||||
this->nullSymbol = nullSymbol;
|
||||
@@ -106,7 +303,7 @@ std::vector<ParseAction*>* Table::get(int state, Symbol token) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
std::cout << "Get for state: " << state << ", and Symbol: " << token.toString() << std::endl;
|
||||
//std::cout << "Get for state: " << state << ", and Symbol: " << token.toString() << std::endl;
|
||||
if (state < 0 || state >= table.size()) {
|
||||
std::cout << "State bad: " << state << std::endl;
|
||||
return NULL;
|
||||
@@ -115,7 +312,7 @@ std::vector<ParseAction*>* Table::get(int state, Symbol token) {
|
||||
std::vector<ParseAction*>* action = NULL;
|
||||
|
||||
if (symbolIndex < 0 || symbolIndex >= table[state]->size()) {
|
||||
std::cout << "Symbol bad for this state: " << token.toString() << ". This is a reject." << std::endl;
|
||||
//std::cout << "Symbol bad for this state: " << token.toString() << ". This is a reject." << std::endl;
|
||||
} else {
|
||||
action = (*(table[state]))[symbolIndex];
|
||||
}
|
||||
@@ -128,7 +325,7 @@ std::vector<ParseAction*>* Table::get(int state, Symbol token) {
|
||||
action->push_back(new ParseAction(ParseAction::ACCEPT));
|
||||
}
|
||||
|
||||
//If ourside the symbol range of this state (same as NULL), reject
|
||||
//If outside the symbol range of this state (same as NULL), reject
|
||||
if ( symbolIndex >= table[state]->size() ) {
|
||||
action = new std::vector<ParseAction*>();
|
||||
action->push_back(new ParseAction(ParseAction::REJECT));
|
||||
@@ -141,7 +338,7 @@ std::vector<ParseAction*>* Table::get(int state, Symbol token) {
|
||||
}
|
||||
|
||||
//Otherwise, we have something, so return it
|
||||
return (action);
|
||||
return action;
|
||||
}
|
||||
|
||||
ParseAction* Table::getShift(int state, Symbol token) {
|
||||
@@ -163,8 +360,9 @@ std::string Table::toString() {
|
||||
concat += "\n";
|
||||
|
||||
for (std::vector< std::vector< std::vector< ParseRule* >* >* >::size_type i = 0; i < table.size(); i++) {
|
||||
concat += intToString(i) + "\t";
|
||||
concat += intToString(i) + " is the state\t";
|
||||
for (std::vector< std::vector< ParseRule* >* >::size_type j = 0; j < table[i]->size(); j++) {
|
||||
concat += "for " + symbolIndexVec[j].toString() + " do ";
|
||||
if ( (*(table[i]))[j] != NULL) {
|
||||
for (std::vector< ParseRule* >::size_type k = 0; k < (*(table[i]))[j]->size(); k++) {
|
||||
concat += (*((*(table[i]))[j]))[k]->toString() + "\t";
|
||||
|
||||
85
src/Type.cpp
Normal file
85
src/Type.cpp
Normal file
@@ -0,0 +1,85 @@
|
||||
#include "Type.h"
|
||||
|
||||
//Default constructor: base type none, no type definition node, zero levels of indirection.
Type::Type() : baseType(none), typeDefinition(NULL), indirection(0) {
}
|
||||
|
||||
//Builds a type from a base ValueType with no type definition node and zero levels of indirection.
Type::Type(ValueType typeIn) : baseType(typeIn), typeDefinition(NULL), indirection(0) {
}
|
||||
|
||||
//Builds a type from a base ValueType and an indirection count, with no type definition node.
Type::Type(ValueType typeIn, int indirectionIn) : baseType(typeIn), typeDefinition(NULL), indirection(indirectionIn) {
}
|
||||
|
||||
//Builds a type that refers to a type definition node: base type none, zero levels of indirection.
Type::Type(NodeTree<ASTData>* typeDefinitionIn) : baseType(none), typeDefinition(typeDefinitionIn), indirection(0) {
}
|
||||
//Builds a type that refers to a type definition node with the given indirection count; base type is none.
Type::Type(NodeTree<ASTData>* typeDefinitionIn, int indirectionIn) : baseType(none), typeDefinition(typeDefinitionIn), indirection(indirectionIn) {
}
|
||||
|
||||
//Fully explicit constructor: every field is taken straight from the arguments.
Type::Type(ValueType typeIn, NodeTree<ASTData>* typeDefinitionIn, int indirectionIn) : baseType(typeIn), typeDefinition(typeDefinitionIn), indirection(indirectionIn) {
}
|
||||
|
||||
//Destructor does nothing; in particular typeDefinition is not deleted here.
Type::~Type() {}
|
||||
|
||||
//Two types are equal when base type, indirection count and type definition pointer all match.
//The type definition is compared by pointer, not by the contents of the node.
const bool Type::operator==(const Type &other) const {
	if (baseType != other.baseType)
		return false;
	if (indirection != other.indirection)
		return false;
	return typeDefinition == other.typeDefinition;
}
|
||||
|
||||
//Exact negation of operator==.
const bool Type::operator!=(const Type &other) const {
	return !(*this == other);
}
|
||||
|
||||
std::string Type::toString() {
|
||||
std::string typeString;
|
||||
switch (baseType) {
|
||||
case none:
|
||||
if (typeDefinition)
|
||||
typeString = typeDefinition->getDataRef()->symbol.getName();
|
||||
else
|
||||
typeString = "none";
|
||||
break;
|
||||
case void_type:
|
||||
typeString = "void";
|
||||
break;
|
||||
case boolean:
|
||||
typeString = "bool";
|
||||
break;
|
||||
case integer:
|
||||
typeString = "int";
|
||||
break;
|
||||
case floating:
|
||||
typeString = "float";
|
||||
break;
|
||||
case double_percision:
|
||||
typeString = "double";
|
||||
break;
|
||||
case character:
|
||||
typeString = "char";
|
||||
break;
|
||||
default:
|
||||
if (typeDefinition)
|
||||
typeString = typeDefinition->getDataRef()->symbol.getName();
|
||||
else
|
||||
typeString = "unknown_type";
|
||||
}
|
||||
for (int i = 0; i < indirection; i++)
|
||||
typeString += "*";
|
||||
return typeString;
|
||||
}
|
||||
43
src/util.cpp
43
src/util.cpp
@@ -8,7 +8,7 @@ std::string intToString(int theInt) {
|
||||
|
||||
std::string replaceExEscape(std::string first, std::string search, std::string replace) {
|
||||
size_t pos = 0;
|
||||
while (pos < first.size()-search.size()) {
|
||||
while (pos <= first.size()-search.size()) {
|
||||
pos = first.find(search, pos);
|
||||
if (pos == std::string::npos)
|
||||
break;
|
||||
@@ -31,3 +31,44 @@ std::string replaceExEscape(std::string first, std::string search, std::string r
|
||||
}
|
||||
return first;
|
||||
}
|
||||
|
||||
//String slicing is crazy useful. substr isn't bad, but slicing with negative indices is wonderful.
//Returns the characters of str in [begin, end). A negative index has str.length()+1 added to it,
//so -1 refers to the position just past the last character (an end of -1 means "to the end").
std::string strSlice(std::string str, int begin, int end) {
	const std::string::size_type wrapAround = str.length() + 1;
	if (begin < 0)
		begin = static_cast<int>(begin + wrapAround);
	if (end < 0)
		end = static_cast<int>(end + wrapAround);
	const int sliceLength = end - begin;
	return str.substr(begin, sliceLength);
}
|
||||
|
||||
//Scans str forward from index i, counting '(' as +1 and ')' as -1, and returns the first
//index at which the running count is zero. When str[i] is '(' that is the index of its
//matching ')'. If str[i] is neither paren the count is already zero and i itself is returned.
//Returns -1 when i is outside the string or the count never returns to zero
//(previously control fell off the end of the function in those cases, which is undefined).
int findPerenEnd(std::string str, int i) {
	const int length = static_cast<int>(str.length());
	int numHangingOpen = 0;
	for (; i >= 0 && i < length; i++) {
		if (str[i] == '(')
			numHangingOpen++;
		else if (str[i] == ')')
			numHangingOpen--;
		if (numHangingOpen == 0)
			return i;
	}
	return -1;
}
|
||||
|
||||
//Splits str at every occurrence of delim and returns the pieces in order.
//Pieces that end at a delimiter are kept even when empty; a trailing piece after the
//last delimiter is only kept when it is non-empty (so "a,b," gives two pieces and "" gives none).
std::vector<std::string> split(const std::string &str, char delim) {
	std::vector<std::string> pieces;
	std::string::size_type pieceStart = 0;
	std::string::size_type delimAt = str.find(delim, pieceStart);
	while (delimAt != std::string::npos) {
		pieces.push_back(str.substr(pieceStart, delimAt - pieceStart));
		pieceStart = delimAt + 1;
		delimAt = str.find(delim, pieceStart);
	}
	if (pieceStart < str.size())
		pieces.push_back(str.substr(pieceStart));
	return pieces;
}
|
||||
|
||||
//Concatenates the strings in strVec, placing joinStr between each adjacent pair.
//An empty vector gives an empty string.
std::string join(const std::vector<std::string> &strVec, std::string joinStr) {
	std::string assembled;
	for (std::vector<std::string>::const_iterator piece = strVec.begin(); piece != strVec.end(); ++piece) {
		if (piece != strVec.begin())
			assembled += joinStr;
		assembled += *piece;
	}
	return assembled;
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user