Move Cephelepod into deprecated_compiler, create captian.sh to handle bootstrapping kraken from backup or from Cephelepod

This commit is contained in:
Nathan Braswell
2016-03-29 12:54:05 -04:00
parent 40c3e428c1
commit c7e50282ad
53 changed files with 51 additions and 19 deletions

5
deprecated_compiler/.gitignore vendored Normal file
View File

@@ -0,0 +1,5 @@
build
build_kraken
krakenGrammer.kgm
krakenGrammer.kgm.comp
stdlib

View File

@@ -0,0 +1,29 @@
# Build script for the "kraken" executable plus a step that copies the stdlib directory into the build tree.
cmake_minimum_required (VERSION 2.6)
project(Kraken)
# Compile as C++11 with debug symbols; the second set() appends -O3 to the same flag string.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -g")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
# Directory that holds the project's own headers.
set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include)
# Every source file compiled into the kraken executable.
set( MY_SOURCES main.cpp src/Parser.cpp src/GraphStructuredStack.cpp
src/RNGLRParser.cpp src/ParseAction.cpp src/ParseRule.cpp src/Symbol.cpp
src/StringReader.cpp src/State.cpp src/util.cpp src/Lexer.cpp
src/RegEx.cpp src/RegExState.cpp src/Table.cpp src/ASTData.cpp
src/ASTTransformation.cpp src/CGenerator.cpp src/Type.cpp src/Importer.cpp
src/Tester.cpp src/CCodeTriple.cpp)
# STDLibCopy is part of the default (ALL) build; its post-build command copies
# <source dir>/stdlib into <binary dir>/stdlib.
add_custom_target(STDLibCopy ALL)
add_custom_command(TARGET STDLibCopy POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory
"${PROJECT_SOURCE_DIR}/stdlib"
"${PROJECT_BINARY_DIR}/stdlib")
include_directories( ${MY_INCLUDES} )
add_executable(kraken ${MY_SOURCES})

View File

@@ -0,0 +1,41 @@
#ifndef ASTDATA_H
#define ASTDATA_H
#include <vector>
#include <map>
#include <set>
#include "Symbol.h"
//Circular dependency
class Type;
#include "Type.h"
#ifndef NULL
#define NULL ((void*)0)
#endif
// Kinds of node that an ASTData can describe (see ASTData::type).
// The enumerators carry implicit values, so their order here is significant.
enum ASTType {
    undef,
    translation_unit,
    import,
    identifier,
    type_def,
    adt_def,
    function,
    code_block,
    typed_parameter,
    expression,
    boolean_expression,
    statement,
    if_statement,
    match_statement,
    case_statement,
    while_loop,
    for_loop,
    return_statement,
    break_statement,
    continue_statement,
    defer_statement,
    assignment_statement,
    declaration_statement,
    if_comp,
    simple_passthrough,
    passthrough_params,
    in_passthrough_params,
    out_passthrough_params,
    opt_string,
    param_assign,
    function_call,
    value
};
// Data carried by an AST node; this header stores it inside NodeTree<ASTData>.
// Member functions are only declared here, their definitions are not part of this header.
class ASTData {
public:
ASTData();
// valueType is optional in both overloads and defaults to NULL.
ASTData(ASTType type, Type *valueType = NULL);
ASTData(ASTType type, Symbol symbol, Type *valueType = NULL);
~ASTData();
std::string toString();
// Static: takes an ASTType and returns a string (presumably the enumerator's name -- confirm in the definition).
static std::string ASTTypeToString(ASTType type);
// Which kind of node this is.
ASTType type;
// NOTE(review): ownership of this pointer cannot be determined from the header.
Type* valueType;
Symbol symbol;
// Name -> list of AST nodes registered under that name.
std::map<std::string, std::vector<NodeTree<ASTData>*>> scope;
// NOTE(review): presumably the variables a closure captures -- confirm against the code that fills it.
std::set<NodeTree<ASTData>*> closedVariables;
private:
};
#endif

View File

@@ -0,0 +1,87 @@
#ifndef ASTTRANSFORMATION_H
#define ASTTRANSFORMATION_H
#include <set>
#include <map>
#include <iterator>
#include <algorithm>
#include "Type.h"
#include "ASTData.h"
#include "NodeTransformation.h"
#include "Importer.h"
class Importer;
// Transformation from a parse tree (NodeTree<Symbol>) to an AST (NodeTree<ASTData>).
// The work is split into numbered passes (see the comments on firstPass .. fourthPass).
// Only declarations live here; the definitions are not part of this header.
class ASTTransformation: public NodeTransformation<Symbol,ASTData> {
public:
ASTTransformation(Importer* importerIn);
~ASTTransformation();
// Parse-tree lookup helpers: each comes in a "search this list of nodes" and a "search under this parent" form.
NodeTree<Symbol>* getNode(std::string lookup, std::vector<NodeTree<Symbol>*> nodes);
NodeTree<Symbol>* getNode(std::string lookup, NodeTree<Symbol>* parent);
std::vector<NodeTree<Symbol>*> getNodes(std::string lookup, std::vector<NodeTree<Symbol>*> nodes);
std::vector<NodeTree<Symbol>*> getNodes(std::string lookup, NodeTree<Symbol>* parent);
//First pass defines all type_defs (objects and aliases)
NodeTree<ASTData>* firstPass(std::string fileName, NodeTree<Symbol>* parseTree);
std::set<std::string> parseTraits(NodeTree<Symbol>* traitsNode);
//Second pass defines data inside objects, outside declaration statements, and function prototypes (since we have type_defs now)
void secondPass(NodeTree<ASTData>* ast, NodeTree<Symbol>* parseTree);
void secondPassDoClassInsides(NodeTree<ASTData>* typeDef, std::vector<NodeTree<Symbol>*> typedefChildren, std::map<std::string, Type*> templateTypeReplacements);
NodeTree<ASTData>* secondPassDeclaration(NodeTree<Symbol>* from, NodeTree<ASTData>* scope, std::map<std::string, Type*> templateTypeReplacements);
NodeTree<ASTData>* secondPassFunction(NodeTree<Symbol>* from, NodeTree<ASTData>* scope, std::map<std::string, Type*> templateTypeReplacements);
//The third pass does all the function bodies
void thirdPass(NodeTree<ASTData>* ast, NodeTree<Symbol>* parseTree);
NodeTree<ASTData>* searchScopeForFunctionDef(NodeTree<ASTData>* scope, NodeTree<Symbol>* parseTree, std::map<std::string, Type*> templateTypeReplacements);
void thirdPassFunction(NodeTree<Symbol>* from, NodeTree<ASTData>* functionDef, std::map<std::string, Type*> templateTypeReplacements);
//The fourth pass finishes instantiation of templated objects
//it used to be a part of the third pass, but it was split out because it has to be done in a loop
//with all the other asts until none change anymore (it returns a bool if it instantiated a new one)
bool fourthPass(NodeTree<ASTData>* ast, NodeTree<Symbol>* parseTree);
// The single-argument transform is the NodeTransformation interface; the long overload carries scope/type context.
virtual NodeTree<ASTData>* transform(NodeTree<Symbol>* from);
NodeTree<ASTData>* transform(NodeTree<Symbol>* from, NodeTree<ASTData>* scope, std::vector<Type> types, bool limitToFunction, std::map<std::string, Type*> templateTypeReplacements);
std::vector<NodeTree<ASTData>*> transformChildren(std::vector<NodeTree<Symbol>*> children, std::set<int> skipChildren, NodeTree<ASTData>* scope, std::vector<Type> types, bool limitToFunction, std::map<std::string, Type*> templateTypeReplacements);
std::string concatSymbolTree(NodeTree<Symbol>* root);
NodeTree<ASTData>* doFunction(NodeTree<ASTData>* scope, std::string lookup, std::vector<NodeTree<ASTData>*> nodes, std::map<std::string, Type*> templateTypeReplacements);
NodeTree<ASTData>* generateThis(NodeTree<ASTData>* scope);
std::set<NodeTree<ASTData>*> findVariablesToClose(NodeTree<ASTData>* func, NodeTree<ASTData>* stat, NodeTree<ASTData>* scope);
bool inScopeChain(NodeTree<ASTData>* node, NodeTree<ASTData>* scope);
// Scope / function lookup declarations.
NodeTree<ASTData>* functionLookup(NodeTree<ASTData>* scope, std::string lookup, std::vector<Type> types);
NodeTree<ASTData>* templateFunctionLookup(NodeTree<ASTData>* scope, std::string lookup, std::vector<Type*>* templateInstantiationTypes, std::vector<Type> types, std::map<std::string, Type*> scopeTypeMap);
// includeModules defaults to false in the three-argument form; the four-argument form additionally takes a visited set.
std::vector<NodeTree<ASTData>*> scopeLookup(NodeTree<ASTData>* scope, std::string lookup, bool includeModules = false);
std::vector<NodeTree<ASTData>*> scopeLookup(NodeTree<ASTData>* scope, std::string lookup, bool includeModules, std::set<NodeTree<ASTData>*> visited);
NodeTree<ASTData>* getUpperTranslationUnit(NodeTree<ASTData>* node);
NodeTree<ASTData>* addToScope(std::string name, NodeTree<ASTData>* toAdd, NodeTree<ASTData>* addTo);
Type* typeFromTypeNode(NodeTree<Symbol>* typeNode, NodeTree<ASTData>* scope, std::map<std::string, Type*> templateTypeReplacements);
// Template handling declarations.
NodeTree<ASTData>* templateClassLookup(NodeTree<ASTData>* scope, std::string name, std::vector<Type*> templateInstantiationTypes);
void unifyType(NodeTree<Symbol> *syntaxType, Type type, std::map<std::string, Type>* templateTypeMap, std::map<std::string, Type*> typeMap);
void unifyTemplateFunction(NodeTree<ASTData>* templateFunction, std::vector<Type> types, std::vector<Type*>* templateInstantiationTypes, std::map<std::string, Type*> typeMap);
NodeTree<ASTData>* tryToFindOrInstantiateFunctionTemplate(std::string functionName, NodeTree<ASTData>* scope, std::vector<Type> types, std::map<std::string, Type*> templateTypeReplacements);
NodeTree<ASTData>* findOrInstantiateFunctionTemplate(std::string functionName, NodeTree<ASTData>* scope, std::vector<Type> types, std::map<std::string, Type*> templateTypeReplacements);
NodeTree<ASTData>* findOrInstantiateFunctionTemplate(std::vector<NodeTree<Symbol>*> children, NodeTree<ASTData>* scope, std::vector<Type> types, std::map<std::string, Type*> templateTypeReplacements);
NodeTree<ASTData>* findOrInstantiateFunctionTemplate(std::string functionName, std::vector<NodeTree<Symbol>*> children, NodeTree<ASTData>* scope, std::vector<Type> types, std::map<std::string, Type*> templateTypeReplacements);
std::map<std::string, Type*> makeTemplateFunctionTypeMap(NodeTree<Symbol>* templateNode, std::vector<Type*> types, std::map<std::string, Type*> scopeTypeMap);
std::vector<std::pair<std::string, std::set<std::string>>> makeTemplateNameTraitPairs(NodeTree<Symbol>* templateNode);
private:
// NOTE(review): non-owning as far as this header shows (passed in through the constructor) -- confirm.
Importer * importer;
NodeTree<ASTData>* builtin_trans_unit; // the top scope for language level stuff
std::map<std::string, std::vector<NodeTree<ASTData>*>> languageLevelReservedWords;
std::map<std::string, std::vector<NodeTree<ASTData>*>> languageLevelOperators;
std::map<NodeTree<ASTData>*, NodeTree<ASTData>*> this_map; // used to map implicit "this" variables to their type
NodeTree<ASTData>* topScope; //maintained for templates that need to add themselves to the top scope no matter where they are instantiated
// Starts at 0 (in-class initialiser); presumably a counter for naming lambdas -- confirm in the .cpp.
int lambdaID = 0;
};
// Free functions taking a list of AST nodes and returning a list of Type values / Type pointers
// (presumably one entry per node, taken from its valueType -- confirm in the definitions).
std::vector<Type> mapNodesToTypes(std::vector<NodeTree<ASTData>*> nodes);
std::vector<Type*> mapNodesToTypePointers(std::vector<NodeTree<ASTData>*> nodes);
#endif

View File

@@ -0,0 +1,25 @@
#ifndef CCODETRIPLE_H
#define CCODETRIPLE_H
#include <string>
#include <iostream>
#include "util.h"
// Three strings kept together: preValue, value and postValue.
// NOTE(review): presumably generated C code that goes before / in place of / after an expression -- confirm in CCodeTriple.cpp.
class CCodeTriple {
public:
CCodeTriple(std::string pre, std::string val, std::string post);
// The single-argument constructors are not explicit, so a std::string or a C string converts implicitly.
CCodeTriple(std::string val);
CCodeTriple(const char* val);
CCodeTriple();
~CCodeTriple();
// Returns one std::string built from this triple; endValue defaults to false (its effect is defined in the .cpp).
std::string oneString(bool endValue = false);
CCodeTriple & operator=(const CCodeTriple &rhs);
CCodeTriple & operator+=(const CCodeTriple &rhs);
std::string preValue;
std::string value;
std::string postValue;
private:
};
// Non-member operator+ for two triples; returns a new CCodeTriple by value (combination rule is defined in the .cpp).
CCodeTriple operator+(const CCodeTriple &a, const CCodeTriple &b);
#endif //CCODETRIPLE_H

View File

@@ -0,0 +1,72 @@
#ifndef CGENERATOR_H
#define CGENERATOR_H
#include <string>
#include <iostream>
#include <fstream>
#include <utility>
#include <stack>
#include <sys/stat.h>
#include "CCodeTriple.h"
#include "NodeTree.h"
#include "ASTData.h"
#include "Type.h"
// for mapNodesToTypes
#include "ASTTransformation.h"
#include "util.h"
#include "Poset.h"
// Note the use of std::pair to hold two strings - the running string for the header file and the running string for the c file.
// Selector passed to the ValueTypeToCType* helpers of CGenerator; ClosureTypeRegularNone is their default.
enum ClosureTypeSpecialType {
    ClosureTypeRegularNone,
    ClosureFunctionPointerTypeWithoutClosedParam,
    ClosureFunctionPointerTypeWithClosedParam
};
// Code generator working on NodeTree<ASTData> trees; most functions return generated text as std::string or CCodeTriple.
// Everything is public (the private section is empty). Definitions are not part of this header.
class CGenerator {
public:
CGenerator();
~CGenerator();
// Takes the name -> AST map and an output name; returns an int (meaning of the value is defined in the .cpp).
int generateCompSet(std::map<std::string, NodeTree<ASTData>*> ASTs, std::string outputName);
std::string generateTypeStruct(NodeTree<ASTData>* from);
bool isUnderNodeWithType(NodeTree<ASTData>* from, ASTType type);
bool isUnderTranslationUnit(NodeTree<ASTData>* from, NodeTree<ASTData>* typeDefinition);
NodeTree<ASTData>* highestScope(NodeTree<ASTData>* node);
// Returns a pair of strings (see the note above this class: header-file text and c-file text).
std::pair<std::string, std::string> generateTranslationUnit(std::string name, std::map<std::string, NodeTree<ASTData>*> ASTs);
// Main per-node entry point; every parameter after `from` is optional.
CCodeTriple generate(NodeTree<ASTData>* from, NodeTree<ASTData>* enclosingObject = NULL, bool justFuncName = false, NodeTree<ASTData>* enclosingFunction = NULL);
std::string generateAliasChains(std::map<std::string, NodeTree<ASTData>*> ASTs, NodeTree<ASTData>* definition);
std::string closureStructType(std::set<NodeTree<ASTData>*> closedVariables);
// Type -> C type text helpers; closureSpecial defaults to ClosureTypeRegularNone where a default is given.
std::string ValueTypeToCType(Type *type, std::string, ClosureTypeSpecialType closureSpecial = ClosureTypeRegularNone);
std::string ValueTypeToCTypeDecoration(Type *type, ClosureTypeSpecialType closureSpecial = ClosureTypeRegularNone);
std::string ValueTypeToCTypeThingHelper(Type *type, std::string ptrStr, ClosureTypeSpecialType closureSpecial);
static std::string CifyName(std::string name);
static std::string scopePrefix(NodeTree<ASTData>* from);
std::string simpleComplexName(std::string simpleName, std::string complexName);
std::string prefixIfNeeded(std::string prefix, std::string name);
// functionPrototype is a pointer to a string (presumably an output parameter -- confirm).
std::string generateObjectMethod(NodeTree<ASTData>* enclosingObject, NodeTree<ASTData>* from, std::string *functionPrototype);
NodeTree<ASTData>* getMethodsObjectType(NodeTree<ASTData>* scope, std::string functionName);
NodeTree<ASTData>* getMethod(Type* type, std::string method, std::vector<Type> types);
bool methodExists(Type* type, std::string method, std::vector<Type> types);
std::string generateMethodIfExists(Type* type, std::string method, std::string parameter, std::vector<Type> methodTypes);
std::string emitDestructors(std::vector<NodeTree<ASTData>*> possibleDeclarations, NodeTree<ASTData>* enclosingObject);
std::string tabs();
std::string getID();
// Generator state. NOTE(review): the roles below are inferred from names only -- confirm in CGenerator.cpp.
int tabLevel;
int id;
std::string function_header;
std::string generatorString;
std::string linkerString;
std::string functionTypedefString;
std::string functionTypedefStringPre;
std::set<std::string> usedNameSet;
std::map<std::string, std::string> simpleComplexNameMap;
std::map<Type, triple<std::string, std::string, std::string>> functionTypedefMap;
std::map<std::set<NodeTree<ASTData>*>, std::string> closureStructMap;
std::vector<std::vector<NodeTree<ASTData>*>> distructDoubleStack;
std::stack<int> loopDistructStackDepth;
std::vector<std::vector<NodeTree<ASTData>*>> deferDoubleStack;
std::stack<int> loopDeferStackDepth;
private:
};
#endif

View File

@@ -0,0 +1,52 @@
#ifndef COLLAPSETRANSFORMATION_H
#define COLLAPSETRANSFORMATION_H

#include <queue>
#include <vector>

#include "NodeTransformation.h"

// In-place tree transformation that removes every node whose data equals
// toCollapse and splices that node's children into its parent at the same
// position. The root itself is never examined, only its descendants.
template<class T>
class CollapseTransformation: public NodeTransformation<T,T> {
    public:
        CollapseTransformation(T toCollapse);
        ~CollapseTransformation();
        // Rewrites the tree rooted at `from` and returns `from`.
        virtual NodeTree<T>* transform(NodeTree<T>* from);

    private:
        T toCollapse;
};

// The member definitions are kept inside the include guard so that including
// this header twice in one translation unit does not redefine them.

template<class T>
CollapseTransformation<T>::CollapseTransformation(T toCollapse) {
    this->toCollapse = toCollapse;
}

template<class T>
CollapseTransformation<T>::~CollapseTransformation() {
    // Nothing to release
}

template<class T>
NodeTree<T>* CollapseTransformation<T>::transform(NodeTree<T>* from) {
    std::queue<NodeTree<T>*> toProcess;
    toProcess.push(from);
    while (!toProcess.empty()) {
        NodeTree<T>* node = toProcess.front();
        toProcess.pop();

        // Walk the node's live child list. Indexing the live list (rather than
        // a snapshot taken before any edits) keeps the insertion position valid
        // when several siblings are collapsed in the same node.
        int i = 0;
        int childCount = static_cast<int>(node->getChildren().size());
        while (i < childCount) {
            NodeTree<T>* child = node->get(i);
            if (child->getData() == toCollapse) {
                std::vector<NodeTree<T>*> promoted = child->getChildren();
                node->removeChild(i);
                node->insertChildren(i, promoted);
                childCount += static_cast<int>(promoted.size()) - 1;
                // i is not advanced: the first promoted grandchild now sits at
                // position i and may itself need collapsing.
            } else {
                toProcess.push(child);
                ++i;
            }
        }
    }
    return from;
}

#endif

View File

@@ -0,0 +1,48 @@
#ifndef DELETETRANSFORMATION_H
#define DELETETRANSFORMATION_H

#include <queue>
#include <vector>

#include "NodeTransformation.h"

// In-place tree transformation that unlinks every child whose data equals the
// value given to the constructor. The whole subtree under such a child goes
// with it. Unlinked nodes are only removed from their parent's child list;
// nothing is deleted here. The root itself is never examined.
template<class T>
class DeleteTransformation: public NodeTransformation<T,T> {
    public:
        DeleteTransformation(T toDelete);
        ~DeleteTransformation();
        // Rewrites the tree rooted at `from` and returns `from`.
        virtual NodeTree<T>* transform(NodeTree<T>* from);

    private:
        T toRemove;
};

// The member definitions are kept inside the include guard so that including
// this header twice in one translation unit does not redefine them.

template<class T>
DeleteTransformation<T>::DeleteTransformation(T toRemove) {
    this->toRemove = toRemove;
}

template<class T>
DeleteTransformation<T>::~DeleteTransformation() {
    // Nothing to release
}

template<class T>
NodeTree<T>* DeleteTransformation<T>::transform(NodeTree<T>* from) {
    std::queue<NodeTree<T>*> toProcess;
    toProcess.push(from);
    while (!toProcess.empty()) {
        NodeTree<T>* node = toProcess.front();
        toProcess.pop();

        // getChildren() returns a copy, so removing from the node while walking
        // this snapshot is safe.
        std::vector<NodeTree<T>*> children = node->getChildren();
        for (typename std::vector<NodeTree<T>*>::size_type i = 0; i < children.size(); i++) {
            if (children[i]->getData() == toRemove)
                node->removeChild(children[i]);
            else
                toProcess.push(children[i]);
        }
    }
    return from;
}

#endif

View File

@@ -0,0 +1,38 @@
#include <iostream>
#include <vector>
#include <queue>
#include <map>
#include "NodeTree.h"
#include "Symbol.h"
#include "util.h"
#ifndef GRAPH_STRUCTURED_STACK
#define GRAPH_STRUCTURED_STACK
// Graph of NodeTree<int> nodes grouped into numbered frontiers, with a NodeTree<Symbol>* stored per (start, end) edge.
// Only declarations are visible here; behaviour notes below are limited to what the signatures show.
class GraphStructuredStack {
public:
GraphStructuredStack();
~GraphStructuredStack();
// Returns a node for the given state number.
NodeTree<int>* newNode(int stateNum);
void addToFrontier(int frontier, NodeTree<int>* node);
// Returns a node pointer for (frontier, state); presumably NULL when there is none -- confirm.
NodeTree<int>* inFrontier(int frontier, int state);
int getContainingFrontier(NodeTree<int>* node);
bool frontierIsEmpty(int frontier);
NodeTree<int>* frontierGetAccState(int frontier);
// Both return a pointer to a vector. NOTE(review): who frees it is not visible from the header.
std::vector<NodeTree<int>*>* getReachable(NodeTree<int>* start, int lenght);
std::vector<std::vector<NodeTree<int>*> >* getReachablePaths(NodeTree<int>* start, int lenght);
// Takes the current path by value and a pointer to the collection of paths.
void recursivePathFind(NodeTree<int>* start, int length, std::vector<NodeTree<int>*> currentPath, std::vector<std::vector<NodeTree<int>*> >* paths);
bool hasEdge(NodeTree<int>* start, NodeTree<int>* end);
NodeTree<Symbol>* getEdge(NodeTree<int>* start, NodeTree<int>* end);
void addEdge(NodeTree<int>* start, NodeTree<int>* end, NodeTree<Symbol>* edge);
void clear();
std::vector<int> getFrontier(int frontier);
std::string toString();
private:
// One heap-allocated vector of nodes per entry (presumably indexed by frontier number -- confirm).
std::vector<std::vector<NodeTree<int>*>*> gss;
// (start, end) node pair -> edge label.
std::map< std::pair< NodeTree<int>*, NodeTree<int>* >, NodeTree<Symbol>* > edges;
// node -> int (presumably the number of the frontier that holds it, see getContainingFrontier -- confirm).
std::map< NodeTree<int>*, int > containing_frontier_map;
};
#endif

View File

@@ -0,0 +1,48 @@
#ifndef __IMPORTER__H_
#define __IMPORTER__H_
#include <string>
#include <vector>
#include <iostream>
#include <fstream>
#include <sys/stat.h>
#include "Parser.h"
#include "NodeTree.h"
#include "ASTData.h"
#include "Symbol.h"
#include "RemovalTransformation.h"
#include "CollapseTransformation.h"
#include "ASTTransformation.h"
class ASTTransformation;
// Holds a Parser, an ASTTransformation and the results of importing source files by name.
// Only declarations are visible here; behaviour notes below are limited to what the signatures show.
class Importer {
public:
// only_parseIn is optional and defaults to false.
Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputName, bool only_parseIn = false);
~Importer();
void import(std::string fileName);
NodeTree<ASTData>* getUnit(std::string fileName);
NodeTree<ASTData>* importFirstPass(std::string fileName);
// Returns a parse tree (NodeTree<Symbol>) for the named file.
NodeTree<Symbol>* parseAndTrim(std::string fileName);
void registerAST(std::string name, NodeTree<ASTData>* ast, NodeTree<Symbol>* syntaxTree);
// Returns a name -> AST map by value.
std::map<std::string, NodeTree<ASTData>*> getASTMap();
private:
std::string outputName;
ASTTransformation *ASTTransformer;
// A name together with its AST and its syntax tree (the same three things registerAST receives).
struct importTriplet {
std::string name;
NodeTree<ASTData>* ast;
NodeTree<Symbol>* syntaxTree;
};
bool only_parse;
std::vector<importTriplet> importedTrips;
std::vector<std::string> includePaths;
// NOTE(review): received through the constructor; ownership is not visible from the header.
Parser* parser;
// NOTE(review): presumably the symbols handed to the Removal/Collapse transformations included above -- confirm.
std::vector<Symbol> removeSymbols;
std::vector<Symbol> collapseSymbols;
std::map<std::string, NodeTree<ASTData>*> imported;
};
#endif

View File

@@ -0,0 +1,26 @@
#ifndef LEXER_H
#define LEXER_H
#include "util.h"
#include "StringReader.h"
#include "RegEx.h"
#include "Symbol.h"
#include <string>
// Lexer over an input string, configured with a list of regular expressions; next() yields one Symbol per call.
// Only declarations are visible here; the definitions are not part of this header.
class Lexer {
public:
Lexer();
Lexer(std::string inputString);
~Lexer();
// Takes a regular expression as text (presumably compiled into a RegEx and stored in regExs -- confirm).
void addRegEx(std::string regExString);
void setInput(std::string inputString);
Symbol next();
void reset();
// Static self-test entry point.
static void test();
private:
// NOTE(review): raw pointers; whether the destructor deletes them is not visible here.
std::vector<RegEx*> regExs;
std::string input;
int currentPosition;
};
#endif

View File

@@ -0,0 +1,35 @@
#ifndef NODETRANSFORMATION_H
#define NODETRANSFORMATION_H
#include "NodeTree.h"
#ifndef NULL
#define NULL ((void*)0)
#endif
// Abstract interface for a transformation from a NodeTree<FROM> to a NodeTree<TO>.
// Subclasses implement transform(); the class itself has no data members.
template <class FROM, class TO>
class NodeTransformation {
public:
NodeTransformation();
// Virtual so that subclasses can be destroyed through a NodeTransformation pointer.
virtual ~NodeTransformation();
// Pure virtual: takes the source tree and returns the resulting tree.
virtual NodeTree<TO>* transform(NodeTree<FROM>* from)=0;
private:
};
// Constructor: the interface has no data members, so there is nothing to set up.
template <class FROM, class TO>
NodeTransformation<FROM,TO>::NodeTransformation() {}
// Destructor: the interface has no data members, so there is nothing to tear down.
template <class FROM, class TO>
NodeTransformation<FROM,TO>::~NodeTransformation() {}
// template <class FROM, class TO>
// NodeTree<TO>* NodeTransformation<FROM,TO>::transform(NodeTree<FROM>* from) {
// return (NodeTree<TO>*)0x1234;
// }
#endif

View File

@@ -0,0 +1,277 @@
#ifndef NODETREE_H
#define NODETREE_H
#ifndef NULL
#define NULL ((void*)0)
#endif
#include <vector>
#include <string>
#include <iostream>
#include "util.h"
// Generic tree/graph node: a name, a payload of type T, a list of parent pointers and a list of child pointers.
// A node stores raw pointers only; it never deletes its parents or children.
template<class T>
class NodeTree {
public:
NodeTree();
NodeTree(std::string name, T inData);
~NodeTree();
// Deep comparison: payloads must be equal and the children must match pairwise, in order.
bool const operator==(NodeTree &other);
// Orders nodes by payload only.
bool const operator<(const NodeTree &other) const;
// setParent replaces the whole parent list with one entry; addParent appends.
void setParent(NodeTree<T>* parent);
void addParent(NodeTree<T>* parent);
// First parent, or NULL when there is none.
NodeTree<T>* getParent();
std::vector<NodeTree<T>*> getParents();
// addChild/insertChild throw a C string when given a null child. They do not touch the child's parent list.
void addChild(NodeTree<T>* child);
void insertChild(int i, NodeTree<T>* child);
void addChildren(std::vector<NodeTree<T>*>* children);
void addChildren(std::vector<NodeTree<T>*> children);
void insertChildren(int index, std::vector<NodeTree<T>*>* children);
void insertChildren(int index, std::vector<NodeTree<T>*> children);
// Index of the first child equal (by pointer) to `child`, or -1.
int findChild(NodeTree<T>* child);
// The pointer form removes the first matching child and does nothing when there is none; the index form is unchecked.
void removeChild(NodeTree<T>* child);
void removeChild(int index);
void clearChildren();
// Returns a copy of the child list.
std::vector<NodeTree<T>*> getChildren();
// Child at `index`, unchecked.
NodeTree<T>* get(int index);
std::string getName();
void setName(std::string);
// getData returns a copy of the payload; getDataRef returns a pointer to the stored payload.
T getData() const;
T* getDataRef();
void setData(T data);
// Number of nodes in the subtree rooted here, counting this node.
int size();
// Graphviz text ("digraph Kraken { ... }") for the graph reachable through the child lists.
std::string DOTGraphString();
private:
std::string DOTGraphStringHelper(std::vector<NodeTree<T>*> avoidList);
std::string getDOTName();
std::string name;
T data;
std::vector<NodeTree<T>*> parents;
std::vector<NodeTree<T>*> children;
// idCounter is shared by all nodes of one NodeTree<T> instantiation; each constructor copies it into id and increments it.
static int idCounter;
int id;
};
// Definition of the static id counter (one per NodeTree<T> instantiation).
// It has static storage duration and no initialiser, so it starts at zero.
template<class T>
int NodeTree<T>::idCounter;
// Default constructor: names the node "UnnamedNode" and takes the next id.
// data is value-initialised so that scalar payloads (e.g. NodeTree<int>) start
// at zero instead of holding an indeterminate value.
template<class T>
NodeTree<T>::NodeTree() : name("UnnamedNode"), data(), id(idCounter++) {
}
// Builds a node with the given name and payload and takes the next id.
template<class T>
NodeTree<T>::NodeTree(std::string name, T inData) {
    // Take the id first; it does not depend on the other two members.
    id = idCounter++;
    this->data = inData;
    this->name = name;
}
// Destructor: empties both pointer lists. The nodes they point at are not
// dereferenced or deleted, so clearing cannot fault by itself.
template<class T>
NodeTree<T>::~NodeTree() {
    parents.clear();
    children.clear();
}
// Deep equality: the payloads must compare equal and both nodes must have the
// same number of children, each pair of which is compared recursively in order.
template<class T>
const bool NodeTree<T>::operator==(NodeTree &other) {
    if (!(data == other.data))
        return false;

    // Read the other node's child list in place; calling getChildren() would
    // copy the whole vector on every loop iteration.
    const std::vector<NodeTree<T>*>& theirChildren = other.children;
    if (children.size() != theirChildren.size())
        return false;

    for (typename std::vector<NodeTree<T>*>::size_type i = 0; i < children.size(); i++)
        if (!(*(children[i]) == *(theirChildren[i])))
            return false;
    return true;
}
// Ordering by payload only, so that NodeTrees can be used as map keys.
template<class T>
const bool NodeTree<T>::operator<(const NodeTree &other) const {
    const T theirData = other.getData();
    return data < theirData;
}
// Makes `parent` the one and only entry of the parent list.
template<class T>
void NodeTree<T>::setParent(NodeTree<T>* parent) {
    parents.assign(1, parent);
}
// Appends `parent` to the parent list; existing parents are kept.
template<class T>
void NodeTree<T>::addParent(NodeTree<T>* parent) {
    this->parents.push_back(parent);
}
// Returns the first recorded parent, or NULL when the node has none.
template<class T>
NodeTree<T>* NodeTree<T>::getParent() {
    if (parents.empty())
        return NULL;
    return parents.front();
}
// Returns a copy of the full parent list.
template<class T>
std::vector<NodeTree<T>*> NodeTree<T>::getParents() {
    return this->parents;
}
// Appends `child` to the child list. Throws a C string when `child` is null.
// Duplicates are accepted (there is no findChild() uniqueness check).
template<class T>
void NodeTree<T>::addChild(NodeTree<T>* child) {
    if (child == NULL) {
        throw "Help, NULL child";
    }
    this->children.push_back(child);
}
// Inserts `child` so that it ends up at position i of the child list.
// Throws a C string when `child` is null; i is not range-checked.
// Duplicates are accepted (there is no findChild() uniqueness check).
template<class T>
void NodeTree<T>::insertChild(int i, NodeTree<T>* child) {
    if (child == NULL) {
        throw "Help, NULL child";
    }
    typename std::vector<NodeTree<T>*>::iterator where = children.begin() + i;
    children.insert(where, child);
}
// Appends every entry of the pointed-to list, in order, through addChild().
template<class T>
void NodeTree<T>::addChildren(std::vector<NodeTree<T>*>* children) {
    const std::vector<NodeTree<T>*>& incoming = *children;
    typename std::vector<NodeTree<T>*>::size_type pos = 0;
    while (pos < incoming.size()) {
        addChild(incoming[pos]);
        ++pos;
    }
}
// Appends every entry of the given list, in order, through addChild().
// The list is received by value, so it is independent of this node's own child list.
template<class T>
void NodeTree<T>::addChildren(std::vector<NodeTree<T>*> children) {
    for (NodeTree<T>* incoming : children)
        addChild(incoming);
}
// Inserts the pointed-to list so that its entries occupy positions
// index, index+1, ... of the child list, keeping their order.
template<class T>
void NodeTree<T>::insertChildren(int index, std::vector<NodeTree<T>*>* children) {
    const std::vector<NodeTree<T>*>& incoming = *children;
    typename std::vector<NodeTree<T>*>::size_type offset = 0;
    while (offset < incoming.size()) {
        insertChild(index+offset, incoming[offset]);
        ++offset;
    }
}
// Inserts the given list so that its entries occupy positions
// index, index+1, ... of the child list, keeping their order.
// The list is received by value, so it is independent of this node's own child list.
template<class T>
void NodeTree<T>::insertChildren(int index, std::vector<NodeTree<T>*> children) {
    int slot = index;
    for (NodeTree<T>* incoming : children) {
        insertChild(slot, incoming);
        ++slot;
    }
}
// Returns the index of the first child that is the same pointer as `child`,
// or -1 when no child matches.
template<class T>
int NodeTree<T>::findChild(NodeTree<T>* child) {
    const int count = static_cast<int>(children.size());
    int pos = 0;
    while (pos != count) {
        if (children[pos] == child)
            return pos;
        ++pos;
    }
    return -1;
}
// Removes the child at `index` from the child list. The index is not
// range-checked, and the removed node itself is not deleted.
template<class T>
void NodeTree<T>::removeChild(int index) {
    typename std::vector<NodeTree<T>*>::iterator victim = children.begin() + index;
    children.erase(victim);
}
// Removes the first occurrence of `child` from the child list; does nothing
// when `child` is not a child of this node.
template<class T>
void NodeTree<T>::removeChild(NodeTree<T>* child) {
    const int where = findChild(child);
    if (where == -1)
        return;
    removeChild(where);
}
// Empties the child list. The child nodes themselves are not deleted.
template<class T>
void NodeTree<T>::clearChildren() {
    // Clearing the vector discards every stored pointer in one step.
    children.clear();
}
// Returns a copy of the child list; changing the copy does not affect this node.
template<class T>
std::vector<NodeTree<T>*> NodeTree<T>::getChildren() {
    return this->children;
}
// Number of nodes in the subtree rooted at this node: this node plus the
// size() of every child, computed recursively.
template<class T>
int NodeTree<T>::size() {
    int below = 0;
    for (NodeTree<T>* child : children)
        below += child->size();
    return 1+below;
}
// Returns the child stored at `index`; the index is not range-checked.
template<class T>
NodeTree<T>* NodeTree<T>::get(int index) {
    NodeTree<T>* picked = children[index];
    return picked;
}
// Returns a copy of the node's name.
template<class T>
std::string NodeTree<T>::getName() {
    return this->name;
}
// Replaces the node's name with the given string.
template<class T>
void NodeTree<T>::setName(std::string name) {
    // The parameter shadows the member, so the member is named through the class.
    NodeTree<T>::name = name;
}
// Returns a copy of the payload.
template<class T>
T NodeTree<T>::getData() const {
    return this->data;
}
// Returns a pointer to the payload stored inside this node, so the caller can
// modify it in place. The pointer is only valid while the node exists.
template<class T>
T* NodeTree<T>::getDataRef() {
    T* stored = &data;
    return stored;
}
// Replaces the payload with the given value.
template<class T>
void NodeTree<T>::setData(T data) {
    // The parameter shadows the member, so the member is named through the class.
    NodeTree<T>::data = data;
}
// Returns Graphviz text for the graph reachable from this node: the edge list
// produced by DOTGraphStringHelper wrapped in "digraph Kraken { ... }".
template<class T>
std::string NodeTree<T>::DOTGraphString() {
    const std::vector<NodeTree<T>*> nothingVisitedYet;
    std::string graph = "digraph Kraken { \n";
    graph += DOTGraphStringHelper(nothingVisitedYet);
    graph += "}";
    return graph;
}
// Emits one "parent -> child;" line per child, each followed by the child's own
// edges, recursively. avoidList holds the nodes on the current path from the
// start node; a node already on it contributes nothing, which stops cycles.
// The list is passed by value, so sibling subtrees do not see each other's entries.
template<class T>
std::string NodeTree<T>::DOTGraphStringHelper(std::vector<NodeTree<T>*> avoidList) {
    for (NodeTree<T>* seen : avoidList) {
        if (seen == this)
            return "";
    }
    avoidList.push_back(this);

    std::string edges = "";
    for (NodeTree<T>* child : children) {
        if (child == NULL) {
            // A null child should not happen; make it visible in the output.
            edges += getDOTName() + " -> BAD_NULL_" + getDOTName() + "\n";
            continue;
        }
        edges += getDOTName() + " -> " + child->getDOTName() + ";\n" + child->DOTGraphStringHelper(avoidList);
    }
    return edges;
}
// Builds the quoted Graphviz identifier of this node: "<name>-<data.toString()>_<id>",
// with embedded double quotes escaped and newlines turned into the two characters \n.
// Requires T to provide toString(). Terminals already carry a quote at the front of
// their name, so none is added for them here.
template<class T>
std::string NodeTree<T>::getDOTName() {
    std::string label = name + "-" + data.toString();
    std::string quoted = "\"" + replaceExEscape(label, "\"", "\\\"") + "_" + intToString(id) + "\"";
    return replaceExEscape(quoted, "\n", "\\n");
}
#endif

View File

@@ -0,0 +1,36 @@
#ifndef PARSE_ACTION_H
#define PARSE_ACTION_H
#ifndef NULL
#define NULL ((void*)0)
#endif
#include "util.h"
#include "ParseRule.h"
#include <vector>
#include <string>
// One parser action: an ActionType plus, depending on the constructor used, a rule pointer or a shift state.
// Only declarations are visible here; the definitions are not part of this header.
class ParseAction {
public:
enum ActionType { INVALID, REDUCE, SHIFT, ACCEPT, REJECT };
// Three constructors: action only, action with a rule, action with a state number.
ParseAction(ActionType action);
ParseAction(ActionType action, ParseRule* reduceRule);
ParseAction(ActionType action, int shiftState);
~ParseAction();
bool const equalsExceptLookahead(const ParseAction &other) const;
bool const operator==(const ParseAction &other) const;
bool const operator!=(const ParseAction &other) const;
bool const operator<(const ParseAction &other) const;
// printRuleLookahead defaults to true.
std::string toString(bool printRuleLookahead = true);
static std::string actionToString(ActionType action);
ActionType action;
// NOTE(review): ownership of the rule is not visible from the header.
ParseRule* reduceRule;
int shiftState;
};
#endif

View File

@@ -0,0 +1,53 @@
#ifndef PARSERULE_H
#define PARSERULE_H
#ifndef NULL
#define NULL ((void*)0)
#endif
#include "Symbol.h"
#include <vector>
#include <string>
#include <iostream>
// A grammar rule: a left-hand Symbol, a right-hand list of Symbols, an int position (pointerIndex) and a lookahead list.
// Only declarations are visible here; the definitions are not part of this header.
class ParseRule {
private:
int pointerIndex;
Symbol leftHandle;
std::vector<Symbol> lookahead;
std::vector<Symbol> rightSide;
public:
ParseRule();
// Note: rightSide is taken by non-const reference, lookahead by value.
ParseRule(Symbol leftHandle, int pointerIndex, std::vector<Symbol> &rightSide, std::vector<Symbol> lookahead);
~ParseRule();
const bool equalsExceptLookahead(const ParseRule &other) const;
bool const operator==(const ParseRule &other) const;
bool const operator!=(const ParseRule &other) const;
bool const operator<(const ParseRule &other) const; //Used for ordering so we can put ParseRule's in sets, and also so that ParseActions will have an ordering
// Returns a pointer to a ParseRule (presumably a heap-allocated copy the caller must free -- confirm).
ParseRule* clone();
void setLeftHandle(Symbol leftHandle);
void appendToRight(Symbol appendee);
Symbol getLeftSide();
void setRightSide(std::vector<Symbol> rightSide);
std::vector<Symbol> getRightSide();
// NOTE(review): which positions these two read relative to pointerIndex is defined in the .cpp.
Symbol getAtNextIndex();
Symbol getAtIndex();
int getRightSize();
int getIndex();
// Returns bool (presumably false when the position cannot move further -- confirm).
bool advancePointer();
bool isAtEnd();
void setLookahead(std::vector<Symbol> lookahead);
void addLookahead(std::vector<Symbol> lookahead);
std::vector<Symbol> getLookahead();
// printLookahead defaults to true.
std::string toString(bool printLookahead = true);
std::string toDOT();
};
#endif

View File

@@ -0,0 +1,73 @@
#ifndef PARSER_H
#define PARSER_H
#include "util.h"
#include "ParseRule.h"
#include "ParseAction.h"
#include "Symbol.h"
#include "State.h"
#include "StringReader.h"
#include "Lexer.h"
#include "NodeTree.h"
#include "Table.h"
#include <queue>
#include <set>
#include <map>
#include <vector>
#include <algorithm>
#include <stack>
#include <string>
#include <iostream>
// Abstract base class for parsers: holds the loaded grammar, the generated
// state sets and the parse table. parseInput() is pure virtual, so a concrete
// subclass has to supply the parsing algorithm.
// Only declarations live here; the definitions are not part of this header.
class Parser {
    public:
        Parser();
        // Virtual because this is a polymorphic base: deleting a derived parser
        // through a Parser* would otherwise be undefined behaviour.
        virtual ~Parser();

        virtual void loadGrammer(std::string grammerInputString);
        virtual void createStateSet();
        virtual std::string stateSetToString();
        virtual NodeTree<Symbol>* parseInput(std::string inputString, std::string filename, bool highlight_errors) = 0; // filename for error reporting
        virtual std::string grammerToString();
        virtual std::string grammerToDOT();

        std::string tableToString();
        // Table serialisation: export writes to a file stream, import reads from a raw character buffer.
        void exportTable(std::ofstream &file);
        void importTable(char* tableData);

    protected:
        // avoidList defaults to an empty list and addNewTokens to true.
        std::vector<Symbol> firstSet(Symbol token, std::vector<Symbol> avoidList = std::vector<Symbol>(), bool addNewTokens = true);
        bool isNullable(Symbol token);
        bool isNullableHelper(Symbol token, std::set<Symbol> done);

        // NOTE(review): presumably caches for firstSet() / isNullable() -- confirm in Parser.cpp.
        std::map<Symbol, std::vector<Symbol>> tokenFirstSet;
        std::map<Symbol, bool> tokenNullable;

        std::vector<Symbol> incrementiveFollowSet(ParseRule* rule);
        virtual void closure(State* state);
        virtual void addStates(std::vector< State* >* stateSets, State* state, std::queue<State*>* toDo);
        int stateNum(State* state);

        StringReader reader;
        Lexer lexer;
        // Keyed by (symbol text, is-terminal flag); see getOrAddSymbol below.
        std::map<std::pair<std::string, bool>, Symbol> symbols;
        // NOTE(review): raw pointers; whether the destructor frees them is not visible here.
        std::vector<ParseRule*> loadedGrammer;

        std::vector< State* > stateSets;

        Symbol EOFSymbol;
        Symbol nullSymbol;
        Symbol invalidSymbol;

        Table table;

        std::stack<int> stateStack;
        std::stack<Symbol> symbolStack;

        Symbol getOrAddSymbol(std::string symbolString, bool isTerminal);
};
#endif

View File

@@ -0,0 +1,126 @@
#ifndef POSET_H
#define POSET_H
#include <vector>
#include <set>
#include <map>
#include <queue>
#include <cassert>
#include "util.h"
// Set of vertices with "first depends on second" relationships and a topological sort over them.
// T must be usable as a std::set / std::map key.
template <class T>
class Poset {
public:
Poset();
~Poset();
// Records that `first` depends on `second`; both become vertices.
void addRelationship(T first, T second);
// Adds a vertex without any relationship.
void addVertex(T vertex);
// True when `vertex` has no recorded dependency that is still marked true.
bool zeroDependencies(T vertex);
// All vertices that have a true dependency on `dependency`.
std::set<T> getDependsOn(T dependency);
// Vertices ordered so that a vertex comes after everything it depends on.
std::vector<T> getTopoSort();
static void test();
private:
//backing data structures
// adjMatrix[a][b] == true means a depends on b.
std::map<T, std::map<T,bool>> adjMatrix;
std::set<T> verticies;
};
// Constructor: both containers start out empty on their own, so there is nothing to do.
template <class T>
Poset<T>::Poset() {}
// Destructor: the containers clean up after themselves, so there is nothing to do.
template <class T>
Poset<T>::~Poset() {}
// Records that `first` depends on `second` and makes sure both are known vertices.
template <class T>
void Poset<T>::addRelationship(T first, T second) {
    adjMatrix[first][second] = true;
    verticies.insert(second);
    verticies.insert(first);
}
// Adds a vertex without any relationship; adding an existing vertex changes nothing.
template <class T>
void Poset<T>::addVertex(T vertex) {
    this->verticies.insert(vertex);
}
// True when `vertex` has no row in adjMatrix, or when every entry of its row is false.
template <class T>
bool Poset<T>::zeroDependencies(T vertex) {
    auto row = adjMatrix.find(vertex);
    if (row != adjMatrix.end()) {
        for (const auto& dependency : row->second) {
            if (dependency.second)
                return false;
        }
    }
    return true;
}
// Returns every vertex whose row in adjMatrix holds a true entry for `dependency`,
// i.e. every vertex that depends on it.
template <class T>
std::set<T> Poset<T>::getDependsOn(T dependency) {
    std::set<T> vertsThatDependOn;
    // Iterate by const reference: iterating by value would copy a whole inner
    // map for every row of the matrix.
    for (const auto& row : adjMatrix) {
        auto depItr = row.second.find(dependency);
        if (depItr != row.second.end() && depItr->second)
            vertsThatDependOn.insert(row.first);
    }
    return vertsThatDependOn;
}
// Returns the vertices ordered so that each one appears after everything it
// depends on. Vertices that are part of a dependency cycle never become ready
// and are left out of the result.
//
// The outstanding-dependency counts are kept in a local map, so adjMatrix is
// left untouched and the function can be called repeatedly with the same result.
template <class T>
std::vector<T> Poset<T>::getTopoSort() {
    // Number of true dependencies each vertex still waits for
    std::map<T, int> remaining;
    for (const auto& row : adjMatrix)
        for (const auto& edge : row.second)
            if (edge.second)
                remaining[row.first]++;

    std::vector<T> sorted;
    std::queue<T> toDo;
    // Start with the vertices that depend on nothing, in set order
    for (const auto& vertex : verticies)
        if (remaining.find(vertex) == remaining.end())
            toDo.push(vertex);

    while (!toDo.empty()) {
        T current = toDo.front(); toDo.pop();
        sorted.push_back(current);
        for (const T& depOnCurrent : getDependsOn(current)) {
            // current is now taken care of, so one dependency fewer is outstanding
            if (--remaining[depOnCurrent] == 0)
                toDo.push(depOnCurrent);
        }
    }
    return sorted;
}
// Self-test. It always exercises Poset<int>, whatever T is; it is a member of
// the template rather than an int specialisation because a specialisation
// defined in this header produced multiple-definition errors.
template<class T>
void Poset<T>::test() {
    {
        // A chain 0 -> 1 -> ... -> 20 plus one unrelated vertex.
        Poset<int> chain;
        chain.addVertex(1000);
        for (int i = 0; i != 20; ++i)
            chain.addRelationship(i, i+1);

        std::string rendered;
        for (int vertex : chain.getTopoSort())
            rendered += intToString(vertex) + " ";

        // 20 and 1000 are the only vertices without dependencies. The vertex
        // set is a std::set, which iterates in ascending order, so they are
        // queued as 20 then 1000; that is why 1000 appears second.
        assert(rendered == "20 1000 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ");
    }
    {
        // Ten independent pairs: every even vertex depends on the odd one after it.
        Poset<int> pairs;
        for (int i = 0; i < 20; i += 2)
            pairs.addRelationship(i, i+1);

        std::string rendered;
        for (int vertex : pairs.getTopoSort())
            rendered += intToString(vertex) + " ";

        assert(rendered == "1 3 5 7 9 11 13 15 17 19 0 2 4 6 8 10 12 14 16 18 ");
    }
    std::cout << "Poset tests passed" << std::endl;
}
#endif

View File

@@ -0,0 +1,68 @@
#ifndef RNGLRPARSER_H
#define RNGLRPARSER_H
#include <iostream>
#include <queue>
#include <map>
#include <vector>
#include <set>
#include <utility>
#include <algorithm>
#include "Parser.h"
#include "Symbol.h"
#include "GraphStructuredStack.h"
#include "util.h"
// Parser subclass that owns a GraphStructuredStack plus queues of pending
// reductions and shifts. Only declarations live here; the behaviour of each
// member is defined in the corresponding source file.
class RNGLRParser: public Parser {
public:
RNGLRParser();
~RNGLRParser();
// Parses inputString and returns the resulting Symbol tree.
// NOTE(review): ownership of the returned tree and the return value on a
// failed parse are not visible from this header - confirm in the .cpp.
NodeTree<Symbol>* parseInput(std::string inputString, std::string filename, bool highlight_errors); // filename for error reporting
void printReconstructedFrontier(int frontier);
private:
// Per-position processing steps; `i` is presumably the input/frontier index - TODO confirm.
void reducer(int i);
void shifter(int i);
void addChildren(NodeTree<Symbol>* parent, std::vector<NodeTree<Symbol>*>* children, NodeTree<Symbol>* nullableParts);
void addStates(std::vector< State* >* stateSets, State* state, std::queue<State*>* toDo);
void addStateReductionsToTable(State* state);
// Nullability queries on grammar rules; the avoidList overload takes a list
// of symbols to skip (presumably to stop recursion on cyclic rules - TODO confirm).
bool fullyReducesToNull(ParseRule* rule);
bool reducesToNull(ParseRule* rule);
bool reducesToNull(ParseRule* rule, std::vector<Symbol> avoidList);
bool belongsToFamily(NodeTree<Symbol>* node, std::vector<NodeTree<Symbol>*>* nodes);
// Accessors for the "packed" flag kept per node (see packedMap below).
bool arePacked(std::vector<NodeTree<Symbol>*> nodes);
bool isPacked(NodeTree<Symbol>* node);
void setPacked(NodeTree<Symbol>* node, bool isPacked);
NodeTree<Symbol>* getNullableParts(ParseRule* rule);
NodeTree<Symbol>* getNullableParts(ParseRule* rule, std::vector<NodeTree<Symbol>*> avoidList);
NodeTree<Symbol>* getNullableParts(Symbol symbol);
std::vector<NodeTree<Symbol>*> getPathEdges(std::vector<NodeTree<int>*> path);
int findLine(int tokenNum); //Get the line number for a token, used for error reporting
// Token sequence being parsed.
std::vector<Symbol> input;
GraphStructuredStack gss;
//start node, lefthand side of the reduction, reduction length
struct Reduction {
NodeTree<int>* from;
Symbol symbol;
int length;
NodeTree<Symbol>* nullableParts;
NodeTree<Symbol>* label;
} ;
// Pending reductions.
std::queue<Reduction> toReduce;
//Node coming from, state going to
std::queue< std::pair<NodeTree<int>*, int> > toShift;
std::vector<std::pair<NodeTree<Symbol>*, int> > SPPFStepNodes;
std::vector<NodeTree<Symbol>*> nullableParts;
// NOTE(review): keyed by NodeTree<Symbol> value (not pointer), so NodeTree
// must provide an ordering - confirm this is intended.
std::map<NodeTree<Symbol>, bool> packedMap;
// Cache keyed by rule pointer; presumably memoises reducesToNull - TODO confirm.
std::map<ParseRule*, bool> reduceToNullMap;
};
#endif

View File

@@ -0,0 +1,29 @@
#ifndef REGEX_H
#define REGEX_H
#include "util.h"
#include "RegExState.h"
#include "Symbol.h"
#include <string>
#include <utility>
#include <stack>
#include <vector>
// Regular expression matcher represented as a graph of RegExState nodes.
// Stores the source pattern, the entry state and the set of states that are
// currently active.
class RegEx {
public:
RegEx();
RegEx(std::string inPattern);
~RegEx();
// Builds the state graph for `pattern`; `ending` is an in/out list of states.
// NOTE(review): exact contract of `ending` and ownership of the returned
// state are not visible here - confirm in RegEx.cpp.
RegExState* construct(std::vector<RegExState*>* ending, std::string pattern);
// Matches against stringToMatch and returns an int.
// NOTE(review): presumably the length of the longest match, with a sentinel
// for "no match" - verify against the implementation.
int longMatch(std::string stringToMatch);
std::string getPattern();
std::string toString();
// Self-test entry point, run by `kraken --test`.
static void test();
private:
std::string pattern;
RegExState* begin;
std::vector<RegExState*> currentStates;
};
#endif

View File

@@ -0,0 +1,32 @@
#ifndef REGEXSTATE_H
#define REGEXSTATE_H
#include "util.h"
#include "Symbol.h"
#include <string>
#include <vector>
// One node of the regular-expression state graph: a character plus the list
// of states reachable from it.
class RegExState {
public:
RegExState(char inCharacter);
RegExState();
~RegExState();
// Appends nextState to the successor list.
void addNext(RegExState* nextState);
bool characterIs(char inCharacter);
// Returns successor states for advanceCharacter.
// NOTE(review): presumably only the successors whose character matches - TODO confirm.
std::vector<RegExState*> advance(char advanceCharacter);
std::vector<RegExState*> getNextStates();
bool isGoal();
// String renderings; the `avoid` overloads take states to skip, presumably so
// cyclic graphs do not recurse forever - TODO confirm.
std::string toString();
std::string toString(RegExState* avoid);
std::string toString(std::vector<RegExState*>* avoid);
char getCharacter();
private:
// Raw, non-owning-looking pointers to successor states; ownership is not
// established by this header.
std::vector<RegExState*> nextStates;
char character;
};
#endif

View File

@@ -0,0 +1,50 @@
#ifndef REMOVALTRANSFORMATION_H
#define REMOVALTRANSFORMATION_H
#include <queue>
#include <vector>
#include "NodeTransformation.h"
// Tree transformation that strips every child whose data equals a given value.
// The tree is modified in place and the same root pointer is returned.
template<class T>
class RemovalTransformation: public NodeTransformation<T,T> {
public:
    RemovalTransformation(T toRemove);
    ~RemovalTransformation();
    virtual NodeTree<T>* transform(NodeTree<T>* from);

private:
    T toRemove;     // value whose nodes get removed
};

// Remembers the value to remove.
template<class T>
RemovalTransformation<T>::RemovalTransformation(T toRemove) : toRemove(toRemove) {
}

// Nothing to release.
template<class T>
RemovalTransformation<T>::~RemovalTransformation() {
    //
}

// Breadth-first walk from `from`: children equal to toRemove are detached from
// their parent (and not descended into); all other children are queued for
// processing. Returns `from` unchanged as a pointer; a null `from` is returned
// as-is.
template<class T>
NodeTree<T>* RemovalTransformation<T>::transform(NodeTree<T>* from) {
    std::queue<NodeTree<T>*> toProcess;
    toProcess.push(from);
    while(!toProcess.empty()) {
        NodeTree<T>* node = toProcess.front();
        toProcess.pop();
        if (!node)
            continue;
        // Work on a snapshot so removing from `node` does not disturb the iteration.
        std::vector<NodeTree<T>*> children = node->getChildren();
        for (typename std::vector<NodeTree<T>*>::size_type i = 0; i < children.size(); i++) {
            NodeTree<T>* child = children[i];
            // Check for null BEFORE touching the child; the previous order
            // dereferenced it first and only then tested it.
            if (!child)
                continue;
            if (child->getData() == toRemove)
                node->removeChild(child);
            else
                toProcess.push(child);
        }
    }
    return from;
}
// The include guard must also cover the template definitions above, otherwise
// a second inclusion in the same translation unit redefines them.
#endif

View File

@@ -0,0 +1,46 @@
#ifndef STATE_H
#define STATE_H
#ifndef NULL
#define NULL ((void*)0)
#endif
#include "util.h"
#include "ParseRule.h"
#include <vector>
#include <string>
#include <string>
#include <sstream>
// A parser state: a numbered set of ParseRule pointers split into `basis` and
// `remaining`, plus the list of parent states.
class State {
public:
State(int number, ParseRule* basis);
State(int number, ParseRule* basis, State* parent);
~State();
// NOTE(review): in `bool const f(...)` the const applies to the returned bool,
// not to the member function, so these comparisons cannot be called on a
// const State. Changing that requires touching the definitions as well.
bool const operator==(const State &other);
bool const basisEquals(const State &other);
bool const basisEqualsExceptLookahead(const State &other);
bool const operator!=(const State &other);
// Return pointers to containers; presumably to the members below - TODO confirm.
std::vector<ParseRule*>* getBasis();
std::vector<ParseRule*>* getRemaining();
// Returns a vector by value; presumably basis followed by remaining - TODO confirm.
std::vector<ParseRule*> getTotal();
bool containsRule(ParseRule* rule);
void addRuleCombineLookahead(ParseRule* rule);
std::string toString();
void combineStates(State &other);
void addParents(std::vector<State*>* parents);
std::vector<State*>* getParents();
std::vector<State*>* getDeepParents(int depth);
int getNumber();
// Public data members: callers can reach the rule lists directly.
std::vector<ParseRule*> basis;
std::vector<ParseRule*> remaining;
private:
std::vector<State*> parents;
int number;
};
#endif

View File

@@ -0,0 +1,28 @@
#ifndef StringReader_H
#define StringReader_H
#include <vector>
#include <string>
#include <iostream>
// Cursor-style reader over a std::string: keeps the text, a position and an
// end-of-input flag, and hands out words, lines or custom-delimited tokens.
class StringReader
{
public:
StringReader();
StringReader(std::string inputString);
virtual ~StringReader();
// Replaces the text being read.
// NOTE(review): presumably also resets the read position - TODO confirm.
void setString(std::string inputString);
// Token extraction. The bool parameter is named truncateEnd and defaults to
// true; presumably it controls whether the trailing delimiter is dropped via
// the truncateEnd() helper below - verify in StringReader.cpp.
std::string word(bool truncateEnd = true);
std::string line(bool truncateEnd = true);
std::string getTokens(const char *get_chars, bool truncateEnd = true);
std::string truncateEnd(std::string to_truncate);
// Self-test entry point, run by `kraken --test`.
static void test();
protected:
private:
std::string rd_string;
int str_pos;
bool end_reached;
};
#endif

View File

@@ -0,0 +1,37 @@
#ifndef SYMBOL_H
#define SYMBOL_H
#ifndef NULL
#define NULL ((void*)0)
#endif
#include "NodeTree.h"
#include <vector>
#include <string>
// A grammar symbol: a name, an optional value string and a terminal flag.
// Comparable and orderable, so it can be used as a key in ordered containers.
class Symbol {
public:
Symbol();
Symbol(std::string name, bool isTerminal);
Symbol(std::string name, bool isTerminal, std::string value);
Symbol(std::string name, bool isTerminal, NodeTree<Symbol>* tree);
~Symbol();
// Comparison operators are const member functions (trailing const); the
// leading const only qualifies the returned bool.
bool const operator==(const Symbol &other)const;
bool const operator!=(const Symbol &other)const;
bool const operator<(const Symbol &other)const;
std::string getName() const;
std::string getValue() const;
std::string toString() const;
Symbol clone();
// NOTE(review): sub-tree accessors are declared, but no tree member is
// declared in the private section below - confirm these are still defined.
void setSubTree(NodeTree<Symbol>* tree);
NodeTree<Symbol>* getSubTree();
bool isTerminal();
private:
std::string name;
std::string value;
bool terminal;
};
#endif

View File

@@ -0,0 +1,37 @@
#include <fstream>
#include <string>
#include <utility>
#include "util.h"
#include "ParseRule.h"
#include "ParseAction.h"
#include "Symbol.h"
#include "State.h"
#ifndef TABLE_H
#define TABLE_H
// Parse table: for each state number and symbol, a list of ParseAction
// pointers. Can be exported to / imported from a binary blob.
class Table {
public:
Table();
~Table();
// Writes the table to an already opened output file stream.
void exportTable(std::ofstream &file);
// Loads the table from raw bytes.
// NOTE(review): no length is passed, so the data must be self-delimiting and
// trusted - confirm the format in Table.cpp.
void importTable(char* tableData);
void setSymbols(Symbol EOFSymbol, Symbol nullSymbol);
void add(int stateNum, Symbol tranSymbol, ParseAction* action);
void remove(int stateNum, Symbol tranSymbol);
std::vector<ParseAction*>* get(int state, Symbol token);
ParseAction* getShift(int state, Symbol token);
std::vector<std::pair<std::string, ParseAction>> stateAsParseActionVector(int state);
std::string toString();
private:
// Three nested levels of vectors, the outer two holding pointers; presumably
// indexed table[state][symbolIndex] -> actions - TODO confirm.
std::vector< std::vector< std::vector<ParseAction*>* >* > table;
// Presumably maps a column index to its Symbol - TODO confirm.
std::vector<Symbol> symbolIndexVec;
//The EOFSymbol, stored by value (an earlier comment called it a pointer, but the declaration is a plain Symbol)
Symbol EOFSymbol;
//The nullSymbol, also stored by value. Also used in comparisons
Symbol nullSymbol;
};
#endif

View File

@@ -0,0 +1,32 @@
#include <iostream>
#include <string>
#include <stdlib.h>
#include "util.h"
#ifndef TESTER_H
#define TESTER_H
// Test driver used by `kraken --test <names...>`: main() constructs it with
// the compiler invocation (argv[0]) and the grammar file path, then calls
// run() once per test name.
class Tester {
public:
Tester(std::string krakenInvocation, std::string krakenGrammerLocation);
~Tester();
// Runs one test and reports success as a bool. main() passes the test name
// without file extension.
bool run(std::string fileName);
bool compareFiles(std::string file1Path, std::string file2Path);
void cleanExtras(std::string path);
private:
std::string krakenInvocation;
std::string krakenGrammerLocation;
// The remaining strings look like shell command fragments and file
// extensions used to build command lines; their values are set outside this
// header - TODO confirm in Tester.cpp.
std::string removeCmd;
std::string resultsExtention;
std::string expectedExtention;
std::string krakenExtention;
std::string shell;
std::string changePermissions;
std::string redirect;
std::string sep;
std::string cd;
};
#endif

View File

@@ -0,0 +1,64 @@
#ifndef TYPE_H
#define TYPE_H
#ifndef NULL
#define NULL ((void*)0)
#endif
#include <string>
#include <iostream>
#include <set>
//Circular dependency
class ASTData;
#include "ASTData.h"
#include "util.h"
// Base kind of a Type. Note the enumerator spelling `double_percision`; it is
// part of the identifier and must be matched exactly by users.
enum ValueType {none, template_type, template_type_type, void_type, boolean, character, integer, floating, double_percision, function_type };
// Describes the type of a value in the AST: a base kind, an optional
// defining AST node or template definition, traits, function signature
// parts, a reference flag and a pointer indirection count.
class Type {
public:
Type();
Type(ValueType typeIn, int indirectionIn = 0);
Type(ValueType typeIn, std::set<std::string> traitsIn); //Mostly for template type type's
Type(NodeTree<ASTData>* typeDefinitionIn, int indirectionIn = 0);
Type(NodeTree<ASTData>* typeDefinitionIn, std::set<std::string> traitsIn);
Type(ValueType typeIn, NodeTree<ASTData>* typeDefinitionIn, int indirectionIn, bool referenceIn, std::set<std::string> traitsIn);
Type(ValueType typeIn, NodeTree<ASTData>* typeDefinitionIn, int indirectionIn, bool referenceIn, std::set<std::string> traitsIn, std::vector<Type*> parameterTypesIn, Type* returnTypeIn);
// Function-type constructor: parameter types plus return type.
Type(std::vector<Type*> parameterTypesIn, Type* returnTypeIn, bool referenceIn = false);
Type(ValueType typeIn, NodeTree<Symbol>* templateDefinitionIn, std::set<std::string> traitsIn = std::set<std::string>());
~Type();
// Equality with a switch for whether the reference flag takes part.
// NOTE(review): which of operator== / operator!= pass true here is not
// visible from this header.
const bool test_equality(const Type &other, bool care_about_references) const;
bool const operator==(const Type &other)const;
bool const operator!=(const Type &other)const;
bool const operator<(const Type &other)const;
// Returns a heap pointer; the caller presumably owns it - TODO confirm.
Type* clone();
std::string toString(bool showTraits = true);
// Indirection (pointer depth) accessors and mutators.
int getIndirection();
void setIndirection(int indirectionIn);
void increaseIndirection();
void decreaseIndirection();
void modifyIndirection(int mod);
// "with..." helpers return a Type by value or a Type* (the ...Ptr variants and
// withoutReference); presumably modified copies rather than in-place edits -
// verify in Type.cpp.
Type withIncreasedIndirection();
Type withReference();
Type *withReferencePtr();
Type *withIncreasedIndirectionPtr();
Type withDecreasedIndirection();
Type* withoutReference();
// Public data members.
ValueType baseType;
NodeTree<ASTData>* typeDefinition;
NodeTree<Symbol>* templateDefinition;
std::map<std::string, Type*> templateTypeReplacement;
bool templateInstantiated;
std::set<std::string> traits;
std::vector<Type*> parameterTypes;
Type *returnType;
bool is_reference;
private:
int indirection;
};
#endif

View File

@@ -0,0 +1,92 @@
#ifndef UTIL_H
#define UTIL_H
#ifndef NULL
#define NULL ((void*)0)
#endif
#include <iostream>
#include <string>
#include <sstream>
#include <vector>
#include <set>
#include <fstream>
#include <cstring>
int ssystem(std::string command);
std::string intToString(int theInt);
std::string replaceExEscape(std::string first, std::string search, std::string replace);
std::string strSlice(std::string str, int begin, int end);
int findPerenEnd(std::string str, int i);
std::vector<std::string> split(const std::string &str, char delim);
std::string join(const std::vector<std::string> &strVec, std::string joinStr);
std::string readFile(std::istream &file);
std::string padWithSpaces(std::string str, int padTo);
// Three-field counterpart of std::pair: a plain aggregate with public
// members first, second and third.
template <typename T, typename U, typename V>
class triple {
    public:
        T first;
        U second;
        V third;
};

// Convenience factory mirroring std::make_pair: deduces the three field types
// from the arguments and returns the filled-in triple by value.
template <typename T, typename U, typename V>
triple<T,U,V> make_triple(T f, U s, V t) {
    triple<T,U,V> packed = { f, s, t };
    return packed;
}
// Linear membership test: true when some element of vec compares equal to item.
template <typename T>
bool contains(std::vector<T> vec, T item) {
    for (typename std::vector<T>::iterator it = vec.begin(); it != vec.end(); ++it) {
        if (*it == item)
            return true;
    }
    return false;
}
// Concatenates the inner vectors, in order, into one flat vector.
template <typename T>
std::vector<T> flatten(std::vector<std::vector<T>> vec) {
    std::vector<T> merged;
    for (typename std::vector<std::vector<T>>::size_type row = 0; row < vec.size(); ++row)
        merged.insert(merged.end(), vec[row].begin(), vec[row].end());
    return merged;
}
// Returns a new vector holding the elements of vec in reverse order.
template <typename T>
std::vector<T> reverse(std::vector<T> vec) {
    // Range construction from the reverse iterators does the copy back to front.
    return std::vector<T>(vec.rbegin(), vec.rend());
}
// Copies the pointed-to objects of vec into a vector of values, keeping order.
// Every pointer is dereferenced, so none of them may be null.
template <typename T>
std::vector<T> dereferenced(std::vector<T*> vec) {
    std::vector<T> values;
    for (typename std::vector<T*>::size_type idx = 0; idx < vec.size(); ++idx)
        values.push_back(*vec[idx]);
    return values;
}
// Returns the elements vec[begin], vec[begin+step], ... up to (not including)
// index `end`.
//
// Negative indices count from one past the end: -1 means vec.size(), -2 means
// vec.size()-1, and so on. So slice(v, 0, -1) is the whole vector and
// slice(v, 0, -2) drops the last element.
//
// Out-of-range requests are clamped to the vector instead of reading outside
// it, and a non-positive step yields an empty result instead of looping
// forever.
template <typename T>
std::vector<T> slice(std::vector<T> vec, int begin, int end, int step = 1) {
    std::vector<T> toReturn;
    const int size = static_cast<int>(vec.size());
    if (begin < 0)
        begin += size + 1;
    if (end < 0)
        end += size + 1;
    if (step <= 0)
        return toReturn;        // cannot make progress towards `end`
    if (begin < 0)
        begin = 0;              // still negative after wrapping: start at the front
    if (end > size)
        end = size;             // never index past the last element
    for (int i = begin; i < end; i += step)
        toReturn.push_back(vec[i]);
    return toReturn;
}
// True when every element of a is also an element of b (a is a subset of b).
// The empty set is a subset of everything.
template <typename T>
bool subset(std::set<T> a, std::set<T> b) {
    for (typename std::set<T>::const_iterator it = a.begin(); it != a.end(); ++it) {
        if (b.count(*it) == 0)
            return false;
    }
    return true;
}
#endif

View File

@@ -0,0 +1,190 @@
#include <string>
#include <iostream>
#include <fstream>
#include <vector>
#include <cstring>
#include "NodeTree.h"
#include "Symbol.h"
#include "Lexer.h"
#include "RNGLRParser.h"
#include "Importer.h"
#include "ASTData.h"
#include "CGenerator.h"
#include "Poset.h"
#include "util.h"
#include "Tester.h"
// Entry point of the C++ Kraken compiler.
//
// Invocations (see the usage text below):
//   kraken sourceFile.krak [outputName]
//   kraken grammerFile.kgm sourceFile.krak outputName
//   kraken --parse-only grammerFile.kgm sourceFile.krak
//   kraken --test [test names...]
//
// Steps: load the grammar text, reuse the cached parse table from
// "<grammar>.comp" when it matches the grammar (otherwise regenerate and
// rewrite it), import the program, and hand the ASTs to the C generator.
//
// Returns 0 for usage/--test/--parse-only, 1 when the grammar file cannot be
// opened or --parse-only lacks arguments, -1 when the .comp file is not a
// Kraken compiled grammar, and otherwise the value returned by
// CGenerator::generateCompSet.
int main(int argc, char* argv[]) {
    std::vector<std::string> includePaths;
    includePaths.push_back(""); //Local

    if (argc <= 1) {
        std::cerr << "Kraken invocation: kraken sourceFile.krak" << std::endl;
        std::cerr << "Kraken invocation: kraken sourceFile.krak outputName" << std::endl;
        std::cerr << "Kraken invocation: kraken grammerFile.kgm sourceFile.krak outputName" << std::endl;
        std::cerr << "Or for testing do: kraken --test [optional list of names of file (.krak .expected_results) without extentions to run]" << std::endl;
        return 0;
    }
    std::string grammerFileString = "../krakenGrammer.kgm";

    if (argc >= 2 && std::string(argv[1]) == "--test") {
        StringReader::test();
        RegEx::test();
        Lexer::test();
        //std::cout << strSlice("123", 0, -1) << std::endl;
        Poset<int>::test();

        if (argc >= 3) {
            std::string testResults, line;
            int passed = 0, failed = 0;
            Tester test(argv[0], grammerFileString);
            // find the max length so we can pad the string and align the results
            int maxLineLength = 0;
            for (int i = 2; i < argc; i++) {
                const int strLen = static_cast<int>(std::string(argv[i]).length());
                if (maxLineLength < strLen)
                    maxLineLength = strLen;
            }
            for (int i = 2; i < argc; i++) {
                bool result = test.run(argv[i]);
                if (result) {
                    line = padWithSpaces(std::string(argv[i]), maxLineLength) + "\t\tpassed!\n";
                    passed++;
                } else {
                    line = padWithSpaces(std::string(argv[i]), maxLineLength) + "\t\tFAILED!!!!\n";
                    failed++;
                }
                std::cout << line << std::endl;
                testResults += line;
            }
            std::cout << "===========Done Testing===========" << std::endl;
            std::cout << testResults << std::endl;
            std::cout << "Test results: " << passed << "/" << passed+failed << std::endl;
        }
        return 0;
    }

    // Strip the trailing executable name from argv[0] to get the install directory.
    std::string krakenDir = argv[0];
    krakenDir = strSlice(krakenDir, 0, -(static_cast<int>(std::string("kraken").length()) + 1));
    includePaths.push_back(krakenDir + "stdlib/"); //Add the stdlib directory that exists in the same directory as the kraken executable to the path

    std::string programName;
    std::string outputName;
    bool parse_only = false;
    //std::cout << "argv[1] == " << argv[1] << std::endl;
    if (std::string(argv[1]) == "--parse-only") {
        // Both the grammar and the source file are mandatory in this mode;
        // without this check argv[2]/argv[3] would be read out of bounds.
        if (argc < 4) {
            std::cerr << "Kraken invocation: kraken --parse-only grammerFile.kgm sourceFile.krak" << std::endl;
            return 1;
        }
        parse_only = true;
        grammerFileString = argv[2];
        programName = argv[3];
        //outputName = argv[3];
    } else if (argc > 3) {
        grammerFileString = argv[1];
        programName = argv[2];
        outputName = argv[3];
    } else if (argc == 3) {
        programName = argv[1];
        outputName = argv[2];
    } else {
        programName = argv[1];
        outputName = join(slice(split(programName, '.'), 0, -2), "."); // without extension
    }

    std::ifstream grammerInFile, compiledGrammerInFile;
    std::ofstream compiledGrammerOutFile;

    grammerInFile.open(grammerFileString);
    if (!grammerInFile.is_open()) {
        std::cerr << "Problem opening grammerInFile " << grammerFileString << "\n";
        return(1);
    }

    // Opened at the end (ios::ate) so tellg() below reports the file size.
    compiledGrammerInFile.open(grammerFileString + ".comp", std::ios::binary | std::ios::ate);
    if (!compiledGrammerInFile.is_open())
        std::cerr << "Problem opening compiledGrammerInFile " << grammerFileString + ".comp" << "\n";

    //Read the input file into a string
    // NOTE: this loop shape (including the newline appended after the final,
    // failed getline) defines the exact text stored in and compared against
    // the .comp file, so it is kept as is.
    std::string grammerInputFileString;
    std::string line;
    while(grammerInFile.good()) {
        getline(grammerInFile, line);
        grammerInputFileString.append(line+"\n");
    }
    grammerInFile.close();

    RNGLRParser parser;
    parser.loadGrammer(grammerInputFileString);

    //Start binary stuff
    // Layout of the .comp file as written further down:
    //   "KRAK" | int length (grammar text incl. null terminator) | grammar text | table
    bool compGramGood = false;
    if (compiledGrammerInFile.is_open()) {
        //std::cout << "Compiled grammer file exists, reading it in" << std::endl;
        const std::streamoff compGramSize = compiledGrammerInFile.tellg();
        std::vector<char> binaryTable(compGramSize > 0 ? static_cast<std::size_t>(compGramSize) : 0);
        compiledGrammerInFile.seekg(0, std::ios::beg);
        if (!binaryTable.empty()) {
            compiledGrammerInFile.read(binaryTable.data(), static_cast<std::streamsize>(binaryTable.size()));
            // Only trust the bytes that were actually read.
            binaryTable.resize(static_cast<std::size_t>(compiledGrammerInFile.gcount()));
        }
        compiledGrammerInFile.close();

        const std::size_t headerSize = 4 + sizeof(int);
        //Check magic number
        const bool hasMagic = binaryTable.size() >= 4 && std::memcmp(binaryTable.data(), "KRAK", 4) == 0;
        if (hasMagic) {
            //std::cout << "Valid Kraken Compiled Grammer File" << std::endl;
            // A file holding only the magic is what an interrupted table
            // generation leaves behind; treat any truncated or inconsistent
            // header as "stale" so the table gets re-created.
            int gramStringLength = 0;
            bool headerIntact = binaryTable.size() >= headerSize;
            if (headerIntact) {
                std::memcpy(&gramStringLength, binaryTable.data() + 4, sizeof(int));
                headerIntact = gramStringLength > 0
                            && static_cast<std::size_t>(gramStringLength) <= binaryTable.size() - headerSize;
            }
            //std::cout << "The grammer string is stored to be " << gramStringLength << " characters long, gramString is "
            //<< grammerInputFileString.length() << " long. Remember 1 extra for null terminator!" << std::endl;
            if (!headerIntact
                    || grammerInputFileString.length() + 1 != static_cast<std::size_t>(gramStringLength)
                    || std::strncmp(grammerInputFileString.c_str(), binaryTable.data() + headerSize, gramStringLength) != 0) {
                //(the stored length is one more than the string length because the null terminator is stored)
                std::cout << "The Grammer has been changed, will re-create" << std::endl;
            } else {
                compGramGood = true;
                //std::cout << "Grammer file is up to date." << std::endl;
                // NOTE(review): importTable receives no length, so a table
                // section that is itself truncated cannot be detected here.
                parser.importTable(binaryTable.data() + headerSize + gramStringLength); //Load table starting at the table section
            }
        } else {
            std::cerr << grammerFileString << ".comp is NOT A Valid Kraken Compiled Grammer File, aborting" << std::endl;
            return -1;
        }
    }

    if (!compGramGood) {
        //The load failed because either the file does not exist or it is not up-to-date.
        std::cout << "Compiled grammer file does not exist or is not up-to-date, generating table and writing it out" << std::endl;
        compiledGrammerOutFile.open(grammerFileString + ".comp", std::ios::binary);
        // Even if the cache file cannot be written we must carry on: the state
        // set still has to be created for the parse below. Writes to the
        // unopened stream simply fail.
        if (!compiledGrammerOutFile.is_open())
            std::cerr << "Could not open compiled file to write either!" << std::endl;
        compiledGrammerOutFile.write("KRAK", sizeof(char)*4); //Let us know when we load it that this is a kraken grammer file, but don't write out
        compiledGrammerOutFile.flush(); // the grammer txt until we create the set, so that if we fail creating it it won't look valid
        parser.createStateSet();
        const int storedLength = static_cast<int>(grammerInputFileString.length() + 1);
        compiledGrammerOutFile.write(reinterpret_cast<const char*>(&storedLength), sizeof(int));
        compiledGrammerOutFile.write(grammerInputFileString.c_str(), grammerInputFileString.length()+1); //Don't forget null terminator

        parser.exportTable(compiledGrammerOutFile);
        compiledGrammerOutFile.close();
    }
    //End binary stuff

    //std::cout << "\nParsing" << std::endl;
    //std::cout << "\toutput name: " << outputName << std::endl;
    //std::cout << "\tprogram name: " << programName << std::endl;
    Importer importer(&parser, includePaths, outputName, parse_only); // Output name for directory to put stuff in

    //for (auto i : includePaths)
    //std::cout << i << std::endl;

    importer.import(programName);
    std::map<std::string, NodeTree<ASTData>*> ASTs = importer.getASTMap();
    if (parse_only)
        return 0;

    //Do optimization, etc. here.
    //None at this time, instead going straight to C in this first (more naive) version

    //Code generation
    //For right now, just C

    // return code from calling C compiler
    return CGenerator().generateCompSet(ASTs, outputName);
}

View File

@@ -0,0 +1,88 @@
#include "ASTData.h"
// Default node: kind `undef`, no value type attached.
ASTData::ASTData()
    : type(undef), valueType(NULL) {
}
// Node of the given kind with the given value type; the symbol stays
// default-constructed.
ASTData::ASTData(ASTType type, Type *valueType)
    : type(type), valueType(valueType) {
}
// Node of the given kind carrying both a symbol and a value type.
ASTData::ASTData(ASTType type, Symbol symbol, Type *valueType)
    : type(type), valueType(valueType), symbol(symbol) {
}
// Destructor. Does nothing; in particular the Type pointed to by valueType is
// not deleted here.
ASTData::~ASTData() {
}
// Renders the node as "<kind> [ <symbol>] <value type>", where the symbol
// part only appears for terminal symbols and a missing value type is shown as
// "no_type".
std::string ASTData::toString() {
    std::string text = ASTTypeToString(type);
    text += " ";
    if (symbol.isTerminal())
        text += " " + symbol.toString();
    text += " ";
    if (valueType)
        text += valueType->toString();
    else
        text += "no_type";
    return text;
}
// Maps an ASTType enumerator to its name, for debugging output.
//
// Every named enumerator except `undef` has a case; `undef` and any value
// outside the enum fall through to "unknown_ASTType" as before. Compared to
// the earlier version, in_passthrough_params no longer reports itself as
// "out_passthrough_params", and out_passthrough_params, adt_def,
// match_statement and case_statement are now named instead of unknown.
std::string ASTData::ASTTypeToString(ASTType type) {
    switch (type) {
        case translation_unit:
            return "translation_unit";
        case identifier:
            return "identifier";
        case import:
            return "import";
        case function:
            return "function";
        case type_def:
            return "type_def";
        case adt_def:
            return "adt_def";
        case code_block:
            return "code_block";
        case typed_parameter:
            return "typed_parameter";
        case expression:
            return "expression";
        case boolean_expression:
            return "boolean_expression";
        case statement:
            return "statement";
        case if_statement:
            return "if_statement";
        case match_statement:
            return "match_statement";
        case case_statement:
            return "case_statement";
        case while_loop:
            return "while_loop";
        case for_loop:
            return "for_loop";
        case return_statement:
            return "return_statement";
        case break_statement:
            return "break_statement";
        case continue_statement:
            return "continue_statement";
        case defer_statement:
            return "defer_statement";
        case assignment_statement:
            return "assignment_statement";
        case declaration_statement:
            return "declaration_statement";
        case if_comp:
            return "if_comp";
        case simple_passthrough:
            return "simple_passthrough";
        case passthrough_params:
            return "passthrough_params";
        case in_passthrough_params:
            return "in_passthrough_params";
        case out_passthrough_params:
            return "out_passthrough_params";
        case param_assign:
            return "param_assign";
        case opt_string:
            return "opt_string";
        case function_call:
            return "function_call";
        case value:
            return "value";
        default:
            return "unknown_ASTType";
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,45 @@
#include "CCodeTriple.h"
// Builds a triple from explicit pre-code, value and post-code strings.
CCodeTriple::CCodeTriple(std::string pre, std::string val, std::string post)
    : preValue(pre), value(val), postValue(post) {
}
// Builds a triple holding only a value; pre- and post-code stay empty.
CCodeTriple::CCodeTriple(std::string val)
    : value(val) {
}
// Same as the std::string overload, for string literals: value only, with
// empty pre- and post-code.
CCodeTriple::CCodeTriple(const char* val)
    : value(val) {
}
// Default constructor: the body sets nothing, so the members are left in
// their default-constructed state.
CCodeTriple::CCodeTriple() {
}
// Destructor: nothing to release explicitly.
CCodeTriple::~CCodeTriple() {
}
// Flattens the triple into pre + value + post. When endValue is true a ";" is
// placed directly after the value part.
std::string CCodeTriple::oneString(bool endValue) {
    std::string flattened = preValue + value;
    if (endValue)
        flattened += ";";
    flattened += postValue;
    return flattened;
}
// Member-wise copy assignment; returns *this so assignments can be chained.
CCodeTriple & CCodeTriple::operator=(const CCodeTriple &rhs) {
    CCodeTriple &self = *this;
    self.postValue = rhs.postValue;
    self.value = rhs.value;
    self.preValue = rhs.preValue;
    return self;
}
// Appends rhs to this triple. Pre-code and value are appended at the end,
// while post-code is prepended: the result's postValue is rhs.postValue
// followed by the existing postValue.
CCodeTriple & CCodeTriple::operator+=(const CCodeTriple &rhs) {
    preValue.append(rhs.preValue);
    //preValue = rhs.preValue + preValue;
    value.append(rhs.value);

    // Build the new post-code in a temporary so rhs may alias *this safely.
    std::string combinedPost = rhs.postValue;
    combinedPost += postValue;
    postValue.swap(combinedPost);
    return *this;
}
// Concatenates two triples without modifying either: pre-code and value are
// joined a-then-b, post-code is joined b-then-a.
CCodeTriple operator+(const CCodeTriple &a, const CCodeTriple &b) {
    const std::string joinedPre = a.preValue + b.preValue;
    const std::string joinedValue = a.value + b.value;
    const std::string joinedPost = b.postValue + a.postValue;
    return CCodeTriple(joinedPre, joinedValue, joinedPost);
    //return CCodeTriple(b.preValue + a.preValue, a.value + b.value, b.postValue + a.postValue);
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,145 @@
#include "GraphStructuredStack.h"
// Constructs an empty graph structured stack; the body performs no
// initialisation of its own.
GraphStructuredStack::GraphStructuredStack() {
//
}
// Destructor. The body is empty, so the frontier vectors allocated with `new`
// in addToFrontier are not freed here.
// NOTE(review): confirm whether they are released anywhere else.
GraphStructuredStack::~GraphStructuredStack() {
//
}
// Allocates a fresh stack node named "gssNode" that carries the given state
// number. The node is heap-allocated and returned as a raw pointer.
NodeTree<int>* GraphStructuredStack::newNode(int stateNum) {
    NodeTree<int>* freshNode = new NodeTree<int>("gssNode", stateNum);
    return freshNode;
}
void GraphStructuredStack::addToFrontier(int frontier, NodeTree<int>* node) {
//First, make sure our vector has this and lesser frontiers. If not, add it and up to it
while (gss.size() <= frontier) {
gss.push_back(new std::vector<NodeTree<int>*>());
}
gss[frontier]->push_back(node);
containing_frontier_map[node] = frontier;
}
// Looks for a node with state number `state` in the given frontier. Returns
// the first such node, or NULL when the frontier is missing/empty or holds no
// node with that state.
NodeTree<int>* GraphStructuredStack::inFrontier(int frontier, int state) {
    if (frontierIsEmpty(frontier))
        return NULL;

    std::vector<NodeTree<int>*>& bucket = *gss[frontier];
    for (std::vector<NodeTree<int>*>::iterator it = bucket.begin(); it != bucket.end(); ++it) {
        if ((*it)->getData() == state)
            return *it;
    }
    return NULL;
}
// Returns the frontier index recorded for `node` by addToFrontier, or -1 when
// the node was never added. This is a map lookup rather than a scan over all
// frontiers.
int GraphStructuredStack::getContainingFrontier(NodeTree<int>* node) {
    auto entry = containing_frontier_map.find(node);
    if (entry == containing_frontier_map.end())
        return -1;
    return entry->second;
}
// True when the frontier index is beyond the frontiers created so far, or the
// frontier exists but holds no nodes.
bool GraphStructuredStack::frontierIsEmpty(int frontier) {
    if (frontier >= gss.size())
        return true;
    return gss[frontier]->empty();
}
// Returns the accepting-state node of the given frontier, or NULL if there is
// none.
NodeTree<int>* GraphStructuredStack::frontierGetAccState(int frontier) {
    //The acc state is always state 1, for now
    const int acceptingState = 1;
    return inFrontier(frontier, acceptingState);
}
// Returns every node reached by following exactly `length` child edges from
// `start`. A node reachable along several paths appears once per path. With
// length 0 the result contains just `start`.
//
// The result vector is allocated with `new` and handed to the caller.
//
// The leftover debug output (child state numbers plus a newline per visited
// node, whose leading half was already commented out) has been removed so the
// function no longer writes to stdout.
std::vector<NodeTree<int>*>* GraphStructuredStack::getReachable(NodeTree<int>* start, int length) {
    std::queue<NodeTree<int>*> currentNodes;
    currentNodes.push(start);

    // Advance one edge per iteration: replace the current level by its children.
    for (int depth = 0; depth < length; depth++) {
        std::queue<NodeTree<int>*> nextNodes;
        while (!currentNodes.empty()) {
            NodeTree<int>* currentNode = currentNodes.front();
            currentNodes.pop();
            std::vector<NodeTree<int>*> children = currentNode->getChildren();
            for (std::vector<NodeTree<int>*>::size_type j = 0; j < children.size(); j++)
                nextNodes.push(children[j]);
        }
        currentNodes.swap(nextNodes);
    }

    std::vector<NodeTree<int>*>* reachableList = new std::vector<NodeTree<int>*>();
    while (!currentNodes.empty()) {
        reachableList->push_back(currentNodes.front());
        //std::cout << currentNodes.front()->getData() << " is reachable from " << start->getData() << " by length " << length << std::endl;
        currentNodes.pop();
    }
    return reachableList;
}
// Collects every path of exactly `length` child edges starting at `start`.
// Each path lists its nodes from `start` onwards. The result is allocated
// with `new` and handed to the caller.
std::vector<std::vector<NodeTree<int>*> >* GraphStructuredStack::getReachablePaths(NodeTree<int>* start, int length) {
    typedef std::vector<NodeTree<int>*> Path;
    std::vector<Path>* collected = new std::vector<Path>();
    Path emptyPrefix;
    recursivePathFind(start, length, emptyPrefix, collected);
    return collected;
}
// Depth-first helper for getReachablePaths. Extends the path by `start`; when
// no edges remain the finished path is stored in `paths`, otherwise the search
// continues through every child with one edge fewer. currentPath is taken by
// value on purpose so sibling branches do not see each other's additions.
void GraphStructuredStack::recursivePathFind(NodeTree<int>* start, int length, std::vector<NodeTree<int>*> currentPath, std::vector<std::vector<NodeTree<int>*> >* paths) {
    currentPath.push_back(start);
    if (length != 0) {
        std::vector<NodeTree<int>*> successors = start->getChildren();
        for (std::vector<NodeTree<int>*>::iterator it = successors.begin(); it != successors.end(); ++it)
            recursivePathFind(*it, length-1, currentPath, paths);
    } else {
        paths->push_back(currentPath);
    }
}
// True when `end` is registered as a child of `start`.
bool GraphStructuredStack::hasEdge(NodeTree<int>* start, NodeTree<int>* end) {
    //Really, either testing for parent or child should work.
    const auto childIndex = start->findChild(end);
    return childIndex != -1;
}
// Returns the label stored for the edge start -> end by addEdge, or NULL when
// no such edge was recorded.
//
// Uses find() instead of operator[]: the subscript form inserted a NULL entry
// into the edge map for every pair that was merely asked about.
NodeTree<Symbol>* GraphStructuredStack::getEdge(NodeTree<int>* start, NodeTree<int>* end) {
    auto found = edges.find(std::make_pair(start, end));
    if (found == edges.end())
        return NULL;
    return found->second;
}
// Links `start` to `end` in both directions (child and parent lists) and
// stores `edge` as the label for that pair, replacing any earlier label.
void GraphStructuredStack::addEdge(NodeTree<int>* start, NodeTree<int>* end, NodeTree<Symbol>* edge) {
    start->addChild(end);
    end->addParent(start);

    const std::pair<NodeTree<int>*, NodeTree<int>*> endpoints(start, end);
    edges[endpoints] = edge;
}
std::vector<int> GraphStructuredStack::getFrontier(int frontier) {
std::vector<int> toReturn;
for (int i = 0; i < gss[frontier]->size(); i++)
toReturn.push_back((*(gss[frontier]))[i]->getData());
return toReturn;
}
std::string GraphStructuredStack::toString() {
std::string tostring = "";
for (std::vector<std::vector<NodeTree<int>*>*>::size_type i = 0; i < gss.size(); i++) {
tostring += "Frontier: " + intToString(i) + "\n";
for (std::vector<NodeTree<int>*>::size_type j = 0; j < gss[i]->size(); j++) {
tostring += "|" + intToString((*(gss[i]))[j]->getData()) + "| ";
}
tostring += "\n";
}
return tostring;
}
void GraphStructuredStack::clear() {
gss.clear();
edges.clear();
}

View File

@@ -0,0 +1,238 @@
#include "Importer.h"
#ifdef _WIN32
#include <unistd.h>
#define mkdir( A, B ) mkdir(A)
#endif
// Sets up an importer around an existing parser.
//
// Unless only parsing was requested, the output directory "./<outputName>" is
// created (mode 0755); a failure there is deliberately ignored. The constructor
// then creates the AST transformer and fills the two symbol lists used when
// trimming parse trees: symbols to remove and symbols to collapse.
Importer::Importer(Parser* parserIn, std::vector<std::string> includePaths, std::string outputNameIn, bool only_parseIn) {
    only_parse = only_parseIn;
    outputName = outputNameIn;

    if (!only_parse) {
        const std::string outputDir = "./" + outputName;
        if (mkdir(outputDir.c_str(), 0755)) {
            // Failure is intentionally not reported.
            //std::cerr << "\n\n =====IMPORTER===== \n\n" << std::endl;
            //std::cerr << "Could not make directory " << outputName << std::endl;
        }
    }

    parser = parserIn;
    this->includePaths = includePaths;
    ASTTransformer = new ASTTransformation(this);

    struct TrimEntry {
        const char* name;
        bool isTerminal;
    };

    // Symbols dropped from the parse tree, in registration order.
    // Deliberately NOT in this list: "import", "simple_passthrough", "match".
    static const TrimEntry removeEntries[] = {
        { "$NULL$", true },
        { "WS", false },
        { "\\(", true },
        { "\\)", true },
        { "var", true },
        { "fun", true },
        { ";", true },
        { "line_end", false },
        { "{", true },
        { "}", true },
        { "(", true },
        { ")", true },
        { "if", true },
        { "while", true },
        { "__if_comp__", true },
        { "comp_simple_passthrough", true },
        { "def_nonterm", false },
        { "obj_nonterm", false },
        { "adt_nonterm", false },
        { "template", true },
        { "\\|", true },
    };
    for (const TrimEntry& entry : removeEntries)
        removeSymbols.push_back(Symbol(entry.name, entry.isTerminal));

    // Non-terminals collapsed into their parent, in registration order.
    // Deliberately NOT in this list: "pre_reffed".
    static const char* const collapseNames[] = {
        "case_statement_list",
        "opt_param_assign_list",
        "param_assign_list",
        "opt_typed_parameter_list",
        "opt_parameter_list",
        "identifier_list",
        "adt_option_list",
        "statement_list",
        "parameter_list",
        "typed_parameter_list",
        "unorderd_list_part",
        "if_comp_pred",
        "declaration_block",
        "type_list",
        "opt_type_list",
        "template_param_list",
        "trait_list",
        "dec_type",
    };
    for (const char* nonTerminal : collapseNames)
        collapseSymbols.push_back(Symbol(nonTerminal, false));
}
// Releases the ASTTransformation created in the constructor. The parser
// passed to the constructor is not deleted here.
Importer::~Importer() {
//destructor
delete ASTTransformer;
}
void Importer::registerAST(std::string name, NodeTree<ASTData>* ast, NodeTree<Symbol>* syntaxTree) {
imported[name] = ast;
importedTrips.push_back({name, ast, syntaxTree});
std::cout << "REGISTERD " << name << std::endl;
}
// Returns the AST already registered under fileName, or NULL when that file
// has not been imported yet.
NodeTree<ASTData>* Importer::getUnit(std::string fileName) {
    //std::cout << "\n\nImporting " << fileName << " ";
    auto known = imported.find(fileName);
    if (known == imported.end()) {
        //std::cout << "Not yet imported" << std::endl;
        return NULL;
    }
    //std::cout << "Already Imported!" << std::endl;
    return known->second;
}
// Imports fileName up to and including the first transformation pass.
// Returns the already registered AST if there is one; otherwise parses and
// trims the file and (unless only parsing) runs the transformer's first pass.
// Returns NULL when parsing fails or when running in parse-only mode.
NodeTree<ASTData>* Importer::importFirstPass(std::string fileName) {
    NodeTree<ASTData>* ast = getUnit(fileName);
    if (ast != NULL)
        return ast;

    NodeTree<Symbol>* parseTree = parseAndTrim(fileName);
    if (!parseTree)
        return NULL;

    if (only_parse)
        return ast;     // still NULL: nothing gets transformed in parse-only mode

    //Call with ourself to allow the transformation to call us to import files that it needs
    return ASTTransformer->firstPass(fileName, parseTree); //This firstPass will register itself
}
// Imports fileName and everything it pulls in, then runs the remaining
// transformation passes over every registered unit:
//   1. first pass on the root file (imports and registers the other files),
//   2. second pass, 3. third pass, 4. fourth pass repeated until no unit
//   reports a change.
// In parse-only mode only step 1 runs. Finally a "<name>out.AST.dot" file is
// opened per unit in the output directory; the graph dump itself is currently
// disabled, so the files stay empty.
//
// The NULL-AST diagnostic now names the unit being processed (i.name) instead
// of always printing the root file name.
void Importer::import(std::string fileName) {

    //Start the ball rolling by importing and running the first pass on the first file.
    //This will import, first pass and register all the other files too.

    //std::cout << "\n\n =====FIRST PASS===== \n\n" << std::endl;
    importFirstPass(fileName); //First pass defines all objects
    if (only_parse)
        return;

    // NOTE(review): the loops below take each triplet by value, as before. If a
    // pass can register further units while iterating, that would invalidate
    // the iteration - confirm that registration only happens in the first pass.
    std::cout << "\n\n =====SECOND PASS===== \n\n" << std::endl;
    for (importTriplet i : importedTrips) { //Second pass defines data inside objects, outside declaration statements,
        std::cout << "\n\nSecond pass for: " << i.name << std::endl;
        ASTTransformer->secondPass(i.ast, i.syntaxTree); //function prototypes, and identifiers (as we now have all type defs)
    }

    std::cout << "\n\n =====THIRD PASS===== \n\n" << std::endl;
    for (importTriplet i : importedTrips) { //Third pass does all function bodies
        std::cout << "\n\nThird pass for: " << i.name << std::endl;
        ASTTransformer->thirdPass(i.ast, i.syntaxTree);
    }

    std::cout << "\n\n =====FOURTH PASS===== \n\n" << std::endl;
    bool changed = true;
    while (changed) {
        changed = false;
        for (importTriplet i : importedTrips) { //Fourth pass finishes up by doing all template classes
            std::cout << "\n\nFourth pass for: " << i.name << std::endl;
            // Once a change was seen in this sweep, the remaining units are
            // skipped until the next sweep (same as before).
            changed = changed ? changed : ASTTransformer->fourthPass(i.ast, i.syntaxTree);
        }
    }

    //Note that class template instantiation can happen in the second or third passes and that function template instantion
    //can happen in the third pass.

    std::ofstream outFileAST;
    for (importTriplet i : importedTrips) {
        std::string outputFileName = outputName + "/" + i.name + "out";
        outFileAST.open((outputFileName + ".AST.dot").c_str());
        if (!outFileAST.is_open()) {
            std::cout << "Problem opening second output file " << outputFileName + ".AST.dot" << "\n";
            return;
        }
        if (i.ast) {
            //outFileAST << i.ast->DOTGraphString() << std::endl;
        } else {
            std::cout << "Tree returned from ASTTransformation for " << i.name << " is NULL!" << std::endl;
        }
        outFileAST.close();
    }
}
// Finds `fileName` under one of the include paths, reads it completely, parses it and,
// unless only_parse is set, applies every removal and collapse transformation to the tree.
//
// Returns:
//   - NULL if the file could not be opened through any include path;
//   - the raw parse tree if only_parse is set;
//   - otherwise the transformed tree (a NULL result of the transformations is logged and returned).
// Throws a C-string literal when the parser itself returns NULL.
NodeTree<Symbol>* Importer::parseAndTrim(std::string fileName) {
std::ifstream programInFile;
//std::ofstream outFile, outFileTransformed;
//std::cout << "outputName " << outputName << std::endl;
//std::cout << "fileName " << fileName << std::endl;
// outputFileName is only consumed by the commented-out debug output further down.
auto pathPieces = split(fileName, '/');
std::string outputFileName = outputName + "/" + pathPieces[pathPieces.size()-1] + "out";
//std::cout << "outputFileName " << outputFileName << std::endl;
std::string inputFileName;
// Try each include path as a prefix; the first one that opens wins.
for (auto i : includePaths) {
programInFile.open(i+fileName);
if (programInFile.is_open()) {
inputFileName = i+fileName;
break;
} else {
std::cout << i+fileName << " is no good" << std::endl;
}
}
if (!programInFile.is_open()) {
std::cout << "Problem opening programInFile " << fileName << "\n";
return NULL;
}
//outFile.open(outputFileName);
//if (!outFile.is_open()) {
//std::cout << "Probelm opening output file " << outputFileName << "\n";
//return NULL;
//}
//outFileTransformed.open((outputFileName + ".transformed.dot").c_str());
//if (!outFileTransformed.is_open()) {
//std::cout << "Probelm opening second output file " << outputFileName + ".transformed.dot" << "\n";
//return NULL;
//}
// Slurp the file line by line, re-adding the newline getline strips.
// NOTE: good() is tested before the read, so the iteration that runs into EOF
// still appends line + "\n" to the buffer.
std::string programInputFileString, line;
while(programInFile.good()) {
getline(programInFile, line);
programInputFileString.append(line+"\n");
}
programInFile.close();
//std::cout << programInputFileString << std::endl;
// The third argument is passed as the parser's highlight_errors flag.
NodeTree<Symbol>* parseTree = parser->parseInput(programInputFileString, inputFileName, !only_parse);
if (parseTree) {
//std::cout << parseTree->DOTGraphString() << std::endl;
//outFile << parseTree->DOTGraphString() << std::endl;
} else {
std::cout << "ParseTree returned from parser for " << fileName << " is NULL!" << std::endl;
//outFile.close(); outFileTransformed.close();
// A failed parse aborts via exception; the return below is never reached.
throw "unexceptablblllll";
return NULL;
}
// In parse-only mode the tree is handed back untouched.
if (only_parse)
return parseTree;
//outFile.close();
//Remove Transformations
for (int i = 0; i < removeSymbols.size(); i++)
parseTree = RemovalTransformation<Symbol>(removeSymbols[i]).transform(parseTree);
//Collapse Transformations
for (int i = 0; i < collapseSymbols.size(); i++)
parseTree = CollapseTransformation<Symbol>(collapseSymbols[i]).transform(parseTree);
if (parseTree) {
//outFileTransformed << parseTree->DOTGraphString() << std::endl;
} else {
std::cout << "Tree returned from transformation is NULL!" << std::endl;
}
//outFileTransformed.close();
std::cout << "Returning parse tree" << std::endl;
return parseTree;
}
// Returns a copy of the name -> AST map of every registered unit.
std::map<std::string, NodeTree<ASTData>*> Importer::getASTMap() {
    return this->imported;
}

View File

@@ -0,0 +1,120 @@
#include "Lexer.h"
#include <cassert>
// Creates a lexer with no input, positioned at offset 0.
Lexer::Lexer() : currentPosition(0) {
}
// Creates a lexer over `inputString`, positioned at offset 0.
Lexer::Lexer(std::string inputString) : currentPosition(0) {
    this->input = inputString;
}
// Nothing is released here.
Lexer::~Lexer() {}
void Lexer::setInput(std::string inputString) {
input = inputString;
}
void Lexer::addRegEx(std::string regExString) {
regExs.push_back(new RegEx(regExString));
}
// Produces the next token of the input.
//
// Returns "$EOF$" once the position has reached the end of the input. Otherwise
// every registered pattern is run against the remaining text and the one with
// the largest longMatch() result wins (the earliest registered pattern wins
// ties). The winner's pattern string becomes the symbol name and the consumed
// text its value. If no pattern reports a result above -1, "$INVALID$" is
// returned and the position is left untouched.
Symbol Lexer::next() {
    if (currentPosition >= input.length())
        return Symbol("$EOF$", true);

    std::string rest = input.substr(currentPosition);
    RegEx* winner = NULL;
    int winnerLength = -1;
    for (RegEx* candidate : regExs) {
        int matched = candidate->longMatch(rest);
        if (matched > winnerLength) {
            winnerLength = matched;
            winner = candidate;
        }
    }

    if (winner == NULL)
        return Symbol("$INVALID$", true);

    std::string consumed = input.substr(currentPosition, winnerLength);
    currentPosition += winnerLength;
    return Symbol(winner->getPattern(), true, consumed);
}
void Lexer::test() {
Symbol s;
{
Lexer lex;
lex.addRegEx("b");
lex.setInput("bb");
s = lex.next();
assert(s.getName() == "b" && s.getValue() == "b");
s = lex.next();
assert(s.getName() == "b" && s.getValue() == "b");
assert(lex.next() == Symbol("$EOF$", true));
}
{
Lexer lex;
lex.addRegEx("a*");
lex.addRegEx("b");
lex.setInput("aaabaabb");
s = lex.next();
assert(s.getName() == "a*" && s.getValue() == "aaa");
s = lex.next();
assert(s.getName() == "b" && s.getValue() == "b");
s = lex.next();
assert(s.getName() == "a*" && s.getValue() == "aa");
s = lex.next();
assert(s.getName() == "b" && s.getValue() == "b");
s = lex.next();
assert(s.getName() == "b" && s.getValue() == "b");
assert(lex.next() == Symbol("$EOF$", true));
}
// Test a lexer error condition.
{
Lexer lex;
lex.addRegEx("a|b");
lex.setInput("blah");
s = lex.next();
assert(s.getName() == "a|b" && s.getValue() == "b");
assert(lex.next() == Symbol("$INVALID$", true));
}
// Lexer can consume all the input at once.
{
Lexer lex;
lex.addRegEx("xyzzy");
lex.setInput("xyzzy");
s = lex.next();
assert(s.getName() == "xyzzy" && s.getValue() == "xyzzy");
assert(lex.next() == Symbol("$EOF$", true));
}
// Lexer produces the longest match, not the first.
{
Lexer lex;
lex.addRegEx("int");
lex.addRegEx("(i|n|t|e)+");
lex.setInput("intent");
s = lex.next();
assert(s.getName() == "(i|n|t|e)+" && s.getValue() == "intent");
}
std::cout << "Lexer tests passed\n";
}
void Lexer::reset() {
currentPosition = 0;
}

View File

@@ -0,0 +1,79 @@
#include "ParseAction.h"
// Builds an action that carries neither a reduce rule (NULL) nor a shift target (-1).
ParseAction::ParseAction(ActionType action)
    : action(action), reduceRule(NULL), shiftState(-1) {
}
// Builds an action that refers to `reduceRule`; the shift target is set to -1.
// The rule pointer is stored as given, not copied.
ParseAction::ParseAction(ActionType action, ParseRule* reduceRule)
    : action(action), reduceRule(reduceRule), shiftState(-1) {
}
// Builds an action that targets state `shiftState`; the reduce rule is set to NULL.
ParseAction::ParseAction(ActionType action, int shiftState)
    : action(action), reduceRule(NULL), shiftState(shiftState) {
}
// The reduce rule pointer is not deleted here.
ParseAction::~ParseAction() {}
// Compares two actions while ignoring the lookahead of their reduce rules.
// Actions are equal when the action kind and shift state match and the reduce
// rules are either the same pointer or equal except for lookahead.
// An action with a rule never equals one without (the previous code
// dereferenced the NULL pointer in that case).
const bool ParseAction::equalsExceptLookahead(const ParseAction &other) const {
    if (action != other.action || shiftState != other.shiftState)
        return false;
    if (reduceRule == other.reduceRule)
        return true;    // same rule object, or both NULL
    if (reduceRule == NULL || other.reduceRule == NULL)
        return false;   // exactly one side has a rule
    return reduceRule->equalsExceptLookahead(*(other.reduceRule));
}
// Full equality: action kind, shift state and reduce rule (including its
// lookahead) must match. Reduce rules match when they are the same pointer or
// compare equal by value. An action with a rule never equals one without
// (the previous code dereferenced the NULL pointer in that case).
const bool ParseAction::operator==(const ParseAction &other) const {
    if (action != other.action || shiftState != other.shiftState)
        return false;
    if (reduceRule == other.reduceRule)
        return true;    // same rule object, or both NULL
    if (reduceRule == NULL || other.reduceRule == NULL)
        return false;   // exactly one side has a rule
    return *reduceRule == *(other.reduceRule);
}
// Negation of operator==.
const bool ParseAction::operator!=(const ParseAction &other) const {
    return !(*this == other);
}
// Ordering that allows ParseActions to be stored in sets.
// Compares the action kind first. If the rule pointers are identical the shift
// state decides; if either pointer is NULL the raw pointers are compared;
// otherwise the pointed-to rules are compared (the shift state is not
// consulted in that case).
const bool ParseAction::operator<(const ParseAction &other) const {
    if (action != other.action)
        return action < other.action;
    if (reduceRule == other.reduceRule)
        return shiftState < other.shiftState;
    if (reduceRule == NULL || other.reduceRule == NULL)
        return reduceRule < other.reduceRule;
    return *reduceRule < *(other.reduceRule);
}
// Maps an action kind to its lowercase name; any value outside the four known
// kinds yields "INVALID PARSE ACTION".
std::string ParseAction::actionToString(ActionType action) {
    switch (action) {
        case REDUCE: return "reduce";
        case SHIFT:  return "shift";
        case ACCEPT: return "accept";
        case REJECT: return "reject";
        default:     return "INVALID PARSE ACTION";
    }
}
// Renders the action as "<kind>[ <rule>][ <state>]": the rule is appended only
// when one is set, the state only when it is not -1. `printRuleLookahead` is
// forwarded to the rule's own toString.
std::string ParseAction::toString(bool printRuleLookahead) {
    std::string text = actionToString(action);
    if (reduceRule != NULL) {
        text += " " + reduceRule->toString(printRuleLookahead);
    }
    if (shiftState != -1) {
        text += " " + intToString(shiftState);
    }
    return text;
}

View File

@@ -0,0 +1,145 @@
#include "ParseRule.h"
// Creates an empty rule with the position marker at index 0.
ParseRule::ParseRule() : pointerIndex(0) {
}
// Creates a rule from all of its parts; the right side and lookahead vectors
// are copied into the rule.
ParseRule::ParseRule(Symbol leftHandle, int pointerIndex, std::vector<Symbol> &rightSide, std::vector<Symbol> lookahead)
    : leftHandle(leftHandle),
      pointerIndex(pointerIndex),
      rightSide(rightSide),
      lookahead(lookahead) {
}
// No explicit cleanup is performed.
ParseRule::~ParseRule() {}
// True when both rules have the same left side, the same right side and the
// same position marker; lookahead is not consulted.
const bool ParseRule::equalsExceptLookahead(const ParseRule &other) const {
    if (!(leftHandle == other.leftHandle))
        return false;
    if (!(rightSide == other.rightSide))
        return false;
    return pointerIndex == other.pointerIndex;
}
// Full equality: everything equalsExceptLookahead checks, plus the lookahead vector.
const bool ParseRule::operator==(const ParseRule &other) const {
    if (!equalsExceptLookahead(other))
        return false;
    return lookahead == other.lookahead;
}
// Negation of operator==.
const bool ParseRule::operator!=(const ParseRule &other) const {
    return !(*this == other);
}
// Ordering used so that ParseRules can live in sets and so that ParseActions
// can be ordered. Compares the left side, then the right side, then the lookahead.
// NOTE(review): pointerIndex takes part in operator== but not in this ordering,
// so two rules that differ only in the marker position are equivalent here —
// confirm that this is intended.
const bool ParseRule::operator<(const ParseRule &other) const {
    if (leftHandle != other.leftHandle) {
        return leftHandle < other.leftHandle;
    } else if (rightSide != other.rightSide) {
        return rightSide < other.rightSide;
    } else if (lookahead != other.lookahead) {
        return lookahead < other.lookahead;
    } else {
        return false;
    }
}
// Returns a newly allocated copy of this rule.
ParseRule* ParseRule::clone() {
    ParseRule* copy = new ParseRule(leftHandle, pointerIndex, rightSide, lookahead);
    return copy;
}
// Replaces the rule's left-hand symbol.
void ParseRule::setLeftHandle(Symbol leftHandle) {
    (*this).leftHandle = leftHandle;
}
// Appends one symbol to the end of the right side.
void ParseRule::appendToRight(Symbol appendee) {
    this->rightSide.push_back(appendee);
}
// Returns a copy of the left-hand symbol.
Symbol ParseRule::getLeftSide() {
    return this->leftHandle;
}
// Replaces the whole right side; the position marker is not adjusted.
void ParseRule::setRightSide(std::vector<Symbol> rightSide) {
    (*this).rightSide = rightSide;
}
std::vector<Symbol> ParseRule::getRightSide() {
return rightSide;
}
// Returns the symbol right after the position marker, or a default-constructed
// Symbol when the marker is at (or beyond) the end of the right side.
Symbol ParseRule::getAtNextIndex() {
    return (pointerIndex >= rightSide.size()) ? Symbol() : rightSide[pointerIndex];
}
// Returns the symbol just before the position marker, or a default-constructed
// Symbol when the marker has not advanced past any symbol yet.
Symbol ParseRule::getAtIndex() {
    if (pointerIndex >= 1)
        return rightSide[pointerIndex-1];
    return Symbol();
}
int ParseRule::getRightSize() {
return rightSide.size();
}
int ParseRule::getIndex() {
return pointerIndex;
}
// Moves the position marker one symbol to the right.
// Returns false, leaving the marker alone, when it is already at the end.
bool ParseRule::advancePointer() {
    if (pointerIndex >= rightSide.size())
        return false;
    pointerIndex++;
    return true;
}
// True when the marker sits exactly at the end of the right side.
bool ParseRule::isAtEnd() {
    return rightSide.size() == pointerIndex;
}
// Replaces the lookahead symbols of this rule.
void ParseRule::setLookahead(std::vector<Symbol> lookahead) {
    (*this).lookahead = lookahead;
}
void ParseRule::addLookahead(std::vector<Symbol> lookahead) {
for (std::vector<Symbol>::size_type i = 0; i < lookahead.size(); i++) {
bool alreadyIn = false;
for (std::vector<Symbol>::size_type j = 0; j < this->lookahead.size(); j++) {
if (lookahead[i] == this->lookahead[j]) {
alreadyIn = true;
break;
}
}
if (!alreadyIn)
this->lookahead.push_back(lookahead[i]);
}
}
std::vector<Symbol> ParseRule::getLookahead() {
return lookahead;
}
std::string ParseRule::toString(bool printLookahead) {
std::string concat = leftHandle.toString() + " -> ";
for (int i = 0; i < rightSide.size(); i++) {
if (i == pointerIndex)
concat += "(*) ";
concat += rightSide[i].toString() + " ";
}
if (pointerIndex >= rightSide.size())
concat += "(*)";
if (printLookahead && lookahead.size()) {
concat += "**";
for (std::vector<Symbol>::size_type i = 0; i < lookahead.size(); i++)
concat += lookahead[i].toString();
concat += "**";
}
return(concat);
}
std::string ParseRule::toDOT() {
std::string concat = "";
for (int i = 0; i < rightSide.size(); i++) {
concat += leftHandle.toString() + " -> " + rightSide[i].toString() + ";\n";
}
return(concat);
}

View File

@@ -0,0 +1,407 @@
#include "Parser.h"
// Sets up the three special terminals ($EOF$, $NULL$, $INVALID$) and hands the
// EOF and null symbols to the table.
Parser::Parser()
    : EOFSymbol("$EOF$", true),
      nullSymbol("$NULL$", true),
      invalidSymbol("$INVALID$", true)
{
    table.setSymbols(EOFSymbol, nullSymbol);
}
// No explicit cleanup is performed.
Parser::~Parser() {}
void Parser::exportTable(std::ofstream &file) {
//Do table
table.exportTable(file);
}
void Parser::importTable(char* tableData) {
//Do table
table.importTable(tableData);
return;
}
// Returns the symbol registered for (symbolString, isTerminal), creating and
// registering it first if this is the first time the pair is seen.
Symbol Parser::getOrAddSymbol(std::string symbolString, bool isTerminal) {
    std::pair<std::string, bool> key(symbolString, isTerminal);
    auto known = symbols.find(key);
    if (known != symbols.end())
        return known->second;

    Symbol created(symbolString, isTerminal);
    symbols[key] = created;
    return created;
}
// Parses a textual grammar and appends the resulting rules to loadedGrammer.
//
// Format, as read by this code: a rule is `left = sym sym ... ;`, where `|`
// separates alternatives that share the same left side. A token beginning with
// '#' starts a comment that runs to the end of the line. A right-side token
// beginning with '"' is a terminal: the surrounding quotes are stripped and the
// text is also registered with the lexer as a regular expression. An alternative
// with no symbols becomes a rule whose right side is just nullSymbol.
//
// NOTE(review): the inner loop only stops on ";" — presumably the reader returns
// "" at end of input, in which case an unterminated rule would never leave the
// loop; confirm against StringReader.
void Parser::loadGrammer(std::string grammerInputString) {
reader.setString(grammerInputString);
std::string currToken = reader.word(false); //Don't truncate so we can find the newline correctly (needed for comments)
while(currToken != "") {
//First, if this starts with a '#', skip this
if (currToken.front() == '#') {
//If this line is more than one token long, eat it
//std::cout << "Ate: " << currToken << std::endl;
if (currToken.back() != '\n') {
std::string ate = reader.line();
//std::cout << "Eating " << ate << " b/c grammer comment" << std::endl;
}
currToken = reader.word(false);
continue;
}
// The token was read untruncated, so drop one trailing whitespace character.
if (currToken.back() == '\n' || currToken.back() == ' ' || currToken.back() == '\t')
currToken.erase(currToken.size()-1);
//Load the left of the rule
ParseRule* currentRule = new ParseRule();
Symbol leftSide = getOrAddSymbol(currToken, false); //Left handle is never a terminal
currentRule->setLeftHandle(leftSide);
reader.word(); //Remove the =
//Add the right side, adding Symbols to symbol map.
currToken = reader.word();
while (currToken != ";") {
//If there are multiple endings to this rule, finish this rule and start a new one with same left handle
while (currToken == "|") {
//If we haven't added anything, that means that this is a null rule
if (currentRule->getRightSide().size() == 0)
currentRule->appendToRight(nullSymbol);
loadedGrammer.push_back(currentRule);
currentRule = new ParseRule();
currentRule->setLeftHandle(leftSide);
currToken = reader.word();
}
// A "|" directly followed by ";" ends the rule with an empty last alternative.
if (currToken == ";")
break;
if (currToken[0] == '\"') {
//Remove the quotes
currToken = currToken.substr(1,currToken.length()-2);
lexer.addRegEx(currToken);
currentRule->appendToRight(getOrAddSymbol(currToken, true)); //If first character is a ", then is a terminal
} else {
currentRule->appendToRight(getOrAddSymbol(currToken, false));
}
currToken = reader.word();
}
//Add new rule to grammer
//If we haven't added anything, that means that this is a null rule
if (currentRule->getRightSide().size() == 0)
currentRule->appendToRight(nullSymbol);
loadedGrammer.push_back(currentRule);
//Get next token
currToken = reader.word(false);
}
//std::cout << "Parsed!\n";
// for (std::vector<ParseRule*>::size_type i = 0; i < loadedGrammer.size(); i++)
// std::cout << loadedGrammer[i]->toString() << std::endl;
}
void Parser::createStateSet() {
std::cout << "Begining creation of stateSet" << std::endl;
//First state has no parents
//Set the first state's basis to be the goal rule with lookahead EOF
ParseRule* goalRule = loadedGrammer[0]->clone();
std::vector<Symbol> goalRuleLookahead;
goalRuleLookahead.push_back(EOFSymbol);
goalRule->setLookahead(goalRuleLookahead);
State* zeroState = new State(0, goalRule);
stateSets.push_back(zeroState);
std::queue<State*> toDo;
toDo.push(zeroState);
//std::cout << "Begining for main set for loop" << std::endl;
int count = 0;
while (toDo.size()) {
if (count % 200 == 0)
std::cout << "while count: " << count << std::endl;
count++;
//closure
closure(toDo.front());
//Add the new states
addStates(&stateSets, toDo.front(), &toDo);
toDo.pop();
}
table.remove(1, EOFSymbol);
}
// Returns the index of the first state in stateSets that compares equal (by
// value) to `*state`, or -1 if there is none.
int Parser::stateNum(State* state) {
    std::vector<State*>::size_type index = 0;
    while (index < stateSets.size()) {
        State* candidate = stateSets[index];
        if (*candidate == *state)
            return index;
        index++;
    }
    return -1;
}
// Computes the first set of `token`.
//
//   token        - symbol whose first set is wanted.
//   avoidList    - symbols already being expanded further up the recursion; hitting
//                  one of them returns an empty vector so cycles terminate.
//   addNewTokens - when true the result is stored in the tokenFirstSet cache.
//
// A cached result is returned immediately. A terminal's first set is the terminal
// itself (not cached). The returned vector is not de-duplicated.
std::vector<Symbol> Parser::firstSet(Symbol token, std::vector<Symbol> avoidList, bool addNewTokens) {
if (tokenFirstSet.find(token) != tokenFirstSet.end())
return tokenFirstSet[token];
//If we've already done this token, don't do it again
for (std::vector<Symbol>::size_type i = 0; i < avoidList.size(); i++)
if (avoidList[i] == token)
return std::vector<Symbol>();
avoidList.push_back(token);
std::vector<Symbol> first;
//First, if the symbol is a terminal, then its first set is just itself.
if (token.isTerminal()) {
first.push_back(token);
return(first);
}
//Otherwise....
//To make a first set, go through the grammar; for each rule with the token on its left side, add the first set of the production's first token.
//If that one is nullable, do the next one too (if it exists).
Symbol rightToken;
// NOTE: recursiveFirstSet is never cleared. For a terminal the symbol is pushed onto
// whatever it already holds, so earlier entries get inserted into `first` again.
std::vector<Symbol> recursiveFirstSet;
for (std::vector<ParseRule*>::size_type i = 0; i < loadedGrammer.size(); i++) {
if (token == loadedGrammer[i]->getLeftSide()) {
//Loop through the rule adding first sets for each token if the previous token contained NULL
int j = 0;
do {
rightToken = loadedGrammer[i]->getRightSide()[j]; //Get token of the right side of this rule
if (rightToken.isTerminal()) {
recursiveFirstSet.push_back(rightToken);
} else {
//Add the entire set
recursiveFirstSet = firstSet(rightToken, avoidList, false);//Don't add children to cache, as early termination may cause them to be incomplete
}
first.insert(first.end(), recursiveFirstSet.begin(), recursiveFirstSet.end());
j++;
// Continue only while the symbol just handled is nullable and more symbols remain.
} while (isNullable(rightToken) && loadedGrammer[i]->getRightSide().size() > j);
}
}
if (addNewTokens)
tokenFirstSet[token] = first;
return(first);
}
// Tells whether `token` can derive the null symbol. Results are memoized in
// tokenNullable; the actual work is done by isNullableHelper.
bool Parser::isNullable(Symbol token) {
    auto cached = tokenNullable.find(token);
    if (cached != tokenNullable.end())
        return cached->second;

    bool result = isNullableHelper(token, std::set<Symbol>());
    tokenNullable[token] = result;
    return result;
}
// Recursive worker behind isNullable.
//
// Grammars can contain loops, so `done` tracks the symbols already visited on
// the current path and a revisit terminates early with false. Answers obtained
// in the middle of such a loop may therefore be inaccurate, which is why this
// helper only reads the tokenNullable cache and never writes to it: caching is
// left to isNullable. That costs some performance, but tracking invalidated
// state would be more complicated than it is worth.
//
// A terminal is nullable only if it is the null symbol; a non-terminal is
// nullable if at least one of its rules has a right side made up entirely of
// nullable symbols.
bool Parser::isNullableHelper(Symbol token, std::set<Symbol> done) {
    if (token.isTerminal())
        return token == nullSymbol;
    if (done.find(token) != done.end())
        return false;
    done.insert(token);

    auto cached = tokenNullable.find(token);
    if (cached != tokenNullable.end())
        return cached->second;

    for (ParseRule* rule : loadedGrammer) {
        if (!(token == rule->getLeftSide()))
            continue;
        auto production = rule->getRightSide();
        bool everyPartNullable = true;
        for (const Symbol& part : production) {
            if (!isNullableHelper(part, done)) {
                everyPartNullable = false;
                break;
            }
        }
        if (everyPartNullable)
            return true;
    }
    return false;
}
// Returns the lookahead for rules produced by the symbol right after `rule`'s position
// marker. It is built from the first sets of the symbols that follow that symbol, moving
// on to the next symbol while the current first set contains nullSymbol, and it includes
// the rule's own lookahead if the end of the rule is reached. The result has no
// duplicate entries. `rule` itself is not modified; a temporary clone is advanced instead.
std::vector<Symbol> Parser::incrementiveFollowSet(ParseRule* rule) {
//Advance the pointer past the current Symbol (the one we want the followset for) to the next symbol (which might be in our follow set, or might be the end)
rule = rule->clone();
rule->advancePointer();
//Get the first set of the next Symbol. If it contains nullSymbol, keep doing for the next one
std::vector<Symbol> followSet;
std::vector<Symbol> symbolFirstSet;
bool symbolFirstSetHasNull = true;
while (symbolFirstSetHasNull && !rule->isAtEnd()) {
symbolFirstSetHasNull = false;
symbolFirstSet = firstSet(rule->getAtNextIndex());
// Only the first nullSymbol occurrence is erased before the set is merged in.
for (std::vector<Symbol>::size_type i = 0; i < symbolFirstSet.size(); i++) {
if (symbolFirstSet[i] == nullSymbol) {
symbolFirstSetHasNull = true;
symbolFirstSet.erase(symbolFirstSet.begin()+i);
break;
}
}
followSet.insert(followSet.end(), symbolFirstSet.begin(), symbolFirstSet.end());
rule->advancePointer();
}
// Ran off the end of the rule: what may follow is whatever may follow the rule itself.
if (rule->isAtEnd()) {
symbolFirstSet = rule->getLookahead();
followSet.insert(followSet.end(), symbolFirstSet.begin(), symbolFirstSet.end());
}
// De-duplicate while preserving first-seen order.
std::vector<Symbol> followSetReturn;
for (std::vector<Symbol>::size_type i = 0; i < followSet.size(); i++) {
bool alreadyIn = false;
for (std::vector<Symbol>::size_type j = 0; j < followSetReturn.size(); j++)
if (followSet[i] == followSetReturn[j]) {
alreadyIn = true;
break;
}
if (!alreadyIn)
followSetReturn.push_back(followSet[i]);
}
// Free the temporary clone made at the top.
delete rule;
return followSetReturn;
}
// Computes the closure of `state`: for every rule in the state whose marker is not at the
// end, each grammar rule producing the symbol after the marker is added to state->remaining
// with the lookahead given by incrementiveFollowSet. If an equal rule (ignoring lookahead)
// is already in the state, the lookaheads are merged instead.
void Parser::closure(State* state) {
//Add all the applicable rules.
//std::cout << "Closure on " << state->toString() << " is" << std::endl;
// stateTotal is refreshed from state->getTotal() whenever a rule is added below; the
// outer loop re-reads its size, so newly added rules are processed as well.
std::vector<ParseRule*> stateTotal = state->getTotal();
for (std::vector<ParseRule*>::size_type i = 0; i < stateTotal.size(); i++) {
ParseRule* currentStateRule = stateTotal[i];
//If it's at its end, move on. We can't advance it.
if(currentStateRule->isAtEnd())
continue;
for (std::vector<ParseRule*>::size_type j = 0; j < loadedGrammer.size(); j++) {
//If the current symbol in the rule is not null (rule completed) and it equals a grammar rule's left side
// Every grammar rule is cloned up front; clones that end up unused are deleted below.
ParseRule* currentGramRule = loadedGrammer[j]->clone();
if (currentStateRule->getAtNextIndex() == currentGramRule->getLeftSide()) {
//std::cout << (*stateTotal)[i]->getAtNextIndex()->toString() << " has an applicable production " << loadedGrammer[j]->toString() << std::endl;
//Now, add the correct lookahead. This followSet is built based on the current rule's lookahead if at end, or the next Symbol's first set.
//std::cout << "Setting lookahead for " << currentGramRule->toString() << " in state " << state->toString() << std::endl;
currentGramRule->setLookahead(incrementiveFollowSet(currentStateRule));
//Check to make sure not already in
bool isAlreadyInState = false;
for (std::vector<ParseRule*>::size_type k = 0; k < stateTotal.size(); k++) {
if (stateTotal[k]->equalsExceptLookahead(*currentGramRule)) {
//std::cout << (*stateTotal)[k]->toString() << std::endl;
stateTotal[k]->addLookahead(currentGramRule->getLookahead());
isAlreadyInState = true;
delete currentGramRule;
break;
}
}
if (!isAlreadyInState) {
// The clone is handed over to the state.
state->remaining.push_back(currentGramRule);
stateTotal = state->getTotal();
}
} else {
delete currentGramRule;
}
}
}
//std::cout << state->toString() << std::endl;
}
// Expands `state`: groups its rules by the symbol after the marker into candidate successor
// states, records reduce actions for completed (and null-producing) rules, and then either
// links each candidate to an existing state with the same basis or appends it to `stateSets`
// and queues it on `toDo`. Shift actions towards the successor are added to the table in
// both cases.
//
//   stateSets - all states created so far; new states are appended here.
//   state     - the state being expanded.
//   toDo      - work queue that receives every newly created state.
void Parser::addStates(std::vector< State* >* stateSets, State* state, std::queue<State*>* toDo) {
std::vector< State* > newStates;
//For each rule in the state we already have
std::vector<ParseRule*> currStateTotal = state->getTotal();
for (std::vector<ParseRule*>::size_type i = 0; i < currStateTotal.size(); i++) {
//Clone the current rule
ParseRule* advancedRule = currStateTotal[i]->clone();
//Try to advance the pointer, if successful see if it is the correct next symbol
if (advancedRule->advancePointer()) {
//Technically, it should be the set of rules sharing this symbol advanced past in the basis for new state
//So search our new states to see if any of them use this advanced symbol as a base.
//If so, add this rule to them.
//If not, create it.
bool symbolAlreadyInState = false;
for (std::vector< State* >::size_type j = 0; j < newStates.size(); j++) {
if (newStates[j]->basis[0]->getAtIndex() == advancedRule->getAtIndex()) {
symbolAlreadyInState = true;
//So now check to see if this exact rule is in this state
// NOTE(review): when the rule is already contained, advancedRule is neither stored nor deleted here.
if (!newStates[j]->containsRule(advancedRule))
newStates[j]->basis.push_back(advancedRule);
//We found a state with the same symbol, so stop searching
break;
}
}
if (!symbolAlreadyInState) {
// Provisional state number: states already known plus candidates created so far.
State* newState = new State(stateSets->size()+newStates.size(),advancedRule, state);
newStates.push_back(newState);
}
} else {
delete advancedRule;
}
//Also add any completed rules as reduces in the action table
//See if reduce
//Also, this really only needs to be done for the state's basis, but we're already iterating through, so...
std::vector<Symbol> lookahead = currStateTotal[i]->getLookahead();
if (currStateTotal[i]->isAtEnd()) {
for (std::vector<Symbol>::size_type j = 0; j < lookahead.size(); j++)
table.add(stateNum(state), lookahead[j], new ParseAction(ParseAction::REDUCE, currStateTotal[i]));
} else if (currStateTotal[i]->getAtNextIndex() == nullSymbol) {
//If this is a rule that produces only NULL, add the appropriate reduction, but use a new rule with a right side of length 0 (so we don't pop off the stack).
ParseRule* nullRule = currStateTotal[i]->clone();
nullRule->setRightSide(std::vector<Symbol>());
for (std::vector<Symbol>::size_type j = 0; j < lookahead.size(); j++)
table.add(stateNum(state), lookahead[j], new ParseAction(ParseAction::REDUCE, nullRule));
}
}
//Put all our new states in the set of states only if they're not already there.
bool stateAlreadyInAllStates = false;
Symbol currStateSymbol;
for (std::vector< State * >::size_type i = 0; i < newStates.size(); i++) {
stateAlreadyInAllStates = false;
// The symbol the candidate's basis was advanced over; it labels the shift/goto entry.
currStateSymbol = (*(newStates[i]->getBasis()))[0]->getAtIndex();
for (std::vector< State * >::size_type j = 0; j < stateSets->size(); j++) {
if (newStates[i]->basisEquals(*((*stateSets)[j]))) {
stateAlreadyInAllStates = true;
//If it does exist, we should add it as the shift/goto in the action table
(*stateSets)[j]->addParents(newStates[i]->getParents());
table.add(stateNum(state), currStateSymbol, new ParseAction(ParseAction::SHIFT, j));
break;
}
}
if (!stateAlreadyInAllStates) {
//If the state does not already exist, add it and add it as the shift/goto in the action table
stateSets->push_back(newStates[i]);
toDo->push(newStates[i]);
table.add(stateNum(state), currStateSymbol, new ParseAction(ParseAction::SHIFT, stateSets->size()-1));
}
}
}
std::string Parser::stateSetToString() {
std::string concat = "";
for (std::vector< State *>::size_type i = 0; i < stateSets.size(); i++) {
concat += intToString(i) + " is " + stateSets[i]->toString();
}
return concat;
}
std::string Parser::tableToString() {
return table.toString();
}
//parseInput is now pure virtual
std::string Parser::grammerToString() {
//Iterate through the vector, adding string representation of each grammer rule
std::cout << "About to toString\n";
std::string concat = "";
for (int i = 0; i < loadedGrammer.size(); i++) {
concat += loadedGrammer[i]->toString() + "\n";
}
return(concat);
}
std::string Parser::grammerToDOT() {
//Iterate through the vector, adding DOT representation of each grammer rule
//std::cout << "About to DOT export\n";
std::string concat = "";
for (int i = 0; i < loadedGrammer.size(); i++) {
concat += loadedGrammer[i]->toDOT();
}
return("digraph Kraken_Grammer { \n" + concat + "}");
}

View File

@@ -0,0 +1,565 @@
#include "RNGLRParser.h"
#include <fstream>
//sorry about the macros
#define RESET "\033[0m"
#define BOLDRED "\033[1m\033[31m"
#define BOLDWHITE "\033[1m\033[37m"
#define BOLDGREEN "\033[1m\033[32m"
#define BOLDYELLOW "\033[1m\033[33m"
#define BOLDBLUE "\033[1m\033[34m"
#define BOLDMAGENTA "\033[1m\033[35m"
#define BOLDCYAN "\033[1m\033[36m"
// Nothing to set up beyond what the members and base do themselves.
RNGLRParser::RNGLRParser() {}
// No explicit cleanup is performed.
RNGLRParser::~RNGLRParser() {}
void RNGLRParser::printReconstructedFrontier(int frontier) {
std::vector<int> lastFrontier = gss.getFrontier(frontier);
for (int j = 0; j < lastFrontier.size(); j++) {
std::cout << "State: " << lastFrontier[j] << std::endl;
std::vector<std::pair<std::string, ParseAction>> stateParseActions = table.stateAsParseActionVector(lastFrontier[j]);
std::set<std::pair<std::string, ParseAction>> noRepeats;
for (auto k : stateParseActions)
noRepeats.insert(k);
for (auto k : noRepeats)
std::cout << k.first << " " << k.second.toString(false) << std::endl;
std::cout << std::endl;
}
}
// Parses `inputString` and returns the resulting parse tree.
//
//   inputString      - complete text to parse.
//   filename         - used in diagnostics and re-opened to print the offending line
//                      when a parse error is found.
//   highlight_errors - when true, parse-error output is wrapped in ANSI colour codes.
//
// Returns:
//   - for empty input: the nullable parts of the goal symbol if state 0 has a reduce
//     action on EOF, otherwise a freshly allocated empty NodeTree;
//   - the edge between the accepting GSS node and the start node on success;
//   - NULL when the input is rejected.
// Throws a C-string literal when the lexer produces the invalid symbol.
NodeTree<Symbol>* RNGLRParser::parseInput(std::string inputString, std::string filename, bool highlight_errors) {
    // Drop everything left over from a previous parse.
    input.clear();
    gss.clear();
    while (!toReduce.empty()) toReduce.pop();
    // Drain the shift queue itself (this loop used to pop toReduce, which is already
    // empty at this point, and never emptied toShift).
    while (!toShift.empty()) toShift.pop();
    SPPFStepNodes.clear();
    nullableParts.clear();
    packedMap.clear();

    bool errord = false;
    //Check for no tokens
    bool accepting = false;
    if (inputString == "") {
        std::vector<ParseAction*>* zeroStateActions = table.get(0,EOFSymbol);
        for (int i = 0; i < zeroStateActions->size(); i++) {
            if ((*zeroStateActions)[i]->action == ParseAction::REDUCE)
                accepting = true;
        }
        if (accepting) {
            std::cout << "Accepted!" << std::endl;
            return getNullableParts((*(stateSets[0]->getBasis()))[0]->getLeftSide());
        } else {
            std::cerr << "Rejected, no input (with no accepting state)" << std::endl;
        }
        return new NodeTree<Symbol>();
    }

    lexer.reset();
    lexer.setInput(inputString);
    //Now fully lex our input because this algorithm was designed in that manner and simplifies this first implementation.
    //It could be converted to on-line later.
    int tokenNum = 1;
    Symbol currentToken = lexer.next();
    input.push_back(currentToken);
    while (currentToken != EOFSymbol) {
        currentToken = lexer.next();
        if (currentToken == invalidSymbol) {
            std::cerr << filename << ":" << findLine(tokenNum) << std::endl;
            errord = true;
            std::cerr << "lex error" << std::endl;
            std::cerr << "Invalid Symbol!" << std::endl;
            throw "Invalid Symbol, cannot lex";
        }
        input.push_back(currentToken);
        tokenNum++;
    }

    //Frontier 0, new node with state 0
    NodeTree<int>* v0 = gss.newNode(0);
    gss.addToFrontier(0,v0);

    // Seed the work queues from the actions state 0 has on the first token.
    std::vector<ParseAction*> firstActions = *(table.get(0, input[0]));
    for (std::vector<ParseAction*>::size_type i = 0; i < firstActions.size(); i++) {
        if (firstActions[i]->action == ParseAction::SHIFT)
            toShift.push(std::make_pair(v0,firstActions[i]->shiftState));
        else if (firstActions[i]->action == ParseAction::REDUCE && fullyReducesToNull(firstActions[i]->reduceRule)) {
            Reduction newReduction = {v0, firstActions[i]->reduceRule->getLeftSide(), 0, getNullableParts(firstActions[i]->reduceRule), NULL};
            toReduce.push(newReduction);
        }
    }

    // Main loop: one frontier per input token.
    for (int i = 0; i < input.size(); i++) {
        if (gss.frontierIsEmpty(i)) {
            // Nothing can continue from here: report the location, echo the offending
            // source line from the file, and stop parsing.
            errord = true;
            if (highlight_errors)
                std::cout << BOLDBLUE;
            std::cout << filename << ":" << findLine(i) << std::endl;
            if (highlight_errors)
                std::cout << BOLDMAGENTA;
            std::cout << ": parse error" << std::endl;
            std::ifstream infile(filename);
            std::string line;
            int linecount = 0;
            while(std::getline(infile,line))
            {
                if(linecount == findLine(i) - 1) {
                    if (highlight_errors)
                        std::cout << BOLDRED;
                    std::cout << line << std::endl;
                }
                linecount++;
            }
            if (highlight_errors)
                std::cout << RESET << std::endl;
            break;
        }
        //Clear the vector of SPPF nodes created every step
        SPPFStepNodes.clear();
        // Run every pending reduction for this frontier, then shift.
        while (toReduce.size() != 0) {
            reducer(i);
        }
        shifter(i);
    }

    // Accept if the last frontier holds an accepting state.
    NodeTree<int>* accState = gss.frontierGetAccState(input.size()-1);
    if (accState) {
        std::cout << "Accepted!" << std::endl;
        return gss.getEdge(accState, v0);
    }
    if (!errord) {
        std::cerr << filename << ":" << findLine(input.size())-2 << std::endl;
        std::cerr << "parse error" << std::endl;
        std::cerr << "Nearby is:" << std::endl;
    }
    std::cerr << "Rejected!" << std::endl;
    return NULL;
}
//Performs the reduction at the front of the toReduce queue while frontier i is being built.
//For every GSS path of the required length leading back from reduction.from, this looks up the goto state for
//reduction.symbol, connects the matching node of frontier i (creating it if needed) to the end of that path,
//labels the new edge with an SPPF node, and queues whatever shifts/reductions the new node or edge enables
//for input[i].
//i: index of the frontier / input token currently being processed
void RNGLRParser::reducer(int i) {
Reduction reduction = toReduce.front();
toReduce.pop();
//std::cout << "Doing reduction of length " << reduction.length << " from state " << reduction.from->getData() << " to symbol " << reduction.symbol->toString() << std::endl;
//One edge fewer than the reduction length is walked here; reduction.label is appended to the path edges below when length != 0
int pathLength = reduction.length > 0 ? reduction.length -1 : 0;
//Get every reachable path
//NOTE(review): paths is received as a raw pointer and is not deleted in this function - confirm who owns it
std::vector<std::vector<NodeTree<int>*> >* paths = gss.getReachablePaths(reduction.from, pathLength);
for (std::vector<std::vector<NodeTree<int>*> >::size_type j = 0; j < paths->size(); j++) {
std::vector<NodeTree<int>*> currentPath = (*paths)[j];
//Get the edges for the current path
std::vector<NodeTree<Symbol>*> pathEdges = getPathEdges(currentPath);
std::reverse(pathEdges.begin(), pathEdges.end());
//If the reduction length is 0, label as passed in is null
if (reduction.length != 0)
pathEdges.push_back(reduction.label);
//The end of the current path
NodeTree<int>* currentReached = currentPath[currentPath.size()-1];
//std::cout << "Getting the shift state for state " << currentReached->getData() << " and symbol " << reduction.symbol.toString() << std::endl;
//NOTE(review): the result of table.getShift is dereferenced without a null check - presumably the table always has a goto here; confirm
int toState = table.getShift(currentReached->getData(), reduction.symbol)->shiftState;
//If reduction length is 0, then we make the new label the appropriate nullable parts
NodeTree<Symbol>* newLabel = NULL;
if (reduction.length == 0) {
newLabel = reduction.nullableParts;
} else {
//Otherwise, we create the new label if we haven't already
//An SPPF node made earlier in this step for the same symbol and the same frontier is reused
int reachedFrontier = gss.getContainingFrontier(currentReached);
for (std::vector<std::pair<NodeTree<Symbol>*, int> >::size_type k = 0; k < SPPFStepNodes.size(); k++) {
if ( SPPFStepNodes[k].second == reachedFrontier && SPPFStepNodes[k].first->getData() == reduction.symbol) {
newLabel = SPPFStepNodes[k].first;
break;
}
}
if (!newLabel) {
newLabel = new NodeTree<Symbol>("frontier: " + intToString(reachedFrontier), reduction.symbol);
SPPFStepNodes.push_back(std::make_pair(newLabel, reachedFrontier));
}
}
NodeTree<int>* toStateNode = gss.inFrontier(i, toState);
if (toStateNode) {
//The goto node already exists in frontier i: only a missing edge produces new work
if (!gss.hasEdge(toStateNode, currentReached)) {
gss.addEdge(toStateNode, currentReached, newLabel);
if (reduction.length != 0) {
//Do all non null reduction
//std::cout << "Checking for non-null reductions in states that already existed" << std::endl;
std::vector<ParseAction*> actions = *(table.get(toState, input[i]));
for (std::vector<ParseAction*>::size_type k = 0; k < actions.size(); k++) {
if (actions[k]->action == ParseAction::REDUCE && !fullyReducesToNull(actions[k]->reduceRule)) {
Reduction newReduction = {currentReached, actions[k]->reduceRule->getLeftSide(), actions[k]->reduceRule->getIndex(), getNullableParts(actions[k]->reduceRule), newLabel};
toReduce.push(newReduction);
}
}
}
}
} else {
//The goto node is new: create it, add it to frontier i and queue every action it has on input[i]
toStateNode = gss.newNode(toState);
gss.addToFrontier(i, toStateNode);
gss.addEdge(toStateNode, currentReached, newLabel);
//std::cout << "Adding shifts and reductions for a state that did not exist" << std::endl;
std::vector<ParseAction*> actions = *(table.get(toState, input[i]));
for (std::vector<ParseAction*>::size_type k = 0; k < actions.size(); k++) {
//std::cout << "Action is " << actions[k]->toString() << std::endl;
if (actions[k]->action == ParseAction::SHIFT) {
toShift.push(std::make_pair(toStateNode, actions[k]->shiftState));
} else if (actions[k]->action == ParseAction::REDUCE && fullyReducesToNull(actions[k]->reduceRule)) {
//Null reductions start at the new node itself, with length 0 and no label
Reduction newReduction = {toStateNode, actions[k]->reduceRule->getLeftSide(), 0, getNullableParts(actions[k]->reduceRule), NULL};
toReduce.push(newReduction);
} else if (reduction.length != 0 && actions[k]->action == ParseAction::REDUCE && !fullyReducesToNull(actions[k]->reduceRule)) {
//Non-null reductions start at the node the new edge points to and carry the new edge's label
Reduction newReduction = {currentReached, actions[k]->reduceRule->getLeftSide(), actions[k]->reduceRule->getIndex(), getNullableParts(actions[k]->reduceRule), newLabel};
toReduce.push(newReduction);
}
}
}
//Record this derivation (path edges plus nullable parts) as children of the SPPF label
if (reduction.length != 0)
addChildren(newLabel, &pathEdges, reduction.nullableParts);
}
}
//Performs every pending shift over input[i], building frontier i+1.
//Does nothing when i is the index of the last input token. Otherwise drains toShift, connecting a node for each
//target state in frontier i+1 (creating it if needed) back to the node shifted from, and queues the actions
//those nodes have on input[i+1]. Shifts discovered here are collected and become the new toShift.
//i: index of the input token being shifted
void RNGLRParser::shifter(int i) {
if (i != input.size()-1) {
std::queue< std::pair<NodeTree<int>*, int> > nextShifts;
//A single SPPF node for the shifted token labels every edge added in this call
NodeTree<Symbol>* newLabel = new NodeTree<Symbol>("frontier: " + intToString(i), input[i]);
while (!toShift.empty()) {
std::pair<NodeTree<int>*, int> shift = toShift.front();
toShift.pop();
//std::cout << "Current potential shift from " << shift.first->getData() << " to " << shift.second << std::endl;
NodeTree<int>* shiftTo = gss.inFrontier(i+1, shift.second);
if (shiftTo) {
//std::cout << "State already existed, just adding edge" << std::endl;
gss.addEdge(shiftTo, shift.first, newLabel);
//Only non-null reductions are queued here; they start at the node shifted from and carry the new label
std::vector<ParseAction*> actions = *(table.get(shift.second, input[i+1]));
for (std::vector<ParseAction*>::size_type j = 0; j < actions.size(); j++) {
if (actions[j]->action == ParseAction::REDUCE && !fullyReducesToNull(actions[j]->reduceRule)) {
Reduction newReduction = {shift.first, actions[j]->reduceRule->getLeftSide(), actions[j]->reduceRule->getIndex(), getNullableParts(actions[j]->reduceRule), newLabel};
toReduce.push(newReduction);
}
}
} else {
//std::cout << "State did not already exist, adding" << std::endl;
shiftTo = gss.newNode(shift.second);
gss.addToFrontier(i+1, shiftTo);
gss.addEdge(shiftTo, shift.first, newLabel);
std::vector<ParseAction*> actions = *(table.get(shift.second, input[i+1]));
for (std::vector<ParseAction*>::size_type j = 0; j < actions.size(); j++) {
//std::cout << "Adding action " << actions[j]->toString() << " to either nextShifts or toReduce" << std::endl;
//Shift
if (actions[j]->action == ParseAction::SHIFT) {
nextShifts.push(std::make_pair(shiftTo, actions[j]->shiftState));
} else if (actions[j]->action == ParseAction::REDUCE && !fullyReducesToNull(actions[j]->reduceRule)) {
//Non-null reduction: starts at the node shifted from, carries the new label
Reduction newReduction = {shift.first, actions[j]->reduceRule->getLeftSide(), actions[j]->reduceRule->getIndex(), getNullableParts(actions[j]->reduceRule), newLabel};
toReduce.push(newReduction);
} else if (actions[j]->action == ParseAction::REDUCE && fullyReducesToNull(actions[j]->reduceRule)) {
//Null reduction: starts at the newly created node, length 0, no label
Reduction newReduction = {shiftTo, actions[j]->reduceRule->getLeftSide(), 0, getNullableParts(actions[j]->reduceRule), NULL};
toReduce.push(newReduction);
}
}
}
}
//The shifts found for frontier i+1 are the work for the next call
toShift = nextShifts;
}
}
//Attaches one derivation (children, plus nullableParts when non-null) to the SPPF node parent.
//A derivation whose members are all already among parent's children (per belongsToFamily) is ignored.
//The first derivation becomes parent's direct children. Any later one makes the node ambiguous: the
//existing children are wrapped in a packed node (unless they already are packed nodes) and the new
//derivation is added as another packed node.
//Note that children is modified: nullableParts is appended to it.
void RNGLRParser::addChildren(NodeTree<Symbol>* parent, std::vector<NodeTree<Symbol>*>* children, NodeTree<Symbol>* nullableParts) {
    if (nullableParts)
        children->push_back(nullableParts);

    //Nothing to do when this family is already recorded
    if (belongsToFamily(parent, children))
        return;

    //First derivation seen for this node
    if (parent->getChildren().size() == 0) {
        parent->addChildren(children);
        return;
    }

    //Second derivation: push the existing plain children down into their own packed node
    if (!arePacked(parent->getChildren())) {
        NodeTree<Symbol>* existingPack = new NodeTree<Symbol>("AmbiguityPackInner", Symbol("AmbiguityPackInner", true));
        setPacked(existingPack, true);
        std::vector<NodeTree<Symbol>*> existingChildren = parent->getChildren();
        existingPack->addChildren(&existingChildren);
        parent->clearChildren();
        parent->addChild(existingPack);
    }

    //The new derivation always goes into a packed node of its own
    NodeTree<Symbol>* newPack = new NodeTree<Symbol>("AmbiguityPackOuter", Symbol("AmbiguityPackInner", true));
    setPacked(newPack, true);
    parent->addChild(newPack);
    newPack->addChildren(children);
}
//Returns true when every entry of nodes matches some child of node, either by pointer identity or,
//when both pointers are non-null, by value equality. Returns true for an empty nodes list.
bool RNGLRParser::belongsToFamily(NodeTree<Symbol>* node, std::vector<NodeTree<Symbol>*>* nodes) {
    std::vector<NodeTree<Symbol>*> family = node->getChildren();
    for (std::vector<NodeTree<Symbol>*>::size_type n = 0; n < nodes->size(); n++) {
        NodeTree<Symbol>* wanted = (*nodes)[n];
        bool matched = false;
        for (std::vector<NodeTree<Symbol>*>::size_type c = 0; c < family.size() && !matched; c++) {
            NodeTree<Symbol>* member = family[c];
            //Null entries can show up on either side, so only dereference when both are real nodes
            if (wanted == member)
                matched = true;
            else if (wanted != NULL && member != NULL && *wanted == *member)
                matched = true;
        }
        if (!matched)
            return false;
    }
    return true;
}
bool RNGLRParser::arePacked(std::vector<NodeTree<Symbol>*> nodes) {
bool packed = true;
for (std::vector<NodeTree<Symbol>*>::size_type i = 0; i < nodes.size(); i++)
packed &= packedMap[*(nodes[i])];
return packed;
}
//Returns the packed flag stored in packedMap for the given node.
bool RNGLRParser::isPacked(NodeTree<Symbol>* node) {
    auto& flag = packedMap[*node];
    return flag;
}
//Stores the packed flag for the given node in packedMap.
void RNGLRParser::setPacked(NodeTree<Symbol>* node, bool isPacked) {
    auto& flag = packedMap[*node];
    flag = isPacked;
}
//Have to use own add states function in order to construct RN table instead of LALR table
//Computes the successor states of state and records the corresponding SHIFT entries in the table.
//stateSets: all states found so far; genuinely new states are appended to it
//state: the state whose successors are computed
//toDo: work queue; new states, and existing states whose basis changed through merging, are pushed onto it
void RNGLRParser::addStates(std::vector< State* >* stateSets, State* state, std::queue<State*>* toDo) {
std::vector< State* > newStates;
//For each rule in the state we already have
std::vector<ParseRule*> currStateTotal = state->getTotal();
for (std::vector<ParseRule*>::size_type i = 0; i < currStateTotal.size(); i++) {
//Clone the current rule
ParseRule* advancedRule = currStateTotal[i]->clone();
//Try to advance the pointer, if successful see if it is the correct next symbol
if (advancedRule->advancePointer()) {
//Technically, it should be the set of rules sharing this symbol advanced past in the basis for new state
//So search our new states to see if any of them use this advanced symbol as a base.
//If so, add this rule to them.
//If not, create it.
bool symbolAlreadyInState = false;
for (std::vector< State* >::size_type j = 0; j < newStates.size(); j++) {
if (newStates[j]->basis[0]->getAtIndex() == advancedRule->getAtIndex()) {
symbolAlreadyInState = true;
//Add rule to state, combining with identical rule except lookahead if exists
newStates[j]->addRuleCombineLookahead(advancedRule);
//We found a state with the same symbol, so stop searching
break;
}
}
if (!symbolAlreadyInState) {
State* newState = new State(stateSets->size()+newStates.size(),advancedRule, state);
newStates.push_back(newState);
}
} else {
//The pointer could not be advanced, so the clone is not needed
delete advancedRule;
}
}
//Put all our new states in the set of states only if they're not already there.
bool stateAlreadyInAllStates = false;
Symbol currStateSymbol;
for (std::vector< State * >::size_type i = 0; i < newStates.size(); i++) {
stateAlreadyInAllStates = false;
//The symbol on which state transitions to this new state
currStateSymbol = (*(newStates[i]->getBasis()))[0]->getAtIndex();
for (std::vector< State * >::size_type j = 0; j < stateSets->size(); j++) {
if (newStates[i]->basisEqualsExceptLookahead(*((*stateSets)[j]))) {
//if (newStates[i]->basisEquals(*((*stateSets)[j]))) {
stateAlreadyInAllStates = true;
//If it does exist, we should add it as the shift/goto in the action table
//std::cout << "newStates[" << i << "] == stateSets[" << j << "]" << std::endl;
//A basis that differs (which, given the check above, can only be in lookahead) means the existing state must be processed again
if (!((*stateSets)[j]->basisEquals(*(newStates[i]))))
toDo->push((*stateSets)[j]);
//NOTE(review): after merging, the newStates[i] object itself is not deleted here - confirm whether that is intended
(*stateSets)[j]->combineStates(*(newStates[i]));
//std::cout << j << "\t Hay, doing an inside loop state reductions!" << std::endl;
addStateReductionsToTable((*stateSets)[j]);
table.add(stateNum(state), currStateSymbol, new ParseAction(ParseAction::SHIFT, j));
break;
}
}
if (!stateAlreadyInAllStates) {
//If the state does not already exist, add it and add it as the shift/goto in the action table
stateSets->push_back(newStates[i]);
toDo->push(newStates[i]);
table.add(stateNum(state), currStateSymbol, new ParseAction(ParseAction::SHIFT, stateSets->size()-1));
}
}
//Finally record the reductions of the state we started from
addStateReductionsToTable(state);
}
//Adds a REDUCE action to the table for every rule of state that is either complete or whose remaining
//part reduces to null, once per lookahead symbol of that rule.
//The same rule object is used for right-nulled reductions; its index records how much was really parsed,
//so nothing extra gets popped off the stack.
//(This only really needs doing for the state's basis, but all rules are walked anyway.)
void RNGLRParser::addStateReductionsToTable(State* state) {
    std::vector<ParseRule*> rules = state->getTotal();
    for (ParseRule* rule : rules) {
        std::vector<Symbol> lookahead = rule->getLookahead();
        //Neither a completed rule nor a right-nullable one: no reduction to record
        if (!(rule->isAtEnd() || reducesToNull(rule)))
            continue;
        for (std::vector<Symbol>::size_type k = 0; k < lookahead.size(); k++)
            table.add(stateNum(state), lookahead[k], new ParseAction(ParseAction::REDUCE, rule));
    }
}
//True when the rule's pointer is at index 0 and the rule reduces to null from there.
bool RNGLRParser::fullyReducesToNull(ParseRule* rule) {
    if (rule->getIndex() != 0)
        return false;
    return reducesToNull(rule);
}
//Memoizing front end for the two-argument reducesToNull: answers are cached per rule pointer
//in reduceToNullMap.
bool RNGLRParser::reducesToNull(ParseRule* rule) {
    auto cached = reduceToNullMap.find(rule);
    if (cached == reduceToNullMap.end()) {
        //Not computed yet: start the search with nothing to avoid, then remember the answer
        std::vector<Symbol> nothingToAvoid;
        auto nullable = reducesToNull(rule, nothingToAvoid);
        reduceToNullMap[rule] = nullable;
        return nullable;
    }
    return cached->second;
}
//Decides whether everything from the rule's current index to the end of its right side can derive nothing.
//avoidList holds the left sides already being explored on this recursion path; meeting one of them again
//counts as "does not reduce", which keeps recursive grammars from looping forever.
bool RNGLRParser::reducesToNull(ParseRule* rule, std::vector<Symbol> avoidList) {
    //If the rule is completed and not null, it doesn't reduce to null, it's just completed.
    if (rule->isAtEnd() && rule->getRightSize() != 0)
        return false;

    for (std::vector<Symbol>::size_type a = 0; a < avoidList.size(); a++) {
        if (rule->getLeftSide() == avoidList[a])
            return false;
    }
    avoidList.push_back(rule->getLeftSide());

    std::vector<Symbol> remaining = rule->getRightSide();
    for (std::vector<Symbol>::size_type pos = rule->getIndex(); pos < remaining.size(); pos++) {
        //The explicit null symbol trivially derives nothing
        if (remaining[pos] == nullSymbol)
            continue;
        //A terminal can never disappear
        if (remaining[pos].isTerminal())
            return false;
        //A nonterminal is fine if at least one of its productions reduces to null
        bool someProductionNullable = false;
        for (std::vector<ParseRule*>::size_type g = 0; g < loadedGrammer.size() && !someProductionNullable; g++) {
            if (loadedGrammer[g]->getLeftSide() == remaining[pos] && reducesToNull(loadedGrammer[g], avoidList))
                someProductionNullable = true;
        }
        if (!someProductionNullable)
            return false;
    }
    return true;
}
//Convenience overload: builds the nullable-parts tree for rule starting with an empty avoid list.
NodeTree<Symbol>* RNGLRParser::getNullableParts(ParseRule* rule) {
    std::vector<NodeTree<Symbol>*> noPartsYet;
    return getNullableParts(rule, noPartsYet);
}
//Builds the tree describing the part of rule that derives nothing.
//Returns NULL when the rule does not reduce to null. Otherwise returns a newly allocated node for the
//rule's left side, whose children are either a single null-symbol node or the nullable-parts trees of the
//symbols remaining after the rule's pointer.
//avoidList: trees already built on this path; a remaining symbol matching one of them reuses that tree.
NodeTree<Symbol>* RNGLRParser::getNullableParts(ParseRule* rule, std::vector<NodeTree<Symbol>*> avoidList) {
    if (!reducesToNull(rule))
        return NULL;

    //std::cout << "Reduces to null so adding parts " << rule->toString() << std::endl;
    Symbol symbol = rule->getLeftSide();
    NodeTree<Symbol>* symbolNode = new NodeTree<Symbol>(symbol.getName(), symbol);
    if (rule->getAtNextIndex() == nullSymbol) {
        symbolNode->addChild(new NodeTree<Symbol>(nullSymbol.getName(), nullSymbol));
        return symbolNode;
    }

    //Find recursively, walking a private copy of the rule so the caller's pointer position is untouched
    ParseRule* iterate = rule->clone();
    while (!iterate->isAtEnd()) {
        //Check to see if we've done this symbol already, if so use it
        for (std::vector<NodeTree<Symbol>*>::size_type i = 0; i < avoidList.size(); i++) {
            if (iterate->getAtNextIndex() == avoidList[i]->getData()) {
                symbolNode->addChild(avoidList[i]);
                break;
            }
        }
        //NOTE(review): the search below runs even when the symbol was found in avoidList above;
        //kept as-is to preserve the trees produced, but confirm that this is intended.
        for (std::vector<ParseRule*>::size_type i = 0; i < loadedGrammer.size(); i++) {
            if (fullyReducesToNull(loadedGrammer[i]) && iterate->getAtNextIndex() == loadedGrammer[i]->getLeftSide()) {
                NodeTree<Symbol>* symbolTree = getNullableParts(loadedGrammer[i], avoidList);
                avoidList.push_back(symbolTree);
                symbolNode->addChild(symbolTree);
            }
        }
        iterate->advancePointer();
    }
    //The clone was only a cursor; release it so it is not leaked on every call
    delete iterate;
    return symbolNode;
}
//Placeholder overload: ignores symbol and returns a new node named "CRAZY_SYMBOL" holding the null symbol.
NodeTree<Symbol>* RNGLRParser::getNullableParts(Symbol symbol) {
    NodeTree<Symbol>* placeholder = new NodeTree<Symbol>("CRAZY_SYMBOL", nullSymbol);
    return placeholder;
}
//Returns the GSS edge labels between each pair of consecutive nodes in path, in path order.
//A path with fewer than two nodes has no edges and yields an empty vector.
std::vector<NodeTree<Symbol>*> RNGLRParser::getPathEdges(std::vector<NodeTree<int>*> path) {
    std::vector<NodeTree<Symbol>*> pathEdges;
    //Written as i+1 < size() rather than i < size()-1 so that an empty path cannot underflow the unsigned bound
    for (std::vector<NodeTree<int>*>::size_type i = 0; i + 1 < path.size(); i++)
        pathEdges.push_back(gss.getEdge(path[i], path[i+1]));
    return pathEdges;
}
//Returns the 1-based line number on which token number tokenNum starts, found by counting the newline
//characters in the values of all tokens before it.
//tokenNum values past the end of the input are treated as "end of input" instead of reading out of bounds.
int RNGLRParser::findLine(int tokenNum) {
    int lineNo = 1;
    //Never look at more tokens than were actually lexed
    const int tokenCount = static_cast<int>(input.size());
    const int limit = tokenNum < tokenCount ? tokenNum : tokenCount;
    for (int i = 0; i < limit; i++) {
        const std::string tokenString = input[i].getValue();
        for (std::string::size_type j = 0; j < tokenString.size(); j++) {
            if (tokenString[j] == '\n')
                lineNo++;
        }
    }
    return lineNo;
}

View File

@@ -0,0 +1,225 @@
#include "RegEx.h"
#include <cassert>
//Builds the matcher for inPattern: constructs the state graph and then marks every state the pattern
//can end in as a goal by giving it a NULL successor.
RegEx::RegEx(std::string inPattern) : pattern(inPattern) {
    std::vector<RegExState*> finalStates;
    begin = construct(&finalStates, inPattern);
    //A NULL entry among a state's successors is what isGoal() looks for
    for (RegExState* last : finalStates)
        last->addNext(NULL);
}
//Builds the state graph for pattern and returns its begin state.
//The begin state is a fresh RegExState created with the default constructor; the states a match may start
//with are its successors. On return *ending holds the states in which the pattern can end.
//Handled syntax: '*', '+', '?', '|', parenthesised groups (built by a recursive call) and '\' to escape the
//next character; every other character becomes a state matching that character.
//ending: output parameter, overwritten with the end states
//pattern: the (sub)pattern to build
RegExState* RegEx::construct(std::vector<RegExState*>* ending, std::string pattern) {
//In the RegEx re-write, instead of doing complicated unparenthesising, we keep track of both the "front" and the "end" of a state.
//(these could be different if the state is parenthesized)
//"previous" is the item before the current one; it is what '*', '?' and '|' connect around or back to
std::vector<RegExState*> previousStatesBegin;
std::vector<RegExState*> previousStatesEnd;
std::vector<RegExState*> currentStatesBegin;
std::vector<RegExState*> currentStatesEnd;
//Set by '|' and consumed by the next item, which is then added as an alternative instead of a successor
bool alternating = false;
RegExState* begin = new RegExState();
currentStatesBegin.push_back(begin);
currentStatesEnd.push_back(begin);
for (int i = 0; i < pattern.length(); i++) {
switch (pattern[i]) {
case '*':
{
//std::cout << "Star at " << i << " in " << pattern << std::endl;
//NOTE: Because of the re-write, this is necessary again
for (std::vector<RegExState*>::size_type j = 0; j < currentStatesEnd.size(); j++)
for (std::vector<RegExState*>::size_type k = 0; k < currentStatesBegin.size(); k++)
currentStatesEnd[j]->addNext(currentStatesBegin[k]); //Make the ends point to the beginnings
//add all previous states to current states to enable skipping over the starred item
currentStatesBegin.insert(currentStatesBegin.end(), previousStatesBegin.begin(), previousStatesBegin.end());
currentStatesEnd.insert(currentStatesEnd.end(), previousStatesEnd.begin(), previousStatesEnd.end());
}
break;
case '+':
{
//std::cout << "Plus at " << i << " in " << pattern << std::endl;
//NOTE: Because of the re-write, this is necessary again
//Same loop-back as '*', but without the skip, so the item must match at least once
for (std::vector<RegExState*>::size_type j = 0; j < currentStatesEnd.size(); j++)
for (std::vector<RegExState*>::size_type k = 0; k < currentStatesBegin.size(); k++)
currentStatesEnd[j]->addNext(currentStatesBegin[k]); //Make the ends point to the beginnings
}
break;
case '?':
{
//std::cout << "Question at " << i << " in " << pattern << std::endl;
//add all previous states to current states to enable skipping over the questioned item
currentStatesBegin.insert(currentStatesBegin.end(), previousStatesBegin.begin(), previousStatesBegin.end());
currentStatesEnd.insert(currentStatesEnd.end(), previousStatesEnd.begin(), previousStatesEnd.end());
}
break;
case '|':
{
//std::cout << "Alternation at " << i << " in " << pattern << std::endl;
//alternation
alternating = true;
}
break;
case '(':
{
//std::cout << "Begin paren at " << i << " in " << pattern << std::endl;
//parentheses
//Build the group with a recursive call, then splice its first states and end states in as one item
std::vector<RegExState*> innerEnds;
int perenEnd = findPerenEnd(pattern, i);
RegExState* innerBegin = construct(&innerEnds, strSlice(pattern, i+1, perenEnd));
//Continue scanning after the group (the loop's i++ steps past perenEnd)
i = perenEnd;
std::vector<RegExState*> innerBegins = innerBegin->getNextStates();
if (alternating) {
for (std::vector<RegExState*>::size_type j = 0; j < previousStatesEnd.size(); j++)
for (std::vector<RegExState*>::size_type k = 0; k < innerBegins.size(); k++)
previousStatesEnd[j]->addNext(innerBegins[k]);
currentStatesBegin.insert(currentStatesBegin.end(), innerBegins.begin(), innerBegins.end());
currentStatesEnd.insert(currentStatesEnd.end(), innerEnds.begin(), innerEnds.end());
} else {
for (std::vector<RegExState*>::size_type j = 0; j < currentStatesEnd.size(); j++)
for (std::vector<RegExState*>::size_type k = 0; k < innerBegins.size(); k++)
currentStatesEnd[j]->addNext(innerBegins[k]);
previousStatesBegin = currentStatesBegin;
previousStatesEnd = currentStatesEnd;
currentStatesBegin = innerBegins;
currentStatesEnd = innerEnds;
}
alternating = false;
}
break;
// ) does not need a case as we skip over it after finding it in ('s case
case '\\':
{
i++;
//std::cout << "Escape! Escaping: " << pattern[i] << std::endl;
//Ahh, it's escaping a special character, so fall through to the default.
//NOTE(review): when the backslash is the last character, pattern[i] below indexes one past the last character
}
//Intentional fall-through: the escaped character is handled as a regular one
default:
{
//std::cout << "Regular" << std::endl;
//Ahh, it's regular
RegExState* next = new RegExState(pattern[i]);
//If we're alternating, add next as the next for each previous state, and add self to currentStates
if (alternating) {
for (std::vector<RegExState*>::size_type j = 0; j < previousStatesEnd.size(); j++)
previousStatesEnd[j]->addNext(next);
currentStatesBegin.push_back(next);
currentStatesEnd.push_back(next);
alternating = false;
} else {
//If we're not alternating, add next as next for all the current states, make the current states the new
//previous states, and add ourself as the new current state.
for (std::vector<RegExState*>::size_type j = 0; j < currentStatesEnd.size(); j++)
currentStatesEnd[j]->addNext(next);
previousStatesBegin.clear();
previousStatesEnd.clear();
previousStatesBegin = currentStatesBegin;
previousStatesEnd = currentStatesEnd;
currentStatesBegin.clear();
currentStatesEnd.clear();
currentStatesBegin.push_back(next);
currentStatesEnd.push_back(next);
}
}
}
}
//Whatever item is current when the pattern runs out is where the pattern can end
(*ending) = currentStatesEnd;
return(begin);
}
//Destructor. Performs no cleanup.
//NOTE(review): the RegExState objects allocated with new in construct() are not released here - confirm that this is intended
RegEx::~RegEx() {
//No cleanup necessary
}
int RegEx::longMatch(std::string stringToMatch) {
// Start in the begin state (only).
int lastMatch = -1;
currentStates.clear();
currentStates.push_back(begin);
std::vector<RegExState*> nextStates;
for (int i = 0; i < stringToMatch.size(); i++) {
//Go through every current state. Check to see if it is goal, if so update last goal.
//Also, add each state's advance to nextStates
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
if (currentStates[j]->isGoal())
lastMatch = i;
std::vector<RegExState*> addStates = currentStates[j]->advance(stringToMatch.at(i));
nextStates.insert(nextStates.end(), addStates.begin(), addStates.end());
}
//Now, clear our current states and add eaczh one of our addStates if it is not already in current states
currentStates.clear();
for (std::vector<RegExState*>::size_type j = 0; j < nextStates.size(); j++) {
bool inCurrStates = false;
for (std::vector<RegExState*>::size_type k = 0; k < currentStates.size(); k++) {
if (nextStates[j] == currentStates[k])
inCurrStates = true;
}
if (!inCurrStates)
currentStates.push_back(nextStates[j]);
}
// if (currentStates.size() != 0)
// std::cout << "Matched " << i << " character: " << stringToMatch[i-1] << std::endl;
nextStates.clear();
//If we can't continue matching, just return our last matched
if (currentStates.size() == 0)
break;
}
//Check to see if we match on the last character in the string
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
if (currentStates[j]->isGoal())
lastMatch = stringToMatch.size();
}
return lastMatch;
}
std::string RegEx::getPattern() {
return pattern;
}
//Returns the pattern followed by " -> " and the textual dump of the state graph.
std::string RegEx::toString() {
    std::string description = pattern;
    description += " -> ";
    description += begin->toString();
    return description;
}
//Self-test for RegEx: checks longMatch against a handful of patterns using assert and prints a
//confirmation line when every check holds.
void RegEx::test() {
{
//Star: zero or more, so an empty match (0) is reported where nothing matches
RegEx re("a*");
assert(re.longMatch("a") == 1);
assert(re.longMatch("aa") == 2);
assert(re.longMatch("aaaab") == 4);
assert(re.longMatch("b") == 0);
}
{
//Plus: at least one is required, so no match is -1 rather than 0
RegEx re("a+");
assert(re.longMatch("aa") == 2);
assert(re.longMatch("aaaab") == 4);
assert(re.longMatch("b") == -1);
}
{
//Optional group: a partially present group is not consumed
RegEx re("a(bc)?");
assert(re.longMatch("ab") == 1);
}
{
//Starred alternation with a nested group
RegEx re("((ab)|c)*");
assert(re.longMatch("ababc") == 5);
assert(re.longMatch("ad") == 0);
assert(re.longMatch("ababccd") == 6);
}
{
//A larger pattern combining groups, alternation, star and plus
RegEx re("bbb((bba+)|(ba+))*a*((a+b)|(a+bb)|(a+))*bbb") ;
assert(re.longMatch("bbbababbbaaaaaaaaaaaaaaaaaaabbb") == 9);
}
std::cout << "RegEx tests pass\n";
}

View File

@@ -0,0 +1,82 @@
#include "RegExState.h"
//Creates a state that matches the single character inCharacter.
RegExState::RegExState(char inCharacter) : character(inCharacter) {
}
//Creates a state whose character is 0.
RegExState::RegExState() : character(0) {
}
//Destructor. Does nothing; in particular the states pointed to by nextStates are not deleted.
RegExState::~RegExState() {
//No cleanup necessary
}
//Appends nextState to this state's successors. A NULL successor marks this state as a goal (see isGoal).
void RegExState::addNext(RegExState* nextState) {
    this->nextStates.push_back(nextState);
}
bool RegExState::characterIs(char inCharacter) {
return character == inCharacter;
}
std::vector<RegExState*> RegExState::advance(char advanceCharacter) {
std::vector<RegExState*> advanceStates;
for (std::vector<RegExState*>::size_type i = 0; i < nextStates.size(); i++) {
if (nextStates[i] != NULL && nextStates[i]->characterIs(advanceCharacter))
advanceStates.push_back(nextStates[i]);
}
return advanceStates;
}
std::vector<RegExState*> RegExState::getNextStates() {
return nextStates;
}
bool RegExState::isGoal() {
for (std::vector<RegExState*>::size_type i = 0; i < nextStates.size(); i++)
if (nextStates[i] == NULL)
return true;
return false;
}
//Dumps the graph reachable from this state, starting with an empty list of states to avoid.
std::string RegExState::toString() {
    std::vector<RegExState*> visited;
    return toString(&visited);
}
//Dumps the graph reachable from this state, treating avoid as already visited.
std::string RegExState::toString(RegExState* avoid) {
    std::vector<RegExState*> visited(1, avoid);
    return toString(&visited);
}
//Dumps this state and, recursively, its successors.
//avoid lists the states already printed; this state is appended to it, and any successor found in it is
//printed as "->loop" instead of being expanded, which keeps cyclic graphs from recursing forever.
std::string RegExState::toString(std::vector<RegExState*>* avoid) {
    avoid->push_back(this);
    std::string rendered = std::string("\"") + character + "\"";
    for (RegExState* successor : nextStates) {
        bool alreadyPrinted = false;
        for (std::vector<RegExState*>::size_type a = 0; a < avoid->size(); a++) {
            if ((*avoid)[a] == successor)
                alreadyPrinted = true;
        }
        if (alreadyPrinted) {
            rendered += "->loop";
            continue;
        }
        if (successor == NULL)
            rendered += "-> GOAL ";
        else if (successor != this)
            rendered += "->" + successor->toString(avoid) + " EC ";
        else
            rendered += "->this";
    }
    return rendered;
}
char RegExState::getCharacter() {
return character;
}

View File

@@ -0,0 +1,164 @@
#include "State.h"
//Creates state number `number` whose basis consists of the single rule `basis`.
State::State(int number, ParseRule* basis) : number(number) {
    this->basis.push_back(basis);
}
//Creates state number `number` with the single basis rule `basis` and records `parent` as its first parent.
State::State(int number, ParseRule* basis, State* parent) : number(number) {
    this->basis.push_back(basis);
    this->parents.push_back(parent);
}
//Destructor. Empty: the rules and parent states this State points to are not deleted here.
State::~State() {
}
//Two states are equal when their basis rules and their remaining rules are pairwise equal, in order.
const bool State::operator==(const State &other) {
    typedef std::vector< ParseRule* >::size_type Index;

    //Basis first: same count, then rule by rule
    if (other.basis.size() != basis.size())
        return false;
    for (Index b = 0; b < basis.size(); ++b) {
        if (*(basis[b]) != *(other.basis[b]))
            return false;
    }

    //Then the remaining rules, the same way
    if (other.remaining.size() != remaining.size())
        return false;
    for (Index r = 0; r < remaining.size(); ++r) {
        if (*(remaining[r]) != *(other.remaining[r]))
            return false;
    }
    return true;
}
//Negation of operator==.
const bool State::operator!=(const State &other) {
    const bool same = this->operator==(other);
    return !same;
}
const bool State::basisEquals(const State &other) {
//return (basis == other.basis && remaining == other.remaining);
if (basis.size() != other.basis.size())
return false;
for (std::vector< ParseRule* >::size_type i = 0; i < basis.size(); i++) {
if (*(basis[i]) != (*(other.basis[i])))
return false;
}
return true;
}
const bool State::basisEqualsExceptLookahead(const State &other) {
//return (basis == other.basis && remaining == other.remaining);
if (basis.size() != other.basis.size())
return false;
for (std::vector< ParseRule* >::size_type i = 0; i < basis.size(); i++) {
if (!basis[i]->equalsExceptLookahead(*(other.basis[i])))
return false;
}
return true;
}
//Merges other into this state.
//Each basis rule of other either contributes its lookahead to every basis rule here that equals it apart
//from lookahead, or, when there is no such rule, is added to this basis (the pointer is shared, not cloned).
//other's parents are merged in as well.
void State::combineStates(State &other) {
    typedef std::vector< ParseRule* >::size_type Index;
    for (Index o = 0; o < other.basis.size(); o++) {
        ParseRule* incoming = other.basis[o];
        bool merged = false;
        for (Index m = 0; m < basis.size(); m++) {
            if (!basis[m]->equalsExceptLookahead(*incoming))
                continue;
            basis[m]->addLookahead(incoming->getLookahead());
            merged = true;
        }
        if (!merged)
            basis.push_back(incoming);
    }
    addParents(other.getParents());
}
std::vector<ParseRule*> State::getTotal() {
std::vector<ParseRule*> total;
total.insert(total.begin(), basis.begin(), basis.end());
total.insert(total.end(), remaining.begin(), remaining.end());
return total;
}
std::vector<ParseRule*>* State::getBasis() {
return &basis;
}
std::vector<ParseRule*>* State::getRemaining() {
return &remaining;
}
//True when some rule of this state (basis or remaining) compares equal to *rule.
bool State::containsRule(ParseRule* rule) {
    auto everything = getTotal();
    for (ParseRule* candidate : everything) {
        if (*rule == *candidate)
            return true;
    }
    return false;
}
//Adds rule to this state. If a rule equal to it apart from lookahead is already present (in basis or
//remaining), only rule's lookahead is added to the first such rule; otherwise rule is appended to the basis.
//NOTE(review): in the merge case rule is neither stored nor deleted here - confirm the caller owns it.
void State::addRuleCombineLookahead(ParseRule* rule) {
    auto everything = getTotal();
    for (ParseRule* existing : everything) {
        if (rule->equalsExceptLookahead(*existing)) {
            existing->addLookahead(rule->getLookahead());
            return;
        }
    }
    basis.push_back(rule);
}
std::string State::toString() {
std::string concat = "";
concat += "State " + intToString(number) + " with " + intToString(parents.size()) + " parents:\n";
for (std::vector<ParseRule*>::size_type j = 0; j < basis.size(); j++) {
concat += "\t" + basis[j]->toString() + "\n";
}
for (std::vector<ParseRule*>::size_type j = 0; j < remaining.size(); j++) {
concat += "\t+\t" + remaining[j]->toString() + "\n";
}
return concat;
}
//Adds each state in parents to this state's parent list, skipping any whose basis equals (per basisEquals)
//that of a parent already recorded.
void State::addParents(std::vector<State*>* parents) {
    typedef std::vector<State*>::size_type Index;
    for (Index n = 0; n < parents->size(); n++) {
        State* candidate = (*parents)[n];
        bool known = false;
        for (Index k = 0; k < this->parents.size() && !known; k++) {
            if (this->parents[k]->basisEquals(*candidate))
                known = true;
        }
        if (!known)
            this->parents.push_back(candidate);
    }
}
std::vector<State*>* State::getParents() {
return &parents;
}
std::vector<State*>* State::getDeepParents(int depth) {
if (depth <= 0) {
std::vector<State*>* returnSelf = new std::vector<State*>();
returnSelf->push_back(this);
return returnSelf;
}
std::vector<State*>* recursiveParents = new std::vector<State*>();
std::vector<State*>* recursiveParentsToAdd;
for (std::vector<State*>::size_type i = 0; i < parents.size(); i++) {
recursiveParentsToAdd = parents[i]->getDeepParents(depth-1);
recursiveParents->insert(recursiveParents->end(), recursiveParentsToAdd->begin(), recursiveParentsToAdd->end());
}
return recursiveParents;
}
int State::getNumber() {
return number;
}

View File

@@ -0,0 +1,166 @@
#include "StringReader.h"
#include <cassert>
//Creates a reader with no text, positioned at the start.
StringReader::StringReader()
{
    str_pos = 0;
    //Previously left uninitialized by this constructor; the string constructor gets it via setString()
    end_reached = false;
}
//Creates a reader over inputString, positioned at its start.
StringReader::StringReader(std::string inputString) : str_pos(0)
{
    setString(inputString);
}
//Destructor. Empty: there is nothing to release explicitly.
StringReader::~StringReader()
{
//dtor
}
void StringReader::setString(std::string inputString)
{
rd_string = inputString;
end_reached = false;
}
//Returns the next token delimited by a space, newline or tab, skipping tokens that consist of just one
//of those delimiter characters. Returns "" when getTokens does (e.g. at the end of the text).
std::string StringReader::word(bool truncateEnd)
{
    std::string token;
    do {
        token = getTokens(" \n\t", truncateEnd);
    } while (token == " " || token == "\n" || token == "\t");
    return token;
}
//Returns the next token delimited by a newline, i.e. the rest of the current line.
std::string StringReader::line(bool truncateEnd)
{
    std::string restOfLine = getTokens("\n", truncateEnd);
    return restOfLine;
}
//Returns the next token, starting at the current read position and ending at the first character from
//stop_chars, and advances the read position past it.
//  - At or past the end of the text: returns "".
//  - Sitting on a stop character: returns that single character.
//  - Starting on a double quote: the token runs to the matching closing quote (a quote preceded by an odd
//    number of backslashes is escaped and does not close it), so it may contain stop characters.
//  - No terminator found (including an unterminated quote): sets end_reached and returns "".
//truncateEnd: when true the terminating stop character is consumed but not returned.
std::string StringReader::getTokens(const char *stop_chars, bool truncateEnd)
{
    if (str_pos >= rd_string.size())
        return "";
    size_t found_pos = rd_string.find_first_of(stop_chars, str_pos);
    const bool quoted = rd_string[str_pos] == '\"';
    if (quoted) {
        //Find the closing quote, skipping escaped ones. The npos check keeps an unterminated quote from
        //indexing rd_string out of range or restarting the search from the beginning of the text.
        found_pos = rd_string.find('\"', str_pos + 1);
        while (found_pos != std::string::npos) {
            //Count the backslashes directly in front of this quote
            size_t numBackslashes = 0;
            while (numBackslashes < found_pos && rd_string[found_pos - 1 - numBackslashes] == '\\')
                numBackslashes++;
            //An even count means the backslashes escape each other and the quote is real
            if (numBackslashes % 2 == 0)
                break;
            found_pos = rd_string.find('\"', found_pos + 1);
        }
    }
    if (found_pos == str_pos) //We are at the endline
    {
        std::string stop_char(1, rd_string[str_pos]);
        str_pos++;
        return stop_char;
    } else if (found_pos == std::string::npos) //We are at the end of the file
    {
        //End of String
        end_reached = true;
        //std::cout << "Reached end of file!\n";
        return "";
    } else {
        if (truncateEnd) //If we want to get rid of the delimiting character, which is the default, don't add the last char. Note we have to increase str_pos by one manually later
            found_pos -= 1;
        //For a quoted token found_pos is the closing quote, which belongs to the token
        if (quoted)
            found_pos++;
        std::string string_section = rd_string.substr(str_pos, found_pos - str_pos + 1);
        str_pos = found_pos + 1;
        if (truncateEnd) //Ok, we didn't add the last char, but str_pos now points at that char. So we move it one ahead.
            str_pos++;
        return string_section;
    }
}
// Self-test for StringReader: runs a series of assert-based scenarios against
// word() and line() and prints a confirmation message when all of them pass.
// Each scenario uses its own reader, and the order of calls inside a scenario
// matters because every call advances the reader.
void StringReader::test()
{
{
// A lone quoted token with no trailing delimiter.
StringReader reader("\"x\"");
assert(reader.word() == "\"x\"");
assert(reader.word() == "");
}
{
// A quoted token followed by an ordinary token.
StringReader reader("\"y\" ;\n");
assert(reader.word() == "\"y\"");
assert(reader.word() == ";");
assert(reader.word() == "");
}
{
// Two grammar-style lines mixing plain and quoted tokens.
StringReader reader("Goal = greeting ;\n"
"greeting = \"hello\" | greeting \"world\" ;\n");
assert(reader.word() == "Goal");
assert(reader.word() == "=");
assert(reader.word() == "greeting");
assert(reader.word() == ";");
assert(reader.word() == "greeting");
assert(reader.word() == "=");
assert(reader.word() == "\"hello\"");
assert(reader.word() == "|");
assert(reader.word() == "greeting");
assert(reader.word() == "\"world\"");
assert(reader.word() == ";");
assert(reader.word() == "");
}
{
// line() returns the remainder of the current line after word() calls.
StringReader reader("one # pretend this is a comment\n"
" two\n");
assert(reader.word() == "one");
assert(reader.word() == "#");
assert(reader.line() == "pretend this is a comment");
assert(reader.word() == "two");
assert(reader.word() == "");
}
{
// Quoted strings can span lines.
StringReader reader("x = \"\n \" ;\n");
assert(reader.word() == "x");
assert(reader.word() == "=");
assert(reader.word() == "\"\n \"");
assert(reader.word() == ";");
assert(reader.word() == "");
}
{
// Strings may contain backslash-escaped quote characters.
StringReader reader( "\"abc\\\"def\\\\\\\\\\\" \"\n");
assert(reader.word() == "\"abc\\\"def\\\\\\\\\\\" \"");
assert(reader.word() == "");
}
{
// A backslash-escaped backslash can be the last character in a string.
StringReader reader( "\"\\\\\" \n");
assert(reader.word() == "\"\\\\\"");
assert(reader.word() == "");
}
std::cout << "StringReader tests pass\n";
}

View File

@@ -0,0 +1,52 @@
#include "Symbol.h"
// Default symbol: placeholder name and value, marked as non-terminal.
Symbol::Symbol() {
    name = "UninitlizedSymbol";
    value = "NoValue";
    terminal = false;
}
// Builds a symbol with the given name and terminal flag; the value is set to
// the placeholder "NoValue".
Symbol::Symbol(std::string name, bool isTerminal) {
    this->value = "NoValue";
    this->terminal = isTerminal;
    this->name = name;
}
// Builds a fully specified symbol: name, terminal flag and value.
Symbol::Symbol(std::string name, bool isTerminal, std::string value) {
    this->value = value;
    this->terminal = isTerminal;
    this->name = name;
}
// Destructor: intentionally empty.
Symbol::~Symbol() {
}
// Two symbols are equal when both name and terminal flag match.
// The value is not part of the comparison.
const bool Symbol::operator==(const Symbol &other) const {
    if (terminal != other.terminal)
        return false;
    return name == other.name;
}
// Negation of operator==.
const bool Symbol::operator!=(const Symbol &other) const {
    return !(*this == other);
}
// Orders symbols by name only.
// NOTE(review): operator== also compares the terminal flag, so two symbols
// that differ only in that flag are "not equal" yet neither is less than the
// other. Confirm this is intended wherever symbols are used as ordered keys.
const bool Symbol::operator<(const Symbol &other) const {
    return this->name.compare(other.getName()) < 0;
}
std::string Symbol::getName() const {
return(name);
}
std::string Symbol::getValue() const {
return(value);
}
// Text form of the symbol: the name, followed by a space and the value when
// the symbol is a terminal.
std::string Symbol::toString() const {
    std::string text = name;
    if (terminal) {
        text += " ";
        text += value;
    }
    return text;
}
bool Symbol::isTerminal() {
return terminal;
}

View File

@@ -0,0 +1,388 @@
#include "Table.h"
// Default constructor: no explicit initialization is performed here.
Table::Table() {
}
// Destructor: intentionally empty.
// NOTE(review): the row and action vectors created with `new` in add() and
// importTable() are not released here; confirm the intended ownership.
Table::~Table() {
}
void Table::exportTable(std::ofstream &file) {
//Save symbolIndexVec
int size = symbolIndexVec.size();
file.write((char*)&size, sizeof(int));
for (int i = 0; i < symbolIndexVec.size(); i++) {
//Save the name
std::string symbolName = symbolIndexVec[i].getName(); //Get the string
size = symbolName.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolName.c_str()), size); //Save the string
//Save the value
std::string symbolValue = symbolIndexVec[i].getValue(); //Get the string
size = symbolValue.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolValue.c_str()), size); //Save the string
bool isTerminal = symbolIndexVec[i].isTerminal();
file.write((char*)&isTerminal, sizeof(bool)); //Save the true false
}
//Save the actual table
size = table.size();
file.write((char*)&size, sizeof(int));
for (int i = 0; i < table.size(); i++) {
//each item is a middle vector
//std::vector< std::vector< std::vector<ParseAction*>* >* > table;
std::vector< std::vector<ParseAction*>* >* middleVector = table[i];
int middleVectorSize = middleVector->size();
file.write((char*)&middleVectorSize, sizeof(int));
for (int j = 0; j < middleVectorSize; j++) {
//each item is an inner vector
std::vector<ParseAction*>* innerVector = (*middleVector)[j];
int innerVectorSize = 0;
if (innerVector)
innerVectorSize = innerVector->size();
else
innerVectorSize = 0;
file.write((char*)&innerVectorSize, sizeof(int));
for (int k = 0; k < innerVectorSize; k++) {
//Save the type
ParseAction* toSave = (*innerVector)[k];
ParseAction::ActionType actionType = toSave->action;
file.write((char*)&actionType, sizeof(ParseAction::ActionType));
//Save the reduce rule if necessary
if (actionType == ParseAction::REDUCE) {
//Save the reduce rule
ParseRule* rule = toSave->reduceRule;
//int pointer index
int ptrIndx = rule->getIndex();
file.write((char*)&ptrIndx, sizeof(int));
//Symbol leftHandle
Symbol leftHandle = rule->getLeftSide();
//Save the name
std::string symbolName = leftHandle.getName(); //Get the string
size = symbolName.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolName.c_str()), size); //Save the string
//Save the value
std::string symbolValue = leftHandle.getValue(); //Get the string
size = symbolValue.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolValue.c_str()), size); //Save the string
bool isTerminal = leftHandle.isTerminal();
file.write((char*)&isTerminal, sizeof(bool)); //Save the true false
//std::vector<Symbol>* lookahead;
//Should not need
//std::vector<Symbol> rightSide;
std::vector<Symbol> rightSide = rule->getRightSide();
size = rightSide.size();
//std::cout << leftHandle.toString() << std::endl;
file.write((char*)&size, sizeof(int));
for (int l = 0; l < rightSide.size(); l++) {
//Save the name
symbolName = rightSide[l].getName(); //Get the string
size = symbolName.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolName.c_str()), size); //Save the string
//
//Save the value
symbolValue = rightSide[l].getValue(); //Get the string
size = symbolValue.size()+1;
file.write((char*)&size, sizeof(int)); //Save size of string
file.write((char*)(symbolValue.c_str()), size); //Save the string
//
isTerminal = rightSide[l].isTerminal();
file.write((char*)&isTerminal, sizeof(bool)); //Save the true false
}
}
int shiftState = toSave->shiftState;
file.write((char*)&shiftState, sizeof(int));
}
}
}
}
void Table::importTable(char* tableData) {
//Load symbolIndexVec
int size = *((int*)tableData);
tableData += sizeof(int);
for (int i = 0; i < size; i++) {
int stringLen = *((int*)tableData);
tableData += sizeof(int);
std::string symbolName = std::string(tableData);
tableData += stringLen*sizeof(char);
stringLen = *((int*)tableData);
tableData += sizeof(int);
std::string symbolValue = std::string(tableData);
tableData += stringLen*sizeof(char);
bool isTerminal = *((bool*)tableData);
tableData += sizeof(bool);
symbolIndexVec.push_back(Symbol(symbolName, isTerminal, symbolValue));
}
//Now for the actual table
int tableSize = *((int*)tableData);
tableData += sizeof(int);
for (int i = 0; i < tableSize; i++) {
//each item is a middle vector
std::vector< std::vector<ParseAction*>* >* middleVector = new std::vector< std::vector<ParseAction*>* >();
table.push_back(middleVector);
int middleVectorSize = *((int*)tableData);
tableData += sizeof(int);
for (int j = 0; j < middleVectorSize; j++) {
//each item is an inner vector
std::vector<ParseAction*>* innerVector = new std::vector<ParseAction*>();
middleVector->push_back(innerVector);
int innerVectorSize = *((int*)tableData);
tableData += sizeof(int);
for (int k = 0; k < innerVectorSize; k++) {
//each item is a ParseRule
ParseAction::ActionType action = *((ParseAction::ActionType*)tableData);
tableData += sizeof(ParseAction::ActionType);
//If reduce, import the reduce rule
ParseRule* reduceRule = NULL;
if (action == ParseAction::REDUCE) {
int ptrIndx = *((int*)tableData);
tableData += sizeof(int);
size = *((int*)tableData);
tableData += sizeof(int);
std::string leftHandleName = std::string(tableData);
tableData += size*sizeof(char);
size = *((int*)tableData);
tableData += sizeof(int);
std::string leftHandleValue = std::string(tableData);
tableData += size*sizeof(char);
bool isTerminal = *((bool*)tableData);
tableData += sizeof(bool);
//right side
std::vector<Symbol> rightSide;
size = *((int*)tableData);
tableData += sizeof(int);
for (int l = 0; l < size; l++) {
int inStringLen = *((int*)tableData);
tableData += sizeof(int);
std::string inSymbolName = std::string(tableData);
tableData += inStringLen*sizeof(char);
inStringLen = *((int*)tableData);
tableData += sizeof(int);
std::string inSymbolValue = std::string(tableData);
tableData += inStringLen*sizeof(char);
bool inIsTerminal = *((bool*)tableData);
tableData += sizeof(bool);
rightSide.push_back(Symbol(inSymbolName, inIsTerminal, inSymbolValue));
}
reduceRule = new ParseRule(Symbol(leftHandleName, isTerminal, leftHandleValue), ptrIndx, rightSide, std::vector<Symbol>());
}
int shiftState = *((int*)tableData);
tableData += sizeof(int);
//And push the new action back
if (reduceRule)
innerVector->push_back(new ParseAction(action, reduceRule));
else
innerVector->push_back(new ParseAction(action, shiftState));
}
}
}
}
// Stores the symbols this table uses for end-of-file and for null.
void Table::setSymbols(Symbol EOFSymbol, Symbol nullSymbol) {
    this->nullSymbol = nullSymbol;
    this->EOFSymbol = EOFSymbol;
}
void Table::add(int stateNum, Symbol tranSymbol, ParseAction* action) {
//If this is the first time we're adding to the table, add the EOF character
if (symbolIndexVec.size() == 0)
symbolIndexVec.push_back(EOFSymbol);
//If state not in table, add up to and it.
//std::cout << "table size is " << table.size() <<std::endl;
while (stateNum >= table.size()) {
//std::cout << "Pushing back table" << std::endl;
table.push_back(new std::vector<std::vector< ParseAction*>* >());
}
//find out what index this symbol is on
int symbolIndex = -1;
for (std::vector<Symbol>::size_type i = 0; i < symbolIndexVec.size(); i++) {
if ( symbolIndexVec[i] == tranSymbol ) {
//Has been found
symbolIndex = i;
break;
}
}
//std::cout << "symbolIndex is " << symbolIndex << std::endl;
//If we've never done this symbol, add it
if (symbolIndex < 0) {
// std::cout << "pushing back symbolIndexVec" <<std::endl;
symbolIndex = symbolIndexVec.size();
symbolIndexVec.push_back(tranSymbol);
}
//std::cout << "symbolIndex is " << symbolIndex << " which is " << symbolIndexVec[symbolIndex]->toString() << std::endl;
//std::cout << table[stateNum] << " ";
while (symbolIndex >= table[stateNum]->size()) {
table[stateNum]->push_back(NULL);
}
//If this table slot is empty
//std::cout << "table[stateNum] is " << table[stateNum] << std::endl;
//std::cout << "blank is " << (*(table[stateNum]))[symbolIndex] << std::endl;
if ( (*(table[stateNum]))[symbolIndex] == NULL ) {
//std::cout << "Null, adding " << action->toString() << std::endl;
std::vector<ParseAction*>* actionList = new std::vector<ParseAction*>();
actionList->push_back(action);
(*(table[stateNum]))[symbolIndex] = actionList;
}
//If the slot is not empty and does not contain ourself, then it is a conflict
//else if ( !(*(table[stateNum]))[symbolIndex]->equalsExceptLookahead(*action)) {
else {
//std::cout << "not Null!" << std::endl;
//std::cout << "State: " << stateNum << " Conflict between old: " << (*(table[stateNum]))[symbolIndex]->toString() << " and new: " << action->toString() << " on " << tranSymbol->toString() << std::endl;
//Check to see if this action is already in the list
//(*(table[stateNum]))[symbolIndex]->push_back(action);
bool alreadyIn = false;
for (std::vector<ParseAction*>::size_type i = 0; i < (*(table[stateNum]))[symbolIndex]->size(); i++)
if (*((*((*(table[stateNum]))[symbolIndex]))[i]) == *action)
alreadyIn = true;
if (!alreadyIn)
(*(table[stateNum]))[symbolIndex]->push_back(action);
}
}
// Clears the action slot for (stateNum, tranSymbol) by setting it to NULL.
// The action list that was stored there is not deleted.
//
// Fix: requests that do not address an existing slot (unknown symbol, state
// outside the table, or a column the state's row does not have) are now
// ignored. Previously an unknown symbol left the index at -1 and the function
// wrote to row[-1], and out-of-range states/columns were indexed unchecked.
void Table::remove(int stateNum, Symbol tranSymbol) {
    //find out what index this symbol is on
    int symbolIndex = -1;
    for (std::vector<Symbol>::size_type i = 0; i < symbolIndexVec.size(); i++) {
        if ( symbolIndexVec[i] == tranSymbol ) {
            //Has been found
            symbolIndex = i;
            break;
        }
    }
    if (symbolIndex < 0)
        return; //Symbol was never added, nothing to remove
    if (stateNum < 0 || static_cast<size_t>(stateNum) >= table.size())
        return; //No such state
    if (static_cast<size_t>(symbolIndex) >= table[stateNum]->size())
        return; //This state's row has no slot for the symbol
    (*(table[stateNum]))[symbolIndex] = NULL;
}
// Looks up the list of parse actions for (state, token).
//
// Returns NULL (after printing a message) when the token is not a known symbol
// or the state is outside the table. Otherwise returns an action list:
//  - the stored list for the slot, if there is one;
//  - a newly allocated list holding a single REJECT action when the slot is
//    missing or empty;
//  - for state 1 on symbol index 0 an ACCEPT action is appended first.
//
// NOTE(review): the ACCEPT action is pushed onto the list stored in the table
// when one exists, so repeated calls for (1, EOF) keep growing that list.
// NOTE(review): lists and actions allocated here are handed to the caller and
// not tracked by the table; ownership is unclear from this function alone.
std::vector<ParseAction*>* Table::get(int state, Symbol token) {
// Find the column index of the token (first match wins).
int symbolIndex = -1;
for (std::vector<Symbol>::size_type i = 0; i < symbolIndexVec.size(); i++) {
if ( symbolIndexVec[i] == token) {
symbolIndex = i;
break;
}
}
if (symbolIndex == -1) {
std::cout << "Unrecognized symbol: " << token.toString() << ", cannot get from table!" << std::endl;
return NULL;
}
//std::cout << "Get for state: " << state << ", and Symbol: " << token.toString() << std::endl;
if (state < 0 || state >= table.size()) {
std::cout << "State bad: " << state << std::endl;
return NULL;
}
// Fetch the stored list, leaving action NULL when this state's row has no such column.
std::vector<ParseAction*>* action = NULL;
if (symbolIndex < 0 || symbolIndex >= table[state]->size()) {
//std::cout << "Symbol bad for this state: " << token.toString() << ". This is a reject." << std::endl;
} else {
action = (*(table[state]))[symbolIndex];
}
//This is the accepting state, as it is the 1th's state's reduction on EOF, which is 0 in the symbolIndexVec
//(This assumes singular goal assignment, a simplification for now)
if (state == 1 && symbolIndex == 0) {
if (action == NULL)
action = new std::vector<ParseAction*>();
action->push_back(new ParseAction(ParseAction::ACCEPT));
}
//If outside the symbol range of this state (same as NULL), reject
// (this replaces whatever `action` held, including a list created just above)
if ( symbolIndex >= table[state]->size() ) {
action = new std::vector<ParseAction*>();
action->push_back(new ParseAction(ParseAction::REJECT));
}
//If null, reject. (this is a space with no other action)
if (action == NULL) {
action = new std::vector<ParseAction*>();
action->push_back(new ParseAction(ParseAction::REJECT));
}
//Otherwise, we have something, so return it
return action;
}
// Returns the first SHIFT action registered for (state, token), or NULL when
// there is none.
//
// Fix: get() returns NULL for an unrecognized symbol or an out-of-range state;
// that result was dereferenced unconditionally. It now yields NULL (no shift).
ParseAction* Table::getShift(int state, Symbol token) {
    std::vector<ParseAction*>* actions = get(state, token);
    ParseAction* shift = NULL;
    if (actions == NULL)
        return shift;
    for (std::vector<ParseAction*>::size_type i = 0; i < actions->size(); i++) {
        if ((*actions)[i]->action == ParseAction::SHIFT) {
            shift = (*actions)[i];
            break;
        }
    }
    return shift;
}
std::vector<std::pair<std::string, ParseAction>> Table::stateAsParseActionVector(int state) {
std::vector<std::pair<std::string, ParseAction>> reconstructedState;
std::vector<std::vector<ParseAction*>*>* stateVec = table[state];
for (int i = 0; i < stateVec->size(); i++)
if (std::vector<ParseAction*>* forStateAndSymbol = (*stateVec)[i])
for (int j = 0; j < forStateAndSymbol->size(); j++)
reconstructedState.push_back(std::make_pair(symbolIndexVec[i].toString(),*((*forStateAndSymbol)[j])));
return reconstructedState;
}
std::string Table::toString() {
std::string concat = "";
for (std::vector<Symbol>::size_type i = 0; i < symbolIndexVec.size(); i++)
concat += "\t" + symbolIndexVec[i].toString();
concat += "\n";
for (std::vector< std::vector< std::vector< ParseRule* >* >* >::size_type i = 0; i < table.size(); i++) {
concat += intToString(i) + " is the state\t";
for (std::vector< std::vector< ParseRule* >* >::size_type j = 0; j < table[i]->size(); j++) {
concat += "for " + symbolIndexVec[j].toString() + " do ";
if ( (*(table[i]))[j] != NULL) {
for (std::vector< ParseRule* >::size_type k = 0; k < (*(table[i]))[j]->size(); k++) {
concat += (*((*(table[i]))[j]))[k]->toString() + "\t";
}
} else {
concat += "NULL\t";
}
}
concat += "\n";
}
return(concat);
}

View File

@@ -0,0 +1,62 @@
#include "Tester.h"
// Stores how to invoke kraken and where its grammar lives, and sets up the
// shell fragments and file extensions used when running tests.
Tester::Tester(std::string krakenInvocation, std::string krakenGrammerLocation) : krakenInvocation(krakenInvocation), krakenGrammerLocation(krakenGrammerLocation) {
    //Shell pieces
    shell = "sh";
    cd = "cd";
    removeCmd = "rm -r";
    changePermissions = "chmod 755";
    redirect = ">";
    sep = "/";
    //File extensions
    krakenExtention = ".krak";
    resultsExtention = ".results";
    expectedExtention = ".expected_results";
}
// Destructor: intentionally empty.
Tester::~Tester() {
}
// Runs the remove command on fileName through the shell.
// The name is appended to the command line as-is, without quoting.
void Tester::cleanExtras(std::string fileName) {
    std::string command = removeCmd + " " + fileName;
    ssystem(command);
}
// Runs one test located at `path`:
//   1. removes any previous output at `path`,
//   2. invokes kraken on `<path>.krak` with `path` as its second argument,
//   3. runs `<path>/<name>` and redirects its output to `<path>/<name>.results`,
//   4. compares `<name>.expected_results` with the produced results file.
// Returns the comparison result; on success the generated files are removed.
bool Tester::run(std::string path) {
    //The test name is the last component of the path
    std::string fileName = split(path, sep.c_str()[0]).back();
    std::cout << "Testing: " << fileName << " with " << krakenInvocation << " and " << krakenGrammerLocation << std::endl;

    cleanExtras(path);
    ssystem(krakenInvocation + " " + path + krakenExtention + " " + path);

    //Permissions and the build script are handled automatically now, so the
    //produced program can be run directly.
    const std::string programPath = path + sep + fileName;
    const std::string resultsPath = programPath + resultsExtention;
    ssystem(programPath + " " + redirect + " " + resultsPath);

    bool result = compareFiles(fileName + expectedExtention, resultsPath);
    //If the test was successful, we don't need all the extra files
    if (result) {
        cleanExtras(path);
    }
    return result;
}
bool Tester::compareFiles(std::string file1Path, std::string file2Path) {
std::ifstream file1, file2;
file1.open(file1Path);
if (!file1.is_open()) {
std::cout << file1Path << " could not be opened!" << std::endl;
return false;
}
file2.open(file2Path);
if (!file2.is_open()) {
std::cout << file2Path << " could not be opened!" << std::endl;
return false;
}
std::string file1contents = readFile(file1);
std::string file2contents = readFile(file2);
return file1contents.compare(file2contents) == 0;
}

View File

@@ -0,0 +1,268 @@
#include "Type.h"
// Default type: base type `none`, no indirection, not a reference, no
// definitions attached.
Type::Type() {
    //What the type is
    this->baseType = none;
    this->indirection = 0;
    this->is_reference = false;
    //What it points at
    this->typeDefinition = nullptr;
    this->templateDefinition = nullptr;
    this->templateInstantiated = false;
    this->returnType = nullptr;
}
// Built-in style type given by a base type and a level of indirection.
Type::Type(ValueType typeIn, int indirectionIn) {
    //What the type is
    this->baseType = typeIn;
    this->indirection = indirectionIn;
    this->is_reference = false;
    //What it points at
    this->typeDefinition = nullptr;
    this->templateDefinition = nullptr;
    this->templateInstantiated = false;
    this->returnType = nullptr;
}
// Base type with a set of traits; no indirection, not a reference.
Type::Type(ValueType typeIn, std::set<std::string> traitsIn) {
    //What the type is
    this->baseType = typeIn;
    this->indirection = 0;
    this->is_reference = false;
    this->traits = traitsIn;
    //What it points at
    this->typeDefinition = nullptr;
    this->templateDefinition = nullptr;
    this->templateInstantiated = false;
    this->returnType = nullptr;
}
// Type described by a definition node, with the given indirection.
// The base type is `none`.
Type::Type(NodeTree<ASTData>* typeDefinitionIn, int indirectionIn) {
    //What the type is
    this->baseType = none;
    this->indirection = indirectionIn;
    this->is_reference = false;
    //What it points at
    this->typeDefinition = typeDefinitionIn;
    this->templateDefinition = nullptr;
    this->templateInstantiated = false;
    this->returnType = nullptr;
}
// Type described by a definition node plus a set of traits.
// The base type is `none`, with no indirection.
Type::Type(NodeTree<ASTData>* typeDefinitionIn, std::set<std::string> traitsIn) {
    //What the type is
    this->baseType = none;
    this->indirection = 0;
    this->is_reference = false;
    this->traits = traitsIn;
    //What it points at
    this->typeDefinition = typeDefinitionIn;
    this->templateDefinition = nullptr;
    this->templateInstantiated = false;
    this->returnType = nullptr;
}
// Explicit constructor: base type, definition node, indirection, reference
// flag and traits. Template information and return type start out empty.
Type::Type(ValueType typeIn, NodeTree<ASTData>* typeDefinitionIn, int indirectionIn, bool referenceIn, std::set<std::string> traitsIn) {
    //What the type is
    this->baseType = typeIn;
    this->indirection = indirectionIn;
    this->is_reference = referenceIn;
    this->traits = traitsIn;
    //What it points at
    this->typeDefinition = typeDefinitionIn;
    this->templateDefinition = nullptr;
    this->templateInstantiated = false;
    this->returnType = nullptr;
}
// Most complete constructor: everything from the five-argument form plus the
// parameter types and return type. Template information starts out empty.
Type::Type(ValueType typeIn, NodeTree<ASTData>* typeDefinitionIn, int indirectionIn, bool referenceIn, std::set<std::string> traitsIn, std::vector<Type*> parameterTypesIn, Type* returnTypeIn) {
    //What the type is
    this->baseType = typeIn;
    this->indirection = indirectionIn;
    this->is_reference = referenceIn;
    this->traits = traitsIn;
    //What it points at
    this->typeDefinition = typeDefinitionIn;
    this->templateDefinition = nullptr;
    this->templateInstantiated = false;
    //Function signature parts
    this->parameterTypes = parameterTypesIn;
    this->returnType = returnTypeIn;
}
// Function type built from its parameter types and return type.
// The base type is `function_type`, with no indirection.
Type::Type(std::vector<Type*> parameterTypesIn, Type* returnTypeIn, bool referenceIn) {
    //What the type is
    this->baseType = function_type;
    this->indirection = 0;
    this->is_reference = referenceIn;
    //What it points at
    this->typeDefinition = nullptr;
    this->templateDefinition = nullptr;
    this->templateInstantiated = false;
    //Function signature parts
    this->parameterTypes = parameterTypesIn;
    this->returnType = returnTypeIn;
}
// Type that carries a template definition node plus traits.
// It is not marked as instantiated.
Type::Type(ValueType typeIn, NodeTree<Symbol>* templateDefinitionIn, std::set<std::string> traitsIn) {
    //What the type is
    this->baseType = typeIn;
    this->indirection = 0;
    this->is_reference = false;
    this->traits = traitsIn;
    //What it points at
    this->typeDefinition = nullptr;
    this->templateDefinition = templateDefinitionIn;
    this->templateInstantiated = false;
    this->returnType = nullptr;
}
// Destructor: intentionally empty; pointed-to nodes and types are not deleted.
Type::~Type() {
}
// Equality that also requires the reference flags to match.
const bool Type::operator==(const Type &other) const {
    const bool careAboutReferences = true;
    return test_equality(other, careAboutReferences);
}
const bool Type::test_equality(const Type &other, bool care_about_references) const {
bool first_part = ( baseType == other.baseType && indirection == other.indirection && typeDefinition == other.typeDefinition && templateDefinition == other.templateDefinition && other.traits == traits);
if (care_about_references && is_reference != other.is_reference)
return false;
if (!first_part)
return false;
if ((returnType && !other.returnType) || (!returnType && other.returnType))
return false;
if (returnType && other.returnType)
if (*returnType != *other.returnType)
return false;
if (parameterTypes.size() != other.parameterTypes.size())
return false;
for (int i = 0; i < parameterTypes.size(); i++)
if (*parameterTypes[i] != *other.parameterTypes[i])
return false;
return true;
}
// Negation of operator==.
const bool Type::operator!=(const Type &other) const {
    return !(*this == other);
}
// Ordering over types, comparing one field at a time and deciding on the
// first field that differs: base type, indirection, reference flag (a
// reference sorts before a non-reference), definition pointers (by address),
// traits, return type, then parameter count and parameters.
// Returns false when no field differs.
const bool Type::operator<(const Type &other) const {
    if (baseType != other.baseType) {
        return baseType < other.baseType;
    }
    if (indirection != other.indirection) {
        return indirection < other.indirection;
    }
    if (is_reference != other.is_reference) {
        return is_reference;
    }
    if (typeDefinition != other.typeDefinition) {
        return typeDefinition < other.typeDefinition;
    }
    if (templateDefinition != other.templateDefinition) {
        return templateDefinition < other.templateDefinition;
    }
    if (traits != other.traits) {
        return traits < other.traits;
    }

    //Exactly one side has a return type: fall back to comparing the pointers
    const bool hasReturn = (returnType != nullptr);
    const bool otherHasReturn = (other.returnType != nullptr);
    if (hasReturn != otherHasReturn) {
        return returnType < other.returnType;
    }
    //Both have one: compare by value
    if (hasReturn && *returnType != *other.returnType) {
        return *returnType < *other.returnType;
    }

    if (parameterTypes.size() != other.parameterTypes.size()) {
        return parameterTypes.size() < other.parameterTypes.size();
    }
    for (std::vector<Type*>::size_type i = 0; i < parameterTypes.size(); i++) {
        if (*parameterTypes[i] != *other.parameterTypes[i]) {
            return *parameterTypes[i] < *other.parameterTypes[i];
        }
    }
    return false;
}
std::string Type::toString(bool showTraits) {
std::string typeString;
switch (baseType) {
case none:
if (typeDefinition)
typeString = typeDefinition->getDataRef()->symbol.getName();
else
typeString = "none";
break;
case template_type:
typeString = "template: " + templateDefinition->getDataRef()->toString();
break;
case template_type_type:
typeString = "template_type_type";
break;
case void_type:
typeString = "void";
break;
case boolean:
typeString = "bool";
break;
case integer:
typeString = "int";
break;
case floating:
typeString = "float";
break;
case double_percision:
typeString = "double";
break;
case character:
typeString = "char";
break;
case function_type:
typeString = "function(";
for (Type *param : parameterTypes)
typeString += param->toString();
typeString += "): " + returnType->toString();
break;
default:
if (typeDefinition)
typeString = typeDefinition->getDataRef()->symbol.getName();
else
typeString = "unknown_type";
}
if (is_reference)
typeString = "ref " + typeString;
for (int i = 0; i < indirection; i++)
typeString += "*";
if (indirection < 0)
typeString += "negative indirection: " + intToString(indirection);
if (traits.size() && showTraits) {
typeString += "[ ";
for (auto i : traits)
typeString += i + " ";
typeString += "]";
}
//std::cout << "Extra components of " << typeString << " are " << indirection << " " << typeDefinition << " " << templateDefinition << std::endl;
return typeString;
}
// Allocates a new Type from this one's base type, definition, indirection,
// reference flag, traits, parameter types and return type.
// templateDefinition and templateInstantiated are not passed along.
// The caller owns the returned object.
Type* Type::clone() {
    Type* duplicate = new Type(baseType, typeDefinition, indirection, is_reference, traits, parameterTypes, returnType);
    return duplicate;
}
int Type::getIndirection() {
return indirection;
}
void Type::setIndirection(int indirectionIn) {
indirection = indirectionIn;
}
// Adds one level of indirection.
void Type::increaseIndirection() {
    const int raised = indirection + 1;
    setIndirection(raised);
}
// Removes one level of indirection; the result may become negative.
void Type::decreaseIndirection() {
    const int lowered = indirection - 1;
    setIndirection(lowered);
}
// Adjusts the level of indirection by `mod` (which may be negative).
void Type::modifyIndirection(int mod) {
    const int adjusted = indirection + mod;
    setIndirection(adjusted);
}
// Returns, by value, a clone of this type with one more level of indirection.
// This object is not modified.
//
// Fix: the temporary produced by clone() lives on the heap and was never
// freed after being copied into the return value; it is now deleted.
Type Type::withIncreasedIndirection() {
    Type *heapCopy = clone();
    heapCopy->increaseIndirection();
    Type result = *heapCopy;
    delete heapCopy;
    return result;
}
// Returns, by value, a clone of this type marked as a reference.
// This object is not modified.
//
// Fix: the temporary produced by clone() lives on the heap and was never
// freed after being copied into the return value; it is now deleted.
Type Type::withReference() {
    Type *heapCopy = clone();
    heapCopy->is_reference = true;
    Type result = *heapCopy;
    delete heapCopy;
    return result;
}
// Returns a newly allocated clone of this type marked as a reference.
// The caller owns the returned object.
Type *Type::withReferencePtr() {
    Type *referenced = clone();
    referenced->is_reference = true;
    return referenced;
}
// Returns a newly allocated clone of this type with one more level of
// indirection. The caller owns the returned object.
Type *Type::withIncreasedIndirectionPtr() {
    Type *raised = clone();
    raised->increaseIndirection();
    return raised;
}
// Returns, by value, a clone of this type with one less level of indirection.
// This object is not modified.
//
// Fix: the temporary produced by clone() lives on the heap and was never
// freed after being copied into the return value; it is now deleted.
Type Type::withDecreasedIndirection() {
    Type *heapCopy = clone();
    heapCopy->decreaseIndirection();
    Type result = *heapCopy;
    delete heapCopy;
    return result;
}
// Returns a newly allocated clone of this type with the reference flag
// cleared. The caller owns the returned object.
Type* Type::withoutReference() {
    Type *plain = clone();
    plain->is_reference = false;
    return plain;
}

View File

@@ -0,0 +1,92 @@
#include "util.h"
// std::string convenience wrapper around system(): runs `command` and returns
// whatever system() returns.
int ssystem(std::string command) {
    const char *rawCommand = command.c_str();
    return system(rawCommand);
}
// Formats an int as decimal text using a string stream.
std::string intToString(int theInt) {
    std::stringstream buffer;
    buffer << theInt;
    std::string text = buffer.str();
    return text;
}
// Replaces every occurrence of `search` in `first` with `replace`, except
// occurrences that are escaped, i.e. directly preceded by an odd number of
// backslashes. Text inserted by a replacement is not scanned again.
//
//  first   : text to process (taken by value; the modified copy is returned)
//  search  : substring to look for; if empty, `first` is returned unchanged
//  replace : text substituted for each unescaped match
//
// Fix: an empty `search` matched at every position without consuming input
// and looped forever; it is now a no-op. The loop no longer depends on the
// unsigned wrap-around of first.size()-search.size() when `search` is longer
// than `first`.
std::string replaceExEscape(std::string first, std::string search, std::string replace) {
    if (search.empty())
        return first;
    size_t pos = 0;
    while ((pos = first.find(search, pos)) != std::string::npos) {
        //Count the backslashes directly in front of the match
        size_t numBackslashes = 0;
        while (numBackslashes < pos && first[pos - 1 - numBackslashes] == '\\')
            numBackslashes++;
        //If escaped, don't worry about it: skip one character and keep looking
        if (numBackslashes % 2 == 1) {
            pos++;
            continue;
        }
        first.replace(pos, search.size(), replace);
        pos += replace.size();
    }
    return first;
}
//String slicing is crazy useful. substr isn't bad, but slicing with negative indicies is wonderful
// Slice of str from `begin` up to (not including) `end`.
// Negative indices count from the end, with -1 meaning "one past the last
// character" (so strSlice(s, 0, -1) is the whole string).
// The resolved range is handed to substr() without further validation.
std::string strSlice(std::string str, int begin, int end) {
    int from = begin;
    if (from < 0)
        from += str.length()+1;
    int to = end;
    if (to < 0)
        to += str.length()+1;
    return str.substr(from, to-from);
}
// Scans str from index i and returns the first index at which the running
// count of '(' minus ')' is zero, or -1 if the end of the string is reached
// first. If the character at i is not a parenthesis, i itself is returned.
int findPerenEnd(std::string str, int i) {
    int depth = 0;
    while (i < str.length()) {
        switch (str[i]) {
            case '(':
                depth++;
                break;
            case ')':
                depth--;
                break;
            default:
                break;
        }
        if (depth == 0)
            return i;
        i++;
    }
    return -1;
}
// Splits str on every occurrence of delim using std::getline.
// Empty pieces between adjacent delimiters are kept; an empty input yields an
// empty vector and a trailing delimiter does not add a final empty piece.
std::vector<std::string> split(const std::string &str, char delim) {
    std::vector<std::string> pieces;
    std::stringstream stream(str);
    for (std::string piece; std::getline(stream, piece, delim); )
        pieces.push_back(piece);
    return pieces;
}
// Concatenates the strings in strVec, placing joinStr between neighbours.
// An empty vector produces an empty string.
std::string join(const std::vector<std::string> &strVec, std::string joinStr) {
    std::string joined;
    for (std::vector<std::string>::size_type i = 0; i < strVec.size(); i++) {
        if (i > 0)
            joined += joinStr;
        joined += strVec[i];
    }
    return joined;
}
// Reads the stream line by line until it stops being good() and returns the
// lines joined with "\n".
// Because the stream state is checked before each read rather than after, the
// final failed read still contributes an empty line: input "a\nb\n" becomes
// "a\nb\n\n" and an empty stream becomes "\n".
std::string readFile(std::istream &file) {
    std::string contents;
    for (std::string current; file.good(); ) {
        getline(file, current);
        contents += current;
        contents += "\n";
    }
    return contents;
}
// Right-pads str with spaces until it is at least padTo characters long.
// Strings that are already long enough are returned unchanged.
//
// Fix: a negative padTo was converted to a huge unsigned value by the length
// comparison, so the function kept appending spaces until allocation failed.
// Non-positive widths now leave the string untouched.
std::string padWithSpaces(std::string str, int padTo) {
    if (padTo > 0 && str.length() < static_cast<size_t>(padTo))
        str.append(static_cast<size_t>(padTo) - str.length(), ' ');
    return str;
}