diff --git a/CMakeLists.txt b/CMakeLists.txt
index 28fcbbc..7906821 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@ project(Kraken)
 
 set( MY_INCLUDES ${PROJECT_SOURCE_DIR}/include)
 
-set( MY_SOURCES main.cpp src/NodeTree.cpp )
+set( MY_SOURCES main.cpp src/Parser.cpp src/ParseRule.cpp src/Symbol.cpp src/StringReader.cpp src/NodeTree.cpp )
 
 include_directories( ${MY_INCLUDES} )
 
diff --git a/include/NodeTree.h b/include/NodeTree.h
index a0e5fc7..5f31b23 100644
--- a/include/NodeTree.h
+++ b/include/NodeTree.h
@@ -3,7 +3,7 @@
 
 #ifndef NULL
 #define NULL 0
-#endif NULL
+#endif
 
 #include
 #include
@@ -37,4 +37,4 @@ class NodeTree {
         std::vector children;
 };
 
-#endif //NODETREE_H
\ No newline at end of file
+#endif
\ No newline at end of file
diff --git a/include/ParseRule.h b/include/ParseRule.h
new file mode 100644
index 0000000..59c2587
--- /dev/null
+++ b/include/ParseRule.h
@@ -0,0 +1,36 @@
+#ifndef PARSERULE_H
+#define PARSERULE_H
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+#include "Symbol.h"
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+// One production of the grammar: a left-hand symbol and the ordered list of
+// symbols it expands to. The rule only points at its Symbols; it never deletes them.
+class ParseRule {
+    public:
+        ParseRule();
+        ~ParseRule();
+
+        // Sets the symbol printed on the left of the "->".
+        void setLeftHandle(Symbol* leftHandle);
+        // Appends one symbol to the end of the right-hand side.
+        void appendToRight(Symbol* appendee);
+
+        // Renders the rule as "left -> sym sym ... ;".
+        std::string toString();
+
+    private:
+        int pointerIndex;               // set to 0 by the constructor
+        Symbol* leftHandle;             // NULL until setLeftHandle() is called
+        std::vector<Symbol*> rightSide;
+
+};
+
+#endif
\ No newline at end of file
diff --git a/include/Parser.h b/include/Parser.h
new file mode 100644
index 0000000..a7eb461
--- /dev/null
+++ b/include/Parser.h
@@ -0,0 +1,43 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+#include "ParseRule.h"
+#include "Symbol.h"
+#include "StringReader.h"
+
+#include <iostream>
+#include <map>
+#include <string>
+#include <vector>
+
+// Loads a textual grammar into ParseRule/Symbol objects and prints it back.
+// The Parser owns every Symbol and ParseRule it creates and frees them in
+// its destructor, so it is deliberately non-copyable.
+class Parser {
+    public:
+        Parser();
+        ~Parser();
+
+        // Parses rules of the form "left = sym sym | sym ;" and appends them
+        // to the loaded grammar.
+        void loadGrammer(std::string grammerInputString);
+        // Returns one line per loaded rule, in load order.
+        std::string grammerToString();
+    private:
+        // Not implemented: copying would make two Parsers delete the same objects.
+        Parser(const Parser&);
+        Parser& operator=(const Parser&);
+
+        StringReader reader;
+        std::map<std::string, Symbol*> symbols;
+        std::vector<ParseRule*> loadedGrammer;
+
+        // Returns the Symbol registered under symbolString, creating it on first use.
+        Symbol* getOrAddSymbol(std::string symbolString, bool isTerminal);
+};
+
+#endif
\ No newline at end of file
diff --git a/include/StringReader.h b/include/StringReader.h
new file mode 100644
index 0000000..6e79a5d
--- /dev/null
+++ b/include/StringReader.h
@@ -0,0 +1,33 @@
+#ifndef StringReader_H
+#define StringReader_H
+
+#include <iostream>
+#include <string>
+#include <vector>
+
+// Sequential tokenizer over an in-memory string.
+class StringReader
+{
+    public:
+        StringReader();
+        StringReader(std::string inputString);
+        virtual ~StringReader();
+        // Replaces the text being read and rewinds to its beginning.
+        void setString(std::string inputString);
+        // Next whitespace-delimited token, or "" when the input is exhausted.
+        std::string word(bool truncateEnd = true);
+        // Next piece of text up to the next newline.
+        std::string line(bool truncateEnd = true);
+        // Next piece of text up to the nearest of the given single-character
+        // delimiters; see StringReader.cpp for the exact rules.
+        std::string getTokens(std::vector<std::string> get_chars, bool truncateEnd = true);
+        // Returns to_truncate without its last character.
+        std::string truncateEnd(std::string to_truncate);
+    protected:
+    private:
+        std::string rd_string;
+        std::string::size_type str_pos;
+        bool end_reached;
+};
+
+#endif
diff --git a/include/Symbol.h b/include/Symbol.h
new file mode 100644
index 0000000..295ebf0
--- /dev/null
+++ b/include/Symbol.h
@@ -0,0 +1,25 @@
+#ifndef SYMBOL_H
+#define SYMBOL_H
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+#include <string>
+#include <vector>
+
+// A named grammar symbol, flagged as terminal or non-terminal.
+class Symbol {
+    public:
+        Symbol(std::string name, bool isTerminal);
+        ~Symbol();
+        // Returns the symbol's name.
+        std::string toString();
+    private:
+        std::string name;
+        bool isTerminal;
+
+
+};
+
+#endif
\ No newline at end of file
diff --git a/main.cpp b/main.cpp
index d3856ae..b614fa3 100644
--- a/main.cpp
+++ b/main.cpp
@@ -1,4 +1,5 @@
 #include "NodeTree.h"
+#include "Parser.h"
 #include
 #include
 #include
@@ -27,7 +28,23 @@ int main(int argc, char* argv[]) {
     root.addChild(new NodeTree("SomeOtherChild"));
     root.get(0)->addChild(new NodeTree("Grandchildren"));
 
-    outFile << root.DOTGraphString() << std::endl;
+    //outFile << root.DOTGraphString() << std::endl;
+
+
+    //Read the input file into a string
+    std::string inputFileString;
+    std::string line;
+    //Loop on getline itself so a failed read at end of file appends nothing
+    while (getline(inFile, line)) {
+        inputFileString.append(line+"\n");
+    }
+
+    Parser parser;
+    parser.loadGrammer(inputFileString);
+    std::cout << inputFileString << std::endl;
+    std::cout << parser.grammerToString();
+
+    outFile << "digraph Kraken { \n" + parser.grammerToString() + "\n}" << std::endl;
 
     inFile.close();
     outFile.close();
diff --git a/src/ParseRule.cpp b/src/ParseRule.cpp
new file mode 100644
index 0000000..a964bcb
--- /dev/null
+++ b/src/ParseRule.cpp
@@ -0,0 +1,29 @@
+#include "ParseRule.h"
+
+ParseRule::ParseRule() {
+    pointerIndex = 0;
+    leftHandle = NULL;
+}
+
+ParseRule::~ParseRule() {
+    //Nothing to free: the rule only points at its Symbols
+}
+
+void ParseRule::setLeftHandle(Symbol* leftHandle) {
+    this->leftHandle = leftHandle;
+}
+
+void ParseRule::appendToRight(Symbol* appendee) {
+    rightSide.push_back(appendee);
+}
+
+//Formats the rule as "left -> sym sym ... ;"
+std::string ParseRule::toString() {
+    //A rule whose left handle was never set prints with an empty left side instead of dereferencing NULL
+    std::string concat = (leftHandle != NULL ? leftHandle->toString() : std::string()) + " -> ";
+    for (std::vector<Symbol*>::size_type i = 0; i < rightSide.size(); i++) {
+        concat += rightSide[i]->toString() + " ";
+    }
+    return(concat + ";");
+}
+
diff --git a/src/Parser.cpp b/src/Parser.cpp
new file mode 100644
index 0000000..0d193b5
--- /dev/null
+++ b/src/Parser.cpp
@@ -0,0 +1,80 @@
+#include "Parser.h"
+
+Parser::Parser() {
+
+}
+
+//Frees every rule and symbol created by loadGrammer
+Parser::~Parser() {
+    for (std::vector<ParseRule*>::size_type i = 0; i < loadedGrammer.size(); i++) {
+        delete loadedGrammer[i];
+    }
+    for (std::map<std::string, Symbol*>::iterator it = symbols.begin(); it != symbols.end(); ++it) {
+        delete it->second;
+    }
+}
+
+
+//Looks symbolString up in the symbol map, creating and registering a new Symbol on first use.
+//isTerminal is only applied when the symbol is created.
+Symbol* Parser::getOrAddSymbol(std::string symbolString, bool isTerminal) {
+    std::map<std::string, Symbol*>::iterator found = symbols.find(symbolString);
+    if (found != symbols.end()) {
+        return(found->second);
+    }
+    Symbol* symbol = new Symbol(symbolString, isTerminal);
+    symbols[symbolString] = symbol;
+    return(symbol);
+}
+
+//Reads rules of the form "left = sym sym | sym ;" from grammerInputString and appends them to loadedGrammer.
+//Tokens are whitespace separated; a token starting with a double quote is a terminal.
+//Input that ends in the middle of a rule keeps what was read so far and stops.
+void Parser::loadGrammer(std::string grammerInputString) {
+    reader.setString(grammerInputString);
+
+    std::string currToken = reader.word();
+
+    while(currToken != "") {
+        //Load the left of the rule
+        ParseRule* currentRule = new ParseRule();
+        Symbol* leftSide = getOrAddSymbol(currToken, false); //Left handle is never a terminal
+        currentRule->setLeftHandle(leftSide);
+        reader.word(); //Remove the =
+        //Add the right side, adding new Symbols to symbol map.
+        currToken = reader.word();
+        //An empty token means the input ran out before the closing ";" - stop rather than call at(0) on it
+        while (currToken != ";" && currToken != "") {
+            currentRule->appendToRight(getOrAddSymbol(currToken, currToken.at(0)=='\"')); //If first character is a ", then is a terminal
+            currToken = reader.word();
+            //If there are multiple endings to this rule, finish this rule and start a new one with same left handle
+            if (currToken == "|") {
+                loadedGrammer.push_back(currentRule);
+                currentRule = new ParseRule();
+                currentRule->setLeftHandle(leftSide);
+                currToken = reader.word();
+            }
+        }
+        //Add new rule to grammer
+        loadedGrammer.push_back(currentRule);
+        if (currToken == "") {
+            std::cerr << "Grammar ended before the closing ';' of a rule\n";
+            break;
+        }
+        //Get next token
+        currToken = reader.word();
+    }
+    std::cout << "Parsed!\n";
+}
+
+//Returns the string form of every loaded rule, one rule per line, in load order
+std::string Parser::grammerToString() {
+    //Iterate through the vector, adding string representation of each grammer rule
+    std::cout << "About to toString\n";
+    std::string concat = "";
+    for (std::vector<ParseRule*>::size_type i = 0; i < loadedGrammer.size(); i++) {
+        concat += loadedGrammer[i]->toString() + "\n";
+    }
+    return(concat);
+}
+
diff --git a/src/StringReader.cpp b/src/StringReader.cpp
new file mode 100644
index 0000000..2a22c0b
--- /dev/null
+++ b/src/StringReader.cpp
@@ -0,0 +1,109 @@
+#include "StringReader.h"
+
+StringReader::StringReader()
+{
+    str_pos = 0;
+    end_reached = false;
+}
+
+StringReader::StringReader(std::string inputString)
+{
+    setString(inputString);
+}
+
+StringReader::~StringReader()
+{
+    //dtor
+}
+
+//Replaces the text being read and restarts reading from its first character
+void StringReader::setString(std::string inputString)
+{
+    rd_string = inputString;
+    str_pos = 0;
+    end_reached = false;
+}
+
+//Returns the next token delimited by a space, newline or tab, skipping the
+//delimiters themselves. Returns "" once the input is exhausted.
+std::string StringReader::word(bool truncateEnd)
+{
+    std::vector<std::string> stop_chars;
+    stop_chars.push_back(" ");
+    stop_chars.push_back("\n");
+    stop_chars.push_back("\t");
+
+
+    std::string result = getTokens(stop_chars, truncateEnd);
+    while (result == " " || result == "\n" || result == "\t")
+    {
+        result = getTokens(stop_chars, truncateEnd);
+    }
+    return(result);
+}
+
+//Returns the text up to the next "\n"
+std::string StringReader::line(bool truncateEnd)
+{
+    std::vector<std::string> stop_chars;
+    stop_chars.push_back("\n");
+    return getTokens(stop_chars, truncateEnd);
+}
+
+//Returns the text between the current position and the nearest delimiter in stop_chars.
+//Delimiters are expected to be single characters.
+// - Positioned on a delimiter: consumes it and returns it.
+// - Delimiter further ahead: returns the text before it, followed by the delimiter itself
+//   unless truncateEnd is set, and consumes the delimiter either way.
+// - No delimiter left: returns whatever text remains, or "" when nothing remains.
+std::string StringReader::getTokens(std::vector<std::string> stop_chars, bool truncateEnd)
+{
+    const std::string::size_type npos = std::string::npos;
+    std::string::size_type found_pos = npos;
+    std::string stop_char;
+
+    //Pick the delimiter that occurs closest to the current position (the later entry wins a tie)
+    for (std::vector<std::string>::size_type i = 0; i < stop_chars.size(); i++)
+    {
+        const std::string::size_type new_found_pos = rd_string.find(stop_chars[i], str_pos);
+        if (new_found_pos != npos && (found_pos == npos || new_found_pos <= found_pos))
+        {
+            found_pos = new_found_pos;
+            stop_char = stop_chars[i];
+        }
+    }
+
+    if (found_pos == npos) //No delimiter between here and the end of the string
+    {
+        end_reached = true;
+        if (str_pos >= rd_string.size())
+        {
+            std::cout << "Reached end of file!\n";
+            return "";
+        }
+        //Hand back the unterminated tail instead of silently dropping it
+        const std::string tail = rd_string.substr(str_pos);
+        str_pos = rd_string.size();
+        return tail;
+    }
+
+    if (found_pos == str_pos) //We are sitting on a delimiter
+    {
+        str_pos++;
+        return stop_char;
+    }
+
+    //Include the delimiter character only when the caller asked to keep it
+    const std::string::size_type length = found_pos - str_pos + (truncateEnd ? 0 : 1);
+    const std::string string_section = rd_string.substr(str_pos, length);
+    str_pos = found_pos + 1; //Step past the delimiter
+    return string_section;
+}
+
+//Returns to_truncate without its last character; an empty string stays empty
+std::string StringReader::truncateEnd(std::string to_truncate)
+{
+    if (to_truncate.empty())
+        return to_truncate;
+    return to_truncate.substr(0, to_truncate.length()-1);
+}
diff --git a/src/Symbol.cpp b/src/Symbol.cpp
new file mode 100644
index 0000000..8c8cad8
--- /dev/null
+++ b/src/Symbol.cpp
@@ -0,0 +1,16 @@
+#include "Symbol.h"
+
+Symbol::Symbol(std::string name, bool isTerminal) {
+    this->name = name;
+    this->isTerminal = isTerminal;
+}
+
+Symbol::~Symbol() {
+
+}
+
+//Returns the symbol's name
+std::string Symbol::toString() {
+    return(name);
+}
+